diff options
Diffstat (limited to 'src/regex/regex_internal.h')
-rw-r--r-- | src/regex/regex_internal.h | 456 |
1 files changed, 0 insertions, 456 deletions
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h deleted file mode 100644 index 8f29cff33..000000000 --- a/src/regex/regex_internal.h +++ /dev/null | |||
@@ -1,456 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_internal.h | ||
22 | * @brief common internal definitions for regex library. | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #ifndef REGEX_INTERNAL_H | ||
26 | #define REGEX_INTERNAL_H | ||
27 | |||
28 | #include "regex_internal_lib.h" | ||
29 | |||
30 | #ifdef __cplusplus | ||
31 | extern "C" | ||
32 | { | ||
33 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
34 | } | ||
35 | #endif | ||
36 | #endif | ||
37 | |||
38 | /** | ||
39 | * char array of literals that are allowed inside a regex (apart from the | ||
40 | * operators) | ||
41 | */ | ||
42 | #define ALLOWED_LITERALS \ | ||
43 | "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | ||
44 | |||
45 | |||
46 | /** | ||
47 | * Transition between two states. Transitions are stored at the states from | ||
48 | * which they origin ('from_state'). Each state can have 0-n transitions. | ||
49 | * If label is NULL, this is considered to be an epsilon transition. | ||
50 | */ | ||
51 | struct REGEX_INTERNAL_Transition | ||
52 | { | ||
53 | /** | ||
54 | * This is a linked list. | ||
55 | */ | ||
56 | struct REGEX_INTERNAL_Transition *prev; | ||
57 | |||
58 | /** | ||
59 | * This is a linked list. | ||
60 | */ | ||
61 | struct REGEX_INTERNAL_Transition *next; | ||
62 | |||
63 | /** | ||
64 | * Unique id of this transition. | ||
65 | */ | ||
66 | unsigned int id; | ||
67 | |||
68 | /** | ||
69 | * Label for this transition. This is basically the edge label for the graph. | ||
70 | */ | ||
71 | char *label; | ||
72 | |||
73 | /** | ||
74 | * State to which this transition leads. | ||
75 | */ | ||
76 | struct REGEX_INTERNAL_State *to_state; | ||
77 | |||
78 | /** | ||
79 | * State from which this transition origins. | ||
80 | */ | ||
81 | struct REGEX_INTERNAL_State *from_state; | ||
82 | }; | ||
83 | |||
84 | |||
85 | /** | ||
86 | * A state. Can be used in DFA and NFA automatons. | ||
87 | */ | ||
88 | struct REGEX_INTERNAL_State; | ||
89 | |||
90 | |||
91 | /** | ||
92 | * Set of states. | ||
93 | */ | ||
94 | struct REGEX_INTERNAL_StateSet | ||
95 | { | ||
96 | /** | ||
97 | * Array of states. | ||
98 | */ | ||
99 | struct REGEX_INTERNAL_State **states; | ||
100 | |||
101 | /** | ||
102 | * Number of entries in *use* in the 'states' array. | ||
103 | */ | ||
104 | unsigned int off; | ||
105 | |||
106 | /** | ||
107 | * Length of the 'states' array. | ||
108 | */ | ||
109 | unsigned int size; | ||
110 | }; | ||
111 | |||
112 | |||
113 | /** | ||
114 | * A state. Can be used in DFA and NFA automatons. | ||
115 | */ | ||
116 | struct REGEX_INTERNAL_State | ||
117 | { | ||
118 | /** | ||
119 | * This is a linked list to keep states in an automaton. | ||
120 | */ | ||
121 | struct REGEX_INTERNAL_State *prev; | ||
122 | |||
123 | /** | ||
124 | * This is a linked list to keep states in an automaton. | ||
125 | */ | ||
126 | struct REGEX_INTERNAL_State *next; | ||
127 | |||
128 | /** | ||
129 | * This is a multi DLL for StateSet_MDLL. | ||
130 | */ | ||
131 | struct REGEX_INTERNAL_State *prev_SS; | ||
132 | |||
133 | /** | ||
134 | * This is a multi DLL for StateSet_MDLL. | ||
135 | */ | ||
136 | struct REGEX_INTERNAL_State *next_SS; | ||
137 | |||
138 | /** | ||
139 | * This is a multi DLL for StateSet_MDLL Stack. | ||
140 | */ | ||
141 | struct REGEX_INTERNAL_State *prev_ST; | ||
142 | |||
143 | /** | ||
144 | * This is a multi DLL for StateSet_MDLL Stack. | ||
145 | */ | ||
146 | struct REGEX_INTERNAL_State *next_ST; | ||
147 | |||
148 | /** | ||
149 | * Unique state id. | ||
150 | */ | ||
151 | unsigned int id; | ||
152 | |||
153 | /** | ||
154 | * Unique state id that is used for traversing the automaton. It is guaranteed | ||
155 | * to be > 0 and < state_count. | ||
156 | */ | ||
157 | unsigned int traversal_id; | ||
158 | |||
159 | /** | ||
160 | * If this is an accepting state or not. | ||
161 | */ | ||
162 | int accepting; | ||
163 | |||
164 | /** | ||
165 | * Marking of the state. This is used for marking all visited states when | ||
166 | * traversing all states of an automaton and for cases where the state id | ||
167 | * cannot be used (dfa minimization). | ||
168 | */ | ||
169 | int marked; | ||
170 | |||
171 | /** | ||
172 | * Marking the state as contained. This is used for checking, if the state is | ||
173 | * contained in a set in constant time. | ||
174 | */ | ||
175 | int contained; | ||
176 | |||
177 | /** | ||
178 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
179 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state | ||
180 | * is not a part of any SCC. | ||
181 | */ | ||
182 | unsigned int scc_id; | ||
183 | |||
184 | /** | ||
185 | * Used for SCC detection. | ||
186 | */ | ||
187 | int index; | ||
188 | |||
189 | /** | ||
190 | * Used for SCC detection. | ||
191 | */ | ||
192 | int lowlink; | ||
193 | |||
194 | /** | ||
195 | * Human readable name of the state. Used for debugging and graph | ||
196 | * creation. | ||
197 | */ | ||
198 | char *name; | ||
199 | |||
200 | /** | ||
201 | * Hash of the state. | ||
202 | */ | ||
203 | struct GNUNET_HashCode hash; | ||
204 | |||
205 | /** | ||
206 | * Linear state ID acquired by depth-first-search. This ID should be used for | ||
207 | * storing information about the state in an array, because the 'id' of the | ||
208 | * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0 | ||
209 | * and < 'state_count'. | ||
210 | */ | ||
211 | unsigned int dfs_id; | ||
212 | |||
213 | /** | ||
214 | * Proof for this state. | ||
215 | */ | ||
216 | char *proof; | ||
217 | |||
218 | /** | ||
219 | * Number of transitions from this state to other states. | ||
220 | */ | ||
221 | unsigned int transition_count; | ||
222 | |||
223 | /** | ||
224 | * DLL of transitions. | ||
225 | */ | ||
226 | struct REGEX_INTERNAL_Transition *transitions_head; | ||
227 | |||
228 | /** | ||
229 | * DLL of transitions. | ||
230 | */ | ||
231 | struct REGEX_INTERNAL_Transition *transitions_tail; | ||
232 | |||
233 | /** | ||
234 | * Number of incoming transitions. Used for compressing DFA paths. | ||
235 | */ | ||
236 | unsigned int incoming_transition_count; | ||
237 | |||
238 | /** | ||
239 | * Set of states on which this state is based on. Used when creating a DFA out | ||
240 | * of several NFA states. | ||
241 | */ | ||
242 | struct REGEX_INTERNAL_StateSet nfa_set; | ||
243 | }; | ||
244 | |||
245 | |||
246 | /** | ||
247 | * Type of an automaton. | ||
248 | */ | ||
249 | enum REGEX_INTERNAL_AutomatonType | ||
250 | { | ||
251 | NFA, | ||
252 | DFA | ||
253 | }; | ||
254 | |||
255 | |||
256 | /** | ||
257 | * Automaton representation. | ||
258 | */ | ||
259 | struct REGEX_INTERNAL_Automaton | ||
260 | { | ||
261 | /** | ||
262 | * Linked list of NFAs used for partial NFA creation. | ||
263 | */ | ||
264 | struct REGEX_INTERNAL_Automaton *prev; | ||
265 | |||
266 | /** | ||
267 | * Linked list of NFAs used for partial NFA creation. | ||
268 | */ | ||
269 | struct REGEX_INTERNAL_Automaton *next; | ||
270 | |||
271 | /** | ||
272 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
273 | * where each NFA itself consists of one or more NFAs linked together. | ||
274 | */ | ||
275 | struct REGEX_INTERNAL_State *start; | ||
276 | |||
277 | /** | ||
278 | * End state of the partial NFA. This is undefined for DFAs | ||
279 | */ | ||
280 | struct REGEX_INTERNAL_State *end; | ||
281 | |||
282 | /** | ||
283 | * Number of states in the automaton. | ||
284 | */ | ||
285 | unsigned int state_count; | ||
286 | |||
287 | /** | ||
288 | * DLL of states. | ||
289 | */ | ||
290 | struct REGEX_INTERNAL_State *states_head; | ||
291 | |||
292 | /** | ||
293 | * DLL of states | ||
294 | */ | ||
295 | struct REGEX_INTERNAL_State *states_tail; | ||
296 | |||
297 | /** | ||
298 | * Type of the automaton. | ||
299 | */ | ||
300 | enum REGEX_INTERNAL_AutomatonType type; | ||
301 | |||
302 | /** | ||
303 | * Regex | ||
304 | */ | ||
305 | char *regex; | ||
306 | |||
307 | /** | ||
308 | * Canonical regex (result of RX->NFA->DFA->RX) | ||
309 | */ | ||
310 | char *canonical_regex; | ||
311 | |||
312 | /** | ||
313 | * GNUNET_YES, if multi strides have been added to the Automaton. | ||
314 | */ | ||
315 | int is_multistrided; | ||
316 | }; | ||
317 | |||
318 | |||
319 | /** | ||
320 | * Construct an NFA by parsing the regex string of length 'len'. | ||
321 | * | ||
322 | * @param regex regular expression string. | ||
323 | * @param len length of the string. | ||
324 | * | ||
325 | * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy. | ||
326 | */ | ||
327 | struct REGEX_INTERNAL_Automaton * | ||
328 | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len); | ||
329 | |||
330 | |||
331 | /** | ||
332 | * Function that gets passed to automaton traversal and is called before each | ||
333 | * next traversal from state 's' using transition 't' to check if traversal | ||
334 | * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue. | ||
335 | * | ||
336 | * @param cls closure for the check. | ||
337 | * @param s current state in the traversal. | ||
338 | * @param t current transition from state 's' that will be used for the next | ||
339 | * step. | ||
340 | * | ||
341 | * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop. | ||
342 | */ | ||
343 | typedef int (*REGEX_INTERNAL_traverse_check) (void *cls, | ||
344 | struct REGEX_INTERNAL_State *s, | ||
345 | struct REGEX_INTERNAL_Transition * | ||
346 | t); | ||
347 | |||
348 | |||
349 | /** | ||
350 | * Function that is called with each state, when traversing an automaton. | ||
351 | * | ||
352 | * @param cls closure. | ||
353 | * @param count current count of the state, from 0 to a->state_count -1. | ||
354 | * @param s state. | ||
355 | */ | ||
356 | typedef void (*REGEX_INTERNAL_traverse_action) (void *cls, | ||
357 | const unsigned int count, | ||
358 | struct REGEX_INTERNAL_State *s); | ||
359 | |||
360 | |||
361 | /** | ||
362 | * Traverses the given automaton using depth-first-search (DFS) from it's start | ||
363 | * state, visiting all reachable states and calling 'action' on each one of | ||
364 | * them. | ||
365 | * | ||
366 | * @param a automaton to be traversed. | ||
367 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
368 | * @param check function that is checked before advancing on each transition | ||
369 | * in the DFS. | ||
370 | * @param check_cls closure for check. | ||
371 | * @param action action to be performed on each state. | ||
372 | * @param action_cls closure for action | ||
373 | */ | ||
374 | void | ||
375 | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, | ||
376 | struct REGEX_INTERNAL_State *start, | ||
377 | REGEX_INTERNAL_traverse_check check, | ||
378 | void *check_cls, | ||
379 | REGEX_INTERNAL_traverse_action action, | ||
380 | void *action_cls); | ||
381 | |||
382 | /** | ||
383 | * Get the canonical regex of the given automaton. | ||
384 | * When constructing the automaton a proof is computed for each state, | ||
385 | * consisting of the regular expression leading to this state. A complete | ||
386 | * regex for the automaton can be computed by combining these proofs. | ||
387 | * As of now this function is only useful for testing. | ||
388 | * | ||
389 | * @param a automaton for which the canonical regex should be returned. | ||
390 | * | ||
391 | * @return canonical regex string. | ||
392 | */ | ||
393 | const char * | ||
394 | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a); | ||
395 | |||
396 | |||
397 | /** | ||
398 | * Get the number of transitions that are contained in the given automaton. | ||
399 | * | ||
400 | * @param a automaton for which the number of transitions should be returned. | ||
401 | * | ||
402 | * @return number of transitions in the given automaton. | ||
403 | */ | ||
404 | unsigned int | ||
405 | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a); | ||
406 | |||
407 | |||
408 | /** | ||
409 | * Context that contains an id counter for states and transitions as well as a | ||
410 | * DLL of automatons used as a stack for NFA construction. | ||
411 | */ | ||
412 | struct REGEX_INTERNAL_Context | ||
413 | { | ||
414 | /** | ||
415 | * Unique state id. | ||
416 | */ | ||
417 | unsigned int state_id; | ||
418 | |||
419 | /** | ||
420 | * Unique transition id. | ||
421 | */ | ||
422 | unsigned int transition_id; | ||
423 | |||
424 | /** | ||
425 | * DLL of REGEX_INTERNAL_Automaton's used as a stack. | ||
426 | */ | ||
427 | struct REGEX_INTERNAL_Automaton *stack_head; | ||
428 | |||
429 | /** | ||
430 | * DLL of REGEX_INTERNAL_Automaton's used as a stack. | ||
431 | */ | ||
432 | struct REGEX_INTERNAL_Automaton *stack_tail; | ||
433 | }; | ||
434 | |||
435 | |||
436 | /** | ||
437 | * Adds multi-strided transitions to the given 'dfa'. | ||
438 | * | ||
439 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
440 | * @param dfa DFA to which the multi strided transitions should be added. | ||
441 | * @param stride_len length of the strides. | ||
442 | */ | ||
443 | void | ||
444 | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, | ||
445 | struct REGEX_INTERNAL_Automaton *dfa, | ||
446 | const unsigned int stride_len); | ||
447 | |||
448 | |||
449 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
450 | { | ||
451 | #endif | ||
452 | #ifdef __cplusplus | ||
453 | } | ||
454 | #endif | ||
455 | |||
456 | #endif | ||