diff options
Diffstat (limited to 'src/regex/regex_internal.h')
-rw-r--r-- | src/regex/regex_internal.h | 453 |
1 files changed, 0 insertions, 453 deletions
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h deleted file mode 100644 index d52479ffe..000000000 --- a/src/regex/regex_internal.h +++ /dev/null | |||
@@ -1,453 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | */ | ||
18 | /** | ||
19 | * @file src/regex/regex_internal.h | ||
20 | * @brief common internal definitions for regex library. | ||
21 | * @author Maximilian Szengel | ||
22 | */ | ||
23 | #ifndef REGEX_INTERNAL_H | ||
24 | #define REGEX_INTERNAL_H | ||
25 | |||
26 | #include "regex_internal_lib.h" | ||
27 | |||
28 | #ifdef __cplusplus | ||
29 | extern "C" | ||
30 | { | ||
31 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
32 | } | ||
33 | #endif | ||
34 | #endif | ||
35 | |||
36 | /** | ||
37 | * String of the literal characters that are allowed inside a regex (apart | ||
38 | * from the operators): the digits 0-9 and the letters A-Z and a-z. | ||
39 | */ | ||
40 | #define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | ||
41 | |||
42 | |||
43 | /** | ||
44 | * Transition between two states. Transitions are stored at the states from | ||
45 | * which they originate ('from_state'). Each state can have 0-n transitions. | ||
46 | * If label is NULL, this is considered to be an epsilon transition. | ||
47 | */ | ||
48 | struct REGEX_INTERNAL_Transition | ||
49 | { | ||
50 | /** | ||
51 | * This is a linked list: previous transition. | ||
52 | */ | ||
53 | struct REGEX_INTERNAL_Transition *prev; | ||
54 | |||
55 | /** | ||
56 | * This is a linked list: next transition. | ||
57 | */ | ||
58 | struct REGEX_INTERNAL_Transition *next; | ||
59 | |||
60 | /** | ||
61 | * Unique id of this transition. | ||
62 | */ | ||
63 | unsigned int id; | ||
64 | |||
65 | /** | ||
66 | * Label for this transition. This is basically the edge label for the graph. | ||
67 | */ | ||
68 | char *label; | ||
69 | |||
70 | /** | ||
71 | * State to which this transition leads. | ||
72 | */ | ||
73 | struct REGEX_INTERNAL_State *to_state; | ||
74 | |||
75 | /** | ||
76 | * State from which this transition originates. | ||
77 | */ | ||
78 | struct REGEX_INTERNAL_State *from_state; | ||
79 | }; | ||
80 | |||
81 | |||
82 | /** | ||
83 | * Forward declaration of a state. Can be used in DFA and NFA automata. | ||
84 | */ | ||
85 | struct REGEX_INTERNAL_State; | ||
86 | |||
87 | |||
88 | /** | ||
89 | * Set of states, stored as an array of state pointers. | ||
90 | */ | ||
91 | struct REGEX_INTERNAL_StateSet | ||
92 | { | ||
93 | /** | ||
94 | * Array of states. | ||
95 | */ | ||
96 | struct REGEX_INTERNAL_State **states; | ||
97 | |||
98 | /** | ||
99 | * Number of entries in *use* in the 'states' array (see 'size'). | ||
100 | */ | ||
101 | unsigned int off; | ||
102 | |||
103 | /** | ||
104 | * Length of the 'states' array (entries in use are counted by 'off'). | ||
105 | */ | ||
106 | unsigned int size; | ||
107 | }; | ||
108 | |||
109 | |||
110 | /** | ||
111 | * A state. Can be used in DFA and NFA automata. | ||
112 | */ | ||
113 | struct REGEX_INTERNAL_State | ||
114 | { | ||
115 | /** | ||
116 | * This is a linked list to keep states in an automaton: previous state. | ||
117 | */ | ||
118 | struct REGEX_INTERNAL_State *prev; | ||
119 | |||
120 | /** | ||
121 | * This is a linked list to keep states in an automaton: next state. | ||
122 | */ | ||
123 | struct REGEX_INTERNAL_State *next; | ||
124 | |||
125 | /** | ||
126 | * This is a multi DLL for StateSet_MDLL: previous state. | ||
127 | */ | ||
128 | struct REGEX_INTERNAL_State *prev_SS; | ||
129 | |||
130 | /** | ||
131 | * This is a multi DLL for StateSet_MDLL: next state. | ||
132 | */ | ||
133 | struct REGEX_INTERNAL_State *next_SS; | ||
134 | |||
135 | /** | ||
136 | * This is a multi DLL for StateSet_MDLL Stack: previous state. | ||
137 | */ | ||
138 | struct REGEX_INTERNAL_State *prev_ST; | ||
139 | |||
140 | /** | ||
141 | * This is a multi DLL for StateSet_MDLL Stack: next state. | ||
142 | */ | ||
143 | struct REGEX_INTERNAL_State *next_ST; | ||
144 | |||
145 | /** | ||
146 | * Unique state id. | ||
147 | */ | ||
148 | unsigned int id; | ||
149 | |||
150 | /** | ||
151 | * Unique state id that is used for traversing the automaton. It is guaranteed | ||
152 | * to be > 0 and < state_count. | ||
153 | */ | ||
154 | unsigned int traversal_id; | ||
155 | |||
156 | /** | ||
157 | * Whether this is an accepting state or not. | ||
158 | */ | ||
159 | int accepting; | ||
160 | |||
161 | /** | ||
162 | * Marking of the state. This is used for marking all visited states when | ||
163 | * traversing all states of an automaton and for cases where the state id | ||
164 | * cannot be used (dfa minimization). | ||
165 | */ | ||
166 | int marked; | ||
167 | |||
168 | /** | ||
169 | * Marking the state as contained. This is used for checking in constant time | ||
170 | * whether the state is contained in a set. | ||
171 | */ | ||
172 | int contained; | ||
173 | |||
174 | /** | ||
175 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
176 | * states with the same scc_id are part of the same SCC. scc_id is 0 if the | ||
177 | * state is not part of any SCC. | ||
178 | */ | ||
179 | unsigned int scc_id; | ||
180 | |||
181 | /** | ||
182 | * Used for SCC detection. | ||
183 | */ | ||
184 | int index; | ||
185 | |||
186 | /** | ||
187 | * Used for SCC detection. | ||
188 | */ | ||
189 | int lowlink; | ||
190 | |||
191 | /** | ||
192 | * Human readable name of the state. Used for debugging and graph | ||
193 | * creation. | ||
194 | */ | ||
195 | char *name; | ||
196 | |||
197 | /** | ||
198 | * Hash of the state. | ||
199 | */ | ||
200 | struct GNUNET_HashCode hash; | ||
201 | |||
202 | /** | ||
203 | * Linear state ID acquired by depth-first-search. This ID should be used for | ||
204 | * storing information about the state in an array, because the 'id' of the | ||
205 | * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0 | ||
206 | * and < 'state_count'. | ||
207 | */ | ||
208 | unsigned int dfs_id; | ||
209 | |||
210 | /** | ||
211 | * Proof for this state. | ||
212 | */ | ||
213 | char *proof; | ||
214 | |||
215 | /** | ||
216 | * Number of transitions from this state to other states. | ||
217 | */ | ||
218 | unsigned int transition_count; | ||
219 | |||
220 | /** | ||
221 | * DLL of transitions: head. | ||
222 | */ | ||
223 | struct REGEX_INTERNAL_Transition *transitions_head; | ||
224 | |||
225 | /** | ||
226 | * DLL of transitions: tail. | ||
227 | */ | ||
228 | struct REGEX_INTERNAL_Transition *transitions_tail; | ||
229 | |||
230 | /** | ||
231 | * Number of incoming transitions. Used for compressing DFA paths. | ||
232 | */ | ||
233 | unsigned int incoming_transition_count; | ||
234 | |||
235 | /** | ||
236 | * Set of states on which this state is based. Used when creating a DFA out | ||
237 | * of several NFA states. | ||
238 | */ | ||
239 | struct REGEX_INTERNAL_StateSet nfa_set; | ||
240 | }; | ||
241 | |||
242 | |||
243 | /** | ||
244 | * Type of an automaton: either an NFA or a DFA. | ||
245 | */ | ||
246 | enum REGEX_INTERNAL_AutomatonType | ||
247 | { | ||
248 | NFA, | ||
249 | DFA | ||
250 | }; | ||
251 | |||
252 | |||
253 | /** | ||
254 | * Automaton representation. | ||
255 | */ | ||
256 | struct REGEX_INTERNAL_Automaton | ||
257 | { | ||
258 | /** | ||
259 | * Linked list of NFAs used for partial NFA creation: previous automaton. | ||
260 | */ | ||
261 | struct REGEX_INTERNAL_Automaton *prev; | ||
262 | |||
263 | /** | ||
264 | * Linked list of NFAs used for partial NFA creation: next automaton. | ||
265 | */ | ||
266 | struct REGEX_INTERNAL_Automaton *next; | ||
267 | |||
268 | /** | ||
269 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
270 | * where each NFA itself consists of one or more NFAs linked together. | ||
271 | */ | ||
272 | struct REGEX_INTERNAL_State *start; | ||
273 | |||
274 | /** | ||
275 | * End state of the partial NFA. This is undefined for DFAs. | ||
276 | */ | ||
277 | struct REGEX_INTERNAL_State *end; | ||
278 | |||
279 | /** | ||
280 | * Number of states in the automaton. | ||
281 | */ | ||
282 | unsigned int state_count; | ||
283 | |||
284 | /** | ||
285 | * DLL of states: head. | ||
286 | */ | ||
287 | struct REGEX_INTERNAL_State *states_head; | ||
288 | |||
289 | /** | ||
290 | * DLL of states: tail. | ||
291 | */ | ||
292 | struct REGEX_INTERNAL_State *states_tail; | ||
293 | |||
294 | /** | ||
295 | * Type of the automaton. | ||
296 | */ | ||
297 | enum REGEX_INTERNAL_AutomatonType type; | ||
298 | |||
299 | /** | ||
300 | * Regex string. | ||
301 | */ | ||
302 | char *regex; | ||
303 | |||
304 | /** | ||
305 | * Canonical regex (result of RX->NFA->DFA->RX). | ||
306 | */ | ||
307 | char *canonical_regex; | ||
308 | |||
309 | /** | ||
310 | * GNUNET_YES if multi strides have been added to the automaton. | ||
311 | */ | ||
312 | int is_multistrided; | ||
313 | }; | ||
314 | |||
315 | |||
316 | /** | ||
317 | * Construct an NFA by parsing the regex string of length 'len'. | ||
318 | * | ||
319 | * @param regex regular expression string. | ||
320 | * @param len length of the regex string. | ||
321 | * | ||
322 | * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy. | ||
323 | */ | ||
324 | struct REGEX_INTERNAL_Automaton * | ||
325 | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len); | ||
326 | |||
327 | |||
328 | /** | ||
329 | * Function that gets passed to automaton traversal and is called before each | ||
330 | * next traversal from state 's' using transition 't' to check if traversal | ||
331 | * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue. | ||
332 | * | ||
333 | * @param cls closure for the check. | ||
334 | * @param s current state in the traversal. | ||
335 | * @param t current transition from state 's' that will be used for the next | ||
336 | * step. | ||
337 | * | ||
338 | * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop. | ||
339 | */ | ||
340 | typedef int (*REGEX_INTERNAL_traverse_check) (void *cls, | ||
341 | struct REGEX_INTERNAL_State * s, | ||
342 | struct REGEX_INTERNAL_Transition * t); | ||
343 | |||
344 | |||
345 | /** | ||
346 | * Function that is called with each state when traversing an automaton. | ||
347 | * | ||
348 | * @param cls closure. | ||
349 | * @param count current count of the state, from 0 to a->state_count - 1. | ||
350 | * @param s state. | ||
351 | */ | ||
352 | typedef void (*REGEX_INTERNAL_traverse_action) (void *cls, | ||
353 | const unsigned int count, | ||
354 | struct REGEX_INTERNAL_State * s); | ||
355 | |||
356 | |||
357 | /** | ||
358 | * Traverses the given automaton using depth-first-search (DFS) from its start | ||
359 | * state, visiting all reachable states and calling 'action' on each one of | ||
360 | * them. | ||
361 | * | ||
362 | * @param a automaton to be traversed. | ||
363 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
364 | * @param check function that is checked before advancing on each transition | ||
365 | * in the DFS. | ||
366 | * @param check_cls closure for check. | ||
367 | * @param action action to be performed on each state. | ||
368 | * @param action_cls closure for action. | ||
369 | */ | ||
370 | void | ||
371 | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, | ||
372 | struct REGEX_INTERNAL_State *start, | ||
373 | REGEX_INTERNAL_traverse_check check, | ||
374 | void *check_cls, | ||
375 | REGEX_INTERNAL_traverse_action action, | ||
376 | void *action_cls); | ||
377 | |||
378 | /** | ||
379 | * Get the canonical regex of the given automaton. | ||
380 | * When constructing the automaton, a proof is computed for each state, | ||
381 | * consisting of the regular expression leading to this state. A complete | ||
382 | * regex for the automaton can be computed by combining these proofs. | ||
383 | * As of now, this function is only useful for testing. | ||
384 | * | ||
385 | * @param a automaton for which the canonical regex should be returned. | ||
386 | * | ||
387 | * @return canonical regex string. | ||
388 | */ | ||
389 | const char * | ||
390 | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a); | ||
391 | |||
392 | |||
393 | /** | ||
394 | * Get the number of transitions that are contained in the given automaton. | ||
395 | * | ||
396 | * @param a automaton for which the number of transitions should be returned. | ||
397 | * | ||
398 | * @return number of transitions in the given automaton 'a'. | ||
399 | */ | ||
400 | unsigned int | ||
401 | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a); | ||
402 | |||
403 | |||
404 | /** | ||
405 | * Context that contains an id counter for states and transitions as well as a | ||
406 | * DLL of automata used as a stack for NFA construction. | ||
407 | */ | ||
408 | struct REGEX_INTERNAL_Context | ||
409 | { | ||
410 | /** | ||
411 | * Unique state id. | ||
412 | */ | ||
413 | unsigned int state_id; | ||
414 | |||
415 | /** | ||
416 | * Unique transition id. | ||
417 | */ | ||
418 | unsigned int transition_id; | ||
419 | |||
420 | /** | ||
421 | * DLL of REGEX_INTERNAL_Automaton structs used as a stack: head. | ||
422 | */ | ||
423 | struct REGEX_INTERNAL_Automaton *stack_head; | ||
424 | |||
425 | /** | ||
426 | * DLL of REGEX_INTERNAL_Automaton structs used as a stack: tail. | ||
427 | */ | ||
428 | struct REGEX_INTERNAL_Automaton *stack_tail; | ||
429 | }; | ||
430 | |||
431 | |||
432 | /** | ||
433 | * Adds multi-strided transitions to the given 'dfa'. | ||
434 | * | ||
435 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
436 | * @param dfa DFA to which the multi-strided transitions should be added. | ||
437 | * @param stride_len length of the strides. | ||
438 | */ | ||
439 | void | ||
440 | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, | ||
441 | struct REGEX_INTERNAL_Automaton *dfa, | ||
442 | const unsigned int stride_len); | ||
443 | |||
444 | |||
445 | |||
446 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
447 | { | ||
448 | #endif | ||
449 | #ifdef __cplusplus | ||
450 | } | ||
451 | #endif | ||
452 | |||
453 | #endif | ||