diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-07-04 13:54:43 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-07-04 13:54:43 +0000 |
commit | bd0e2cd49ae9ae916f6a4288ac8893128b8168d6 (patch) | |
tree | c680e35f3d2bb3cffb114808bd19f2299885af50 /src | |
parent | a93954693fef730d7a41c168b4961d19e5dff90c (diff) | |
download | gnunet-bd0e2cd49ae9ae916f6a4288ac8893128b8168d6.tar.gz gnunet-bd0e2cd49ae9ae916f6a4288ac8893128b8168d6.zip |
Summary: regex cleanup and bugfixes
Author: szengel
Diffstat (limited to 'src')
-rw-r--r-- | src/regex/Makefile.am | 2 | ||||
-rw-r--r-- | src/regex/regex.c | 579 | ||||
-rw-r--r-- | src/regex/regex_graph.c | 248 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 235 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 10 | ||||
-rw-r--r-- | src/regex/test_regex_proofs.c | 17 |
6 files changed, 595 insertions, 496 deletions
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am index 1284111d8..36062272e 100644 --- a/src/regex/Makefile.am +++ b/src/regex/Makefile.am | |||
@@ -12,7 +12,7 @@ lib_LTLIBRARIES = libgnunetregex.la | |||
12 | 12 | ||
13 | libgnunetregex_la_SOURCES = \ | 13 | libgnunetregex_la_SOURCES = \ |
14 | regex_internal.h regex.c \ | 14 | regex_internal.h regex.c \ |
15 | regex_random.c | 15 | regex_graph.c regex_random.c |
16 | libgnunetregex_la_LIBADD = -lm \ | 16 | libgnunetregex_la_LIBADD = -lm \ |
17 | $(top_builddir)/src/util/libgnunetutil.la | 17 | $(top_builddir)/src/util/libgnunetutil.la |
18 | libgnunetregex_la_LDFLAGS = \ | 18 | libgnunetregex_la_LDFLAGS = \ |
diff --git a/src/regex/regex.c b/src/regex/regex.c index a7ee9d9a8..a1619ef28 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -28,11 +28,13 @@ | |||
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | #include "regex_internal.h" | 29 | #include "regex_internal.h" |
30 | 30 | ||
31 | |||
31 | /** | 32 | /** |
32 | * Constant for how many bits the initial string regex should have. | 33 | * Constant for how many bits the initial string regex should have. |
33 | */ | 34 | */ |
34 | #define INITIAL_BITS 10 | 35 | #define INITIAL_BITS 10 |
35 | 36 | ||
37 | |||
36 | /** | 38 | /** |
37 | * Context that contains an id counter for states and transitions as well as a | 39 | * Context that contains an id counter for states and transitions as well as a |
38 | * DLL of automatons used as a stack for NFA construction. | 40 | * DLL of automatons used as a stack for NFA construction. |
@@ -60,211 +62,6 @@ struct GNUNET_REGEX_Context | |||
60 | struct GNUNET_REGEX_Automaton *stack_tail; | 62 | struct GNUNET_REGEX_Automaton *stack_tail; |
61 | }; | 63 | }; |
62 | 64 | ||
63 | /** | ||
64 | * Type of an automaton. | ||
65 | */ | ||
66 | enum GNUNET_REGEX_AutomatonType | ||
67 | { | ||
68 | NFA, | ||
69 | DFA | ||
70 | }; | ||
71 | |||
72 | /** | ||
73 | * Automaton representation. | ||
74 | */ | ||
75 | struct GNUNET_REGEX_Automaton | ||
76 | { | ||
77 | /** | ||
78 | * Linked list of NFAs used for partial NFA creation. | ||
79 | */ | ||
80 | struct GNUNET_REGEX_Automaton *prev; | ||
81 | |||
82 | /** | ||
83 | * Linked list of NFAs used for partial NFA creation. | ||
84 | */ | ||
85 | struct GNUNET_REGEX_Automaton *next; | ||
86 | |||
87 | /** | ||
88 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
89 | * where each NFA itself consists of one or more NFAs linked together. | ||
90 | */ | ||
91 | struct GNUNET_REGEX_State *start; | ||
92 | |||
93 | /** | ||
94 | * End state of the partial NFA. This is undefined for DFAs | ||
95 | */ | ||
96 | struct GNUNET_REGEX_State *end; | ||
97 | |||
98 | /** | ||
99 | * Number of states in the automaton. | ||
100 | */ | ||
101 | unsigned int state_count; | ||
102 | |||
103 | /** | ||
104 | * DLL of states. | ||
105 | */ | ||
106 | struct GNUNET_REGEX_State *states_head; | ||
107 | |||
108 | /** | ||
109 | * DLL of states | ||
110 | */ | ||
111 | struct GNUNET_REGEX_State *states_tail; | ||
112 | |||
113 | /** | ||
114 | * Type of the automaton. | ||
115 | */ | ||
116 | enum GNUNET_REGEX_AutomatonType type; | ||
117 | |||
118 | /** | ||
119 | * Regex | ||
120 | */ | ||
121 | char *regex; | ||
122 | |||
123 | /** | ||
124 | * Canonical regex (result of RX->NFA->DFA->RX) | ||
125 | */ | ||
126 | char *canonical_regex; | ||
127 | }; | ||
128 | |||
129 | /** | ||
130 | * A state. Can be used in DFA and NFA automatons. | ||
131 | */ | ||
132 | struct GNUNET_REGEX_State | ||
133 | { | ||
134 | /** | ||
135 | * This is a linked list. | ||
136 | */ | ||
137 | struct GNUNET_REGEX_State *prev; | ||
138 | |||
139 | /** | ||
140 | * This is a linked list. | ||
141 | */ | ||
142 | struct GNUNET_REGEX_State *next; | ||
143 | |||
144 | /** | ||
145 | * Unique state id. | ||
146 | */ | ||
147 | unsigned int id; | ||
148 | |||
149 | /** | ||
150 | * If this is an accepting state or not. | ||
151 | */ | ||
152 | int accepting; | ||
153 | |||
154 | /** | ||
155 | * Marking of the state. This is used for marking all visited states when | ||
156 | * traversing all states of an automaton and for cases where the state id | ||
157 | * cannot be used (dfa minimization). | ||
158 | */ | ||
159 | int marked; | ||
160 | |||
161 | /** | ||
162 | * Marking the state as contained. This is used for checking, if the state is | ||
163 | * contained in a set in constant time | ||
164 | */ | ||
165 | int contained; | ||
166 | |||
167 | /** | ||
168 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
169 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state | ||
170 | * is not a part of any SCC. | ||
171 | */ | ||
172 | unsigned int scc_id; | ||
173 | |||
174 | /** | ||
175 | * Used for SCC detection. | ||
176 | */ | ||
177 | int index; | ||
178 | |||
179 | /** | ||
180 | * Used for SCC detection. | ||
181 | */ | ||
182 | int lowlink; | ||
183 | |||
184 | /** | ||
185 | * Human readable name of the automaton. Used for debugging and graph | ||
186 | * creation. | ||
187 | */ | ||
188 | char *name; | ||
189 | |||
190 | /** | ||
191 | * Hash of the state. | ||
192 | */ | ||
193 | struct GNUNET_HashCode hash; | ||
194 | |||
195 | /** | ||
196 | * State ID for proof creation. | ||
197 | */ | ||
198 | unsigned int proof_id; | ||
199 | |||
200 | /** | ||
201 | * Proof for this state. | ||
202 | */ | ||
203 | char *proof; | ||
204 | |||
205 | /** | ||
206 | * Number of transitions from this state to other states. | ||
207 | */ | ||
208 | unsigned int transition_count; | ||
209 | |||
210 | /** | ||
211 | * DLL of transitions. | ||
212 | */ | ||
213 | struct Transition *transitions_head; | ||
214 | |||
215 | /** | ||
216 | * DLL of transitions. | ||
217 | */ | ||
218 | struct Transition *transitions_tail; | ||
219 | |||
220 | /** | ||
221 | * Set of states on which this state is based on. Used when creating a DFA out | ||
222 | * of several NFA states. | ||
223 | */ | ||
224 | struct GNUNET_REGEX_StateSet *nfa_set; | ||
225 | }; | ||
226 | |||
227 | /** | ||
228 | * Transition between two states. Each state can have 0-n transitions. If label | ||
229 | * is 0, this is considered to be an epsilon transition. | ||
230 | */ | ||
231 | struct Transition | ||
232 | { | ||
233 | /** | ||
234 | * This is a linked list. | ||
235 | */ | ||
236 | struct Transition *prev; | ||
237 | |||
238 | /** | ||
239 | * This is a linked list. | ||
240 | */ | ||
241 | struct Transition *next; | ||
242 | |||
243 | /** | ||
244 | * Unique id of this transition. | ||
245 | */ | ||
246 | unsigned int id; | ||
247 | |||
248 | /** | ||
249 | * Label for this transition. This is basically the edge label for the graph. | ||
250 | */ | ||
251 | char label; | ||
252 | |||
253 | /** | ||
254 | * State to which this transition leads. | ||
255 | */ | ||
256 | struct GNUNET_REGEX_State *to_state; | ||
257 | |||
258 | /** | ||
259 | * State from which this transition origins. | ||
260 | */ | ||
261 | struct GNUNET_REGEX_State *from_state; | ||
262 | |||
263 | /** | ||
264 | * Mark this transition. For example when reversing the automaton. | ||
265 | */ | ||
266 | int mark; | ||
267 | }; | ||
268 | 65 | ||
269 | /** | 66 | /** |
270 | * Set of states. | 67 | * Set of states. |
@@ -282,6 +79,7 @@ struct GNUNET_REGEX_StateSet | |||
282 | unsigned int len; | 79 | unsigned int len; |
283 | }; | 80 | }; |
284 | 81 | ||
82 | |||
285 | /* | 83 | /* |
286 | * Debug helper functions | 84 | * Debug helper functions |
287 | */ | 85 | */ |
@@ -294,6 +92,7 @@ struct GNUNET_REGEX_StateSet | |||
294 | void | 92 | void |
295 | debug_print_transitions (struct GNUNET_REGEX_State *s); | 93 | debug_print_transitions (struct GNUNET_REGEX_State *s); |
296 | 94 | ||
95 | |||
297 | /** | 96 | /** |
298 | * Print information of the given state 's'. | 97 | * Print information of the given state 's'. |
299 | * | 98 | * |
@@ -318,6 +117,7 @@ debug_print_state (struct GNUNET_REGEX_State *s) | |||
318 | debug_print_transitions (s); | 117 | debug_print_transitions (s); |
319 | } | 118 | } |
320 | 119 | ||
120 | |||
321 | /** | 121 | /** |
322 | * Print debug information for all states contained in the automaton 'a'. | 122 | * Print debug information for all states contained in the automaton 'a'. |
323 | * | 123 | * |
@@ -332,13 +132,14 @@ debug_print_states (struct GNUNET_REGEX_Automaton *a) | |||
332 | debug_print_state (s); | 132 | debug_print_state (s); |
333 | } | 133 | } |
334 | 134 | ||
135 | |||
335 | /** | 136 | /** |
336 | * Print debug information for given transition 't'. | 137 | * Print debug information for given transition 't'. |
337 | * | 138 | * |
338 | * @param t transition for which to print debug info. | 139 | * @param t transition for which to print debug info. |
339 | */ | 140 | */ |
340 | void | 141 | void |
341 | debug_print_transition (struct Transition *t) | 142 | debug_print_transition (struct GNUNET_REGEX_Transition *t) |
342 | { | 143 | { |
343 | char *to_state; | 144 | char *to_state; |
344 | char *from_state; | 145 | char *from_state; |
@@ -366,10 +167,11 @@ debug_print_transition (struct Transition *t) | |||
366 | t->id, from_state, label, to_state); | 167 | t->id, from_state, label, to_state); |
367 | } | 168 | } |
368 | 169 | ||
170 | |||
369 | void | 171 | void |
370 | debug_print_transitions (struct GNUNET_REGEX_State *s) | 172 | debug_print_transitions (struct GNUNET_REGEX_State *s) |
371 | { | 173 | { |
372 | struct Transition *t; | 174 | struct GNUNET_REGEX_Transition *t; |
373 | 175 | ||
374 | for (t = s->transitions_head; NULL != t; t = t->next) | 176 | for (t = s->transitions_head; NULL != t; t = t->next) |
375 | debug_print_transition (t); | 177 | debug_print_transition (t); |
@@ -377,97 +179,6 @@ debug_print_transitions (struct GNUNET_REGEX_State *s) | |||
377 | 179 | ||
378 | 180 | ||
379 | /** | 181 | /** |
380 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside | ||
381 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all | ||
382 | * SCCs inside an automaton. | ||
383 | * | ||
384 | * @param scc_counter counter for numbering the sccs | ||
385 | * @param v start vertex | ||
386 | * @param index current index | ||
387 | * @param stack stack for saving all SCCs | ||
388 | * @param stack_size current size of the stack | ||
389 | */ | ||
390 | static void | ||
391 | scc_tarjan_strongconnect (unsigned int *scc_counter, | ||
392 | struct GNUNET_REGEX_State *v, unsigned int *index, | ||
393 | struct GNUNET_REGEX_State **stack, | ||
394 | unsigned int *stack_size) | ||
395 | { | ||
396 | struct GNUNET_REGEX_State *w; | ||
397 | struct Transition *t; | ||
398 | |||
399 | v->index = *index; | ||
400 | v->lowlink = *index; | ||
401 | (*index)++; | ||
402 | stack[(*stack_size)++] = v; | ||
403 | v->contained = 1; | ||
404 | |||
405 | for (t = v->transitions_head; NULL != t; t = t->next) | ||
406 | { | ||
407 | w = t->to_state; | ||
408 | if (NULL != w && w->index < 0) | ||
409 | { | ||
410 | scc_tarjan_strongconnect (scc_counter, w, index, stack, stack_size); | ||
411 | v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink; | ||
412 | } | ||
413 | else if (0 != w->contained) | ||
414 | v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink; | ||
415 | } | ||
416 | |||
417 | if (v->lowlink == v->index) | ||
418 | { | ||
419 | w = stack[--(*stack_size)]; | ||
420 | w->contained = 0; | ||
421 | |||
422 | if (v != w) | ||
423 | { | ||
424 | (*scc_counter)++; | ||
425 | while (v != w) | ||
426 | { | ||
427 | w->scc_id = *scc_counter; | ||
428 | w = stack[--(*stack_size)]; | ||
429 | w->contained = 0; | ||
430 | } | ||
431 | w->scc_id = *scc_counter; | ||
432 | } | ||
433 | } | ||
434 | } | ||
435 | |||
436 | |||
437 | /** | ||
438 | * Detect all SCCs (Strongly Connected Components) inside the given automaton. | ||
439 | * SCCs will be marked using the scc_id on each state. | ||
440 | * | ||
441 | * @param a the automaton for which SCCs should be computed and assigned. | ||
442 | */ | ||
443 | static void | ||
444 | scc_tarjan (struct GNUNET_REGEX_Automaton *a) | ||
445 | { | ||
446 | unsigned int index; | ||
447 | unsigned int scc_counter; | ||
448 | struct GNUNET_REGEX_State *v; | ||
449 | struct GNUNET_REGEX_State *stack[a->state_count]; | ||
450 | unsigned int stack_size; | ||
451 | |||
452 | for (v = a->states_head; NULL != v; v = v->next) | ||
453 | { | ||
454 | v->contained = 0; | ||
455 | v->index = -1; | ||
456 | v->lowlink = -1; | ||
457 | } | ||
458 | |||
459 | stack_size = 0; | ||
460 | index = 0; | ||
461 | scc_counter = 0; | ||
462 | |||
463 | for (v = a->states_head; NULL != v; v = v->next) | ||
464 | { | ||
465 | if (v->index < 0) | ||
466 | scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size); | ||
467 | } | ||
468 | } | ||
469 | |||
470 | /** | ||
471 | * Adds a transition from one state to another on 'label'. Does not add | 182 | * Adds a transition from one state to another on 'label'. Does not add |
472 | * duplicate states. | 183 | * duplicate states. |
473 | * | 184 | * |
@@ -482,8 +193,8 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
482 | struct GNUNET_REGEX_State *to_state) | 193 | struct GNUNET_REGEX_State *to_state) |
483 | { | 194 | { |
484 | int is_dup; | 195 | int is_dup; |
485 | struct Transition *t; | 196 | struct GNUNET_REGEX_Transition *t; |
486 | struct Transition *oth; | 197 | struct GNUNET_REGEX_Transition *oth; |
487 | 198 | ||
488 | if (NULL == from_state) | 199 | if (NULL == from_state) |
489 | { | 200 | { |
@@ -513,7 +224,7 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
513 | break; | 224 | break; |
514 | } | 225 | } |
515 | 226 | ||
516 | t = GNUNET_malloc (sizeof (struct Transition)); | 227 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); |
517 | t->id = ctx->transition_id++; | 228 | t->id = ctx->transition_id++; |
518 | t->label = label; | 229 | t->label = label; |
519 | t->to_state = to_state; | 230 | t->to_state = to_state; |
@@ -525,14 +236,16 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
525 | from_state->transitions_tail, oth, t); | 236 | from_state->transitions_tail, oth, t); |
526 | } | 237 | } |
527 | 238 | ||
528 | /** | 239 | |
240 | /** | ||
529 | * Remove a 'transition' from 'state'. | 241 | * Remove a 'transition' from 'state'. |
530 | * | 242 | * |
531 | * @param state state from which the to-be-removed transition originates. | 243 | * @param state state from which the to-be-removed transition originates. |
532 | * @param transition transition that should be removed from state 'state'. | 244 | * @param transition transition that should be removed from state 'state'. |
533 | */ | 245 | */ |
534 | static void | 246 | static void |
535 | state_remove_transition (struct GNUNET_REGEX_State *state, struct Transition *transition) | 247 | state_remove_transition (struct GNUNET_REGEX_State *state, |
248 | struct GNUNET_REGEX_Transition *transition) | ||
536 | { | 249 | { |
537 | if (NULL == state || NULL == transition) | 250 | if (NULL == state || NULL == transition) |
538 | return; | 251 | return; |
@@ -541,10 +254,12 @@ state_remove_transition (struct GNUNET_REGEX_State *state, struct Transition *tr | |||
541 | return; | 254 | return; |
542 | 255 | ||
543 | state->transition_count--; | 256 | state->transition_count--; |
544 | GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail, transition); | 257 | GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail, |
258 | transition); | ||
545 | GNUNET_free (transition); | 259 | GNUNET_free (transition); |
546 | } | 260 | } |
547 | 261 | ||
262 | |||
548 | /** | 263 | /** |
549 | * Compare two states. Used for sorting. | 264 | * Compare two states. Used for sorting. |
550 | * | 265 | * |
@@ -567,6 +282,7 @@ state_compare (const void *a, const void *b) | |||
567 | return (*s1)->id - (*s2)->id; | 282 | return (*s1)->id - (*s2)->id; |
568 | } | 283 | } |
569 | 284 | ||
285 | |||
570 | /** | 286 | /** |
571 | * Get all edges leaving state 's'. | 287 | * Get all edges leaving state 's'. |
572 | * | 288 | * |
@@ -578,7 +294,7 @@ state_compare (const void *a, const void *b) | |||
578 | static unsigned int | 294 | static unsigned int |
579 | state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) | 295 | state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) |
580 | { | 296 | { |
581 | struct Transition *t; | 297 | struct GNUNET_REGEX_Transition *t; |
582 | unsigned int count; | 298 | unsigned int count; |
583 | 299 | ||
584 | if (NULL == s) | 300 | if (NULL == s) |
@@ -598,6 +314,7 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) | |||
598 | return count; | 314 | return count; |
599 | } | 315 | } |
600 | 316 | ||
317 | |||
601 | /** | 318 | /** |
602 | * Compare to state sets by comparing the id's of the states that are contained | 319 | * Compare to state sets by comparing the id's of the states that are contained |
603 | * in each set. Both sets are expected to be sorted by id! | 320 | * in each set. Both sets are expected to be sorted by id! |
@@ -631,6 +348,7 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, | |||
631 | return result; | 348 | return result; |
632 | } | 349 | } |
633 | 350 | ||
351 | |||
634 | /** | 352 | /** |
635 | * Clears the given StateSet 'set' | 353 | * Clears the given StateSet 'set' |
636 | * | 354 | * |
@@ -646,6 +364,7 @@ state_set_clear (struct GNUNET_REGEX_StateSet *set) | |||
646 | } | 364 | } |
647 | } | 365 | } |
648 | 366 | ||
367 | |||
649 | /** | 368 | /** |
650 | * Clears an automaton fragment. Does not destroy the states inside the | 369 | * Clears an automaton fragment. Does not destroy the states inside the |
651 | * automaton. | 370 | * automaton. |
@@ -666,6 +385,7 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | |||
666 | GNUNET_free (a); | 385 | GNUNET_free (a); |
667 | } | 386 | } |
668 | 387 | ||
388 | |||
669 | /** | 389 | /** |
670 | * Frees the memory used by State 's' | 390 | * Frees the memory used by State 's' |
671 | * | 391 | * |
@@ -674,8 +394,8 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | |||
674 | static void | 394 | static void |
675 | automaton_destroy_state (struct GNUNET_REGEX_State *s) | 395 | automaton_destroy_state (struct GNUNET_REGEX_State *s) |
676 | { | 396 | { |
677 | struct Transition *t; | 397 | struct GNUNET_REGEX_Transition *t; |
678 | struct Transition *next_t; | 398 | struct GNUNET_REGEX_Transition *next_t; |
679 | 399 | ||
680 | if (NULL == s) | 400 | if (NULL == s) |
681 | return; | 401 | return; |
@@ -695,6 +415,7 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s) | |||
695 | GNUNET_free (s); | 415 | GNUNET_free (s); |
696 | } | 416 | } |
697 | 417 | ||
418 | |||
698 | /** | 419 | /** |
699 | * Remove a state from the given automaton 'a'. Always use this function when | 420 | * Remove a state from the given automaton 'a'. Always use this function when |
700 | * altering the states of an automaton. Will also remove all transitions leading | 421 | * altering the states of an automaton. Will also remove all transitions leading |
@@ -709,7 +430,7 @@ automaton_remove_state (struct GNUNET_REGEX_Automaton *a, | |||
709 | { | 430 | { |
710 | struct GNUNET_REGEX_State *ss; | 431 | struct GNUNET_REGEX_State *ss; |
711 | struct GNUNET_REGEX_State *s_check; | 432 | struct GNUNET_REGEX_State *s_check; |
712 | struct Transition *t_check; | 433 | struct GNUNET_REGEX_Transition *t_check; |
713 | 434 | ||
714 | if (NULL == a || NULL == s) | 435 | if (NULL == a || NULL == s) |
715 | return; | 436 | return; |
@@ -737,6 +458,7 @@ automaton_remove_state (struct GNUNET_REGEX_Automaton *a, | |||
737 | automaton_destroy_state (ss); | 458 | automaton_destroy_state (ss); |
738 | } | 459 | } |
739 | 460 | ||
461 | |||
740 | /** | 462 | /** |
741 | * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy | 463 | * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy |
742 | * 's2'. | 464 | * 's2'. |
@@ -753,9 +475,9 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
753 | struct GNUNET_REGEX_State *s2) | 475 | struct GNUNET_REGEX_State *s2) |
754 | { | 476 | { |
755 | struct GNUNET_REGEX_State *s_check; | 477 | struct GNUNET_REGEX_State *s_check; |
756 | struct Transition *t_check; | 478 | struct GNUNET_REGEX_Transition *t_check; |
757 | struct Transition *t; | 479 | struct GNUNET_REGEX_Transition *t; |
758 | struct Transition *t_next; | 480 | struct GNUNET_REGEX_Transition *t_next; |
759 | char *new_name; | 481 | char *new_name; |
760 | int is_dup; | 482 | int is_dup; |
761 | 483 | ||
@@ -806,6 +528,7 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
806 | automaton_destroy_state (s2); | 528 | automaton_destroy_state (s2); |
807 | } | 529 | } |
808 | 530 | ||
531 | |||
809 | /** | 532 | /** |
810 | * Add a state to the automaton 'a', always use this function to alter the | 533 | * Add a state to the automaton 'a', always use this function to alter the |
811 | * states DLL of the automaton. | 534 | * states DLL of the automaton. |
@@ -821,15 +544,6 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a, | |||
821 | a->state_count++; | 544 | a->state_count++; |
822 | } | 545 | } |
823 | 546 | ||
824 | /** | ||
825 | * Function that is called with each state, when traversing an automaton. | ||
826 | * | ||
827 | * @param cls closure. | ||
828 | * @param count current count of the state, from 0 to a->state_count -1. | ||
829 | * @param s state. | ||
830 | */ | ||
831 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, | ||
832 | struct GNUNET_REGEX_State * s); | ||
833 | 547 | ||
834 | /** | 548 | /** |
835 | * Depth-first traversal of all states that are reachable from state 's'. Expects the states to | 549 | * Depth-first traversal of all states that are reachable from state 's'. Expects the states to |
@@ -845,7 +559,7 @@ static void | |||
845 | automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count, | 559 | automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count, |
846 | GNUNET_REGEX_traverse_action action, void *action_cls) | 560 | GNUNET_REGEX_traverse_action action, void *action_cls) |
847 | { | 561 | { |
848 | struct Transition *t; | 562 | struct GNUNET_REGEX_Transition *t; |
849 | 563 | ||
850 | if (GNUNET_NO != s->marked) | 564 | if (GNUNET_NO != s->marked) |
851 | return; | 565 | return; |
@@ -866,9 +580,10 @@ automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count, | |||
866 | * @param action action to be performed on each state. | 580 | * @param action action to be performed on each state. |
867 | * @param action_cls closure for action | 581 | * @param action_cls closure for action |
868 | */ | 582 | */ |
869 | static void | 583 | void |
870 | automaton_traverse (struct GNUNET_REGEX_Automaton *a, | 584 | GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, |
871 | GNUNET_REGEX_traverse_action action, void *action_cls) | 585 | GNUNET_REGEX_traverse_action action, |
586 | void *action_cls) | ||
872 | { | 587 | { |
873 | unsigned int count; | 588 | unsigned int count; |
874 | struct GNUNET_REGEX_State *s; | 589 | struct GNUNET_REGEX_State *s; |
@@ -884,9 +599,6 @@ automaton_traverse (struct GNUNET_REGEX_Automaton *a, | |||
884 | * Check if the given string 'str' needs parentheses around it when | 599 | * Check if the given string 'str' needs parentheses around it when |
885 | * using it to generate a regex. | 600 | * using it to generate a regex. |
886 | * | 601 | * |
887 | * Currently only tests for first and last characters being '()' respectively. | ||
888 | * FIXME: What about "(ab)|(cd)"? | ||
889 | * | ||
890 | * @param str string | 602 | * @param str string |
891 | * | 603 | * |
892 | * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise | 604 | * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise |
@@ -932,12 +644,9 @@ needs_parentheses (const char *str) | |||
932 | 644 | ||
933 | /** | 645 | /** |
934 | * Remove parentheses surrounding string 'str'. | 646 | * Remove parentheses surrounding string 'str'. |
935 | * Example: "(a)" becomes "a". | 647 | * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. |
936 | * You need to GNUNET_free the returned string. | 648 | * You need to GNUNET_free the returned string. |
937 | * | 649 | * |
938 | * Currently only tests for first and last characters being '()' respectively. | ||
939 | * FIXME: What about "(ab)|(cd)"? | ||
940 | * | ||
941 | * @param str string, free'd or re-used by this function, can be NULL | 650 | * @param str string, free'd or re-used by this function, can be NULL |
942 | * | 651 | * |
943 | * @return string without surrounding parentheses, string 'str' if no preceding | 652 | * @return string without surrounding parentheses, string 'str' if no preceding |
@@ -947,12 +656,18 @@ static char * | |||
947 | remove_parentheses (char *str) | 656 | remove_parentheses (char *str) |
948 | { | 657 | { |
949 | size_t slen; | 658 | size_t slen; |
659 | const char *pos; | ||
950 | 660 | ||
951 | if ((NULL == str) || ('(' != str[0]) || | 661 | if ((NULL == str) || ('(' != str[0]) || |
952 | (str[(slen = strlen (str)) - 1] != ')')) | 662 | (str[(slen = strlen (str)) - 1] != ')')) |
953 | return str; | 663 | return str; |
954 | memmove (str, &str[1], slen - 2); | 664 | |
955 | str[slen - 2] = '\0'; | 665 | pos = strchr (&str[1], ')'); |
666 | if (pos == &str[slen - 1]) | ||
667 | { | ||
668 | memmove (str, &str[1], slen - 2); | ||
669 | str[slen - 2] = '\0'; | ||
670 | } | ||
956 | return str; | 671 | return str; |
957 | } | 672 | } |
958 | 673 | ||
@@ -999,6 +714,7 @@ remove_epsilon (const char *str) | |||
999 | return GNUNET_strdup (str); | 714 | return GNUNET_strdup (str); |
1000 | } | 715 | } |
1001 | 716 | ||
717 | |||
1002 | /** | 718 | /** |
1003 | * Compare 'str1', starting from position 'k', with whole 'str2' | 719 | * Compare 'str1', starting from position 'k', with whole 'str2' |
1004 | * | 720 | * |
@@ -1034,8 +750,9 @@ nullstrcmp (const char *str1, const char *str2) | |||
1034 | return strcmp (str1, str2); | 750 | return strcmp (str1, str2); |
1035 | } | 751 | } |
1036 | 752 | ||
753 | |||
1037 | /** | 754 | /** |
1038 | * Helper function used as 'action' in 'automaton_traverse' function to create | 755 | * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse' function to create |
1039 | * the depth-first numbering of the states. | 756 | * the depth-first numbering of the states. |
1040 | * | 757 | * |
1041 | * @param cls states array. | 758 | * @param cls states array. |
@@ -1051,11 +768,12 @@ number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s) | |||
1051 | states[count] = s; | 768 | states[count] = s; |
1052 | } | 769 | } |
1053 | 770 | ||
1054 | /** | 771 | |
772 | /** | ||
1055 | * Construct the regular expression given the inductive step, | 773 | * Construct the regular expression given the inductive step, |
1056 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* | 774 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* |
1057 | * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. | 775 | * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. |
1058 | * | 776 | * |
1059 | * @param R_last_ij value of $R^{(k-1)_{ij}. | 777 | * @param R_last_ij value of $R^{(k-1)_{ij}. |
1060 | * @param R_last_ik value of $R^{(k-1)_{ik}. | 778 | * @param R_last_ik value of $R^{(k-1)_{ik}. |
1061 | * @param R_last_kk value of $R^{(k-1)_{kk}. | 779 | * @param R_last_kk value of $R^{(k-1)_{kk}. |
@@ -1390,7 +1108,7 @@ automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik, | |||
1390 | GNUNET_free_non_null (R_temp_ik); | 1108 | GNUNET_free_non_null (R_temp_ik); |
1391 | GNUNET_free_non_null (R_temp_kk); | 1109 | GNUNET_free_non_null (R_temp_kk); |
1392 | GNUNET_free_non_null (R_temp_kj); | 1110 | GNUNET_free_non_null (R_temp_kj); |
1393 | 1111 | ||
1394 | if (NULL == R_cur_l && NULL == R_cur_r) | 1112 | if (NULL == R_cur_l && NULL == R_cur_r) |
1395 | { | 1113 | { |
1396 | *R_cur_ij = NULL; | 1114 | *R_cur_ij = NULL; |
@@ -1417,10 +1135,12 @@ automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik, | |||
1417 | } | 1135 | } |
1418 | 1136 | ||
1419 | GNUNET_asprintf (R_cur_ij, "(%s|%s)", R_cur_l, R_cur_r); | 1137 | GNUNET_asprintf (R_cur_ij, "(%s|%s)", R_cur_l, R_cur_r); |
1138 | |||
1420 | GNUNET_free (R_cur_l); | 1139 | GNUNET_free (R_cur_l); |
1421 | GNUNET_free (R_cur_r); | 1140 | GNUNET_free (R_cur_r); |
1422 | } | 1141 | } |
1423 | 1142 | ||
1143 | |||
1424 | /** | 1144 | /** |
1425 | * create proofs for all states in the given automaton. Implementation of the | 1145 | * create proofs for all states in the given automaton. Implementation of the |
1426 | * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and | 1146 | * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and |
@@ -1436,7 +1156,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1436 | char *R_last[n][n]; | 1156 | char *R_last[n][n]; |
1437 | char *R_cur[n][n]; | 1157 | char *R_cur[n][n]; |
1438 | char *temp; | 1158 | char *temp; |
1439 | struct Transition *t; | 1159 | struct GNUNET_REGEX_Transition *t; |
1440 | char *complete_regex; | 1160 | char *complete_regex; |
1441 | unsigned int i; | 1161 | unsigned int i; |
1442 | unsigned int j; | 1162 | unsigned int j; |
@@ -1444,7 +1164,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1444 | 1164 | ||
1445 | 1165 | ||
1446 | /* create depth-first numbering of the states, initializes 'state' */ | 1166 | /* create depth-first numbering of the states, initializes 'state' */ |
1447 | automaton_traverse (a, &number_states, states); | 1167 | GNUNET_REGEX_automaton_traverse (a, &number_states, states); |
1448 | 1168 | ||
1449 | /* Compute regular expressions of length "1" between each pair of states */ | 1169 | /* Compute regular expressions of length "1" between each pair of states */ |
1450 | for (i = 0; i < n; i++) | 1170 | for (i = 0; i < n; i++) |
@@ -1547,13 +1267,6 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1547 | } | 1267 | } |
1548 | a->canonical_regex = complete_regex; | 1268 | a->canonical_regex = complete_regex; |
1549 | 1269 | ||
1550 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1551 | "---------------------------------------------\n"); | ||
1552 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex: %s\n", a->regex); | ||
1553 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Complete Regex: %s\n", complete_regex); | ||
1554 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1555 | "---------------------------------------------\n"); | ||
1556 | |||
1557 | // cleanup | 1270 | // cleanup |
1558 | for (i = 0; i < n; i++) | 1271 | for (i = 0; i < n; i++) |
1559 | { | 1272 | { |
@@ -1562,6 +1275,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1562 | } | 1275 | } |
1563 | } | 1276 | } |
1564 | 1277 | ||
1278 | |||
1565 | /** | 1279 | /** |
1566 | * Creates a new DFA state based on a set of NFA states. Needs to be freed using | 1280 | * Creates a new DFA state based on a set of NFA states. Needs to be freed using |
1567 | * automaton_destroy_state. | 1281 | * automaton_destroy_state. |
@@ -1579,7 +1293,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1579 | char *name; | 1293 | char *name; |
1580 | int len = 0; | 1294 | int len = 0; |
1581 | struct GNUNET_REGEX_State *cstate; | 1295 | struct GNUNET_REGEX_State *cstate; |
1582 | struct Transition *ctran; | 1296 | struct GNUNET_REGEX_Transition *ctran; |
1583 | unsigned int i; | 1297 | unsigned int i; |
1584 | 1298 | ||
1585 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); | 1299 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); |
@@ -1641,6 +1355,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1641 | return s; | 1355 | return s; |
1642 | } | 1356 | } |
1643 | 1357 | ||
1358 | |||
1644 | /** | 1359 | /** |
1645 | * Move from the given state 's' to the next state on transition 'label' | 1360 | * Move from the given state 's' to the next state on transition 'label' |
1646 | * | 1361 | * |
@@ -1652,7 +1367,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1652 | static struct GNUNET_REGEX_State * | 1367 | static struct GNUNET_REGEX_State * |
1653 | dfa_move (struct GNUNET_REGEX_State *s, const char label) | 1368 | dfa_move (struct GNUNET_REGEX_State *s, const char label) |
1654 | { | 1369 | { |
1655 | struct Transition *t; | 1370 | struct GNUNET_REGEX_Transition *t; |
1656 | struct GNUNET_REGEX_State *new_s; | 1371 | struct GNUNET_REGEX_State *new_s; |
1657 | 1372 | ||
1658 | if (NULL == s) | 1373 | if (NULL == s) |
@@ -1672,6 +1387,7 @@ dfa_move (struct GNUNET_REGEX_State *s, const char label) | |||
1672 | return new_s; | 1387 | return new_s; |
1673 | } | 1388 | } |
1674 | 1389 | ||
1390 | |||
1675 | /** | 1391 | /** |
1676 | * Remove all unreachable states from DFA 'a'. Unreachable states are those | 1392 | * Remove all unreachable states from DFA 'a'. Unreachable states are those |
1677 | * states that are not reachable from the starting state. | 1393 | * states that are not reachable from the starting state. |
@@ -1689,7 +1405,7 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | |||
1689 | s->marked = GNUNET_NO; | 1405 | s->marked = GNUNET_NO; |
1690 | 1406 | ||
1691 | // 2. traverse dfa from start state and mark all visited states | 1407 | // 2. traverse dfa from start state and mark all visited states |
1692 | automaton_traverse (a, NULL, NULL); | 1408 | GNUNET_REGEX_automaton_traverse (a, NULL, NULL); |
1693 | 1409 | ||
1694 | // 3. delete all states that were not visited | 1410 | // 3. delete all states that were not visited |
1695 | for (s = a->states_head; NULL != s; s = s_next) | 1411 | for (s = a->states_head; NULL != s; s = s_next) |
@@ -1700,6 +1416,7 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | |||
1700 | } | 1416 | } |
1701 | } | 1417 | } |
1702 | 1418 | ||
1419 | |||
1703 | /** | 1420 | /** |
1704 | * Remove all dead states from the DFA 'a'. Dead states are those states that do | 1421 | * Remove all dead states from the DFA 'a'. Dead states are those states that do |
1705 | * not transition to any other state but themselfes. | 1422 | * not transition to any other state but themselfes. |
@@ -1710,7 +1427,7 @@ static void | |||
1710 | dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) | 1427 | dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) |
1711 | { | 1428 | { |
1712 | struct GNUNET_REGEX_State *s; | 1429 | struct GNUNET_REGEX_State *s; |
1713 | struct Transition *t; | 1430 | struct GNUNET_REGEX_Transition *t; |
1714 | int dead; | 1431 | int dead; |
1715 | 1432 | ||
1716 | GNUNET_assert (DFA == a->type); | 1433 | GNUNET_assert (DFA == a->type); |
@@ -1738,6 +1455,7 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) | |||
1738 | } | 1455 | } |
1739 | } | 1456 | } |
1740 | 1457 | ||
1458 | |||
1741 | /** | 1459 | /** |
1742 | * Merge all non distinguishable states in the DFA 'a' | 1460 | * Merge all non distinguishable states in the DFA 'a' |
1743 | * | 1461 | * |
@@ -1752,8 +1470,8 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1752 | int table[a->state_count][a->state_count]; | 1470 | int table[a->state_count][a->state_count]; |
1753 | struct GNUNET_REGEX_State *s1; | 1471 | struct GNUNET_REGEX_State *s1; |
1754 | struct GNUNET_REGEX_State *s2; | 1472 | struct GNUNET_REGEX_State *s2; |
1755 | struct Transition *t1; | 1473 | struct GNUNET_REGEX_Transition *t1; |
1756 | struct Transition *t2; | 1474 | struct GNUNET_REGEX_Transition *t2; |
1757 | struct GNUNET_REGEX_State *s1_next; | 1475 | struct GNUNET_REGEX_State *s1_next; |
1758 | struct GNUNET_REGEX_State *s2_next; | 1476 | struct GNUNET_REGEX_State *s2_next; |
1759 | int change; | 1477 | int change; |
@@ -1832,6 +1550,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1832 | } | 1550 | } |
1833 | } | 1551 | } |
1834 | 1552 | ||
1553 | |||
1835 | /** | 1554 | /** |
1836 | * Minimize the given DFA 'a' by removing all unreachable states, removing all | 1555 | * Minimize the given DFA 'a' by removing all unreachable states, removing all |
1837 | * dead states and merging all non distinguishable states | 1556 | * dead states and merging all non distinguishable states |
@@ -1858,6 +1577,7 @@ dfa_minimize (struct GNUNET_REGEX_Context *ctx, | |||
1858 | dfa_merge_nondistinguishable_states (ctx, a); | 1577 | dfa_merge_nondistinguishable_states (ctx, a); |
1859 | } | 1578 | } |
1860 | 1579 | ||
1580 | |||
1861 | /** | 1581 | /** |
1862 | * Creates a new NFA fragment. Needs to be cleared using | 1582 | * Creates a new NFA fragment. Needs to be cleared using |
1863 | * automaton_fragment_clear. | 1583 | * automaton_fragment_clear. |
@@ -1891,6 +1611,7 @@ nfa_fragment_create (struct GNUNET_REGEX_State *start, | |||
1891 | return n; | 1611 | return n; |
1892 | } | 1612 | } |
1893 | 1613 | ||
1614 | |||
1894 | /** | 1615 | /** |
1895 | * Adds a list of states to the given automaton 'n'. | 1616 | * Adds a list of states to the given automaton 'n'. |
1896 | * | 1617 | * |
@@ -1928,6 +1649,7 @@ nfa_add_states (struct GNUNET_REGEX_Automaton *n, | |||
1928 | n->state_count++; | 1649 | n->state_count++; |
1929 | } | 1650 | } |
1930 | 1651 | ||
1652 | |||
1931 | /** | 1653 | /** |
1932 | * Creates a new NFA state. Needs to be freed using automaton_destroy_state. | 1654 | * Creates a new NFA state. Needs to be freed using automaton_destroy_state. |
1933 | * | 1655 | * |
@@ -1955,6 +1677,7 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting) | |||
1955 | return s; | 1677 | return s; |
1956 | } | 1678 | } |
1957 | 1679 | ||
1680 | |||
1958 | /** | 1681 | /** |
1959 | * Calculates the NFA closure set for the given state. | 1682 | * Calculates the NFA closure set for the given state. |
1960 | * | 1683 | * |
@@ -1973,7 +1696,7 @@ nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa, | |||
1973 | struct GNUNET_REGEX_StateSet *cls_check; | 1696 | struct GNUNET_REGEX_StateSet *cls_check; |
1974 | struct GNUNET_REGEX_State *clsstate; | 1697 | struct GNUNET_REGEX_State *clsstate; |
1975 | struct GNUNET_REGEX_State *currentstate; | 1698 | struct GNUNET_REGEX_State *currentstate; |
1976 | struct Transition *ctran; | 1699 | struct GNUNET_REGEX_Transition *ctran; |
1977 | 1700 | ||
1978 | if (NULL == s) | 1701 | if (NULL == s) |
1979 | return NULL; | 1702 | return NULL; |
@@ -2021,6 +1744,7 @@ nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa, | |||
2021 | return cls; | 1744 | return cls; |
2022 | } | 1745 | } |
2023 | 1746 | ||
1747 | |||
2024 | /** | 1748 | /** |
2025 | * Calculates the closure set for the given set of states. | 1749 | * Calculates the closure set for the given set of states. |
2026 | * | 1750 | * |
@@ -2077,6 +1801,7 @@ nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa, | |||
2077 | return cls; | 1801 | return cls; |
2078 | } | 1802 | } |
2079 | 1803 | ||
1804 | |||
2080 | /** | 1805 | /** |
2081 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) | 1806 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) |
2082 | * | 1807 | * |
@@ -2109,6 +1834,7 @@ nfa_add_concatenation (struct GNUNET_REGEX_Context *ctx) | |||
2109 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); | 1834 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); |
2110 | } | 1835 | } |
2111 | 1836 | ||
1837 | |||
2112 | /** | 1838 | /** |
2113 | * Pops a NFA fragment from the stack (a) and adds a new fragment (a*) | 1839 | * Pops a NFA fragment from the stack (a) and adds a new fragment (a*) |
2114 | * | 1840 | * |
@@ -2150,6 +1876,7 @@ nfa_add_star_op (struct GNUNET_REGEX_Context *ctx) | |||
2150 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); | 1876 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); |
2151 | } | 1877 | } |
2152 | 1878 | ||
1879 | |||
2153 | /** | 1880 | /** |
2154 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a+) | 1881 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a+) |
2155 | * | 1882 | * |
@@ -2168,6 +1895,7 @@ nfa_add_plus_op (struct GNUNET_REGEX_Context *ctx) | |||
2168 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a); | 1895 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a); |
2169 | } | 1896 | } |
2170 | 1897 | ||
1898 | |||
2171 | /** | 1899 | /** |
2172 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a?) | 1900 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a?) |
2173 | * | 1901 | * |
@@ -2207,6 +1935,7 @@ nfa_add_question_op (struct GNUNET_REGEX_Context *ctx) | |||
2207 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); | 1935 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); |
2208 | } | 1936 | } |
2209 | 1937 | ||
1938 | |||
2210 | /** | 1939 | /** |
2211 | * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that | 1940 | * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that |
2212 | * alternates between a and b (a|b) | 1941 | * alternates between a and b (a|b) |
@@ -2248,6 +1977,7 @@ nfa_add_alternation (struct GNUNET_REGEX_Context *ctx) | |||
2248 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); | 1977 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new); |
2249 | } | 1978 | } |
2250 | 1979 | ||
1980 | |||
2251 | /** | 1981 | /** |
2252 | * Adds a new nfa fragment to the stack | 1982 | * Adds a new nfa fragment to the stack |
2253 | * | 1983 | * |
@@ -2271,6 +2001,7 @@ nfa_add_label (struct GNUNET_REGEX_Context *ctx, const char lit) | |||
2271 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n); | 2001 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n); |
2272 | } | 2002 | } |
2273 | 2003 | ||
2004 | |||
2274 | /** | 2005 | /** |
2275 | * Initialize a new context | 2006 | * Initialize a new context |
2276 | * | 2007 | * |
@@ -2290,6 +2021,7 @@ GNUNET_REGEX_context_init (struct GNUNET_REGEX_Context *ctx) | |||
2290 | ctx->stack_tail = NULL; | 2021 | ctx->stack_tail = NULL; |
2291 | } | 2022 | } |
2292 | 2023 | ||
2024 | |||
2293 | /** | 2025 | /** |
2294 | * Construct an NFA by parsing the regex string of length 'len'. | 2026 | * Construct an NFA by parsing the regex string of length 'len'. |
2295 | * | 2027 | * |
@@ -2452,6 +2184,7 @@ error: | |||
2452 | return NULL; | 2184 | return NULL; |
2453 | } | 2185 | } |
2454 | 2186 | ||
2187 | |||
2455 | /** | 2188 | /** |
2456 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. | 2189 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. |
2457 | * | 2190 | * |
@@ -2467,7 +2200,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2467 | struct GNUNET_REGEX_Automaton *dfa, | 2200 | struct GNUNET_REGEX_Automaton *dfa, |
2468 | struct GNUNET_REGEX_State *dfa_state) | 2201 | struct GNUNET_REGEX_State *dfa_state) |
2469 | { | 2202 | { |
2470 | struct Transition *ctran; | 2203 | struct GNUNET_REGEX_Transition *ctran; |
2471 | struct GNUNET_REGEX_State *state_iter; | 2204 | struct GNUNET_REGEX_State *state_iter; |
2472 | struct GNUNET_REGEX_State *new_dfa_state; | 2205 | struct GNUNET_REGEX_State *new_dfa_state; |
2473 | struct GNUNET_REGEX_State *state_contains; | 2206 | struct GNUNET_REGEX_State *state_contains; |
@@ -2505,6 +2238,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2505 | } | 2238 | } |
2506 | } | 2239 | } |
2507 | 2240 | ||
2241 | |||
2508 | /** | 2242 | /** |
2509 | * Construct DFA for the given 'regex' of length 'len' | 2243 | * Construct DFA for the given 'regex' of length 'len' |
2510 | * | 2244 | * |
@@ -2555,6 +2289,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
2555 | return dfa; | 2289 | return dfa; |
2556 | } | 2290 | } |
2557 | 2291 | ||
2292 | |||
2558 | /** | 2293 | /** |
2559 | * Free the memory allocated by constructing the GNUNET_REGEX_Automaton data | 2294 | * Free the memory allocated by constructing the GNUNET_REGEX_Automaton data |
2560 | * structure. | 2295 | * structure. |
@@ -2583,132 +2318,6 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) | |||
2583 | GNUNET_free (a); | 2318 | GNUNET_free (a); |
2584 | } | 2319 | } |
2585 | 2320 | ||
2586 | /** | ||
2587 | * Save a state to an open file pointer. cls is expected to be a file pointer to | ||
2588 | * an open file. Used only in conjunction with | ||
2589 | * GNUNET_REGEX_automaton_save_graph. | ||
2590 | * | ||
2591 | * @param cls file pointer. | ||
2592 | * @param count current count of the state, not used. | ||
2593 | * @param s state. | ||
2594 | */ | ||
2595 | void | ||
2596 | GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count, | ||
2597 | struct GNUNET_REGEX_State *s) | ||
2598 | { | ||
2599 | FILE *p; | ||
2600 | struct Transition *ctran; | ||
2601 | char *s_acc = NULL; | ||
2602 | char *s_tran = NULL; | ||
2603 | |||
2604 | p = cls; | ||
2605 | |||
2606 | if (s->accepting) | ||
2607 | { | ||
2608 | GNUNET_asprintf (&s_acc, | ||
2609 | "\"%s(%i)\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | ||
2610 | s->name, s->proof_id, s->scc_id); | ||
2611 | } | ||
2612 | else | ||
2613 | { | ||
2614 | GNUNET_asprintf (&s_acc, "\"%s(%i)\" [color=\"0.%i 0.8 0.95\"];\n", s->name, | ||
2615 | s->proof_id, s->scc_id); | ||
2616 | } | ||
2617 | |||
2618 | if (NULL == s_acc) | ||
2619 | { | ||
2620 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | ||
2621 | return; | ||
2622 | } | ||
2623 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
2624 | GNUNET_free (s_acc); | ||
2625 | s_acc = NULL; | ||
2626 | |||
2627 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | ||
2628 | { | ||
2629 | if (NULL == ctran->to_state) | ||
2630 | { | ||
2631 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
2632 | "Transition from State %i has no state for transitioning\n", | ||
2633 | s->id); | ||
2634 | continue; | ||
2635 | } | ||
2636 | |||
2637 | if (ctran->label == 0) | ||
2638 | { | ||
2639 | GNUNET_asprintf (&s_tran, | ||
2640 | "\"%s(%i)\" -> \"%s(%i)\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", | ||
2641 | s->name, s->proof_id, ctran->to_state->name, | ||
2642 | ctran->to_state->proof_id, s->scc_id); | ||
2643 | } | ||
2644 | else | ||
2645 | { | ||
2646 | GNUNET_asprintf (&s_tran, | ||
2647 | "\"%s(%i)\" -> \"%s(%i)\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", | ||
2648 | s->name, s->proof_id, ctran->to_state->name, | ||
2649 | ctran->to_state->proof_id, ctran->label, s->scc_id); | ||
2650 | } | ||
2651 | |||
2652 | if (NULL == s_tran) | ||
2653 | { | ||
2654 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", | ||
2655 | s->name); | ||
2656 | return; | ||
2657 | } | ||
2658 | |||
2659 | fwrite (s_tran, strlen (s_tran), 1, p); | ||
2660 | GNUNET_free (s_tran); | ||
2661 | s_tran = NULL; | ||
2662 | } | ||
2663 | } | ||
2664 | |||
2665 | /** | ||
2666 | * Save the given automaton as a GraphViz dot file | ||
2667 | * | ||
2668 | * @param a the automaton to be saved | ||
2669 | * @param filename where to save the file | ||
2670 | */ | ||
2671 | void | ||
2672 | GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | ||
2673 | const char *filename) | ||
2674 | { | ||
2675 | char *start; | ||
2676 | char *end; | ||
2677 | FILE *p; | ||
2678 | |||
2679 | if (NULL == a) | ||
2680 | { | ||
2681 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!"); | ||
2682 | return; | ||
2683 | } | ||
2684 | |||
2685 | if (NULL == filename || strlen (filename) < 1) | ||
2686 | { | ||
2687 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!"); | ||
2688 | return; | ||
2689 | } | ||
2690 | |||
2691 | p = fopen (filename, "w"); | ||
2692 | |||
2693 | if (NULL == p) | ||
2694 | { | ||
2695 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s", | ||
2696 | filename); | ||
2697 | return; | ||
2698 | } | ||
2699 | |||
2700 | /* First add the SCCs to the automaton, so we can color them nicely */ | ||
2701 | scc_tarjan (a); | ||
2702 | |||
2703 | start = "digraph G {\nrankdir=LR\n"; | ||
2704 | fwrite (start, strlen (start), 1, p); | ||
2705 | |||
2706 | automaton_traverse (a, &GNUNET_REGEX_automaton_save_graph_step, p); | ||
2707 | |||
2708 | end = "\n}\n"; | ||
2709 | fwrite (end, strlen (end), 1, p); | ||
2710 | fclose (p); | ||
2711 | } | ||
2712 | 2321 | ||
2713 | /** | 2322 | /** |
2714 | * Evaluates the given string using the given DFA automaton | 2323 | * Evaluates the given string using the given DFA automaton |
@@ -2750,6 +2359,7 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2750 | return 1; | 2359 | return 1; |
2751 | } | 2360 | } |
2752 | 2361 | ||
2362 | |||
2753 | /** | 2363 | /** |
2754 | * Evaluates the given string using the given NFA automaton | 2364 | * Evaluates the given string using the given NFA automaton |
2755 | * | 2365 | * |
@@ -2805,6 +2415,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2805 | return result; | 2415 | return result; |
2806 | } | 2416 | } |
2807 | 2417 | ||
2418 | |||
2808 | /** | 2419 | /** |
2809 | * Evaluates the given 'string' against the given compiled regex | 2420 | * Evaluates the given 'string' against the given compiled regex |
2810 | * | 2421 | * |
@@ -2857,6 +2468,7 @@ GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a) | |||
2857 | return a->canonical_regex; | 2468 | return a->canonical_regex; |
2858 | } | 2469 | } |
2859 | 2470 | ||
2471 | |||
2860 | /** | 2472 | /** |
2861 | * Get the first key for the given 'input_string'. This hashes the first x bits | 2473 | * Get the first key for the given 'input_string'. This hashes the first x bits |
2862 | * of the 'input_strings'. | 2474 | * of the 'input_strings'. |
@@ -2887,6 +2499,7 @@ GNUNET_REGEX_get_first_key (const char *input_string, unsigned int string_len, | |||
2887 | return size; | 2499 | return size; |
2888 | } | 2500 | } |
2889 | 2501 | ||
2502 | |||
2890 | /** | 2503 | /** |
2891 | * Check if the given 'proof' matches the given 'key'. | 2504 | * Check if the given 'proof' matches the given 'key'. |
2892 | * | 2505 | * |
@@ -2901,6 +2514,7 @@ GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key) | |||
2901 | return GNUNET_OK; | 2514 | return GNUNET_OK; |
2902 | } | 2515 | } |
2903 | 2516 | ||
2517 | |||
2904 | /** | 2518 | /** |
2905 | * Iterate over all edges helper function starting from state 's', calling | 2519 | * Iterate over all edges helper function starting from state 's', calling |
2906 | * iterator on for each edge. | 2520 | * iterator on for each edge. |
@@ -2913,7 +2527,7 @@ static void | |||
2913 | iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator, | 2527 | iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator, |
2914 | void *iterator_cls) | 2528 | void *iterator_cls) |
2915 | { | 2529 | { |
2916 | struct Transition *t; | 2530 | struct GNUNET_REGEX_Transition *t; |
2917 | struct GNUNET_REGEX_Edge edges[s->transition_count]; | 2531 | struct GNUNET_REGEX_Edge edges[s->transition_count]; |
2918 | unsigned int num_edges; | 2532 | unsigned int num_edges; |
2919 | 2533 | ||
@@ -2930,6 +2544,7 @@ iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator, | |||
2930 | } | 2544 | } |
2931 | } | 2545 | } |
2932 | 2546 | ||
2547 | |||
2933 | /** | 2548 | /** |
2934 | * Iterate over all edges starting from start state of automaton 'a'. Calling | 2549 | * Iterate over all edges starting from start state of automaton 'a'. Calling |
2935 | * iterator for each edge. | 2550 | * iterator for each edge. |
diff --git a/src/regex/regex_graph.c b/src/regex/regex_graph.c new file mode 100644 index 000000000..3230ea702 --- /dev/null +++ b/src/regex/regex_graph.c | |||
@@ -0,0 +1,248 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_graph.c | ||
22 | * @brief functions for creating .dot graphs from regexes | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "gnunet_regex_lib.h" | ||
27 | #include "regex_internal.h" | ||
28 | |||
29 | /** | ||
30 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside | ||
31 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all | ||
32 | * SCCs inside an automaton. | ||
33 | * | ||
34 | * @param scc_counter counter for numbering the sccs | ||
35 | * @param v start vertex | ||
36 | * @param index current index | ||
37 | * @param stack stack for saving all SCCs | ||
38 | * @param stack_size current size of the stack | ||
39 | */ | ||
40 | static void | ||
41 | scc_tarjan_strongconnect (unsigned int *scc_counter, | ||
42 | struct GNUNET_REGEX_State *v, unsigned int *index, | ||
43 | struct GNUNET_REGEX_State **stack, | ||
44 | unsigned int *stack_size) | ||
45 | { | ||
46 | struct GNUNET_REGEX_State *w; | ||
47 | struct GNUNET_REGEX_Transition *t; | ||
48 | |||
49 | v->index = *index; | ||
50 | v->lowlink = *index; | ||
51 | (*index)++; | ||
52 | stack[(*stack_size)++] = v; | ||
53 | v->contained = 1; | ||
54 | |||
55 | for (t = v->transitions_head; NULL != t; t = t->next) | ||
56 | { | ||
57 | w = t->to_state; | ||
58 | if (NULL != w && w->index < 0) | ||
59 | { | ||
60 | scc_tarjan_strongconnect (scc_counter, w, index, stack, stack_size); | ||
61 | v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink; | ||
62 | } | ||
63 | else if (0 != w->contained) | ||
64 | v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink; | ||
65 | } | ||
66 | |||
67 | if (v->lowlink == v->index) | ||
68 | { | ||
69 | w = stack[--(*stack_size)]; | ||
70 | w->contained = 0; | ||
71 | |||
72 | if (v != w) | ||
73 | { | ||
74 | (*scc_counter)++; | ||
75 | while (v != w) | ||
76 | { | ||
77 | w->scc_id = *scc_counter; | ||
78 | w = stack[--(*stack_size)]; | ||
79 | w->contained = 0; | ||
80 | } | ||
81 | w->scc_id = *scc_counter; | ||
82 | } | ||
83 | } | ||
84 | } | ||
85 | |||
86 | |||
87 | /** | ||
88 | * Detect all SCCs (Strongly Connected Components) inside the given automaton. | ||
89 | * SCCs will be marked using the scc_id on each state. | ||
90 | * | ||
91 | * @param a the automaton for which SCCs should be computed and assigned. | ||
92 | */ | ||
93 | static void | ||
94 | scc_tarjan (struct GNUNET_REGEX_Automaton *a) | ||
95 | { | ||
96 | unsigned int index; | ||
97 | unsigned int scc_counter; | ||
98 | struct GNUNET_REGEX_State *v; | ||
99 | struct GNUNET_REGEX_State *stack[a->state_count]; | ||
100 | unsigned int stack_size; | ||
101 | |||
102 | for (v = a->states_head; NULL != v; v = v->next) | ||
103 | { | ||
104 | v->contained = 0; | ||
105 | v->index = -1; | ||
106 | v->lowlink = -1; | ||
107 | } | ||
108 | |||
109 | stack_size = 0; | ||
110 | index = 0; | ||
111 | scc_counter = 0; | ||
112 | |||
113 | for (v = a->states_head; NULL != v; v = v->next) | ||
114 | { | ||
115 | if (v->index < 0) | ||
116 | scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | |||
121 | /** | ||
122 | * Save a state to an open file pointer. cls is expected to be a file pointer to | ||
123 | * an open file. Used only in conjunction with | ||
124 | * GNUNET_REGEX_automaton_save_graph. | ||
125 | * | ||
126 | * @param cls file pointer. | ||
127 | * @param count current count of the state, not used. | ||
128 | * @param s state. | ||
129 | */ | ||
130 | void | ||
131 | GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count, | ||
132 | struct GNUNET_REGEX_State *s) | ||
133 | { | ||
134 | FILE *p; | ||
135 | struct GNUNET_REGEX_Transition *ctran; | ||
136 | char *s_acc = NULL; | ||
137 | char *s_tran = NULL; | ||
138 | |||
139 | p = cls; | ||
140 | |||
141 | if (s->accepting) | ||
142 | { | ||
143 | GNUNET_asprintf (&s_acc, | ||
144 | "\"%s(%i)\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | ||
145 | s->name, s->proof_id, s->scc_id); | ||
146 | } | ||
147 | else | ||
148 | { | ||
149 | GNUNET_asprintf (&s_acc, "\"%s(%i)\" [color=\"0.%i 0.8 0.95\"];\n", s->name, | ||
150 | s->proof_id, s->scc_id); | ||
151 | } | ||
152 | |||
153 | if (NULL == s_acc) | ||
154 | { | ||
155 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | ||
156 | return; | ||
157 | } | ||
158 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
159 | GNUNET_free (s_acc); | ||
160 | s_acc = NULL; | ||
161 | |||
162 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | ||
163 | { | ||
164 | if (NULL == ctran->to_state) | ||
165 | { | ||
166 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
167 | "Transition from State %i has no state for transitioning\n", | ||
168 | s->id); | ||
169 | continue; | ||
170 | } | ||
171 | |||
172 | if (ctran->label == 0) | ||
173 | { | ||
174 | GNUNET_asprintf (&s_tran, | ||
175 | "\"%s(%i)\" -> \"%s(%i)\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", | ||
176 | s->name, s->proof_id, ctran->to_state->name, | ||
177 | ctran->to_state->proof_id, s->scc_id); | ||
178 | } | ||
179 | else | ||
180 | { | ||
181 | GNUNET_asprintf (&s_tran, | ||
182 | "\"%s(%i)\" -> \"%s(%i)\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", | ||
183 | s->name, s->proof_id, ctran->to_state->name, | ||
184 | ctran->to_state->proof_id, ctran->label, s->scc_id); | ||
185 | } | ||
186 | |||
187 | if (NULL == s_tran) | ||
188 | { | ||
189 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", | ||
190 | s->name); | ||
191 | return; | ||
192 | } | ||
193 | |||
194 | fwrite (s_tran, strlen (s_tran), 1, p); | ||
195 | GNUNET_free (s_tran); | ||
196 | s_tran = NULL; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | |||
201 | /** | ||
202 | * Save the given automaton as a GraphViz dot file | ||
203 | * | ||
204 | * @param a the automaton to be saved | ||
205 | * @param filename where to save the file | ||
206 | */ | ||
207 | void | ||
208 | GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | ||
209 | const char *filename) | ||
210 | { | ||
211 | char *start; | ||
212 | char *end; | ||
213 | FILE *p; | ||
214 | |||
215 | if (NULL == a) | ||
216 | { | ||
217 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!"); | ||
218 | return; | ||
219 | } | ||
220 | |||
221 | if (NULL == filename || strlen (filename) < 1) | ||
222 | { | ||
223 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!"); | ||
224 | return; | ||
225 | } | ||
226 | |||
227 | p = fopen (filename, "w"); | ||
228 | |||
229 | if (NULL == p) | ||
230 | { | ||
231 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s", | ||
232 | filename); | ||
233 | return; | ||
234 | } | ||
235 | |||
236 | /* First add the SCCs to the automaton, so we can color them nicely */ | ||
237 | scc_tarjan (a); | ||
238 | |||
239 | start = "digraph G {\nrankdir=LR\n"; | ||
240 | fwrite (start, strlen (start), 1, p); | ||
241 | |||
242 | GNUNET_REGEX_automaton_traverse (a, &GNUNET_REGEX_automaton_save_graph_step, | ||
243 | p); | ||
244 | |||
245 | end = "\n}\n"; | ||
246 | fwrite (end, strlen (end), 1, p); | ||
247 | fclose (p); | ||
248 | } | ||
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index 8ea597d40..d1e829633 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -43,6 +43,241 @@ extern "C" | |||
43 | 43 | ||
44 | 44 | ||
45 | /** | 45 | /** |
46 | * Transition between two states. Each state can have 0-n transitions. If label | ||
47 | * is 0, this is considered to be an epsilon transition. | ||
48 | */ | ||
49 | struct GNUNET_REGEX_Transition | ||
50 | { | ||
51 | /** | ||
52 | * This is a linked list. | ||
53 | */ | ||
54 | struct GNUNET_REGEX_Transition *prev; | ||
55 | |||
56 | /** | ||
57 | * This is a linked list. | ||
58 | */ | ||
59 | struct GNUNET_REGEX_Transition *next; | ||
60 | |||
61 | /** | ||
62 | * Unique id of this transition. | ||
63 | */ | ||
64 | unsigned int id; | ||
65 | |||
66 | /** | ||
67 | * Label for this transition. This is basically the edge label for the graph. | ||
68 | */ | ||
69 | char label; | ||
70 | |||
71 | /** | ||
72 | * State to which this transition leads. | ||
73 | */ | ||
74 | struct GNUNET_REGEX_State *to_state; | ||
75 | |||
76 | /** | ||
77 | * State from which this transition origins. | ||
78 | */ | ||
79 | struct GNUNET_REGEX_State *from_state; | ||
80 | |||
81 | /** | ||
82 | * Mark this transition. For example when reversing the automaton. | ||
83 | */ | ||
84 | int mark; | ||
85 | }; | ||
86 | |||
87 | |||
88 | /** | ||
89 | * A state. Can be used in DFA and NFA automatons. | ||
90 | */ | ||
91 | struct GNUNET_REGEX_State | ||
92 | { | ||
93 | /** | ||
94 | * This is a linked list. | ||
95 | */ | ||
96 | struct GNUNET_REGEX_State *prev; | ||
97 | |||
98 | /** | ||
99 | * This is a linked list. | ||
100 | */ | ||
101 | struct GNUNET_REGEX_State *next; | ||
102 | |||
103 | /** | ||
104 | * Unique state id. | ||
105 | */ | ||
106 | unsigned int id; | ||
107 | |||
108 | /** | ||
109 | * If this is an accepting state or not. | ||
110 | */ | ||
111 | int accepting; | ||
112 | |||
113 | /** | ||
114 | * Marking of the state. This is used for marking all visited states when | ||
115 | * traversing all states of an automaton and for cases where the state id | ||
116 | * cannot be used (dfa minimization). | ||
117 | */ | ||
118 | int marked; | ||
119 | |||
120 | /** | ||
121 | * Marking the state as contained. This is used for checking, if the state is | ||
122 | * contained in a set in constant time | ||
123 | */ | ||
124 | int contained; | ||
125 | |||
126 | /** | ||
127 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
128 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state | ||
129 | * is not a part of any SCC. | ||
130 | */ | ||
131 | unsigned int scc_id; | ||
132 | |||
133 | /** | ||
134 | * Used for SCC detection. | ||
135 | */ | ||
136 | int index; | ||
137 | |||
138 | /** | ||
139 | * Used for SCC detection. | ||
140 | */ | ||
141 | int lowlink; | ||
142 | |||
143 | /** | ||
144 | * Human readable name of the automaton. Used for debugging and graph | ||
145 | * creation. | ||
146 | */ | ||
147 | char *name; | ||
148 | |||
149 | /** | ||
150 | * Hash of the state. | ||
151 | */ | ||
152 | struct GNUNET_HashCode hash; | ||
153 | |||
154 | /** | ||
155 | * State ID for proof creation. | ||
156 | */ | ||
157 | unsigned int proof_id; | ||
158 | |||
159 | /** | ||
160 | * Proof for this state. | ||
161 | */ | ||
162 | char *proof; | ||
163 | |||
164 | /** | ||
165 | * Number of transitions from this state to other states. | ||
166 | */ | ||
167 | unsigned int transition_count; | ||
168 | |||
169 | /** | ||
170 | * DLL of transitions. | ||
171 | */ | ||
172 | struct GNUNET_REGEX_Transition *transitions_head; | ||
173 | |||
174 | /** | ||
175 | * DLL of transitions. | ||
176 | */ | ||
177 | struct GNUNET_REGEX_Transition *transitions_tail; | ||
178 | |||
179 | /** | ||
180 | * Set of states on which this state is based on. Used when creating a DFA out | ||
181 | * of several NFA states. | ||
182 | */ | ||
183 | struct GNUNET_REGEX_StateSet *nfa_set; | ||
184 | }; | ||
185 | |||
186 | |||
187 | /** | ||
188 | * Type of an automaton. | ||
189 | */ | ||
190 | enum GNUNET_REGEX_AutomatonType | ||
191 | { | ||
192 | NFA, | ||
193 | DFA | ||
194 | }; | ||
195 | |||
196 | |||
197 | /** | ||
198 | * Automaton representation. | ||
199 | */ | ||
200 | struct GNUNET_REGEX_Automaton | ||
201 | { | ||
202 | /** | ||
203 | * Linked list of NFAs used for partial NFA creation. | ||
204 | */ | ||
205 | struct GNUNET_REGEX_Automaton *prev; | ||
206 | |||
207 | /** | ||
208 | * Linked list of NFAs used for partial NFA creation. | ||
209 | */ | ||
210 | struct GNUNET_REGEX_Automaton *next; | ||
211 | |||
212 | /** | ||
213 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
214 | * where each NFA itself consists of one or more NFAs linked together. | ||
215 | */ | ||
216 | struct GNUNET_REGEX_State *start; | ||
217 | |||
218 | /** | ||
219 | * End state of the partial NFA. This is undefined for DFAs | ||
220 | */ | ||
221 | struct GNUNET_REGEX_State *end; | ||
222 | |||
223 | /** | ||
224 | * Number of states in the automaton. | ||
225 | */ | ||
226 | unsigned int state_count; | ||
227 | |||
228 | /** | ||
229 | * DLL of states. | ||
230 | */ | ||
231 | struct GNUNET_REGEX_State *states_head; | ||
232 | |||
233 | /** | ||
234 | * DLL of states | ||
235 | */ | ||
236 | struct GNUNET_REGEX_State *states_tail; | ||
237 | |||
238 | /** | ||
239 | * Type of the automaton. | ||
240 | */ | ||
241 | enum GNUNET_REGEX_AutomatonType type; | ||
242 | |||
243 | /** | ||
244 | * Regex | ||
245 | */ | ||
246 | char *regex; | ||
247 | |||
248 | /** | ||
249 | * Canonical regex (result of RX->NFA->DFA->RX) | ||
250 | */ | ||
251 | char *canonical_regex; | ||
252 | }; | ||
253 | |||
254 | |||
255 | /** | ||
256 | * Function that is called with each state, when traversing an automaton. | ||
257 | * | ||
258 | * @param cls closure. | ||
259 | * @param count current count of the state, from 0 to a->state_count -1. | ||
260 | * @param s state. | ||
261 | */ | ||
262 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, | ||
263 | struct GNUNET_REGEX_State * s); | ||
264 | |||
265 | |||
266 | /** | ||
267 | * Traverses the given automaton from it's start state, visiting all reachable | ||
268 | * states and calling 'action' on each one of them. | ||
269 | * | ||
270 | * @param a automaton. | ||
271 | * @param action action to be performed on each state. | ||
272 | * @param action_cls closure for action | ||
273 | */ | ||
274 | void | ||
275 | GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, | ||
276 | GNUNET_REGEX_traverse_action action, | ||
277 | void *action_cls); | ||
278 | |||
279 | |||
280 | /** | ||
46 | * Get the canonical regex of the given automaton. | 281 | * Get the canonical regex of the given automaton. |
47 | * When constructing the automaton a proof is computed for each state, | 282 | * When constructing the automaton a proof is computed for each state, |
48 | * consisting of the regular expression leading to this state. A complete | 283 | * consisting of the regular expression leading to this state. A complete |
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index 5a6575c86..37fd38c0d 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -227,7 +227,7 @@ main (int argc, char *argv[]) | |||
227 | int check_rand; | 227 | int check_rand; |
228 | char *check_proof; | 228 | char *check_proof; |
229 | 229 | ||
230 | struct Regex_String_Pair rxstr[14] = { | 230 | struct Regex_String_Pair rxstr[16] = { |
231 | {"ab?(abcd)?", 5, | 231 | {"ab?(abcd)?", 5, |
232 | {"ababcd", "abab", "aabcd", "a", "abb"}, | 232 | {"ababcd", "abab", "aabcd", "a", "abb"}, |
233 | {match, nomatch, match, match, nomatch}}, | 233 | {match, nomatch, match, match, nomatch}}, |
@@ -270,6 +270,12 @@ main (int argc, char *argv[]) | |||
270 | {"(ab|c)+", 7, | 270 | {"(ab|c)+", 7, |
271 | {"", "ab", "c", "abc", "ababcc", "acc", "abac"}, | 271 | {"", "ab", "c", "abc", "ababcc", "acc", "abac"}, |
272 | {nomatch, match, match, match, match, nomatch, nomatch}}, | 272 | {nomatch, match, match, match, match, nomatch, nomatch}}, |
273 | {"((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1, | ||
274 | {"", "2j2jADK", "j2jADK"}, | ||
275 | {nomatch, match, match}}, | ||
276 | {"((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2, | ||
277 | {"", "2j2jjADK", "j2jADK"}, | ||
278 | {nomatch, match, match}}, | ||
273 | {"ab(c|d)+c*(a(b|c)d)+", 1, | 279 | {"ab(c|d)+c*(a(b|c)d)+", 1, |
274 | {"abacd"}, | 280 | {"abacd"}, |
275 | {nomatch}} | 281 | {nomatch}} |
@@ -279,7 +285,7 @@ main (int argc, char *argv[]) | |||
279 | check_dfa = 0; | 285 | check_dfa = 0; |
280 | check_rand = 0; | 286 | check_rand = 0; |
281 | 287 | ||
282 | for (i = 0; i < 14; i++) | 288 | for (i = 0; i < 16; i++) |
283 | { | 289 | { |
284 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) | 290 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) |
285 | { | 291 | { |
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c index 831f4dc48..8ba0e142f 100644 --- a/src/regex/test_regex_proofs.c +++ b/src/regex/test_regex_proofs.c | |||
@@ -106,16 +106,15 @@ test_proofs_static (void) | |||
106 | unsigned int i; | 106 | unsigned int i; |
107 | unsigned int error; | 107 | unsigned int error; |
108 | 108 | ||
109 | const char *regex[6] = { | 109 | const char *regex[8] = { |
110 | "a|aa*a", | 110 | "a|aa*a", |
111 | "a+", | 111 | "a+", |
112 | "a*", | 112 | "a*", |
113 | "a*a*", | 113 | "a*a*", |
114 | "(F*C|WfPf|y+F*C)", | 114 | "(F*C|WfPf|y+F*C)", |
115 | "y*F*C|WfPf", | 115 | "y*F*C|WfPf", |
116 | /* "2?jA?e?D*K", */ | 116 | "((a|b)c|(a|b)(d|(a|b)e))", |
117 | /* "((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", */ | 117 | "((a|b)(c|d)|(a|b)(a|b)e)" |
118 | /* "((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", */ | ||
119 | }; | 118 | }; |
120 | 119 | ||
121 | const char *canon_rx1; | 120 | const char *canon_rx1; |
@@ -125,7 +124,7 @@ test_proofs_static (void) | |||
125 | 124 | ||
126 | error = 0; | 125 | error = 0; |
127 | 126 | ||
128 | for (i = 0; i < 6; i += 2) | 127 | for (i = 0; i < 8; i += 2) |
129 | { | 128 | { |
130 | dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 129 | dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); |
131 | dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); | 130 | dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); |
@@ -138,12 +137,8 @@ test_proofs_static (void) | |||
138 | if (error > 0) | 137 | if (error > 0) |
139 | { | 138 | { |
140 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 139 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
141 | "Comparing canonical regex failed:\nrx1: %s\ncrx1: %s\nrx2: %s\ncrx2: %s\n", | 140 | "Comparing canonical regex failed:\nrx1:\t%s\ncrx1:\t%s\nrx2:\t%s\ncrx2:\t%s\n", |
142 | regex[i], canon_rx1, regex[i + 1], canon_rx2); | 141 | regex[i], canon_rx1, regex[i + 1], canon_rx2); |
143 | |||
144 | /* Save the graphs of the two conflicting DFAs */ | ||
145 | /* GNUNET_REGEX_automaton_save_graph (dfa1, "proofs_fail_dfa1.dot"); */ | ||
146 | /* GNUNET_REGEX_automaton_save_graph (dfa2, "proofs_fail_dfa2.dot"); */ | ||
147 | } | 142 | } |
148 | 143 | ||
149 | GNUNET_REGEX_automaton_destroy (dfa1); | 144 | GNUNET_REGEX_automaton_destroy (dfa1); |
@@ -170,7 +165,7 @@ main (int argc, char *argv[]) | |||
170 | error = 0; | 165 | error = 0; |
171 | 166 | ||
172 | error += test_proofs_static (); | 167 | error += test_proofs_static (); |
173 | /* error += test_proofs_random (10000, 10); */ | 168 | error += test_proofs_random (100, 30); |
174 | 169 | ||
175 | return error; | 170 | return error; |
176 | } | 171 | } |