diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-04-20 12:35:09 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-04-20 12:35:09 +0000 |
commit | 6c889a1786be40c0d023e8971411bc327af352c6 (patch) | |
tree | a43fc51276c677f82e29bab330edb865e057174e /src | |
parent | 24875cb6766ad147e23f248b17afa889b3d0a099 (diff) | |
download | gnunet-6c889a1786be40c0d023e8971411bc327af352c6.tar.gz gnunet-6c889a1786be40c0d023e8971411bc327af352c6.zip |
recursion for dfa construction
Diffstat (limited to 'src')
-rw-r--r-- | src/regex/regex.c | 413 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 9 |
2 files changed, 223 insertions, 199 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 7aebd2191..6a8420ac8 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -31,8 +31,8 @@ | |||
31 | #define initial_bits 10 | 31 | #define initial_bits 10 |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Context that contains an id counter for states and transitions | 34 | * Context that contains an id counter for states and transitions as well as a |
35 | * as well as a DLL of automatons used as a stack for NFA construction. | 35 | * DLL of automatons used as a stack for NFA construction. |
36 | */ | 36 | */ |
37 | struct GNUNET_REGEX_Context | 37 | struct GNUNET_REGEX_Context |
38 | { | 38 | { |
@@ -87,10 +87,8 @@ struct GNUNET_REGEX_Automaton | |||
87 | struct GNUNET_REGEX_Automaton *next; | 87 | struct GNUNET_REGEX_Automaton *next; |
88 | 88 | ||
89 | /** | 89 | /** |
90 | * First state of the automaton. This is mainly | 90 | * First state of the automaton. This is mainly used for constructing an NFA, |
91 | * used for constructing an NFA, where each NFA | 91 | * where each NFA itself consists of one or more NFAs linked together. |
92 | * itself consists of one or more NFAs linked | ||
93 | * together. | ||
94 | */ | 92 | */ |
95 | struct GNUNET_REGEX_State *start; | 93 | struct GNUNET_REGEX_State *start; |
96 | 94 | ||
@@ -146,21 +144,22 @@ struct GNUNET_REGEX_State | |||
146 | int accepting; | 144 | int accepting; |
147 | 145 | ||
148 | /** | 146 | /** |
149 | * Marking of the state. This is used for marking all visited | 147 | * Marking of the state. This is used for marking all visited states when |
150 | * states when traversing all states of an automaton and for | 148 | * traversing all states of an automaton and for cases where the state id |
151 | * cases where the state id cannot be used (dfa minimization). | 149 | * cannot be used (dfa minimization). |
152 | */ | 150 | */ |
153 | int marked; | 151 | int marked; |
154 | 152 | ||
155 | /** | 153 | /** |
156 | * Marking the state as contained. This is used for checking, | 154 | * Marking the state as contained. This is used for checking, if the state is |
157 | * if the state is contained in a set in constant time | 155 | * contained in a set in constant time |
158 | */ | 156 | */ |
159 | int contained; | 157 | int contained; |
160 | 158 | ||
161 | /** | 159 | /** |
162 | * Marking the state as part of an SCC (Strongly Connected Component). | 160 | * Marking the state as part of an SCC (Strongly Connected Component). All |
163 | * All states with the same scc_id are part of the same SCC. | 161 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state |
162 | * is not a part of any SCC. | ||
164 | */ | 163 | */ |
165 | unsigned int scc_id; | 164 | unsigned int scc_id; |
166 | 165 | ||
@@ -175,14 +174,22 @@ struct GNUNET_REGEX_State | |||
175 | int lowlink; | 174 | int lowlink; |
176 | 175 | ||
177 | /** | 176 | /** |
178 | * Human readable name of the automaton. Used for debugging | 177 | * Human readable name of the automaton. Used for debugging and graph |
179 | * and graph creation. | 178 | * creation. |
180 | */ | 179 | */ |
181 | char *name; | 180 | char *name; |
182 | 181 | ||
182 | /** | ||
183 | * Hash of the state. | ||
184 | */ | ||
183 | GNUNET_HashCode hash; | 185 | GNUNET_HashCode hash; |
184 | 186 | ||
185 | /** | 187 | /** |
188 | * Proof for this state. | ||
189 | */ | ||
190 | char *proof; | ||
191 | |||
192 | /** | ||
186 | * Number of transitions from this state to other states. | 193 | * Number of transitions from this state to other states. |
187 | */ | 194 | */ |
188 | unsigned int transition_count; | 195 | unsigned int transition_count; |
@@ -198,15 +205,15 @@ struct GNUNET_REGEX_State | |||
198 | struct Transition *transitions_tail; | 205 | struct Transition *transitions_tail; |
199 | 206 | ||
200 | /** | 207 | /** |
201 | * Set of states on which this state is based on. Used when | 208 | * Set of states on which this state is based on. Used when creating a DFA out |
202 | * creating a DFA out of several NFA states. | 209 | * of several NFA states. |
203 | */ | 210 | */ |
204 | struct GNUNET_REGEX_StateSet *nfa_set; | 211 | struct GNUNET_REGEX_StateSet *nfa_set; |
205 | }; | 212 | }; |
206 | 213 | ||
207 | /** | 214 | /** |
208 | * Transition between two states. Each state can have 0-n transitions. | 215 | * Transition between two states. Each state can have 0-n transitions. If label |
209 | * If label is 0, this is considered to be an epsilon transition. | 216 | * is 0, this is considered to be an epsilon transition. |
210 | */ | 217 | */ |
211 | struct Transition | 218 | struct Transition |
212 | { | 219 | { |
@@ -226,8 +233,7 @@ struct Transition | |||
226 | unsigned int id; | 233 | unsigned int id; |
227 | 234 | ||
228 | /** | 235 | /** |
229 | * label for this transition. This is basically the edge label for | 236 | * Label for this transition. This is basically the edge label for the graph. |
230 | * the graph. | ||
231 | */ | 237 | */ |
232 | char label; | 238 | char label; |
233 | 239 | ||
@@ -262,8 +268,9 @@ static void | |||
262 | debug_print_state (struct GNUNET_REGEX_State *s) | 268 | debug_print_state (struct GNUNET_REGEX_State *s) |
263 | { | 269 | { |
264 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 270 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
265 | "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i\n", s->id, | 271 | "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i\n", |
266 | s->name, s->marked, s->accepting, s->scc_id, s->transition_count); | 272 | s->id, s->name, s->marked, s->accepting, s->scc_id, |
273 | s->transition_count); | ||
267 | } | 274 | } |
268 | 275 | ||
269 | static void | 276 | static void |
@@ -304,9 +311,9 @@ debug_print_transitions (struct GNUNET_REGEX_State *s) | |||
304 | } | 311 | } |
305 | 312 | ||
306 | /** | 313 | /** |
307 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs | 314 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside |
308 | * inside the subgraph reachable from 'v'. Used with scc_tarjan function | 315 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all |
309 | * to detect all SCCs inside an automaton. | 316 | * SCCs inside an automaton. |
310 | * | 317 | * |
311 | * @param ctx context | 318 | * @param ctx context |
312 | * @param v start vertex | 319 | * @param v start vertex |
@@ -394,6 +401,56 @@ scc_tarjan (struct GNUNET_REGEX_Context *ctx, struct GNUNET_REGEX_Automaton *a) | |||
394 | } | 401 | } |
395 | 402 | ||
396 | /** | 403 | /** |
404 | * Adds a transition from one state to another on 'label'. Does not add | ||
405 | * duplicate states. | ||
406 | * | ||
407 | * @param ctx context | ||
408 | * @param from_state starting state for the transition | ||
409 | * @param label transition label | ||
410 | * @param to_state state to where the transition should point to | ||
411 | */ | ||
412 | static void | ||
413 | state_add_transition (struct GNUNET_REGEX_Context *ctx, | ||
414 | struct GNUNET_REGEX_State *from_state, const char label, | ||
415 | struct GNUNET_REGEX_State *to_state) | ||
416 | { | ||
417 | int is_dup; | ||
418 | struct Transition *t; | ||
419 | |||
420 | if (NULL == from_state) | ||
421 | { | ||
422 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n"); | ||
423 | return; | ||
424 | } | ||
425 | |||
426 | // Do not add duplicate state transitions | ||
427 | is_dup = GNUNET_NO; | ||
428 | for (t = from_state->transitions_head; NULL != t; t = t->next) | ||
429 | { | ||
430 | if (t->to_state == to_state && t->label == label && | ||
431 | t->from_state == from_state) | ||
432 | { | ||
433 | is_dup = GNUNET_YES; | ||
434 | break; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | if (is_dup) | ||
439 | return; | ||
440 | |||
441 | t = GNUNET_malloc (sizeof (struct Transition)); | ||
442 | t->id = ctx->transition_id++; | ||
443 | t->label = label; | ||
444 | t->to_state = to_state; | ||
445 | t->from_state = from_state; | ||
446 | |||
447 | // Add outgoing transition to 'from_state' | ||
448 | from_state->transition_count++; | ||
449 | GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, | ||
450 | from_state->transitions_tail, t); | ||
451 | } | ||
452 | |||
453 | /** | ||
397 | * Compare two states. Used for sorting. | 454 | * Compare two states. Used for sorting. |
398 | * | 455 | * |
399 | * @param a first state | 456 | * @param a first state |
@@ -416,8 +473,39 @@ state_compare (const void *a, const void *b) | |||
416 | } | 473 | } |
417 | 474 | ||
418 | /** | 475 | /** |
419 | * Compare to state sets by comparing the id's of the states that are | 476 | * Get all edges leaving state 's'. |
420 | * contained in each set. Both sets are expected to be sorted by id! | 477 | * |
478 | * @param s state. | ||
479 | * @param edges all edges leaving 's'. | ||
480 | * | ||
481 | * @return number of edges. | ||
482 | */ | ||
483 | static unsigned int | ||
484 | state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) | ||
485 | { | ||
486 | struct Transition *t; | ||
487 | unsigned int count; | ||
488 | |||
489 | if (NULL == s) | ||
490 | return 0; | ||
491 | |||
492 | count = 0; | ||
493 | |||
494 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
495 | { | ||
496 | if (NULL != t->to_state) | ||
497 | { | ||
498 | edges[count].label = &t->label; | ||
499 | edges[count].destination = t->to_state->hash; | ||
500 | count++; | ||
501 | } | ||
502 | } | ||
503 | return count; | ||
504 | } | ||
505 | |||
506 | /** | ||
507 | * Compare to state sets by comparing the id's of the states that are contained | ||
508 | * in each set. Both sets are expected to be sorted by id! | ||
421 | * | 509 | * |
422 | * @param sset1 first state set | 510 | * @param sset1 first state set |
423 | * @param sset2 second state set | 511 | * @param sset2 second state set |
@@ -465,60 +553,8 @@ state_set_clear (struct GNUNET_REGEX_StateSet *set) | |||
465 | } | 553 | } |
466 | 554 | ||
467 | /** | 555 | /** |
468 | * Adds a transition from one state to another on 'label'. Does not add | 556 | * Clears an automaton fragment. Does not destroy the states inside the |
469 | * duplicate states. | 557 | * automaton. |
470 | * | ||
471 | * @param ctx context | ||
472 | * @param from_state starting state for the transition | ||
473 | * @param label transition label | ||
474 | * @param to_state state to where the transition should point to | ||
475 | */ | ||
476 | static void | ||
477 | state_add_transition (struct GNUNET_REGEX_Context *ctx, | ||
478 | struct GNUNET_REGEX_State *from_state, const char label, | ||
479 | struct GNUNET_REGEX_State *to_state) | ||
480 | { | ||
481 | int is_dup; | ||
482 | struct Transition *t; | ||
483 | |||
484 | if (NULL == from_state) | ||
485 | { | ||
486 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n"); | ||
487 | return; | ||
488 | } | ||
489 | |||
490 | // Do not add duplicate states | ||
491 | is_dup = GNUNET_NO; | ||
492 | for (t = from_state->transitions_head; NULL != t; t = t->next) | ||
493 | { | ||
494 | if (t->to_state == to_state | ||
495 | && t->label == label | ||
496 | && t->from_state == from_state) | ||
497 | { | ||
498 | is_dup = GNUNET_YES; | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | |||
503 | if (is_dup) | ||
504 | return; | ||
505 | |||
506 | t = GNUNET_malloc (sizeof (struct Transition)); | ||
507 | |||
508 | t->id = ctx->transition_id++; | ||
509 | t->label = label; | ||
510 | t->to_state = to_state; | ||
511 | t->from_state = from_state; | ||
512 | |||
513 | from_state->transition_count++; | ||
514 | |||
515 | GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, | ||
516 | from_state->transitions_tail, t); | ||
517 | } | ||
518 | |||
519 | /** | ||
520 | * Clears an automaton fragment. Does not destroy the states inside | ||
521 | * the automaton. | ||
522 | * | 558 | * |
523 | * @param a automaton to be cleared | 559 | * @param a automaton to be cleared |
524 | */ | 560 | */ |
@@ -566,9 +602,9 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s) | |||
566 | } | 602 | } |
567 | 603 | ||
568 | /** | 604 | /** |
569 | * Remove a state from the given automaton 'a'. Always use this function | 605 | * Remove a state from the given automaton 'a'. Always use this function when |
570 | * when altering the states of an automaton. Will also remove all transitions | 606 | * altering the states of an automaton. Will also remove all transitions leading |
571 | * leading to this state, before destroying it. | 607 | * to this state, before destroying it. |
572 | * | 608 | * |
573 | * @param a automaton | 609 | * @param a automaton |
574 | * @param s state to remove | 610 | * @param s state to remove |
@@ -608,7 +644,8 @@ automaton_remove_state (struct GNUNET_REGEX_Automaton *a, | |||
608 | } | 644 | } |
609 | 645 | ||
610 | /** | 646 | /** |
611 | * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy 's2'. | 647 | * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy |
648 | * 's2'. | ||
612 | * | 649 | * |
613 | * @param ctx context | 650 | * @param ctx context |
614 | * @param a automaton | 651 | * @param a automaton |
@@ -642,7 +679,7 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
642 | for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) | 679 | for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) |
643 | { | 680 | { |
644 | if (t_check->to_state != s1) | 681 | if (t_check->to_state != s1) |
645 | state_add_transition (ctx, s1, t_check->label, t_check->to_state); | 682 | state_add_transition (ctx, s1, t_check->label, t_check->to_state); |
646 | } | 683 | } |
647 | 684 | ||
648 | // 3. Rename s1 to {s1,s2} | 685 | // 3. Rename s1 to {s1,s2} |
@@ -662,8 +699,8 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
662 | } | 699 | } |
663 | 700 | ||
664 | /** | 701 | /** |
665 | * Add a state to the automaton 'a', always use this function to | 702 | * Add a state to the automaton 'a', always use this function to alter the |
666 | * alter the states DLL of the automaton. | 703 | * states DLL of the automaton. |
667 | * | 704 | * |
668 | * @param a automaton to add the state to | 705 | * @param a automaton to add the state to |
669 | * @param s state that should be added | 706 | * @param s state that should be added |
@@ -679,10 +716,19 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a, | |||
679 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, | 716 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, |
680 | struct GNUNET_REGEX_State * s); | 717 | struct GNUNET_REGEX_State * s); |
681 | 718 | ||
719 | static void | ||
720 | automaton_state_traverse_backward (void *cls, struct GNUNET_REGEX_State *s, | ||
721 | GNUNET_REGEX_traverse_action action) | ||
722 | { | ||
723 | |||
724 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Traversing backwards...\n"); | ||
725 | |||
726 | } | ||
727 | |||
682 | /** | 728 | /** |
683 | * Traverses all states that are reachable from state 's'. Expects | 729 | * Traverses all states that are reachable from state 's'. Expects the states to |
684 | * the states to be unmarked (s->marked == GNUNET_NO). Performs | 730 | * be unmarked (s->marked == GNUNET_NO). Performs 'action' on each visited |
685 | * 'action' on each visited state. | 731 | * state. |
686 | * | 732 | * |
687 | * @param cls closure. | 733 | * @param cls closure. |
688 | * @param s start state. | 734 | * @param s start state. |
@@ -707,8 +753,8 @@ automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s, | |||
707 | } | 753 | } |
708 | 754 | ||
709 | /** | 755 | /** |
710 | * Traverses the given automaton from it's start state, visiting all | 756 | * Traverses the given automaton from it's start state, visiting all reachable |
711 | * reachable states and calling 'action' on each one of them. | 757 | * states and calling 'action' on each one of them. |
712 | * | 758 | * |
713 | * @param cls closure. | 759 | * @param cls closure. |
714 | * @param a automaton. | 760 | * @param a automaton. |
@@ -727,8 +773,8 @@ automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, | |||
727 | } | 773 | } |
728 | 774 | ||
729 | /** | 775 | /** |
730 | * Creates a new DFA state based on a set of NFA states. Needs to be freed | 776 | * Creates a new DFA state based on a set of NFA states. Needs to be freed using |
731 | * using automaton_destroy_state. | 777 | * automaton_destroy_state. |
732 | * | 778 | * |
733 | * @param ctx context | 779 | * @param ctx context |
734 | * @param nfa_states set of NFA states on which the DFA should be based on | 780 | * @param nfa_states set of NFA states on which the DFA should be based on |
@@ -809,7 +855,8 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
809 | } | 855 | } |
810 | } | 856 | } |
811 | 857 | ||
812 | // If the nfa_states contain an accepting state, the new dfa state is also accepting | 858 | // If the nfa_states contain an accepting state, the new dfa state is also |
859 | // accepting | ||
813 | if (cstate->accepting) | 860 | if (cstate->accepting) |
814 | s->accepting = 1; | 861 | s->accepting = 1; |
815 | } | 862 | } |
@@ -820,8 +867,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
820 | } | 867 | } |
821 | 868 | ||
822 | /** | 869 | /** |
823 | * Move from the given state 's' to the next state on | 870 | * Move from the given state 's' to the next state on transition 'label' |
824 | * transition 'label' | ||
825 | * | 871 | * |
826 | * @param s starting state | 872 | * @param s starting state |
827 | * @param label edge label to follow | 873 | * @param label edge label to follow |
@@ -853,8 +899,8 @@ dfa_move (struct GNUNET_REGEX_State *s, const char label) | |||
853 | 899 | ||
854 | 900 | ||
855 | /** | 901 | /** |
856 | * Remove all unreachable states from DFA 'a'. Unreachable states | 902 | * Remove all unreachable states from DFA 'a'. Unreachable states are those |
857 | * are those states that are not reachable from the starting state. | 903 | * states that are not reachable from the starting state. |
858 | * | 904 | * |
859 | * @param a DFA automaton | 905 | * @param a DFA automaton |
860 | */ | 906 | */ |
@@ -884,8 +930,8 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | |||
884 | } | 930 | } |
885 | 931 | ||
886 | /** | 932 | /** |
887 | * Remove all dead states from the DFA 'a'. Dead states are those | 933 | * Remove all dead states from the DFA 'a'. Dead states are those states that do |
888 | * states that do not transition to any other state but themselfes. | 934 | * not transition to any other state but themselfes. |
889 | * | 935 | * |
890 | * @param a DFA automaton | 936 | * @param a DFA automaton |
891 | */ | 937 | */ |
@@ -940,8 +986,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
940 | int change; | 986 | int change; |
941 | int common_labels; | 987 | int common_labels; |
942 | 988 | ||
943 | for (i = 0, s1 = a->states_head; | 989 | for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; |
944 | i < a->state_count && NULL != s1; | ||
945 | i++, s1 = s1->next) | 990 | i++, s1 = s1->next) |
946 | { | 991 | { |
947 | s1->marked = i; | 992 | s1->marked = i; |
@@ -1011,8 +1056,8 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1011 | } | 1056 | } |
1012 | 1057 | ||
1013 | /** | 1058 | /** |
1014 | * Minimize the given DFA 'a' by removing all unreachable states, | 1059 | * Minimize the given DFA 'a' by removing all unreachable states, removing all |
1015 | * removing all dead states and merging all non distinguishable states | 1060 | * dead states and merging all non distinguishable states |
1016 | * | 1061 | * |
1017 | * @param ctx context | 1062 | * @param ctx context |
1018 | * @param a DFA automaton | 1063 | * @param a DFA automaton |
@@ -1037,7 +1082,8 @@ dfa_minimize (struct GNUNET_REGEX_Context *ctx, | |||
1037 | } | 1082 | } |
1038 | 1083 | ||
1039 | /** | 1084 | /** |
1040 | * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. | 1085 | * Creates a new NFA fragment. Needs to be cleared using |
1086 | * automaton_fragment_clear. | ||
1041 | * | 1087 | * |
1042 | * @param start starting state | 1088 | * @param start starting state |
1043 | * @param end end state | 1089 | * @param end end state |
@@ -1384,8 +1430,8 @@ nfa_add_question_op (struct GNUNET_REGEX_Context *ctx) | |||
1384 | } | 1430 | } |
1385 | 1431 | ||
1386 | /** | 1432 | /** |
1387 | * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment | 1433 | * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that |
1388 | * that alternates between a and b (a|b) | 1434 | * alternates between a and b (a|b) |
1389 | * | 1435 | * |
1390 | * @param ctx context | 1436 | * @param ctx context |
1391 | */ | 1437 | */ |
@@ -1629,6 +1675,59 @@ error: | |||
1629 | } | 1675 | } |
1630 | 1676 | ||
1631 | /** | 1677 | /** |
1678 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. | ||
1679 | * | ||
1680 | * @param ctx context. | ||
1681 | * @param nfa NFA automaton. | ||
1682 | * @param dfa DFA automaton. | ||
1683 | * @param dfa_state current dfa state, pass epsilon closure of first nfa state | ||
1684 | * for starting. | ||
1685 | */ | ||
1686 | static void | ||
1687 | construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | ||
1688 | struct GNUNET_REGEX_Automaton *nfa, | ||
1689 | struct GNUNET_REGEX_Automaton *dfa, | ||
1690 | struct GNUNET_REGEX_State *dfa_state) | ||
1691 | { | ||
1692 | struct Transition *ctran; | ||
1693 | struct GNUNET_REGEX_State *state_iter; | ||
1694 | struct GNUNET_REGEX_State *new_dfa_state; | ||
1695 | struct GNUNET_REGEX_State *state_contains; | ||
1696 | struct GNUNET_REGEX_StateSet *tmp; | ||
1697 | struct GNUNET_REGEX_StateSet *nfa_set; | ||
1698 | |||
1699 | for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next) | ||
1700 | { | ||
1701 | if (0 == ctran->label || NULL != ctran->to_state) | ||
1702 | continue; | ||
1703 | |||
1704 | tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->label); | ||
1705 | nfa_set = nfa_closure_set_create (nfa, tmp, 0); | ||
1706 | state_set_clear (tmp); | ||
1707 | new_dfa_state = dfa_state_create (ctx, nfa_set); | ||
1708 | state_contains = NULL; | ||
1709 | for (state_iter = dfa->states_head; NULL != state_iter; | ||
1710 | state_iter = state_iter->next) | ||
1711 | { | ||
1712 | if (0 == state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set)) | ||
1713 | state_contains = state_iter; | ||
1714 | } | ||
1715 | |||
1716 | if (NULL == state_contains) | ||
1717 | { | ||
1718 | automaton_add_state (dfa, new_dfa_state); | ||
1719 | construct_dfa_states (ctx, nfa, dfa, new_dfa_state); | ||
1720 | ctran->to_state = new_dfa_state; | ||
1721 | } | ||
1722 | else | ||
1723 | { | ||
1724 | ctran->to_state = state_contains; | ||
1725 | automaton_destroy_state (new_dfa_state); | ||
1726 | } | ||
1727 | } | ||
1728 | } | ||
1729 | |||
1730 | /** | ||
1632 | * Construct DFA for the given 'regex' of length 'len' | 1731 | * Construct DFA for the given 'regex' of length 'len' |
1633 | * | 1732 | * |
1634 | * @param regex regular expression string | 1733 | * @param regex regular expression string |
@@ -1642,14 +1741,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1642 | struct GNUNET_REGEX_Context ctx; | 1741 | struct GNUNET_REGEX_Context ctx; |
1643 | struct GNUNET_REGEX_Automaton *dfa; | 1742 | struct GNUNET_REGEX_Automaton *dfa; |
1644 | struct GNUNET_REGEX_Automaton *nfa; | 1743 | struct GNUNET_REGEX_Automaton *nfa; |
1645 | struct GNUNET_REGEX_StateSet *tmp; | ||
1646 | struct GNUNET_REGEX_StateSet *nfa_set; | 1744 | struct GNUNET_REGEX_StateSet *nfa_set; |
1647 | struct GNUNET_REGEX_StateSet *dfa_stack; | ||
1648 | struct Transition *ctran; | ||
1649 | struct GNUNET_REGEX_State *dfa_state; | ||
1650 | struct GNUNET_REGEX_State *new_dfa_state; | ||
1651 | struct GNUNET_REGEX_State *state_contains; | ||
1652 | struct GNUNET_REGEX_State *state_iter; | ||
1653 | 1745 | ||
1654 | GNUNET_REGEX_context_init (&ctx); | 1746 | GNUNET_REGEX_context_init (&ctx); |
1655 | 1747 | ||
@@ -1667,53 +1759,12 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1667 | dfa->type = DFA; | 1759 | dfa->type = DFA; |
1668 | 1760 | ||
1669 | // Create DFA start state from epsilon closure | 1761 | // Create DFA start state from epsilon closure |
1670 | dfa_stack = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet)); | ||
1671 | nfa_set = nfa_closure_create (nfa, nfa->start, 0); | 1762 | nfa_set = nfa_closure_create (nfa, nfa->start, 0); |
1672 | dfa->start = dfa_state_create (&ctx, nfa_set); | 1763 | dfa->start = dfa_state_create (&ctx, nfa_set); |
1673 | automaton_add_state (dfa, dfa->start); | 1764 | automaton_add_state (dfa, dfa->start); |
1674 | GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start); | ||
1675 | 1765 | ||
1676 | // Create dfa states by combining nfa states | 1766 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); |
1677 | while (dfa_stack->len > 0) | ||
1678 | { | ||
1679 | dfa_state = dfa_stack->states[dfa_stack->len - 1]; | ||
1680 | GNUNET_array_grow (dfa_stack->states, dfa_stack->len, dfa_stack->len - 1); | ||
1681 | 1767 | ||
1682 | for (ctran = dfa_state->transitions_head; NULL != ctran; | ||
1683 | ctran = ctran->next) | ||
1684 | { | ||
1685 | if (0 != ctran->label && NULL == ctran->to_state) | ||
1686 | { | ||
1687 | tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->label); | ||
1688 | nfa_set = nfa_closure_set_create (nfa, tmp, 0); | ||
1689 | state_set_clear (tmp); | ||
1690 | new_dfa_state = dfa_state_create (&ctx, nfa_set); | ||
1691 | state_contains = NULL; | ||
1692 | for (state_iter = dfa->states_head; NULL != state_iter; | ||
1693 | state_iter = state_iter->next) | ||
1694 | { | ||
1695 | if (0 == | ||
1696 | state_set_compare (state_iter->nfa_set, new_dfa_state->nfa_set)) | ||
1697 | state_contains = state_iter; | ||
1698 | } | ||
1699 | |||
1700 | if (NULL == state_contains) | ||
1701 | { | ||
1702 | automaton_add_state (dfa, new_dfa_state); | ||
1703 | GNUNET_array_append (dfa_stack->states, dfa_stack->len, | ||
1704 | new_dfa_state); | ||
1705 | ctran->to_state = new_dfa_state; | ||
1706 | } | ||
1707 | else | ||
1708 | { | ||
1709 | ctran->to_state = state_contains; | ||
1710 | automaton_destroy_state (new_dfa_state); | ||
1711 | } | ||
1712 | } | ||
1713 | } | ||
1714 | } | ||
1715 | |||
1716 | GNUNET_free (dfa_stack); | ||
1717 | GNUNET_REGEX_automaton_destroy (nfa); | 1768 | GNUNET_REGEX_automaton_destroy (nfa); |
1718 | 1769 | ||
1719 | dfa_minimize (&ctx, dfa); | 1770 | dfa_minimize (&ctx, dfa); |
@@ -1723,8 +1774,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1723 | } | 1774 | } |
1724 | 1775 | ||
1725 | /** | 1776 | /** |
1726 | * Free the memory allocated by constructing the GNUNET_REGEX_Automaton | 1777 | * Free the memory allocated by constructing the GNUNET_REGEX_Automaton data |
1727 | * data structure. | 1778 | * structure. |
1728 | * | 1779 | * |
1729 | * @param a automaton to be destroyed | 1780 | * @param a automaton to be destroyed |
1730 | */ | 1781 | */ |
@@ -1805,7 +1856,8 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
1805 | 1856 | ||
1806 | if (NULL == s_acc) | 1857 | if (NULL == s_acc) |
1807 | { | 1858 | { |
1808 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | 1859 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", |
1860 | s->name); | ||
1809 | return; | 1861 | return; |
1810 | } | 1862 | } |
1811 | fwrite (s_acc, strlen (s_acc), 1, p); | 1863 | fwrite (s_acc, strlen (s_acc), 1, p); |
@@ -1838,7 +1890,8 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
1838 | 1890 | ||
1839 | if (NULL == s_tran) | 1891 | if (NULL == s_tran) |
1840 | { | 1892 | { |
1841 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | 1893 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", |
1894 | s->name); | ||
1842 | return; | 1895 | return; |
1843 | } | 1896 | } |
1844 | 1897 | ||
@@ -1972,8 +2025,8 @@ GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
1972 | } | 2025 | } |
1973 | 2026 | ||
1974 | /** | 2027 | /** |
1975 | * Get the first key for the given 'input_string'. This hashes | 2028 | * Get the first key for the given 'input_string'. This hashes the first x bits |
1976 | * the first x bits of the 'input_strings'. | 2029 | * of the 'input_strings'. |
1977 | * | 2030 | * |
1978 | * @param input_string string. | 2031 | * @param input_string string. |
1979 | * @param string_len length of the 'input_string'. | 2032 | * @param string_len length of the 'input_string'. |
@@ -2015,36 +2068,6 @@ GNUNET_REGEX_check_proof (const char *proof, const GNUNET_HashCode * key) | |||
2015 | return GNUNET_OK; | 2068 | return GNUNET_OK; |
2016 | } | 2069 | } |
2017 | 2070 | ||
2018 | /** | ||
2019 | * Get all edges leaving state 's'. | ||
2020 | * | ||
2021 | * @param s state. | ||
2022 | * @param edges all edges leaving 's'. | ||
2023 | * | ||
2024 | * @return number of edges. | ||
2025 | */ | ||
2026 | static unsigned int | ||
2027 | state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) | ||
2028 | { | ||
2029 | struct Transition *t; | ||
2030 | unsigned int count; | ||
2031 | |||
2032 | if (NULL == s) | ||
2033 | return 0; | ||
2034 | |||
2035 | count = 0; | ||
2036 | |||
2037 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
2038 | { | ||
2039 | if (NULL != t->to_state) | ||
2040 | { | ||
2041 | edges[count].label = &t->label; | ||
2042 | edges[count].destination = t->to_state->hash; | ||
2043 | count++; | ||
2044 | } | ||
2045 | } | ||
2046 | return count; | ||
2047 | } | ||
2048 | 2071 | ||
2049 | /** | 2072 | /** |
2050 | * Iterate over all edges helper function starting from state 's', calling | 2073 | * Iterate over all edges helper function starting from state 's', calling |
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index 39959641d..52cd12cb6 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c | |||
@@ -27,14 +27,15 @@ | |||
27 | #include "platform.h" | 27 | #include "platform.h" |
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | 29 | ||
30 | void key_iterator(void *cls, const GNUNET_HashCode *key, const char *proof, | 30 | void |
31 | int accepting, unsigned int num_edges, | 31 | key_iterator (void *cls, const GNUNET_HashCode * key, const char *proof, |
32 | const struct GNUNET_REGEX_Edge *edges) | 32 | int accepting, unsigned int num_edges, |
33 | const struct GNUNET_REGEX_Edge *edges) | ||
33 | { | 34 | { |
34 | int i; | 35 | int i; |
35 | 36 | ||
36 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); | 37 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); |
37 | for (i=0; i<num_edges; i++) | 38 | for (i = 0; i < num_edges; i++) |
38 | { | 39 | { |
39 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label); | 40 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label); |
40 | } | 41 | } |