diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-08-17 10:03:56 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-08-17 10:03:56 +0000 |
commit | 39e159b2613be7460dd47bb5d290ac84dc3a9196 (patch) | |
tree | c2915a29c382399282afa8231fa935c3df1f32d7 /src/regex | |
parent | 2162464f34cff436c90e61bf967be0f0ec74d31e (diff) | |
download | gnunet-39e159b2613be7460dd47bb5d290ac84dc3a9196.tar.gz gnunet-39e159b2613be7460dd47bb5d290ac84dc3a9196.zip |
Added multi-striding capabilities to regex.
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/regex.c | 282 | ||||
-rw-r--r-- | src/regex/regex_graph.c | 11 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 33 |
3 files changed, 263 insertions, 63 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 4fb524b1c..694386e9a 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -245,7 +245,8 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
245 | } | 245 | } |
246 | 246 | ||
247 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); | 247 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); |
248 | t->id = ctx->transition_id++; | 248 | if (NULL != ctx) |
249 | t->id = ctx->transition_id++; | ||
249 | if (NULL != label) | 250 | if (NULL != label) |
250 | t->label = GNUNET_strdup (label); | 251 | t->label = GNUNET_strdup (label); |
251 | else | 252 | else |
@@ -572,55 +573,202 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a, | |||
572 | 573 | ||
573 | 574 | ||
574 | /** | 575 | /** |
575 | * Depth-first traversal of all states that are reachable from state 's'. Expects the states to | 576 | * Depth-first traversal (DFS) of all states that are reachable from state |
576 | * be unmarked (s->marked == GNUNET_NO). Performs 'action' on each visited | 577 | * 's'. Performs 'action' on each visited state. |
577 | * state. | ||
578 | * | 578 | * |
579 | * @param s start state. | 579 | * @param s start state. |
580 | * @param marks an array of size a->state_count to remember which state was | ||
581 | * already visited. | ||
580 | * @param count current count of the state. | 582 | * @param count current count of the state. |
581 | * @param action action to be performed on each state. | 583 | * @param action action to be performed on each state. |
582 | * @param action_cls closure for action | 584 | * @param action_cls closure for action. |
583 | */ | 585 | */ |
584 | static void | 586 | static void |
585 | automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count, | 587 | automaton_state_traverse (struct GNUNET_REGEX_State *s, int *marks, |
588 | unsigned int *count, | ||
586 | GNUNET_REGEX_traverse_action action, void *action_cls) | 589 | GNUNET_REGEX_traverse_action action, void *action_cls) |
587 | { | 590 | { |
588 | struct GNUNET_REGEX_Transition *t; | 591 | struct GNUNET_REGEX_Transition *t; |
589 | 592 | ||
590 | if (GNUNET_NO != s->marked) | 593 | if (GNUNET_YES == marks[s->traversal_id]) |
591 | return; | 594 | return; |
592 | s->marked = GNUNET_YES; | 595 | |
596 | marks[s->traversal_id] = GNUNET_YES; | ||
597 | |||
593 | if (NULL != action) | 598 | if (NULL != action) |
594 | action (action_cls, *count, s); | 599 | action (action_cls, *count, s); |
600 | |||
595 | (*count)++; | 601 | (*count)++; |
602 | |||
596 | for (t = s->transitions_head; NULL != t; t = t->next) | 603 | for (t = s->transitions_head; NULL != t; t = t->next) |
597 | automaton_state_traverse (t->to_state, count, action, action_cls); | 604 | { |
605 | automaton_state_traverse (t->to_state, marks, count, action, action_cls); | ||
606 | } | ||
598 | } | 607 | } |
599 | 608 | ||
600 | 609 | ||
601 | /** | 610 | /** |
602 | * Traverses the given automaton from it's start state, visiting all reachable | 611 | * Traverses the given automaton using depth-first-search (DFS) from it's start |
603 | * states and calling 'action' on each one of them. | 612 | * state, visiting all reachable states and calling 'action' on each one of |
613 | * them. | ||
604 | * | 614 | * |
605 | * @param a automaton. | 615 | * @param a automaton to be traversed. |
616 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
606 | * @param action action to be performed on each state. | 617 | * @param action action to be performed on each state. |
607 | * @param action_cls closure for action | 618 | * @param action_cls closure for action |
608 | */ | 619 | */ |
609 | void | 620 | void |
610 | GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, | 621 | GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a, |
622 | struct GNUNET_REGEX_State *start, | ||
611 | GNUNET_REGEX_traverse_action action, | 623 | GNUNET_REGEX_traverse_action action, |
612 | void *action_cls) | 624 | void *action_cls) |
613 | { | 625 | { |
614 | unsigned int count; | 626 | unsigned int count; |
615 | struct GNUNET_REGEX_State *s; | 627 | struct GNUNET_REGEX_State *s; |
628 | int marks[a->state_count]; | ||
629 | |||
630 | if (NULL == a || 0 == a->state_count) | ||
631 | return; | ||
632 | |||
633 | for (count = 0, s = a->states_head; NULL != s; s = s->next, count++) | ||
634 | { | ||
635 | s->traversal_id = count; | ||
636 | marks[s->traversal_id] = GNUNET_NO; | ||
637 | } | ||
616 | 638 | ||
617 | for (s = a->states_head; NULL != s; s = s->next) | ||
618 | s->marked = GNUNET_NO; | ||
619 | count = 0; | 639 | count = 0; |
620 | automaton_state_traverse (a->start, &count, action, action_cls); | 640 | |
641 | if (NULL == start) | ||
642 | s = a->start; | ||
643 | else | ||
644 | s = start; | ||
645 | |||
646 | automaton_state_traverse (s, marks, &count, action, action_cls); | ||
647 | } | ||
648 | |||
649 | |||
650 | /** | ||
651 | * Context for adding strided transitions to a DFA. | ||
652 | */ | ||
653 | struct GNUNET_REGEX_Strided_Context | ||
654 | { | ||
655 | /** | ||
656 | * Length of the strides. | ||
657 | */ | ||
658 | const unsigned int stride; | ||
659 | |||
660 | /** | ||
661 | * Strided transitions DLL. New strided transitions will be stored in this DLL | ||
662 | * and afterwards added to the DFA. | ||
663 | */ | ||
664 | struct GNUNET_REGEX_Transition *transitions_head; | ||
665 | |||
666 | /** | ||
667 | * Strided transitions DLL. | ||
668 | */ | ||
669 | struct GNUNET_REGEX_Transition *transitions_tail; | ||
670 | }; | ||
671 | |||
672 | |||
673 | /** | ||
674 | * Recursive helper function to add strides to a DFA. | ||
675 | * | ||
676 | * @param cls context, contains stride length and strided transitions DLL. | ||
677 | * @param depth current depth of the depth-first traversal of the graph. | ||
678 | * @param label current label, string that contains all labels on the path from | ||
679 | * 'start' to 's'. | ||
680 | * @param start start state for the depth-first traversal of the graph. | ||
681 | * @param s current state in the depth-first traversal | ||
682 | */ | ||
683 | void | ||
684 | add_multi_strides_to_dfa_helper (void *cls, const unsigned int depth, | ||
685 | char *label, struct GNUNET_REGEX_State *start, | ||
686 | struct GNUNET_REGEX_State *s) | ||
687 | { | ||
688 | struct GNUNET_REGEX_Strided_Context *ctx = cls; | ||
689 | struct GNUNET_REGEX_Transition *t; | ||
690 | char *new_label; | ||
691 | |||
692 | if (depth == ctx->stride) | ||
693 | { | ||
694 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); | ||
695 | t->label = GNUNET_strdup (label); | ||
696 | t->to_state = s; | ||
697 | t->from_state = start; | ||
698 | GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail, | ||
699 | t); | ||
700 | } | ||
701 | else | ||
702 | { | ||
703 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
704 | { | ||
705 | /* Do not consider self-loops, because it end's up in too many | ||
706 | * transitions */ | ||
707 | if (t->to_state == t->from_state) | ||
708 | continue; | ||
709 | |||
710 | if (NULL != label) | ||
711 | { | ||
712 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
713 | } | ||
714 | else | ||
715 | new_label = GNUNET_strdup (t->label); | ||
716 | |||
717 | add_multi_strides_to_dfa_helper (cls, (depth + 1), new_label, start, | ||
718 | t->to_state); | ||
719 | } | ||
720 | } | ||
721 | GNUNET_free_non_null (label); | ||
722 | } | ||
723 | |||
724 | |||
725 | /** | ||
726 | * Function called for each state in the DFA. Starts a traversal of depth set in | ||
727 | * context starting from state 's'. | ||
728 | * | ||
729 | * @param cls context. | ||
730 | * @param count not used. | ||
731 | * @param s current state. | ||
732 | */ | ||
733 | void | ||
734 | add_multi_strides_to_dfa (void *cls, const unsigned int count, | ||
735 | struct GNUNET_REGEX_State *s) | ||
736 | { | ||
737 | add_multi_strides_to_dfa_helper (cls, 0, NULL, s, s); | ||
738 | } | ||
739 | |||
740 | |||
741 | /** | ||
742 | * Adds multi-strided transitions to the given 'dfa'. | ||
743 | * | ||
744 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
745 | * @param dfa DFA to which the multi strided transitions should be added. | ||
746 | * @param stride_len length of the strides. | ||
747 | */ | ||
748 | void | ||
749 | GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx, | ||
750 | struct GNUNET_REGEX_Automaton *dfa, | ||
751 | const unsigned int stride_len) | ||
752 | { | ||
753 | struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL}; | ||
754 | struct GNUNET_REGEX_Transition *t; | ||
755 | struct GNUNET_REGEX_Transition *t_next; | ||
756 | |||
757 | GNUNET_REGEX_automaton_traverse (dfa, dfa->start, &add_multi_strides_to_dfa, | ||
758 | &ctx); | ||
759 | |||
760 | for (t = ctx.transitions_head; NULL != t; t = t_next) | ||
761 | { | ||
762 | t_next = t->next; | ||
763 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
764 | GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t); | ||
765 | GNUNET_free_non_null (t->label); | ||
766 | GNUNET_free (t); | ||
767 | } | ||
621 | } | 768 | } |
622 | 769 | ||
623 | 770 | ||
771 | |||
624 | /** | 772 | /** |
625 | * Check if the given string 'str' needs parentheses around it when | 773 | * Check if the given string 'str' needs parentheses around it when |
626 | * using it to generate a regex. | 774 | * using it to generate a regex. |
@@ -721,8 +869,8 @@ has_epsilon (const char *str) | |||
721 | * | 869 | * |
722 | * @param str string | 870 | * @param str string |
723 | * | 871 | * |
724 | * @return string without preceding epsilon, string 'str' if no preceding epsilon | 872 | * @return string without preceding epsilon, string 'str' if no preceding |
725 | * could be found, NULL if 'str' was NULL | 873 | * epsilon could be found, NULL if 'str' was NULL |
726 | */ | 874 | */ |
727 | static char * | 875 | static char * |
728 | remove_epsilon (const char *str) | 876 | remove_epsilon (const char *str) |
@@ -760,19 +908,20 @@ strkcmp (const char *str1, const char *str2, size_t k) | |||
760 | 908 | ||
761 | 909 | ||
762 | /** | 910 | /** |
763 | * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse' function to create | 911 | * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse' |
764 | * the depth-first numbering of the states. | 912 | * function to create the depth-first numbering of the states. |
765 | * | 913 | * |
766 | * @param cls states array. | 914 | * @param cls states array. |
767 | * @param count current state counter. | 915 | * @param count current state counter. |
768 | * @param s current state. | 916 | * @param s current state. |
769 | */ | 917 | */ |
770 | static void | 918 | void |
771 | number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s) | 919 | number_states (void *cls, const unsigned int count, |
920 | struct GNUNET_REGEX_State *s) | ||
772 | { | 921 | { |
773 | struct GNUNET_REGEX_State **states = cls; | 922 | struct GNUNET_REGEX_State **states = cls; |
774 | 923 | ||
775 | s->proof_id = count; | 924 | s->dfs_id = count; |
776 | if (NULL != states) | 925 | if (NULL != states) |
777 | states[count] = s; | 926 | states[count] = s; |
778 | } | 927 | } |
@@ -1171,9 +1320,18 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1171 | unsigned int j; | 1320 | unsigned int j; |
1172 | unsigned int k; | 1321 | unsigned int k; |
1173 | 1322 | ||
1323 | if (NULL == a) | ||
1324 | { | ||
1325 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1326 | "Could not create proofs, automaton was NULL\n"); | ||
1327 | return; | ||
1328 | } | ||
1174 | 1329 | ||
1175 | /* create depth-first numbering of the states, initializes 'state' */ | 1330 | /* create depth-first numbering of the states, initializes 'state' */ |
1176 | GNUNET_REGEX_automaton_traverse (a, &number_states, states); | 1331 | GNUNET_REGEX_automaton_traverse (a, a->start, &number_states, states); |
1332 | |||
1333 | for (i = 0; i < n; i++) | ||
1334 | GNUNET_assert (NULL != states[i]); | ||
1177 | 1335 | ||
1178 | /* Compute regular expressions of length "1" between each pair of states */ | 1336 | /* Compute regular expressions of length "1" between each pair of states */ |
1179 | for (i = 0; i < n; i++) | 1337 | for (i = 0; i < n; i++) |
@@ -1185,7 +1343,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1185 | } | 1343 | } |
1186 | for (t = states[i]->transitions_head; NULL != t; t = t->next) | 1344 | for (t = states[i]->transitions_head; NULL != t; t = t->next) |
1187 | { | 1345 | { |
1188 | j = t->to_state->proof_id; | 1346 | j = t->to_state->dfs_id; |
1189 | if (NULL == R_last[i][j]) | 1347 | if (NULL == R_last[i][j]) |
1190 | GNUNET_asprintf (&R_last[i][j], "%s", t->label); | 1348 | GNUNET_asprintf (&R_last[i][j], "%s", t->label); |
1191 | else | 1349 | else |
@@ -1213,7 +1371,8 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1213 | GNUNET_free (temp); | 1371 | GNUNET_free (temp); |
1214 | } | 1372 | } |
1215 | 1373 | ||
1216 | /* Compute regular expressions of length "k" between each pair of states per induction */ | 1374 | /* Compute regular expressions of length "k" between each pair of states per |
1375 | * induction */ | ||
1217 | for (k = 0; k < n; k++) | 1376 | for (k = 0; k < n; k++) |
1218 | { | 1377 | { |
1219 | for (i = 0; i < n; i++) | 1378 | for (i = 0; i < n; i++) |
@@ -1246,30 +1405,31 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1246 | // assign proofs and hashes | 1405 | // assign proofs and hashes |
1247 | for (i = 0; i < n; i++) | 1406 | for (i = 0; i < n; i++) |
1248 | { | 1407 | { |
1249 | if (NULL != R_last[a->start->proof_id][i]) | 1408 | if (NULL != R_last[a->start->dfs_id][i]) |
1250 | { | 1409 | { |
1251 | states[i]->proof = GNUNET_strdup (R_last[a->start->proof_id][i]); | 1410 | states[i]->proof = GNUNET_strdup (R_last[a->start->dfs_id][i]); |
1252 | GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof), | 1411 | GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof), |
1253 | &states[i]->hash); | 1412 | &states[i]->hash); |
1254 | } | 1413 | } |
1255 | } | 1414 | } |
1256 | 1415 | ||
1257 | // complete regex for whole DFA: union of all pairs (start state/accepting state(s)). | 1416 | // complete regex for whole DFA: union of all pairs (start state/accepting |
1417 | // state(s)). | ||
1258 | complete_regex = NULL; | 1418 | complete_regex = NULL; |
1259 | for (i = 0; i < n; i++) | 1419 | for (i = 0; i < n; i++) |
1260 | { | 1420 | { |
1261 | if (states[i]->accepting) | 1421 | if (states[i]->accepting) |
1262 | { | 1422 | { |
1263 | if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i])) | 1423 | if (NULL == complete_regex && 0 < strlen (R_last[a->start->dfs_id][i])) |
1264 | { | 1424 | { |
1265 | GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]); | 1425 | GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->dfs_id][i]); |
1266 | } | 1426 | } |
1267 | else if (NULL != R_last[a->start->proof_id][i] && | 1427 | else if (NULL != R_last[a->start->dfs_id][i] && |
1268 | 0 < strlen (R_last[a->start->proof_id][i])) | 1428 | 0 < strlen (R_last[a->start->dfs_id][i])) |
1269 | { | 1429 | { |
1270 | temp = complete_regex; | 1430 | temp = complete_regex; |
1271 | GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, | 1431 | GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, |
1272 | R_last[a->start->proof_id][i]); | 1432 | R_last[a->start->dfs_id][i]); |
1273 | GNUNET_free (temp); | 1433 | GNUNET_free (temp); |
1274 | } | 1434 | } |
1275 | } | 1435 | } |
@@ -1308,7 +1468,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1308 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); | 1468 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); |
1309 | s->id = ctx->state_id++; | 1469 | s->id = ctx->state_id++; |
1310 | s->accepting = 0; | 1470 | s->accepting = 0; |
1311 | s->marked = 0; | 1471 | s->marked = GNUNET_NO; |
1312 | s->name = NULL; | 1472 | s->name = NULL; |
1313 | s->scc_id = 0; | 1473 | s->scc_id = 0; |
1314 | s->index = -1; | 1474 | s->index = -1; |
@@ -1398,6 +1558,20 @@ dfa_move (struct GNUNET_REGEX_State *s, const char *label) | |||
1398 | return new_s; | 1558 | return new_s; |
1399 | } | 1559 | } |
1400 | 1560 | ||
1561 | /** | ||
1562 | * Set the given state 'marked' to GNUNET_YES. Used by the | ||
1563 | * 'dfa_remove_unreachable_states' function to detect unreachable states in the | ||
1564 | * automaton. | ||
1565 | * | ||
1566 | * @param cls closure, not used. | ||
1567 | * @param count count, not used. | ||
1568 | * @param s state where the marked attribute will be set to GNUNET_YES. | ||
1569 | */ | ||
1570 | void | ||
1571 | mark_states (void *cls, const unsigned int count, struct GNUNET_REGEX_State *s) | ||
1572 | { | ||
1573 | s->marked = GNUNET_YES; | ||
1574 | } | ||
1401 | 1575 | ||
1402 | /** | 1576 | /** |
1403 | * Remove all unreachable states from DFA 'a'. Unreachable states are those | 1577 | * Remove all unreachable states from DFA 'a'. Unreachable states are those |
@@ -1416,7 +1590,7 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | |||
1416 | s->marked = GNUNET_NO; | 1590 | s->marked = GNUNET_NO; |
1417 | 1591 | ||
1418 | // 2. traverse dfa from start state and mark all visited states | 1592 | // 2. traverse dfa from start state and mark all visited states |
1419 | GNUNET_REGEX_automaton_traverse (a, NULL, NULL); | 1593 | GNUNET_REGEX_automaton_traverse (a, a->start, &mark_states, NULL); |
1420 | 1594 | ||
1421 | // 3. delete all states that were not visited | 1595 | // 3. delete all states that were not visited |
1422 | for (s = a->states_head; NULL != s; s = s_next) | 1596 | for (s = a->states_head; NULL != s; s = s_next) |
@@ -1477,7 +1651,6 @@ static void | |||
1477 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | 1651 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, |
1478 | struct GNUNET_REGEX_Automaton *a) | 1652 | struct GNUNET_REGEX_Automaton *a) |
1479 | { | 1653 | { |
1480 | unsigned int i; | ||
1481 | int table[a->state_count][a->state_count]; | 1654 | int table[a->state_count][a->state_count]; |
1482 | struct GNUNET_REGEX_State *s1; | 1655 | struct GNUNET_REGEX_State *s1; |
1483 | struct GNUNET_REGEX_State *s2; | 1656 | struct GNUNET_REGEX_State *s2; |
@@ -1487,6 +1660,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1487 | struct GNUNET_REGEX_State *s2_next; | 1660 | struct GNUNET_REGEX_State *s2_next; |
1488 | int change; | 1661 | int change; |
1489 | unsigned int num_equal_edges; | 1662 | unsigned int num_equal_edges; |
1663 | unsigned int i; | ||
1490 | 1664 | ||
1491 | for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; | 1665 | for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; |
1492 | i++, s1 = s1->next) | 1666 | i++, s1 = s1->next) |
@@ -1677,7 +1851,7 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting) | |||
1677 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); | 1851 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); |
1678 | s->id = ctx->state_id++; | 1852 | s->id = ctx->state_id++; |
1679 | s->accepting = accepting; | 1853 | s->accepting = accepting; |
1680 | s->marked = 0; | 1854 | s->marked = GNUNET_NO; |
1681 | s->contained = 0; | 1855 | s->contained = 0; |
1682 | s->index = -1; | 1856 | s->index = -1; |
1683 | s->lowlink = -1; | 1857 | s->lowlink = -1; |
@@ -2188,7 +2362,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2188 | nfa->regex = GNUNET_strdup (regex); | 2362 | nfa->regex = GNUNET_strdup (regex); |
2189 | 2363 | ||
2190 | /* create depth-first numbering of the states for pretty printing */ | 2364 | /* create depth-first numbering of the states for pretty printing */ |
2191 | GNUNET_REGEX_automaton_traverse (nfa, &number_states, NULL); | 2365 | GNUNET_REGEX_automaton_traverse (nfa, NULL, &number_states, NULL); |
2192 | 2366 | ||
2193 | return nfa; | 2367 | return nfa; |
2194 | 2368 | ||
@@ -2293,6 +2467,9 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
2293 | 2467 | ||
2294 | dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); | 2468 | dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); |
2295 | dfa->type = DFA; | 2469 | dfa->type = DFA; |
2470 | dfa->state_count = 0; | ||
2471 | dfa->states_head = NULL; | ||
2472 | dfa->states_tail = NULL; | ||
2296 | dfa->regex = GNUNET_strdup (regex); | 2473 | dfa->regex = GNUNET_strdup (regex); |
2297 | 2474 | ||
2298 | // Create DFA start state from epsilon closure | 2475 | // Create DFA start state from epsilon closure |
@@ -2310,6 +2487,9 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
2310 | // Create proofs for all states | 2487 | // Create proofs for all states |
2311 | automaton_create_proofs (dfa); | 2488 | automaton_create_proofs (dfa); |
2312 | 2489 | ||
2490 | // Add strides to DFA | ||
2491 | // GNUNET_REGEX_add_multi_strides_to_dfa (&ctx, dfa, 2); | ||
2492 | |||
2313 | return dfa; | 2493 | return dfa; |
2314 | } | 2494 | } |
2315 | 2495 | ||
@@ -2542,6 +2722,12 @@ GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key) | |||
2542 | { | 2722 | { |
2543 | struct GNUNET_HashCode key_check; | 2723 | struct GNUNET_HashCode key_check; |
2544 | 2724 | ||
2725 | if (NULL == proof || NULL == key) | ||
2726 | { | ||
2727 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Proof check failed, was NULL.\n"); | ||
2728 | return GNUNET_NO; | ||
2729 | } | ||
2730 | |||
2545 | GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check); | 2731 | GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check); |
2546 | return (0 == | 2732 | return (0 == |
2547 | GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO; | 2733 | GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO; |
@@ -2607,10 +2793,14 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, | |||
2607 | /** | 2793 | /** |
2608 | * Iterate over all initial edges that aren't actually part of the automaton. | 2794 | * Iterate over all initial edges that aren't actually part of the automaton. |
2609 | * This is needed to find the initial states returned by | 2795 | * This is needed to find the initial states returned by |
2610 | * GNUNET_REGEX_get_first_key. Iteration will start at the first branch state (a | 2796 | * GNUNET_REGEX_get_first_key. Iteration will start at the first state that has |
2611 | * state that has more than one outgoing edge, can be the first state), because | 2797 | * more than one outgoing edge, i.e. the state that branches the graph. |
2612 | * all previous states will have the same proof and be iterated over in | 2798 | * For example consider the following graph: |
2613 | * iterate_all_edges. | 2799 | * a -> b -> c -> d -> ... |
2800 | * \-> e -> ... | ||
2801 | * | ||
2802 | * This function will not iterate over the edges leading to "c", because these | ||
2803 | * will be covered by the iterate_edges function. | ||
2614 | * | 2804 | * |
2615 | * @param a the automaton for which the initial states should be computed. | 2805 | * @param a the automaton for which the initial states should be computed. |
2616 | * @param initial_len length of the initial state string. | 2806 | * @param initial_len length of the initial state string. |
@@ -2649,7 +2839,7 @@ iterate_initial_edges (struct GNUNET_REGEX_Automaton *a, | |||
2649 | GNUNET_asprintf (&consumed_string, "%s", s->transitions_head->label); | 2839 | GNUNET_asprintf (&consumed_string, "%s", s->transitions_head->label); |
2650 | 2840 | ||
2651 | s = s->transitions_head->to_state; | 2841 | s = s->transitions_head->to_state; |
2652 | cur_len++; | 2842 | cur_len += strlen (s->transitions_head->label); |
2653 | } | 2843 | } |
2654 | while (cur_len < initial_len && 1 == s->transition_count); | 2844 | while (cur_len < initial_len && 1 == s->transition_count); |
2655 | } | 2845 | } |
@@ -2663,7 +2853,7 @@ iterate_initial_edges (struct GNUNET_REGEX_Automaton *a, | |||
2663 | 2853 | ||
2664 | /** | 2854 | /** |
2665 | * Iterate over all edges helper function starting from state 's', calling | 2855 | * Iterate over all edges helper function starting from state 's', calling |
2666 | * iterator on for each edge. | 2856 | * iterator function for each edge. |
2667 | * | 2857 | * |
2668 | * @param s state. | 2858 | * @param s state. |
2669 | * @param iterator iterator function called for each edge. | 2859 | * @param iterator iterator function called for each edge. |
@@ -2683,7 +2873,7 @@ iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator, | |||
2683 | 2873 | ||
2684 | num_edges = state_get_edges (s, edges); | 2874 | num_edges = state_get_edges (s, edges); |
2685 | 2875 | ||
2686 | if (0 < strlen (s->proof) || s->accepting) | 2876 | if ((NULL != s->proof && 0 < strlen (s->proof)) || s->accepting) |
2687 | iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges, | 2877 | iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges, |
2688 | edges); | 2878 | edges); |
2689 | 2879 | ||
diff --git a/src/regex/regex_graph.c b/src/regex/regex_graph.c index 06546e8f7..9223200c8 100644 --- a/src/regex/regex_graph.c +++ b/src/regex/regex_graph.c | |||
@@ -167,10 +167,10 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count, | |||
167 | char *to_name; | 167 | char *to_name; |
168 | 168 | ||
169 | if (GNUNET_YES == ctx->verbose) | 169 | if (GNUNET_YES == ctx->verbose) |
170 | GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->proof_id, s->name, s->proof, | 170 | GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->dfs_id, s->name, s->proof, |
171 | GNUNET_h2s (&s->hash)); | 171 | GNUNET_h2s (&s->hash)); |
172 | else | 172 | else |
173 | GNUNET_asprintf (&name, "%i", s->proof_id); | 173 | GNUNET_asprintf (&name, "%i", s->dfs_id); |
174 | 174 | ||
175 | if (s->accepting) | 175 | if (s->accepting) |
176 | { | 176 | { |
@@ -218,12 +218,12 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count, | |||
218 | 218 | ||
219 | if (GNUNET_YES == ctx->verbose) | 219 | if (GNUNET_YES == ctx->verbose) |
220 | { | 220 | { |
221 | GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->proof_id, | 221 | GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->dfs_id, |
222 | ctran->to_state->name, ctran->to_state->proof, | 222 | ctran->to_state->name, ctran->to_state->proof, |
223 | GNUNET_h2s (&ctran->to_state->hash)); | 223 | GNUNET_h2s (&ctran->to_state->hash)); |
224 | } | 224 | } |
225 | else | 225 | else |
226 | GNUNET_asprintf (&to_name, "%i", ctran->to_state->proof_id); | 226 | GNUNET_asprintf (&to_name, "%i", ctran->to_state->dfs_id); |
227 | 227 | ||
228 | if (NULL == ctran->label) | 228 | if (NULL == ctran->label) |
229 | { | 229 | { |
@@ -320,7 +320,8 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
320 | start = "digraph G {\nrankdir=LR\n"; | 320 | start = "digraph G {\nrankdir=LR\n"; |
321 | fwrite (start, strlen (start), 1, ctx.filep); | 321 | fwrite (start, strlen (start), 1, ctx.filep); |
322 | 322 | ||
323 | GNUNET_REGEX_automaton_traverse (a, &GNUNET_REGEX_automaton_save_graph_step, | 323 | GNUNET_REGEX_automaton_traverse (a, a->start, |
324 | &GNUNET_REGEX_automaton_save_graph_step, | ||
324 | &ctx); | 325 | &ctx); |
325 | 326 | ||
326 | end = "\n}\n"; | 327 | end = "\n}\n"; |
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index 007870832..f96d51fb0 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -77,11 +77,6 @@ struct GNUNET_REGEX_Transition | |||
77 | * State from which this transition origins. | 77 | * State from which this transition origins. |
78 | */ | 78 | */ |
79 | struct GNUNET_REGEX_State *from_state; | 79 | struct GNUNET_REGEX_State *from_state; |
80 | |||
81 | /** | ||
82 | * Mark this transition. For example when reversing the automaton. | ||
83 | */ | ||
84 | int mark; | ||
85 | }; | 80 | }; |
86 | 81 | ||
87 | 82 | ||
@@ -106,6 +101,12 @@ struct GNUNET_REGEX_State | |||
106 | unsigned int id; | 101 | unsigned int id; |
107 | 102 | ||
108 | /** | 103 | /** |
104 | * Unique state id that is used for traversing the automaton. It is guaranteed | ||
105 | * to be > 0 and < state_count. | ||
106 | */ | ||
107 | unsigned int traversal_id; | ||
108 | |||
109 | /** | ||
109 | * If this is an accepting state or not. | 110 | * If this is an accepting state or not. |
110 | */ | 111 | */ |
111 | int accepting; | 112 | int accepting; |
@@ -152,9 +153,12 @@ struct GNUNET_REGEX_State | |||
152 | struct GNUNET_HashCode hash; | 153 | struct GNUNET_HashCode hash; |
153 | 154 | ||
154 | /** | 155 | /** |
155 | * State ID for proof creation. | 156 | * Linear state ID accquired by depth-first-search. This ID should be used for |
157 | * storing information about the state in an array, because the 'id' of the | ||
158 | * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0 | ||
159 | * and < 'state_count'. | ||
156 | */ | 160 | */ |
157 | unsigned int proof_id; | 161 | unsigned int dfs_id; |
158 | 162 | ||
159 | /** | 163 | /** |
160 | * Proof for this state. | 164 | * Proof for this state. |
@@ -259,20 +263,24 @@ struct GNUNET_REGEX_Automaton | |||
259 | * @param count current count of the state, from 0 to a->state_count -1. | 263 | * @param count current count of the state, from 0 to a->state_count -1. |
260 | * @param s state. | 264 | * @param s state. |
261 | */ | 265 | */ |
262 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, | 266 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, |
267 | const unsigned int count, | ||
263 | struct GNUNET_REGEX_State * s); | 268 | struct GNUNET_REGEX_State * s); |
264 | 269 | ||
265 | 270 | ||
266 | /** | 271 | /** |
267 | * Traverses the given automaton from it's start state, visiting all reachable | 272 | * Traverses the given automaton using depth-first-search (DFS) from it's start |
268 | * states and calling 'action' on each one of them. | 273 | * state, visiting all reachable states and calling 'action' on each one of |
274 | * them. | ||
269 | * | 275 | * |
270 | * @param a automaton. | 276 | * @param a automaton to be traversed. |
277 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
271 | * @param action action to be performed on each state. | 278 | * @param action action to be performed on each state. |
272 | * @param action_cls closure for action | 279 | * @param action_cls closure for action |
273 | */ | 280 | */ |
274 | void | 281 | void |
275 | GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, | 282 | GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a, |
283 | struct GNUNET_REGEX_State *start, | ||
276 | GNUNET_REGEX_traverse_action action, | 284 | GNUNET_REGEX_traverse_action action, |
277 | void *action_cls); | 285 | void *action_cls); |
278 | 286 | ||
@@ -321,6 +329,7 @@ GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str); | |||
321 | char * | 329 | char * |
322 | GNUNET_REGEX_generate_random_string (size_t max_len); | 330 | GNUNET_REGEX_generate_random_string (size_t max_len); |
323 | 331 | ||
332 | |||
324 | #if 0 /* keep Emacsens' auto-indent happy */ | 333 | #if 0 /* keep Emacsens' auto-indent happy */ |
325 | { | 334 | { |
326 | #endif | 335 | #endif |