aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-08-17 10:03:56 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-08-17 10:03:56 +0000
commit39e159b2613be7460dd47bb5d290ac84dc3a9196 (patch)
treec2915a29c382399282afa8231fa935c3df1f32d7 /src/regex
parent2162464f34cff436c90e61bf967be0f0ec74d31e (diff)
downloadgnunet-39e159b2613be7460dd47bb5d290ac84dc3a9196.tar.gz
gnunet-39e159b2613be7460dd47bb5d290ac84dc3a9196.zip
Added multi-striding capabilities to regex.
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/regex.c282
-rw-r--r--src/regex/regex_graph.c11
-rw-r--r--src/regex/regex_internal.h33
3 files changed, 263 insertions, 63 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 4fb524b1c..694386e9a 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -245,7 +245,8 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
245 } 245 }
246 246
247 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); 247 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
248 t->id = ctx->transition_id++; 248 if (NULL != ctx)
249 t->id = ctx->transition_id++;
249 if (NULL != label) 250 if (NULL != label)
250 t->label = GNUNET_strdup (label); 251 t->label = GNUNET_strdup (label);
251 else 252 else
@@ -572,55 +573,202 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a,
572 573
573 574
574/** 575/**
575 * Depth-first traversal of all states that are reachable from state 's'. Expects the states to 576 * Depth-first traversal (DFS) of all states that are reachable from state
576 * be unmarked (s->marked == GNUNET_NO). Performs 'action' on each visited 577 * 's'. Performs 'action' on each visited state.
577 * state.
578 * 578 *
579 * @param s start state. 579 * @param s start state.
580 * @param marks an array of size a->state_count to remember which state was
581 * already visited.
580 * @param count current count of the state. 582 * @param count current count of the state.
581 * @param action action to be performed on each state. 583 * @param action action to be performed on each state.
582 * @param action_cls closure for action 584 * @param action_cls closure for action.
583 */ 585 */
584static void 586static void
585automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count, 587automaton_state_traverse (struct GNUNET_REGEX_State *s, int *marks,
588 unsigned int *count,
586 GNUNET_REGEX_traverse_action action, void *action_cls) 589 GNUNET_REGEX_traverse_action action, void *action_cls)
587{ 590{
588 struct GNUNET_REGEX_Transition *t; 591 struct GNUNET_REGEX_Transition *t;
589 592
590 if (GNUNET_NO != s->marked) 593 if (GNUNET_YES == marks[s->traversal_id])
591 return; 594 return;
592 s->marked = GNUNET_YES; 595
596 marks[s->traversal_id] = GNUNET_YES;
597
593 if (NULL != action) 598 if (NULL != action)
594 action (action_cls, *count, s); 599 action (action_cls, *count, s);
600
595 (*count)++; 601 (*count)++;
602
596 for (t = s->transitions_head; NULL != t; t = t->next) 603 for (t = s->transitions_head; NULL != t; t = t->next)
597 automaton_state_traverse (t->to_state, count, action, action_cls); 604 {
605 automaton_state_traverse (t->to_state, marks, count, action, action_cls);
606 }
598} 607}
599 608
600 609
601/** 610/**
602 * Traverses the given automaton from it's start state, visiting all reachable 611 * Traverses the given automaton using depth-first-search (DFS) from it's start
603 * states and calling 'action' on each one of them. 612 * state, visiting all reachable states and calling 'action' on each one of
613 * them.
604 * 614 *
605 * @param a automaton. 615 * @param a automaton to be traversed.
616 * @param start start state, pass a->start or NULL to traverse the whole automaton.
606 * @param action action to be performed on each state. 617 * @param action action to be performed on each state.
607 * @param action_cls closure for action 618 * @param action_cls closure for action
608 */ 619 */
609void 620void
610GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, 621GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
622 struct GNUNET_REGEX_State *start,
611 GNUNET_REGEX_traverse_action action, 623 GNUNET_REGEX_traverse_action action,
612 void *action_cls) 624 void *action_cls)
613{ 625{
614 unsigned int count; 626 unsigned int count;
615 struct GNUNET_REGEX_State *s; 627 struct GNUNET_REGEX_State *s;
628 int marks[a->state_count];
629
630 if (NULL == a || 0 == a->state_count)
631 return;
632
633 for (count = 0, s = a->states_head; NULL != s; s = s->next, count++)
634 {
635 s->traversal_id = count;
636 marks[s->traversal_id] = GNUNET_NO;
637 }
616 638
617 for (s = a->states_head; NULL != s; s = s->next)
618 s->marked = GNUNET_NO;
619 count = 0; 639 count = 0;
620 automaton_state_traverse (a->start, &count, action, action_cls); 640
641 if (NULL == start)
642 s = a->start;
643 else
644 s = start;
645
646 automaton_state_traverse (s, marks, &count, action, action_cls);
647}
648
649
650/**
651 * Context for adding strided transitions to a DFA.
652 */
653struct GNUNET_REGEX_Strided_Context
654{
655 /**
656 * Length of the strides.
657 */
658 const unsigned int stride;
659
660 /**
661 * Strided transitions DLL. New strided transitions will be stored in this DLL
662 * and afterwards added to the DFA.
663 */
664 struct GNUNET_REGEX_Transition *transitions_head;
665
666 /**
667 * Strided transitions DLL.
668 */
669 struct GNUNET_REGEX_Transition *transitions_tail;
670};
671
672
673/**
674 * Recursive helper function to add strides to a DFA.
675 *
676 * @param cls context, contains stride length and strided transitions DLL.
677 * @param depth current depth of the depth-first traversal of the graph.
678 * @param label current label, string that contains all labels on the path from
679 * 'start' to 's'.
680 * @param start start state for the depth-first traversal of the graph.
681 * @param s current state in the depth-first traversal
682 */
683void
684add_multi_strides_to_dfa_helper (void *cls, const unsigned int depth,
685 char *label, struct GNUNET_REGEX_State *start,
686 struct GNUNET_REGEX_State *s)
687{
688 struct GNUNET_REGEX_Strided_Context *ctx = cls;
689 struct GNUNET_REGEX_Transition *t;
690 char *new_label;
691
692 if (depth == ctx->stride)
693 {
694 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
695 t->label = GNUNET_strdup (label);
696 t->to_state = s;
697 t->from_state = start;
698 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail,
699 t);
700 }
701 else
702 {
703 for (t = s->transitions_head; NULL != t; t = t->next)
704 {
705 /* Do not consider self-loops, because it end's up in too many
706 * transitions */
707 if (t->to_state == t->from_state)
708 continue;
709
710 if (NULL != label)
711 {
712 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
713 }
714 else
715 new_label = GNUNET_strdup (t->label);
716
717 add_multi_strides_to_dfa_helper (cls, (depth + 1), new_label, start,
718 t->to_state);
719 }
720 }
721 GNUNET_free_non_null (label);
722}
723
724
725/**
726 * Function called for each state in the DFA. Starts a traversal of depth set in
727 * context starting from state 's'.
728 *
729 * @param cls context.
730 * @param count not used.
731 * @param s current state.
732 */
733void
734add_multi_strides_to_dfa (void *cls, const unsigned int count,
735 struct GNUNET_REGEX_State *s)
736{
737 add_multi_strides_to_dfa_helper (cls, 0, NULL, s, s);
738}
739
740
741/**
742 * Adds multi-strided transitions to the given 'dfa'.
743 *
744 * @param regex_ctx regex context needed to add transitions to the automaton.
745 * @param dfa DFA to which the multi strided transitions should be added.
746 * @param stride_len length of the strides.
747 */
748void
749GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
750 struct GNUNET_REGEX_Automaton *dfa,
751 const unsigned int stride_len)
752{
753 struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL};
754 struct GNUNET_REGEX_Transition *t;
755 struct GNUNET_REGEX_Transition *t_next;
756
757 GNUNET_REGEX_automaton_traverse (dfa, dfa->start, &add_multi_strides_to_dfa,
758 &ctx);
759
760 for (t = ctx.transitions_head; NULL != t; t = t_next)
761 {
762 t_next = t->next;
763 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
764 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
765 GNUNET_free_non_null (t->label);
766 GNUNET_free (t);
767 }
621} 768}
622 769
623 770
771
624/** 772/**
625 * Check if the given string 'str' needs parentheses around it when 773 * Check if the given string 'str' needs parentheses around it when
626 * using it to generate a regex. 774 * using it to generate a regex.
@@ -721,8 +869,8 @@ has_epsilon (const char *str)
721 * 869 *
722 * @param str string 870 * @param str string
723 * 871 *
724 * @return string without preceding epsilon, string 'str' if no preceding epsilon 872 * @return string without preceding epsilon, string 'str' if no preceding
725 * could be found, NULL if 'str' was NULL 873 * epsilon could be found, NULL if 'str' was NULL
726 */ 874 */
727static char * 875static char *
728remove_epsilon (const char *str) 876remove_epsilon (const char *str)
@@ -760,19 +908,20 @@ strkcmp (const char *str1, const char *str2, size_t k)
760 908
761 909
762/** 910/**
763 * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse' function to create 911 * Helper function used as 'action' in 'GNUNET_REGEX_automaton_traverse'
764 * the depth-first numbering of the states. 912 * function to create the depth-first numbering of the states.
765 * 913 *
766 * @param cls states array. 914 * @param cls states array.
767 * @param count current state counter. 915 * @param count current state counter.
768 * @param s current state. 916 * @param s current state.
769 */ 917 */
770static void 918void
771number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s) 919number_states (void *cls, const unsigned int count,
920 struct GNUNET_REGEX_State *s)
772{ 921{
773 struct GNUNET_REGEX_State **states = cls; 922 struct GNUNET_REGEX_State **states = cls;
774 923
775 s->proof_id = count; 924 s->dfs_id = count;
776 if (NULL != states) 925 if (NULL != states)
777 states[count] = s; 926 states[count] = s;
778} 927}
@@ -1171,9 +1320,18 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1171 unsigned int j; 1320 unsigned int j;
1172 unsigned int k; 1321 unsigned int k;
1173 1322
1323 if (NULL == a)
1324 {
1325 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1326 "Could not create proofs, automaton was NULL\n");
1327 return;
1328 }
1174 1329
1175 /* create depth-first numbering of the states, initializes 'state' */ 1330 /* create depth-first numbering of the states, initializes 'state' */
1176 GNUNET_REGEX_automaton_traverse (a, &number_states, states); 1331 GNUNET_REGEX_automaton_traverse (a, a->start, &number_states, states);
1332
1333 for (i = 0; i < n; i++)
1334 GNUNET_assert (NULL != states[i]);
1177 1335
1178 /* Compute regular expressions of length "1" between each pair of states */ 1336 /* Compute regular expressions of length "1" between each pair of states */
1179 for (i = 0; i < n; i++) 1337 for (i = 0; i < n; i++)
@@ -1185,7 +1343,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1185 } 1343 }
1186 for (t = states[i]->transitions_head; NULL != t; t = t->next) 1344 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1187 { 1345 {
1188 j = t->to_state->proof_id; 1346 j = t->to_state->dfs_id;
1189 if (NULL == R_last[i][j]) 1347 if (NULL == R_last[i][j])
1190 GNUNET_asprintf (&R_last[i][j], "%s", t->label); 1348 GNUNET_asprintf (&R_last[i][j], "%s", t->label);
1191 else 1349 else
@@ -1213,7 +1371,8 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1213 GNUNET_free (temp); 1371 GNUNET_free (temp);
1214 } 1372 }
1215 1373
1216 /* Compute regular expressions of length "k" between each pair of states per induction */ 1374 /* Compute regular expressions of length "k" between each pair of states per
1375 * induction */
1217 for (k = 0; k < n; k++) 1376 for (k = 0; k < n; k++)
1218 { 1377 {
1219 for (i = 0; i < n; i++) 1378 for (i = 0; i < n; i++)
@@ -1246,30 +1405,31 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1246 // assign proofs and hashes 1405 // assign proofs and hashes
1247 for (i = 0; i < n; i++) 1406 for (i = 0; i < n; i++)
1248 { 1407 {
1249 if (NULL != R_last[a->start->proof_id][i]) 1408 if (NULL != R_last[a->start->dfs_id][i])
1250 { 1409 {
1251 states[i]->proof = GNUNET_strdup (R_last[a->start->proof_id][i]); 1410 states[i]->proof = GNUNET_strdup (R_last[a->start->dfs_id][i]);
1252 GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof), 1411 GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof),
1253 &states[i]->hash); 1412 &states[i]->hash);
1254 } 1413 }
1255 } 1414 }
1256 1415
1257 // complete regex for whole DFA: union of all pairs (start state/accepting state(s)). 1416 // complete regex for whole DFA: union of all pairs (start state/accepting
1417 // state(s)).
1258 complete_regex = NULL; 1418 complete_regex = NULL;
1259 for (i = 0; i < n; i++) 1419 for (i = 0; i < n; i++)
1260 { 1420 {
1261 if (states[i]->accepting) 1421 if (states[i]->accepting)
1262 { 1422 {
1263 if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i])) 1423 if (NULL == complete_regex && 0 < strlen (R_last[a->start->dfs_id][i]))
1264 { 1424 {
1265 GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]); 1425 GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->dfs_id][i]);
1266 } 1426 }
1267 else if (NULL != R_last[a->start->proof_id][i] && 1427 else if (NULL != R_last[a->start->dfs_id][i] &&
1268 0 < strlen (R_last[a->start->proof_id][i])) 1428 0 < strlen (R_last[a->start->dfs_id][i]))
1269 { 1429 {
1270 temp = complete_regex; 1430 temp = complete_regex;
1271 GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, 1431 GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex,
1272 R_last[a->start->proof_id][i]); 1432 R_last[a->start->dfs_id][i]);
1273 GNUNET_free (temp); 1433 GNUNET_free (temp);
1274 } 1434 }
1275 } 1435 }
@@ -1308,7 +1468,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1308 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); 1468 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
1309 s->id = ctx->state_id++; 1469 s->id = ctx->state_id++;
1310 s->accepting = 0; 1470 s->accepting = 0;
1311 s->marked = 0; 1471 s->marked = GNUNET_NO;
1312 s->name = NULL; 1472 s->name = NULL;
1313 s->scc_id = 0; 1473 s->scc_id = 0;
1314 s->index = -1; 1474 s->index = -1;
@@ -1398,6 +1558,20 @@ dfa_move (struct GNUNET_REGEX_State *s, const char *label)
1398 return new_s; 1558 return new_s;
1399} 1559}
1400 1560
1561/**
1562 * Set the given state 'marked' to GNUNET_YES. Used by the
1563 * 'dfa_remove_unreachable_states' function to detect unreachable states in the
1564 * automaton.
1565 *
1566 * @param cls closure, not used.
1567 * @param count count, not used.
1568 * @param s state where the marked attribute will be set to GNUNET_YES.
1569 */
1570void
1571mark_states (void *cls, const unsigned int count, struct GNUNET_REGEX_State *s)
1572{
1573 s->marked = GNUNET_YES;
1574}
1401 1575
1402/** 1576/**
1403 * Remove all unreachable states from DFA 'a'. Unreachable states are those 1577 * Remove all unreachable states from DFA 'a'. Unreachable states are those
@@ -1416,7 +1590,7 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
1416 s->marked = GNUNET_NO; 1590 s->marked = GNUNET_NO;
1417 1591
1418 // 2. traverse dfa from start state and mark all visited states 1592 // 2. traverse dfa from start state and mark all visited states
1419 GNUNET_REGEX_automaton_traverse (a, NULL, NULL); 1593 GNUNET_REGEX_automaton_traverse (a, a->start, &mark_states, NULL);
1420 1594
1421 // 3. delete all states that were not visited 1595 // 3. delete all states that were not visited
1422 for (s = a->states_head; NULL != s; s = s_next) 1596 for (s = a->states_head; NULL != s; s = s_next)
@@ -1477,7 +1651,6 @@ static void
1477dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, 1651dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
1478 struct GNUNET_REGEX_Automaton *a) 1652 struct GNUNET_REGEX_Automaton *a)
1479{ 1653{
1480 unsigned int i;
1481 int table[a->state_count][a->state_count]; 1654 int table[a->state_count][a->state_count];
1482 struct GNUNET_REGEX_State *s1; 1655 struct GNUNET_REGEX_State *s1;
1483 struct GNUNET_REGEX_State *s2; 1656 struct GNUNET_REGEX_State *s2;
@@ -1487,6 +1660,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
1487 struct GNUNET_REGEX_State *s2_next; 1660 struct GNUNET_REGEX_State *s2_next;
1488 int change; 1661 int change;
1489 unsigned int num_equal_edges; 1662 unsigned int num_equal_edges;
1663 unsigned int i;
1490 1664
1491 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; 1665 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
1492 i++, s1 = s1->next) 1666 i++, s1 = s1->next)
@@ -1677,7 +1851,7 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
1677 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); 1851 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
1678 s->id = ctx->state_id++; 1852 s->id = ctx->state_id++;
1679 s->accepting = accepting; 1853 s->accepting = accepting;
1680 s->marked = 0; 1854 s->marked = GNUNET_NO;
1681 s->contained = 0; 1855 s->contained = 0;
1682 s->index = -1; 1856 s->index = -1;
1683 s->lowlink = -1; 1857 s->lowlink = -1;
@@ -2188,7 +2362,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2188 nfa->regex = GNUNET_strdup (regex); 2362 nfa->regex = GNUNET_strdup (regex);
2189 2363
2190 /* create depth-first numbering of the states for pretty printing */ 2364 /* create depth-first numbering of the states for pretty printing */
2191 GNUNET_REGEX_automaton_traverse (nfa, &number_states, NULL); 2365 GNUNET_REGEX_automaton_traverse (nfa, NULL, &number_states, NULL);
2192 2366
2193 return nfa; 2367 return nfa;
2194 2368
@@ -2293,6 +2467,9 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
2293 2467
2294 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); 2468 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
2295 dfa->type = DFA; 2469 dfa->type = DFA;
2470 dfa->state_count = 0;
2471 dfa->states_head = NULL;
2472 dfa->states_tail = NULL;
2296 dfa->regex = GNUNET_strdup (regex); 2473 dfa->regex = GNUNET_strdup (regex);
2297 2474
2298 // Create DFA start state from epsilon closure 2475 // Create DFA start state from epsilon closure
@@ -2310,6 +2487,9 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
2310 // Create proofs for all states 2487 // Create proofs for all states
2311 automaton_create_proofs (dfa); 2488 automaton_create_proofs (dfa);
2312 2489
2490 // Add strides to DFA
2491 // GNUNET_REGEX_add_multi_strides_to_dfa (&ctx, dfa, 2);
2492
2313 return dfa; 2493 return dfa;
2314} 2494}
2315 2495
@@ -2542,6 +2722,12 @@ GNUNET_REGEX_check_proof (const char *proof, const struct GNUNET_HashCode *key)
2542{ 2722{
2543 struct GNUNET_HashCode key_check; 2723 struct GNUNET_HashCode key_check;
2544 2724
2725 if (NULL == proof || NULL == key)
2726 {
2727 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Proof check failed, was NULL.\n");
2728 return GNUNET_NO;
2729 }
2730
2545 GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check); 2731 GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check);
2546 return (0 == 2732 return (0 ==
2547 GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO; 2733 GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO;
@@ -2607,10 +2793,14 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
2607/** 2793/**
2608 * Iterate over all initial edges that aren't actually part of the automaton. 2794 * Iterate over all initial edges that aren't actually part of the automaton.
2609 * This is needed to find the initial states returned by 2795 * This is needed to find the initial states returned by
2610 * GNUNET_REGEX_get_first_key. Iteration will start at the first branch state (a 2796 * GNUNET_REGEX_get_first_key. Iteration will start at the first state that has
2611 * state that has more than one outgoing edge, can be the first state), because 2797 * more than one outgoing edge, i.e. the state that branches the graph.
2612 * all previous states will have the same proof and be iterated over in 2798 * For example consider the following graph:
2613 * iterate_all_edges. 2799 * a -> b -> c -> d -> ...
2800 * \-> e -> ...
2801 *
2802 * This function will not iterate over the edges leading to "c", because these
2803 * will be covered by the iterate_edges function.
2614 * 2804 *
2615 * @param a the automaton for which the initial states should be computed. 2805 * @param a the automaton for which the initial states should be computed.
2616 * @param initial_len length of the initial state string. 2806 * @param initial_len length of the initial state string.
@@ -2649,7 +2839,7 @@ iterate_initial_edges (struct GNUNET_REGEX_Automaton *a,
2649 GNUNET_asprintf (&consumed_string, "%s", s->transitions_head->label); 2839 GNUNET_asprintf (&consumed_string, "%s", s->transitions_head->label);
2650 2840
2651 s = s->transitions_head->to_state; 2841 s = s->transitions_head->to_state;
2652 cur_len++; 2842 cur_len += strlen (s->transitions_head->label);
2653 } 2843 }
2654 while (cur_len < initial_len && 1 == s->transition_count); 2844 while (cur_len < initial_len && 1 == s->transition_count);
2655 } 2845 }
@@ -2663,7 +2853,7 @@ iterate_initial_edges (struct GNUNET_REGEX_Automaton *a,
2663 2853
2664/** 2854/**
2665 * Iterate over all edges helper function starting from state 's', calling 2855 * Iterate over all edges helper function starting from state 's', calling
2666 * iterator on for each edge. 2856 * iterator function for each edge.
2667 * 2857 *
2668 * @param s state. 2858 * @param s state.
2669 * @param iterator iterator function called for each edge. 2859 * @param iterator iterator function called for each edge.
@@ -2683,7 +2873,7 @@ iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator,
2683 2873
2684 num_edges = state_get_edges (s, edges); 2874 num_edges = state_get_edges (s, edges);
2685 2875
2686 if (0 < strlen (s->proof) || s->accepting) 2876 if ((NULL != s->proof && 0 < strlen (s->proof)) || s->accepting)
2687 iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges, 2877 iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges,
2688 edges); 2878 edges);
2689 2879
diff --git a/src/regex/regex_graph.c b/src/regex/regex_graph.c
index 06546e8f7..9223200c8 100644
--- a/src/regex/regex_graph.c
+++ b/src/regex/regex_graph.c
@@ -167,10 +167,10 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count,
167 char *to_name; 167 char *to_name;
168 168
169 if (GNUNET_YES == ctx->verbose) 169 if (GNUNET_YES == ctx->verbose)
170 GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->proof_id, s->name, s->proof, 170 GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->dfs_id, s->name, s->proof,
171 GNUNET_h2s (&s->hash)); 171 GNUNET_h2s (&s->hash));
172 else 172 else
173 GNUNET_asprintf (&name, "%i", s->proof_id); 173 GNUNET_asprintf (&name, "%i", s->dfs_id);
174 174
175 if (s->accepting) 175 if (s->accepting)
176 { 176 {
@@ -218,12 +218,12 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count,
218 218
219 if (GNUNET_YES == ctx->verbose) 219 if (GNUNET_YES == ctx->verbose)
220 { 220 {
221 GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->proof_id, 221 GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->dfs_id,
222 ctran->to_state->name, ctran->to_state->proof, 222 ctran->to_state->name, ctran->to_state->proof,
223 GNUNET_h2s (&ctran->to_state->hash)); 223 GNUNET_h2s (&ctran->to_state->hash));
224 } 224 }
225 else 225 else
226 GNUNET_asprintf (&to_name, "%i", ctran->to_state->proof_id); 226 GNUNET_asprintf (&to_name, "%i", ctran->to_state->dfs_id);
227 227
228 if (NULL == ctran->label) 228 if (NULL == ctran->label)
229 { 229 {
@@ -320,7 +320,8 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
320 start = "digraph G {\nrankdir=LR\n"; 320 start = "digraph G {\nrankdir=LR\n";
321 fwrite (start, strlen (start), 1, ctx.filep); 321 fwrite (start, strlen (start), 1, ctx.filep);
322 322
323 GNUNET_REGEX_automaton_traverse (a, &GNUNET_REGEX_automaton_save_graph_step, 323 GNUNET_REGEX_automaton_traverse (a, a->start,
324 &GNUNET_REGEX_automaton_save_graph_step,
324 &ctx); 325 &ctx);
325 326
326 end = "\n}\n"; 327 end = "\n}\n";
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
index 007870832..f96d51fb0 100644
--- a/src/regex/regex_internal.h
+++ b/src/regex/regex_internal.h
@@ -77,11 +77,6 @@ struct GNUNET_REGEX_Transition
77 * State from which this transition origins. 77 * State from which this transition origins.
78 */ 78 */
79 struct GNUNET_REGEX_State *from_state; 79 struct GNUNET_REGEX_State *from_state;
80
81 /**
82 * Mark this transition. For example when reversing the automaton.
83 */
84 int mark;
85}; 80};
86 81
87 82
@@ -106,6 +101,12 @@ struct GNUNET_REGEX_State
106 unsigned int id; 101 unsigned int id;
107 102
108 /** 103 /**
104 * Unique state id that is used for traversing the automaton. It is guaranteed
105 * to be > 0 and < state_count.
106 */
107 unsigned int traversal_id;
108
109 /**
109 * If this is an accepting state or not. 110 * If this is an accepting state or not.
110 */ 111 */
111 int accepting; 112 int accepting;
@@ -152,9 +153,12 @@ struct GNUNET_REGEX_State
152 struct GNUNET_HashCode hash; 153 struct GNUNET_HashCode hash;
153 154
154 /** 155 /**
155 * State ID for proof creation. 156 * Linear state ID accquired by depth-first-search. This ID should be used for
157 * storing information about the state in an array, because the 'id' of the
158 * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0
159 * and < 'state_count'.
156 */ 160 */
157 unsigned int proof_id; 161 unsigned int dfs_id;
158 162
159 /** 163 /**
160 * Proof for this state. 164 * Proof for this state.
@@ -259,20 +263,24 @@ struct GNUNET_REGEX_Automaton
259 * @param count current count of the state, from 0 to a->state_count -1. 263 * @param count current count of the state, from 0 to a->state_count -1.
260 * @param s state. 264 * @param s state.
261 */ 265 */
262typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, 266typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
267 const unsigned int count,
263 struct GNUNET_REGEX_State * s); 268 struct GNUNET_REGEX_State * s);
264 269
265 270
266/** 271/**
267 * Traverses the given automaton from it's start state, visiting all reachable 272 * Traverses the given automaton using depth-first-search (DFS) from it's start
268 * states and calling 'action' on each one of them. 273 * state, visiting all reachable states and calling 'action' on each one of
274 * them.
269 * 275 *
270 * @param a automaton. 276 * @param a automaton to be traversed.
277 * @param start start state, pass a->start or NULL to traverse the whole automaton.
271 * @param action action to be performed on each state. 278 * @param action action to be performed on each state.
272 * @param action_cls closure for action 279 * @param action_cls closure for action
273 */ 280 */
274void 281void
275GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, 282GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a,
283 struct GNUNET_REGEX_State *start,
276 GNUNET_REGEX_traverse_action action, 284 GNUNET_REGEX_traverse_action action,
277 void *action_cls); 285 void *action_cls);
278 286
@@ -321,6 +329,7 @@ GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
321char * 329char *
322GNUNET_REGEX_generate_random_string (size_t max_len); 330GNUNET_REGEX_generate_random_string (size_t max_len);
323 331
332
324#if 0 /* keep Emacsens' auto-indent happy */ 333#if 0 /* keep Emacsens' auto-indent happy */
325{ 334{
326#endif 335#endif