diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-04-19 11:39:16 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-04-19 11:39:16 +0000 |
commit | 93a5401e858e978aa2d7fc090fc1f6612c15dc3f (patch) | |
tree | 742bf947188398af64ef06eae70e6cb0b243b045 /src/regex | |
parent | 4071d1862fdcd9389e990784d0ea1fb9c4651f0f (diff) | |
download | gnunet-93a5401e858e978aa2d7fc090fc1f6612c15dc3f.tar.gz gnunet-93a5401e858e978aa2d7fc090fc1f6612c15dc3f.zip |
dfa minimization fix
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/regex.c | 114 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 7 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 6 |
3 files changed, 81 insertions, 46 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index ae28fb488..7aebd2191 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -262,8 +262,8 @@ static void | |||
262 | debug_print_state (struct GNUNET_REGEX_State *s) | 262 | debug_print_state (struct GNUNET_REGEX_State *s) |
263 | { | 263 | { |
264 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 264 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
265 | "State %i: %s marked: %i accepting: %i scc_id: %i\n", s->id, | 265 | "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i\n", s->id, |
266 | s->name, s->marked, s->accepting, s->scc_id); | 266 | s->name, s->marked, s->accepting, s->scc_id, s->transition_count); |
267 | } | 267 | } |
268 | 268 | ||
269 | static void | 269 | static void |
@@ -465,7 +465,8 @@ state_set_clear (struct GNUNET_REGEX_StateSet *set) | |||
465 | } | 465 | } |
466 | 466 | ||
467 | /** | 467 | /** |
468 | * Adds a transition from one state to another on 'label' | 468 | * Adds a transition from one state to another on 'label'. Does not add |
469 | * duplicate states. | ||
469 | * | 470 | * |
470 | * @param ctx context | 471 | * @param ctx context |
471 | * @param from_state starting state for the transition | 472 | * @param from_state starting state for the transition |
@@ -477,6 +478,7 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
477 | struct GNUNET_REGEX_State *from_state, const char label, | 478 | struct GNUNET_REGEX_State *from_state, const char label, |
478 | struct GNUNET_REGEX_State *to_state) | 479 | struct GNUNET_REGEX_State *to_state) |
479 | { | 480 | { |
481 | int is_dup; | ||
480 | struct Transition *t; | 482 | struct Transition *t; |
481 | 483 | ||
482 | if (NULL == from_state) | 484 | if (NULL == from_state) |
@@ -485,6 +487,22 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
485 | return; | 487 | return; |
486 | } | 488 | } |
487 | 489 | ||
490 | // Do not add duplicate states | ||
491 | is_dup = GNUNET_NO; | ||
492 | for (t = from_state->transitions_head; NULL != t; t = t->next) | ||
493 | { | ||
494 | if (t->to_state == to_state | ||
495 | && t->label == label | ||
496 | && t->from_state == from_state) | ||
497 | { | ||
498 | is_dup = GNUNET_YES; | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | |||
503 | if (is_dup) | ||
504 | return; | ||
505 | |||
488 | t = GNUNET_malloc (sizeof (struct Transition)); | 506 | t = GNUNET_malloc (sizeof (struct Transition)); |
489 | 507 | ||
490 | t->id = ctx->transition_id++; | 508 | t->id = ctx->transition_id++; |
@@ -492,6 +510,8 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
492 | t->to_state = to_state; | 510 | t->to_state = to_state; |
493 | t->from_state = from_state; | 511 | t->from_state = from_state; |
494 | 512 | ||
513 | from_state->transition_count++; | ||
514 | |||
495 | GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, | 515 | GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, |
496 | from_state->transitions_tail, t); | 516 | from_state->transitions_tail, t); |
497 | } | 517 | } |
@@ -533,12 +553,11 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s) | |||
533 | if (NULL != s->name) | 553 | if (NULL != s->name) |
534 | GNUNET_free (s->name); | 554 | GNUNET_free (s->name); |
535 | 555 | ||
536 | for (t = s->transitions_head; NULL != t;) | 556 | for (t = s->transitions_head; NULL != t; t = next_t) |
537 | { | 557 | { |
538 | next_t = t->next; | 558 | next_t = t->next; |
539 | GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); | 559 | GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); |
540 | GNUNET_free (t); | 560 | GNUNET_free (t); |
541 | t = next_t; | ||
542 | } | 561 | } |
543 | 562 | ||
544 | state_set_clear (s->nfa_set); | 563 | state_set_clear (s->nfa_set); |
@@ -604,7 +623,6 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
604 | { | 623 | { |
605 | struct GNUNET_REGEX_State *s_check; | 624 | struct GNUNET_REGEX_State *s_check; |
606 | struct Transition *t_check; | 625 | struct Transition *t_check; |
607 | struct Transition *t; | ||
608 | char *new_name; | 626 | char *new_name; |
609 | 627 | ||
610 | GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); | 628 | GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); |
@@ -615,7 +633,7 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
615 | for (t_check = s_check->transitions_head; NULL != t_check; | 633 | for (t_check = s_check->transitions_head; NULL != t_check; |
616 | t_check = t_check->next) | 634 | t_check = t_check->next) |
617 | { | 635 | { |
618 | if (s_check != s1 && s2 == t_check->to_state) | 636 | if (s2 == t_check->to_state) |
619 | t_check->to_state = s1; | 637 | t_check->to_state = s1; |
620 | } | 638 | } |
621 | } | 639 | } |
@@ -623,29 +641,24 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
623 | // 2. Add all transitions from s2 to sX to s1 | 641 | // 2. Add all transitions from s2 to sX to s1 |
624 | for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) | 642 | for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) |
625 | { | 643 | { |
626 | for (t = s1->transitions_head; NULL != t; t = t->next) | 644 | if (t_check->to_state != s1) |
627 | { | ||
628 | if (t_check->label != t->label && NULL != t_check->to_state && | ||
629 | t_check->to_state != t->to_state && t_check->to_state != s2) | ||
630 | { | ||
631 | state_add_transition (ctx, s1, t_check->label, t_check->to_state); | 645 | state_add_transition (ctx, s1, t_check->label, t_check->to_state); |
632 | } | ||
633 | } | ||
634 | } | 646 | } |
635 | 647 | ||
636 | // 3. Rename s1 to {s1,s2} | 648 | // 3. Rename s1 to {s1,s2} |
637 | new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1); | 649 | new_name = GNUNET_strdup (s1->name); |
638 | strncat (new_name, s1->name, strlen (s1->name)); | ||
639 | strncat (new_name, s2->name, strlen (s2->name)); | ||
640 | if (NULL != s1->name) | 650 | if (NULL != s1->name) |
651 | { | ||
641 | GNUNET_free (s1->name); | 652 | GNUNET_free (s1->name); |
642 | s1->name = new_name; | 653 | s1->name = NULL; |
654 | } | ||
655 | GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name); | ||
656 | GNUNET_free (new_name); | ||
643 | 657 | ||
644 | // remove state | 658 | // remove state |
645 | s_check = s2; | 659 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2); |
646 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check); | ||
647 | a->state_count--; | 660 | a->state_count--; |
648 | automaton_destroy_state (s_check); | 661 | automaton_destroy_state (s2); |
649 | } | 662 | } |
650 | 663 | ||
651 | /** | 664 | /** |
@@ -925,8 +938,8 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
925 | struct Transition *t1; | 938 | struct Transition *t1; |
926 | struct Transition *t2; | 939 | struct Transition *t2; |
927 | int change; | 940 | int change; |
941 | int common_labels; | ||
928 | 942 | ||
929 | change = 1; | ||
930 | for (i = 0, s1 = a->states_head; | 943 | for (i = 0, s1 = a->states_head; |
931 | i < a->state_count && NULL != s1; | 944 | i < a->state_count && NULL != s1; |
932 | i++, s1 = s1->next) | 945 | i++, s1 = s1->next) |
@@ -949,6 +962,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
949 | } | 962 | } |
950 | } | 963 | } |
951 | 964 | ||
965 | change = 1; | ||
952 | while (0 != change) | 966 | while (0 != change) |
953 | { | 967 | { |
954 | change = 0; | 968 | change = 0; |
@@ -959,24 +973,26 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
959 | if (0 != table[s1->marked][s2->marked]) | 973 | if (0 != table[s1->marked][s2->marked]) |
960 | continue; | 974 | continue; |
961 | 975 | ||
976 | common_labels = GNUNET_NO; | ||
962 | for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next) | 977 | for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next) |
963 | { | 978 | { |
964 | for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next) | 979 | for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next) |
965 | { | 980 | { |
966 | if (t1->label == t2->label && t1->to_state == t2->to_state && | 981 | if (t1->label == t2->label) |
967 | (0 != table[t1->to_state->marked][t2->to_state->marked] || | ||
968 | 0 != table[t2->to_state->marked][t1->to_state->marked])) | ||
969 | { | 982 | { |
970 | table[s1->marked][s2->marked] = t1->label; | 983 | common_labels = GNUNET_YES; |
971 | change = 1; | 984 | |
972 | } | 985 | if (0 != table[t1->to_state->marked][t2->to_state->marked] || |
973 | else if (t1->label != t2->label && t1->to_state != t2->to_state) | 986 | 0 != table[t2->to_state->marked][t1->to_state->marked]) |
974 | { | 987 | { |
975 | table[s1->marked][s2->marked] = -1; | 988 | table[s1->marked][s2->marked] = t1->label != 0 ? t1->label : 1; |
976 | change = 1; | 989 | change = 1; |
990 | } | ||
977 | } | 991 | } |
978 | } | 992 | } |
979 | } | 993 | } |
994 | if (GNUNET_NO == common_labels) | ||
995 | table[s1->marked][s2->marked] = -2; | ||
980 | } | 996 | } |
981 | } | 997 | } |
982 | } | 998 | } |
@@ -988,7 +1004,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
988 | for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next) | 1004 | for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next) |
989 | { | 1005 | { |
990 | s2_next = s2->next; | 1006 | s2_next = s2->next; |
991 | if (s1 != s2 && table[s1->marked][s2->marked] == 0) | 1007 | if (table[s1->marked][s2->marked] == 0) |
992 | automaton_merge_states (ctx, a, s1, s2); | 1008 | automaton_merge_states (ctx, a, s1, s2); |
993 | } | 1009 | } |
994 | } | 1010 | } |
@@ -1700,10 +1716,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1700 | GNUNET_free (dfa_stack); | 1716 | GNUNET_free (dfa_stack); |
1701 | GNUNET_REGEX_automaton_destroy (nfa); | 1717 | GNUNET_REGEX_automaton_destroy (nfa); |
1702 | 1718 | ||
1703 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa_before.dot"); | ||
1704 | dfa_minimize (&ctx, dfa); | 1719 | dfa_minimize (&ctx, dfa); |
1705 | /*GNUNET_REGEX_automaton_save_graph (dfa, "dfa_after.dot");*/ | 1720 | scc_tarjan (&ctx, dfa); |
1706 | /*scc_tarjan (&ctx, dfa);*/ | ||
1707 | 1721 | ||
1708 | return dfa; | 1722 | return dfa; |
1709 | } | 1723 | } |
@@ -1782,17 +1796,22 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
1782 | GNUNET_asprintf (&s_acc, | 1796 | GNUNET_asprintf (&s_acc, |
1783 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | 1797 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", |
1784 | s->name, s->scc_id); | 1798 | s->name, s->scc_id); |
1785 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
1786 | GNUNET_free (s_acc); | ||
1787 | } | 1799 | } |
1788 | else | 1800 | else |
1789 | { | 1801 | { |
1790 | GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, | 1802 | GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, |
1791 | s->scc_id); | 1803 | s->scc_id); |
1792 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
1793 | GNUNET_free (s_acc); | ||
1794 | } | 1804 | } |
1795 | 1805 | ||
1806 | if (NULL == s_acc) | ||
1807 | { | ||
1808 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | ||
1809 | return; | ||
1810 | } | ||
1811 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
1812 | GNUNET_free (s_acc); | ||
1813 | s_acc = NULL; | ||
1814 | |||
1796 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | 1815 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) |
1797 | { | 1816 | { |
1798 | if (NULL == ctran->to_state) | 1817 | if (NULL == ctran->to_state) |
@@ -1817,8 +1836,15 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
1817 | s->scc_id); | 1836 | s->scc_id); |
1818 | } | 1837 | } |
1819 | 1838 | ||
1839 | if (NULL == s_tran) | ||
1840 | { | ||
1841 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | ||
1842 | return; | ||
1843 | } | ||
1844 | |||
1820 | fwrite (s_tran, strlen (s_tran), 1, p); | 1845 | fwrite (s_tran, strlen (s_tran), 1, p); |
1821 | GNUNET_free (s_tran); | 1846 | GNUNET_free (s_tran); |
1847 | s_tran = NULL; | ||
1822 | } | 1848 | } |
1823 | } | 1849 | } |
1824 | 1850 | ||
@@ -2012,8 +2038,8 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges) | |||
2012 | { | 2038 | { |
2013 | if (NULL != t->to_state) | 2039 | if (NULL != t->to_state) |
2014 | { | 2040 | { |
2015 | /*edges[count].label = &t->label;*/ | 2041 | edges[count].label = &t->label; |
2016 | /*edges[count].destination = t->to_state->hash;*/ | 2042 | edges[count].destination = t->to_state->hash; |
2017 | count++; | 2043 | count++; |
2018 | } | 2044 | } |
2019 | } | 2045 | } |
@@ -2036,7 +2062,7 @@ iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator, | |||
2036 | struct GNUNET_REGEX_Edge edges[s->transition_count]; | 2062 | struct GNUNET_REGEX_Edge edges[s->transition_count]; |
2037 | unsigned int num_edges; | 2063 | unsigned int num_edges; |
2038 | 2064 | ||
2039 | if (GNUNET_NO == s->marked) | 2065 | if (GNUNET_YES != s->marked) |
2040 | { | 2066 | { |
2041 | s->marked = GNUNET_YES; | 2067 | s->marked = GNUNET_YES; |
2042 | 2068 | ||
@@ -2064,7 +2090,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, | |||
2064 | { | 2090 | { |
2065 | struct GNUNET_REGEX_State *s; | 2091 | struct GNUNET_REGEX_State *s; |
2066 | 2092 | ||
2067 | for (s = a->start; NULL != s; s = s->next) | 2093 | for (s = a->states_head; NULL != s; s = s->next) |
2068 | s->marked = GNUNET_NO; | 2094 | s->marked = GNUNET_NO; |
2069 | 2095 | ||
2070 | iterate_edge (a->start, iterator, iterator_cls); | 2096 | iterate_edge (a->start, iterator, iterator_cls); |
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index 49cdb3931..371d19ec1 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -248,7 +248,7 @@ main (int argc, char *argv[]) | |||
248 | int check_dfa; | 248 | int check_dfa; |
249 | int check_rand; | 249 | int check_rand; |
250 | 250 | ||
251 | struct Regex_String_Pair rxstr[4] = { | 251 | struct Regex_String_Pair rxstr[5] = { |
252 | {"ab?(abcd)?", 5, | 252 | {"ab?(abcd)?", 5, |
253 | {"ababcd", "abab", "aabcd", "a", "abb"}, | 253 | {"ababcd", "abab", "aabcd", "a", "abb"}, |
254 | {match, nomatch, match, match, nomatch}}, | 254 | {match, nomatch, match, match, nomatch}}, |
@@ -262,6 +262,9 @@ main (int argc, char *argv[]) | |||
262 | {nomatch, nomatch, nomatch, nomatch, nomatch}}, | 262 | {nomatch, nomatch, nomatch, nomatch, nomatch}}, |
263 | {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1, | 263 | {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1, |
264 | {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"}, | 264 | {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"}, |
265 | {nomatch}}, | ||
266 | {"F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", 1, | ||
267 | {"osfjsodfonONONOnosndfsdnfsd"}, | ||
265 | {nomatch}} | 268 | {nomatch}} |
266 | }; | 269 | }; |
267 | 270 | ||
@@ -269,7 +272,7 @@ main (int argc, char *argv[]) | |||
269 | check_dfa = 0; | 272 | check_dfa = 0; |
270 | check_rand = 0; | 273 | check_rand = 0; |
271 | 274 | ||
272 | for (i = 0; i < 4; i++) | 275 | for (i = 0; i < 5; i++) |
273 | { | 276 | { |
274 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) | 277 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) |
275 | { | 278 | { |
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index 913e94f2b..39959641d 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c | |||
@@ -31,7 +31,13 @@ void key_iterator(void *cls, const GNUNET_HashCode *key, const char *proof, | |||
31 | int accepting, unsigned int num_edges, | 31 | int accepting, unsigned int num_edges, |
32 | const struct GNUNET_REGEX_Edge *edges) | 32 | const struct GNUNET_REGEX_Edge *edges) |
33 | { | 33 | { |
34 | int i; | ||
35 | |||
34 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); | 36 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); |
37 | for (i=0; i<num_edges; i++) | ||
38 | { | ||
39 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label); | ||
40 | } | ||
35 | } | 41 | } |
36 | 42 | ||
37 | int | 43 | int |