aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-04-19 11:39:16 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-04-19 11:39:16 +0000
commit93a5401e858e978aa2d7fc090fc1f6612c15dc3f (patch)
tree742bf947188398af64ef06eae70e6cb0b243b045 /src
parent4071d1862fdcd9389e990784d0ea1fb9c4651f0f (diff)
downloadgnunet-93a5401e858e978aa2d7fc090fc1f6612c15dc3f.tar.gz
gnunet-93a5401e858e978aa2d7fc090fc1f6612c15dc3f.zip
dfa minimization fix
Diffstat (limited to 'src')
-rw-r--r--src/regex/regex.c114
-rw-r--r--src/regex/test_regex_eval_api.c7
-rw-r--r--src/regex/test_regex_iterate_api.c6
3 files changed, 81 insertions, 46 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index ae28fb488..7aebd2191 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -262,8 +262,8 @@ static void
262debug_print_state (struct GNUNET_REGEX_State *s) 262debug_print_state (struct GNUNET_REGEX_State *s)
263{ 263{
264 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 264 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
265 "State %i: %s marked: %i accepting: %i scc_id: %i\n", s->id, 265 "State %i: %s marked: %i accepting: %i scc_id: %i transitions: %i\n", s->id,
266 s->name, s->marked, s->accepting, s->scc_id); 266 s->name, s->marked, s->accepting, s->scc_id, s->transition_count);
267} 267}
268 268
269static void 269static void
@@ -465,7 +465,8 @@ state_set_clear (struct GNUNET_REGEX_StateSet *set)
465} 465}
466 466
467/** 467/**
468 * Adds a transition from one state to another on 'label' 468 * Adds a transition from one state to another on 'label'. Does not add
469 * duplicate states.
469 * 470 *
470 * @param ctx context 471 * @param ctx context
471 * @param from_state starting state for the transition 472 * @param from_state starting state for the transition
@@ -477,6 +478,7 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
477 struct GNUNET_REGEX_State *from_state, const char label, 478 struct GNUNET_REGEX_State *from_state, const char label,
478 struct GNUNET_REGEX_State *to_state) 479 struct GNUNET_REGEX_State *to_state)
479{ 480{
481 int is_dup;
480 struct Transition *t; 482 struct Transition *t;
481 483
482 if (NULL == from_state) 484 if (NULL == from_state)
@@ -485,6 +487,22 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
485 return; 487 return;
486 } 488 }
487 489
490 // Do not add duplicate states
491 is_dup = GNUNET_NO;
492 for (t = from_state->transitions_head; NULL != t; t = t->next)
493 {
494 if (t->to_state == to_state
495 && t->label == label
496 && t->from_state == from_state)
497 {
498 is_dup = GNUNET_YES;
499 break;
500 }
501 }
502
503 if (is_dup)
504 return;
505
488 t = GNUNET_malloc (sizeof (struct Transition)); 506 t = GNUNET_malloc (sizeof (struct Transition));
489 507
490 t->id = ctx->transition_id++; 508 t->id = ctx->transition_id++;
@@ -492,6 +510,8 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
492 t->to_state = to_state; 510 t->to_state = to_state;
493 t->from_state = from_state; 511 t->from_state = from_state;
494 512
513 from_state->transition_count++;
514
495 GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, 515 GNUNET_CONTAINER_DLL_insert (from_state->transitions_head,
496 from_state->transitions_tail, t); 516 from_state->transitions_tail, t);
497} 517}
@@ -533,12 +553,11 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s)
533 if (NULL != s->name) 553 if (NULL != s->name)
534 GNUNET_free (s->name); 554 GNUNET_free (s->name);
535 555
536 for (t = s->transitions_head; NULL != t;) 556 for (t = s->transitions_head; NULL != t; t = next_t)
537 { 557 {
538 next_t = t->next; 558 next_t = t->next;
539 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); 559 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
540 GNUNET_free (t); 560 GNUNET_free (t);
541 t = next_t;
542 } 561 }
543 562
544 state_set_clear (s->nfa_set); 563 state_set_clear (s->nfa_set);
@@ -604,7 +623,6 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
604{ 623{
605 struct GNUNET_REGEX_State *s_check; 624 struct GNUNET_REGEX_State *s_check;
606 struct Transition *t_check; 625 struct Transition *t_check;
607 struct Transition *t;
608 char *new_name; 626 char *new_name;
609 627
610 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); 628 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
@@ -615,7 +633,7 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
615 for (t_check = s_check->transitions_head; NULL != t_check; 633 for (t_check = s_check->transitions_head; NULL != t_check;
616 t_check = t_check->next) 634 t_check = t_check->next)
617 { 635 {
618 if (s_check != s1 && s2 == t_check->to_state) 636 if (s2 == t_check->to_state)
619 t_check->to_state = s1; 637 t_check->to_state = s1;
620 } 638 }
621 } 639 }
@@ -623,29 +641,24 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
623 // 2. Add all transitions from s2 to sX to s1 641 // 2. Add all transitions from s2 to sX to s1
624 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) 642 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
625 { 643 {
626 for (t = s1->transitions_head; NULL != t; t = t->next) 644 if (t_check->to_state != s1)
627 {
628 if (t_check->label != t->label && NULL != t_check->to_state &&
629 t_check->to_state != t->to_state && t_check->to_state != s2)
630 {
631 state_add_transition (ctx, s1, t_check->label, t_check->to_state); 645 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
632 }
633 }
634 } 646 }
635 647
636 // 3. Rename s1 to {s1,s2} 648 // 3. Rename s1 to {s1,s2}
637 new_name = GNUNET_malloc (strlen (s1->name) + strlen (s2->name) + 1); 649 new_name = GNUNET_strdup (s1->name);
638 strncat (new_name, s1->name, strlen (s1->name));
639 strncat (new_name, s2->name, strlen (s2->name));
640 if (NULL != s1->name) 650 if (NULL != s1->name)
651 {
641 GNUNET_free (s1->name); 652 GNUNET_free (s1->name);
642 s1->name = new_name; 653 s1->name = NULL;
654 }
655 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
656 GNUNET_free (new_name);
643 657
644 // remove state 658 // remove state
645 s_check = s2; 659 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
646 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s_check);
647 a->state_count--; 660 a->state_count--;
648 automaton_destroy_state (s_check); 661 automaton_destroy_state (s2);
649} 662}
650 663
651/** 664/**
@@ -925,8 +938,8 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
925 struct Transition *t1; 938 struct Transition *t1;
926 struct Transition *t2; 939 struct Transition *t2;
927 int change; 940 int change;
941 int common_labels;
928 942
929 change = 1;
930 for (i = 0, s1 = a->states_head; 943 for (i = 0, s1 = a->states_head;
931 i < a->state_count && NULL != s1; 944 i < a->state_count && NULL != s1;
932 i++, s1 = s1->next) 945 i++, s1 = s1->next)
@@ -949,6 +962,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
949 } 962 }
950 } 963 }
951 964
965 change = 1;
952 while (0 != change) 966 while (0 != change)
953 { 967 {
954 change = 0; 968 change = 0;
@@ -959,24 +973,26 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
959 if (0 != table[s1->marked][s2->marked]) 973 if (0 != table[s1->marked][s2->marked])
960 continue; 974 continue;
961 975
976 common_labels = GNUNET_NO;
962 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next) 977 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
963 { 978 {
964 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next) 979 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
965 { 980 {
966 if (t1->label == t2->label && t1->to_state == t2->to_state && 981 if (t1->label == t2->label)
967 (0 != table[t1->to_state->marked][t2->to_state->marked] ||
968 0 != table[t2->to_state->marked][t1->to_state->marked]))
969 { 982 {
970 table[s1->marked][s2->marked] = t1->label; 983 common_labels = GNUNET_YES;
971 change = 1; 984
972 } 985 if (0 != table[t1->to_state->marked][t2->to_state->marked] ||
973 else if (t1->label != t2->label && t1->to_state != t2->to_state) 986 0 != table[t2->to_state->marked][t1->to_state->marked])
974 { 987 {
975 table[s1->marked][s2->marked] = -1; 988 table[s1->marked][s2->marked] = t1->label != 0 ? t1->label : 1;
976 change = 1; 989 change = 1;
990 }
977 } 991 }
978 } 992 }
979 } 993 }
994 if (GNUNET_NO == common_labels)
995 table[s1->marked][s2->marked] = -2;
980 } 996 }
981 } 997 }
982 } 998 }
@@ -988,7 +1004,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
988 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next) 1004 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
989 { 1005 {
990 s2_next = s2->next; 1006 s2_next = s2->next;
991 if (s1 != s2 && table[s1->marked][s2->marked] == 0) 1007 if (table[s1->marked][s2->marked] == 0)
992 automaton_merge_states (ctx, a, s1, s2); 1008 automaton_merge_states (ctx, a, s1, s2);
993 } 1009 }
994 } 1010 }
@@ -1700,10 +1716,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1700 GNUNET_free (dfa_stack); 1716 GNUNET_free (dfa_stack);
1701 GNUNET_REGEX_automaton_destroy (nfa); 1717 GNUNET_REGEX_automaton_destroy (nfa);
1702 1718
1703 GNUNET_REGEX_automaton_save_graph (dfa, "dfa_before.dot");
1704 dfa_minimize (&ctx, dfa); 1719 dfa_minimize (&ctx, dfa);
1705 /*GNUNET_REGEX_automaton_save_graph (dfa, "dfa_after.dot");*/ 1720 scc_tarjan (&ctx, dfa);
1706 /*scc_tarjan (&ctx, dfa);*/
1707 1721
1708 return dfa; 1722 return dfa;
1709} 1723}
@@ -1782,17 +1796,22 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1782 GNUNET_asprintf (&s_acc, 1796 GNUNET_asprintf (&s_acc,
1783 "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", 1797 "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
1784 s->name, s->scc_id); 1798 s->name, s->scc_id);
1785 fwrite (s_acc, strlen (s_acc), 1, p);
1786 GNUNET_free (s_acc);
1787 } 1799 }
1788 else 1800 else
1789 { 1801 {
1790 GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, 1802 GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
1791 s->scc_id); 1803 s->scc_id);
1792 fwrite (s_acc, strlen (s_acc), 1, p);
1793 GNUNET_free (s_acc);
1794 } 1804 }
1795 1805
1806 if (NULL == s_acc)
1807 {
1808 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name);
1809 return;
1810 }
1811 fwrite (s_acc, strlen (s_acc), 1, p);
1812 GNUNET_free (s_acc);
1813 s_acc = NULL;
1814
1796 for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) 1815 for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next)
1797 { 1816 {
1798 if (NULL == ctran->to_state) 1817 if (NULL == ctran->to_state)
@@ -1817,8 +1836,15 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1817 s->scc_id); 1836 s->scc_id);
1818 } 1837 }
1819 1838
1839 if (NULL == s_tran)
1840 {
1841 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name);
1842 return;
1843 }
1844
1820 fwrite (s_tran, strlen (s_tran), 1, p); 1845 fwrite (s_tran, strlen (s_tran), 1, p);
1821 GNUNET_free (s_tran); 1846 GNUNET_free (s_tran);
1847 s_tran = NULL;
1822 } 1848 }
1823 } 1849 }
1824 1850
@@ -2012,8 +2038,8 @@ state_get_edges (struct GNUNET_REGEX_State *s, struct GNUNET_REGEX_Edge *edges)
2012 { 2038 {
2013 if (NULL != t->to_state) 2039 if (NULL != t->to_state)
2014 { 2040 {
2015 /*edges[count].label = &t->label;*/ 2041 edges[count].label = &t->label;
2016 /*edges[count].destination = t->to_state->hash;*/ 2042 edges[count].destination = t->to_state->hash;
2017 count++; 2043 count++;
2018 } 2044 }
2019 } 2045 }
@@ -2036,7 +2062,7 @@ iterate_edge (struct GNUNET_REGEX_State *s, GNUNET_REGEX_KeyIterator iterator,
2036 struct GNUNET_REGEX_Edge edges[s->transition_count]; 2062 struct GNUNET_REGEX_Edge edges[s->transition_count];
2037 unsigned int num_edges; 2063 unsigned int num_edges;
2038 2064
2039 if (GNUNET_NO == s->marked) 2065 if (GNUNET_YES != s->marked)
2040 { 2066 {
2041 s->marked = GNUNET_YES; 2067 s->marked = GNUNET_YES;
2042 2068
@@ -2064,7 +2090,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
2064{ 2090{
2065 struct GNUNET_REGEX_State *s; 2091 struct GNUNET_REGEX_State *s;
2066 2092
2067 for (s = a->start; NULL != s; s = s->next) 2093 for (s = a->states_head; NULL != s; s = s->next)
2068 s->marked = GNUNET_NO; 2094 s->marked = GNUNET_NO;
2069 2095
2070 iterate_edge (a->start, iterator, iterator_cls); 2096 iterate_edge (a->start, iterator, iterator_cls);
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c
index 49cdb3931..371d19ec1 100644
--- a/src/regex/test_regex_eval_api.c
+++ b/src/regex/test_regex_eval_api.c
@@ -248,7 +248,7 @@ main (int argc, char *argv[])
248 int check_dfa; 248 int check_dfa;
249 int check_rand; 249 int check_rand;
250 250
251 struct Regex_String_Pair rxstr[4] = { 251 struct Regex_String_Pair rxstr[5] = {
252 {"ab?(abcd)?", 5, 252 {"ab?(abcd)?", 5,
253 {"ababcd", "abab", "aabcd", "a", "abb"}, 253 {"ababcd", "abab", "aabcd", "a", "abb"},
254 {match, nomatch, match, match, nomatch}}, 254 {match, nomatch, match, match, nomatch}},
@@ -262,6 +262,9 @@ main (int argc, char *argv[])
262 {nomatch, nomatch, nomatch, nomatch, nomatch}}, 262 {nomatch, nomatch, nomatch, nomatch, nomatch}},
263 {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1, 263 {"k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", 1,
264 {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"}, 264 {"kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg"},
265 {nomatch}},
266 {"F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", 1,
267 {"osfjsodfonONONOnosndfsdnfsd"},
265 {nomatch}} 268 {nomatch}}
266 }; 269 };
267 270
@@ -269,7 +272,7 @@ main (int argc, char *argv[])
269 check_dfa = 0; 272 check_dfa = 0;
270 check_rand = 0; 273 check_rand = 0;
271 274
272 for (i = 0; i < 4; i++) 275 for (i = 0; i < 5; i++)
273 { 276 {
274 if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) 277 if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED))
275 { 278 {
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c
index 913e94f2b..39959641d 100644
--- a/src/regex/test_regex_iterate_api.c
+++ b/src/regex/test_regex_iterate_api.c
@@ -31,7 +31,13 @@ void key_iterator(void *cls, const GNUNET_HashCode *key, const char *proof,
31 int accepting, unsigned int num_edges, 31 int accepting, unsigned int num_edges,
32 const struct GNUNET_REGEX_Edge *edges) 32 const struct GNUNET_REGEX_Edge *edges)
33{ 33{
34 int i;
35
34 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); 36 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n");
37 for (i=0; i<num_edges; i++)
38 {
39 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label);
40 }
35} 41}
36 42
37int 43int