diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-11 15:10:21 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-11 15:10:21 +0000 |
commit | 16cf819a0feb38c36b046c59febae5bc511a3d1b (patch) | |
tree | 5a2add323bee9c5646cd56a0e37eb6a29c5de034 | |
parent | 300a9b12a902e6af2b763910fb372020a857ae7c (diff) | |
download | gnunet-16cf819a0feb38c36b046c59febae5bc511a3d1b.tar.gz gnunet-16cf819a0feb38c36b046c59febae5bc511a3d1b.zip |
simplified regex/proof generation
-rw-r--r-- | src/regex/regex.c | 276 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 25 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 15 |
3 files changed, 235 insertions, 81 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 20c496dd4..d697aee89 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -456,6 +456,7 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
456 | { | 456 | { |
457 | int is_dup; | 457 | int is_dup; |
458 | struct Transition *t; | 458 | struct Transition *t; |
459 | struct Transition *oth; | ||
459 | 460 | ||
460 | if (NULL == from_state) | 461 | if (NULL == from_state) |
461 | { | 462 | { |
@@ -478,6 +479,13 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
478 | if (is_dup) | 479 | if (is_dup) |
479 | return; | 480 | return; |
480 | 481 | ||
482 | // sort transitions by label | ||
483 | for (oth = from_state->transitions_head; NULL != oth; oth = oth->next) | ||
484 | { | ||
485 | if (oth->label > label) | ||
486 | break; | ||
487 | } | ||
488 | |||
481 | t = GNUNET_malloc (sizeof (struct Transition)); | 489 | t = GNUNET_malloc (sizeof (struct Transition)); |
482 | t->id = ctx->transition_id++; | 490 | t->id = ctx->transition_id++; |
483 | t->label = label; | 491 | t->label = label; |
@@ -486,8 +494,8 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
486 | 494 | ||
487 | // Add outgoing transition to 'from_state' | 495 | // Add outgoing transition to 'from_state' |
488 | from_state->transition_count++; | 496 | from_state->transition_count++; |
489 | GNUNET_CONTAINER_DLL_insert (from_state->transitions_head, | 497 | GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head, |
490 | from_state->transitions_tail, t); | 498 | from_state->transitions_tail, oth, t); |
491 | } | 499 | } |
492 | 500 | ||
493 | /** | 501 | /** |
@@ -831,11 +839,14 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
831 | char *R_cur_r; | 839 | char *R_cur_r; |
832 | int length_l; | 840 | int length_l; |
833 | int length_r; | 841 | int length_r; |
842 | int s_cnt; | ||
843 | int l_cnt; | ||
834 | char *complete_regex; | 844 | char *complete_regex; |
835 | 845 | ||
836 | k = 0; | 846 | k = 0; |
837 | n = a->state_count; | 847 | n = a->state_count; |
838 | 848 | ||
849 | // number the states | ||
839 | for (i = 0, s = a->states_head; NULL != s; s = s->next, i++) | 850 | for (i = 0, s = a->states_head; NULL != s; s = s->next, i++) |
840 | { | 851 | { |
841 | s->marked = i; | 852 | s->marked = i; |
@@ -891,6 +902,8 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
891 | { | 902 | { |
892 | for (j = 0; j < n; j++) | 903 | for (j = 0; j < n; j++) |
893 | { | 904 | { |
905 | //construct R_cur | ||
906 | |||
894 | // 0*R = R*0 = 0 | 907 | // 0*R = R*0 = 0 |
895 | // 0+R = R+0 = R | 908 | // 0+R = R+0 = R |
896 | if (NULL == R_last[i][k] || NULL == R_last[k][j]) | 909 | if (NULL == R_last[i][k] || NULL == R_last[k][j]) |
@@ -898,6 +911,15 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
898 | if (NULL != R_last[i][j]) | 911 | if (NULL != R_last[i][j]) |
899 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); | 912 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); |
900 | } | 913 | } |
914 | // a(ba)*b = (ab)+ | ||
915 | /*else if ((NULL == R_last[i][j] || 0 == strlen (R_last[i][j])) && */ | ||
916 | /*NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) && */ | ||
917 | /*NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && */ | ||
918 | /*0 == strncmp (R_last[k][k], R_last[k][j], (strlen (R_last[k][k]) - 1)) && */ | ||
919 | /*R_last[i][k][0] == R_last[k][k][strlen (R_last[k][k]) - 1]) */ | ||
920 | /*{ */ | ||
921 | /*GNUNET_asprintf (&R_cur[i][j], "(%s%s)+", R_last[i][k], R_last[k][j]); */ | ||
922 | /*} */ | ||
901 | else | 923 | else |
902 | { | 924 | { |
903 | // R(k)ij = R(k-1)ij + R(k-1)ik (R(k-1)kk)* R(k-1)kj | 925 | // R(k)ij = R(k-1)ij + R(k-1)ik (R(k-1)kk)* R(k-1)kj |
@@ -911,27 +933,35 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
911 | if (NULL != R_last[i][j]) | 933 | if (NULL != R_last[i][j]) |
912 | strcat (R_cur_l, R_last[i][j]); | 934 | strcat (R_cur_l, R_last[i][j]); |
913 | 935 | ||
914 | if (NULL != R_last[i][k]) | 936 | if (NULL != R_last[i][k] && 0 != strcmp (R_last[i][k], R_last[k][k])) |
915 | strcat (R_cur_r, R_last[i][k]); | 937 | strcat (R_cur_r, R_last[i][k]); |
916 | 938 | ||
917 | if (NULL != R_last[k][k] && 0 != strcmp (R_last[k][k], "")) | 939 | if (NULL != R_last[k][k] && 0 != strcmp (R_last[k][k], "")) |
918 | { | 940 | { |
919 | if (R_last[k][k][0] == '(' && R_last[k][k][strlen (R_last[k][k])-1] == ')') | 941 | if (R_last[k][k][0] == '(' && |
942 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')') | ||
920 | { | 943 | { |
921 | strcat (R_cur_r, R_last[k][k]); | 944 | strcat (R_cur_r, R_last[k][k]); |
922 | strcat (R_cur_r, "*"); | ||
923 | } | 945 | } |
924 | else | 946 | else |
925 | { | 947 | { |
926 | strcat (R_cur_r, "("); | 948 | strcat (R_cur_r, "("); |
927 | strcat (R_cur_r, R_last[k][k]); | 949 | strcat (R_cur_r, R_last[k][k]); |
928 | strcat (R_cur_r, ")*"); | 950 | strcat (R_cur_r, ")"); |
929 | } | 951 | } |
952 | |||
953 | if (0 == strcmp (R_last[i][k], R_last[k][k]) || | ||
954 | 0 == strcmp (R_last[k][k], R_last[k][j])) | ||
955 | strcat (R_cur_r, "+"); | ||
956 | else | ||
957 | strcat (R_cur_r, "*"); | ||
930 | } | 958 | } |
931 | 959 | ||
932 | if (NULL != R_last[k][j]) | 960 | if (NULL != R_last[k][j] && 0 != strcmp (R_last[k][k], R_last[k][j])) |
933 | strcat (R_cur_r, R_last[k][j]); | 961 | strcat (R_cur_r, R_last[k][j]); |
934 | 962 | ||
963 | // simplifications... | ||
964 | |||
935 | // | is idempotent: a | a = a for all a in A | 965 | // | is idempotent: a | a = a for all a in A |
936 | if (0 == strcmp (R_cur_l, R_cur_r) || 0 == strcmp (R_cur_l, "") || | 966 | if (0 == strcmp (R_cur_l, R_cur_r) || 0 == strcmp (R_cur_l, "") || |
937 | 0 == strcmp (R_cur_r, "")) | 967 | 0 == strcmp (R_cur_r, "")) |
@@ -941,32 +971,102 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
941 | else | 971 | else |
942 | GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_l); | 972 | GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_l); |
943 | } | 973 | } |
944 | // a | a a* a = a+ | 974 | // TODO: in theory only applicable if (e + a) | (e + a)(e + a)*(e+a) |
975 | // where e means epsilon... check if practical! | ||
976 | // a | a a* a = a* | ||
945 | else if (R_last[i][j] == R_last[i][k] && R_last[i][k] == R_last[k][k] | 977 | else if (R_last[i][j] == R_last[i][k] && R_last[i][k] == R_last[k][k] |
946 | && R_last[k][k] == R_last[k][j]) | 978 | && R_last[k][k] == R_last[k][j]) |
947 | { | 979 | { |
948 | GNUNET_asprintf (&R_cur[i][j], "%s+", R_last[i][j]); | 980 | if (1 >= strlen (R_last[k][k]) || |
981 | (R_last[k][k][0] == '(' && | ||
982 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | ||
983 | GNUNET_asprintf (&R_cur[i][j], "%s*", R_last[k][k]); | ||
984 | else | ||
985 | GNUNET_asprintf (&R_cur[i][j], "(%s)*", R_last[k][k]); | ||
949 | } | 986 | } |
950 | // a | a b* b => a | a b | a b b | ... => a b* | 987 | // a | a b* b => a | a b | a b b | ... => a b* |
951 | else if (R_last[i][j] == R_last[i][k] && R_last[k][k] == R_last[k][j]) | 988 | else if (R_last[i][j] == R_last[i][k] && R_last[k][k] == R_last[k][j]) |
952 | { | 989 | { |
953 | GNUNET_asprintf (&R_cur[i][j], "%s%s*", R_last[i][k], R_last[k][k]); | 990 | // if a is xb then a b* becomes x b b* = x b+ |
991 | |||
992 | s_cnt = strlen (R_last[k][k]); | ||
993 | l_cnt = strlen (R_last[i][k]); | ||
994 | R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4); | ||
995 | |||
996 | if (l_cnt > 0 && s_cnt >= l_cnt) | ||
997 | for (; s_cnt > 0; s_cnt--, l_cnt--) | ||
998 | if (R_last[i][k][l_cnt] != R_last[k][k][s_cnt]) | ||
999 | break; | ||
1000 | |||
1001 | if (strlen (R_last[i][k]) > 0 && 0 == s_cnt && 0 <= l_cnt) | ||
1002 | strncat (R_cur[i][j], R_last[i][k], l_cnt); | ||
1003 | else | ||
1004 | strcat (R_cur[i][j], R_last[i][k]); | ||
1005 | |||
1006 | if (1 >= strlen (R_last[k][k]) && | ||
1007 | !(R_last[k][k][0] == '(' && | ||
1008 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | ||
1009 | { | ||
1010 | strcat (R_cur[i][j], "("); | ||
1011 | strcat (R_cur[i][j], R_last[k][k]); | ||
1012 | strcat (R_cur[i][j], ")"); | ||
1013 | } | ||
1014 | else | ||
1015 | strcat (R_cur[i][j], R_last[k][k]); | ||
1016 | |||
1017 | if (0 == s_cnt && 0 <= l_cnt) | ||
1018 | strcat (R_cur[i][j], "+"); | ||
1019 | else | ||
1020 | strcat (R_cur[i][j], "*"); | ||
954 | } | 1021 | } |
955 | // a | b b* a => a | b a | b b a | ... => b* a | 1022 | // a | b b* a => a | b a | b b a | ... => b* a |
956 | else if (R_last[i][j] == R_last[k][j] && R_last[i][k] == R_last[k][k]) | 1023 | else if (R_last[i][j] == R_last[k][j] && R_last[i][k] == R_last[k][k]) |
957 | { | 1024 | { |
958 | GNUNET_asprintf (&R_cur[i][j], "%s*%s", R_last[k][k], R_last[k][j]); | 1025 | // if a is bx then b* a becomes b+ x |
1026 | |||
1027 | temp = NULL; | ||
1028 | s_cnt = strlen (R_last[k][k]); | ||
1029 | l_cnt = strlen (R_last[k][j]); | ||
1030 | R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4); | ||
1031 | |||
1032 | int bla; | ||
1033 | |||
1034 | if (l_cnt > 0 && s_cnt >= l_cnt) | ||
1035 | for (bla = 0; bla < s_cnt; bla++) | ||
1036 | if (R_last[k][k][bla] != R_last[k][j][bla]) | ||
1037 | break; | ||
1038 | |||
1039 | if (1 >= strlen (R_last[k][k]) && | ||
1040 | !(R_last[k][k][0] == '(' && | ||
1041 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | ||
1042 | { | ||
1043 | strcat (R_cur[i][j], "("); | ||
1044 | strcat (R_cur[i][j], R_last[k][k]); | ||
1045 | strcat (R_cur[i][j], ")"); | ||
1046 | } | ||
1047 | else | ||
1048 | strcat (R_cur[i][j], R_last[k][k]); | ||
1049 | |||
1050 | if (bla == s_cnt) | ||
1051 | strcat (R_cur[i][j], "+"); | ||
1052 | else | ||
1053 | strcat (R_cur[i][j], "*"); | ||
1054 | |||
1055 | if (strlen (R_last[k][j]) > 0 && bla == s_cnt) | ||
1056 | strcat (R_cur[i][j], &R_last[k][j][bla]); | ||
1057 | else | ||
1058 | strcat (R_cur[i][j], R_last[k][j]); | ||
959 | } | 1059 | } |
960 | else | 1060 | else |
961 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); | 1061 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); |
962 | 1062 | ||
963 | GNUNET_free_non_null (R_cur_l); | 1063 | GNUNET_free_non_null (R_cur_l); |
964 | GNUNET_free_non_null (R_cur_r); | 1064 | GNUNET_free_non_null (R_cur_r); |
965 | |||
966 | } | 1065 | } |
967 | } | 1066 | } |
968 | } | 1067 | } |
969 | 1068 | ||
1069 | // set R_last = R_cur | ||
970 | for (i = 0; i < n; i++) | 1070 | for (i = 0; i < n; i++) |
971 | { | 1071 | { |
972 | for (j = 0; j < n; j++) | 1072 | for (j = 0; j < n; j++) |
@@ -991,7 +1091,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
991 | &states[i]->hash); | 1091 | &states[i]->hash); |
992 | } | 1092 | } |
993 | 1093 | ||
994 | // complete regex for whole DFA | 1094 | // complete regex for whole DFA: union of all pairs (start state/accepting state(s)). |
995 | complete_regex = NULL; | 1095 | complete_regex = NULL; |
996 | for (i = 0; i < n; i++) | 1096 | for (i = 0; i < n; i++) |
997 | { | 1097 | { |
@@ -1486,6 +1586,7 @@ nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa, | |||
1486 | GNUNET_assert (0 == cls_check->len); | 1586 | GNUNET_assert (0 == cls_check->len); |
1487 | GNUNET_free (cls_check); | 1587 | GNUNET_free (cls_check); |
1488 | 1588 | ||
1589 | // sort the states | ||
1489 | if (cls->len > 1) | 1590 | if (cls->len > 1) |
1490 | qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), | 1591 | qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), |
1491 | state_compare); | 1592 | state_compare); |
@@ -2047,6 +2148,7 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) | |||
2047 | return; | 2148 | return; |
2048 | 2149 | ||
2049 | GNUNET_free (a->regex); | 2150 | GNUNET_free (a->regex); |
2151 | GNUNET_free_non_null (a->computed_regex); | ||
2050 | 2152 | ||
2051 | for (s = a->states_head; NULL != s;) | 2153 | for (s = a->states_head; NULL != s;) |
2052 | { | 2154 | { |
@@ -2059,6 +2161,81 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) | |||
2059 | } | 2161 | } |
2060 | 2162 | ||
2061 | /** | 2163 | /** |
2164 | * Save a state to an open file pointer. cls is expected to be a file pointer to | ||
2165 | * an open file. Used only in conjunction with | ||
2166 | * GNUNET_REGEX_automaton_save_graph. | ||
2167 | * | ||
2168 | * @param cls file pointer | ||
2169 | * @param s state | ||
2170 | */ | ||
2171 | void | ||
2172 | GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s) | ||
2173 | { | ||
2174 | FILE *p; | ||
2175 | struct Transition *ctran; | ||
2176 | char *s_acc = NULL; | ||
2177 | char *s_tran = NULL; | ||
2178 | |||
2179 | p = cls; | ||
2180 | |||
2181 | if (s->accepting) | ||
2182 | { | ||
2183 | GNUNET_asprintf (&s_acc, | ||
2184 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | ||
2185 | s->name, s->scc_id); | ||
2186 | } | ||
2187 | else | ||
2188 | { | ||
2189 | GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, | ||
2190 | s->scc_id); | ||
2191 | } | ||
2192 | |||
2193 | if (NULL == s_acc) | ||
2194 | { | ||
2195 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", s->name); | ||
2196 | return; | ||
2197 | } | ||
2198 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
2199 | GNUNET_free (s_acc); | ||
2200 | s_acc = NULL; | ||
2201 | |||
2202 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | ||
2203 | { | ||
2204 | if (NULL == ctran->to_state) | ||
2205 | { | ||
2206 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
2207 | "Transition from State %i has has no state for transitioning\n", | ||
2208 | s->id); | ||
2209 | continue; | ||
2210 | } | ||
2211 | |||
2212 | if (ctran->label == 0) | ||
2213 | { | ||
2214 | GNUNET_asprintf (&s_tran, | ||
2215 | "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", | ||
2216 | s->name, ctran->to_state->name, s->scc_id); | ||
2217 | } | ||
2218 | else | ||
2219 | { | ||
2220 | GNUNET_asprintf (&s_tran, | ||
2221 | "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", | ||
2222 | s->name, ctran->to_state->name, ctran->label, s->scc_id); | ||
2223 | } | ||
2224 | |||
2225 | if (NULL == s_tran) | ||
2226 | { | ||
2227 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", | ||
2228 | s->name); | ||
2229 | return; | ||
2230 | } | ||
2231 | |||
2232 | fwrite (s_tran, strlen (s_tran), 1, p); | ||
2233 | GNUNET_free (s_tran); | ||
2234 | s_tran = NULL; | ||
2235 | } | ||
2236 | } | ||
2237 | |||
2238 | /** | ||
2062 | * Save the given automaton as a GraphViz dot file | 2239 | * Save the given automaton as a GraphViz dot file |
2063 | * | 2240 | * |
2064 | * @param a the automaton to be saved | 2241 | * @param a the automaton to be saved |
@@ -2068,10 +2245,6 @@ void | |||
2068 | GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | 2245 | GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, |
2069 | const char *filename) | 2246 | const char *filename) |
2070 | { | 2247 | { |
2071 | struct GNUNET_REGEX_State *s; | ||
2072 | struct Transition *ctran; | ||
2073 | char *s_acc = NULL; | ||
2074 | char *s_tran = NULL; | ||
2075 | char *start; | 2248 | char *start; |
2076 | char *end; | 2249 | char *end; |
2077 | FILE *p; | 2250 | FILE *p; |
@@ -2100,66 +2273,7 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
2100 | start = "digraph G {\nrankdir=LR\n"; | 2273 | start = "digraph G {\nrankdir=LR\n"; |
2101 | fwrite (start, strlen (start), 1, p); | 2274 | fwrite (start, strlen (start), 1, p); |
2102 | 2275 | ||
2103 | for (s = a->states_head; NULL != s; s = s->next) | 2276 | automaton_traverse (p, a, GNUNET_REGEX_automaton_save_graph_step); |
2104 | { | ||
2105 | if (s->accepting) | ||
2106 | { | ||
2107 | GNUNET_asprintf (&s_acc, | ||
2108 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | ||
2109 | s->name, s->scc_id); | ||
2110 | } | ||
2111 | else | ||
2112 | { | ||
2113 | GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, | ||
2114 | s->scc_id); | ||
2115 | } | ||
2116 | |||
2117 | if (NULL == s_acc) | ||
2118 | { | ||
2119 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", | ||
2120 | s->name); | ||
2121 | return; | ||
2122 | } | ||
2123 | fwrite (s_acc, strlen (s_acc), 1, p); | ||
2124 | GNUNET_free (s_acc); | ||
2125 | s_acc = NULL; | ||
2126 | |||
2127 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | ||
2128 | { | ||
2129 | if (NULL == ctran->to_state) | ||
2130 | { | ||
2131 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
2132 | "Transition from State %i has has no state for transitioning\n", | ||
2133 | s->id); | ||
2134 | continue; | ||
2135 | } | ||
2136 | |||
2137 | if (ctran->label == 0) | ||
2138 | { | ||
2139 | GNUNET_asprintf (&s_tran, | ||
2140 | "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", | ||
2141 | s->name, ctran->to_state->name, s->scc_id); | ||
2142 | } | ||
2143 | else | ||
2144 | { | ||
2145 | GNUNET_asprintf (&s_tran, | ||
2146 | "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", | ||
2147 | s->name, ctran->to_state->name, ctran->label, | ||
2148 | s->scc_id); | ||
2149 | } | ||
2150 | |||
2151 | if (NULL == s_tran) | ||
2152 | { | ||
2153 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print state %s\n", | ||
2154 | s->name); | ||
2155 | return; | ||
2156 | } | ||
2157 | |||
2158 | fwrite (s_tran, strlen (s_tran), 1, p); | ||
2159 | GNUNET_free (s_tran); | ||
2160 | s_tran = NULL; | ||
2161 | } | ||
2162 | } | ||
2163 | 2277 | ||
2164 | end = "\n}\n"; | 2278 | end = "\n}\n"; |
2165 | fwrite (end, strlen (end), 1, p); | 2279 | fwrite (end, strlen (end), 1, p); |
@@ -2189,6 +2303,10 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2189 | 2303 | ||
2190 | s = a->start; | 2304 | s = a->start; |
2191 | 2305 | ||
2306 | // If the string is empty but the starting state is accepting, we accept. | ||
2307 | if ((NULL == string || 0 == strlen (string)) && s->accepting) | ||
2308 | return 0; | ||
2309 | |||
2192 | for (strp = string; NULL != strp && *strp; strp++) | 2310 | for (strp = string; NULL != strp && *strp; strp++) |
2193 | { | 2311 | { |
2194 | s = dfa_move (s, *strp); | 2312 | s = dfa_move (s, *strp); |
@@ -2227,6 +2345,10 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2227 | return -1; | 2345 | return -1; |
2228 | } | 2346 | } |
2229 | 2347 | ||
2348 | // If the string is empty but the starting state is accepting, we accept. | ||
2349 | if ((NULL == string || 0 == strlen (string)) && a->start->accepting) | ||
2350 | return 0; | ||
2351 | |||
2230 | result = 1; | 2352 | result = 1; |
2231 | strp = string; | 2353 | strp = string; |
2232 | sset = nfa_closure_create (a, a->start, 0); | 2354 | sset = nfa_closure_create (a, a->start, 0); |
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index 392fb3b36..89a757806 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -263,8 +263,9 @@ main (int argc, char *argv[]) | |||
263 | int check_nfa; | 263 | int check_nfa; |
264 | int check_dfa; | 264 | int check_dfa; |
265 | int check_rand; | 265 | int check_rand; |
266 | char *check_proof; | ||
266 | 267 | ||
267 | struct Regex_String_Pair rxstr[8] = { | 268 | struct Regex_String_Pair rxstr[12] = { |
268 | {"ab?(abcd)?", 5, | 269 | {"ab?(abcd)?", 5, |
269 | {"ababcd", "abab", "aabcd", "a", "abb"}, | 270 | {"ababcd", "abab", "aabcd", "a", "abb"}, |
270 | {match, nomatch, match, match, nomatch}}, | 271 | {match, nomatch, match, match, nomatch}}, |
@@ -288,6 +289,19 @@ main (int argc, char *argv[]) | |||
288 | {"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1, | 289 | {"V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", 1, |
289 | {"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"}, | 290 | {"VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C"}, |
290 | {nomatch}}, | 291 | {nomatch}}, |
292 | {"(bla)*", 8, | ||
293 | {"", "bla", "blabla", "bl", "la", "b", "l", "a"}, | ||
294 | {match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}}, | ||
295 | {"ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8, | ||
296 | {"ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", "l", | ||
297 | "a"}, | ||
298 | {nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch}}, | ||
299 | {"a|aa*a", 6, | ||
300 | {"", "a", "aa", "aaa", "aaaa", "aaaaa"}, | ||
301 | {nomatch, match, match, match, match, match}}, | ||
302 | {"ab(c|d)+c*(a(b|c)+d)+(bla)+", 1, | ||
303 | {"abcabdblaacdbla"}, | ||
304 | {nomatch}}, | ||
291 | {"ab(c|d)+c*(a(b|c)d)+", 1, | 305 | {"ab(c|d)+c*(a(b|c)d)+", 1, |
292 | {"abacd"}, | 306 | {"abacd"}, |
293 | {nomatch}} | 307 | {nomatch}} |
@@ -297,7 +311,7 @@ main (int argc, char *argv[]) | |||
297 | check_dfa = 0; | 311 | check_dfa = 0; |
298 | check_rand = 0; | 312 | check_rand = 0; |
299 | 313 | ||
300 | for (i = 0; i < 8; i++) | 314 | for (i = 0; i < 12; i++) |
301 | { | 315 | { |
302 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) | 316 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) |
303 | { | 317 | { |
@@ -314,7 +328,14 @@ main (int argc, char *argv[]) | |||
314 | // DFA test | 328 | // DFA test |
315 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 329 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); |
316 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 330 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
331 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a)); | ||
332 | GNUNET_REGEX_automaton_destroy (a); | ||
333 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); | ||
334 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | ||
317 | GNUNET_REGEX_automaton_destroy (a); | 335 | GNUNET_REGEX_automaton_destroy (a); |
336 | if (0 != check_dfa) | ||
337 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof); | ||
338 | GNUNET_free_non_null (check_proof); | ||
318 | 339 | ||
319 | regfree (&rx); | 340 | regfree (&rx); |
320 | } | 341 | } |
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index 6c5b0e55b..90907baee 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c | |||
@@ -60,7 +60,10 @@ main (int argc, char *argv[]) | |||
60 | struct GNUNET_REGEX_Automaton *dfa; | 60 | struct GNUNET_REGEX_Automaton *dfa; |
61 | 61 | ||
62 | error = 0; | 62 | error = 0; |
63 | /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; */ | 63 | regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; |
64 | /*regex = "(bla)+"; */ | ||
65 | /*regex = "b(lab)*la"; */ | ||
66 | /*regex = "(bla)*"; */ | ||
64 | /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */ | 67 | /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */ |
65 | /*regex = "z(abc|def)?xyz"; */ | 68 | /*regex = "z(abc|def)?xyz"; */ |
66 | /*regex = "1*0(0|1)*"; */ | 69 | /*regex = "1*0(0|1)*"; */ |
@@ -68,7 +71,15 @@ main (int argc, char *argv[]) | |||
68 | /*regex = "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)"; */ | 71 | /*regex = "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)"; */ |
69 | /*regex = "abc(1|0)*def"; */ | 72 | /*regex = "abc(1|0)*def"; */ |
70 | /*regex = "ab|ac"; */ | 73 | /*regex = "ab|ac"; */ |
71 | regex = "(ab)(ab)*"; | 74 | /*regex = "(ab)(ab)*"; */ |
75 | /*regex = "ab|cd|ef|gh"; */ | ||
76 | /*regex = "a|b|c|d|e|f|g"; */ | ||
77 | /*regex = "(ab)|(ac)"; */ | ||
78 | /*regex = "a(b|c)"; */ | ||
79 | /*regex = "a*a"; */ | ||
80 | /*regex = "ab?(abcd)?"; */ | ||
81 | /*regex = "(ab)+"; */ | ||
82 | /*regex = "(abcsdfsdf)+"; */ | ||
72 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); | 83 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); |
73 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot"); | 84 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot"); |
74 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL); | 85 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL); |