diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-07-02 12:22:37 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-07-02 12:22:37 +0000 |
commit | a4b40b20949b4ea21d98bed902906cdf56d41326 (patch) | |
tree | c21f239124ccc818ca52832b736b18ae3522230a | |
parent | ce4b5c1fa37a9b26df4345d30b6ba2c839ce23d5 (diff) | |
download | gnunet-a4b40b20949b4ea21d98bed902906cdf56d41326.tar.gz gnunet-a4b40b20949b4ea21d98bed902906cdf56d41326.zip |
regex bugfixes
-rw-r--r-- | src/regex/regex.c | 811 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 21 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 2 | ||||
-rw-r--r-- | src/regex/test_regex_proofs.c | 45 |
4 files changed, 466 insertions, 413 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 09cd910a0..18f6b60ea 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -503,7 +503,7 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx, | |||
503 | } | 503 | } |
504 | } | 504 | } |
505 | 505 | ||
506 | if (is_dup) | 506 | if (GNUNET_YES == is_dup) |
507 | return; | 507 | return; |
508 | 508 | ||
509 | // sort transitions by label | 509 | // sort transitions by label |
@@ -734,21 +734,41 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx, | |||
734 | { | 734 | { |
735 | struct GNUNET_REGEX_State *s_check; | 735 | struct GNUNET_REGEX_State *s_check; |
736 | struct Transition *t_check; | 736 | struct Transition *t_check; |
737 | struct Transition *t; | ||
738 | struct Transition *t_next; | ||
737 | char *new_name; | 739 | char *new_name; |
740 | int is_dup; | ||
738 | 741 | ||
739 | GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); | 742 | GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); |
740 | 743 | ||
741 | if (s1 == s2) | 744 | if (s1 == s2) |
742 | return; | 745 | return; |
743 | 746 | ||
744 | // 1. Make all transitions pointing to s2 point to s1 | 747 | // 1. Make all transitions pointing to s2 point to s1, unless this transition |
748 | // does not already exists, if it already exists remove transition. | ||
745 | for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) | 749 | for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) |
746 | { | 750 | { |
747 | for (t_check = s_check->transitions_head; NULL != t_check; | 751 | for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next) |
748 | t_check = t_check->next) | ||
749 | { | 752 | { |
753 | t_next = t_check->next; | ||
754 | |||
750 | if (s2 == t_check->to_state) | 755 | if (s2 == t_check->to_state) |
751 | t_check->to_state = s1; | 756 | { |
757 | is_dup = GNUNET_NO; | ||
758 | for (t = t_check->from_state->transitions_head; NULL != t; t = t->next) | ||
759 | { | ||
760 | if (t->to_state == s1 && t_check->label == t->label) | ||
761 | is_dup = GNUNET_YES; | ||
762 | } | ||
763 | if (GNUNET_NO == is_dup) | ||
764 | t_check->to_state = s1; | ||
765 | else | ||
766 | { | ||
767 | GNUNET_CONTAINER_DLL_remove (t_check->from_state->transitions_head, | ||
768 | t_check->from_state->transitions_tail, | ||
769 | t_check); | ||
770 | } | ||
771 | } | ||
752 | } | 772 | } |
753 | } | 773 | } |
754 | 774 | ||
@@ -1015,22 +1035,23 @@ number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s) | |||
1015 | states[count] = s; | 1035 | states[count] = s; |
1016 | } | 1036 | } |
1017 | 1037 | ||
1018 | 1038 | /** | |
1019 | /** | 1039 | * Construct the regular expression given the inductive step, |
1020 | * create proofs for all states in the given automaton. Implementation of the | 1040 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* |
1021 | * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and | 1041 | * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. |
1022 | * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. | 1042 | * |
1023 | * | 1043 | * @param R_last_ij value of $R^{(k-1)_{ij}. |
1024 | * @param a automaton. | 1044 | * @param R_last_ik value of $R^{(k-1)_{ik}. |
1045 | * @param R_last_kk value of $R^{(k-1)_{kk}. | ||
1046 | * @param R_last_kj value of $R^{(k-1)_{kj}. | ||
1047 | * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij | ||
1048 | * is expected to be NULL when called! | ||
1025 | */ | 1049 | */ |
1026 | static void | 1050 | static void |
1027 | automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | 1051 | automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik, |
1052 | char *R_last_kk, char *R_last_kj, | ||
1053 | char **R_cur_ij) | ||
1028 | { | 1054 | { |
1029 | unsigned int n = a->state_count; | ||
1030 | struct GNUNET_REGEX_State *states[n]; | ||
1031 | char *R_last[n][n]; | ||
1032 | char *R_cur[n][n]; | ||
1033 | struct Transition *t; | ||
1034 | char *R_cur_l; | 1055 | char *R_cur_l; |
1035 | char *R_cur_r; | 1056 | char *R_cur_r; |
1036 | char *temp_a; | 1057 | char *temp_a; |
@@ -1039,11 +1060,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1039 | char *R_temp_ik; | 1060 | char *R_temp_ik; |
1040 | char *R_temp_kj; | 1061 | char *R_temp_kj; |
1041 | char *R_temp_kk; | 1062 | char *R_temp_kk; |
1042 | char *complete_regex; | 1063 | |
1043 | unsigned int i; | ||
1044 | unsigned int j; | ||
1045 | unsigned int k; | ||
1046 | unsigned int cnt; | ||
1047 | int eps_check; | 1064 | int eps_check; |
1048 | int ij_ik_cmp; | 1065 | int ij_ik_cmp; |
1049 | int ij_kj_cmp; | 1066 | int ij_kj_cmp; |
@@ -1053,10 +1070,382 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1053 | int kk_kj_cmp; | 1070 | int kk_kj_cmp; |
1054 | int clean_ik_kk_cmp; | 1071 | int clean_ik_kk_cmp; |
1055 | int clean_kk_kj_cmp; | 1072 | int clean_kk_kj_cmp; |
1073 | unsigned int cnt; | ||
1074 | |||
1056 | size_t length; | 1075 | size_t length; |
1057 | size_t length_l; | 1076 | size_t length_l; |
1058 | size_t length_r; | 1077 | size_t length_r; |
1059 | 1078 | ||
1079 | GNUNET_assert (NULL == *R_cur_ij && NULL != R_cur_ij); | ||
1080 | |||
1081 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1082 | // R_last == R^{(k-1)}, R_cur == R^{(k)} | ||
1083 | // R_cur_ij = R_cur_l | R_cur_r | ||
1084 | // R_cur_l == R^{(k-1)}_{ij} | ||
1085 | // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1086 | |||
1087 | if ((NULL == R_last_ij) && ((NULL == R_last_ik) || (NULL == R_last_kk) || /* technically cannot happen, but looks saner */ | ||
1088 | (NULL == R_last_kj))) | ||
1089 | { | ||
1090 | /* R^{(k)}_{ij} = N | N */ | ||
1091 | *R_cur_ij = NULL; | ||
1092 | return; | ||
1093 | } | ||
1094 | |||
1095 | if ((NULL == R_last_ik) || (NULL == R_last_kk) || /* technically cannot happen, but looks saner */ | ||
1096 | (NULL == R_last_kj)) | ||
1097 | { | ||
1098 | /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */ | ||
1099 | *R_cur_ij = GNUNET_strdup (R_last_ij); | ||
1100 | return; | ||
1101 | } | ||
1102 | |||
1103 | // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR | ||
1104 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1105 | |||
1106 | /* *R_cur_ij = NULL; */ | ||
1107 | R_cur_r = NULL; | ||
1108 | R_cur_l = NULL; | ||
1109 | |||
1110 | // cache results from strcmp, we might need these many times | ||
1111 | ij_kj_cmp = nullstrcmp (R_last_ij, R_last_kj); | ||
1112 | ij_ik_cmp = nullstrcmp (R_last_ij, R_last_ik); | ||
1113 | ik_kk_cmp = nullstrcmp (R_last_ik, R_last_kk); | ||
1114 | //ik_kj_cmp = nullstrcmp (R_last_ik, R_last_kj); | ||
1115 | kk_kj_cmp = nullstrcmp (R_last_kk, R_last_kj); | ||
1116 | |||
1117 | // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well | ||
1118 | // as parentheses, so we can better compare the contents | ||
1119 | R_temp_ik = remove_parentheses (remove_epsilon (R_last_ik)); | ||
1120 | R_temp_kk = remove_parentheses (remove_epsilon (R_last_kk)); | ||
1121 | R_temp_kj = remove_parentheses (remove_epsilon (R_last_kj)); | ||
1122 | |||
1123 | clean_ik_kk_cmp = nullstrcmp (R_last_ik, R_temp_kk); | ||
1124 | clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last_kj); | ||
1125 | |||
1126 | // construct R_cur_l (and, if necessary R_cur_r) | ||
1127 | if (NULL != R_last_ij) | ||
1128 | { | ||
1129 | // Assign R_temp_ij to R_last_ij and remove epsilon as well | ||
1130 | // as parentheses, so we can better compare the contents | ||
1131 | R_temp_ij = remove_parentheses (remove_epsilon (R_last_ij)); | ||
1132 | |||
1133 | if (0 == strcmp (R_temp_ij, R_temp_ik) && 0 == strcmp (R_temp_ik, R_temp_kk) | ||
1134 | && 0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1135 | { | ||
1136 | if (0 == strlen (R_temp_ij)) | ||
1137 | { | ||
1138 | R_cur_r = GNUNET_strdup (""); | ||
1139 | } | ||
1140 | else if ((0 == strncmp (R_last_ij, "(|", 2)) || | ||
1141 | (0 == strncmp (R_last_ik, "(|", 2) && | ||
1142 | 0 == strncmp (R_last_kj, "(|", 2))) | ||
1143 | { | ||
1144 | // a|(e|a)a*(e|a) = a* | ||
1145 | // a|(e|a)(e|a)*(e|a) = a* | ||
1146 | // (e|a)|aa*a = a* | ||
1147 | // (e|a)|aa*(e|a) = a* | ||
1148 | // (e|a)|(e|a)a*a = a* | ||
1149 | // (e|a)|(e|a)a*(e|a) = a* | ||
1150 | // (e|a)|(e|a)(e|a)*(e|a) = a* | ||
1151 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1152 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij); | ||
1153 | else | ||
1154 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij); | ||
1155 | } | ||
1156 | else | ||
1157 | { | ||
1158 | // a|aa*a = a+ | ||
1159 | // a|(e|a)a*a = a+ | ||
1160 | // a|aa*(e|a) = a+ | ||
1161 | // a|(e|a)(e|a)*a = a+ | ||
1162 | // a|a(e|a)*(e|a) = a+ | ||
1163 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1164 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij); | ||
1165 | else | ||
1166 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij); | ||
1167 | } | ||
1168 | } | ||
1169 | else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && 0 != clean_ik_kk_cmp) | ||
1170 | { | ||
1171 | // a|ab*b = ab* | ||
1172 | if (strlen (R_last_kk) < 1) | ||
1173 | R_cur_r = GNUNET_strdup (R_last_ij); | ||
1174 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | ||
1175 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ij, R_temp_kk); | ||
1176 | else | ||
1177 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ij, R_last_kk); | ||
1178 | |||
1179 | R_cur_l = NULL; | ||
1180 | } | ||
1181 | else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && 0 != clean_kk_kj_cmp) | ||
1182 | { | ||
1183 | // a|bb*a = b*a | ||
1184 | if (strlen (R_last_kk) < 1) | ||
1185 | R_cur_r = GNUNET_strdup (R_last_kj); | ||
1186 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | ||
1187 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_kj); | ||
1188 | else | ||
1189 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_kj); | ||
1190 | |||
1191 | R_cur_l = NULL; | ||
1192 | } | ||
1193 | else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && !has_epsilon (R_last_ij) && | ||
1194 | has_epsilon (R_last_kk)) | ||
1195 | { | ||
1196 | // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* | ||
1197 | if (needs_parentheses (R_temp_kk)) | ||
1198 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ij, R_temp_kk); | ||
1199 | else | ||
1200 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ij, R_temp_kk); | ||
1201 | |||
1202 | R_cur_l = NULL; | ||
1203 | } | ||
1204 | else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && !has_epsilon (R_last_ij) && | ||
1205 | has_epsilon (R_last_kk)) | ||
1206 | { | ||
1207 | // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a | ||
1208 | if (needs_parentheses (R_temp_kk)) | ||
1209 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_ij); | ||
1210 | else | ||
1211 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_ij); | ||
1212 | |||
1213 | R_cur_l = NULL; | ||
1214 | } | ||
1215 | else | ||
1216 | { | ||
1217 | temp_a = (NULL == R_last_ij) ? NULL : GNUNET_strdup (R_last_ij); | ||
1218 | temp_a = remove_parentheses (temp_a); | ||
1219 | R_cur_l = temp_a; | ||
1220 | } | ||
1221 | |||
1222 | GNUNET_free_non_null (R_temp_ij); | ||
1223 | } | ||
1224 | else | ||
1225 | { | ||
1226 | // we have no left side | ||
1227 | R_cur_l = NULL; | ||
1228 | } | ||
1229 | |||
1230 | // construct R_cur_r, if not already constructed | ||
1231 | if (NULL == R_cur_r) | ||
1232 | { | ||
1233 | length = strlen (R_temp_kk) - strlen (R_last_ik); | ||
1234 | |||
1235 | // a(ba)*bx = (ab)+x | ||
1236 | if (length > 0 && NULL != R_last_kk && 0 < strlen (R_last_kk) && | ||
1237 | NULL != R_last_kj && 0 < strlen (R_last_kj) && NULL != R_last_ik && | ||
1238 | 0 < strlen (R_last_ik) && 0 == strkcmp (R_temp_kk, R_last_ik, length) && | ||
1239 | 0 == strncmp (R_temp_kk, R_last_kj, length)) | ||
1240 | { | ||
1241 | temp_a = GNUNET_malloc (length + 1); | ||
1242 | temp_b = GNUNET_malloc ((strlen (R_last_kj) - length) + 1); | ||
1243 | |||
1244 | length_l = 0; | ||
1245 | length_r = 0; | ||
1246 | |||
1247 | for (cnt = 0; cnt < strlen (R_last_kj); cnt++) | ||
1248 | { | ||
1249 | if (cnt < length) | ||
1250 | { | ||
1251 | temp_a[length_l] = R_last_kj[cnt]; | ||
1252 | length_l++; | ||
1253 | } | ||
1254 | else | ||
1255 | { | ||
1256 | temp_b[length_r] = R_last_kj[cnt]; | ||
1257 | length_r++; | ||
1258 | } | ||
1259 | } | ||
1260 | temp_a[length_l] = '\0'; | ||
1261 | temp_b[length_r] = '\0'; | ||
1262 | |||
1263 | // e|(ab)+ = (ab)* | ||
1264 | if (NULL != R_cur_l && 0 == strlen (R_cur_l) && 0 == strlen (temp_b)) | ||
1265 | { | ||
1266 | GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last_ik, temp_a); | ||
1267 | GNUNET_free (R_cur_l); | ||
1268 | R_cur_l = NULL; | ||
1269 | } | ||
1270 | else | ||
1271 | { | ||
1272 | GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last_ik, temp_a, temp_b); | ||
1273 | } | ||
1274 | GNUNET_free (temp_a); | ||
1275 | GNUNET_free (temp_b); | ||
1276 | } | ||
1277 | else if (0 == strcmp (R_temp_ik, R_temp_kk) && | ||
1278 | 0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1279 | { | ||
1280 | // (e|a)a*(e|a) = a* | ||
1281 | // (e|a)(e|a)*(e|a) = a* | ||
1282 | if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj)) | ||
1283 | { | ||
1284 | if (needs_parentheses (R_temp_kk)) | ||
1285 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk); | ||
1286 | else | ||
1287 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk); | ||
1288 | } | ||
1289 | // aa*a = a+a | ||
1290 | else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp && | ||
1291 | !has_epsilon (R_last_ik)) | ||
1292 | { | ||
1293 | if (needs_parentheses (R_temp_kk)) | ||
1294 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | ||
1295 | else | ||
1296 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | ||
1297 | } | ||
1298 | // (e|a)a*a = a+ | ||
1299 | // aa*(e|a) = a+ | ||
1300 | // a(e|a)*(e|a) = a+ | ||
1301 | // (e|a)a*a = a+ | ||
1302 | else | ||
1303 | { | ||
1304 | eps_check = | ||
1305 | (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) + | ||
1306 | has_epsilon (R_last_kj)); | ||
1307 | |||
1308 | if (eps_check == 1) | ||
1309 | { | ||
1310 | if (needs_parentheses (R_temp_kk)) | ||
1311 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk); | ||
1312 | else | ||
1313 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk); | ||
1314 | } | ||
1315 | } | ||
1316 | } | ||
1317 | // aa*b = a+b | ||
1318 | // (e|a)(e|a)*b = a*b | ||
1319 | else if (0 == strcmp (R_temp_ik, R_temp_kk)) | ||
1320 | { | ||
1321 | if (has_epsilon (R_last_ik)) | ||
1322 | { | ||
1323 | if (needs_parentheses (R_temp_kk)) | ||
1324 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_kj); | ||
1325 | else | ||
1326 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_kj); | ||
1327 | } | ||
1328 | else | ||
1329 | { | ||
1330 | if (needs_parentheses (R_temp_kk)) | ||
1331 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last_kj); | ||
1332 | else | ||
1333 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last_kj); | ||
1334 | } | ||
1335 | } | ||
1336 | // ba*a = ba+ | ||
1337 | // b(e|a)*(e|a) = ba* | ||
1338 | else if (0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1339 | { | ||
1340 | if (has_epsilon (R_last_kj)) | ||
1341 | { | ||
1342 | if (needs_parentheses (R_temp_kk)) | ||
1343 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ik, R_temp_kk); | ||
1344 | else | ||
1345 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ik, R_temp_kk); | ||
1346 | } | ||
1347 | else | ||
1348 | { | ||
1349 | if (needs_parentheses (R_temp_kk)) | ||
1350 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last_ik, R_temp_kk); | ||
1351 | else | ||
1352 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_last_ik, R_temp_kk); | ||
1353 | } | ||
1354 | } | ||
1355 | else | ||
1356 | { | ||
1357 | if (strlen (R_temp_kk) > 0) | ||
1358 | { | ||
1359 | if (needs_parentheses (R_temp_kk)) | ||
1360 | { | ||
1361 | GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last_ik, R_temp_kk, | ||
1362 | R_last_kj); | ||
1363 | } | ||
1364 | else | ||
1365 | { | ||
1366 | GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last_ik, R_temp_kk, | ||
1367 | R_last_kj); | ||
1368 | } | ||
1369 | } | ||
1370 | else | ||
1371 | { | ||
1372 | GNUNET_asprintf (&R_cur_r, "%s%s", R_last_ik, R_last_kj); | ||
1373 | } | ||
1374 | } | ||
1375 | } | ||
1376 | |||
1377 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */ | ||
1378 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */ | ||
1379 | |||
1380 | if (NULL == R_cur_l && NULL == R_cur_r) | ||
1381 | { | ||
1382 | *R_cur_ij = NULL; | ||
1383 | return; | ||
1384 | } | ||
1385 | |||
1386 | if (NULL != R_cur_l && NULL == R_cur_r) | ||
1387 | { | ||
1388 | *R_cur_ij = GNUNET_strdup (R_cur_l); | ||
1389 | return; | ||
1390 | } | ||
1391 | |||
1392 | if (NULL == R_cur_l && NULL != R_cur_r) | ||
1393 | { | ||
1394 | *R_cur_ij = GNUNET_strdup (R_cur_r); | ||
1395 | return; | ||
1396 | } | ||
1397 | |||
1398 | if (R_cur_l[0] == 'C' || R_cur_r[0] == 'C') | ||
1399 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s R_cur_r: %s\n", R_cur_l, | ||
1400 | R_cur_r); | ||
1401 | |||
1402 | |||
1403 | if (0 == nullstrcmp (R_cur_l, R_cur_r)) | ||
1404 | { | ||
1405 | *R_cur_ij = GNUNET_strdup (R_cur_l); | ||
1406 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, ">>>>>>>>>>>>>> %s == %s\n", R_cur_l, | ||
1407 | R_cur_r); | ||
1408 | return; | ||
1409 | } | ||
1410 | |||
1411 | /* else */ | ||
1412 | /* { */ | ||
1413 | /* GNUNET_log (GNUNET_ERROR_TYPE_ERROR, ">>>>>>>>>>>>>> %s != %s\n", R_cur_l, R_cur_r); */ | ||
1414 | /* } */ | ||
1415 | |||
1416 | GNUNET_asprintf (R_cur_ij, "(%s|%s)", R_cur_l, R_cur_r); | ||
1417 | |||
1418 | GNUNET_free_non_null (R_cur_l); | ||
1419 | GNUNET_free_non_null (R_cur_r); | ||
1420 | |||
1421 | GNUNET_free_non_null (R_temp_ik); | ||
1422 | GNUNET_free_non_null (R_temp_kk); | ||
1423 | GNUNET_free_non_null (R_temp_kj); | ||
1424 | |||
1425 | } | ||
1426 | |||
1427 | /** | ||
1428 | * create proofs for all states in the given automaton. Implementation of the | ||
1429 | * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and | ||
1430 | * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. | ||
1431 | * | ||
1432 | * @param a automaton. | ||
1433 | */ | ||
1434 | static void | ||
1435 | automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | ||
1436 | { | ||
1437 | unsigned int n = a->state_count; | ||
1438 | struct GNUNET_REGEX_State *states[n]; | ||
1439 | char *R_last[n][n]; | ||
1440 | char *R_cur[n][n]; | ||
1441 | char *temp; | ||
1442 | struct Transition *t; | ||
1443 | char *complete_regex; | ||
1444 | unsigned int i; | ||
1445 | unsigned int j; | ||
1446 | unsigned int k; | ||
1447 | |||
1448 | |||
1060 | /* create depth-first numbering of the states, initializes 'state' */ | 1449 | /* create depth-first numbering of the states, initializes 'state' */ |
1061 | automaton_traverse (a, &number_states, states); | 1450 | automaton_traverse (a, &number_states, states); |
1062 | 1451 | ||
@@ -1075,31 +1464,29 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1075 | GNUNET_asprintf (&R_last[i][j], "%c", t->label); | 1464 | GNUNET_asprintf (&R_last[i][j], "%c", t->label); |
1076 | else | 1465 | else |
1077 | { | 1466 | { |
1078 | temp_a = R_last[i][j]; | 1467 | temp = R_last[i][j]; |
1079 | GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); | 1468 | GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); |
1080 | GNUNET_free (temp_a); | 1469 | GNUNET_free (temp); |
1081 | } | 1470 | } |
1082 | } | 1471 | } |
1083 | if (NULL == R_last[i][i]) | 1472 | if (NULL == R_last[i][i]) |
1084 | GNUNET_asprintf (&R_last[i][i], ""); | 1473 | GNUNET_asprintf (&R_last[i][i], ""); |
1085 | else | 1474 | else |
1086 | { | 1475 | { |
1087 | temp_a = R_last[i][i]; | 1476 | temp = R_last[i][i]; |
1088 | GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]); | 1477 | GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]); |
1089 | GNUNET_free (temp_a); | 1478 | GNUNET_free (temp); |
1090 | } | 1479 | } |
1091 | } | 1480 | } |
1092 | for (i = 0; i < n; i++) | 1481 | for (i = 0; i < n; i++) |
1093 | for (j = 0; j < n; j++) | 1482 | for (j = 0; j < n; j++) |
1094 | if (needs_parentheses (R_last[i][j])) | 1483 | if (needs_parentheses (R_last[i][j])) |
1095 | { | 1484 | { |
1096 | temp_a = R_last[i][j]; | 1485 | temp = R_last[i][j]; |
1097 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); | 1486 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); |
1098 | GNUNET_free (temp_a); | 1487 | GNUNET_free (temp); |
1099 | } | 1488 | } |
1100 | 1489 | ||
1101 | // TODO: clean up and fix the induction part | ||
1102 | |||
1103 | /* Compute regular expressions of length "k" between each pair of states per induction */ | 1490 | /* Compute regular expressions of length "k" between each pair of states per induction */ |
1104 | for (k = 0; k < n; k++) | 1491 | for (k = 0; k < n; k++) |
1105 | { | 1492 | { |
@@ -1110,341 +1497,11 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1110 | // Basis for the recursion: | 1497 | // Basis for the recursion: |
1111 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | 1498 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} |
1112 | // R_last == R^{(k-1)}, R_cur == R^{(k)} | 1499 | // R_last == R^{(k-1)}, R_cur == R^{(k)} |
1113 | // With: R_cur[i][j] = R_cur_l | R_cur_r | ||
1114 | // R_cur_l == R^{(k-1)}_{ij} | ||
1115 | // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1116 | |||
1117 | if ((NULL == R_last[i][j]) && ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */ | ||
1118 | (NULL == R_last[k][j]))) | ||
1119 | { | ||
1120 | /* R^{(k)}_{ij} = N | N */ | ||
1121 | /* R_cur[i][j] is already NULL */ | ||
1122 | continue; | ||
1123 | } | ||
1124 | if ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */ | ||
1125 | (NULL == R_last[k][j])) | ||
1126 | { | ||
1127 | /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */ | ||
1128 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); | ||
1129 | continue; | ||
1130 | } | ||
1131 | // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR | ||
1132 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1133 | |||
1134 | R_cur[i][j] = NULL; | ||
1135 | R_cur_r = NULL; | ||
1136 | R_cur_l = NULL; | ||
1137 | |||
1138 | // cache results from strcmp, we might need these many times | ||
1139 | ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]); | ||
1140 | ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]); | ||
1141 | ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]); | ||
1142 | //ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]); | ||
1143 | kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]); | ||
1144 | 1500 | ||
1145 | // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well | 1501 | // Create R_cur[i][j] and simplify the expression |
1146 | // as parentheses, so we can better compare the contents | 1502 | automaton_create_proofs_simplify (R_last[i][j], R_last[i][k], |
1147 | R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k])); | 1503 | R_last[k][k], R_last[k][j], |
1148 | R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k])); | 1504 | &R_cur[i][j]); |
1149 | R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j])); | ||
1150 | |||
1151 | clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk); | ||
1152 | clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]); | ||
1153 | |||
1154 | // construct R_cur_l (and, if necessary R_cur_r) | ||
1155 | if (NULL != R_last[i][j]) | ||
1156 | { | ||
1157 | // Assign R_temp_ij to R_last[i][j] and remove epsilon as well | ||
1158 | // as parentheses, so we can better compare the contents | ||
1159 | R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j])); | ||
1160 | |||
1161 | if (0 == strcmp (R_temp_ij, R_temp_ik) && | ||
1162 | 0 == strcmp (R_temp_ik, R_temp_kk) && | ||
1163 | 0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1164 | { | ||
1165 | if (0 == strlen (R_temp_ij)) | ||
1166 | { | ||
1167 | R_cur_r = GNUNET_strdup (""); | ||
1168 | } | ||
1169 | else if ((0 == strncmp (R_last[i][j], "(|", 2)) || | ||
1170 | (0 == strncmp (R_last[i][k], "(|", 2) && | ||
1171 | 0 == strncmp (R_last[k][j], "(|", 2))) | ||
1172 | { | ||
1173 | // a|(e|a)a*(e|a) = a* | ||
1174 | // a|(e|a)(e|a)*(e|a) = a* | ||
1175 | // (e|a)|aa*a = a* | ||
1176 | // (e|a)|aa*(e|a) = a* | ||
1177 | // (e|a)|(e|a)a*a = a* | ||
1178 | // (e|a)|(e|a)a*(e|a) = a* | ||
1179 | // (e|a)|(e|a)(e|a)*(e|a) = a* | ||
1180 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1181 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij); | ||
1182 | else | ||
1183 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij); | ||
1184 | } | ||
1185 | else | ||
1186 | { | ||
1187 | // a|aa*a = a+ | ||
1188 | // a|(e|a)a*a = a+ | ||
1189 | // a|aa*(e|a) = a+ | ||
1190 | // a|(e|a)(e|a)*a = a+ | ||
1191 | // a|a(e|a)*(e|a) = a+ | ||
1192 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1193 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij); | ||
1194 | else | ||
1195 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij); | ||
1196 | } | ||
1197 | } | ||
1198 | else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && | ||
1199 | 0 != clean_ik_kk_cmp) | ||
1200 | { | ||
1201 | // a|ab*b = ab* | ||
1202 | if (strlen (R_last[k][k]) < 1) | ||
1203 | R_cur_r = GNUNET_strdup (R_last[i][j]); | ||
1204 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | ||
1205 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); | ||
1206 | else | ||
1207 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]); | ||
1208 | |||
1209 | R_cur_l = NULL; | ||
1210 | } | ||
1211 | else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && | ||
1212 | 0 != clean_kk_kj_cmp) | ||
1213 | { | ||
1214 | // a|bb*a = b*a | ||
1215 | if (strlen (R_last[k][k]) < 1) | ||
1216 | R_cur_r = GNUNET_strdup (R_last[k][j]); | ||
1217 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | ||
1218 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]); | ||
1219 | else | ||
1220 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); | ||
1221 | |||
1222 | R_cur_l = NULL; | ||
1223 | } | ||
1224 | else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && | ||
1225 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | ||
1226 | { | ||
1227 | // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* | ||
1228 | if (needs_parentheses (R_temp_kk)) | ||
1229 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); | ||
1230 | else | ||
1231 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk); | ||
1232 | |||
1233 | R_cur_l = NULL; | ||
1234 | } | ||
1235 | else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && | ||
1236 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | ||
1237 | { | ||
1238 | // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a | ||
1239 | if (needs_parentheses (R_temp_kk)) | ||
1240 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]); | ||
1241 | else | ||
1242 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]); | ||
1243 | |||
1244 | R_cur_l = NULL; | ||
1245 | } | ||
1246 | else | ||
1247 | { | ||
1248 | temp_a = | ||
1249 | (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]); | ||
1250 | temp_a = remove_parentheses (temp_a); | ||
1251 | R_cur_l = temp_a; | ||
1252 | } | ||
1253 | |||
1254 | GNUNET_free_non_null (R_temp_ij); | ||
1255 | } | ||
1256 | else | ||
1257 | { | ||
1258 | // we have no left side | ||
1259 | R_cur_l = NULL; | ||
1260 | } | ||
1261 | |||
1262 | // construct R_cur_r, if not already constructed | ||
1263 | if (NULL == R_cur_r) | ||
1264 | { | ||
1265 | length = strlen (R_temp_kk) - strlen (R_last[i][k]); | ||
1266 | |||
1267 | // a(ba)*bx = (ab)+x | ||
1268 | if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) && | ||
1269 | NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && | ||
1270 | NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) && | ||
1271 | 0 == strkcmp (R_temp_kk, R_last[i][k], length) && | ||
1272 | 0 == strncmp (R_temp_kk, R_last[k][j], length)) | ||
1273 | { | ||
1274 | temp_a = GNUNET_malloc (length + 1); | ||
1275 | temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1); | ||
1276 | |||
1277 | length_l = 0; | ||
1278 | length_r = 0; | ||
1279 | |||
1280 | for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) | ||
1281 | { | ||
1282 | if (cnt < length) | ||
1283 | { | ||
1284 | temp_a[length_l] = R_last[k][j][cnt]; | ||
1285 | length_l++; | ||
1286 | } | ||
1287 | else | ||
1288 | { | ||
1289 | temp_b[length_r] = R_last[k][j][cnt]; | ||
1290 | length_r++; | ||
1291 | } | ||
1292 | } | ||
1293 | temp_a[length_l] = '\0'; | ||
1294 | temp_b[length_r] = '\0'; | ||
1295 | |||
1296 | // e|(ab)+ = (ab)* | ||
1297 | if (NULL != R_cur_l && 0 == strlen (R_cur_l) && | ||
1298 | 0 == strlen (temp_b)) | ||
1299 | { | ||
1300 | GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a); | ||
1301 | GNUNET_free (R_cur_l); | ||
1302 | R_cur_l = NULL; | ||
1303 | } | ||
1304 | else | ||
1305 | { | ||
1306 | GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a, | ||
1307 | temp_b); | ||
1308 | } | ||
1309 | GNUNET_free (temp_a); | ||
1310 | GNUNET_free (temp_b); | ||
1311 | } | ||
1312 | else if (0 == strcmp (R_temp_ik, R_temp_kk) && | ||
1313 | 0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1314 | { | ||
1315 | // (e|a)a*(e|a) = a* | ||
1316 | // (e|a)(e|a)*(e|a) = a* | ||
1317 | if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j])) | ||
1318 | { | ||
1319 | if (needs_parentheses (R_temp_kk)) | ||
1320 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk); | ||
1321 | else | ||
1322 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk); | ||
1323 | } | ||
1324 | // aa*a = a+a | ||
1325 | else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp && | ||
1326 | !has_epsilon (R_last[i][k])) | ||
1327 | { | ||
1328 | if (needs_parentheses (R_temp_kk)) | ||
1329 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | ||
1330 | else | ||
1331 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | ||
1332 | } | ||
1333 | // (e|a)a*a = a+ | ||
1334 | // aa*(e|a) = a+ | ||
1335 | // a(e|a)*(e|a) = a+ | ||
1336 | // (e|a)a*a = a+ | ||
1337 | else | ||
1338 | { | ||
1339 | eps_check = | ||
1340 | (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) + | ||
1341 | has_epsilon (R_last[k][j])); | ||
1342 | |||
1343 | if (eps_check == 1) | ||
1344 | { | ||
1345 | if (needs_parentheses (R_temp_kk)) | ||
1346 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk); | ||
1347 | else | ||
1348 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk); | ||
1349 | } | ||
1350 | } | ||
1351 | } | ||
1352 | // aa*b = a+b | ||
1353 | // (e|a)(e|a)*b = a*b | ||
1354 | else if (0 == strcmp (R_temp_ik, R_temp_kk)) | ||
1355 | { | ||
1356 | if (has_epsilon (R_last[i][k])) | ||
1357 | { | ||
1358 | if (needs_parentheses (R_temp_kk)) | ||
1359 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]); | ||
1360 | else | ||
1361 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); | ||
1362 | } | ||
1363 | else | ||
1364 | { | ||
1365 | if (needs_parentheses (R_temp_kk)) | ||
1366 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last[k][j]); | ||
1367 | else | ||
1368 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]); | ||
1369 | } | ||
1370 | } | ||
1371 | // ba*a = ba+ | ||
1372 | // b(e|a)*(e|a) = ba* | ||
1373 | else if (0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1374 | { | ||
1375 | if (has_epsilon (R_last[k][j])) | ||
1376 | { | ||
1377 | if (needs_parentheses (R_temp_kk)) | ||
1378 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], R_temp_kk); | ||
1379 | else | ||
1380 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk); | ||
1381 | } | ||
1382 | else | ||
1383 | { | ||
1384 | if (needs_parentheses (R_temp_kk)) | ||
1385 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], R_temp_kk); | ||
1386 | else | ||
1387 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk); | ||
1388 | } | ||
1389 | } | ||
1390 | else | ||
1391 | { | ||
1392 | if (strlen (R_temp_kk) > 0) | ||
1393 | { | ||
1394 | if (needs_parentheses (R_temp_kk)) | ||
1395 | { | ||
1396 | GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], R_temp_kk, | ||
1397 | R_last[k][j]); | ||
1398 | } | ||
1399 | else | ||
1400 | { | ||
1401 | GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk, | ||
1402 | R_last[k][j]); | ||
1403 | } | ||
1404 | } | ||
1405 | else | ||
1406 | { | ||
1407 | GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]); | ||
1408 | } | ||
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */ | ||
1413 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */ | ||
1414 | |||
1415 | // putting it all together | ||
1416 | if (NULL != R_cur_l && NULL != R_cur_r) | ||
1417 | { | ||
1418 | // a|a = a | ||
1419 | if (0 == strcmp (R_cur_l, R_cur_r)) | ||
1420 | { | ||
1421 | R_cur[i][j] = GNUNET_strdup (R_cur_l); | ||
1422 | } | ||
1423 | // R_cur_l | R_cur_r | ||
1424 | else | ||
1425 | { | ||
1426 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); | ||
1427 | } | ||
1428 | } | ||
1429 | else if (NULL != R_cur_l) | ||
1430 | { | ||
1431 | R_cur[i][j] = GNUNET_strdup (R_cur_l); | ||
1432 | } | ||
1433 | else if (NULL != R_cur_r) | ||
1434 | { | ||
1435 | R_cur[i][j] = GNUNET_strdup (R_cur_r); | ||
1436 | } | ||
1437 | else | ||
1438 | { | ||
1439 | R_cur[i][j] = NULL; | ||
1440 | } | ||
1441 | |||
1442 | GNUNET_free_non_null (R_cur_l); | ||
1443 | GNUNET_free_non_null (R_cur_r); | ||
1444 | |||
1445 | GNUNET_free_non_null (R_temp_ik); | ||
1446 | GNUNET_free_non_null (R_temp_kk); | ||
1447 | GNUNET_free_non_null (R_temp_kj); | ||
1448 | } | 1505 | } |
1449 | } | 1506 | } |
1450 | 1507 | ||
@@ -1478,14 +1535,16 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1478 | if (states[i]->accepting) | 1535 | if (states[i]->accepting) |
1479 | { | 1536 | { |
1480 | if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i])) | 1537 | if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i])) |
1538 | { | ||
1481 | GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]); | 1539 | GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]); |
1540 | } | ||
1482 | else if (NULL != R_last[a->start->proof_id][i] && | 1541 | else if (NULL != R_last[a->start->proof_id][i] && |
1483 | 0 < strlen (R_last[a->start->proof_id][i])) | 1542 | 0 < strlen (R_last[a->start->proof_id][i])) |
1484 | { | 1543 | { |
1485 | temp_a = complete_regex; | 1544 | temp = complete_regex; |
1486 | GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, | 1545 | GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, |
1487 | R_last[a->start->proof_id][i]); | 1546 | R_last[a->start->proof_id][i]); |
1488 | GNUNET_free (temp_a); | 1547 | GNUNET_free (temp); |
1489 | } | 1548 | } |
1490 | } | 1549 | } |
1491 | } | 1550 | } |
@@ -1524,8 +1583,6 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1524 | int len = 0; | 1583 | int len = 0; |
1525 | struct GNUNET_REGEX_State *cstate; | 1584 | struct GNUNET_REGEX_State *cstate; |
1526 | struct Transition *ctran; | 1585 | struct Transition *ctran; |
1527 | int insert = 1; | ||
1528 | struct Transition *t; | ||
1529 | unsigned int i; | 1586 | unsigned int i; |
1530 | 1587 | ||
1531 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); | 1588 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); |
@@ -1573,21 +1630,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1573 | for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next) | 1630 | for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next) |
1574 | { | 1631 | { |
1575 | if (0 != ctran->label) | 1632 | if (0 != ctran->label) |
1576 | { | 1633 | state_add_transition (ctx, s, ctran->label, NULL); |
1577 | insert = 1; | ||
1578 | |||
1579 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
1580 | { | ||
1581 | if (t->label == ctran->label) | ||
1582 | { | ||
1583 | insert = 0; | ||
1584 | break; | ||
1585 | } | ||
1586 | } | ||
1587 | |||
1588 | if (insert) | ||
1589 | state_add_transition (ctx, s, ctran->label, NULL); | ||
1590 | } | ||
1591 | } | 1634 | } |
1592 | 1635 | ||
1593 | // If the nfa_states contain an accepting state, the new dfa state is also | 1636 | // If the nfa_states contain an accepting state, the new dfa state is also |
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index 6d575a05c..5a6575c86 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -58,7 +58,7 @@ int | |||
58 | test_random (unsigned int rx_length, unsigned int max_str_len, | 58 | test_random (unsigned int rx_length, unsigned int max_str_len, |
59 | unsigned int str_count) | 59 | unsigned int str_count) |
60 | { | 60 | { |
61 | int i; | 61 | unsigned int i; |
62 | char *rand_rx; | 62 | char *rand_rx; |
63 | char *matching_str; | 63 | char *matching_str; |
64 | int eval; | 64 | int eval; |
@@ -114,6 +114,11 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
114 | eval_check = regexec (&rx, matching_str, 1, matchptr, 0); | 114 | eval_check = regexec (&rx, matching_str, 1, matchptr, 0); |
115 | regfree (&rx); | 115 | regfree (&rx); |
116 | 116 | ||
117 | // We only want to match the whole string, because that's what our DFA does, too. | ||
118 | if (eval_check == 0 && | ||
119 | (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str))) | ||
120 | eval_check = 1; | ||
121 | |||
117 | // Match canonical regex | 122 | // Match canonical regex |
118 | if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED)) | 123 | if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED)) |
119 | { | 124 | { |
@@ -127,11 +132,6 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
127 | regfree (&rx); | 132 | regfree (&rx); |
128 | GNUNET_free (canonical_regex); | 133 | GNUNET_free (canonical_regex); |
129 | 134 | ||
130 | // We only want to match the whole string, because that's what our DFA does, too. | ||
131 | if (eval_check == 0 && | ||
132 | (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str))) | ||
133 | eval_check = 1; | ||
134 | |||
135 | // compare result | 135 | // compare result |
136 | if (eval_check != eval) | 136 | if (eval_check != eval) |
137 | { | 137 | { |
@@ -198,11 +198,11 @@ test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx, | |||
198 | result = 1; | 198 | result = 1; |
199 | regerror (eval_check, rx, error, sizeof error); | 199 | regerror (eval_check, rx, error, sizeof error); |
200 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 200 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
201 | "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n" | 201 | "Unexpected result:\nregex: %s\ncanonical_regex: %s\nstring: %s\nexpected result: %i\n" |
202 | "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n", | 202 | "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n", |
203 | rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], | 203 | rxstr->regex, GNUNET_REGEX_get_canonical_regex (a), |
204 | eval, eval_check, error, matchptr[0].rm_so, | 204 | rxstr->strings[i], rxstr->expected_results[i], eval, |
205 | matchptr[0].rm_eo); | 205 | eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo); |
206 | } | 206 | } |
207 | } | 207 | } |
208 | return result; | 208 | return result; |
@@ -298,6 +298,7 @@ main (int argc, char *argv[]) | |||
298 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 298 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
299 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a)); | 299 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a)); |
300 | GNUNET_REGEX_automaton_destroy (a); | 300 | GNUNET_REGEX_automaton_destroy (a); |
301 | |||
301 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); | 302 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); |
302 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 303 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
303 | GNUNET_REGEX_automaton_destroy (a); | 304 | GNUNET_REGEX_automaton_destroy (a); |
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index e7052f5ee..9f4f2280e 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c | |||
@@ -32,7 +32,7 @@ key_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof, | |||
32 | int accepting, unsigned int num_edges, | 32 | int accepting, unsigned int num_edges, |
33 | const struct GNUNET_REGEX_Edge *edges) | 33 | const struct GNUNET_REGEX_Edge *edges) |
34 | { | 34 | { |
35 | int i; | 35 | unsigned int i; |
36 | 36 | ||
37 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n", | 37 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n", |
38 | accepting); | 38 | accepting); |
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c index 85fc3079d..831f4dc48 100644 --- a/src/regex/test_regex_proofs.c +++ b/src/regex/test_regex_proofs.c | |||
@@ -106,39 +106,48 @@ test_proofs_static (void) | |||
106 | unsigned int i; | 106 | unsigned int i; |
107 | unsigned int error; | 107 | unsigned int error; |
108 | 108 | ||
109 | const char *regex[4] = { | 109 | const char *regex[6] = { |
110 | "a|aa*a", | 110 | "a|aa*a", |
111 | "a+", | 111 | "a+", |
112 | "a*", | 112 | "a*", |
113 | "a*a*" | 113 | "a*a*", |
114 | "(F*C|WfPf|y+F*C)", | ||
115 | "y*F*C|WfPf", | ||
116 | /* "2?jA?e?D*K", */ | ||
117 | /* "((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", */ | ||
118 | /* "((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", */ | ||
114 | }; | 119 | }; |
115 | 120 | ||
116 | char *canonical_regex; | 121 | const char *canon_rx1; |
117 | struct GNUNET_REGEX_Automaton *dfa; | 122 | const char *canon_rx2; |
123 | struct GNUNET_REGEX_Automaton *dfa1; | ||
124 | struct GNUNET_REGEX_Automaton *dfa2; | ||
118 | 125 | ||
119 | error = 0; | 126 | error = 0; |
120 | 127 | ||
121 | for (i = 0; i < 4; i += 2) | 128 | for (i = 0; i < 6; i += 2) |
122 | { | 129 | { |
123 | dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 130 | dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); |
124 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); | 131 | dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); |
125 | GNUNET_REGEX_automaton_destroy (dfa); | 132 | |
133 | canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1); | ||
134 | canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2); | ||
126 | 135 | ||
127 | dfa = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); | 136 | error += (0 == strcmp (canon_rx1, canon_rx2)) ? 0 : 1; |
128 | error += | ||
129 | (0 == | ||
130 | strcmp (canonical_regex, | ||
131 | GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1; | ||
132 | 137 | ||
133 | if (error > 0) | 138 | if (error > 0) |
134 | { | 139 | { |
135 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 140 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
136 | "Comparing canonical regex of %s with %s failed.\n", regex[i], | 141 | "Comparing canonical regex failed:\nrx1: %s\ncrx1: %s\nrx2: %s\ncrx2: %s\n", |
137 | regex[i + 1]); | 142 | regex[i], canon_rx1, regex[i + 1], canon_rx2); |
143 | |||
144 | /* Save the graphs of the two conflicting DFAs */ | ||
145 | /* GNUNET_REGEX_automaton_save_graph (dfa1, "proofs_fail_dfa1.dot"); */ | ||
146 | /* GNUNET_REGEX_automaton_save_graph (dfa2, "proofs_fail_dfa2.dot"); */ | ||
138 | } | 147 | } |
139 | 148 | ||
140 | GNUNET_free (canonical_regex); | 149 | GNUNET_REGEX_automaton_destroy (dfa1); |
141 | GNUNET_REGEX_automaton_destroy (dfa); | 150 | GNUNET_REGEX_automaton_destroy (dfa2); |
142 | } | 151 | } |
143 | 152 | ||
144 | return error; | 153 | return error; |
@@ -161,7 +170,7 @@ main (int argc, char *argv[]) | |||
161 | error = 0; | 170 | error = 0; |
162 | 171 | ||
163 | error += test_proofs_static (); | 172 | error += test_proofs_static (); |
164 | // error += test_proofs_random (100, 10); | 173 | /* error += test_proofs_random (10000, 10); */ |
165 | 174 | ||
166 | return error; | 175 | return error; |
167 | } | 176 | } |