aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex.c
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-07-02 12:22:37 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-07-02 12:22:37 +0000
commita4b40b20949b4ea21d98bed902906cdf56d41326 (patch)
treec21f239124ccc818ca52832b736b18ae3522230a /src/regex/regex.c
parentce4b5c1fa37a9b26df4345d30b6ba2c839ce23d5 (diff)
downloadgnunet-a4b40b20949b4ea21d98bed902906cdf56d41326.tar.gz
gnunet-a4b40b20949b4ea21d98bed902906cdf56d41326.zip
regex bugfixes
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r--src/regex/regex.c811
1 files changed, 427 insertions, 384 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 09cd910a0..18f6b60ea 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -503,7 +503,7 @@ state_add_transition (struct GNUNET_REGEX_Context *ctx,
503 } 503 }
504 } 504 }
505 505
506 if (is_dup) 506 if (GNUNET_YES == is_dup)
507 return; 507 return;
508 508
509 // sort transitions by label 509 // sort transitions by label
@@ -734,21 +734,41 @@ automaton_merge_states (struct GNUNET_REGEX_Context *ctx,
734{ 734{
735 struct GNUNET_REGEX_State *s_check; 735 struct GNUNET_REGEX_State *s_check;
736 struct Transition *t_check; 736 struct Transition *t_check;
737 struct Transition *t;
738 struct Transition *t_next;
737 char *new_name; 739 char *new_name;
740 int is_dup;
738 741
739 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2); 742 GNUNET_assert (NULL != ctx && NULL != a && NULL != s1 && NULL != s2);
740 743
741 if (s1 == s2) 744 if (s1 == s2)
742 return; 745 return;
743 746
744 // 1. Make all transitions pointing to s2 point to s1 747 // 1. Make all transitions pointing to s2 point to s1, unless this transition
748 // does not already exists, if it already exists remove transition.
745 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) 749 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
746 { 750 {
747 for (t_check = s_check->transitions_head; NULL != t_check; 751 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
748 t_check = t_check->next)
749 { 752 {
753 t_next = t_check->next;
754
750 if (s2 == t_check->to_state) 755 if (s2 == t_check->to_state)
751 t_check->to_state = s1; 756 {
757 is_dup = GNUNET_NO;
758 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
759 {
760 if (t->to_state == s1 && t_check->label == t->label)
761 is_dup = GNUNET_YES;
762 }
763 if (GNUNET_NO == is_dup)
764 t_check->to_state = s1;
765 else
766 {
767 GNUNET_CONTAINER_DLL_remove (t_check->from_state->transitions_head,
768 t_check->from_state->transitions_tail,
769 t_check);
770 }
771 }
752 } 772 }
753 } 773 }
754 774
@@ -1015,22 +1035,23 @@ number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s)
1015 states[count] = s; 1035 states[count] = s;
1016} 1036}
1017 1037
1018 1038/**
1019/** 1039 * Construct the regular expression given the inductive step,
1020 * create proofs for all states in the given automaton. Implementation of the 1040 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
1021 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and 1041 * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
1022 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. 1042 *
1023 * 1043 * @param R_last_ij value of $R^{(k-1)_{ij}.
1024 * @param a automaton. 1044 * @param R_last_ik value of $R^{(k-1)_{ik}.
1045 * @param R_last_kk value of $R^{(k-1)_{kk}.
1046 * @param R_last_kj value of $R^{(k-1)_{kj}.
1047 * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij
1048 * is expected to be NULL when called!
1025 */ 1049 */
1026static void 1050static void
1027automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) 1051automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik,
1052 char *R_last_kk, char *R_last_kj,
1053 char **R_cur_ij)
1028{ 1054{
1029 unsigned int n = a->state_count;
1030 struct GNUNET_REGEX_State *states[n];
1031 char *R_last[n][n];
1032 char *R_cur[n][n];
1033 struct Transition *t;
1034 char *R_cur_l; 1055 char *R_cur_l;
1035 char *R_cur_r; 1056 char *R_cur_r;
1036 char *temp_a; 1057 char *temp_a;
@@ -1039,11 +1060,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1039 char *R_temp_ik; 1060 char *R_temp_ik;
1040 char *R_temp_kj; 1061 char *R_temp_kj;
1041 char *R_temp_kk; 1062 char *R_temp_kk;
1042 char *complete_regex; 1063
1043 unsigned int i;
1044 unsigned int j;
1045 unsigned int k;
1046 unsigned int cnt;
1047 int eps_check; 1064 int eps_check;
1048 int ij_ik_cmp; 1065 int ij_ik_cmp;
1049 int ij_kj_cmp; 1066 int ij_kj_cmp;
@@ -1053,10 +1070,382 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1053 int kk_kj_cmp; 1070 int kk_kj_cmp;
1054 int clean_ik_kk_cmp; 1071 int clean_ik_kk_cmp;
1055 int clean_kk_kj_cmp; 1072 int clean_kk_kj_cmp;
1073 unsigned int cnt;
1074
1056 size_t length; 1075 size_t length;
1057 size_t length_l; 1076 size_t length_l;
1058 size_t length_r; 1077 size_t length_r;
1059 1078
1079 GNUNET_assert (NULL == *R_cur_ij && NULL != R_cur_ij);
1080
1081 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1082 // R_last == R^{(k-1)}, R_cur == R^{(k)}
1083 // R_cur_ij = R_cur_l | R_cur_r
1084 // R_cur_l == R^{(k-1)}_{ij}
1085 // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1086
1087 if ((NULL == R_last_ij) && ((NULL == R_last_ik) || (NULL == R_last_kk) || /* technically cannot happen, but looks saner */
1088 (NULL == R_last_kj)))
1089 {
1090 /* R^{(k)}_{ij} = N | N */
1091 *R_cur_ij = NULL;
1092 return;
1093 }
1094
1095 if ((NULL == R_last_ik) || (NULL == R_last_kk) || /* technically cannot happen, but looks saner */
1096 (NULL == R_last_kj))
1097 {
1098 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1099 *R_cur_ij = GNUNET_strdup (R_last_ij);
1100 return;
1101 }
1102
1103 // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1104 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1105
1106 /* *R_cur_ij = NULL; */
1107 R_cur_r = NULL;
1108 R_cur_l = NULL;
1109
1110 // cache results from strcmp, we might need these many times
1111 ij_kj_cmp = nullstrcmp (R_last_ij, R_last_kj);
1112 ij_ik_cmp = nullstrcmp (R_last_ij, R_last_ik);
1113 ik_kk_cmp = nullstrcmp (R_last_ik, R_last_kk);
1114 //ik_kj_cmp = nullstrcmp (R_last_ik, R_last_kj);
1115 kk_kj_cmp = nullstrcmp (R_last_kk, R_last_kj);
1116
1117 // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1118 // as parentheses, so we can better compare the contents
1119 R_temp_ik = remove_parentheses (remove_epsilon (R_last_ik));
1120 R_temp_kk = remove_parentheses (remove_epsilon (R_last_kk));
1121 R_temp_kj = remove_parentheses (remove_epsilon (R_last_kj));
1122
1123 clean_ik_kk_cmp = nullstrcmp (R_last_ik, R_temp_kk);
1124 clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last_kj);
1125
1126 // construct R_cur_l (and, if necessary R_cur_r)
1127 if (NULL != R_last_ij)
1128 {
1129 // Assign R_temp_ij to R_last_ij and remove epsilon as well
1130 // as parentheses, so we can better compare the contents
1131 R_temp_ij = remove_parentheses (remove_epsilon (R_last_ij));
1132
1133 if (0 == strcmp (R_temp_ij, R_temp_ik) && 0 == strcmp (R_temp_ik, R_temp_kk)
1134 && 0 == strcmp (R_temp_kk, R_temp_kj))
1135 {
1136 if (0 == strlen (R_temp_ij))
1137 {
1138 R_cur_r = GNUNET_strdup ("");
1139 }
1140 else if ((0 == strncmp (R_last_ij, "(|", 2)) ||
1141 (0 == strncmp (R_last_ik, "(|", 2) &&
1142 0 == strncmp (R_last_kj, "(|", 2)))
1143 {
1144 // a|(e|a)a*(e|a) = a*
1145 // a|(e|a)(e|a)*(e|a) = a*
1146 // (e|a)|aa*a = a*
1147 // (e|a)|aa*(e|a) = a*
1148 // (e|a)|(e|a)a*a = a*
1149 // (e|a)|(e|a)a*(e|a) = a*
1150 // (e|a)|(e|a)(e|a)*(e|a) = a*
1151 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1152 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij);
1153 else
1154 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
1155 }
1156 else
1157 {
1158 // a|aa*a = a+
1159 // a|(e|a)a*a = a+
1160 // a|aa*(e|a) = a+
1161 // a|(e|a)(e|a)*a = a+
1162 // a|a(e|a)*(e|a) = a+
1163 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1164 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
1165 else
1166 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
1167 }
1168 }
1169 else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && 0 != clean_ik_kk_cmp)
1170 {
1171 // a|ab*b = ab*
1172 if (strlen (R_last_kk) < 1)
1173 R_cur_r = GNUNET_strdup (R_last_ij);
1174 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1175 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ij, R_temp_kk);
1176 else
1177 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ij, R_last_kk);
1178
1179 R_cur_l = NULL;
1180 }
1181 else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && 0 != clean_kk_kj_cmp)
1182 {
1183 // a|bb*a = b*a
1184 if (strlen (R_last_kk) < 1)
1185 R_cur_r = GNUNET_strdup (R_last_kj);
1186 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1187 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_kj);
1188 else
1189 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_kj);
1190
1191 R_cur_l = NULL;
1192 }
1193 else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && !has_epsilon (R_last_ij) &&
1194 has_epsilon (R_last_kk))
1195 {
1196 // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab*
1197 if (needs_parentheses (R_temp_kk))
1198 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ij, R_temp_kk);
1199 else
1200 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ij, R_temp_kk);
1201
1202 R_cur_l = NULL;
1203 }
1204 else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && !has_epsilon (R_last_ij) &&
1205 has_epsilon (R_last_kk))
1206 {
1207 // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a
1208 if (needs_parentheses (R_temp_kk))
1209 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_ij);
1210 else
1211 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_ij);
1212
1213 R_cur_l = NULL;
1214 }
1215 else
1216 {
1217 temp_a = (NULL == R_last_ij) ? NULL : GNUNET_strdup (R_last_ij);
1218 temp_a = remove_parentheses (temp_a);
1219 R_cur_l = temp_a;
1220 }
1221
1222 GNUNET_free_non_null (R_temp_ij);
1223 }
1224 else
1225 {
1226 // we have no left side
1227 R_cur_l = NULL;
1228 }
1229
1230 // construct R_cur_r, if not already constructed
1231 if (NULL == R_cur_r)
1232 {
1233 length = strlen (R_temp_kk) - strlen (R_last_ik);
1234
1235 // a(ba)*bx = (ab)+x
1236 if (length > 0 && NULL != R_last_kk && 0 < strlen (R_last_kk) &&
1237 NULL != R_last_kj && 0 < strlen (R_last_kj) && NULL != R_last_ik &&
1238 0 < strlen (R_last_ik) && 0 == strkcmp (R_temp_kk, R_last_ik, length) &&
1239 0 == strncmp (R_temp_kk, R_last_kj, length))
1240 {
1241 temp_a = GNUNET_malloc (length + 1);
1242 temp_b = GNUNET_malloc ((strlen (R_last_kj) - length) + 1);
1243
1244 length_l = 0;
1245 length_r = 0;
1246
1247 for (cnt = 0; cnt < strlen (R_last_kj); cnt++)
1248 {
1249 if (cnt < length)
1250 {
1251 temp_a[length_l] = R_last_kj[cnt];
1252 length_l++;
1253 }
1254 else
1255 {
1256 temp_b[length_r] = R_last_kj[cnt];
1257 length_r++;
1258 }
1259 }
1260 temp_a[length_l] = '\0';
1261 temp_b[length_r] = '\0';
1262
1263 // e|(ab)+ = (ab)*
1264 if (NULL != R_cur_l && 0 == strlen (R_cur_l) && 0 == strlen (temp_b))
1265 {
1266 GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last_ik, temp_a);
1267 GNUNET_free (R_cur_l);
1268 R_cur_l = NULL;
1269 }
1270 else
1271 {
1272 GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last_ik, temp_a, temp_b);
1273 }
1274 GNUNET_free (temp_a);
1275 GNUNET_free (temp_b);
1276 }
1277 else if (0 == strcmp (R_temp_ik, R_temp_kk) &&
1278 0 == strcmp (R_temp_kk, R_temp_kj))
1279 {
1280 // (e|a)a*(e|a) = a*
1281 // (e|a)(e|a)*(e|a) = a*
1282 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1283 {
1284 if (needs_parentheses (R_temp_kk))
1285 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk);
1286 else
1287 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk);
1288 }
1289 // aa*a = a+a
1290 else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp &&
1291 !has_epsilon (R_last_ik))
1292 {
1293 if (needs_parentheses (R_temp_kk))
1294 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1295 else
1296 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1297 }
1298 // (e|a)a*a = a+
1299 // aa*(e|a) = a+
1300 // a(e|a)*(e|a) = a+
1301 // (e|a)a*a = a+
1302 else
1303 {
1304 eps_check =
1305 (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
1306 has_epsilon (R_last_kj));
1307
1308 if (eps_check == 1)
1309 {
1310 if (needs_parentheses (R_temp_kk))
1311 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
1312 else
1313 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
1314 }
1315 }
1316 }
1317 // aa*b = a+b
1318 // (e|a)(e|a)*b = a*b
1319 else if (0 == strcmp (R_temp_ik, R_temp_kk))
1320 {
1321 if (has_epsilon (R_last_ik))
1322 {
1323 if (needs_parentheses (R_temp_kk))
1324 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last_kj);
1325 else
1326 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last_kj);
1327 }
1328 else
1329 {
1330 if (needs_parentheses (R_temp_kk))
1331 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last_kj);
1332 else
1333 GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last_kj);
1334 }
1335 }
1336 // ba*a = ba+
1337 // b(e|a)*(e|a) = ba*
1338 else if (0 == strcmp (R_temp_kk, R_temp_kj))
1339 {
1340 if (has_epsilon (R_last_kj))
1341 {
1342 if (needs_parentheses (R_temp_kk))
1343 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last_ik, R_temp_kk);
1344 else
1345 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last_ik, R_temp_kk);
1346 }
1347 else
1348 {
1349 if (needs_parentheses (R_temp_kk))
1350 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last_ik, R_temp_kk);
1351 else
1352 GNUNET_asprintf (&R_cur_r, "%s+%s", R_last_ik, R_temp_kk);
1353 }
1354 }
1355 else
1356 {
1357 if (strlen (R_temp_kk) > 0)
1358 {
1359 if (needs_parentheses (R_temp_kk))
1360 {
1361 GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last_ik, R_temp_kk,
1362 R_last_kj);
1363 }
1364 else
1365 {
1366 GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last_ik, R_temp_kk,
1367 R_last_kj);
1368 }
1369 }
1370 else
1371 {
1372 GNUNET_asprintf (&R_cur_r, "%s%s", R_last_ik, R_last_kj);
1373 }
1374 }
1375 }
1376
1377 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */
1378 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */
1379
1380 if (NULL == R_cur_l && NULL == R_cur_r)
1381 {
1382 *R_cur_ij = NULL;
1383 return;
1384 }
1385
1386 if (NULL != R_cur_l && NULL == R_cur_r)
1387 {
1388 *R_cur_ij = GNUNET_strdup (R_cur_l);
1389 return;
1390 }
1391
1392 if (NULL == R_cur_l && NULL != R_cur_r)
1393 {
1394 *R_cur_ij = GNUNET_strdup (R_cur_r);
1395 return;
1396 }
1397
1398 if (R_cur_l[0] == 'C' || R_cur_r[0] == 'C')
1399 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s R_cur_r: %s\n", R_cur_l,
1400 R_cur_r);
1401
1402
1403 if (0 == nullstrcmp (R_cur_l, R_cur_r))
1404 {
1405 *R_cur_ij = GNUNET_strdup (R_cur_l);
1406 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, ">>>>>>>>>>>>>> %s == %s\n", R_cur_l,
1407 R_cur_r);
1408 return;
1409 }
1410
1411 /* else */
1412 /* { */
1413 /* GNUNET_log (GNUNET_ERROR_TYPE_ERROR, ">>>>>>>>>>>>>> %s != %s\n", R_cur_l, R_cur_r); */
1414 /* } */
1415
1416 GNUNET_asprintf (R_cur_ij, "(%s|%s)", R_cur_l, R_cur_r);
1417
1418 GNUNET_free_non_null (R_cur_l);
1419 GNUNET_free_non_null (R_cur_r);
1420
1421 GNUNET_free_non_null (R_temp_ik);
1422 GNUNET_free_non_null (R_temp_kk);
1423 GNUNET_free_non_null (R_temp_kj);
1424
1425}
1426
1427/**
1428 * create proofs for all states in the given automaton. Implementation of the
1429 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
1430 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
1431 *
1432 * @param a automaton.
1433 */
1434static void
1435automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1436{
1437 unsigned int n = a->state_count;
1438 struct GNUNET_REGEX_State *states[n];
1439 char *R_last[n][n];
1440 char *R_cur[n][n];
1441 char *temp;
1442 struct Transition *t;
1443 char *complete_regex;
1444 unsigned int i;
1445 unsigned int j;
1446 unsigned int k;
1447
1448
1060 /* create depth-first numbering of the states, initializes 'state' */ 1449 /* create depth-first numbering of the states, initializes 'state' */
1061 automaton_traverse (a, &number_states, states); 1450 automaton_traverse (a, &number_states, states);
1062 1451
@@ -1075,31 +1464,29 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1075 GNUNET_asprintf (&R_last[i][j], "%c", t->label); 1464 GNUNET_asprintf (&R_last[i][j], "%c", t->label);
1076 else 1465 else
1077 { 1466 {
1078 temp_a = R_last[i][j]; 1467 temp = R_last[i][j];
1079 GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); 1468 GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label);
1080 GNUNET_free (temp_a); 1469 GNUNET_free (temp);
1081 } 1470 }
1082 } 1471 }
1083 if (NULL == R_last[i][i]) 1472 if (NULL == R_last[i][i])
1084 GNUNET_asprintf (&R_last[i][i], ""); 1473 GNUNET_asprintf (&R_last[i][i], "");
1085 else 1474 else
1086 { 1475 {
1087 temp_a = R_last[i][i]; 1476 temp = R_last[i][i];
1088 GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]); 1477 GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]);
1089 GNUNET_free (temp_a); 1478 GNUNET_free (temp);
1090 } 1479 }
1091 } 1480 }
1092 for (i = 0; i < n; i++) 1481 for (i = 0; i < n; i++)
1093 for (j = 0; j < n; j++) 1482 for (j = 0; j < n; j++)
1094 if (needs_parentheses (R_last[i][j])) 1483 if (needs_parentheses (R_last[i][j]))
1095 { 1484 {
1096 temp_a = R_last[i][j]; 1485 temp = R_last[i][j];
1097 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); 1486 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
1098 GNUNET_free (temp_a); 1487 GNUNET_free (temp);
1099 } 1488 }
1100 1489
1101 // TODO: clean up and fix the induction part
1102
1103 /* Compute regular expressions of length "k" between each pair of states per induction */ 1490 /* Compute regular expressions of length "k" between each pair of states per induction */
1104 for (k = 0; k < n; k++) 1491 for (k = 0; k < n; k++)
1105 { 1492 {
@@ -1110,341 +1497,11 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1110 // Basis for the recursion: 1497 // Basis for the recursion:
1111 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} 1498 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1112 // R_last == R^{(k-1)}, R_cur == R^{(k)} 1499 // R_last == R^{(k-1)}, R_cur == R^{(k)}
1113 // With: R_cur[i][j] = R_cur_l | R_cur_r
1114 // R_cur_l == R^{(k-1)}_{ij}
1115 // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1116
1117 if ((NULL == R_last[i][j]) && ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */
1118 (NULL == R_last[k][j])))
1119 {
1120 /* R^{(k)}_{ij} = N | N */
1121 /* R_cur[i][j] is already NULL */
1122 continue;
1123 }
1124 if ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */
1125 (NULL == R_last[k][j]))
1126 {
1127 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1128 R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
1129 continue;
1130 }
1131 // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1132 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1133
1134 R_cur[i][j] = NULL;
1135 R_cur_r = NULL;
1136 R_cur_l = NULL;
1137
1138 // cache results from strcmp, we might need these many times
1139 ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]);
1140 ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]);
1141 ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]);
1142 //ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]);
1143 kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]);
1144 1500
1145 // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well 1501 // Create R_cur[i][j] and simplify the expression
1146 // as parentheses, so we can better compare the contents 1502 automaton_create_proofs_simplify (R_last[i][j], R_last[i][k],
1147 R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k])); 1503 R_last[k][k], R_last[k][j],
1148 R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k])); 1504 &R_cur[i][j]);
1149 R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j]));
1150
1151 clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk);
1152 clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]);
1153
1154 // construct R_cur_l (and, if necessary R_cur_r)
1155 if (NULL != R_last[i][j])
1156 {
1157 // Assign R_temp_ij to R_last[i][j] and remove epsilon as well
1158 // as parentheses, so we can better compare the contents
1159 R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j]));
1160
1161 if (0 == strcmp (R_temp_ij, R_temp_ik) &&
1162 0 == strcmp (R_temp_ik, R_temp_kk) &&
1163 0 == strcmp (R_temp_kk, R_temp_kj))
1164 {
1165 if (0 == strlen (R_temp_ij))
1166 {
1167 R_cur_r = GNUNET_strdup ("");
1168 }
1169 else if ((0 == strncmp (R_last[i][j], "(|", 2)) ||
1170 (0 == strncmp (R_last[i][k], "(|", 2) &&
1171 0 == strncmp (R_last[k][j], "(|", 2)))
1172 {
1173 // a|(e|a)a*(e|a) = a*
1174 // a|(e|a)(e|a)*(e|a) = a*
1175 // (e|a)|aa*a = a*
1176 // (e|a)|aa*(e|a) = a*
1177 // (e|a)|(e|a)a*a = a*
1178 // (e|a)|(e|a)a*(e|a) = a*
1179 // (e|a)|(e|a)(e|a)*(e|a) = a*
1180 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1181 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij);
1182 else
1183 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
1184 }
1185 else
1186 {
1187 // a|aa*a = a+
1188 // a|(e|a)a*a = a+
1189 // a|aa*(e|a) = a+
1190 // a|(e|a)(e|a)*a = a+
1191 // a|a(e|a)*(e|a) = a+
1192 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1193 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
1194 else
1195 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
1196 }
1197 }
1198 else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp &&
1199 0 != clean_ik_kk_cmp)
1200 {
1201 // a|ab*b = ab*
1202 if (strlen (R_last[k][k]) < 1)
1203 R_cur_r = GNUNET_strdup (R_last[i][j]);
1204 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1205 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1206 else
1207 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]);
1208
1209 R_cur_l = NULL;
1210 }
1211 else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp &&
1212 0 != clean_kk_kj_cmp)
1213 {
1214 // a|bb*a = b*a
1215 if (strlen (R_last[k][k]) < 1)
1216 R_cur_r = GNUNET_strdup (R_last[k][j]);
1217 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1218 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
1219 else
1220 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1221
1222 R_cur_l = NULL;
1223 }
1224 else if (0 == ij_ik_cmp && 0 == kk_kj_cmp &&
1225 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
1226 {
1227 // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab*
1228 if (needs_parentheses (R_temp_kk))
1229 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1230 else
1231 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk);
1232
1233 R_cur_l = NULL;
1234 }
1235 else if (0 == ij_kj_cmp && 0 == ik_kk_cmp &&
1236 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
1237 {
1238 // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a
1239 if (needs_parentheses (R_temp_kk))
1240 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]);
1241 else
1242 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]);
1243
1244 R_cur_l = NULL;
1245 }
1246 else
1247 {
1248 temp_a =
1249 (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]);
1250 temp_a = remove_parentheses (temp_a);
1251 R_cur_l = temp_a;
1252 }
1253
1254 GNUNET_free_non_null (R_temp_ij);
1255 }
1256 else
1257 {
1258 // we have no left side
1259 R_cur_l = NULL;
1260 }
1261
1262 // construct R_cur_r, if not already constructed
1263 if (NULL == R_cur_r)
1264 {
1265 length = strlen (R_temp_kk) - strlen (R_last[i][k]);
1266
1267 // a(ba)*bx = (ab)+x
1268 if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) &&
1269 NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) &&
1270 NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) &&
1271 0 == strkcmp (R_temp_kk, R_last[i][k], length) &&
1272 0 == strncmp (R_temp_kk, R_last[k][j], length))
1273 {
1274 temp_a = GNUNET_malloc (length + 1);
1275 temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1);
1276
1277 length_l = 0;
1278 length_r = 0;
1279
1280 for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++)
1281 {
1282 if (cnt < length)
1283 {
1284 temp_a[length_l] = R_last[k][j][cnt];
1285 length_l++;
1286 }
1287 else
1288 {
1289 temp_b[length_r] = R_last[k][j][cnt];
1290 length_r++;
1291 }
1292 }
1293 temp_a[length_l] = '\0';
1294 temp_b[length_r] = '\0';
1295
1296 // e|(ab)+ = (ab)*
1297 if (NULL != R_cur_l && 0 == strlen (R_cur_l) &&
1298 0 == strlen (temp_b))
1299 {
1300 GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a);
1301 GNUNET_free (R_cur_l);
1302 R_cur_l = NULL;
1303 }
1304 else
1305 {
1306 GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a,
1307 temp_b);
1308 }
1309 GNUNET_free (temp_a);
1310 GNUNET_free (temp_b);
1311 }
1312 else if (0 == strcmp (R_temp_ik, R_temp_kk) &&
1313 0 == strcmp (R_temp_kk, R_temp_kj))
1314 {
1315 // (e|a)a*(e|a) = a*
1316 // (e|a)(e|a)*(e|a) = a*
1317 if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j]))
1318 {
1319 if (needs_parentheses (R_temp_kk))
1320 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk);
1321 else
1322 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk);
1323 }
1324 // aa*a = a+a
1325 else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp &&
1326 !has_epsilon (R_last[i][k]))
1327 {
1328 if (needs_parentheses (R_temp_kk))
1329 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1330 else
1331 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1332 }
1333 // (e|a)a*a = a+
1334 // aa*(e|a) = a+
1335 // a(e|a)*(e|a) = a+
1336 // (e|a)a*a = a+
1337 else
1338 {
1339 eps_check =
1340 (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) +
1341 has_epsilon (R_last[k][j]));
1342
1343 if (eps_check == 1)
1344 {
1345 if (needs_parentheses (R_temp_kk))
1346 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
1347 else
1348 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
1349 }
1350 }
1351 }
1352 // aa*b = a+b
1353 // (e|a)(e|a)*b = a*b
1354 else if (0 == strcmp (R_temp_ik, R_temp_kk))
1355 {
1356 if (has_epsilon (R_last[i][k]))
1357 {
1358 if (needs_parentheses (R_temp_kk))
1359 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
1360 else
1361 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1362 }
1363 else
1364 {
1365 if (needs_parentheses (R_temp_kk))
1366 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last[k][j]);
1367 else
1368 GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]);
1369 }
1370 }
1371 // ba*a = ba+
1372 // b(e|a)*(e|a) = ba*
1373 else if (0 == strcmp (R_temp_kk, R_temp_kj))
1374 {
1375 if (has_epsilon (R_last[k][j]))
1376 {
1377 if (needs_parentheses (R_temp_kk))
1378 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], R_temp_kk);
1379 else
1380 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk);
1381 }
1382 else
1383 {
1384 if (needs_parentheses (R_temp_kk))
1385 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], R_temp_kk);
1386 else
1387 GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk);
1388 }
1389 }
1390 else
1391 {
1392 if (strlen (R_temp_kk) > 0)
1393 {
1394 if (needs_parentheses (R_temp_kk))
1395 {
1396 GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], R_temp_kk,
1397 R_last[k][j]);
1398 }
1399 else
1400 {
1401 GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk,
1402 R_last[k][j]);
1403 }
1404 }
1405 else
1406 {
1407 GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]);
1408 }
1409 }
1410 }
1411
1412 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */
1413 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */
1414
1415 // putting it all together
1416 if (NULL != R_cur_l && NULL != R_cur_r)
1417 {
1418 // a|a = a
1419 if (0 == strcmp (R_cur_l, R_cur_r))
1420 {
1421 R_cur[i][j] = GNUNET_strdup (R_cur_l);
1422 }
1423 // R_cur_l | R_cur_r
1424 else
1425 {
1426 GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r);
1427 }
1428 }
1429 else if (NULL != R_cur_l)
1430 {
1431 R_cur[i][j] = GNUNET_strdup (R_cur_l);
1432 }
1433 else if (NULL != R_cur_r)
1434 {
1435 R_cur[i][j] = GNUNET_strdup (R_cur_r);
1436 }
1437 else
1438 {
1439 R_cur[i][j] = NULL;
1440 }
1441
1442 GNUNET_free_non_null (R_cur_l);
1443 GNUNET_free_non_null (R_cur_r);
1444
1445 GNUNET_free_non_null (R_temp_ik);
1446 GNUNET_free_non_null (R_temp_kk);
1447 GNUNET_free_non_null (R_temp_kj);
1448 } 1505 }
1449 } 1506 }
1450 1507
@@ -1478,14 +1535,16 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1478 if (states[i]->accepting) 1535 if (states[i]->accepting)
1479 { 1536 {
1480 if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i])) 1537 if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i]))
1538 {
1481 GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]); 1539 GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]);
1540 }
1482 else if (NULL != R_last[a->start->proof_id][i] && 1541 else if (NULL != R_last[a->start->proof_id][i] &&
1483 0 < strlen (R_last[a->start->proof_id][i])) 1542 0 < strlen (R_last[a->start->proof_id][i]))
1484 { 1543 {
1485 temp_a = complete_regex; 1544 temp = complete_regex;
1486 GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, 1545 GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex,
1487 R_last[a->start->proof_id][i]); 1546 R_last[a->start->proof_id][i]);
1488 GNUNET_free (temp_a); 1547 GNUNET_free (temp);
1489 } 1548 }
1490 } 1549 }
1491 } 1550 }
@@ -1524,8 +1583,6 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1524 int len = 0; 1583 int len = 0;
1525 struct GNUNET_REGEX_State *cstate; 1584 struct GNUNET_REGEX_State *cstate;
1526 struct Transition *ctran; 1585 struct Transition *ctran;
1527 int insert = 1;
1528 struct Transition *t;
1529 unsigned int i; 1586 unsigned int i;
1530 1587
1531 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); 1588 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
@@ -1573,21 +1630,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1573 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next) 1630 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1574 { 1631 {
1575 if (0 != ctran->label) 1632 if (0 != ctran->label)
1576 { 1633 state_add_transition (ctx, s, ctran->label, NULL);
1577 insert = 1;
1578
1579 for (t = s->transitions_head; NULL != t; t = t->next)
1580 {
1581 if (t->label == ctran->label)
1582 {
1583 insert = 0;
1584 break;
1585 }
1586 }
1587
1588 if (insert)
1589 state_add_transition (ctx, s, ctran->label, NULL);
1590 }
1591 } 1634 }
1592 1635
1593 // If the nfa_states contain an accepting state, the new dfa state is also 1636 // If the nfa_states contain an accepting state, the new dfa state is also