aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex.c
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-06-28 12:41:34 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-06-28 12:41:34 +0000
commit748324f617d0e4ff81de0e73c074d5314ace7dfd (patch)
tree4e3bece6a9d882993ec4d402ec48664e18c2d244 /src/regex/regex.c
parentae344190110dd03febe9afe90c50ff148c6a6c3c (diff)
downloadgnunet-748324f617d0e4ff81de0e73c074d5314ace7dfd.tar.gz
gnunet-748324f617d0e4ff81de0e73c074d5314ace7dfd.zip
cleanup, fixes
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r--src/regex/regex.c541
1 files changed, 267 insertions, 274 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index f237334d8..09cd910a0 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -594,7 +594,7 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
594 struct GNUNET_REGEX_StateSet *sset2) 594 struct GNUNET_REGEX_StateSet *sset2)
595{ 595{
596 int result; 596 int result;
597 int i; 597 unsigned int i;
598 598
599 if (NULL == sset1 || NULL == sset2) 599 if (NULL == sset1 || NULL == sset2)
600 return 1; 600 return 1;
@@ -1043,18 +1043,19 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1043 unsigned int i; 1043 unsigned int i;
1044 unsigned int j; 1044 unsigned int j;
1045 unsigned int k; 1045 unsigned int k;
1046 int cnt; 1046 unsigned int cnt;
1047 int eps_check; 1047 int eps_check;
1048 int ij_ik_cmp; 1048 int ij_ik_cmp;
1049 int ij_kj_cmp; 1049 int ij_kj_cmp;
1050 int ik_kj_cmp; 1050
1051 //int ik_kj_cmp;
1051 int ik_kk_cmp; 1052 int ik_kk_cmp;
1052 int kk_kj_cmp; 1053 int kk_kj_cmp;
1053 int clean_ik_kk_cmp; 1054 int clean_ik_kk_cmp;
1054 int clean_kk_kj_cmp; 1055 int clean_kk_kj_cmp;
1055 int length; 1056 size_t length;
1056 int length_l; 1057 size_t length_l;
1057 int length_r; 1058 size_t length_r;
1058 1059
1059 /* create depth-first numbering of the states, initializes 'state' */ 1060 /* create depth-first numbering of the states, initializes 'state' */
1060 automaton_traverse (a, &number_states, states); 1061 automaton_traverse (a, &number_states, states);
@@ -1099,17 +1100,36 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1099 1100
1100 // TODO: clean up and fix the induction part 1101 // TODO: clean up and fix the induction part
1101 1102
1102 // INDUCTION 1103 /* Compute regular expressions of length "k" between each pair of states per induction */
1103 for (k = 0; k < n; k++) 1104 for (k = 0; k < n; k++)
1104 { 1105 {
1105 for (i = 0; i < n; i++) 1106 for (i = 0; i < n; i++)
1106 { 1107 {
1107 for (j = 0; j < n; j++) 1108 for (j = 0; j < n; j++)
1108 { 1109 {
1109 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */ 1110 // Basis for the recursion:
1110 /* ">>> R_last[i][j] = %s R_last[i][k] = %s " */ 1111 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1111 /* "R_last[k][k] = %s R_last[k][j] = %s\n", R_last[i][j], */ 1112 // R_last == R^{(k-1)}, R_cur == R^{(k)}
1112 /* R_last[i][k], R_last[k][k], R_last[k][j]); */ 1113 // With: R_cur[i][j] = R_cur_l | R_cur_r
1114 // R_cur_l == R^{(k-1)}_{ij}
1115 // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1116
1117 if ((NULL == R_last[i][j]) && ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */
1118 (NULL == R_last[k][j])))
1119 {
1120 /* R^{(k)}_{ij} = N | N */
1121 /* R_cur[i][j] is already NULL */
1122 continue;
1123 }
1124 if ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */
1125 (NULL == R_last[k][j]))
1126 {
1127 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1128 R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
1129 continue;
1130 }
1131 // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1132 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1113 1133
1114 R_cur[i][j] = NULL; 1134 R_cur[i][j] = NULL;
1115 R_cur_r = NULL; 1135 R_cur_r = NULL;
@@ -1119,54 +1139,37 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1119 ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]); 1139 ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]);
1120 ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]); 1140 ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]);
1121 ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]); 1141 ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]);
1122 ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]); 1142 //ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]);
1123 kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]); 1143 kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]);
1124 1144
1125 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj} 1145 // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1126 // With: R_cur[i][j] = R_cur_l | R_cur_r 1146 // as parentheses, so we can better compare the contents
1127 // Rij(k) = Rij(k-1), because right side (R_cur_r) is empty set (NULL) 1147 R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k]));
1128 if ((NULL == R_last[i][k] || NULL == R_last[k][j] || 1148 R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k]));
1129 NULL == R_last[k][k]) && NULL != R_last[i][j]) 1149 R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j]));
1130 {
1131 R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
1132 }
1133 // Everything is NULL, so Rij(k) = NULL
1134 else if ((NULL == R_last[i][k] || NULL == R_last[k][j] ||
1135 NULL == R_last[k][k]) && NULL == R_last[i][j])
1136 {
1137 R_cur[i][j] = NULL;
1138 }
1139 // Right side (R_cur_r) not NULL
1140 else
1141 {
1142 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */
1143 /* "R_temp_ij = %s R_temp_ik = %s R_temp_kk = %s R_temp_kj = %s\n", */
1144 /* R_temp_ij, R_temp_ik, R_temp_kk, R_temp_kj); */
1145 1150
1146 // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well 1151 clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk);
1147 // as parentheses, so we can better compare the contents 1152 clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]);
1148 R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k]));
1149 R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k]));
1150 R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j]));
1151 1153
1152 clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk); 1154 // construct R_cur_l (and, if necessary R_cur_r)
1153 clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]); 1155 if (NULL != R_last[i][j])
1156 {
1157 // Assign R_temp_ij to R_last[i][j] and remove epsilon as well
1158 // as parentheses, so we can better compare the contents
1159 R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j]));
1154 1160
1155 // construct R_cur_l (and, if necessary R_cur_r) 1161 if (0 == strcmp (R_temp_ij, R_temp_ik) &&
1156 if (NULL != R_last[i][j]) 1162 0 == strcmp (R_temp_ik, R_temp_kk) &&
1163 0 == strcmp (R_temp_kk, R_temp_kj))
1157 { 1164 {
1158 // Assign R_temp_ij to R_last[i][j] and remove epsilon as well 1165 if (0 == strlen (R_temp_ij))
1159 // as parentheses, so we can better compare the contents 1166 {
1160 R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j])); 1167 R_cur_r = GNUNET_strdup ("");
1161 1168 }
1162 if (0 == strcmp (R_temp_ij, R_temp_ik) && 1169 else if ((0 == strncmp (R_last[i][j], "(|", 2)) ||
1163 0 == strcmp (R_temp_ik, R_temp_kk) && 1170 (0 == strncmp (R_last[i][k], "(|", 2) &&
1164 0 == strcmp (R_temp_kk, R_temp_kj)) 1171 0 == strncmp (R_last[k][j], "(|", 2)))
1165 { 1172 {
1166 if (0 == strlen (R_temp_ij))
1167 {
1168 R_cur_r = GNUNET_strdup ("");
1169 }
1170 // a|(e|a)a*(e|a) = a* 1173 // a|(e|a)a*(e|a) = a*
1171 // a|(e|a)(e|a)*(e|a) = a* 1174 // a|(e|a)(e|a)*(e|a) = a*
1172 // (e|a)|aa*a = a* 1175 // (e|a)|aa*a = a*
@@ -1174,284 +1177,274 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1174 // (e|a)|(e|a)a*a = a* 1177 // (e|a)|(e|a)a*a = a*
1175 // (e|a)|(e|a)a*(e|a) = a* 1178 // (e|a)|(e|a)a*(e|a) = a*
1176 // (e|a)|(e|a)(e|a)*(e|a) = a* 1179 // (e|a)|(e|a)(e|a)*(e|a) = a*
1177 else if ((0 == strncmp (R_last[i][j], "(|", 2)) || 1180 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1178 (0 == strncmp (R_last[i][k], "(|", 2) && 1181 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij);
1179 0 == strncmp (R_last[k][j], "(|", 2))) 1182 else
1180 { 1183 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
1181 if (GNUNET_YES == needs_parentheses (R_temp_ij)) 1184 }
1182 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij); 1185 else
1183 else 1186 {
1184 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
1185 }
1186 // a|aa*a = a+ 1187 // a|aa*a = a+
1187 // a|(e|a)a*a = a+ 1188 // a|(e|a)a*a = a+
1188 // a|aa*(e|a) = a+ 1189 // a|aa*(e|a) = a+
1189 // a|(e|a)(e|a)*a = a+ 1190 // a|(e|a)(e|a)*a = a+
1190 // a|a(e|a)*(e|a) = a+ 1191 // a|a(e|a)*(e|a) = a+
1192 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1193 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
1191 else 1194 else
1192 { 1195 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
1193 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1194 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
1195 else
1196 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
1197 }
1198 } 1196 }
1197 }
1198 else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp &&
1199 0 != clean_ik_kk_cmp)
1200 {
1199 // a|ab*b = ab* 1201 // a|ab*b = ab*
1200 else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && 1202 if (strlen (R_last[k][k]) < 1)
1201 0 != clean_ik_kk_cmp) 1203 R_cur_r = GNUNET_strdup (R_last[i][j]);
1202 { 1204 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1203 if (strlen (R_last[k][k]) < 1) 1205 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1204 R_cur_r = GNUNET_strdup (R_last[i][j]); 1206 else
1205 else if (GNUNET_YES == needs_parentheses (R_temp_kk)) 1207 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]);
1206 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1207 else
1208 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]);
1209 1208
1210 R_cur_l = NULL; 1209 R_cur_l = NULL;
1211 } 1210 }
1211 else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp &&
1212 0 != clean_kk_kj_cmp)
1213 {
1212 // a|bb*a = b*a 1214 // a|bb*a = b*a
1213 else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && 1215 if (strlen (R_last[k][k]) < 1)
1214 0 != clean_kk_kj_cmp) 1216 R_cur_r = GNUNET_strdup (R_last[k][j]);
1215 { 1217 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1216 if (strlen (R_last[k][k]) < 1) 1218 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
1217 R_cur_r = GNUNET_strdup (R_last[k][j]); 1219 else
1218 else if (GNUNET_YES == needs_parentheses (R_temp_kk)) 1220 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1219 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
1220 else
1221 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1222 1221
1223 R_cur_l = NULL; 1222 R_cur_l = NULL;
1224 } 1223 }
1224 else if (0 == ij_ik_cmp && 0 == kk_kj_cmp &&
1225 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
1226 {
1225 // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* 1227 // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab*
1226 else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && 1228 if (needs_parentheses (R_temp_kk))
1227 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) 1229 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1228 { 1230 else
1229 if (needs_parentheses (R_temp_kk)) 1231 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk);
1230 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1231 else
1232 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk);
1233 1232
1234 R_cur_l = NULL; 1233 R_cur_l = NULL;
1235 } 1234 }
1235 else if (0 == ij_kj_cmp && 0 == ik_kk_cmp &&
1236 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
1237 {
1236 // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a 1238 // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a
1237 else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && 1239 if (needs_parentheses (R_temp_kk))
1238 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) 1240 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]);
1239 {
1240 if (needs_parentheses (R_temp_kk))
1241 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]);
1242 else
1243 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]);
1244
1245 R_cur_l = NULL;
1246 }
1247 else 1241 else
1248 { 1242 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]);
1249 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */
1250 temp_a =
1251 (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]);
1252 temp_a = remove_parentheses (temp_a);
1253 R_cur_l = temp_a;
1254 }
1255 1243
1256 GNUNET_free_non_null (R_temp_ij); 1244 R_cur_l = NULL;
1257 } 1245 }
1258 // we have no left side
1259 else 1246 else
1260 { 1247 {
1261 R_cur_l = NULL; 1248 temp_a =
1249 (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]);
1250 temp_a = remove_parentheses (temp_a);
1251 R_cur_l = temp_a;
1262 } 1252 }
1263 1253
1264 // construct R_cur_r, if not already constructed 1254 GNUNET_free_non_null (R_temp_ij);
1265 if (NULL == R_cur_r) 1255 }
1266 { 1256 else
1267 length = strlen (R_temp_kk) - strlen (R_last[i][k]); 1257 {
1268 1258 // we have no left side
1269 // a(ba)*bx = (ab)+x 1259 R_cur_l = NULL;
1270 if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) 1260 }
1271 && NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) &&
1272 NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) &&
1273 0 == strkcmp (R_temp_kk, R_last[i][k], length) &&
1274 0 == strncmp (R_temp_kk, R_last[k][j], length))
1275 {
1276 temp_a = GNUNET_malloc (length + 1);
1277 temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1);
1278 1261
1279 length_l = 0; 1262 // construct R_cur_r, if not already constructed
1280 length_r = 0; 1263 if (NULL == R_cur_r)
1264 {
1265 length = strlen (R_temp_kk) - strlen (R_last[i][k]);
1266
1267 // a(ba)*bx = (ab)+x
1268 if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) &&
1269 NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) &&
1270 NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) &&
1271 0 == strkcmp (R_temp_kk, R_last[i][k], length) &&
1272 0 == strncmp (R_temp_kk, R_last[k][j], length))
1273 {
1274 temp_a = GNUNET_malloc (length + 1);
1275 temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1);
1281 1276
1282 for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) 1277 length_l = 0;
1283 { 1278 length_r = 0;
1284 if (cnt < length)
1285 {
1286 temp_a[length_l] = R_last[k][j][cnt];
1287 length_l++;
1288 }
1289 else
1290 {
1291 temp_b[length_r] = R_last[k][j][cnt];
1292 length_r++;
1293 }
1294 }
1295 temp_a[length_l] = '\0';
1296 temp_b[length_r] = '\0';
1297 1279
1298 // e|(ab)+ = (ab)* 1280 for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++)
1299 if (NULL != R_cur_l && 0 == strlen (R_cur_l) && 1281 {
1300 0 == strlen (temp_b)) 1282 if (cnt < length)
1301 { 1283 {
1302 GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a); 1284 temp_a[length_l] = R_last[k][j][cnt];
1303 GNUNET_free (R_cur_l); 1285 length_l++;
1304 R_cur_l = NULL;
1305 } 1286 }
1306 else 1287 else
1307 { 1288 {
1308 GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a, 1289 temp_b[length_r] = R_last[k][j][cnt];
1309 temp_b); 1290 length_r++;
1310 } 1291 }
1311 GNUNET_free (temp_a);
1312 GNUNET_free (temp_b);
1313 } 1292 }
1314 else if (0 == strcmp (R_temp_ik, R_temp_kk) && 1293 temp_a[length_l] = '\0';
1315 0 == strcmp (R_temp_kk, R_temp_kj)) 1294 temp_b[length_r] = '\0';
1295
1296 // e|(ab)+ = (ab)*
1297 if (NULL != R_cur_l && 0 == strlen (R_cur_l) &&
1298 0 == strlen (temp_b))
1316 { 1299 {
1317 // (e|a)a*(e|a) = a* 1300 GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a);
1318 // (e|a)(e|a)*(e|a) = a* 1301 GNUNET_free (R_cur_l);
1319 if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j])) 1302 R_cur_l = NULL;
1320 { 1303 }
1321 if (needs_parentheses (R_temp_kk)) 1304 else
1322 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk); 1305 {
1323 else 1306 GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a,
1324 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk); 1307 temp_b);
1325 } 1308 }
1326 // aa*a = a+a 1309 GNUNET_free (temp_a);
1327 else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp && 1310 GNUNET_free (temp_b);
1328 !has_epsilon (R_last[i][k])) 1311 }
1329 { 1312 else if (0 == strcmp (R_temp_ik, R_temp_kk) &&
1330 if (needs_parentheses (R_temp_kk)) 1313 0 == strcmp (R_temp_kk, R_temp_kj))
1331 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); 1314 {
1332 else 1315 // (e|a)a*(e|a) = a*
1333 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); 1316 // (e|a)(e|a)*(e|a) = a*
1334 } 1317 if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j]))
1335 // (e|a)a*a = a+ 1318 {
1336 // aa*(e|a) = a+ 1319 if (needs_parentheses (R_temp_kk))
1337 // a(e|a)*(e|a) = a+ 1320 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk);
1338 // (e|a)a*a = a+
1339 else 1321 else
1340 { 1322 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk);
1341 eps_check =
1342 (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) +
1343 has_epsilon (R_last[k][j]));
1344
1345 if (eps_check == 1)
1346 {
1347 if (needs_parentheses (R_temp_kk))
1348 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
1349 else
1350 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
1351 }
1352 }
1353 } 1323 }
1354 // aa*b = a+b 1324 // aa*a = a+a
1355 // (e|a)(e|a)*b = a*b 1325 else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp &&
1356 else if (0 == strcmp (R_temp_ik, R_temp_kk)) 1326 !has_epsilon (R_last[i][k]))
1357 { 1327 {
1358 if (has_epsilon (R_last[i][k])) 1328 if (needs_parentheses (R_temp_kk))
1359 { 1329 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1360 if (needs_parentheses (R_temp_kk))
1361 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk,
1362 R_last[k][j]);
1363 else
1364 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1365 }
1366 else 1330 else
1367 { 1331 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1368 if (needs_parentheses (R_temp_kk))
1369 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk,
1370 R_last[k][j]);
1371 else
1372 GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]);
1373 }
1374 } 1332 }
1375 // ba*a = ba+ 1333 // (e|a)a*a = a+
1376 // b(e|a)*(e|a) = ba* 1334 // aa*(e|a) = a+
1377 else if (0 == strcmp (R_temp_kk, R_temp_kj)) 1335 // a(e|a)*(e|a) = a+
1336 // (e|a)a*a = a+
1337 else
1378 { 1338 {
1379 if (has_epsilon (R_last[k][j])) 1339 eps_check =
1340 (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) +
1341 has_epsilon (R_last[k][j]));
1342
1343 if (eps_check == 1)
1380 { 1344 {
1381 if (needs_parentheses (R_temp_kk)) 1345 if (needs_parentheses (R_temp_kk))
1382 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], 1346 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
1383 R_temp_kk);
1384 else 1347 else
1385 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk); 1348 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
1386 } 1349 }
1350 }
1351 }
1352 // aa*b = a+b
1353 // (e|a)(e|a)*b = a*b
1354 else if (0 == strcmp (R_temp_ik, R_temp_kk))
1355 {
1356 if (has_epsilon (R_last[i][k]))
1357 {
1358 if (needs_parentheses (R_temp_kk))
1359 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
1387 else 1360 else
1388 { 1361 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1389 if (needs_parentheses (R_temp_kk))
1390 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k],
1391 R_temp_kk);
1392 else
1393 GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk);
1394 }
1395 } 1362 }
1396 else 1363 else
1397 { 1364 {
1398 if (strlen (R_temp_kk) > 0) 1365 if (needs_parentheses (R_temp_kk))
1399 { 1366 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last[k][j]);
1400 if (needs_parentheses (R_temp_kk))
1401 {
1402 GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k],
1403 R_temp_kk, R_last[k][j]);
1404 }
1405 else
1406 {
1407 GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk,
1408 R_last[k][j]);
1409 }
1410 }
1411 else 1367 else
1412 { 1368 GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]);
1413 GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]);
1414 }
1415 } 1369 }
1416 } 1370 }
1417 1371 // ba*a = ba+
1418 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */ 1372 // b(e|a)*(e|a) = ba*
1419 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */ 1373 else if (0 == strcmp (R_temp_kk, R_temp_kj))
1420
1421 // putting it all together
1422 if (NULL != R_cur_l && NULL != R_cur_r)
1423 { 1374 {
1424 // a|a = a 1375 if (has_epsilon (R_last[k][j]))
1425 if (0 == strcmp (R_cur_l, R_cur_r))
1426 { 1376 {
1427 R_cur[i][j] = GNUNET_strdup (R_cur_l); 1377 if (needs_parentheses (R_temp_kk))
1378 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], R_temp_kk);
1379 else
1380 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk);
1428 } 1381 }
1429 // R_cur_l | R_cur_r
1430 else 1382 else
1431 { 1383 {
1432 GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); 1384 if (needs_parentheses (R_temp_kk))
1385 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], R_temp_kk);
1386 else
1387 GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk);
1433 } 1388 }
1434 } 1389 }
1435 else if (NULL != R_cur_l) 1390 else
1436 { 1391 {
1437 R_cur[i][j] = GNUNET_strdup (R_cur_l); 1392 if (strlen (R_temp_kk) > 0)
1393 {
1394 if (needs_parentheses (R_temp_kk))
1395 {
1396 GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], R_temp_kk,
1397 R_last[k][j]);
1398 }
1399 else
1400 {
1401 GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk,
1402 R_last[k][j]);
1403 }
1404 }
1405 else
1406 {
1407 GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]);
1408 }
1438 } 1409 }
1439 else if (NULL != R_cur_r) 1410 }
1411
1412 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */
1413 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */
1414
1415 // putting it all together
1416 if (NULL != R_cur_l && NULL != R_cur_r)
1417 {
1418 // a|a = a
1419 if (0 == strcmp (R_cur_l, R_cur_r))
1440 { 1420 {
1441 R_cur[i][j] = GNUNET_strdup (R_cur_r); 1421 R_cur[i][j] = GNUNET_strdup (R_cur_l);
1442 } 1422 }
1423 // R_cur_l | R_cur_r
1443 else 1424 else
1444 { 1425 {
1445 R_cur[i][j] = NULL; 1426 GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r);
1446 } 1427 }
1428 }
1429 else if (NULL != R_cur_l)
1430 {
1431 R_cur[i][j] = GNUNET_strdup (R_cur_l);
1432 }
1433 else if (NULL != R_cur_r)
1434 {
1435 R_cur[i][j] = GNUNET_strdup (R_cur_r);
1436 }
1437 else
1438 {
1439 R_cur[i][j] = NULL;
1440 }
1447 1441
1448 GNUNET_free_non_null (R_cur_l); 1442 GNUNET_free_non_null (R_cur_l);
1449 GNUNET_free_non_null (R_cur_r); 1443 GNUNET_free_non_null (R_cur_r);
1450 1444
1451 GNUNET_free_non_null (R_temp_ik); 1445 GNUNET_free_non_null (R_temp_ik);
1452 GNUNET_free_non_null (R_temp_kk); 1446 GNUNET_free_non_null (R_temp_kk);
1453 GNUNET_free_non_null (R_temp_kj); 1447 GNUNET_free_non_null (R_temp_kj);
1454 }
1455 } 1448 }
1456 } 1449 }
1457 1450
@@ -1533,7 +1526,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1533 struct Transition *ctran; 1526 struct Transition *ctran;
1534 int insert = 1; 1527 int insert = 1;
1535 struct Transition *t; 1528 struct Transition *t;
1536 int i; 1529 unsigned int i;
1537 1530
1538 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); 1531 s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State));
1539 s->id = ctx->state_id++; 1532 s->id = ctx->state_id++;
@@ -1715,7 +1708,7 @@ static void
1715dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, 1708dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
1716 struct GNUNET_REGEX_Automaton *a) 1709 struct GNUNET_REGEX_Automaton *a)
1717{ 1710{
1718 int i; 1711 unsigned int i;
1719 int table[a->state_count][a->state_count]; 1712 int table[a->state_count][a->state_count];
1720 struct GNUNET_REGEX_State *s1; 1713 struct GNUNET_REGEX_State *s1;
1721 struct GNUNET_REGEX_State *s2; 1714 struct GNUNET_REGEX_State *s2;
@@ -1724,7 +1717,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx,
1724 struct GNUNET_REGEX_State *s1_next; 1717 struct GNUNET_REGEX_State *s1_next;
1725 struct GNUNET_REGEX_State *s2_next; 1718 struct GNUNET_REGEX_State *s2_next;
1726 int change; 1719 int change;
1727 int num_equal_edges; 1720 unsigned int num_equal_edges;
1728 1721
1729 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; 1722 for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1;
1730 i++, s1 = s1->next) 1723 i++, s1 = s1->next)
@@ -2005,10 +1998,10 @@ nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa,
2005 struct GNUNET_REGEX_State *s; 1998 struct GNUNET_REGEX_State *s;
2006 struct GNUNET_REGEX_StateSet *sset; 1999 struct GNUNET_REGEX_StateSet *sset;
2007 struct GNUNET_REGEX_StateSet *cls; 2000 struct GNUNET_REGEX_StateSet *cls;
2008 int i; 2001 unsigned int i;
2009 int j; 2002 unsigned int j;
2010 int k; 2003 unsigned int k;
2011 int contains; 2004 unsigned int contains;
2012 2005
2013 if (NULL == states) 2006 if (NULL == states)
2014 return NULL; 2007 return NULL;
@@ -2732,7 +2725,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2732 struct GNUNET_REGEX_State *s; 2725 struct GNUNET_REGEX_State *s;
2733 struct GNUNET_REGEX_StateSet *sset; 2726 struct GNUNET_REGEX_StateSet *sset;
2734 struct GNUNET_REGEX_StateSet *new_sset; 2727 struct GNUNET_REGEX_StateSet *new_sset;
2735 int i; 2728 unsigned int i;
2736 int result; 2729 int result;
2737 2730
2738 if (NFA != a->type) 2731 if (NFA != a->type)