diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-28 12:41:34 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-28 12:41:34 +0000 |
commit | 748324f617d0e4ff81de0e73c074d5314ace7dfd (patch) | |
tree | 4e3bece6a9d882993ec4d402ec48664e18c2d244 /src/regex/regex.c | |
parent | ae344190110dd03febe9afe90c50ff148c6a6c3c (diff) | |
download | gnunet-748324f617d0e4ff81de0e73c074d5314ace7dfd.tar.gz gnunet-748324f617d0e4ff81de0e73c074d5314ace7dfd.zip |
cleanup, fixes
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r-- | src/regex/regex.c | 541 |
1 files changed, 267 insertions, 274 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index f237334d8..09cd910a0 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -594,7 +594,7 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, | |||
594 | struct GNUNET_REGEX_StateSet *sset2) | 594 | struct GNUNET_REGEX_StateSet *sset2) |
595 | { | 595 | { |
596 | int result; | 596 | int result; |
597 | int i; | 597 | unsigned int i; |
598 | 598 | ||
599 | if (NULL == sset1 || NULL == sset2) | 599 | if (NULL == sset1 || NULL == sset2) |
600 | return 1; | 600 | return 1; |
@@ -1043,18 +1043,19 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1043 | unsigned int i; | 1043 | unsigned int i; |
1044 | unsigned int j; | 1044 | unsigned int j; |
1045 | unsigned int k; | 1045 | unsigned int k; |
1046 | int cnt; | 1046 | unsigned int cnt; |
1047 | int eps_check; | 1047 | int eps_check; |
1048 | int ij_ik_cmp; | 1048 | int ij_ik_cmp; |
1049 | int ij_kj_cmp; | 1049 | int ij_kj_cmp; |
1050 | int ik_kj_cmp; | 1050 | |
1051 | //int ik_kj_cmp; | ||
1051 | int ik_kk_cmp; | 1052 | int ik_kk_cmp; |
1052 | int kk_kj_cmp; | 1053 | int kk_kj_cmp; |
1053 | int clean_ik_kk_cmp; | 1054 | int clean_ik_kk_cmp; |
1054 | int clean_kk_kj_cmp; | 1055 | int clean_kk_kj_cmp; |
1055 | int length; | 1056 | size_t length; |
1056 | int length_l; | 1057 | size_t length_l; |
1057 | int length_r; | 1058 | size_t length_r; |
1058 | 1059 | ||
1059 | /* create depth-first numbering of the states, initializes 'state' */ | 1060 | /* create depth-first numbering of the states, initializes 'state' */ |
1060 | automaton_traverse (a, &number_states, states); | 1061 | automaton_traverse (a, &number_states, states); |
@@ -1099,17 +1100,36 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1099 | 1100 | ||
1100 | // TODO: clean up and fix the induction part | 1101 | // TODO: clean up and fix the induction part |
1101 | 1102 | ||
1102 | // INDUCTION | 1103 | /* Compute regular expressions of length "k" between each pair of states per induction */ |
1103 | for (k = 0; k < n; k++) | 1104 | for (k = 0; k < n; k++) |
1104 | { | 1105 | { |
1105 | for (i = 0; i < n; i++) | 1106 | for (i = 0; i < n; i++) |
1106 | { | 1107 | { |
1107 | for (j = 0; j < n; j++) | 1108 | for (j = 0; j < n; j++) |
1108 | { | 1109 | { |
1109 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */ | 1110 | // Basis for the recursion: |
1110 | /* ">>> R_last[i][j] = %s R_last[i][k] = %s " */ | 1111 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} |
1111 | /* "R_last[k][k] = %s R_last[k][j] = %s\n", R_last[i][j], */ | 1112 | // R_last == R^{(k-1)}, R_cur == R^{(k)} |
1112 | /* R_last[i][k], R_last[k][k], R_last[k][j]); */ | 1113 | // With: R_cur[i][j] = R_cur_l | R_cur_r |
1114 | // R_cur_l == R^{(k-1)}_{ij} | ||
1115 | // R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1116 | |||
1117 | if ((NULL == R_last[i][j]) && ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */ | ||
1118 | (NULL == R_last[k][j]))) | ||
1119 | { | ||
1120 | /* R^{(k)}_{ij} = N | N */ | ||
1121 | /* R_cur[i][j] is already NULL */ | ||
1122 | continue; | ||
1123 | } | ||
1124 | if ((NULL == R_last[i][k]) || (NULL == R_last[k][k]) || /* technically cannot happen, but looks saner */ | ||
1125 | (NULL == R_last[k][j])) | ||
1126 | { | ||
1127 | /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */ | ||
1128 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); | ||
1129 | continue; | ||
1130 | } | ||
1131 | // $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR | ||
1132 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1113 | 1133 | ||
1114 | R_cur[i][j] = NULL; | 1134 | R_cur[i][j] = NULL; |
1115 | R_cur_r = NULL; | 1135 | R_cur_r = NULL; |
@@ -1119,54 +1139,37 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1119 | ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]); | 1139 | ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]); |
1120 | ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]); | 1140 | ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]); |
1121 | ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]); | 1141 | ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]); |
1122 | ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]); | 1142 | //ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]); |
1123 | kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]); | 1143 | kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]); |
1124 | 1144 | ||
1125 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj} | 1145 | // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well |
1126 | // With: R_cur[i][j] = R_cur_l | R_cur_r | 1146 | // as parentheses, so we can better compare the contents |
1127 | // Rij(k) = Rij(k-1), because right side (R_cur_r) is empty set (NULL) | 1147 | R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k])); |
1128 | if ((NULL == R_last[i][k] || NULL == R_last[k][j] || | 1148 | R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k])); |
1129 | NULL == R_last[k][k]) && NULL != R_last[i][j]) | 1149 | R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j])); |
1130 | { | ||
1131 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); | ||
1132 | } | ||
1133 | // Everything is NULL, so Rij(k) = NULL | ||
1134 | else if ((NULL == R_last[i][k] || NULL == R_last[k][j] || | ||
1135 | NULL == R_last[k][k]) && NULL == R_last[i][j]) | ||
1136 | { | ||
1137 | R_cur[i][j] = NULL; | ||
1138 | } | ||
1139 | // Right side (R_cur_r) not NULL | ||
1140 | else | ||
1141 | { | ||
1142 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */ | ||
1143 | /* "R_temp_ij = %s R_temp_ik = %s R_temp_kk = %s R_temp_kj = %s\n", */ | ||
1144 | /* R_temp_ij, R_temp_ik, R_temp_kk, R_temp_kj); */ | ||
1145 | 1150 | ||
1146 | // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well | 1151 | clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk); |
1147 | // as parentheses, so we can better compare the contents | 1152 | clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]); |
1148 | R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k])); | ||
1149 | R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k])); | ||
1150 | R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j])); | ||
1151 | 1153 | ||
1152 | clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk); | 1154 | // construct R_cur_l (and, if necessary R_cur_r) |
1153 | clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]); | 1155 | if (NULL != R_last[i][j]) |
1156 | { | ||
1157 | // Assign R_temp_ij to R_last[i][j] and remove epsilon as well | ||
1158 | // as parentheses, so we can better compare the contents | ||
1159 | R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j])); | ||
1154 | 1160 | ||
1155 | // construct R_cur_l (and, if necessary R_cur_r) | 1161 | if (0 == strcmp (R_temp_ij, R_temp_ik) && |
1156 | if (NULL != R_last[i][j]) | 1162 | 0 == strcmp (R_temp_ik, R_temp_kk) && |
1163 | 0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1157 | { | 1164 | { |
1158 | // Assign R_temp_ij to R_last[i][j] and remove epsilon as well | 1165 | if (0 == strlen (R_temp_ij)) |
1159 | // as parentheses, so we can better compare the contents | 1166 | { |
1160 | R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j])); | 1167 | R_cur_r = GNUNET_strdup (""); |
1161 | 1168 | } | |
1162 | if (0 == strcmp (R_temp_ij, R_temp_ik) && | 1169 | else if ((0 == strncmp (R_last[i][j], "(|", 2)) || |
1163 | 0 == strcmp (R_temp_ik, R_temp_kk) && | 1170 | (0 == strncmp (R_last[i][k], "(|", 2) && |
1164 | 0 == strcmp (R_temp_kk, R_temp_kj)) | 1171 | 0 == strncmp (R_last[k][j], "(|", 2))) |
1165 | { | 1172 | { |
1166 | if (0 == strlen (R_temp_ij)) | ||
1167 | { | ||
1168 | R_cur_r = GNUNET_strdup (""); | ||
1169 | } | ||
1170 | // a|(e|a)a*(e|a) = a* | 1173 | // a|(e|a)a*(e|a) = a* |
1171 | // a|(e|a)(e|a)*(e|a) = a* | 1174 | // a|(e|a)(e|a)*(e|a) = a* |
1172 | // (e|a)|aa*a = a* | 1175 | // (e|a)|aa*a = a* |
@@ -1174,284 +1177,274 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1174 | // (e|a)|(e|a)a*a = a* | 1177 | // (e|a)|(e|a)a*a = a* |
1175 | // (e|a)|(e|a)a*(e|a) = a* | 1178 | // (e|a)|(e|a)a*(e|a) = a* |
1176 | // (e|a)|(e|a)(e|a)*(e|a) = a* | 1179 | // (e|a)|(e|a)(e|a)*(e|a) = a* |
1177 | else if ((0 == strncmp (R_last[i][j], "(|", 2)) || | 1180 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) |
1178 | (0 == strncmp (R_last[i][k], "(|", 2) && | 1181 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij); |
1179 | 0 == strncmp (R_last[k][j], "(|", 2))) | 1182 | else |
1180 | { | 1183 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij); |
1181 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | 1184 | } |
1182 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij); | 1185 | else |
1183 | else | 1186 | { |
1184 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij); | ||
1185 | } | ||
1186 | // a|aa*a = a+ | 1187 | // a|aa*a = a+ |
1187 | // a|(e|a)a*a = a+ | 1188 | // a|(e|a)a*a = a+ |
1188 | // a|aa*(e|a) = a+ | 1189 | // a|aa*(e|a) = a+ |
1189 | // a|(e|a)(e|a)*a = a+ | 1190 | // a|(e|a)(e|a)*a = a+ |
1190 | // a|a(e|a)*(e|a) = a+ | 1191 | // a|a(e|a)*(e|a) = a+ |
1192 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1193 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij); | ||
1191 | else | 1194 | else |
1192 | { | 1195 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij); |
1193 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1194 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij); | ||
1195 | else | ||
1196 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij); | ||
1197 | } | ||
1198 | } | 1196 | } |
1197 | } | ||
1198 | else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && | ||
1199 | 0 != clean_ik_kk_cmp) | ||
1200 | { | ||
1199 | // a|ab*b = ab* | 1201 | // a|ab*b = ab* |
1200 | else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && | 1202 | if (strlen (R_last[k][k]) < 1) |
1201 | 0 != clean_ik_kk_cmp) | 1203 | R_cur_r = GNUNET_strdup (R_last[i][j]); |
1202 | { | 1204 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) |
1203 | if (strlen (R_last[k][k]) < 1) | 1205 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); |
1204 | R_cur_r = GNUNET_strdup (R_last[i][j]); | 1206 | else |
1205 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | 1207 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]); |
1206 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); | ||
1207 | else | ||
1208 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]); | ||
1209 | 1208 | ||
1210 | R_cur_l = NULL; | 1209 | R_cur_l = NULL; |
1211 | } | 1210 | } |
1211 | else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && | ||
1212 | 0 != clean_kk_kj_cmp) | ||
1213 | { | ||
1212 | // a|bb*a = b*a | 1214 | // a|bb*a = b*a |
1213 | else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && | 1215 | if (strlen (R_last[k][k]) < 1) |
1214 | 0 != clean_kk_kj_cmp) | 1216 | R_cur_r = GNUNET_strdup (R_last[k][j]); |
1215 | { | 1217 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) |
1216 | if (strlen (R_last[k][k]) < 1) | 1218 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]); |
1217 | R_cur_r = GNUNET_strdup (R_last[k][j]); | 1219 | else |
1218 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | 1220 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); |
1219 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]); | ||
1220 | else | ||
1221 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); | ||
1222 | 1221 | ||
1223 | R_cur_l = NULL; | 1222 | R_cur_l = NULL; |
1224 | } | 1223 | } |
1224 | else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && | ||
1225 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | ||
1226 | { | ||
1225 | // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* | 1227 | // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* |
1226 | else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && | 1228 | if (needs_parentheses (R_temp_kk)) |
1227 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | 1229 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); |
1228 | { | 1230 | else |
1229 | if (needs_parentheses (R_temp_kk)) | 1231 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk); |
1230 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); | ||
1231 | else | ||
1232 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk); | ||
1233 | 1232 | ||
1234 | R_cur_l = NULL; | 1233 | R_cur_l = NULL; |
1235 | } | 1234 | } |
1235 | else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && | ||
1236 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | ||
1237 | { | ||
1236 | // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a | 1238 | // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a |
1237 | else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && | 1239 | if (needs_parentheses (R_temp_kk)) |
1238 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | 1240 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]); |
1239 | { | ||
1240 | if (needs_parentheses (R_temp_kk)) | ||
1241 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]); | ||
1242 | else | ||
1243 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]); | ||
1244 | |||
1245 | R_cur_l = NULL; | ||
1246 | } | ||
1247 | else | 1241 | else |
1248 | { | 1242 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]); |
1249 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */ | ||
1250 | temp_a = | ||
1251 | (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]); | ||
1252 | temp_a = remove_parentheses (temp_a); | ||
1253 | R_cur_l = temp_a; | ||
1254 | } | ||
1255 | 1243 | ||
1256 | GNUNET_free_non_null (R_temp_ij); | 1244 | R_cur_l = NULL; |
1257 | } | 1245 | } |
1258 | // we have no left side | ||
1259 | else | 1246 | else |
1260 | { | 1247 | { |
1261 | R_cur_l = NULL; | 1248 | temp_a = |
1249 | (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]); | ||
1250 | temp_a = remove_parentheses (temp_a); | ||
1251 | R_cur_l = temp_a; | ||
1262 | } | 1252 | } |
1263 | 1253 | ||
1264 | // construct R_cur_r, if not already constructed | 1254 | GNUNET_free_non_null (R_temp_ij); |
1265 | if (NULL == R_cur_r) | 1255 | } |
1266 | { | 1256 | else |
1267 | length = strlen (R_temp_kk) - strlen (R_last[i][k]); | 1257 | { |
1268 | 1258 | // we have no left side | |
1269 | // a(ba)*bx = (ab)+x | 1259 | R_cur_l = NULL; |
1270 | if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) | 1260 | } |
1271 | && NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && | ||
1272 | NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) && | ||
1273 | 0 == strkcmp (R_temp_kk, R_last[i][k], length) && | ||
1274 | 0 == strncmp (R_temp_kk, R_last[k][j], length)) | ||
1275 | { | ||
1276 | temp_a = GNUNET_malloc (length + 1); | ||
1277 | temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1); | ||
1278 | 1261 | ||
1279 | length_l = 0; | 1262 | // construct R_cur_r, if not already constructed |
1280 | length_r = 0; | 1263 | if (NULL == R_cur_r) |
1264 | { | ||
1265 | length = strlen (R_temp_kk) - strlen (R_last[i][k]); | ||
1266 | |||
1267 | // a(ba)*bx = (ab)+x | ||
1268 | if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) && | ||
1269 | NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && | ||
1270 | NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) && | ||
1271 | 0 == strkcmp (R_temp_kk, R_last[i][k], length) && | ||
1272 | 0 == strncmp (R_temp_kk, R_last[k][j], length)) | ||
1273 | { | ||
1274 | temp_a = GNUNET_malloc (length + 1); | ||
1275 | temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1); | ||
1281 | 1276 | ||
1282 | for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) | 1277 | length_l = 0; |
1283 | { | 1278 | length_r = 0; |
1284 | if (cnt < length) | ||
1285 | { | ||
1286 | temp_a[length_l] = R_last[k][j][cnt]; | ||
1287 | length_l++; | ||
1288 | } | ||
1289 | else | ||
1290 | { | ||
1291 | temp_b[length_r] = R_last[k][j][cnt]; | ||
1292 | length_r++; | ||
1293 | } | ||
1294 | } | ||
1295 | temp_a[length_l] = '\0'; | ||
1296 | temp_b[length_r] = '\0'; | ||
1297 | 1279 | ||
1298 | // e|(ab)+ = (ab)* | 1280 | for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) |
1299 | if (NULL != R_cur_l && 0 == strlen (R_cur_l) && | 1281 | { |
1300 | 0 == strlen (temp_b)) | 1282 | if (cnt < length) |
1301 | { | 1283 | { |
1302 | GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a); | 1284 | temp_a[length_l] = R_last[k][j][cnt]; |
1303 | GNUNET_free (R_cur_l); | 1285 | length_l++; |
1304 | R_cur_l = NULL; | ||
1305 | } | 1286 | } |
1306 | else | 1287 | else |
1307 | { | 1288 | { |
1308 | GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a, | 1289 | temp_b[length_r] = R_last[k][j][cnt]; |
1309 | temp_b); | 1290 | length_r++; |
1310 | } | 1291 | } |
1311 | GNUNET_free (temp_a); | ||
1312 | GNUNET_free (temp_b); | ||
1313 | } | 1292 | } |
1314 | else if (0 == strcmp (R_temp_ik, R_temp_kk) && | 1293 | temp_a[length_l] = '\0'; |
1315 | 0 == strcmp (R_temp_kk, R_temp_kj)) | 1294 | temp_b[length_r] = '\0'; |
1295 | |||
1296 | // e|(ab)+ = (ab)* | ||
1297 | if (NULL != R_cur_l && 0 == strlen (R_cur_l) && | ||
1298 | 0 == strlen (temp_b)) | ||
1316 | { | 1299 | { |
1317 | // (e|a)a*(e|a) = a* | 1300 | GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a); |
1318 | // (e|a)(e|a)*(e|a) = a* | 1301 | GNUNET_free (R_cur_l); |
1319 | if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j])) | 1302 | R_cur_l = NULL; |
1320 | { | 1303 | } |
1321 | if (needs_parentheses (R_temp_kk)) | 1304 | else |
1322 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk); | 1305 | { |
1323 | else | 1306 | GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a, |
1324 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk); | 1307 | temp_b); |
1325 | } | 1308 | } |
1326 | // aa*a = a+a | 1309 | GNUNET_free (temp_a); |
1327 | else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp && | 1310 | GNUNET_free (temp_b); |
1328 | !has_epsilon (R_last[i][k])) | 1311 | } |
1329 | { | 1312 | else if (0 == strcmp (R_temp_ik, R_temp_kk) && |
1330 | if (needs_parentheses (R_temp_kk)) | 1313 | 0 == strcmp (R_temp_kk, R_temp_kj)) |
1331 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | 1314 | { |
1332 | else | 1315 | // (e|a)a*(e|a) = a* |
1333 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | 1316 | // (e|a)(e|a)*(e|a) = a* |
1334 | } | 1317 | if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j])) |
1335 | // (e|a)a*a = a+ | 1318 | { |
1336 | // aa*(e|a) = a+ | 1319 | if (needs_parentheses (R_temp_kk)) |
1337 | // a(e|a)*(e|a) = a+ | 1320 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk); |
1338 | // (e|a)a*a = a+ | ||
1339 | else | 1321 | else |
1340 | { | 1322 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk); |
1341 | eps_check = | ||
1342 | (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) + | ||
1343 | has_epsilon (R_last[k][j])); | ||
1344 | |||
1345 | if (eps_check == 1) | ||
1346 | { | ||
1347 | if (needs_parentheses (R_temp_kk)) | ||
1348 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk); | ||
1349 | else | ||
1350 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk); | ||
1351 | } | ||
1352 | } | ||
1353 | } | 1323 | } |
1354 | // aa*b = a+b | 1324 | // aa*a = a+a |
1355 | // (e|a)(e|a)*b = a*b | 1325 | else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp && |
1356 | else if (0 == strcmp (R_temp_ik, R_temp_kk)) | 1326 | !has_epsilon (R_last[i][k])) |
1357 | { | 1327 | { |
1358 | if (has_epsilon (R_last[i][k])) | 1328 | if (needs_parentheses (R_temp_kk)) |
1359 | { | 1329 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); |
1360 | if (needs_parentheses (R_temp_kk)) | ||
1361 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, | ||
1362 | R_last[k][j]); | ||
1363 | else | ||
1364 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); | ||
1365 | } | ||
1366 | else | 1330 | else |
1367 | { | 1331 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); |
1368 | if (needs_parentheses (R_temp_kk)) | ||
1369 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, | ||
1370 | R_last[k][j]); | ||
1371 | else | ||
1372 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]); | ||
1373 | } | ||
1374 | } | 1332 | } |
1375 | // ba*a = ba+ | 1333 | // (e|a)a*a = a+ |
1376 | // b(e|a)*(e|a) = ba* | 1334 | // aa*(e|a) = a+ |
1377 | else if (0 == strcmp (R_temp_kk, R_temp_kj)) | 1335 | // a(e|a)*(e|a) = a+ |
1336 | // (e|a)a*a = a+ | ||
1337 | else | ||
1378 | { | 1338 | { |
1379 | if (has_epsilon (R_last[k][j])) | 1339 | eps_check = |
1340 | (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) + | ||
1341 | has_epsilon (R_last[k][j])); | ||
1342 | |||
1343 | if (eps_check == 1) | ||
1380 | { | 1344 | { |
1381 | if (needs_parentheses (R_temp_kk)) | 1345 | if (needs_parentheses (R_temp_kk)) |
1382 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], | 1346 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk); |
1383 | R_temp_kk); | ||
1384 | else | 1347 | else |
1385 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk); | 1348 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk); |
1386 | } | 1349 | } |
1350 | } | ||
1351 | } | ||
1352 | // aa*b = a+b | ||
1353 | // (e|a)(e|a)*b = a*b | ||
1354 | else if (0 == strcmp (R_temp_ik, R_temp_kk)) | ||
1355 | { | ||
1356 | if (has_epsilon (R_last[i][k])) | ||
1357 | { | ||
1358 | if (needs_parentheses (R_temp_kk)) | ||
1359 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]); | ||
1387 | else | 1360 | else |
1388 | { | 1361 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); |
1389 | if (needs_parentheses (R_temp_kk)) | ||
1390 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], | ||
1391 | R_temp_kk); | ||
1392 | else | ||
1393 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk); | ||
1394 | } | ||
1395 | } | 1362 | } |
1396 | else | 1363 | else |
1397 | { | 1364 | { |
1398 | if (strlen (R_temp_kk) > 0) | 1365 | if (needs_parentheses (R_temp_kk)) |
1399 | { | 1366 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_last[k][j]); |
1400 | if (needs_parentheses (R_temp_kk)) | ||
1401 | { | ||
1402 | GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], | ||
1403 | R_temp_kk, R_last[k][j]); | ||
1404 | } | ||
1405 | else | ||
1406 | { | ||
1407 | GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk, | ||
1408 | R_last[k][j]); | ||
1409 | } | ||
1410 | } | ||
1411 | else | 1367 | else |
1412 | { | 1368 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]); |
1413 | GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]); | ||
1414 | } | ||
1415 | } | 1369 | } |
1416 | } | 1370 | } |
1417 | 1371 | // ba*a = ba+ | |
1418 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */ | 1372 | // b(e|a)*(e|a) = ba* |
1419 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */ | 1373 | else if (0 == strcmp (R_temp_kk, R_temp_kj)) |
1420 | |||
1421 | // putting it all together | ||
1422 | if (NULL != R_cur_l && NULL != R_cur_r) | ||
1423 | { | 1374 | { |
1424 | // a|a = a | 1375 | if (has_epsilon (R_last[k][j])) |
1425 | if (0 == strcmp (R_cur_l, R_cur_r)) | ||
1426 | { | 1376 | { |
1427 | R_cur[i][j] = GNUNET_strdup (R_cur_l); | 1377 | if (needs_parentheses (R_temp_kk)) |
1378 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], R_temp_kk); | ||
1379 | else | ||
1380 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk); | ||
1428 | } | 1381 | } |
1429 | // R_cur_l | R_cur_r | ||
1430 | else | 1382 | else |
1431 | { | 1383 | { |
1432 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); | 1384 | if (needs_parentheses (R_temp_kk)) |
1385 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], R_temp_kk); | ||
1386 | else | ||
1387 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk); | ||
1433 | } | 1388 | } |
1434 | } | 1389 | } |
1435 | else if (NULL != R_cur_l) | 1390 | else |
1436 | { | 1391 | { |
1437 | R_cur[i][j] = GNUNET_strdup (R_cur_l); | 1392 | if (strlen (R_temp_kk) > 0) |
1393 | { | ||
1394 | if (needs_parentheses (R_temp_kk)) | ||
1395 | { | ||
1396 | GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], R_temp_kk, | ||
1397 | R_last[k][j]); | ||
1398 | } | ||
1399 | else | ||
1400 | { | ||
1401 | GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk, | ||
1402 | R_last[k][j]); | ||
1403 | } | ||
1404 | } | ||
1405 | else | ||
1406 | { | ||
1407 | GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]); | ||
1408 | } | ||
1438 | } | 1409 | } |
1439 | else if (NULL != R_cur_r) | 1410 | } |
1411 | |||
1412 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */ | ||
1413 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */ | ||
1414 | |||
1415 | // putting it all together | ||
1416 | if (NULL != R_cur_l && NULL != R_cur_r) | ||
1417 | { | ||
1418 | // a|a = a | ||
1419 | if (0 == strcmp (R_cur_l, R_cur_r)) | ||
1440 | { | 1420 | { |
1441 | R_cur[i][j] = GNUNET_strdup (R_cur_r); | 1421 | R_cur[i][j] = GNUNET_strdup (R_cur_l); |
1442 | } | 1422 | } |
1423 | // R_cur_l | R_cur_r | ||
1443 | else | 1424 | else |
1444 | { | 1425 | { |
1445 | R_cur[i][j] = NULL; | 1426 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); |
1446 | } | 1427 | } |
1428 | } | ||
1429 | else if (NULL != R_cur_l) | ||
1430 | { | ||
1431 | R_cur[i][j] = GNUNET_strdup (R_cur_l); | ||
1432 | } | ||
1433 | else if (NULL != R_cur_r) | ||
1434 | { | ||
1435 | R_cur[i][j] = GNUNET_strdup (R_cur_r); | ||
1436 | } | ||
1437 | else | ||
1438 | { | ||
1439 | R_cur[i][j] = NULL; | ||
1440 | } | ||
1447 | 1441 | ||
1448 | GNUNET_free_non_null (R_cur_l); | 1442 | GNUNET_free_non_null (R_cur_l); |
1449 | GNUNET_free_non_null (R_cur_r); | 1443 | GNUNET_free_non_null (R_cur_r); |
1450 | 1444 | ||
1451 | GNUNET_free_non_null (R_temp_ik); | 1445 | GNUNET_free_non_null (R_temp_ik); |
1452 | GNUNET_free_non_null (R_temp_kk); | 1446 | GNUNET_free_non_null (R_temp_kk); |
1453 | GNUNET_free_non_null (R_temp_kj); | 1447 | GNUNET_free_non_null (R_temp_kj); |
1454 | } | ||
1455 | } | 1448 | } |
1456 | } | 1449 | } |
1457 | 1450 | ||
@@ -1533,7 +1526,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1533 | struct Transition *ctran; | 1526 | struct Transition *ctran; |
1534 | int insert = 1; | 1527 | int insert = 1; |
1535 | struct Transition *t; | 1528 | struct Transition *t; |
1536 | int i; | 1529 | unsigned int i; |
1537 | 1530 | ||
1538 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); | 1531 | s = GNUNET_malloc (sizeof (struct GNUNET_REGEX_State)); |
1539 | s->id = ctx->state_id++; | 1532 | s->id = ctx->state_id++; |
@@ -1715,7 +1708,7 @@ static void | |||
1715 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | 1708 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, |
1716 | struct GNUNET_REGEX_Automaton *a) | 1709 | struct GNUNET_REGEX_Automaton *a) |
1717 | { | 1710 | { |
1718 | int i; | 1711 | unsigned int i; |
1719 | int table[a->state_count][a->state_count]; | 1712 | int table[a->state_count][a->state_count]; |
1720 | struct GNUNET_REGEX_State *s1; | 1713 | struct GNUNET_REGEX_State *s1; |
1721 | struct GNUNET_REGEX_State *s2; | 1714 | struct GNUNET_REGEX_State *s2; |
@@ -1724,7 +1717,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1724 | struct GNUNET_REGEX_State *s1_next; | 1717 | struct GNUNET_REGEX_State *s1_next; |
1725 | struct GNUNET_REGEX_State *s2_next; | 1718 | struct GNUNET_REGEX_State *s2_next; |
1726 | int change; | 1719 | int change; |
1727 | int num_equal_edges; | 1720 | unsigned int num_equal_edges; |
1728 | 1721 | ||
1729 | for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; | 1722 | for (i = 0, s1 = a->states_head; i < a->state_count && NULL != s1; |
1730 | i++, s1 = s1->next) | 1723 | i++, s1 = s1->next) |
@@ -2005,10 +1998,10 @@ nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa, | |||
2005 | struct GNUNET_REGEX_State *s; | 1998 | struct GNUNET_REGEX_State *s; |
2006 | struct GNUNET_REGEX_StateSet *sset; | 1999 | struct GNUNET_REGEX_StateSet *sset; |
2007 | struct GNUNET_REGEX_StateSet *cls; | 2000 | struct GNUNET_REGEX_StateSet *cls; |
2008 | int i; | 2001 | unsigned int i; |
2009 | int j; | 2002 | unsigned int j; |
2010 | int k; | 2003 | unsigned int k; |
2011 | int contains; | 2004 | unsigned int contains; |
2012 | 2005 | ||
2013 | if (NULL == states) | 2006 | if (NULL == states) |
2014 | return NULL; | 2007 | return NULL; |
@@ -2732,7 +2725,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2732 | struct GNUNET_REGEX_State *s; | 2725 | struct GNUNET_REGEX_State *s; |
2733 | struct GNUNET_REGEX_StateSet *sset; | 2726 | struct GNUNET_REGEX_StateSet *sset; |
2734 | struct GNUNET_REGEX_StateSet *new_sset; | 2727 | struct GNUNET_REGEX_StateSet *new_sset; |
2735 | int i; | 2728 | unsigned int i; |
2736 | int result; | 2729 | int result; |
2737 | 2730 | ||
2738 | if (NFA != a->type) | 2731 | if (NFA != a->type) |