diff options
-rw-r--r-- | src/regex/regex.c | 100 | ||||
-rw-r--r-- | src/regex/test_regex.c | 15 |
2 files changed, 75 insertions, 40 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index fc9de529c..080e5d31e 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -298,6 +298,7 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | |||
298 | a->end = NULL; | 298 | a->end = NULL; |
299 | a->states_head = NULL; | 299 | a->states_head = NULL; |
300 | a->states_tail = NULL; | 300 | a->states_tail = NULL; |
301 | a->state_count = 0; | ||
301 | GNUNET_free (a); | 302 | GNUNET_free (a); |
302 | } | 303 | } |
303 | 304 | ||
@@ -328,6 +329,23 @@ automaton_destroy_state (struct State *s) | |||
328 | GNUNET_free (s); | 329 | GNUNET_free (s); |
329 | } | 330 | } |
330 | 331 | ||
332 | static void | ||
333 | automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s) | ||
334 | { | ||
335 | struct State *ss; | ||
336 | ss = s; | ||
337 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); | ||
338 | a->state_count--; | ||
339 | automaton_destroy_state (ss); | ||
340 | } | ||
341 | |||
342 | static void | ||
343 | automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s) | ||
344 | { | ||
345 | GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s); | ||
346 | a->state_count++; | ||
347 | } | ||
348 | |||
331 | /** | 349 | /** |
332 | * Creates a new DFA state based on a set of NFA states. Needs to be freed | 350 | * Creates a new DFA state based on a set of NFA states. Needs to be freed |
333 | * using automaton_destroy_state. | 351 | * using automaton_destroy_state. |
@@ -457,34 +475,44 @@ dfa_move (struct State *s, const char literal) | |||
457 | static void | 475 | static void |
458 | dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | 476 | dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) |
459 | { | 477 | { |
460 | /* | 478 | struct State *stack[a->state_count]; |
461 | * struct StateSet *unreachable; | 479 | int stack_len; |
462 | * struct State *stack[a->size]; | 480 | struct State *s; |
463 | * struct State *s; | 481 | struct Transition *t; |
464 | * | 482 | |
465 | * unreachable = GNUNET_malloc (sizeof (struct StateSet)); | 483 | stack_len = 0; |
466 | * | 484 | |
467 | * // 1. add all states to unreachable set | 485 | // 1. unmark all states |
468 | * for (s = a->states_head; NULL != s; s = s->next) | 486 | for (s = a->states_head; NULL != s; s = s->next) |
469 | * { | 487 | { |
470 | * GNUNET_array_append (unreachable->states, unreachable->len; s); | 488 | s->marked = 0; |
471 | * } | 489 | } |
472 | * | 490 | |
473 | * // 2. traverse dfa from start state and remove every visited state from unreachable set | 491 | // 2. traverse dfa from start state and mark all visited states |
474 | * s = a->start; | 492 | stack[stack_len] = a->start; |
475 | * // push a->start | 493 | stack_len++; |
476 | * while (stack->len > 0) | 494 | while (stack_len > 0) |
477 | * { | 495 | { |
478 | * s = stack->states[stack->len - 1]; | 496 | s = stack[stack_len-1]; |
479 | * GNUNET_array_grow (stack->states; stack->len; stack->len-1); | 497 | stack_len--; |
480 | * GNUNET_array_ | 498 | s->marked = 1; // mark s as visited |
481 | * for (t = s->transitions_head; NULL != t; t = t->next) | 499 | for (t = s->transitions_head; NULL != t; t = t->next) |
482 | * { | 500 | { |
483 | * | 501 | if (NULL != t->state && 0 == t->state->marked) |
484 | * } | 502 | { |
485 | * } | 503 | // add next states to stack |
486 | * // 3. delete all states that are still in the unreachable set | 504 | stack[stack_len] = t->state; |
487 | */ | 505 | stack_len++; |
506 | } | ||
507 | } | ||
508 | } | ||
509 | |||
510 | // 3. delete all states that were not visited | ||
511 | for (s = a->states_head; NULL != s; s = s->next) | ||
512 | { | ||
513 | if (0 == s->marked) | ||
514 | automaton_remove_state (a, s); | ||
515 | } | ||
488 | } | 516 | } |
489 | 517 | ||
490 | /** | 518 | /** |
@@ -537,7 +565,7 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) | |||
537 | } | 565 | } |
538 | } | 566 | } |
539 | // 2. remove state | 567 | // 2. remove state |
540 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); | 568 | automaton_remove_state (a, s); |
541 | } | 569 | } |
542 | } | 570 | } |
543 | 571 | ||
@@ -595,8 +623,8 @@ nfa_fragment_create (struct State *start, struct State *end) | |||
595 | if (NULL == start && NULL == end) | 623 | if (NULL == start && NULL == end) |
596 | return n; | 624 | return n; |
597 | 625 | ||
598 | GNUNET_CONTAINER_DLL_insert (n->states_head, n->states_tail, end); | 626 | automaton_add_state (n, end); |
599 | GNUNET_CONTAINER_DLL_insert (n->states_head, n->states_tail, start); | 627 | automaton_add_state (n, start); |
600 | 628 | ||
601 | n->start = start; | 629 | n->start = start; |
602 | n->end = end; | 630 | n->end = end; |
@@ -615,6 +643,8 @@ static void | |||
615 | nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head, | 643 | nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head, |
616 | struct State *states_tail) | 644 | struct State *states_tail) |
617 | { | 645 | { |
646 | struct State *s; | ||
647 | |||
618 | if (NULL == n || NULL == states_head) | 648 | if (NULL == n || NULL == states_head) |
619 | { | 649 | { |
620 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n"); | 650 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n"); |
@@ -633,6 +663,9 @@ nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head, | |||
633 | n->states_tail->next = states_head; | 663 | n->states_tail->next = states_head; |
634 | n->states_tail = states_tail; | 664 | n->states_tail = states_tail; |
635 | } | 665 | } |
666 | |||
667 | for (s = states_head; NULL != s; s = s->next) | ||
668 | n->state_count++; | ||
636 | } | 669 | } |
637 | 670 | ||
638 | /** | 671 | /** |
@@ -1137,7 +1170,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1137 | dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); | 1170 | dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); |
1138 | nfa_set = nfa_closure_create (nfa->start, 0); | 1171 | nfa_set = nfa_closure_create (nfa->start, 0); |
1139 | dfa->start = dfa_state_create (&ctx, nfa_set); | 1172 | dfa->start = dfa_state_create (&ctx, nfa_set); |
1140 | GNUNET_CONTAINER_DLL_insert (dfa->states_head, dfa->states_tail, dfa->start); | 1173 | automaton_add_state (dfa, dfa->start); |
1141 | GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start); | 1174 | GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start); |
1142 | while (dfa_stack->len > 0) | 1175 | while (dfa_stack->len > 0) |
1143 | { | 1176 | { |
@@ -1164,8 +1197,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1164 | 1197 | ||
1165 | if (NULL == state_contains) | 1198 | if (NULL == state_contains) |
1166 | { | 1199 | { |
1167 | GNUNET_CONTAINER_DLL_insert_tail (dfa->states_head, dfa->states_tail, | 1200 | automaton_add_state (dfa, new_dfa_state); |
1168 | new_dfa_state); | ||
1169 | GNUNET_array_append (dfa_stack->states, dfa_stack->len, | 1201 | GNUNET_array_append (dfa_stack->states, dfa_stack->len, |
1170 | new_dfa_state); | 1202 | new_dfa_state); |
1171 | ctran->state = new_dfa_state; | 1203 | ctran->state = new_dfa_state; |
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c index d4e4e1ec5..4c08c7c53 100644 --- a/src/regex/test_regex.c +++ b/src/regex/test_regex.c | |||
@@ -156,6 +156,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_ | |||
156 | } | 156 | } |
157 | 157 | ||
158 | eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); | 158 | eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); |
159 | regfree (&rx); | ||
159 | 160 | ||
160 | // We only want to match the whole string, because that's what our DFA does, too. | 161 | // We only want to match the whole string, because that's what our DFA does, too. |
161 | if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i]))) | 162 | if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i]))) |
@@ -210,8 +211,10 @@ test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t *rx, struct Regex_Stri | |||
210 | result = 1; | 211 | result = 1; |
211 | regerror (eval_check, rx, error, sizeof error); | 212 | regerror (eval_check, rx, error, sizeof error); |
212 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 213 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
213 | "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n", | 214 | "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n" |
214 | rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], eval, eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo); | 215 | "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n", |
216 | rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], | ||
217 | eval, eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo); | ||
215 | } | 218 | } |
216 | } | 219 | } |
217 | return result; | 220 | return result; |
@@ -228,6 +231,9 @@ main (int argc, char *argv[]) | |||
228 | #endif | 231 | #endif |
229 | NULL); | 232 | NULL); |
230 | 233 | ||
234 | struct GNUNET_REGEX_Automaton *a; | ||
235 | regex_t rx; | ||
236 | int i; | ||
231 | int check_nfa; | 237 | int check_nfa; |
232 | int check_dfa; | 238 | int check_dfa; |
233 | int check_rand; | 239 | int check_rand; |
@@ -238,9 +244,6 @@ main (int argc, char *argv[]) | |||
238 | {"ab+c*(a(bx|c)d)+", 5, | 244 | {"ab+c*(a(bx|c)d)+", 5, |
239 | {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"}, | 245 | {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"}, |
240 | {nomatch, nomatch, nomatch, nomatch, nomatch}}}; | 246 | {nomatch, nomatch, nomatch, nomatch, nomatch}}}; |
241 | struct GNUNET_REGEX_Automaton *a; | ||
242 | regex_t rx; | ||
243 | int i; | ||
244 | 247 | ||
245 | check_nfa = 0; | 248 | check_nfa = 0; |
246 | check_dfa = 0; | 249 | check_dfa = 0; |
@@ -269,7 +272,7 @@ main (int argc, char *argv[]) | |||
269 | 272 | ||
270 | srand (time(NULL)); | 273 | srand (time(NULL)); |
271 | for (i=0; i< 100; i++) | 274 | for (i=0; i< 100; i++) |
272 | check_rand += test_random (100, 100, 10); | 275 | check_rand += test_random (100, 150, 10); |
273 | 276 | ||
274 | return check_nfa + check_dfa + check_rand; | 277 | return check_nfa + check_dfa + check_rand; |
275 | } | 278 | } |