aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/regex/regex.c100
-rw-r--r--src/regex/test_regex.c15
2 files changed, 75 insertions, 40 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index fc9de529c..080e5d31e 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -298,6 +298,7 @@ automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
298 a->end = NULL; 298 a->end = NULL;
299 a->states_head = NULL; 299 a->states_head = NULL;
300 a->states_tail = NULL; 300 a->states_tail = NULL;
301 a->state_count = 0;
301 GNUNET_free (a); 302 GNUNET_free (a);
302} 303}
303 304
@@ -328,6 +329,23 @@ automaton_destroy_state (struct State *s)
328 GNUNET_free (s); 329 GNUNET_free (s);
329} 330}
330 331
332static void
333automaton_remove_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
334{
335 struct State *ss;
336 ss = s;
337 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
338 a->state_count--;
339 automaton_destroy_state (ss);
340}
341
342static void
343automaton_add_state (struct GNUNET_REGEX_Automaton *a, struct State *s)
344{
345 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
346 a->state_count++;
347}
348
331/** 349/**
332 * Creates a new DFA state based on a set of NFA states. Needs to be freed 350 * Creates a new DFA state based on a set of NFA states. Needs to be freed
333 * using automaton_destroy_state. 351 * using automaton_destroy_state.
@@ -457,34 +475,44 @@ dfa_move (struct State *s, const char literal)
457static void 475static void
458dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) 476dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
459{ 477{
460 /* 478 struct State *stack[a->state_count];
461 * struct StateSet *unreachable; 479 int stack_len;
462 * struct State *stack[a->size]; 480 struct State *s;
463 * struct State *s; 481 struct Transition *t;
464 * 482
465 * unreachable = GNUNET_malloc (sizeof (struct StateSet)); 483 stack_len = 0;
466 * 484
467 * // 1. add all states to unreachable set 485 // 1. unmark all states
468 * for (s = a->states_head; NULL != s; s = s->next) 486 for (s = a->states_head; NULL != s; s = s->next)
469 * { 487 {
470 * GNUNET_array_append (unreachable->states, unreachable->len; s); 488 s->marked = 0;
471 * } 489 }
472 * 490
473 * // 2. traverse dfa from start state and remove every visited state from unreachable set 491 // 2. traverse dfa from start state and mark all visited states
474 * s = a->start; 492 stack[stack_len] = a->start;
475 * // push a->start 493 stack_len++;
476 * while (stack->len > 0) 494 while (stack_len > 0)
477 * { 495 {
478 * s = stack->states[stack->len - 1]; 496 s = stack[stack_len-1];
479 * GNUNET_array_grow (stack->states; stack->len; stack->len-1); 497 stack_len--;
480 * GNUNET_array_ 498 s->marked = 1; // mark s as visited
481 * for (t = s->transitions_head; NULL != t; t = t->next) 499 for (t = s->transitions_head; NULL != t; t = t->next)
482 * { 500 {
483 * 501 if (NULL != t->state && 0 == t->state->marked)
484 * } 502 {
485 * } 503 // add next states to stack
486 * // 3. delete all states that are still in the unreachable set 504 stack[stack_len] = t->state;
487 */ 505 stack_len++;
506 }
507 }
508 }
509
510 // 3. delete all states that were not visited
511 for (s = a->states_head; NULL != s; s = s->next)
512 {
513 if (0 == s->marked)
514 automaton_remove_state (a, s);
515 }
488} 516}
489 517
490/** 518/**
@@ -537,7 +565,7 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
537 } 565 }
538 } 566 }
539 // 2. remove state 567 // 2. remove state
540 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); 568 automaton_remove_state (a, s);
541 } 569 }
542} 570}
543 571
@@ -595,8 +623,8 @@ nfa_fragment_create (struct State *start, struct State *end)
595 if (NULL == start && NULL == end) 623 if (NULL == start && NULL == end)
596 return n; 624 return n;
597 625
598 GNUNET_CONTAINER_DLL_insert (n->states_head, n->states_tail, end); 626 automaton_add_state (n, end);
599 GNUNET_CONTAINER_DLL_insert (n->states_head, n->states_tail, start); 627 automaton_add_state (n, start);
600 628
601 n->start = start; 629 n->start = start;
602 n->end = end; 630 n->end = end;
@@ -615,6 +643,8 @@ static void
615nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head, 643nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
616 struct State *states_tail) 644 struct State *states_tail)
617{ 645{
646 struct State *s;
647
618 if (NULL == n || NULL == states_head) 648 if (NULL == n || NULL == states_head)
619 { 649 {
620 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n"); 650 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
@@ -633,6 +663,9 @@ nfa_add_states (struct GNUNET_REGEX_Automaton *n, struct State *states_head,
633 n->states_tail->next = states_head; 663 n->states_tail->next = states_head;
634 n->states_tail = states_tail; 664 n->states_tail = states_tail;
635 } 665 }
666
667 for (s = states_head; NULL != s; s = s->next)
668 n->state_count++;
636} 669}
637 670
638/** 671/**
@@ -1137,7 +1170,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1137 dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); 1170 dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
1138 nfa_set = nfa_closure_create (nfa->start, 0); 1171 nfa_set = nfa_closure_create (nfa->start, 0);
1139 dfa->start = dfa_state_create (&ctx, nfa_set); 1172 dfa->start = dfa_state_create (&ctx, nfa_set);
1140 GNUNET_CONTAINER_DLL_insert (dfa->states_head, dfa->states_tail, dfa->start); 1173 automaton_add_state (dfa, dfa->start);
1141 GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start); 1174 GNUNET_array_append (dfa_stack->states, dfa_stack->len, dfa->start);
1142 while (dfa_stack->len > 0) 1175 while (dfa_stack->len > 0)
1143 { 1176 {
@@ -1164,8 +1197,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1164 1197
1165 if (NULL == state_contains) 1198 if (NULL == state_contains)
1166 { 1199 {
1167 GNUNET_CONTAINER_DLL_insert_tail (dfa->states_head, dfa->states_tail, 1200 automaton_add_state (dfa, new_dfa_state);
1168 new_dfa_state);
1169 GNUNET_array_append (dfa_stack->states, dfa_stack->len, 1201 GNUNET_array_append (dfa_stack->states, dfa_stack->len,
1170 new_dfa_state); 1202 new_dfa_state);
1171 ctran->state = new_dfa_state; 1203 ctran->state = new_dfa_state;
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c
index d4e4e1ec5..4c08c7c53 100644
--- a/src/regex/test_regex.c
+++ b/src/regex/test_regex.c
@@ -156,6 +156,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_
156 } 156 }
157 157
158 eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); 158 eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
159 regfree (&rx);
159 160
160 // We only want to match the whole string, because that's what our DFA does, too. 161 // We only want to match the whole string, because that's what our DFA does, too.
161 if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i]))) 162 if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i])))
@@ -210,8 +211,10 @@ test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t *rx, struct Regex_Stri
210 result = 1; 211 result = 1;
211 regerror (eval_check, rx, error, sizeof error); 212 regerror (eval_check, rx, error, sizeof error);
212 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, 213 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
213 "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n", 214 "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\n"
214 rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], eval, eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo); 215 "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\nrm_so: %i\nrm_eo: %i\n\n",
216 rxstr->regex, rxstr->strings[i], rxstr->expected_results[i],
217 eval, eval_check, error, matchptr[0].rm_so, matchptr[0].rm_eo);
215 } 218 }
216 } 219 }
217 return result; 220 return result;
@@ -228,6 +231,9 @@ main (int argc, char *argv[])
228#endif 231#endif
229 NULL); 232 NULL);
230 233
234 struct GNUNET_REGEX_Automaton *a;
235 regex_t rx;
236 int i;
231 int check_nfa; 237 int check_nfa;
232 int check_dfa; 238 int check_dfa;
233 int check_rand; 239 int check_rand;
@@ -238,9 +244,6 @@ main (int argc, char *argv[])
238 {"ab+c*(a(bx|c)d)+", 5, 244 {"ab+c*(a(bx|c)d)+", 5,
239 {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"}, 245 {"abcdcdcdcdddddabd", "abcd", "abcddddddccccccccccccccccccccccccabdacdabd", "abccccca", "abcdcdcdccdabdabd"},
240 {nomatch, nomatch, nomatch, nomatch, nomatch}}}; 246 {nomatch, nomatch, nomatch, nomatch, nomatch}}};
241 struct GNUNET_REGEX_Automaton *a;
242 regex_t rx;
243 int i;
244 247
245 check_nfa = 0; 248 check_nfa = 0;
246 check_dfa = 0; 249 check_dfa = 0;
@@ -269,7 +272,7 @@ main (int argc, char *argv[])
269 272
270 srand (time(NULL)); 273 srand (time(NULL));
271 for (i=0; i< 100; i++) 274 for (i=0; i< 100; i++)
272 check_rand += test_random (100, 100, 10); 275 check_rand += test_random (100, 150, 10);
273 276
274 return check_nfa + check_dfa + check_rand; 277 return check_nfa + check_dfa + check_rand;
275} 278}