diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-04-02 09:39:00 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-04-02 09:39:00 +0000 |
commit | 9333777c8beb3d3c56f2f37d1b0e61fecd08f80c (patch) | |
tree | b1937750480652b426ab8fb7bc67bbae69013fd0 /src/regex/regex.c | |
parent | c9b161c04b8e6349bc2e2856c2d2d02aa03bb1f0 (diff) | |
download | gnunet-9333777c8beb3d3c56f2f37d1b0e61fecd08f80c.tar.gz gnunet-9333777c8beb3d3c56f2f37d1b0e61fecd08f80c.zip |
DFA evaluation
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r-- | src/regex/regex.c | 202 |
1 files changed, 150 insertions, 52 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 1b7bd8565..596acb323 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -43,6 +43,12 @@ struct GNUNET_REGEX_Context | |||
43 | struct GNUNET_REGEX_Automaton *stack_tail; | 43 | struct GNUNET_REGEX_Automaton *stack_tail; |
44 | }; | 44 | }; |
45 | 45 | ||
46 | enum GNUNET_REGEX_automaton_type | ||
47 | { | ||
48 | NFA, | ||
49 | DFA | ||
50 | }; | ||
51 | |||
46 | /** | 52 | /** |
47 | * Automaton representation | 53 | * Automaton representation |
48 | */ | 54 | */ |
@@ -56,6 +62,8 @@ struct GNUNET_REGEX_Automaton | |||
56 | 62 | ||
57 | struct State *states_head; | 63 | struct State *states_head; |
58 | struct State *states_tail; | 64 | struct State *states_tail; |
65 | |||
66 | enum GNUNET_REGEX_automaton_type type; | ||
59 | }; | 67 | }; |
60 | 68 | ||
61 | /** | 69 | /** |
@@ -268,6 +276,54 @@ add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state, | |||
268 | } | 276 | } |
269 | 277 | ||
270 | /** | 278 | /** |
279 | * Clears an automaton fragment. Does not destroy the states inside | ||
280 | * the automaton. | ||
281 | * | ||
282 | * @param a automaton to be cleared | ||
283 | */ | ||
284 | void | ||
285 | automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | ||
286 | { | ||
287 | a->start = NULL; | ||
288 | a->end = NULL; | ||
289 | a->states_head = NULL; | ||
290 | a->states_tail = NULL; | ||
291 | GNUNET_free (a); | ||
292 | } | ||
293 | |||
294 | /** | ||
295 | * Frees the memory used by State 's' | ||
296 | * | ||
297 | * @param s state that should be destroyed | ||
298 | */ | ||
299 | void | ||
300 | automaton_destroy_state (struct State *s) | ||
301 | { | ||
302 | struct Transition *t; | ||
303 | struct Transition *next_t; | ||
304 | |||
305 | if (NULL != s->name) | ||
306 | GNUNET_free (s->name); | ||
307 | |||
308 | for (t = s->transitions_head; NULL != t;) | ||
309 | { | ||
310 | next_t = t->next; | ||
311 | GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); | ||
312 | GNUNET_free (t); | ||
313 | t = next_t; | ||
314 | } | ||
315 | |||
316 | if (NULL != s->nfa_set) | ||
317 | { | ||
318 | if (s->nfa_set->len > 0) | ||
319 | GNUNET_free (s->nfa_set->states); | ||
320 | GNUNET_free (s->nfa_set); | ||
321 | } | ||
322 | |||
323 | GNUNET_free (s); | ||
324 | } | ||
325 | |||
326 | /** | ||
271 | * Creates a new DFA state based on a set of NFA states. Needs to be freed | 327 | * Creates a new DFA state based on a set of NFA states. Needs to be freed |
272 | * using automaton_destroy_state. | 328 | * using automaton_destroy_state. |
273 | * | 329 | * |
@@ -352,6 +408,29 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states) | |||
352 | return s; | 408 | return s; |
353 | } | 409 | } |
354 | 410 | ||
411 | struct State * | ||
412 | dfa_move (struct State *s, const char literal) | ||
413 | { | ||
414 | struct Transition *t; | ||
415 | struct State *new_s; | ||
416 | |||
417 | if (NULL == s) | ||
418 | return NULL; | ||
419 | |||
420 | new_s = NULL; | ||
421 | |||
422 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
423 | { | ||
424 | if (literal == t->literal) | ||
425 | { | ||
426 | new_s = t->state; | ||
427 | break; | ||
428 | } | ||
429 | } | ||
430 | |||
431 | return new_s; | ||
432 | } | ||
433 | |||
355 | /** | 434 | /** |
356 | * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. | 435 | * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. |
357 | * | 436 | * |
@@ -367,6 +446,7 @@ nfa_fragment_create (struct State *start, struct State *end) | |||
367 | 446 | ||
368 | n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); | 447 | n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); |
369 | 448 | ||
449 | n->type = NFA; | ||
370 | n->start = NULL; | 450 | n->start = NULL; |
371 | n->end = NULL; | 451 | n->end = NULL; |
372 | 452 | ||
@@ -437,54 +517,6 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting) | |||
437 | } | 517 | } |
438 | 518 | ||
439 | /** | 519 | /** |
440 | * Clears an automaton fragment. Does not destroy the states inside | ||
441 | * the automaton. | ||
442 | * | ||
443 | * @param a automaton to be cleared | ||
444 | */ | ||
445 | void | ||
446 | automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | ||
447 | { | ||
448 | a->start = NULL; | ||
449 | a->end = NULL; | ||
450 | a->states_head = NULL; | ||
451 | a->states_tail = NULL; | ||
452 | GNUNET_free (a); | ||
453 | } | ||
454 | |||
455 | /** | ||
456 | * Frees the memory used by State 's' | ||
457 | * | ||
458 | * @param s state that should be destroyed | ||
459 | */ | ||
460 | void | ||
461 | automaton_destroy_state (struct State *s) | ||
462 | { | ||
463 | struct Transition *t; | ||
464 | struct Transition *next_t; | ||
465 | |||
466 | if (NULL != s->name) | ||
467 | GNUNET_free (s->name); | ||
468 | |||
469 | for (t = s->transitions_head; NULL != t;) | ||
470 | { | ||
471 | next_t = t->next; | ||
472 | GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); | ||
473 | GNUNET_free (t); | ||
474 | t = next_t; | ||
475 | } | ||
476 | |||
477 | if (NULL != s->nfa_set) | ||
478 | { | ||
479 | if (s->nfa_set->len > 0) | ||
480 | GNUNET_free (s->nfa_set->states); | ||
481 | GNUNET_free (s->nfa_set); | ||
482 | } | ||
483 | |||
484 | GNUNET_free (s); | ||
485 | } | ||
486 | |||
487 | /** | ||
488 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) | 520 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) |
489 | * | 521 | * |
490 | * @param ctx context | 522 | * @param ctx context |
@@ -750,6 +782,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
750 | { | 782 | { |
751 | struct GNUNET_REGEX_Context ctx; | 783 | struct GNUNET_REGEX_Context ctx; |
752 | struct GNUNET_REGEX_Automaton *nfa; | 784 | struct GNUNET_REGEX_Automaton *nfa; |
785 | const char *regexp; | ||
753 | char *error_msg; | 786 | char *error_msg; |
754 | unsigned int count; | 787 | unsigned int count; |
755 | unsigned int altcount; | 788 | unsigned int altcount; |
@@ -763,15 +796,16 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
763 | 796 | ||
764 | GNUNET_REGEX_context_init (&ctx); | 797 | GNUNET_REGEX_context_init (&ctx); |
765 | 798 | ||
799 | regexp = regex; | ||
766 | p = NULL; | 800 | p = NULL; |
767 | error_msg = NULL; | 801 | error_msg = NULL; |
768 | altcount = 0; | 802 | altcount = 0; |
769 | atomcount = 0; | 803 | atomcount = 0; |
770 | pcount = 0; | 804 | pcount = 0; |
771 | 805 | ||
772 | for (count = 0; count < len && *regex; count++, regex++) | 806 | for (count = 0; count < len && *regexp; count++, regexp++) |
773 | { | 807 | { |
774 | switch (*regex) | 808 | switch (*regexp) |
775 | { | 809 | { |
776 | case '(': | 810 | case '(': |
777 | if (atomcount > 1) | 811 | if (atomcount > 1) |
@@ -835,7 +869,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
835 | nfa_add_plus_op (&ctx); | 869 | nfa_add_plus_op (&ctx); |
836 | break; | 870 | break; |
837 | case 92: /* escape: \ */ | 871 | case 92: /* escape: \ */ |
838 | regex++; | 872 | regexp++; |
839 | count++; | 873 | count++; |
840 | default: | 874 | default: |
841 | if (atomcount > 1) | 875 | if (atomcount > 1) |
@@ -843,7 +877,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
843 | --atomcount; | 877 | --atomcount; |
844 | nfa_add_concatenation (&ctx); | 878 | nfa_add_concatenation (&ctx); |
845 | } | 879 | } |
846 | nfa_add_literal (&ctx, *regex); | 880 | nfa_add_literal (&ctx, *regexp); |
847 | atomcount++; | 881 | atomcount++; |
848 | break; | 882 | break; |
849 | } | 883 | } |
@@ -945,6 +979,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
945 | nfa = GNUNET_REGEX_construct_nfa (regex, len); | 979 | nfa = GNUNET_REGEX_construct_nfa (regex, len); |
946 | 980 | ||
947 | dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); | 981 | dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); |
982 | dfa->type = DFA; | ||
948 | 983 | ||
949 | // Create DFA start state from epsilon closure | 984 | // Create DFA start state from epsilon closure |
950 | dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); | 985 | dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); |
@@ -1089,3 +1124,66 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
1089 | fwrite (end, strlen (end), 1, p); | 1124 | fwrite (end, strlen (end), 1, p); |
1090 | fclose (p); | 1125 | fclose (p); |
1091 | } | 1126 | } |
1127 | |||
1128 | int | ||
1129 | evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) | ||
1130 | { | ||
1131 | const char *strp; | ||
1132 | struct State *s; | ||
1133 | |||
1134 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using DFA\n", string); | ||
1135 | |||
1136 | s = a->start; | ||
1137 | |||
1138 | for (strp = string; NULL != strp && *strp; strp++) | ||
1139 | { | ||
1140 | s = dfa_move (s, *strp); | ||
1141 | if (NULL == s) | ||
1142 | break; | ||
1143 | } | ||
1144 | |||
1145 | if (NULL != s && s->accepting) | ||
1146 | return GNUNET_YES; | ||
1147 | |||
1148 | return GNUNET_NO; | ||
1149 | } | ||
1150 | |||
1151 | int | ||
1152 | evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | ||
1153 | { | ||
1154 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using NFA\n", string); | ||
1155 | |||
1156 | return GNUNET_YES; | ||
1157 | } | ||
1158 | |||
1159 | |||
1160 | /** | ||
1161 | * Evaluates the given 'string' against the given compiled regex | ||
1162 | * | ||
1163 | * @param a automaton | ||
1164 | * @param string string to check | ||
1165 | * | ||
1166 | * @return GNUNET_YES if 'a' matches 'string', GNUNET_NO otherwise | ||
1167 | */ | ||
1168 | int | ||
1169 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string) | ||
1170 | { | ||
1171 | int eval; | ||
1172 | |||
1173 | switch (a->type) | ||
1174 | { | ||
1175 | case DFA: | ||
1176 | eval = evaluate_dfa (a, string); | ||
1177 | break; | ||
1178 | case NFA: | ||
1179 | eval = evaluate_nfa (a, string); | ||
1180 | break; | ||
1181 | default: | ||
1182 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1183 | "Evaluating regex failed, automaton has no type!\n"); | ||
1184 | eval = GNUNET_SYSERR; | ||
1185 | break; | ||
1186 | } | ||
1187 | |||
1188 | return eval; | ||
1189 | } | ||