diff options
-rw-r--r-- | src/include/gnunet_regex_lib.h | 22 | ||||
-rw-r--r-- | src/regex/regex.c | 202 | ||||
-rw-r--r-- | src/regex/test_regex.c | 13 |
3 files changed, 180 insertions, 57 deletions
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index 890a1fb50..1138ad4ca 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h | |||
@@ -38,7 +38,7 @@ extern "C" | |||
38 | #endif | 38 | #endif |
39 | 39 | ||
40 | /** | 40 | /** |
41 | * NFA representation | 41 | * Automaton (NFA/DFA) representation |
42 | */ | 42 | */ |
43 | struct GNUNET_REGEX_Automaton; | 43 | struct GNUNET_REGEX_Automaton; |
44 | 44 | ||
@@ -51,7 +51,7 @@ struct GNUNET_REGEX_Automaton; | |||
51 | * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton | 51 | * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton |
52 | */ | 52 | */ |
53 | struct GNUNET_REGEX_Automaton * | 53 | struct GNUNET_REGEX_Automaton * |
54 | GNUNET_REGEX_construct_nfa(const char *regex, const size_t len); | 54 | GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); |
55 | 55 | ||
56 | /** | 56 | /** |
57 | * Construct DFA for the given 'regex' of length 'len' | 57 | * Construct DFA for the given 'regex' of length 'len' |
@@ -71,7 +71,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); | |||
71 | * @param a automaton to be destroyed | 71 | * @param a automaton to be destroyed |
72 | */ | 72 | */ |
73 | void | 73 | void |
74 | GNUNET_REGEX_automaton_destroy(struct GNUNET_REGEX_Automaton *a); | 74 | GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a); |
75 | 75 | ||
76 | /** | 76 | /** |
77 | * Save the given automaton as a GraphViz dot file | 77 | * Save the given automaton as a GraphViz dot file |
@@ -80,8 +80,20 @@ GNUNET_REGEX_automaton_destroy(struct GNUNET_REGEX_Automaton *a); | |||
80 | * @param filename where to save the file | 80 | * @param filename where to save the file |
81 | */ | 81 | */ |
82 | void | 82 | void |
83 | GNUNET_REGEX_automaton_save_graph(struct GNUNET_REGEX_Automaton *a, | 83 | GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, |
84 | const char *filename); | 84 | const char *filename); |
85 | |||
86 | /** | ||
87 | * Evaluates the given 'string' against the given compiled regex | ||
88 | * | ||
89 | * @param a automaton | ||
90 | * @param string string to check | ||
91 | * | ||
92 | * @return GNUNET_YES if 'a' matches 'string', GNUNET_NO otherwise | ||
93 | */ | ||
94 | int | ||
95 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, | ||
96 | const char *string); | ||
85 | 97 | ||
86 | #if 0 /* keep Emacsens' auto-indent happy */ | 98 | #if 0 /* keep Emacsens' auto-indent happy */ |
87 | { | 99 | { |
diff --git a/src/regex/regex.c b/src/regex/regex.c index 1b7bd8565..596acb323 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -43,6 +43,12 @@ struct GNUNET_REGEX_Context | |||
43 | struct GNUNET_REGEX_Automaton *stack_tail; | 43 | struct GNUNET_REGEX_Automaton *stack_tail; |
44 | }; | 44 | }; |
45 | 45 | ||
46 | enum GNUNET_REGEX_automaton_type | ||
47 | { | ||
48 | NFA, | ||
49 | DFA | ||
50 | }; | ||
51 | |||
46 | /** | 52 | /** |
47 | * Automaton representation | 53 | * Automaton representation |
48 | */ | 54 | */ |
@@ -56,6 +62,8 @@ struct GNUNET_REGEX_Automaton | |||
56 | 62 | ||
57 | struct State *states_head; | 63 | struct State *states_head; |
58 | struct State *states_tail; | 64 | struct State *states_tail; |
65 | |||
66 | enum GNUNET_REGEX_automaton_type type; | ||
59 | }; | 67 | }; |
60 | 68 | ||
61 | /** | 69 | /** |
@@ -268,6 +276,54 @@ add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state, | |||
268 | } | 276 | } |
269 | 277 | ||
270 | /** | 278 | /** |
279 | * Clears an automaton fragment. Does not destroy the states inside | ||
280 | * the automaton. | ||
281 | * | ||
282 | * @param a automaton to be cleared | ||
283 | */ | ||
284 | void | ||
285 | automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | ||
286 | { | ||
287 | a->start = NULL; | ||
288 | a->end = NULL; | ||
289 | a->states_head = NULL; | ||
290 | a->states_tail = NULL; | ||
291 | GNUNET_free (a); | ||
292 | } | ||
293 | |||
294 | /** | ||
295 | * Frees the memory used by State 's' | ||
296 | * | ||
297 | * @param s state that should be destroyed | ||
298 | */ | ||
299 | void | ||
300 | automaton_destroy_state (struct State *s) | ||
301 | { | ||
302 | struct Transition *t; | ||
303 | struct Transition *next_t; | ||
304 | |||
305 | if (NULL != s->name) | ||
306 | GNUNET_free (s->name); | ||
307 | |||
308 | for (t = s->transitions_head; NULL != t;) | ||
309 | { | ||
310 | next_t = t->next; | ||
311 | GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); | ||
312 | GNUNET_free (t); | ||
313 | t = next_t; | ||
314 | } | ||
315 | |||
316 | if (NULL != s->nfa_set) | ||
317 | { | ||
318 | if (s->nfa_set->len > 0) | ||
319 | GNUNET_free (s->nfa_set->states); | ||
320 | GNUNET_free (s->nfa_set); | ||
321 | } | ||
322 | |||
323 | GNUNET_free (s); | ||
324 | } | ||
325 | |||
326 | /** | ||
271 | * Creates a new DFA state based on a set of NFA states. Needs to be freed | 327 | * Creates a new DFA state based on a set of NFA states. Needs to be freed |
272 | * using automaton_destroy_state. | 328 | * using automaton_destroy_state. |
273 | * | 329 | * |
@@ -352,6 +408,29 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states) | |||
352 | return s; | 408 | return s; |
353 | } | 409 | } |
354 | 410 | ||
411 | struct State * | ||
412 | dfa_move (struct State *s, const char literal) | ||
413 | { | ||
414 | struct Transition *t; | ||
415 | struct State *new_s; | ||
416 | |||
417 | if (NULL == s) | ||
418 | return NULL; | ||
419 | |||
420 | new_s = NULL; | ||
421 | |||
422 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
423 | { | ||
424 | if (literal == t->literal) | ||
425 | { | ||
426 | new_s = t->state; | ||
427 | break; | ||
428 | } | ||
429 | } | ||
430 | |||
431 | return new_s; | ||
432 | } | ||
433 | |||
355 | /** | 434 | /** |
356 | * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. | 435 | * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. |
357 | * | 436 | * |
@@ -367,6 +446,7 @@ nfa_fragment_create (struct State *start, struct State *end) | |||
367 | 446 | ||
368 | n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); | 447 | n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); |
369 | 448 | ||
449 | n->type = NFA; | ||
370 | n->start = NULL; | 450 | n->start = NULL; |
371 | n->end = NULL; | 451 | n->end = NULL; |
372 | 452 | ||
@@ -437,54 +517,6 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting) | |||
437 | } | 517 | } |
438 | 518 | ||
439 | /** | 519 | /** |
440 | * Clears an automaton fragment. Does not destroy the states inside | ||
441 | * the automaton. | ||
442 | * | ||
443 | * @param a automaton to be cleared | ||
444 | */ | ||
445 | void | ||
446 | automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a) | ||
447 | { | ||
448 | a->start = NULL; | ||
449 | a->end = NULL; | ||
450 | a->states_head = NULL; | ||
451 | a->states_tail = NULL; | ||
452 | GNUNET_free (a); | ||
453 | } | ||
454 | |||
455 | /** | ||
456 | * Frees the memory used by State 's' | ||
457 | * | ||
458 | * @param s state that should be destroyed | ||
459 | */ | ||
460 | void | ||
461 | automaton_destroy_state (struct State *s) | ||
462 | { | ||
463 | struct Transition *t; | ||
464 | struct Transition *next_t; | ||
465 | |||
466 | if (NULL != s->name) | ||
467 | GNUNET_free (s->name); | ||
468 | |||
469 | for (t = s->transitions_head; NULL != t;) | ||
470 | { | ||
471 | next_t = t->next; | ||
472 | GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t); | ||
473 | GNUNET_free (t); | ||
474 | t = next_t; | ||
475 | } | ||
476 | |||
477 | if (NULL != s->nfa_set) | ||
478 | { | ||
479 | if (s->nfa_set->len > 0) | ||
480 | GNUNET_free (s->nfa_set->states); | ||
481 | GNUNET_free (s->nfa_set); | ||
482 | } | ||
483 | |||
484 | GNUNET_free (s); | ||
485 | } | ||
486 | |||
487 | /** | ||
488 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) | 520 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) |
489 | * | 521 | * |
490 | * @param ctx context | 522 | * @param ctx context |
@@ -750,6 +782,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
750 | { | 782 | { |
751 | struct GNUNET_REGEX_Context ctx; | 783 | struct GNUNET_REGEX_Context ctx; |
752 | struct GNUNET_REGEX_Automaton *nfa; | 784 | struct GNUNET_REGEX_Automaton *nfa; |
785 | const char *regexp; | ||
753 | char *error_msg; | 786 | char *error_msg; |
754 | unsigned int count; | 787 | unsigned int count; |
755 | unsigned int altcount; | 788 | unsigned int altcount; |
@@ -763,15 +796,16 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
763 | 796 | ||
764 | GNUNET_REGEX_context_init (&ctx); | 797 | GNUNET_REGEX_context_init (&ctx); |
765 | 798 | ||
799 | regexp = regex; | ||
766 | p = NULL; | 800 | p = NULL; |
767 | error_msg = NULL; | 801 | error_msg = NULL; |
768 | altcount = 0; | 802 | altcount = 0; |
769 | atomcount = 0; | 803 | atomcount = 0; |
770 | pcount = 0; | 804 | pcount = 0; |
771 | 805 | ||
772 | for (count = 0; count < len && *regex; count++, regex++) | 806 | for (count = 0; count < len && *regexp; count++, regexp++) |
773 | { | 807 | { |
774 | switch (*regex) | 808 | switch (*regexp) |
775 | { | 809 | { |
776 | case '(': | 810 | case '(': |
777 | if (atomcount > 1) | 811 | if (atomcount > 1) |
@@ -835,7 +869,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
835 | nfa_add_plus_op (&ctx); | 869 | nfa_add_plus_op (&ctx); |
836 | break; | 870 | break; |
837 | case 92: /* escape: \ */ | 871 | case 92: /* escape: \ */ |
838 | regex++; | 872 | regexp++; |
839 | count++; | 873 | count++; |
840 | default: | 874 | default: |
841 | if (atomcount > 1) | 875 | if (atomcount > 1) |
@@ -843,7 +877,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
843 | --atomcount; | 877 | --atomcount; |
844 | nfa_add_concatenation (&ctx); | 878 | nfa_add_concatenation (&ctx); |
845 | } | 879 | } |
846 | nfa_add_literal (&ctx, *regex); | 880 | nfa_add_literal (&ctx, *regexp); |
847 | atomcount++; | 881 | atomcount++; |
848 | break; | 882 | break; |
849 | } | 883 | } |
@@ -945,6 +979,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
945 | nfa = GNUNET_REGEX_construct_nfa (regex, len); | 979 | nfa = GNUNET_REGEX_construct_nfa (regex, len); |
946 | 980 | ||
947 | dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); | 981 | dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); |
982 | dfa->type = DFA; | ||
948 | 983 | ||
949 | // Create DFA start state from epsilon closure | 984 | // Create DFA start state from epsilon closure |
950 | dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); | 985 | dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); |
@@ -1089,3 +1124,66 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a, | |||
1089 | fwrite (end, strlen (end), 1, p); | 1124 | fwrite (end, strlen (end), 1, p); |
1090 | fclose (p); | 1125 | fclose (p); |
1091 | } | 1126 | } |
1127 | |||
1128 | int | ||
1129 | evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) | ||
1130 | { | ||
1131 | const char *strp; | ||
1132 | struct State *s; | ||
1133 | |||
1134 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using DFA\n", string); | ||
1135 | |||
1136 | s = a->start; | ||
1137 | |||
1138 | for (strp = string; NULL != strp && *strp; strp++) | ||
1139 | { | ||
1140 | s = dfa_move (s, *strp); | ||
1141 | if (NULL == s) | ||
1142 | break; | ||
1143 | } | ||
1144 | |||
1145 | if (NULL != s && s->accepting) | ||
1146 | return GNUNET_YES; | ||
1147 | |||
1148 | return GNUNET_NO; | ||
1149 | } | ||
1150 | |||
1151 | int | ||
1152 | evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | ||
1153 | { | ||
1154 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using NFA\n", string); | ||
1155 | |||
1156 | return GNUNET_YES; | ||
1157 | } | ||
1158 | |||
1159 | |||
1160 | /** | ||
1161 | * Evaluates the given 'string' against the given compiled regex | ||
1162 | * | ||
1163 | * @param a automaton | ||
1164 | * @param string string to check | ||
1165 | * | ||
1166 | * @return GNUNET_YES if 'a' matches 'string', GNUNET_NO otherwise | ||
1167 | */ | ||
1168 | int | ||
1169 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string) | ||
1170 | { | ||
1171 | int eval; | ||
1172 | |||
1173 | switch (a->type) | ||
1174 | { | ||
1175 | case DFA: | ||
1176 | eval = evaluate_dfa (a, string); | ||
1177 | break; | ||
1178 | case NFA: | ||
1179 | eval = evaluate_nfa (a, string); | ||
1180 | break; | ||
1181 | default: | ||
1182 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1183 | "Evaluating regex failed, automaton has no type!\n"); | ||
1184 | eval = GNUNET_SYSERR; | ||
1185 | break; | ||
1186 | } | ||
1187 | |||
1188 | return eval; | ||
1189 | } | ||
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c index dcc4ce0ed..56aea52a7 100644 --- a/src/regex/test_regex.c +++ b/src/regex/test_regex.c | |||
@@ -41,11 +41,14 @@ main (int argc, char *argv[]) | |||
41 | struct GNUNET_REGEX_Automaton *nfa; | 41 | struct GNUNET_REGEX_Automaton *nfa; |
42 | struct GNUNET_REGEX_Automaton *dfa; | 42 | struct GNUNET_REGEX_Automaton *dfa; |
43 | char *regex; | 43 | char *regex; |
44 | char *string; | ||
45 | int eval; | ||
44 | 46 | ||
45 | nfa = NULL; | 47 | nfa = NULL; |
46 | dfa = NULL; | 48 | dfa = NULL; |
47 | 49 | ||
48 | regex = "a\\*b(c|d)+c*(a(b|c)d)+"; | 50 | regex = "a\\*b(c|d)+c*(a(b|c)d)+"; |
51 | string = "a*bcabd"; | ||
49 | /*regex = "\\*a(a|b)b"; */ | 52 | /*regex = "\\*a(a|b)b"; */ |
50 | /*regex = "a(a|b)c"; */ | 53 | /*regex = "a(a|b)c"; */ |
51 | /*regex = "(a|aa)+"; */ | 54 | /*regex = "(a|aa)+"; */ |
@@ -54,6 +57,11 @@ main (int argc, char *argv[]) | |||
54 | if (nfa) | 57 | if (nfa) |
55 | { | 58 | { |
56 | GNUNET_REGEX_automaton_save_graph (nfa, "nfa_graph.dot"); | 59 | GNUNET_REGEX_automaton_save_graph (nfa, "nfa_graph.dot"); |
60 | eval = GNUNET_REGEX_eval (nfa, string); | ||
61 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating %s result: %i\n", string, | ||
62 | eval); | ||
63 | if (GNUNET_YES != eval) | ||
64 | err = 1; | ||
57 | GNUNET_REGEX_automaton_destroy (nfa); | 65 | GNUNET_REGEX_automaton_destroy (nfa); |
58 | } | 66 | } |
59 | else | 67 | else |
@@ -63,6 +71,11 @@ main (int argc, char *argv[]) | |||
63 | if (dfa) | 71 | if (dfa) |
64 | { | 72 | { |
65 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa_graph.dot"); | 73 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa_graph.dot"); |
74 | eval = GNUNET_REGEX_eval (dfa, string); | ||
75 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating %s result: %i\n", string, | ||
76 | eval); | ||
77 | if (GNUNET_YES != eval) | ||
78 | err = 1; | ||
66 | GNUNET_REGEX_automaton_destroy (dfa); | 79 | GNUNET_REGEX_automaton_destroy (dfa); |
67 | } | 80 | } |
68 | return err; | 81 | return err; |