aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/include/gnunet_regex_lib.h22
-rw-r--r--src/regex/regex.c202
-rw-r--r--src/regex/test_regex.c13
3 files changed, 180 insertions, 57 deletions
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h
index 890a1fb50..1138ad4ca 100644
--- a/src/include/gnunet_regex_lib.h
+++ b/src/include/gnunet_regex_lib.h
@@ -38,7 +38,7 @@ extern "C"
38#endif 38#endif
39 39
40/** 40/**
41 * NFA representation 41 * Automaton (NFA/DFA) representation
42 */ 42 */
43struct GNUNET_REGEX_Automaton; 43struct GNUNET_REGEX_Automaton;
44 44
@@ -51,7 +51,7 @@ struct GNUNET_REGEX_Automaton;
51 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton 51 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton
52 */ 52 */
53struct GNUNET_REGEX_Automaton * 53struct GNUNET_REGEX_Automaton *
54GNUNET_REGEX_construct_nfa(const char *regex, const size_t len); 54GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
55 55
56/** 56/**
57 * Construct DFA for the given 'regex' of length 'len' 57 * Construct DFA for the given 'regex' of length 'len'
@@ -71,7 +71,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);
71 * @param a automaton to be destroyed 71 * @param a automaton to be destroyed
72 */ 72 */
73void 73void
74GNUNET_REGEX_automaton_destroy(struct GNUNET_REGEX_Automaton *a); 74GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a);
75 75
76/** 76/**
77 * Save the given automaton as a GraphViz dot file 77 * Save the given automaton as a GraphViz dot file
@@ -80,8 +80,20 @@ GNUNET_REGEX_automaton_destroy(struct GNUNET_REGEX_Automaton *a);
80 * @param filename where to save the file 80 * @param filename where to save the file
81 */ 81 */
82void 82void
83GNUNET_REGEX_automaton_save_graph(struct GNUNET_REGEX_Automaton *a, 83GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
84 const char *filename); 84 const char *filename);
85
86/**
87 * Evaluates the given 'string' against the given compiled regex
88 *
89 * @param a automaton
90 * @param string string to check
91 *
92 * @return GNUNET_YES if 'a' matches 'string', GNUNET_NO otherwise
93 */
94int
95GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a,
96 const char *string);
85 97
86#if 0 /* keep Emacsens' auto-indent happy */ 98#if 0 /* keep Emacsens' auto-indent happy */
87{ 99{
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 1b7bd8565..596acb323 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -43,6 +43,12 @@ struct GNUNET_REGEX_Context
43 struct GNUNET_REGEX_Automaton *stack_tail; 43 struct GNUNET_REGEX_Automaton *stack_tail;
44}; 44};
45 45
46enum GNUNET_REGEX_automaton_type
47{
48 NFA,
49 DFA
50};
51
46/** 52/**
47 * Automaton representation 53 * Automaton representation
48 */ 54 */
@@ -56,6 +62,8 @@ struct GNUNET_REGEX_Automaton
56 62
57 struct State *states_head; 63 struct State *states_head;
58 struct State *states_tail; 64 struct State *states_tail;
65
66 enum GNUNET_REGEX_automaton_type type;
59}; 67};
60 68
61/** 69/**
@@ -268,6 +276,54 @@ add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state,
268} 276}
269 277
270/** 278/**
279 * Clears an automaton fragment. Does not destroy the states inside
280 * the automaton.
281 *
282 * @param a automaton to be cleared
283 */
284void
285automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
286{
287 a->start = NULL;
288 a->end = NULL;
289 a->states_head = NULL;
290 a->states_tail = NULL;
291 GNUNET_free (a);
292}
293
294/**
295 * Frees the memory used by State 's'
296 *
297 * @param s state that should be destroyed
298 */
299void
300automaton_destroy_state (struct State *s)
301{
302 struct Transition *t;
303 struct Transition *next_t;
304
305 if (NULL != s->name)
306 GNUNET_free (s->name);
307
308 for (t = s->transitions_head; NULL != t;)
309 {
310 next_t = t->next;
311 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
312 GNUNET_free (t);
313 t = next_t;
314 }
315
316 if (NULL != s->nfa_set)
317 {
318 if (s->nfa_set->len > 0)
319 GNUNET_free (s->nfa_set->states);
320 GNUNET_free (s->nfa_set);
321 }
322
323 GNUNET_free (s);
324}
325
326/**
271 * Creates a new DFA state based on a set of NFA states. Needs to be freed 327 * Creates a new DFA state based on a set of NFA states. Needs to be freed
272 * using automaton_destroy_state. 328 * using automaton_destroy_state.
273 * 329 *
@@ -352,6 +408,29 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
352 return s; 408 return s;
353} 409}
354 410
411struct State *
412dfa_move (struct State *s, const char literal)
413{
414 struct Transition *t;
415 struct State *new_s;
416
417 if (NULL == s)
418 return NULL;
419
420 new_s = NULL;
421
422 for (t = s->transitions_head; NULL != t; t = t->next)
423 {
424 if (literal == t->literal)
425 {
426 new_s = t->state;
427 break;
428 }
429 }
430
431 return new_s;
432}
433
355/** 434/**
356 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. 435 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
357 * 436 *
@@ -367,6 +446,7 @@ nfa_fragment_create (struct State *start, struct State *end)
367 446
368 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); 447 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
369 448
449 n->type = NFA;
370 n->start = NULL; 450 n->start = NULL;
371 n->end = NULL; 451 n->end = NULL;
372 452
@@ -437,54 +517,6 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
437} 517}
438 518
439/** 519/**
440 * Clears an automaton fragment. Does not destroy the states inside
441 * the automaton.
442 *
443 * @param a automaton to be cleared
444 */
445void
446automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
447{
448 a->start = NULL;
449 a->end = NULL;
450 a->states_head = NULL;
451 a->states_tail = NULL;
452 GNUNET_free (a);
453}
454
455/**
456 * Frees the memory used by State 's'
457 *
458 * @param s state that should be destroyed
459 */
460void
461automaton_destroy_state (struct State *s)
462{
463 struct Transition *t;
464 struct Transition *next_t;
465
466 if (NULL != s->name)
467 GNUNET_free (s->name);
468
469 for (t = s->transitions_head; NULL != t;)
470 {
471 next_t = t->next;
472 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
473 GNUNET_free (t);
474 t = next_t;
475 }
476
477 if (NULL != s->nfa_set)
478 {
479 if (s->nfa_set->len > 0)
480 GNUNET_free (s->nfa_set->states);
481 GNUNET_free (s->nfa_set);
482 }
483
484 GNUNET_free (s);
485}
486
487/**
488 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) 520 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
489 * 521 *
490 * @param ctx context 522 * @param ctx context
@@ -750,6 +782,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
750{ 782{
751 struct GNUNET_REGEX_Context ctx; 783 struct GNUNET_REGEX_Context ctx;
752 struct GNUNET_REGEX_Automaton *nfa; 784 struct GNUNET_REGEX_Automaton *nfa;
785 const char *regexp;
753 char *error_msg; 786 char *error_msg;
754 unsigned int count; 787 unsigned int count;
755 unsigned int altcount; 788 unsigned int altcount;
@@ -763,15 +796,16 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
763 796
764 GNUNET_REGEX_context_init (&ctx); 797 GNUNET_REGEX_context_init (&ctx);
765 798
799 regexp = regex;
766 p = NULL; 800 p = NULL;
767 error_msg = NULL; 801 error_msg = NULL;
768 altcount = 0; 802 altcount = 0;
769 atomcount = 0; 803 atomcount = 0;
770 pcount = 0; 804 pcount = 0;
771 805
772 for (count = 0; count < len && *regex; count++, regex++) 806 for (count = 0; count < len && *regexp; count++, regexp++)
773 { 807 {
774 switch (*regex) 808 switch (*regexp)
775 { 809 {
776 case '(': 810 case '(':
777 if (atomcount > 1) 811 if (atomcount > 1)
@@ -835,7 +869,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
835 nfa_add_plus_op (&ctx); 869 nfa_add_plus_op (&ctx);
836 break; 870 break;
837 case 92: /* escape: \ */ 871 case 92: /* escape: \ */
838 regex++; 872 regexp++;
839 count++; 873 count++;
840 default: 874 default:
841 if (atomcount > 1) 875 if (atomcount > 1)
@@ -843,7 +877,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
843 --atomcount; 877 --atomcount;
844 nfa_add_concatenation (&ctx); 878 nfa_add_concatenation (&ctx);
845 } 879 }
846 nfa_add_literal (&ctx, *regex); 880 nfa_add_literal (&ctx, *regexp);
847 atomcount++; 881 atomcount++;
848 break; 882 break;
849 } 883 }
@@ -945,6 +979,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
945 nfa = GNUNET_REGEX_construct_nfa (regex, len); 979 nfa = GNUNET_REGEX_construct_nfa (regex, len);
946 980
947 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); 981 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
982 dfa->type = DFA;
948 983
949 // Create DFA start state from epsilon closure 984 // Create DFA start state from epsilon closure
950 dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); 985 dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
@@ -1089,3 +1124,66 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1089 fwrite (end, strlen (end), 1, p); 1124 fwrite (end, strlen (end), 1, p);
1090 fclose (p); 1125 fclose (p);
1091} 1126}
1127
1128int
1129evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1130{
1131 const char *strp;
1132 struct State *s;
1133
1134 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using DFA\n", string);
1135
1136 s = a->start;
1137
1138 for (strp = string; NULL != strp && *strp; strp++)
1139 {
1140 s = dfa_move (s, *strp);
1141 if (NULL == s)
1142 break;
1143 }
1144
1145 if (NULL != s && s->accepting)
1146 return GNUNET_YES;
1147
1148 return GNUNET_NO;
1149}
1150
1151int
1152evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1153{
1154 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using NFA\n", string);
1155
1156 return GNUNET_YES;
1157}
1158
1159
1160/**
1161 * Evaluates the given 'string' against the given compiled regex
1162 *
1163 * @param a automaton
1164 * @param string string to check
1165 *
1166 * @return GNUNET_YES if 'a' matches 'string', GNUNET_NO otherwise
1167 */
1168int
1169GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
1170{
1171 int eval;
1172
1173 switch (a->type)
1174 {
1175 case DFA:
1176 eval = evaluate_dfa (a, string);
1177 break;
1178 case NFA:
1179 eval = evaluate_nfa (a, string);
1180 break;
1181 default:
1182 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1183 "Evaluating regex failed, automaton has no type!\n");
1184 eval = GNUNET_SYSERR;
1185 break;
1186 }
1187
1188 return eval;
1189}
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c
index dcc4ce0ed..56aea52a7 100644
--- a/src/regex/test_regex.c
+++ b/src/regex/test_regex.c
@@ -41,11 +41,14 @@ main (int argc, char *argv[])
41 struct GNUNET_REGEX_Automaton *nfa; 41 struct GNUNET_REGEX_Automaton *nfa;
42 struct GNUNET_REGEX_Automaton *dfa; 42 struct GNUNET_REGEX_Automaton *dfa;
43 char *regex; 43 char *regex;
44 char *string;
45 int eval;
44 46
45 nfa = NULL; 47 nfa = NULL;
46 dfa = NULL; 48 dfa = NULL;
47 49
48 regex = "a\\*b(c|d)+c*(a(b|c)d)+"; 50 regex = "a\\*b(c|d)+c*(a(b|c)d)+";
51 string = "a*bcabd";
49 /*regex = "\\*a(a|b)b"; */ 52 /*regex = "\\*a(a|b)b"; */
50 /*regex = "a(a|b)c"; */ 53 /*regex = "a(a|b)c"; */
51 /*regex = "(a|aa)+"; */ 54 /*regex = "(a|aa)+"; */
@@ -54,6 +57,11 @@ main (int argc, char *argv[])
54 if (nfa) 57 if (nfa)
55 { 58 {
56 GNUNET_REGEX_automaton_save_graph (nfa, "nfa_graph.dot"); 59 GNUNET_REGEX_automaton_save_graph (nfa, "nfa_graph.dot");
60 eval = GNUNET_REGEX_eval (nfa, string);
61 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating %s result: %i\n", string,
62 eval);
63 if (GNUNET_YES != eval)
64 err = 1;
57 GNUNET_REGEX_automaton_destroy (nfa); 65 GNUNET_REGEX_automaton_destroy (nfa);
58 } 66 }
59 else 67 else
@@ -63,6 +71,11 @@ main (int argc, char *argv[])
63 if (dfa) 71 if (dfa)
64 { 72 {
65 GNUNET_REGEX_automaton_save_graph (dfa, "dfa_graph.dot"); 73 GNUNET_REGEX_automaton_save_graph (dfa, "dfa_graph.dot");
74 eval = GNUNET_REGEX_eval (dfa, string);
75 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating %s result: %i\n", string,
76 eval);
77 if (GNUNET_YES != eval)
78 err = 1;
66 GNUNET_REGEX_automaton_destroy (dfa); 79 GNUNET_REGEX_automaton_destroy (dfa);
67 } 80 }
68 return err; 81 return err;