aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex.c
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-04-02 09:39:00 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-04-02 09:39:00 +0000
commit9333777c8beb3d3c56f2f37d1b0e61fecd08f80c (patch)
treeb1937750480652b426ab8fb7bc67bbae69013fd0 /src/regex/regex.c
parentc9b161c04b8e6349bc2e2856c2d2d02aa03bb1f0 (diff)
downloadgnunet-9333777c8beb3d3c56f2f37d1b0e61fecd08f80c.tar.gz
gnunet-9333777c8beb3d3c56f2f37d1b0e61fecd08f80c.zip
DFA evaluation
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r--src/regex/regex.c202
1 files changed, 150 insertions, 52 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 1b7bd8565..596acb323 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -43,6 +43,12 @@ struct GNUNET_REGEX_Context
43 struct GNUNET_REGEX_Automaton *stack_tail; 43 struct GNUNET_REGEX_Automaton *stack_tail;
44}; 44};
45 45
46enum GNUNET_REGEX_automaton_type
47{
48 NFA,
49 DFA
50};
51
46/** 52/**
47 * Automaton representation 53 * Automaton representation
48 */ 54 */
@@ -56,6 +62,8 @@ struct GNUNET_REGEX_Automaton
56 62
57 struct State *states_head; 63 struct State *states_head;
58 struct State *states_tail; 64 struct State *states_tail;
65
66 enum GNUNET_REGEX_automaton_type type;
59}; 67};
60 68
61/** 69/**
@@ -268,6 +276,54 @@ add_transition (struct GNUNET_REGEX_Context *ctx, struct State *from_state,
268} 276}
269 277
270/** 278/**
279 * Clears an automaton fragment. Does not destroy the states inside
280 * the automaton.
281 *
282 * @param a automaton to be cleared
283 */
284void
285automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
286{
287 a->start = NULL;
288 a->end = NULL;
289 a->states_head = NULL;
290 a->states_tail = NULL;
291 GNUNET_free (a);
292}
293
294/**
295 * Frees the memory used by State 's'
296 *
297 * @param s state that should be destroyed
298 */
299void
300automaton_destroy_state (struct State *s)
301{
302 struct Transition *t;
303 struct Transition *next_t;
304
305 if (NULL != s->name)
306 GNUNET_free (s->name);
307
308 for (t = s->transitions_head; NULL != t;)
309 {
310 next_t = t->next;
311 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
312 GNUNET_free (t);
313 t = next_t;
314 }
315
316 if (NULL != s->nfa_set)
317 {
318 if (s->nfa_set->len > 0)
319 GNUNET_free (s->nfa_set->states);
320 GNUNET_free (s->nfa_set);
321 }
322
323 GNUNET_free (s);
324}
325
326/**
271 * Creates a new DFA state based on a set of NFA states. Needs to be freed 327 * Creates a new DFA state based on a set of NFA states. Needs to be freed
272 * using automaton_destroy_state. 328 * using automaton_destroy_state.
273 * 329 *
@@ -352,6 +408,29 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
352 return s; 408 return s;
353} 409}
354 410
411struct State *
412dfa_move (struct State *s, const char literal)
413{
414 struct Transition *t;
415 struct State *new_s;
416
417 if (NULL == s)
418 return NULL;
419
420 new_s = NULL;
421
422 for (t = s->transitions_head; NULL != t; t = t->next)
423 {
424 if (literal == t->literal)
425 {
426 new_s = t->state;
427 break;
428 }
429 }
430
431 return new_s;
432}
433
355/** 434/**
356 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear. 435 * Creates a new NFA fragment. Needs to be cleared using automaton_fragment_clear.
357 * 436 *
@@ -367,6 +446,7 @@ nfa_fragment_create (struct State *start, struct State *end)
367 446
368 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); 447 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
369 448
449 n->type = NFA;
370 n->start = NULL; 450 n->start = NULL;
371 n->end = NULL; 451 n->end = NULL;
372 452
@@ -437,54 +517,6 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
437} 517}
438 518
439/** 519/**
440 * Clears an automaton fragment. Does not destroy the states inside
441 * the automaton.
442 *
443 * @param a automaton to be cleared
444 */
445void
446automaton_fragment_clear (struct GNUNET_REGEX_Automaton *a)
447{
448 a->start = NULL;
449 a->end = NULL;
450 a->states_head = NULL;
451 a->states_tail = NULL;
452 GNUNET_free (a);
453}
454
455/**
456 * Frees the memory used by State 's'
457 *
458 * @param s state that should be destroyed
459 */
460void
461automaton_destroy_state (struct State *s)
462{
463 struct Transition *t;
464 struct Transition *next_t;
465
466 if (NULL != s->name)
467 GNUNET_free (s->name);
468
469 for (t = s->transitions_head; NULL != t;)
470 {
471 next_t = t->next;
472 GNUNET_CONTAINER_DLL_remove (s->transitions_head, s->transitions_tail, t);
473 GNUNET_free (t);
474 t = next_t;
475 }
476
477 if (NULL != s->nfa_set)
478 {
479 if (s->nfa_set->len > 0)
480 GNUNET_free (s->nfa_set->states);
481 GNUNET_free (s->nfa_set);
482 }
483
484 GNUNET_free (s);
485}
486
487/**
488 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) 520 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
489 * 521 *
490 * @param ctx context 522 * @param ctx context
@@ -750,6 +782,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
750{ 782{
751 struct GNUNET_REGEX_Context ctx; 783 struct GNUNET_REGEX_Context ctx;
752 struct GNUNET_REGEX_Automaton *nfa; 784 struct GNUNET_REGEX_Automaton *nfa;
785 const char *regexp;
753 char *error_msg; 786 char *error_msg;
754 unsigned int count; 787 unsigned int count;
755 unsigned int altcount; 788 unsigned int altcount;
@@ -763,15 +796,16 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
763 796
764 GNUNET_REGEX_context_init (&ctx); 797 GNUNET_REGEX_context_init (&ctx);
765 798
799 regexp = regex;
766 p = NULL; 800 p = NULL;
767 error_msg = NULL; 801 error_msg = NULL;
768 altcount = 0; 802 altcount = 0;
769 atomcount = 0; 803 atomcount = 0;
770 pcount = 0; 804 pcount = 0;
771 805
772 for (count = 0; count < len && *regex; count++, regex++) 806 for (count = 0; count < len && *regexp; count++, regexp++)
773 { 807 {
774 switch (*regex) 808 switch (*regexp)
775 { 809 {
776 case '(': 810 case '(':
777 if (atomcount > 1) 811 if (atomcount > 1)
@@ -835,7 +869,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
835 nfa_add_plus_op (&ctx); 869 nfa_add_plus_op (&ctx);
836 break; 870 break;
837 case 92: /* escape: \ */ 871 case 92: /* escape: \ */
838 regex++; 872 regexp++;
839 count++; 873 count++;
840 default: 874 default:
841 if (atomcount > 1) 875 if (atomcount > 1)
@@ -843,7 +877,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
843 --atomcount; 877 --atomcount;
844 nfa_add_concatenation (&ctx); 878 nfa_add_concatenation (&ctx);
845 } 879 }
846 nfa_add_literal (&ctx, *regex); 880 nfa_add_literal (&ctx, *regexp);
847 atomcount++; 881 atomcount++;
848 break; 882 break;
849 } 883 }
@@ -945,6 +979,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
945 nfa = GNUNET_REGEX_construct_nfa (regex, len); 979 nfa = GNUNET_REGEX_construct_nfa (regex, len);
946 980
947 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton)); 981 dfa = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Automaton));
982 dfa->type = DFA;
948 983
949 // Create DFA start state from epsilon closure 984 // Create DFA start state from epsilon closure
950 dfa_stack = GNUNET_malloc (sizeof (struct StateSet)); 985 dfa_stack = GNUNET_malloc (sizeof (struct StateSet));
@@ -1089,3 +1124,66 @@ GNUNET_REGEX_automaton_save_graph (struct GNUNET_REGEX_Automaton *a,
1089 fwrite (end, strlen (end), 1, p); 1124 fwrite (end, strlen (end), 1, p);
1090 fclose (p); 1125 fclose (p);
1091} 1126}
1127
1128int
1129evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1130{
1131 const char *strp;
1132 struct State *s;
1133
1134 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using DFA\n", string);
1135
1136 s = a->start;
1137
1138 for (strp = string; NULL != strp && *strp; strp++)
1139 {
1140 s = dfa_move (s, *strp);
1141 if (NULL == s)
1142 break;
1143 }
1144
1145 if (NULL != s && s->accepting)
1146 return GNUNET_YES;
1147
1148 return GNUNET_NO;
1149}
1150
1151int
1152evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
1153{
1154 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Evaluating '%s' using NFA\n", string);
1155
1156 return GNUNET_YES;
1157}
1158
1159
1160/**
1161 * Evaluates the given 'string' against the given compiled regex
1162 *
1163 * @param a automaton
1164 * @param string string to check
1165 *
1166 * @return GNUNET_YES if 'a' matches 'string', GNUNET_NO otherwise
1167 */
1168int
1169GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string)
1170{
1171 int eval;
1172
1173 switch (a->type)
1174 {
1175 case DFA:
1176 eval = evaluate_dfa (a, string);
1177 break;
1178 case NFA:
1179 eval = evaluate_nfa (a, string);
1180 break;
1181 default:
1182 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1183 "Evaluating regex failed, automaton has no type!\n");
1184 eval = GNUNET_SYSERR;
1185 break;
1186 }
1187
1188 return eval;
1189}