aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r--src/regex/regex.c639
1 files changed, 639 insertions, 0 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
new file mode 100644
index 000000000..b9a104fae
--- /dev/null
+++ b/src/regex/regex.c
@@ -0,0 +1,639 @@
1/*
2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file src/regex/regex.c
22 * @brief library to create automatons from regular expressions
23 * @author Maximilian Szengel
24 */
25#include "platform.h"
26#include "gnunet_regex_lib.h"
27#include "regex.h"
28
29struct Stack
30{
31 void *data;
32 struct Stack *next;
33};
34
35static struct Stack *nfa_stack = NULL;
36
37void
38push (void *val, struct Stack **stack)
39{
40 struct Stack *new = GNUNET_malloc (sizeof (struct Stack *));
41
42 if (NULL == new)
43 {
44 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not push to stack\n");
45 return;
46 }
47 new->data = val;
48 new->next = *stack;
49 *stack = new;
50}
51
52int
53empty (struct Stack **stack)
54{
55 return (NULL == *stack || NULL == stack);
56}
57
58void *
59pop (struct Stack **stack)
60{
61 struct Stack *top;
62 void *val;
63
64 if (empty (stack))
65 return NULL;
66
67 top = *stack;
68 val = top->data;
69 *stack = top->next;
70 GNUNET_free (top);
71 return val;
72}
73
74void *
75top (struct Stack **stack)
76{
77 if (empty (stack))
78 return NULL;
79
80 return (*stack)->data;
81}
82
83struct GNUNET_REGEX_Nfa
84{
85 struct State *start;
86 struct State *end;
87
88 unsigned int statecnt;
89 struct State **states;
90};
91
92struct State
93{
94 unsigned int id;
95 int accepting;
96 unsigned int tcnt;
97 struct Transition *transitions;
98 int visited;
99};
100
101struct Transition
102{
103 unsigned int id;
104 char literal;
105 struct State *state;
106};
107
108static unsigned int state_id = 0;
109static unsigned int transition_id = 0;
110
111struct GNUNET_REGEX_Nfa *
112nfa_create (struct State *start, struct State *end)
113{
114 struct GNUNET_REGEX_Nfa *n;
115
116 n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Nfa));
117
118 if (NULL == start && NULL == end)
119 {
120 n->states = NULL;
121 n->statecnt = 0;
122 n->start = NULL;
123 n->end = NULL;
124
125 return n;
126 }
127
128 n->states = GNUNET_malloc ((sizeof (struct State *)) * 2);
129 n->states[0] = start;
130 n->states[1] = end;
131 n->statecnt = 2;
132
133 n->start = start;
134 n->end = end;
135
136 return n;
137}
138
139
140void
141nfa_add_states (struct GNUNET_REGEX_Nfa *n, struct State **states,
142 unsigned int count)
143{
144 unsigned int i;
145 unsigned int j;
146
147 i = n->statecnt;
148 GNUNET_array_grow (n->states, n->statecnt, n->statecnt + count);
149 for (j = 0; i < n->statecnt && j < count; i++, j++)
150 {
151 n->states[i] = states[j];
152 }
153}
154
155
156struct State *
157nfa_create_state (int accepting)
158{
159 struct State *s;
160
161 s = GNUNET_malloc (sizeof (struct State));
162 s->id = state_id++;
163 s->accepting = accepting;
164 s->tcnt = 0;
165 s->transitions = NULL;
166 s->visited = 0;
167
168 return s;
169}
170
171void
172nfa_add_transition (struct State *from_state, const char literal,
173 struct State *to_state)
174{
175 struct Transition t;
176
177 if (NULL == to_state)
178 {
179 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
180 "Could not create Transition. to_state was NULL.\n");
181 return;
182 }
183
184 t.id = transition_id++;
185 t.literal = literal;
186 t.state = to_state;
187
188 if (0 == from_state->tcnt)
189 from_state->transitions = NULL;
190
191 GNUNET_array_append (from_state->transitions, from_state->tcnt, t);
192}
193
194void
195mark_all_states (struct GNUNET_REGEX_Nfa *n, int visited)
196{
197 int i;
198
199 for (i = 0; i < n->statecnt; i++)
200 {
201 n->states[i]->visited = visited;
202 }
203}
204
205void
206print_states (struct GNUNET_REGEX_Nfa *n, char **out_str)
207{
208 struct State *s;
209 int i_s;
210 int i_t;
211 char *s_all;
212
213 mark_all_states (n, 0);
214
215 s_all = GNUNET_malloc (sizeof (char));
216 *s_all = '\0';
217
218 for (i_s = 0; i_s < n->statecnt; i_s++)
219 {
220 struct Transition *ctran;
221 char *s_acc = NULL;
222 char *s_tran = NULL;
223
224 s = n->states[i_s];
225
226 if (s->accepting)
227 {
228 GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id);
229
230 s_all = GNUNET_realloc (s_all, strlen (s_all) + strlen (s_acc) + 1);
231 strcat (s_all, s_acc);
232 GNUNET_free (s_acc);
233 }
234
235 ctran = s->transitions;
236 s->visited = 1;
237
238 for (i_t = 0; i_t < s->tcnt && NULL != s->transitions; i_t++)
239 {
240 if (NULL == ctran)
241 {
242 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "s->transitions was NULL\n");
243 }
244
245 if (NULL == ctran->state)
246 {
247 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
248 "Transition from State %i has has no state for transitioning\n",
249 s->id);
250 }
251
252 if (ctran->literal == 0)
253 {
254 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id,
255 ctran->state->id);
256 }
257 else
258 {
259 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id,
260 ctran->state->id, ctran->literal);
261 }
262
263 s_all = GNUNET_realloc (s_all, strlen (s_all) + strlen (s_tran) + 1);
264 strcat (s_all, s_tran);
265 GNUNET_free (s_tran);
266
267 ctran++;
268 }
269 }
270
271 *out_str = s_all;
272}
273
274void
275nfa_add_concatenation ()
276{
277 struct GNUNET_REGEX_Nfa *A;
278 struct GNUNET_REGEX_Nfa *B;
279 struct GNUNET_REGEX_Nfa *new;
280
281 B = pop (&nfa_stack);
282 A = pop (&nfa_stack);
283
284 if (NULL == A || NULL == B)
285 {
286 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
287 "nfa_add_concatenationenation failed, because there were not enough elements on the stack");
288 return;
289 }
290
291 nfa_add_transition (A->end, 0, B->start);
292 A->end->accepting = 0;
293 B->end->accepting = 1;
294
295 new = nfa_create (NULL, NULL);
296 nfa_add_states (new, A->states, A->statecnt);
297 nfa_add_states (new, B->states, B->statecnt);
298 new->start = A->start;
299 new->end = B->end;
300 GNUNET_free (A);
301 GNUNET_free (B);
302
303 push (new, &nfa_stack);
304}
305
306void
307nfa_add_star_op ()
308{
309 struct GNUNET_REGEX_Nfa *A;
310 struct GNUNET_REGEX_Nfa *new;
311 struct State *start;
312 struct State *end;
313
314 A = pop (&nfa_stack);
315
316 if (NULL == A)
317 {
318 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
319 "nfa_add_star_op failed, because there was no element on the stack");
320 return;
321 }
322
323 start = nfa_create_state (0);
324 end = nfa_create_state (1);
325
326 nfa_add_transition (start, 0, A->start);
327 nfa_add_transition (start, 0, end);
328 nfa_add_transition (A->end, 0, A->start);
329 nfa_add_transition (A->end, 0, end);
330
331 A->end->accepting = 0;
332 end->accepting = 1;
333
334 new = nfa_create (start, end);
335 nfa_add_states (new, A->states, A->statecnt);
336 GNUNET_free (A);
337
338 push (new, &nfa_stack);
339}
340
341void
342nfa_add_plus_op ()
343{
344 struct GNUNET_REGEX_Nfa *A;
345
346 A = pop (&nfa_stack);
347
348 if (NULL == A)
349 {
350 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
351 "nfa_add_plus_op failed, because there was no element on the stack");
352 return;
353 }
354
355 nfa_add_transition (A->end, 0, A->start);
356
357 push (A, &nfa_stack);
358}
359
360void
361nfa_add_alternation ()
362{
363 struct GNUNET_REGEX_Nfa *A;
364 struct GNUNET_REGEX_Nfa *B;
365 struct GNUNET_REGEX_Nfa *new;
366 struct State *start;
367 struct State *end;
368
369 B = pop (&nfa_stack);
370 A = pop (&nfa_stack);
371
372 if (NULL == A || NULL == B)
373 {
374 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
375 "alternation failed, because there were not enough elements on the stack");
376 return;
377 }
378
379 start = nfa_create_state (0);
380 end = nfa_create_state (1);
381 nfa_add_transition (start, 0, A->start);
382 nfa_add_transition (start, 0, B->start);
383
384 nfa_add_transition (A->end, 0, end);
385 nfa_add_transition (B->end, 0, end);
386
387 A->end->accepting = 0;
388 B->end->accepting = 0;
389 end->accepting = 1;
390
391 new = nfa_create (start, end);
392 nfa_add_states (new, A->states, A->statecnt);
393 nfa_add_states (new, B->states, B->statecnt);
394 GNUNET_free (A);
395 GNUNET_free (B);
396
397 push (new, &nfa_stack);
398}
399
400void
401nfa_add_literal (const char lit)
402{
403 struct GNUNET_REGEX_Nfa *n;
404 struct State *start;
405 struct State *end;
406
407 start = nfa_create_state (0);
408 end = nfa_create_state (1);
409 nfa_add_transition (start, lit, end);
410 n = nfa_create (start, end);
411 push (n, &nfa_stack);
412}
413
414struct GNUNET_REGEX_Nfa *
415GNUNET_REGEX_construct_nfa (const char *regex, size_t len)
416{
417 struct GNUNET_REGEX_Nfa *nfa;
418 unsigned int count;
419 unsigned int altcount;
420 unsigned int atomcount;
421 unsigned int pcount;
422 struct p_stage
423 {
424 int altcount;
425 int atomcount;
426 };
427 struct p_stage *p;
428
429 p = NULL;
430
431 altcount = 0;
432 atomcount = 0;
433 pcount = 0;
434
435 for (count = 0; count < len && *regex; count++, regex++)
436 {
437
438 switch (*regex)
439 {
440 case '(':
441 if (atomcount > 1)
442 {
443 --atomcount;
444 nfa_add_concatenation ();
445 }
446 GNUNET_array_grow (p, pcount, pcount + 1);
447 p[pcount - 1].altcount = altcount;
448 p[pcount - 1].atomcount = atomcount;
449 altcount = 0;
450 atomcount = 0;
451 break;
452 case '|':
453 if (0 == atomcount)
454 goto error;
455 while (--atomcount > 0)
456 nfa_add_concatenation ();
457 altcount++;
458 break;
459 case ')':
460 if (0 == pcount)
461 goto error;
462 if (atomcount == 0)
463 goto error;
464 while (--atomcount > 0)
465 nfa_add_concatenation ();
466 for (; altcount > 0; altcount--)
467 nfa_add_alternation ();
468 pcount--;
469 altcount = p[pcount].altcount;
470 atomcount = p[pcount].atomcount;
471 atomcount++;
472 break;
473 case '*':
474 if (atomcount == 0)
475 goto error;
476 nfa_add_star_op ();
477 break;
478 case '+':
479 if (atomcount == 0)
480 goto error;
481 nfa_add_plus_op ();
482 break;
483 case 92: /* escape: \ */
484 regex++;
485 count++;
486 default:
487 if (atomcount > 1)
488 {
489 --atomcount;
490 nfa_add_concatenation ();
491 }
492 nfa_add_literal (*regex);
493 atomcount++;
494 break;
495 }
496 }
497 if (0 != pcount)
498 goto error;
499 while (--atomcount > 0)
500 nfa_add_concatenation ();
501 for (; altcount > 0; altcount--)
502 nfa_add_alternation ();
503
504 if (NULL != p)
505 GNUNET_free (p);
506
507 nfa = pop (&nfa_stack);
508
509 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
510 "Created NFA with %i States and a total of %i Transitions\n",
511 state_id, transition_id);
512
513 return nfa;
514
515error:
516 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n");
517 GNUNET_free (p);
518 while (!empty (&nfa_stack))
519 GNUNET_REGEX_destroy_nfa (pop (&nfa_stack));
520 return NULL;
521}
522
523void
524GNUNET_REGEX_destroy_nfa (struct GNUNET_REGEX_Nfa *n)
525{
526 int i;
527
528 for (i = 0; i < n->statecnt; i++)
529 {
530 GNUNET_free (n->states[i]);
531 }
532}
533
534void
535GNUNET_REGEX_save_nfa_graph (struct GNUNET_REGEX_Nfa *n, const char *filename)
536{
537 struct State *s;
538 char *start;
539 char *end;
540 char *states;
541 FILE *p;
542 int i_s;
543 int i_t;
544
545 if (NULL == n)
546 {
547 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!");
548 return;
549 }
550
551 mark_all_states (n, 0);
552
553 states = GNUNET_malloc (sizeof (char));
554 *states = '\0';
555
556 for (i_s = 0; i_s < n->statecnt; i_s++)
557 {
558 struct Transition *ctran;
559 char *s_acc = NULL;
560 char *s_tran = NULL;
561
562 s = n->states[i_s];
563
564 if (s->accepting)
565 {
566 GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id);
567
568 states = GNUNET_realloc (states, strlen (states) + strlen (s_acc) + 1);
569 strcat (states, s_acc);
570 GNUNET_free (s_acc);
571 }
572
573 ctran = s->transitions;
574 s->visited = 1;
575
576 for (i_t = 0; i_t < s->tcnt && NULL != s->transitions; i_t++)
577 {
578 if (NULL == ctran)
579 {
580 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "s->transitions was NULL\n");
581 }
582
583 if (NULL == ctran->state)
584 {
585 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
586 "Transition from State %i has has no state for transitioning\n",
587 s->id);
588 }
589
590 if (ctran->literal == 0)
591 {
592 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id,
593 ctran->state->id);
594 }
595 else
596 {
597 GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id,
598 ctran->state->id, ctran->literal);
599 }
600
601 states = GNUNET_realloc (states, strlen (states) + strlen (s_tran) + 1);
602 strcat (states, s_tran);
603 GNUNET_free (s_tran);
604
605 ctran++;
606 }
607 }
608
609 if (NULL == states)
610 {
611 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA");
612 return;
613 }
614
615 if (NULL == filename || strlen (filename) < 1)
616 {
617 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!");
618 GNUNET_free (states);
619 return;
620 }
621
622 p = fopen (filename, "w");
623 if (p == NULL)
624 {
625 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s",
626 filename);
627 GNUNET_free (states);
628 return;
629 }
630
631 start = "digraph G {\nrankdir=LR\n";
632 end = "\n}\n";
633 fwrite (start, strlen (start), 1, p);
634 fwrite (states, strlen (states), 1, p);
635 fwrite (end, strlen (end), 1, p);
636 fclose (p);
637
638 GNUNET_free (states);
639}