aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex_internal.c
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2013-06-20 08:55:56 +0000
committerChristian Grothoff <christian@grothoff.org>2013-06-20 08:55:56 +0000
commit5f7a6c8f6816a826a2dd93eadc5039733f7db606 (patch)
tree3c22c8ff3436cc3140dfb1b24813ed4362330879 /src/regex/regex_internal.c
parent21a2b4f95b4488645ba5a6254fcb8919c4915f73 (diff)
downloadgnunet-5f7a6c8f6816a826a2dd93eadc5039733f7db606.tar.gz
gnunet-5f7a6c8f6816a826a2dd93eadc5039733f7db606.zip
moving functions for testing and evaluation and experiments to the test library, minimizing the internal library, renaming files according to which library they belong to
Diffstat (limited to 'src/regex/regex_internal.c')
-rw-r--r--src/regex/regex_internal.c3485
1 files changed, 3485 insertions, 0 deletions
diff --git a/src/regex/regex_internal.c b/src/regex/regex_internal.c
new file mode 100644
index 000000000..e66835134
--- /dev/null
+++ b/src/regex/regex_internal.c
@@ -0,0 +1,3485 @@
1/*
2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file src/regex/regex_internal.c
22 * @brief library to create Deterministic Finite Automatons (DFAs) from regular
23 * expressions (regexes).
24 * @author Maximilian Szengel
25 */
26#include "platform.h"
27#include "gnunet_util_lib.h"
28#include "gnunet_regex_service.h"
29#include "regex_internal_lib.h"
30#include "regex_internal.h"
31
32
33/**
34 * Set this to GNUNET_YES to enable state naming. Used to debug NFA->DFA
35 * creation. Disabled by default for better performance.
36 */
37#define REGEX_DEBUG_DFA GNUNET_NO
38
39/**
40 * Set of states using MDLL API.
41 */
42struct REGEX_ITERNAL_StateSet_MDLL
43{
44 /**
45 * MDLL of states.
46 */
47 struct REGEX_ITERNAL_State *head;
48
49 /**
50 * MDLL of states.
51 */
52 struct REGEX_ITERNAL_State *tail;
53
54 /**
55 * Length of the MDLL.
56 */
57 unsigned int len;
58};
59
60
61/**
62 * Append state to the given StateSet '
63 *
64 * @param set set to be modified
65 * @param state state to be appended
66 */
67static void
68state_set_append (struct REGEX_ITERNAL_StateSet *set,
69 struct REGEX_ITERNAL_State *state)
70{
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
74}
75
76
77/**
78 * Compare two strings for equality. If either is NULL they are not equal.
79 *
80 * @param str1 first string for comparison.
81 * @param str2 second string for comparison.
82 *
83 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
84 */
85static int
86nullstrcmp (const char *str1, const char *str2)
87{
88 if ((NULL == str1) != (NULL == str2))
89 return -1;
90 if ((NULL == str1) && (NULL == str2))
91 return 0;
92
93 return strcmp (str1, str2);
94}
95
96
97/**
98 * Adds a transition from one state to another on 'label'. Does not add
99 * duplicate states.
100 *
101 * @param ctx context
102 * @param from_state starting state for the transition
103 * @param label transition label
104 * @param to_state state to where the transition should point to
105 */
106static void
107state_add_transition (struct REGEX_ITERNAL_Context *ctx,
108 struct REGEX_ITERNAL_State *from_state, const char *label,
109 struct REGEX_ITERNAL_State *to_state)
110{
111 struct REGEX_ITERNAL_Transition *t;
112 struct REGEX_ITERNAL_Transition *oth;
113
114 if (NULL == from_state)
115 {
116 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
117 return;
118 }
119
120 /* Do not add duplicate state transitions */
121 for (t = from_state->transitions_head; NULL != t; t = t->next)
122 {
123 if (t->to_state == to_state && 0 == nullstrcmp (t->label, label) &&
124 t->from_state == from_state)
125 return;
126 }
127
128 /* sort transitions by label */
129 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
130 {
131 if (0 < nullstrcmp (oth->label, label))
132 break;
133 }
134
135 t = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_Transition));
136 if (NULL != ctx)
137 t->id = ctx->transition_id++;
138 if (NULL != label)
139 t->label = GNUNET_strdup (label);
140 else
141 t->label = NULL;
142 t->to_state = to_state;
143 t->from_state = from_state;
144
145 /* Add outgoing transition to 'from_state' */
146 from_state->transition_count++;
147 GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
148 from_state->transitions_tail, oth, t);
149}
150
151
152/**
153 * Remove a 'transition' from 'state'.
154 *
155 * @param state state from which the to-be-removed transition originates.
156 * @param transition transition that should be removed from state 'state'.
157 */
158static void
159state_remove_transition (struct REGEX_ITERNAL_State *state,
160 struct REGEX_ITERNAL_Transition *transition)
161{
162 if (NULL == state || NULL == transition)
163 return;
164
165 if (transition->from_state != state)
166 return;
167
168 GNUNET_free_non_null (transition->label);
169
170 state->transition_count--;
171 GNUNET_CONTAINER_DLL_remove (state->transitions_head, state->transitions_tail,
172 transition);
173
174 GNUNET_free (transition);
175}
176
177
178/**
179 * Compare two states. Used for sorting.
180 *
181 * @param a first state
182 * @param b second state
183 *
184 * @return an integer less than, equal to, or greater than zero
185 * if the first argument is considered to be respectively
186 * less than, equal to, or greater than the second.
187 */
188static int
189state_compare (const void *a, const void *b)
190{
191 struct REGEX_ITERNAL_State **s1 = (struct REGEX_ITERNAL_State **) a;
192 struct REGEX_ITERNAL_State **s2 = (struct REGEX_ITERNAL_State **) b;
193
194 return (*s1)->id - (*s2)->id;
195}
196
197
198/**
199 * Get all edges leaving state 's'.
200 *
201 * @param s state.
202 * @param edges all edges leaving 's', expected to be allocated and have enough
203 * space for s->transitions_count elements.
204 *
205 * @return number of edges.
206 */
207static unsigned int
208state_get_edges (struct REGEX_ITERNAL_State *s, struct REGEX_ITERNAL_Edge *edges)
209{
210 struct REGEX_ITERNAL_Transition *t;
211 unsigned int count;
212
213 if (NULL == s)
214 return 0;
215
216 count = 0;
217
218 for (t = s->transitions_head; NULL != t; t = t->next)
219 {
220 if (NULL != t->to_state)
221 {
222 edges[count].label = t->label;
223 edges[count].destination = t->to_state->hash;
224 count++;
225 }
226 }
227 return count;
228}
229
230
231/**
232 * Compare to state sets by comparing the id's of the states that are contained
233 * in each set. Both sets are expected to be sorted by id!
234 *
235 * @param sset1 first state set
236 * @param sset2 second state set
237 * @return 0 if the sets are equal, otherwise non-zero
238 */
239static int
240state_set_compare (struct REGEX_ITERNAL_StateSet *sset1,
241 struct REGEX_ITERNAL_StateSet *sset2)
242{
243 int result;
244 unsigned int i;
245
246 if (NULL == sset1 || NULL == sset2)
247 return 1;
248
249 result = sset1->off - sset2->off;
250 if (result < 0)
251 return -1;
252 if (result > 0)
253 return 1;
254 for (i = 0; i < sset1->off; i++)
255 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
256 break;
257 return result;
258}
259
260
261/**
262 * Clears the given StateSet 'set'
263 *
264 * @param set set to be cleared
265 */
266static void
267state_set_clear (struct REGEX_ITERNAL_StateSet *set)
268{
269 GNUNET_array_grow (set->states, set->size, 0);
270 set->off = 0;
271}
272
273
274/**
275 * Clears an automaton fragment. Does not destroy the states inside the
276 * automaton.
277 *
278 * @param a automaton to be cleared
279 */
280static void
281automaton_fragment_clear (struct REGEX_ITERNAL_Automaton *a)
282{
283 if (NULL == a)
284 return;
285
286 a->start = NULL;
287 a->end = NULL;
288 a->states_head = NULL;
289 a->states_tail = NULL;
290 a->state_count = 0;
291 GNUNET_free (a);
292}
293
294
295/**
296 * Frees the memory used by State 's'
297 *
298 * @param s state that should be destroyed
299 */
300static void
301automaton_destroy_state (struct REGEX_ITERNAL_State *s)
302{
303 struct REGEX_ITERNAL_Transition *t;
304 struct REGEX_ITERNAL_Transition *next_t;
305
306 if (NULL == s)
307 return;
308
309 GNUNET_free_non_null (s->name);
310 GNUNET_free_non_null (s->proof);
311 state_set_clear (&s->nfa_set);
312 for (t = s->transitions_head; NULL != t; t = next_t)
313 {
314 next_t = t->next;
315 state_remove_transition (s, t);
316 }
317
318 GNUNET_free (s);
319}
320
321
322/**
323 * Remove a state from the given automaton 'a'. Always use this function when
324 * altering the states of an automaton. Will also remove all transitions leading
325 * to this state, before destroying it.
326 *
327 * @param a automaton
328 * @param s state to remove
329 */
330static void
331automaton_remove_state (struct REGEX_ITERNAL_Automaton *a,
332 struct REGEX_ITERNAL_State *s)
333{
334 struct REGEX_ITERNAL_State *s_check;
335 struct REGEX_ITERNAL_Transition *t_check;
336 struct REGEX_ITERNAL_Transition *t_check_next;
337
338 if (NULL == a || NULL == s)
339 return;
340
341 /* remove all transitions leading to this state */
342 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
343 {
344 for (t_check = s_check->transitions_head; NULL != t_check;
345 t_check = t_check_next)
346 {
347 t_check_next = t_check->next;
348 if (t_check->to_state == s)
349 state_remove_transition (s_check, t_check);
350 }
351 }
352
353 /* remove state */
354 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
355 a->state_count--;
356
357 automaton_destroy_state (s);
358}
359
360
361/**
362 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
363 * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
364 *
365 * @param ctx context
366 * @param a automaton
367 * @param s1 first state
368 * @param s2 second state, will be destroyed
369 */
370static void
371automaton_merge_states (struct REGEX_ITERNAL_Context *ctx,
372 struct REGEX_ITERNAL_Automaton *a,
373 struct REGEX_ITERNAL_State *s1,
374 struct REGEX_ITERNAL_State *s2)
375{
376 struct REGEX_ITERNAL_State *s_check;
377 struct REGEX_ITERNAL_Transition *t_check;
378 struct REGEX_ITERNAL_Transition *t;
379 struct REGEX_ITERNAL_Transition *t_next;
380 int is_dup;
381
382 if (s1 == s2)
383 return;
384
385 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
386 * does not already exists, if it already exists remove transition. */
387 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
388 {
389 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
390 {
391 t_next = t_check->next;
392
393 if (s2 == t_check->to_state)
394 {
395 is_dup = GNUNET_NO;
396 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
397 {
398 if (t->to_state == s1 && 0 == strcmp (t_check->label, t->label))
399 is_dup = GNUNET_YES;
400 }
401 if (GNUNET_NO == is_dup)
402 t_check->to_state = s1;
403 else
404 state_remove_transition (t_check->from_state, t_check);
405 }
406 }
407 }
408
409 /* 2. Add all transitions from s2 to sX to s1 */
410 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
411 {
412 if (t_check->to_state != s1)
413 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
414 }
415
416 /* 3. Rename s1 to {s1,s2} */
417#if REGEX_DEBUG_DFA
418 char *new_name;
419
420 new_name = s1->name;
421 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
422 GNUNET_free (new_name);
423#endif
424
425 /* remove state */
426 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
427 a->state_count--;
428 automaton_destroy_state (s2);
429}
430
431
432/**
433 * Add a state to the automaton 'a', always use this function to alter the
434 * states DLL of the automaton.
435 *
436 * @param a automaton to add the state to
437 * @param s state that should be added
438 */
439static void
440automaton_add_state (struct REGEX_ITERNAL_Automaton *a,
441 struct REGEX_ITERNAL_State *s)
442{
443 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
444 a->state_count++;
445}
446
447
448/**
449 * Depth-first traversal (DFS) of all states that are reachable from state
450 * 's'. Performs 'action' on each visited state.
451 *
452 * @param s start state.
453 * @param marks an array of size a->state_count to remember which state was
454 * already visited.
455 * @param count current count of the state.
456 * @param check function that is checked before advancing on each transition
457 * in the DFS.
458 * @param check_cls closure for check.
459 * @param action action to be performed on each state.
460 * @param action_cls closure for action.
461 */
462static void
463automaton_state_traverse (struct REGEX_ITERNAL_State *s, int *marks,
464 unsigned int *count,
465 REGEX_ITERNAL_traverse_check check, void *check_cls,
466 REGEX_ITERNAL_traverse_action action, void *action_cls)
467{
468 struct REGEX_ITERNAL_Transition *t;
469
470 if (GNUNET_YES == marks[s->traversal_id])
471 return;
472
473 marks[s->traversal_id] = GNUNET_YES;
474
475 if (NULL != action)
476 action (action_cls, *count, s);
477
478 (*count)++;
479
480 for (t = s->transitions_head; NULL != t; t = t->next)
481 {
482 if (NULL == check ||
483 (NULL != check && GNUNET_YES == check (check_cls, s, t)))
484 {
485 automaton_state_traverse (t->to_state, marks, count, check, check_cls,
486 action, action_cls);
487 }
488 }
489}
490
491
492/**
493 * Traverses the given automaton using depth-first-search (DFS) from it's start
494 * state, visiting all reachable states and calling 'action' on each one of
495 * them.
496 *
497 * @param a automaton to be traversed.
498 * @param start start state, pass a->start or NULL to traverse the whole automaton.
499 * @param check function that is checked before advancing on each transition
500 * in the DFS.
501 * @param check_cls closure for check.
502 * @param action action to be performed on each state.
503 * @param action_cls closure for action
504 */
505void
506REGEX_ITERNAL_automaton_traverse (const struct REGEX_ITERNAL_Automaton *a,
507 struct REGEX_ITERNAL_State *start,
508 REGEX_ITERNAL_traverse_check check,
509 void *check_cls,
510 REGEX_ITERNAL_traverse_action action,
511 void *action_cls)
512{
513 unsigned int count;
514 struct REGEX_ITERNAL_State *s;
515
516 if (NULL == a || 0 == a->state_count)
517 return;
518
519 int marks[a->state_count];
520
521 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
522 s = s->next, count++)
523 {
524 s->traversal_id = count;
525 marks[s->traversal_id] = GNUNET_NO;
526 }
527
528 count = 0;
529
530 if (NULL == start)
531 s = a->start;
532 else
533 s = start;
534
535 automaton_state_traverse (s, marks, &count, check, check_cls, action,
536 action_cls);
537}
538
539
540/**
541 * String container for faster string operations.
542 */
543struct StringBuffer
544{
545 /**
546 * Buffer holding the string (may start in the middle!);
547 * NOT 0-terminated!
548 */
549 char *sbuf;
550
551 /**
552 * Allocated buffer.
553 */
554 char *abuf;
555
556 /**
557 * Length of the string in the buffer.
558 */
559 size_t slen;
560
561 /**
562 * Number of bytes allocated for 'sbuf'
563 */
564 unsigned int blen;
565
566 /**
567 * Buffer currently represents "NULL" (not the empty string!)
568 */
569 int16_t null_flag;
570
571 /**
572 * If this entry is part of the last/current generation array,
573 * this flag is GNUNET_YES if the last and current generation are
574 * identical (and thus copying is unnecessary if the value didn't
575 * change). This is used in an optimization that improves
576 * performance by about 1% --- if we use int16_t here. With just
577 * "int" for both flags, performance drops (on my system) significantly,
578 * most likely due to increased cache misses.
579 */
580 int16_t synced;
581
582};
583
584
585/**
586 * Compare two strings for equality. If either is NULL they are not equal.
587 *
588 * @param s1 first string for comparison.
589 * @param s2 second string for comparison.
590 *
591 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
592 */
593static int
594sb_nullstrcmp (const struct StringBuffer *s1,
595 const struct StringBuffer *s2)
596{
597 if ( (GNUNET_YES == s1->null_flag) &&
598 (GNUNET_YES == s2->null_flag) )
599 return 0;
600 if ( (GNUNET_YES == s1->null_flag) ||
601 (GNUNET_YES == s2->null_flag) )
602 return -1;
603 if (s1->slen != s2->slen)
604 return -1;
605 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
606}
607
608
609/**
610 * Compare two strings for equality.
611 *
612 * @param s1 first string for comparison.
613 * @param s2 second string for comparison.
614 *
615 * @return 0 if the strings are the same, 1 or -1 if not.
616 */
617static int
618sb_strcmp (const struct StringBuffer *s1,
619 const struct StringBuffer *s2)
620{
621 if (s1->slen != s2->slen)
622 return -1;
623 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
624}
625
626
627/**
628 * Reallocate the buffer of 'ret' to fit 'nlen' characters;
629 * move the existing string to the beginning of the new buffer.
630 *
631 * @param ret current buffer, to be updated
632 * @param nlen target length for the buffer, must be at least ret->slen
633 */
634static void
635sb_realloc (struct StringBuffer *ret,
636 size_t nlen)
637{
638 char *old;
639
640 GNUNET_assert (nlen >= ret->slen);
641 old = ret->abuf;
642 ret->abuf = GNUNET_malloc (nlen);
643 ret->blen = nlen;
644 memcpy (ret->abuf,
645 ret->sbuf,
646 ret->slen);
647 ret->sbuf = ret->abuf;
648 GNUNET_free_non_null (old);
649}
650
651
652/**
653 * Append a string.
654 *
655 * @param ret where to write the result
656 * @param sarg string to append
657 */
658static void
659sb_append (struct StringBuffer *ret,
660 const struct StringBuffer *sarg)
661{
662 if (GNUNET_YES == ret->null_flag)
663 ret->slen = 0;
664 ret->null_flag = GNUNET_NO;
665 if (ret->blen < sarg->slen + ret->slen)
666 sb_realloc (ret, ret->blen + sarg->slen + 128);
667 memcpy (&ret->sbuf[ret->slen],
668 sarg->sbuf,
669 sarg->slen);
670 ret->slen += sarg->slen;
671}
672
673
674/**
675 * Append a C string.
676 *
677 * @param ret where to write the result
678 * @param cstr string to append
679 */
680static void
681sb_append_cstr (struct StringBuffer *ret,
682 const char *cstr)
683{
684 size_t cstr_len = strlen (cstr);
685
686 if (GNUNET_YES == ret->null_flag)
687 ret->slen = 0;
688 ret->null_flag = GNUNET_NO;
689 if (ret->blen < cstr_len + ret->slen)
690 sb_realloc (ret, ret->blen + cstr_len + 128);
691 memcpy (&ret->sbuf[ret->slen],
692 cstr,
693 cstr_len);
694 ret->slen += cstr_len;
695}
696
697
698/**
699 * Wrap a string buffer, that is, set ret to the format string
700 * which contains an "%s" which is to be replaced with the original
701 * content of 'ret'. Note that optimizing this function is not
702 * really worth it, it is rarely called.
703 *
704 * @param ret where to write the result and take the input for %.*s from
705 * @param format format string, fprintf-style, with exactly one "%.*s"
706 * @param extra_chars how long will the result be, in addition to 'sarg' length
707 */
708static void
709sb_wrap (struct StringBuffer *ret,
710 const char *format,
711 size_t extra_chars)
712{
713 char *temp;
714
715 if (GNUNET_YES == ret->null_flag)
716 ret->slen = 0;
717 ret->null_flag = GNUNET_NO;
718 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
719 GNUNET_snprintf (temp,
720 ret->slen + extra_chars + 1,
721 format,
722 (int) ret->slen,
723 ret->sbuf);
724 GNUNET_free_non_null (ret->abuf);
725 ret->abuf = temp;
726 ret->sbuf = temp;
727 ret->blen = ret->slen + extra_chars + 1;
728 ret->slen = ret->slen + extra_chars;
729}
730
731
732/**
733 * Format a string buffer. Note that optimizing this function is not
734 * really worth it, it is rarely called.
735 *
736 * @param ret where to write the result
737 * @param format format string, fprintf-style, with exactly one "%.*s"
738 * @param extra_chars how long will the result be, in addition to 'sarg' length
739 * @param sarg string to print into the format
740 */
741static void
742sb_printf1 (struct StringBuffer *ret,
743 const char *format,
744 size_t extra_chars,
745 const struct StringBuffer *sarg)
746{
747 if (ret->blen < sarg->slen + extra_chars + 1)
748 sb_realloc (ret,
749 sarg->slen + extra_chars + 1);
750 ret->null_flag = GNUNET_NO;
751 ret->sbuf = ret->abuf;
752 ret->slen = sarg->slen + extra_chars;
753 GNUNET_snprintf (ret->sbuf,
754 ret->blen,
755 format,
756 (int) sarg->slen,
757 sarg->sbuf);
758}
759
760
761/**
762 * Format a string buffer.
763 *
764 * @param ret where to write the result
765 * @param format format string, fprintf-style, with exactly two "%.*s"
766 * @param extra_chars how long will the result be, in addition to 'sarg1/2' length
767 * @param sarg1 first string to print into the format
768 * @param sarg2 second string to print into the format
769 */
770static void
771sb_printf2 (struct StringBuffer *ret,
772 const char *format,
773 size_t extra_chars,
774 const struct StringBuffer *sarg1,
775 const struct StringBuffer *sarg2)
776{
777 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
778 sb_realloc (ret,
779 sarg1->slen + sarg2->slen + extra_chars + 1);
780 ret->null_flag = GNUNET_NO;
781 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
782 ret->sbuf = ret->abuf;
783 GNUNET_snprintf (ret->sbuf,
784 ret->blen,
785 format,
786 (int) sarg1->slen,
787 sarg1->sbuf,
788 (int) sarg2->slen,
789 sarg2->sbuf);
790}
791
792
793/**
794 * Format a string buffer. Note that optimizing this function is not
795 * really worth it, it is rarely called.
796 *
797 * @param ret where to write the result
798 * @param format format string, fprintf-style, with exactly three "%.*s"
799 * @param extra_chars how long will the result be, in addition to 'sarg1/2/3' length
800 * @param sarg1 first string to print into the format
801 * @param sarg2 second string to print into the format
802 * @param sarg3 third string to print into the format
803 */
804static void
805sb_printf3 (struct StringBuffer *ret,
806 const char *format,
807 size_t extra_chars,
808 const struct StringBuffer *sarg1,
809 const struct StringBuffer *sarg2,
810 const struct StringBuffer *sarg3)
811{
812 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
813 sb_realloc (ret,
814 sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
815 ret->null_flag = GNUNET_NO;
816 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
817 ret->sbuf = ret->abuf;
818 GNUNET_snprintf (ret->sbuf,
819 ret->blen,
820 format,
821 (int) sarg1->slen,
822 sarg1->sbuf,
823 (int) sarg2->slen,
824 sarg2->sbuf,
825 (int) sarg3->slen,
826 sarg3->sbuf);
827}
828
829
830/**
831 * Free resources of the given string buffer.
832 *
833 * @param sb buffer to free (actual pointer is not freed, as they
834 * should not be individually allocated)
835 */
836static void
837sb_free (struct StringBuffer *sb)
838{
839 GNUNET_array_grow (sb->abuf,
840 sb->blen,
841 0);
842 sb->slen = 0;
843 sb->sbuf = NULL;
844 sb->null_flag= GNUNET_YES;
845}
846
847
848/**
849 * Copy the given string buffer from 'in' to 'out'.
850 *
851 * @param in input string
852 * @param out output string
853 */
854static void
855sb_strdup (struct StringBuffer *out,
856 const struct StringBuffer *in)
857
858{
859 out->null_flag = in->null_flag;
860 if (GNUNET_YES == out->null_flag)
861 return;
862 if (out->blen < in->slen)
863 {
864 GNUNET_array_grow (out->abuf,
865 out->blen,
866 in->slen);
867 }
868 out->sbuf = out->abuf;
869 out->slen = in->slen;
870 memcpy (out->sbuf, in->sbuf, out->slen);
871}
872
873
874/**
875 * Copy the given string buffer from 'in' to 'out'.
876 *
877 * @param cstr input string
878 * @param out output string
879 */
880static void
881sb_strdup_cstr (struct StringBuffer *out,
882 const char *cstr)
883{
884 if (NULL == cstr)
885 {
886 out->null_flag = GNUNET_YES;
887 return;
888 }
889 out->null_flag = GNUNET_NO;
890 out->slen = strlen (cstr);
891 if (out->blen < out->slen)
892 {
893 GNUNET_array_grow (out->abuf,
894 out->blen,
895 out->slen);
896 }
897 out->sbuf = out->abuf;
898 memcpy (out->sbuf, cstr, out->slen);
899}
900
901
902/**
903 * Check if the given string 'str' needs parentheses around it when
904 * using it to generate a regex.
905 *
906 * @param str string
907 *
908 * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
909 */
910static int
911needs_parentheses (const struct StringBuffer *str)
912{
913 size_t slen;
914 const char *op;
915 const char *cl;
916 const char *pos;
917 const char *end;
918 unsigned int cnt;
919
920 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
921 return GNUNET_NO;
922 pos = str->sbuf;
923 if ('(' != pos[0])
924 return GNUNET_YES;
925 end = str->sbuf + slen;
926 cnt = 1;
927 pos++;
928 while (cnt > 0)
929 {
930 cl = memchr (pos, ')', end - pos);
931 if (NULL == cl)
932 {
933 GNUNET_break (0);
934 return GNUNET_YES;
935 }
936 /* while '(' before ')', count opening parens */
937 while ( (NULL != (op = memchr (pos, '(', end - pos))) &&
938 (op < cl) )
939 {
940 cnt++;
941 pos = op + 1;
942 }
943 /* got ')' first */
944 cnt--;
945 pos = cl + 1;
946 }
947 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
948}
949
950
951/**
952 * Remove parentheses surrounding string 'str'.
953 * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
954 * You need to GNUNET_free the returned string.
955 *
956 * @param str string, modified to contain a
957 * @return string without surrounding parentheses, string 'str' if no preceding
958 * epsilon could be found, NULL if 'str' was NULL
959 */
960static void
961remove_parentheses (struct StringBuffer *str)
962{
963 size_t slen;
964 const char *pos;
965 const char *end;
966 const char *sbuf;
967 const char *op;
968 const char *cp;
969 unsigned int cnt;
970
971 if (0)
972 return;
973 sbuf = str->sbuf;
974 if ( (GNUNET_YES == str->null_flag) ||
975 (1 >= (slen = str->slen)) ||
976 ('(' != str->sbuf[0]) ||
977 (')' != str->sbuf[slen - 1]) )
978 return;
979 cnt = 0;
980 pos = &sbuf[1];
981 end = &sbuf[slen - 1];
982 op = memchr (pos, '(', end - pos);
983 cp = memchr (pos, ')', end - pos);
984 while (NULL != cp)
985 {
986 while ( (NULL != op) &&
987 (op < cp) )
988 {
989 cnt++;
990 pos = op + 1;
991 op = memchr (pos, '(', end - pos);
992 }
993 while ( (NULL != cp) &&
994 ( (NULL == op) ||
995 (cp < op) ) )
996 {
997 if (0 == cnt)
998 return; /* can't strip parens */
999 cnt--;
1000 pos = cp + 1;
1001 cp = memchr (pos, ')', end - pos);
1002 }
1003 }
1004 if (0 != cnt)
1005 {
1006 GNUNET_break (0);
1007 return;
1008 }
1009 str->sbuf++;
1010 str->slen -= 2;
1011}
1012
1013
1014/**
1015 * Check if the string 'str' starts with an epsilon (empty string).
1016 * Example: "(|a)" is starting with an epsilon.
1017 *
1018 * @param str string to test
1019 *
1020 * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
1021 */
1022static int
1023has_epsilon (const struct StringBuffer *str)
1024{
1025 return
1026 (GNUNET_YES != str->null_flag) &&
1027 (0 < str->slen) &&
1028 ('(' == str->sbuf[0]) &&
1029 ('|' == str->sbuf[1]) &&
1030 (')' == str->sbuf[str->slen - 1]);
1031}
1032
1033
1034/**
1035 * Remove an epsilon from the string str. Where epsilon is an empty string
1036 * Example: str = "(|a|b|c)", result: "a|b|c"
1037 * The returned string needs to be freed.
1038 *
1039 * @param str original string
1040 * @param ret where to return string without preceding epsilon, string 'str' if no preceding
1041 * epsilon could be found, NULL if 'str' was NULL
1042 */
1043static void
1044remove_epsilon (const struct StringBuffer *str,
1045 struct StringBuffer *ret)
1046{
1047 if (GNUNET_YES == str->null_flag)
1048 {
1049 ret->null_flag = GNUNET_YES;
1050 return;
1051 }
1052 if ( (str->slen > 1) &&
1053 ('(' == str->sbuf[0]) &&
1054 ('|' == str->sbuf[1]) &&
1055 (')' == str->sbuf[str->slen - 1]) )
1056 {
1057 /* remove epsilon */
1058 if (ret->blen < str->slen - 3)
1059 {
1060 GNUNET_array_grow (ret->abuf,
1061 ret->blen,
1062 str->slen - 3);
1063 }
1064 ret->sbuf = ret->abuf;
1065 ret->slen = str->slen - 3;
1066 memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1067 return;
1068 }
1069 sb_strdup (ret, str);
1070}
1071
1072
1073/**
1074 * Compare n bytes of 'str1' and 'str2'
1075 *
1076 * @param str1 first string to compare
1077 * @param str2 second string for comparison
1078 * @param n number of bytes to compare
1079 *
1080 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1081 */
1082static int
1083sb_strncmp (const struct StringBuffer *str1,
1084 const struct StringBuffer *str2, size_t n)
1085{
1086 size_t max;
1087
1088 if ( (str1->slen != str2->slen) &&
1089 ( (str1->slen < n) ||
1090 (str2->slen < n) ) )
1091 return -1;
1092 max = GNUNET_MAX (str1->slen, str2->slen);
1093 if (max > n)
1094 max = n;
1095 return memcmp (str1->sbuf, str2->sbuf, max);
1096}
1097
1098
1099/**
1100 * Compare n bytes of 'str1' and 'str2'
1101 *
1102 * @param str1 first string to compare
1103 * @param str2 second C string for comparison
1104 * @param n number of bytes to compare (and length of str2)
1105 *
1106 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1107 */
1108static int
1109sb_strncmp_cstr (const struct StringBuffer *str1,
1110 const char *str2, size_t n)
1111{
1112 if (str1->slen < n)
1113 return -1;
1114 return memcmp (str1->sbuf, str2, n);
1115}
1116
1117
1118/**
1119 * Initialize string buffer for storing strings of up to n
1120 * characters.
1121 *
1122 * @param sb buffer to initialize
1123 * @param n desired target length
1124 */
1125static void
1126sb_init (struct StringBuffer *sb,
1127 size_t n)
1128{
1129 sb->null_flag = GNUNET_NO;
1130 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1131 sb->blen = n;
1132 sb->slen = 0;
1133}
1134
1135
1136/**
1137 * Compare 'str1', starting from position 'k', with whole 'str2'
1138 *
1139 * @param str1 first string to compare, starting from position 'k'
1140 * @param str2 second string for comparison
1141 * @param k starting position in 'str1'
1142 *
1143 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1144 */
1145static int
1146sb_strkcmp (const struct StringBuffer *str1,
1147 const struct StringBuffer *str2, size_t k)
1148{
1149 if ( (GNUNET_YES == str1->null_flag) ||
1150 (GNUNET_YES == str2->null_flag) ||
1151 (k > str1->slen) ||
1152 (str1->slen - k != str2->slen) )
1153 return -1;
1154 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1155}
1156
1157
1158/**
1159 * Helper function used as 'action' in 'REGEX_ITERNAL_automaton_traverse'
1160 * function to create the depth-first numbering of the states.
1161 *
1162 * @param cls states array.
1163 * @param count current state counter.
1164 * @param s current state.
1165 */
1166static void
1167number_states (void *cls, const unsigned int count,
1168 struct REGEX_ITERNAL_State *s)
1169{
1170 struct REGEX_ITERNAL_State **states = cls;
1171
1172 s->dfs_id = count;
1173 if (NULL != states)
1174 states[count] = s;
1175}
1176
1177
1178
1179#define PRIS(a) \
1180 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1181 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1182
1183
1184/**
1185 * Construct the regular expression given the inductive step,
1186 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
1187 * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
1188 *
1189 * @param R_last_ij value of $R^{(k-1)_{ij}.
1190 * @param R_last_ik value of $R^{(k-1)_{ik}.
1191 * @param R_last_kk value of $R^{(k-1)_{kk}.
1192 * @param R_last_kj value of $R^{(k-1)_{kj}.
1193 * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij
1194 * is expected to be NULL when called!
1195 * @param R_cur_l optimization -- kept between iterations to avoid realloc
1196 * @param R_cur_r optimization -- kept between iterations to avoid realloc
1197 */
1198static void
1199automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
1200 const struct StringBuffer *R_last_ik,
1201 const struct StringBuffer *R_last_kk,
1202 const struct StringBuffer *R_last_kj,
1203 struct StringBuffer *R_cur_ij,
1204 struct StringBuffer *R_cur_l,
1205 struct StringBuffer *R_cur_r)
1206{
1207 struct StringBuffer R_temp_ij;
1208 struct StringBuffer R_temp_ik;
1209 struct StringBuffer R_temp_kj;
1210 struct StringBuffer R_temp_kk;
1211 int eps_check;
1212 int ij_ik_cmp;
1213 int ij_kj_cmp;
1214 int ik_kk_cmp;
1215 int kk_kj_cmp;
1216 int clean_ik_kk_cmp;
1217 int clean_kk_kj_cmp;
1218 size_t length;
1219 size_t length_l;
1220 size_t length_r;
1221
1222 /*
1223 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1224 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1225 * R_cur_ij = R_cur_l | R_cur_r
1226 * R_cur_l == R^{(k-1)}_{ij}
1227 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1228 */
1229
1230 if ( (GNUNET_YES == R_last_ij->null_flag) &&
1231 ( (GNUNET_YES == R_last_ik->null_flag) ||
1232 (GNUNET_YES == R_last_kj->null_flag)))
1233 {
1234 /* R^{(k)}_{ij} = N | N */
1235 R_cur_ij->null_flag = GNUNET_YES;
1236 R_cur_ij->synced = GNUNET_NO;
1237 return;
1238 }
1239
1240 if ( (GNUNET_YES == R_last_ik->null_flag) ||
1241 (GNUNET_YES == R_last_kj->null_flag) )
1242 {
1243 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1244 if (GNUNET_YES == R_last_ij->synced)
1245 {
1246 R_cur_ij->synced = GNUNET_YES;
1247 R_cur_ij->null_flag = GNUNET_NO;
1248 return;
1249 }
1250 R_cur_ij->synced = GNUNET_YES;
1251 sb_strdup (R_cur_ij, R_last_ij);
1252 return;
1253 }
1254 R_cur_ij->synced = GNUNET_NO;
1255
1256 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1257 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1258
1259 R_cur_r->null_flag = GNUNET_YES;
1260 R_cur_r->slen = 0;
1261 R_cur_l->null_flag = GNUNET_YES;
1262 R_cur_l->slen = 0;
1263
1264 /* cache results from strcmp, we might need these many times */
1265 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1266 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1267 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1268 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1269
1270 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1271 * as parentheses, so we can better compare the contents */
1272
1273 memset (&R_temp_ij, 0, sizeof (struct StringBuffer));
1274 memset (&R_temp_ik, 0, sizeof (struct StringBuffer));
1275 memset (&R_temp_kk, 0, sizeof (struct StringBuffer));
1276 memset (&R_temp_kj, 0, sizeof (struct StringBuffer));
1277 remove_epsilon (R_last_ik, &R_temp_ik);
1278 remove_epsilon (R_last_kk, &R_temp_kk);
1279 remove_epsilon (R_last_kj, &R_temp_kj);
1280 remove_parentheses (&R_temp_ik);
1281 remove_parentheses (&R_temp_kk);
1282 remove_parentheses (&R_temp_kj);
1283 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1284 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1285
1286 /* construct R_cur_l (and, if necessary R_cur_r) */
1287 if (GNUNET_YES != R_last_ij->null_flag)
1288 {
1289 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1290 * as parentheses, so we can better compare the contents */
1291 remove_epsilon (R_last_ij, &R_temp_ij);
1292 remove_parentheses (&R_temp_ij);
1293
1294 if ( (0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1295 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1296 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1297 {
1298 if (0 == R_temp_ij.slen)
1299 {
1300 R_cur_r->null_flag = GNUNET_NO;
1301 }
1302 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1303 (0 == sb_strncmp_cstr (R_last_ik, "(|", 2) &&
1304 0 == sb_strncmp_cstr (R_last_kj, "(|", 2)))
1305 {
1306 /*
1307 * a|(e|a)a*(e|a) = a*
1308 * a|(e|a)(e|a)*(e|a) = a*
1309 * (e|a)|aa*a = a*
1310 * (e|a)|aa*(e|a) = a*
1311 * (e|a)|(e|a)a*a = a*
1312 * (e|a)|(e|a)a*(e|a) = a*
1313 * (e|a)|(e|a)(e|a)*(e|a) = a*
1314 */
1315 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1316 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1317 else
1318 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1319 }
1320 else
1321 {
1322 /*
1323 * a|aa*a = a+
1324 * a|(e|a)a*a = a+
1325 * a|aa*(e|a) = a+
1326 * a|(e|a)(e|a)*a = a+
1327 * a|a(e|a)*(e|a) = a+
1328 */
1329 if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1330 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1331 else
1332 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1333 }
1334 }
1335 else if ( (0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) && (0 != clean_ik_kk_cmp) )
1336 {
1337 /* a|ab*b = ab* */
1338 if (0 == R_last_kk->slen)
1339 sb_strdup (R_cur_r, R_last_ij);
1340 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1341 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1342 else
1343 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1344 R_cur_l->null_flag = GNUNET_YES;
1345 }
1346 else if ( (0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) && (0 != clean_kk_kj_cmp))
1347 {
1348 /* a|bb*a = b*a */
1349 if (R_last_kk->slen < 1)
1350 {
1351 sb_strdup (R_cur_r, R_last_kj);
1352 }
1353 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1354 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1355 else
1356 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1357
1358 R_cur_l->null_flag = GNUNET_YES;
1359 }
1360 else if ( (0 == ij_ik_cmp) && (0 == kk_kj_cmp) && (! has_epsilon (R_last_ij)) &&
1361 has_epsilon (R_last_kk))
1362 {
1363 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1364 if (needs_parentheses (&R_temp_kk))
1365 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1366 else
1367 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1368 R_cur_l->null_flag = GNUNET_YES;
1369 }
1370 else if ( (0 == ij_kj_cmp) && (0 == ik_kk_cmp) && (! has_epsilon (R_last_ij)) &&
1371 has_epsilon (R_last_kk))
1372 {
1373 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1374 if (needs_parentheses (&R_temp_kk))
1375 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1376 else
1377 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1378 R_cur_l->null_flag = GNUNET_YES;
1379 }
1380 else
1381 {
1382 sb_strdup (R_cur_l, R_last_ij);
1383 remove_parentheses (R_cur_l);
1384 }
1385 }
1386 else
1387 {
1388 /* we have no left side */
1389 R_cur_l->null_flag = GNUNET_YES;
1390 }
1391
1392 /* construct R_cur_r, if not already constructed */
1393 if (GNUNET_YES == R_cur_r->null_flag)
1394 {
1395 length = R_temp_kk.slen - R_last_ik->slen;
1396
1397 /* a(ba)*bx = (ab)+x */
1398 if ( (length > 0) &&
1399 (GNUNET_YES != R_last_kk->null_flag) &&
1400 (0 < R_last_kk->slen) &&
1401 (GNUNET_YES != R_last_kj->null_flag) &&
1402 (0 < R_last_kj->slen) &&
1403 (GNUNET_YES != R_last_ik->null_flag) &&
1404 (0 < R_last_ik->slen) &&
1405 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1406 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)) )
1407 {
1408 struct StringBuffer temp_a;
1409 struct StringBuffer temp_b;
1410
1411 sb_init (&temp_a, length);
1412 sb_init (&temp_b, R_last_kj->slen - length);
1413
1414 length_l = length;
1415 temp_a.sbuf = temp_a.abuf;
1416 memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1417 temp_a.slen = length_l;
1418
1419 length_r = R_last_kj->slen - length;
1420 temp_b.sbuf = temp_b.abuf;
1421 memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1422 temp_b.slen = length_r;
1423
1424 /* e|(ab)+ = (ab)* */
1425 if ( (GNUNET_YES != R_cur_l->null_flag) &&
1426 (0 == R_cur_l->slen) &&
1427 (0 == temp_b.slen) )
1428 {
1429 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1430 sb_free (R_cur_l);
1431 R_cur_l->null_flag = GNUNET_YES;
1432 }
1433 else
1434 {
1435 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1436 }
1437 sb_free (&temp_a);
1438 sb_free (&temp_b);
1439 }
1440 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk) &&
1441 0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1442 {
1443 /*
1444 * (e|a)a*(e|a) = a*
1445 * (e|a)(e|a)*(e|a) = a*
1446 */
1447 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1448 {
1449 if (needs_parentheses (&R_temp_kk))
1450 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1451 else
1452 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1453 }
1454 /* aa*a = a+a */
1455 else if ( (0 == clean_ik_kk_cmp) &&
1456 (0 == clean_kk_kj_cmp) &&
1457 (! has_epsilon (R_last_ik)) )
1458 {
1459 if (needs_parentheses (&R_temp_kk))
1460 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1461 else
1462 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1463 }
1464 /*
1465 * (e|a)a*a = a+
1466 * aa*(e|a) = a+
1467 * a(e|a)*(e|a) = a+
1468 * (e|a)a*a = a+
1469 */
1470 else
1471 {
1472 eps_check =
1473 (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) +
1474 has_epsilon (R_last_kj));
1475
1476 if (1 == eps_check)
1477 {
1478 if (needs_parentheses (&R_temp_kk))
1479 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1480 else
1481 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1482 }
1483 }
1484 }
1485 /*
1486 * aa*b = a+b
1487 * (e|a)(e|a)*b = a*b
1488 */
1489 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1490 {
1491 if (has_epsilon (R_last_ik))
1492 {
1493 if (needs_parentheses (&R_temp_kk))
1494 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1495 else
1496 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1497 }
1498 else
1499 {
1500 if (needs_parentheses (&R_temp_kk))
1501 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1502 else
1503 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1504 }
1505 }
1506 /*
1507 * ba*a = ba+
1508 * b(e|a)*(e|a) = ba*
1509 */
1510 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1511 {
1512 if (has_epsilon (R_last_kj))
1513 {
1514 if (needs_parentheses (&R_temp_kk))
1515 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1516 else
1517 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1518 }
1519 else
1520 {
1521 if (needs_parentheses (&R_temp_kk))
1522 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1523 else
1524 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1525 }
1526 }
1527 else
1528 {
1529 if (0 < R_temp_kk.slen)
1530 {
1531 if (needs_parentheses (&R_temp_kk))
1532 {
1533 sb_printf3 (R_cur_r, "%.*s(%.*s)*%.*s", 3, R_last_ik, &R_temp_kk,
1534 R_last_kj);
1535 }
1536 else
1537 {
1538 sb_printf3 (R_cur_r, "%.*s%.*s*%.*s", 1, R_last_ik, &R_temp_kk,
1539 R_last_kj);
1540 }
1541 }
1542 else
1543 {
1544 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1545 }
1546 }
1547 }
1548 sb_free (&R_temp_ij);
1549 sb_free (&R_temp_ik);
1550 sb_free (&R_temp_kk);
1551 sb_free (&R_temp_kj);
1552
1553 if ( (GNUNET_YES == R_cur_l->null_flag) &&
1554 (GNUNET_YES == R_cur_r->null_flag) )
1555 {
1556 R_cur_ij->null_flag = GNUNET_YES;
1557 return;
1558 }
1559
1560 if ( (GNUNET_YES != R_cur_l->null_flag) &&
1561 (GNUNET_YES == R_cur_r->null_flag) )
1562 {
1563 struct StringBuffer tmp;
1564
1565 tmp = *R_cur_ij;
1566 *R_cur_ij = *R_cur_l;
1567 *R_cur_l = tmp;
1568 return;
1569 }
1570
1571 if ( (GNUNET_YES == R_cur_l->null_flag) &&
1572 (GNUNET_YES != R_cur_r->null_flag) )
1573 {
1574 struct StringBuffer tmp;
1575
1576 tmp = *R_cur_ij;
1577 *R_cur_ij = *R_cur_r;
1578 *R_cur_r = tmp;
1579 return;
1580 }
1581
1582 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1583 {
1584 struct StringBuffer tmp;
1585
1586 tmp = *R_cur_ij;
1587 *R_cur_ij = *R_cur_l;
1588 *R_cur_l = tmp;
1589 return;
1590 }
1591 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1592}
1593
1594
1595/**
1596 * Create proofs for all states in the given automaton. Implementation of the
1597 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
1598 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
1599 *
1600 * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the
1601 * proof) fields. The starting state will only have a valid proof/hash if it has
1602 * any incoming transitions.
1603 *
1604 * @param a automaton for which to assign proofs and hashes, must not be NULL
1605 */
1606static int
1607automaton_create_proofs (struct REGEX_ITERNAL_Automaton *a)
1608{
1609 unsigned int n = a->state_count;
1610 struct REGEX_ITERNAL_State *states[n];
1611 struct StringBuffer *R_last;
1612 struct StringBuffer *R_cur;
1613 struct StringBuffer R_cur_r;
1614 struct StringBuffer R_cur_l;
1615 struct StringBuffer *R_swap;
1616 struct REGEX_ITERNAL_Transition *t;
1617 struct StringBuffer complete_regex;
1618 unsigned int i;
1619 unsigned int j;
1620 unsigned int k;
1621
1622 R_last = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
1623 R_cur = GNUNET_malloc_large (sizeof (struct StringBuffer) * n * n);
1624 if ( (NULL == R_last) ||
1625 (NULL == R_cur) )
1626 {
1627 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1628 GNUNET_free_non_null (R_cur);
1629 GNUNET_free_non_null (R_last);
1630 return GNUNET_SYSERR;
1631 }
1632
1633 /* create depth-first numbering of the states, initializes 'state' */
1634 REGEX_ITERNAL_automaton_traverse (a, a->start, NULL, NULL, &number_states,
1635 states);
1636
1637 for (i = 0; i < n; i++)
1638 GNUNET_assert (NULL != states[i]);
1639 for (i = 0; i < n; i++)
1640 for (j = 0; j < n; j++)
1641 R_last[i *n + j].null_flag = GNUNET_YES;
1642
1643 /* Compute regular expressions of length "1" between each pair of states */
1644 for (i = 0; i < n; i++)
1645 {
1646 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1647 {
1648 j = t->to_state->dfs_id;
1649 if (GNUNET_YES == R_last[i * n + j].null_flag)
1650 {
1651 sb_strdup_cstr (&R_last[i * n + j], t->label);
1652 }
1653 else
1654 {
1655 sb_append_cstr (&R_last[i * n + j], "|");
1656 sb_append_cstr (&R_last[i * n + j], t->label);
1657 }
1658 }
1659 /* add self-loop: i is reachable from i via epsilon-transition */
1660 if (GNUNET_YES == R_last[i * n + i].null_flag)
1661 {
1662 R_last[i * n + i].slen = 0;
1663 R_last[i * n + i].null_flag = GNUNET_NO;
1664 }
1665 else
1666 {
1667 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1668 }
1669 }
1670 for (i = 0; i < n; i++)
1671 for (j = 0; j < n; j++)
1672 if (needs_parentheses (&R_last[i * n + j]))
1673 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1674 /* Compute regular expressions of length "k" between each pair of states per
1675 * induction */
1676 memset (&R_cur_l, 0, sizeof (struct StringBuffer));
1677 memset (&R_cur_r, 0, sizeof (struct StringBuffer));
1678 for (k = 0; k < n; k++)
1679 {
1680 for (i = 0; i < n; i++)
1681 {
1682 for (j = 0; j < n; j++)
1683 {
1684 /* Basis for the recursion:
1685 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1686 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1687 */
1688
1689 /* Create R_cur[i][j] and simplify the expression */
1690 automaton_create_proofs_simplify (&R_last[i * n + j], &R_last[i * n + k],
1691 &R_last[k * n + k], &R_last[k * n + j],
1692 &R_cur[i * n + j],
1693 &R_cur_l, &R_cur_r);
1694 }
1695 }
1696 /* set R_last = R_cur */
1697 R_swap = R_last;
1698 R_last = R_cur;
1699 R_cur = R_swap;
1700 /* clear 'R_cur' for next iteration */
1701 for (i = 0; i < n; i++)
1702 for (j = 0; j < n; j++)
1703 R_cur[i * n + j].null_flag = GNUNET_YES;
1704 }
1705 sb_free (&R_cur_l);
1706 sb_free (&R_cur_r);
1707 /* assign proofs and hashes */
1708 for (i = 0; i < n; i++)
1709 {
1710 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1711 {
1712 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1713 R_last[a->start->dfs_id * n + i].slen);
1714 GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof),
1715 &states[i]->hash);
1716 }
1717 }
1718
1719 /* complete regex for whole DFA: union of all pairs (start state/accepting
1720 * state(s)). */
1721 sb_init (&complete_regex, 16 * n);
1722 for (i = 0; i < n; i++)
1723 {
1724 if (states[i]->accepting)
1725 {
1726 if ( (0 == complete_regex.slen) &&
1727 (0 < R_last[a->start->dfs_id * n + i].slen) )
1728 {
1729 sb_append (&complete_regex,
1730 &R_last[a->start->dfs_id * n + i]);
1731 }
1732 else if ( (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1733 (0 < R_last[a->start->dfs_id * n + i].slen) )
1734 {
1735 sb_append_cstr (&complete_regex, "|");
1736 sb_append (&complete_regex,
1737 &R_last[a->start->dfs_id * n + i]);
1738 }
1739 }
1740 }
1741 a->canonical_regex = GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1742
1743 /* cleanup */
1744 sb_free (&complete_regex);
1745 for (i = 0; i < n; i++)
1746 for (j = 0; j < n; j++)
1747 {
1748 sb_free (&R_cur[i * n + j]);
1749 sb_free (&R_last[i * n + j]);
1750 }
1751 GNUNET_free (R_cur);
1752 GNUNET_free (R_last);
1753 return GNUNET_OK;
1754}
1755
1756
1757/**
1758 * Creates a new DFA state based on a set of NFA states. Needs to be freed using
1759 * automaton_destroy_state.
1760 *
1761 * @param ctx context
1762 * @param nfa_states set of NFA states on which the DFA should be based on
1763 *
1764 * @return new DFA state
1765 */
1766static struct REGEX_ITERNAL_State *
1767dfa_state_create (struct REGEX_ITERNAL_Context *ctx,
1768 struct REGEX_ITERNAL_StateSet *nfa_states)
1769{
1770 struct REGEX_ITERNAL_State *s;
1771 char *pos;
1772 size_t len;
1773 struct REGEX_ITERNAL_State *cstate;
1774 struct REGEX_ITERNAL_Transition *ctran;
1775 unsigned int i;
1776
1777 s = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_State));
1778 s->id = ctx->state_id++;
1779 s->index = -1;
1780 s->lowlink = -1;
1781
1782 if (NULL == nfa_states)
1783 {
1784 GNUNET_asprintf (&s->name, "s%i", s->id);
1785 return s;
1786 }
1787
1788 s->nfa_set = *nfa_states;
1789
1790 if (nfa_states->off < 1)
1791 return s;
1792
1793 /* Create a name based on 'nfa_states' */
1794 len = nfa_states->off * 14 + 4;
1795 s->name = GNUNET_malloc (len);
1796 strcat (s->name, "{");
1797 pos = s->name + 1;
1798
1799 for (i = 0; i < nfa_states->off; i++)
1800 {
1801 cstate = nfa_states->states[i];
1802 GNUNET_snprintf (pos, pos - s->name + len,
1803 "%i,", cstate->id);
1804 pos += strlen (pos);
1805
1806 /* Add a transition for each distinct label to NULL state */
1807 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1808 if (NULL != ctran->label)
1809 state_add_transition (ctx, s, ctran->label, NULL);
1810
1811 /* If the nfa_states contain an accepting state, the new dfa state is also
1812 * accepting. */
1813 if (cstate->accepting)
1814 s->accepting = 1;
1815 }
1816 pos[-1] = '}';
1817 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1818
1819 memset (nfa_states, 0, sizeof (struct REGEX_ITERNAL_StateSet));
1820 return s;
1821}
1822
1823
1824/**
1825 * Move from the given state 's' to the next state on transition 'str'. Consumes
1826 * as much of the given 'str' as possible (usefull for strided DFAs). On return
1827 * 's' will point to the next state, and the length of the substring used for
1828 * this transition will be returned. If no transition possible 0 is returned and
1829 * 's' points to NULL.
1830 *
1831 * @param s starting state, will point to the next state or NULL (if no
1832 * transition possible)
1833 * @param str edge label to follow (will match longest common prefix)
1834 *
1835 * @return length of the substring comsumed from 'str'
1836 */
1837static unsigned int
1838dfa_move (struct REGEX_ITERNAL_State **s, const char *str)
1839{
1840 struct REGEX_ITERNAL_Transition *t;
1841 struct REGEX_ITERNAL_State *new_s;
1842 unsigned int len;
1843 unsigned int max_len;
1844
1845 if (NULL == s)
1846 return 0;
1847
1848 new_s = NULL;
1849 max_len = 0;
1850 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1851 {
1852 len = strlen (t->label);
1853
1854 if (0 == strncmp (t->label, str, len))
1855 {
1856 if (len >= max_len)
1857 {
1858 max_len = len;
1859 new_s = t->to_state;
1860 }
1861 }
1862 }
1863
1864 *s = new_s;
1865 return max_len;
1866}
1867
1868
1869/**
1870 * Set the given state 'marked' to GNUNET_YES. Used by the
1871 * 'dfa_remove_unreachable_states' function to detect unreachable states in the
1872 * automaton.
1873 *
1874 * @param cls closure, not used.
1875 * @param count count, not used.
1876 * @param s state where the marked attribute will be set to GNUNET_YES.
1877 */
1878static void
1879mark_states (void *cls, const unsigned int count, struct REGEX_ITERNAL_State *s)
1880{
1881 s->marked = GNUNET_YES;
1882}
1883
1884
1885/**
1886 * Remove all unreachable states from DFA 'a'. Unreachable states are those
1887 * states that are not reachable from the starting state.
1888 *
1889 * @param a DFA automaton
1890 */
1891static void
1892dfa_remove_unreachable_states (struct REGEX_ITERNAL_Automaton *a)
1893{
1894 struct REGEX_ITERNAL_State *s;
1895 struct REGEX_ITERNAL_State *s_next;
1896
1897 /* 1. unmark all states */
1898 for (s = a->states_head; NULL != s; s = s->next)
1899 s->marked = GNUNET_NO;
1900
1901 /* 2. traverse dfa from start state and mark all visited states */
1902 REGEX_ITERNAL_automaton_traverse (a, a->start, NULL, NULL, &mark_states, NULL);
1903
1904 /* 3. delete all states that were not visited */
1905 for (s = a->states_head; NULL != s; s = s_next)
1906 {
1907 s_next = s->next;
1908 if (GNUNET_NO == s->marked)
1909 automaton_remove_state (a, s);
1910 }
1911}
1912
1913
1914/**
1915 * Remove all dead states from the DFA 'a'. Dead states are those states that do
1916 * not transition to any other state but themselves.
1917 *
1918 * @param a DFA automaton
1919 */
1920static void
1921dfa_remove_dead_states (struct REGEX_ITERNAL_Automaton *a)
1922{
1923 struct REGEX_ITERNAL_State *s;
1924 struct REGEX_ITERNAL_State *s_next;
1925 struct REGEX_ITERNAL_Transition *t;
1926 int dead;
1927
1928 GNUNET_assert (DFA == a->type);
1929
1930 for (s = a->states_head; NULL != s; s = s_next)
1931 {
1932 s_next = s->next;
1933
1934 if (s->accepting)
1935 continue;
1936
1937 dead = 1;
1938 for (t = s->transitions_head; NULL != t; t = t->next)
1939 {
1940 if (NULL != t->to_state && t->to_state != s)
1941 {
1942 dead = 0;
1943 break;
1944 }
1945 }
1946
1947 if (0 == dead)
1948 continue;
1949
1950 /* state s is dead, remove it */
1951 automaton_remove_state (a, s);
1952 }
1953}
1954
1955
1956/**
1957 * Merge all non distinguishable states in the DFA 'a'
1958 *
1959 * @param ctx context
1960 * @param a DFA automaton
1961 * @return GNUNET_OK on success
1962 */
1963static int
1964dfa_merge_nondistinguishable_states (struct REGEX_ITERNAL_Context *ctx,
1965 struct REGEX_ITERNAL_Automaton *a)
1966{
1967 uint32_t *table;
1968 struct REGEX_ITERNAL_State *s1;
1969 struct REGEX_ITERNAL_State *s2;
1970 struct REGEX_ITERNAL_Transition *t1;
1971 struct REGEX_ITERNAL_Transition *t2;
1972 struct REGEX_ITERNAL_State *s1_next;
1973 struct REGEX_ITERNAL_State *s2_next;
1974 int change;
1975 unsigned int num_equal_edges;
1976 unsigned int i;
1977 unsigned int state_cnt;
1978 unsigned long long idx;
1979 unsigned long long idx1;
1980
1981 if ( (NULL == a) || (0 == a->state_count) )
1982 {
1983 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1984 "Could not merge nondistinguishable states, automaton was NULL.\n");
1985 return GNUNET_SYSERR;
1986 }
1987
1988 state_cnt = a->state_count;
1989 table = GNUNET_malloc_large ((sizeof (uint32_t) * state_cnt * state_cnt / 32) + sizeof (uint32_t));
1990 if (NULL == table)
1991 {
1992 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1993 return GNUNET_SYSERR;
1994 }
1995
1996 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1997 s1->marked = i++;
1998
1999 /* Mark all pairs of accepting/!accepting states */
2000 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
2001 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
2002 if ( (s1->accepting && !s2->accepting) ||
2003 (!s1->accepting && s2->accepting) )
2004 {
2005 idx = s1->marked * state_cnt + s2->marked;
2006 table[idx / 32] |= (1 << (idx % 32));
2007 }
2008
2009 /* Find all equal states */
2010 change = 1;
2011 while (0 != change)
2012 {
2013 change = 0;
2014 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
2015 {
2016 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
2017 {
2018 idx = s1->marked * state_cnt + s2->marked;
2019 if (0 != (table[idx / 32] & (1 << (idx % 32))))
2020 continue;
2021 num_equal_edges = 0;
2022 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2023 {
2024 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2025 {
2026 if (0 == strcmp (t1->label, t2->label))
2027 {
2028 num_equal_edges++;
2029 /* same edge, but targets definitively different, so we're different
2030 as well */
2031 if (t1->to_state->marked > t2->to_state->marked)
2032 idx1 = t1->to_state->marked * state_cnt + t2->to_state->marked;
2033 else
2034 idx1 = t2->to_state->marked * state_cnt + t1->to_state->marked;
2035 if (0 != (table[idx1 / 32] & (1 << (idx1 % 32))))
2036 {
2037 table[idx / 32] |= (1 << (idx % 32));
2038 change = 1; /* changed a marker, need to run again */
2039 }
2040 }
2041 }
2042 }
2043 if ( (num_equal_edges != s1->transition_count) ||
2044 (num_equal_edges != s2->transition_count) )
2045 {
2046 /* Make sure ALL edges of possible equal states are the same */
2047 table[idx / 32] |= (1 << (idx % 32));
2048 change = 1; /* changed a marker, need to run again */
2049 }
2050 }
2051 }
2052 }
2053
2054 /* Merge states that are equal */
2055 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2056 {
2057 s1_next = s1->next;
2058 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2059 {
2060 s2_next = s2->next;
2061 idx = s1->marked * state_cnt + s2->marked;
2062 if (0 == (table[idx / 32] & (1 << (idx % 32))))
2063 automaton_merge_states (ctx, a, s1, s2);
2064 }
2065 }
2066
2067 GNUNET_free (table);
2068 return GNUNET_OK;
2069}
2070
2071
2072/**
2073 * Minimize the given DFA 'a' by removing all unreachable states, removing all
2074 * dead states and merging all non distinguishable states
2075 *
2076 * @param ctx context
2077 * @param a DFA automaton
2078 * @return GNUNET_OK on success
2079 */
2080static int
2081dfa_minimize (struct REGEX_ITERNAL_Context *ctx,
2082 struct REGEX_ITERNAL_Automaton *a)
2083{
2084 if (NULL == a)
2085 return GNUNET_SYSERR;
2086
2087 GNUNET_assert (DFA == a->type);
2088
2089 /* 1. remove unreachable states */
2090 dfa_remove_unreachable_states (a);
2091
2092 /* 2. remove dead states */
2093 dfa_remove_dead_states (a);
2094
2095 /* 3. Merge nondistinguishable states */
2096 if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a))
2097 return GNUNET_SYSERR;
2098 return GNUNET_OK;
2099}
2100
2101
2102/**
2103 * Context for adding strided transitions to a DFA.
2104 */
2105struct REGEX_ITERNAL_Strided_Context
2106{
2107 /**
2108 * Length of the strides.
2109 */
2110 const unsigned int stride;
2111
2112 /**
2113 * Strided transitions DLL. New strided transitions will be stored in this DLL
2114 * and afterwards added to the DFA.
2115 */
2116 struct REGEX_ITERNAL_Transition *transitions_head;
2117
2118 /**
2119 * Strided transitions DLL.
2120 */
2121 struct REGEX_ITERNAL_Transition *transitions_tail;
2122};
2123
2124
2125/**
2126 * Recursive helper function to add strides to a DFA.
2127 *
2128 * @param cls context, contains stride length and strided transitions DLL.
2129 * @param depth current depth of the depth-first traversal of the graph.
2130 * @param label current label, string that contains all labels on the path from
2131 * 'start' to 's'.
2132 * @param start start state for the depth-first traversal of the graph.
2133 * @param s current state in the depth-first traversal
2134 */
2135void
2136dfa_add_multi_strides_helper (void *cls, const unsigned int depth, char *label,
2137 struct REGEX_ITERNAL_State *start,
2138 struct REGEX_ITERNAL_State *s)
2139{
2140 struct REGEX_ITERNAL_Strided_Context *ctx = cls;
2141 struct REGEX_ITERNAL_Transition *t;
2142 char *new_label;
2143
2144 if (depth == ctx->stride)
2145 {
2146 t = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_Transition));
2147 t->label = GNUNET_strdup (label);
2148 t->to_state = s;
2149 t->from_state = start;
2150 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, ctx->transitions_tail,
2151 t);
2152 }
2153 else
2154 {
2155 for (t = s->transitions_head; NULL != t; t = t->next)
2156 {
2157 /* Do not consider self-loops, because it end's up in too many
2158 * transitions */
2159 if (t->to_state == t->from_state)
2160 continue;
2161
2162 if (NULL != label)
2163 {
2164 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2165 }
2166 else
2167 new_label = GNUNET_strdup (t->label);
2168
2169 dfa_add_multi_strides_helper (cls, (depth + 1), new_label, start,
2170 t->to_state);
2171 }
2172 }
2173 GNUNET_free_non_null (label);
2174}
2175
2176
2177/**
2178 * Function called for each state in the DFA. Starts a traversal of depth set in
2179 * context starting from state 's'.
2180 *
2181 * @param cls context.
2182 * @param count not used.
2183 * @param s current state.
2184 */
2185void
2186dfa_add_multi_strides (void *cls, const unsigned int count,
2187 struct REGEX_ITERNAL_State *s)
2188{
2189 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2190}
2191
2192
2193/**
2194 * Adds multi-strided transitions to the given 'dfa'.
2195 *
2196 * @param regex_ctx regex context needed to add transitions to the automaton.
2197 * @param dfa DFA to which the multi strided transitions should be added.
2198 * @param stride_len length of the strides.
2199 */
2200void
2201REGEX_ITERNAL_dfa_add_multi_strides (struct REGEX_ITERNAL_Context *regex_ctx,
2202 struct REGEX_ITERNAL_Automaton *dfa,
2203 const unsigned int stride_len)
2204{
2205 struct REGEX_ITERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2206 struct REGEX_ITERNAL_Transition *t;
2207 struct REGEX_ITERNAL_Transition *t_next;
2208
2209 if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
2210 return;
2211
2212 /* Compute the new transitions of given stride_len */
2213 REGEX_ITERNAL_automaton_traverse (dfa, dfa->start, NULL, NULL,
2214 &dfa_add_multi_strides, &ctx);
2215
2216 /* Add all the new transitions to the automaton. */
2217 for (t = ctx.transitions_head; NULL != t; t = t_next)
2218 {
2219 t_next = t->next;
2220 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2221 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2222 GNUNET_free_non_null (t->label);
2223 GNUNET_free (t);
2224 }
2225
2226 /* Mark this automaton as multistrided */
2227 dfa->is_multistrided = GNUNET_YES;
2228}
2229
2230/**
2231 * Recursive Helper function for DFA path compression. Does DFS on the DFA graph
2232 * and adds new transitions to the given transitions DLL and marks states that
2233 * should be removed by setting state->contained to GNUNET_YES.
2234 *
2235 * @param dfa DFA for which the paths should be compressed.
2236 * @param start starting state for linear path search.
2237 * @param cur current state in the recursive DFS.
2238 * @param label current label (string of traversed labels).
2239 * @param max_len maximal path compression length.
2240 * @param transitions_head transitions DLL.
2241 * @param transitions_tail transitions DLL.
2242 */
2243void
2244dfa_compress_paths_helper (struct REGEX_ITERNAL_Automaton *dfa,
2245 struct REGEX_ITERNAL_State *start,
2246 struct REGEX_ITERNAL_State *cur, char *label,
2247 unsigned int max_len,
2248 struct REGEX_ITERNAL_Transition **transitions_head,
2249 struct REGEX_ITERNAL_Transition **transitions_tail)
2250{
2251 struct REGEX_ITERNAL_Transition *t;
2252 char *new_label;
2253
2254
2255 if (NULL != label &&
2256 ((cur->incoming_transition_count > 1 || GNUNET_YES == cur->accepting ||
2257 GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 &&
2258 max_len == strlen (label)) ||
2259 (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
2260 {
2261 t = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_Transition));
2262 t->label = GNUNET_strdup (label);
2263 t->to_state = cur;
2264 t->from_state = start;
2265 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2266
2267 if (GNUNET_NO == cur->marked)
2268 {
2269 dfa_compress_paths_helper (dfa, cur, cur, NULL, max_len, transitions_head,
2270 transitions_tail);
2271 }
2272 return;
2273 }
2274 else if (cur != start)
2275 cur->contained = GNUNET_YES;
2276
2277 if (GNUNET_YES == cur->marked && cur != start)
2278 return;
2279
2280 cur->marked = GNUNET_YES;
2281
2282
2283 for (t = cur->transitions_head; NULL != t; t = t->next)
2284 {
2285 if (NULL != label)
2286 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2287 else
2288 new_label = GNUNET_strdup (t->label);
2289
2290 if (t->to_state != cur)
2291 {
2292 dfa_compress_paths_helper (dfa, start, t->to_state, new_label, max_len,
2293 transitions_head, transitions_tail);
2294 }
2295 GNUNET_free (new_label);
2296 }
2297}
2298
2299
2300/**
2301 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
2302 * compressed to 0->3 by combining transitions.
2303 *
2304 * @param regex_ctx context for adding new transitions.
2305 * @param dfa DFA representation, will directly modify the given DFA.
2306 * @param max_len maximal length of the compressed paths.
2307 */
2308static void
2309dfa_compress_paths (struct REGEX_ITERNAL_Context *regex_ctx,
2310 struct REGEX_ITERNAL_Automaton *dfa, unsigned int max_len)
2311{
2312 struct REGEX_ITERNAL_State *s;
2313 struct REGEX_ITERNAL_State *s_next;
2314 struct REGEX_ITERNAL_Transition *t;
2315 struct REGEX_ITERNAL_Transition *t_next;
2316 struct REGEX_ITERNAL_Transition *transitions_head = NULL;
2317 struct REGEX_ITERNAL_Transition *transitions_tail = NULL;
2318
2319 if (NULL == dfa)
2320 return;
2321
2322 /* Count the incoming transitions on each state. */
2323 for (s = dfa->states_head; NULL != s; s = s->next)
2324 {
2325 for (t = s->transitions_head; NULL != t; t = t->next)
2326 {
2327 if (NULL != t->to_state)
2328 t->to_state->incoming_transition_count++;
2329 }
2330 }
2331
2332 /* Unmark all states. */
2333 for (s = dfa->states_head; NULL != s; s = s->next)
2334 {
2335 s->marked = GNUNET_NO;
2336 s->contained = GNUNET_NO;
2337 }
2338
2339 /* Add strides and mark states that can be deleted. */
2340 dfa_compress_paths_helper (dfa, dfa->start, dfa->start, NULL, max_len,
2341 &transitions_head, &transitions_tail);
2342
2343 /* Add all the new transitions to the automaton. */
2344 for (t = transitions_head; NULL != t; t = t_next)
2345 {
2346 t_next = t->next;
2347 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2348 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2349 GNUNET_free_non_null (t->label);
2350 GNUNET_free (t);
2351 }
2352
2353 /* Remove marked states (including their incoming and outgoing transitions). */
2354 for (s = dfa->states_head; NULL != s; s = s_next)
2355 {
2356 s_next = s->next;
2357 if (GNUNET_YES == s->contained)
2358 automaton_remove_state (dfa, s);
2359 }
2360}
2361
2362
2363/**
2364 * Creates a new NFA fragment. Needs to be cleared using
2365 * automaton_fragment_clear.
2366 *
2367 * @param start starting state
2368 * @param end end state
2369 *
2370 * @return new NFA fragment
2371 */
2372static struct REGEX_ITERNAL_Automaton *
2373nfa_fragment_create (struct REGEX_ITERNAL_State *start,
2374 struct REGEX_ITERNAL_State *end)
2375{
2376 struct REGEX_ITERNAL_Automaton *n;
2377
2378 n = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_Automaton));
2379
2380 n->type = NFA;
2381 n->start = NULL;
2382 n->end = NULL;
2383 n->state_count = 0;
2384
2385 if (NULL == start || NULL == end)
2386 return n;
2387
2388 automaton_add_state (n, end);
2389 automaton_add_state (n, start);
2390
2391 n->state_count = 2;
2392
2393 n->start = start;
2394 n->end = end;
2395
2396 return n;
2397}
2398
2399
2400/**
2401 * Adds a list of states to the given automaton 'n'.
2402 *
2403 * @param n automaton to which the states should be added
2404 * @param states_head head of the DLL of states
2405 * @param states_tail tail of the DLL of states
2406 */
2407static void
2408nfa_add_states (struct REGEX_ITERNAL_Automaton *n,
2409 struct REGEX_ITERNAL_State *states_head,
2410 struct REGEX_ITERNAL_State *states_tail)
2411{
2412 struct REGEX_ITERNAL_State *s;
2413
2414 if (NULL == n || NULL == states_head)
2415 {
2416 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2417 return;
2418 }
2419
2420 if (NULL == n->states_head)
2421 {
2422 n->states_head = states_head;
2423 n->states_tail = states_tail;
2424 return;
2425 }
2426
2427 if (NULL != states_head)
2428 {
2429 n->states_tail->next = states_head;
2430 n->states_tail = states_tail;
2431 }
2432
2433 for (s = states_head; NULL != s; s = s->next)
2434 n->state_count++;
2435}
2436
2437
2438/**
2439 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
2440 *
2441 * @param ctx context
2442 * @param accepting is it an accepting state or not
2443 *
2444 * @return new NFA state
2445 */
2446static struct REGEX_ITERNAL_State *
2447nfa_state_create (struct REGEX_ITERNAL_Context *ctx, int accepting)
2448{
2449 struct REGEX_ITERNAL_State *s;
2450
2451 s = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_State));
2452 s->id = ctx->state_id++;
2453 s->accepting = accepting;
2454 s->marked = GNUNET_NO;
2455 s->contained = 0;
2456 s->index = -1;
2457 s->lowlink = -1;
2458 s->scc_id = 0;
2459 s->name = NULL;
2460 GNUNET_asprintf (&s->name, "s%i", s->id);
2461
2462 return s;
2463}
2464
2465
2466/**
2467 * Calculates the closure set for the given set of states.
2468 *
2469 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
2470 * @param nfa the NFA containing 's'
2471 * @param states list of states on which to base the closure on
2472 * @param label transitioning label for which to base the closure on,
2473 * pass NULL for epsilon transition
2474 */
2475static void
2476nfa_closure_set_create (struct REGEX_ITERNAL_StateSet *ret,
2477 struct REGEX_ITERNAL_Automaton *nfa,
2478 struct REGEX_ITERNAL_StateSet *states, const char *label)
2479{
2480 struct REGEX_ITERNAL_State *s;
2481 unsigned int i;
2482 struct REGEX_ITERNAL_StateSet_MDLL cls_stack;
2483 struct REGEX_ITERNAL_State *clsstate;
2484 struct REGEX_ITERNAL_State *currentstate;
2485 struct REGEX_ITERNAL_Transition *ctran;
2486
2487 memset (ret, 0, sizeof (struct REGEX_ITERNAL_StateSet));
2488 if (NULL == states)
2489 return;
2490
2491 for (i = 0; i < states->off; i++)
2492 {
2493 s = states->states[i];
2494
2495 /* Add start state to closure only for epsilon closure */
2496 if (NULL == label)
2497 state_set_append (ret, s);
2498
2499 /* initialize work stack */
2500 cls_stack.head = NULL;
2501 cls_stack.tail = NULL;
2502 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2503 cls_stack.len = 1;
2504
2505 while (NULL != (currentstate = cls_stack.tail))
2506 {
2507 GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
2508 currentstate);
2509 cls_stack.len--;
2510 for (ctran = currentstate->transitions_head; NULL != ctran;
2511 ctran = ctran->next)
2512 {
2513 if (NULL == (clsstate = ctran->to_state))
2514 continue;
2515 if (0 != clsstate->contained)
2516 continue;
2517 if (0 != nullstrcmp (label, ctran->label))
2518 continue;
2519 state_set_append (ret, clsstate);
2520 GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail,
2521 clsstate);
2522 cls_stack.len++;
2523 clsstate->contained = 1;
2524 }
2525 }
2526 }
2527 for (i = 0; i < ret->off; i++)
2528 ret->states[i]->contained = 0;
2529
2530 if (ret->off > 1)
2531 qsort (ret->states, ret->off, sizeof (struct REGEX_ITERNAL_State *),
2532 &state_compare);
2533}
2534
2535
2536/**
2537 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
2538 *
2539 * @param ctx context
2540 */
2541static void
2542nfa_add_concatenation (struct REGEX_ITERNAL_Context *ctx)
2543{
2544 struct REGEX_ITERNAL_Automaton *a;
2545 struct REGEX_ITERNAL_Automaton *b;
2546 struct REGEX_ITERNAL_Automaton *new_nfa;
2547
2548 b = ctx->stack_tail;
2549 GNUNET_assert (NULL != b);
2550 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2551 a = ctx->stack_tail;
2552 GNUNET_assert (NULL != a);
2553 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2554
2555 state_add_transition (ctx, a->end, NULL, b->start);
2556 a->end->accepting = 0;
2557 b->end->accepting = 1;
2558
2559 new_nfa = nfa_fragment_create (NULL, NULL);
2560 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2561 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2562 new_nfa->start = a->start;
2563 new_nfa->end = b->end;
2564 new_nfa->state_count += a->state_count + b->state_count;
2565 automaton_fragment_clear (a);
2566 automaton_fragment_clear (b);
2567
2568 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2569}
2570
2571
2572/**
2573 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
2574 *
2575 * @param ctx context
2576 */
2577static void
2578nfa_add_star_op (struct REGEX_ITERNAL_Context *ctx)
2579{
2580 struct REGEX_ITERNAL_Automaton *a;
2581 struct REGEX_ITERNAL_Automaton *new_nfa;
2582 struct REGEX_ITERNAL_State *start;
2583 struct REGEX_ITERNAL_State *end;
2584
2585 a = ctx->stack_tail;
2586
2587 if (NULL == a)
2588 {
2589 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2590 "nfa_add_star_op failed, because there was no element on the stack");
2591 return;
2592 }
2593
2594 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2595
2596 start = nfa_state_create (ctx, 0);
2597 end = nfa_state_create (ctx, 1);
2598
2599 state_add_transition (ctx, start, NULL, a->start);
2600 state_add_transition (ctx, start, NULL, end);
2601 state_add_transition (ctx, a->end, NULL, a->start);
2602 state_add_transition (ctx, a->end, NULL, end);
2603
2604 a->end->accepting = 0;
2605 end->accepting = 1;
2606
2607 new_nfa = nfa_fragment_create (start, end);
2608 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2609 automaton_fragment_clear (a);
2610
2611 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2612}
2613
2614
2615/**
2616 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
2617 *
2618 * @param ctx context
2619 */
2620static void
2621nfa_add_plus_op (struct REGEX_ITERNAL_Context *ctx)
2622{
2623 struct REGEX_ITERNAL_Automaton *a;
2624
2625 a = ctx->stack_tail;
2626
2627 if (NULL == a)
2628 {
2629 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2630 "nfa_add_plus_op failed, because there was no element on the stack");
2631 return;
2632 }
2633
2634 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2635
2636 state_add_transition (ctx, a->end, NULL, a->start);
2637
2638 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2639}
2640
2641
2642/**
2643 * Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
2644 *
2645 * @param ctx context
2646 */
2647static void
2648nfa_add_question_op (struct REGEX_ITERNAL_Context *ctx)
2649{
2650 struct REGEX_ITERNAL_Automaton *a;
2651 struct REGEX_ITERNAL_Automaton *new_nfa;
2652 struct REGEX_ITERNAL_State *start;
2653 struct REGEX_ITERNAL_State *end;
2654
2655 a = ctx->stack_tail;
2656
2657 if (NULL == a)
2658 {
2659 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2660 "nfa_add_question_op failed, because there was no element on the stack");
2661 return;
2662 }
2663
2664 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2665
2666 start = nfa_state_create (ctx, 0);
2667 end = nfa_state_create (ctx, 1);
2668
2669 state_add_transition (ctx, start, NULL, a->start);
2670 state_add_transition (ctx, start, NULL, end);
2671 state_add_transition (ctx, a->end, NULL, end);
2672
2673 a->end->accepting = 0;
2674
2675 new_nfa = nfa_fragment_create (start, end);
2676 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2677 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2678 automaton_fragment_clear (a);
2679}
2680
2681
2682/**
2683 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that
2684 * alternates between a and b (a|b)
2685 *
2686 * @param ctx context
2687 */
2688static void
2689nfa_add_alternation (struct REGEX_ITERNAL_Context *ctx)
2690{
2691 struct REGEX_ITERNAL_Automaton *a;
2692 struct REGEX_ITERNAL_Automaton *b;
2693 struct REGEX_ITERNAL_Automaton *new_nfa;
2694 struct REGEX_ITERNAL_State *start;
2695 struct REGEX_ITERNAL_State *end;
2696
2697 b = ctx->stack_tail;
2698 GNUNET_assert (NULL != b);
2699 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2700 a = ctx->stack_tail;
2701 GNUNET_assert (NULL != a);
2702 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2703
2704 start = nfa_state_create (ctx, 0);
2705 end = nfa_state_create (ctx, 1);
2706 state_add_transition (ctx, start, NULL, a->start);
2707 state_add_transition (ctx, start, NULL, b->start);
2708
2709 state_add_transition (ctx, a->end, NULL, end);
2710 state_add_transition (ctx, b->end, NULL, end);
2711
2712 a->end->accepting = 0;
2713 b->end->accepting = 0;
2714 end->accepting = 1;
2715
2716 new_nfa = nfa_fragment_create (start, end);
2717 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2718 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2719 automaton_fragment_clear (a);
2720 automaton_fragment_clear (b);
2721
2722 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2723}
2724
2725
2726/**
2727 * Adds a new nfa fragment to the stack
2728 *
2729 * @param ctx context
2730 * @param label label for nfa transition
2731 */
2732static void
2733nfa_add_label (struct REGEX_ITERNAL_Context *ctx, const char *label)
2734{
2735 struct REGEX_ITERNAL_Automaton *n;
2736 struct REGEX_ITERNAL_State *start;
2737 struct REGEX_ITERNAL_State *end;
2738
2739 GNUNET_assert (NULL != ctx);
2740
2741 start = nfa_state_create (ctx, 0);
2742 end = nfa_state_create (ctx, 1);
2743 state_add_transition (ctx, start, label, end);
2744 n = nfa_fragment_create (start, end);
2745 GNUNET_assert (NULL != n);
2746 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2747}
2748
2749
2750/**
2751 * Initialize a new context
2752 *
2753 * @param ctx context
2754 */
2755static void
2756REGEX_ITERNAL_context_init (struct REGEX_ITERNAL_Context *ctx)
2757{
2758 if (NULL == ctx)
2759 {
2760 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2761 return;
2762 }
2763 ctx->state_id = 0;
2764 ctx->transition_id = 0;
2765 ctx->stack_head = NULL;
2766 ctx->stack_tail = NULL;
2767}
2768
2769
2770/**
2771 * Construct an NFA by parsing the regex string of length 'len'.
2772 *
2773 * @param regex regular expression string
2774 * @param len length of the string
2775 *
2776 * @return NFA, needs to be freed using REGEX_ITERNAL_destroy_automaton
2777 */
2778struct REGEX_ITERNAL_Automaton *
2779REGEX_ITERNAL_construct_nfa (const char *regex, const size_t len)
2780{
2781 struct REGEX_ITERNAL_Context ctx;
2782 struct REGEX_ITERNAL_Automaton *nfa;
2783 const char *regexp;
2784 char curlabel[2];
2785 char *error_msg;
2786 unsigned int count;
2787 unsigned int altcount;
2788 unsigned int atomcount;
2789 unsigned int poff;
2790 unsigned int psize;
2791 struct
2792 {
2793 int altcount;
2794 int atomcount;
2795 } *p;
2796
2797 if (NULL == regex || 0 == strlen (regex) || 0 == len)
2798 {
2799 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2800 "Could not parse regex. Empty regex string provided.\n");
2801
2802 return NULL;
2803 }
2804 REGEX_ITERNAL_context_init (&ctx);
2805
2806 regexp = regex;
2807 curlabel[1] = '\0';
2808 p = NULL;
2809 error_msg = NULL;
2810 altcount = 0;
2811 atomcount = 0;
2812 poff = 0;
2813 psize = 0;
2814
2815 for (count = 0; count < len && *regexp; count++, regexp++)
2816 {
2817 switch (*regexp)
2818 {
2819 case '(':
2820 if (atomcount > 1)
2821 {
2822 --atomcount;
2823 nfa_add_concatenation (&ctx);
2824 }
2825 if (poff == psize)
2826 GNUNET_array_grow (p, psize, psize * 2 + 4);
2827 p[poff].altcount = altcount;
2828 p[poff].atomcount = atomcount;
2829 poff++;
2830 altcount = 0;
2831 atomcount = 0;
2832 break;
2833 case '|':
2834 if (0 == atomcount)
2835 {
2836 error_msg = "Cannot append '|' to nothing";
2837 goto error;
2838 }
2839 while (--atomcount > 0)
2840 nfa_add_concatenation (&ctx);
2841 altcount++;
2842 break;
2843 case ')':
2844 if (0 == poff)
2845 {
2846 error_msg = "Missing opening '('";
2847 goto error;
2848 }
2849 if (0 == atomcount)
2850 {
2851 /* Ignore this: "()" */
2852 poff--;
2853 altcount = p[poff].altcount;
2854 atomcount = p[poff].atomcount;
2855 break;
2856 }
2857 while (--atomcount > 0)
2858 nfa_add_concatenation (&ctx);
2859 for (; altcount > 0; altcount--)
2860 nfa_add_alternation (&ctx);
2861 poff--;
2862 altcount = p[poff].altcount;
2863 atomcount = p[poff].atomcount;
2864 atomcount++;
2865 break;
2866 case '*':
2867 if (atomcount == 0)
2868 {
2869 error_msg = "Cannot append '*' to nothing";
2870 goto error;
2871 }
2872 nfa_add_star_op (&ctx);
2873 break;
2874 case '+':
2875 if (atomcount == 0)
2876 {
2877 error_msg = "Cannot append '+' to nothing";
2878 goto error;
2879 }
2880 nfa_add_plus_op (&ctx);
2881 break;
2882 case '?':
2883 if (atomcount == 0)
2884 {
2885 error_msg = "Cannot append '?' to nothing";
2886 goto error;
2887 }
2888 nfa_add_question_op (&ctx);
2889 break;
2890 default:
2891 if (atomcount > 1)
2892 {
2893 --atomcount;
2894 nfa_add_concatenation (&ctx);
2895 }
2896 curlabel[0] = *regexp;
2897 nfa_add_label (&ctx, curlabel);
2898 atomcount++;
2899 break;
2900 }
2901 }
2902 if (0 != poff)
2903 {
2904 error_msg = "Unbalanced parenthesis";
2905 goto error;
2906 }
2907 while (--atomcount > 0)
2908 nfa_add_concatenation (&ctx);
2909 for (; altcount > 0; altcount--)
2910 nfa_add_alternation (&ctx);
2911
2912 GNUNET_array_grow (p, psize, 0);
2913
2914 nfa = ctx.stack_tail;
2915 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2916
2917 if (NULL != ctx.stack_head)
2918 {
2919 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2920 goto error;
2921 }
2922
2923 /* Remember the regex that was used to generate this NFA */
2924 nfa->regex = GNUNET_strdup (regex);
2925
2926 /* create depth-first numbering of the states for pretty printing */
2927 REGEX_ITERNAL_automaton_traverse (nfa, NULL, NULL, NULL, &number_states, NULL);
2928
2929 /* No multistriding added so far */
2930 nfa->is_multistrided = GNUNET_NO;
2931
2932 return nfa;
2933
2934error:
2935 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2936 if (NULL != error_msg)
2937 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2938
2939 GNUNET_free_non_null (p);
2940
2941 while (NULL != (nfa = ctx.stack_head))
2942 {
2943 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2944 REGEX_ITERNAL_automaton_destroy (nfa);
2945 }
2946
2947 return NULL;
2948}
2949
2950
2951/**
2952 * Create DFA states based on given 'nfa' and starting with 'dfa_state'.
2953 *
2954 * @param ctx context.
2955 * @param nfa NFA automaton.
2956 * @param dfa DFA automaton.
2957 * @param dfa_state current dfa state, pass epsilon closure of first nfa state
2958 * for starting.
2959 */
2960static void
2961construct_dfa_states (struct REGEX_ITERNAL_Context *ctx,
2962 struct REGEX_ITERNAL_Automaton *nfa,
2963 struct REGEX_ITERNAL_Automaton *dfa,
2964 struct REGEX_ITERNAL_State *dfa_state)
2965{
2966 struct REGEX_ITERNAL_Transition *ctran;
2967 struct REGEX_ITERNAL_State *new_dfa_state;
2968 struct REGEX_ITERNAL_State *state_contains;
2969 struct REGEX_ITERNAL_State *state_iter;
2970 struct REGEX_ITERNAL_StateSet tmp;
2971 struct REGEX_ITERNAL_StateSet nfa_set;
2972
2973 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2974 {
2975 if (NULL == ctran->label || NULL != ctran->to_state)
2976 continue;
2977
2978 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
2979 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
2980 state_set_clear (&tmp);
2981
2982 state_contains = NULL;
2983 for (state_iter = dfa->states_head; NULL != state_iter;
2984 state_iter = state_iter->next)
2985 {
2986 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
2987 {
2988 state_contains = state_iter;
2989 break;
2990 }
2991 }
2992 if (NULL == state_contains)
2993 {
2994 new_dfa_state = dfa_state_create (ctx, &nfa_set);
2995 automaton_add_state (dfa, new_dfa_state);
2996 ctran->to_state = new_dfa_state;
2997 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
2998 }
2999 else
3000 {
3001 ctran->to_state = state_contains;
3002 state_set_clear (&nfa_set);
3003 }
3004 }
3005}
3006
3007
3008/**
3009 * Construct DFA for the given 'regex' of length 'len'.
3010 *
3011 * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
3012 * compressed to o -> abc -> o. Note that this parameter influences the
3013 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
3014 * edges with the same label). For example for an application that stores IPv4
3015 * addresses as bitstrings it could make sense to limit the path compression to
3016 * 4 or 8.
3017 *
3018 * @param regex regular expression string.
3019 * @param len length of the regular expression.
3020 * @param max_path_len limit the path compression length to the
3021 * given value. If set to 1, no path compression is applied. Set to 0 for
3022 * maximal possible path compression (generally not desireable).
3023 * @return DFA, needs to be freed using REGEX_ITERNAL_automaton_destroy.
3024 */
3025struct REGEX_ITERNAL_Automaton *
3026REGEX_ITERNAL_construct_dfa (const char *regex, const size_t len,
3027 unsigned int max_path_len)
3028{
3029 struct REGEX_ITERNAL_Context ctx;
3030 struct REGEX_ITERNAL_Automaton *dfa;
3031 struct REGEX_ITERNAL_Automaton *nfa;
3032 struct REGEX_ITERNAL_StateSet nfa_start_eps_cls;
3033 struct REGEX_ITERNAL_StateSet singleton_set;
3034
3035 REGEX_ITERNAL_context_init (&ctx);
3036
3037 /* Create NFA */
3038 // fprintf (stderr, "N");
3039 nfa = REGEX_ITERNAL_construct_nfa (regex, len);
3040
3041 if (NULL == nfa)
3042 {
3043 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3044 "Could not create DFA, because NFA creation failed\n");
3045 return NULL;
3046 }
3047
3048 dfa = GNUNET_malloc (sizeof (struct REGEX_ITERNAL_Automaton));
3049 dfa->type = DFA;
3050 dfa->regex = GNUNET_strdup (regex);
3051
3052 /* Create DFA start state from epsilon closure */
3053 memset (&singleton_set, 0, sizeof (struct REGEX_ITERNAL_StateSet));
3054 state_set_append (&singleton_set, nfa->start);
3055 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3056 state_set_clear (&singleton_set);
3057 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3058 automaton_add_state (dfa, dfa->start);
3059
3060 // fprintf (stderr, "D");
3061 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3062 REGEX_ITERNAL_automaton_destroy (nfa);
3063
3064 /* Minimize DFA */
3065 // fprintf (stderr, "M");
3066 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3067 {
3068 REGEX_ITERNAL_automaton_destroy (dfa);
3069 return NULL;
3070 }
3071
3072 /* Create proofs and hashes for all states */
3073 if (GNUNET_OK != automaton_create_proofs (dfa))
3074 {
3075 REGEX_ITERNAL_automaton_destroy (dfa);
3076 return NULL;
3077 }
3078
3079 /* Compress linear DFA paths */
3080 if (1 != max_path_len)
3081 dfa_compress_paths (&ctx, dfa, max_path_len);
3082
3083 return dfa;
3084}
3085
3086
3087/**
3088 * Free the memory allocated by constructing the REGEX_ITERNAL_Automaton data
3089 * structure.
3090 *
3091 * @param a automaton to be destroyed
3092 */
3093void
3094REGEX_ITERNAL_automaton_destroy (struct REGEX_ITERNAL_Automaton *a)
3095{
3096 struct REGEX_ITERNAL_State *s;
3097 struct REGEX_ITERNAL_State *next_state;
3098
3099 if (NULL == a)
3100 return;
3101
3102 GNUNET_free_non_null (a->regex);
3103 GNUNET_free_non_null (a->canonical_regex);
3104
3105 for (s = a->states_head; NULL != s; s = next_state)
3106 {
3107 next_state = s->next;
3108 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
3109 automaton_destroy_state (s);
3110 }
3111
3112 GNUNET_free (a);
3113}
3114
3115
3116/**
3117 * Evaluates the given string using the given DFA automaton
3118 *
3119 * @param a automaton, type must be DFA
3120 * @param string string that should be evaluated
3121 *
3122 * @return 0 if string matches, non 0 otherwise
3123 */
3124static int
3125evaluate_dfa (struct REGEX_ITERNAL_Automaton *a, const char *string)
3126{
3127 const char *strp;
3128 struct REGEX_ITERNAL_State *s;
3129 unsigned int step_len;
3130
3131 if (DFA != a->type)
3132 {
3133 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3134 "Tried to evaluate DFA, but NFA automaton given");
3135 return -1;
3136 }
3137
3138 s = a->start;
3139
3140 /* If the string is empty but the starting state is accepting, we accept. */
3141 if ((NULL == string || 0 == strlen (string)) && s->accepting)
3142 return 0;
3143
3144 for (strp = string; NULL != strp && *strp; strp += step_len)
3145 {
3146 step_len = dfa_move (&s, strp);
3147
3148 if (NULL == s)
3149 break;
3150 }
3151
3152 if (NULL != s && s->accepting)
3153 return 0;
3154
3155 return 1;
3156}
3157
3158
3159/**
3160 * Evaluates the given string using the given NFA automaton
3161 *
3162 * @param a automaton, type must be NFA
3163 * @param string string that should be evaluated
3164 *
3165 * @return 0 if string matches, non 0 otherwise
3166 */
3167static int
3168evaluate_nfa (struct REGEX_ITERNAL_Automaton *a, const char *string)
3169{
3170 const char *strp;
3171 char str[2];
3172 struct REGEX_ITERNAL_State *s;
3173 struct REGEX_ITERNAL_StateSet sset;
3174 struct REGEX_ITERNAL_StateSet new_sset;
3175 struct REGEX_ITERNAL_StateSet singleton_set;
3176 unsigned int i;
3177 int result;
3178
3179 if (NFA != a->type)
3180 {
3181 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3182 "Tried to evaluate NFA, but DFA automaton given");
3183 return -1;
3184 }
3185
3186 /* If the string is empty but the starting state is accepting, we accept. */
3187 if ((NULL == string || 0 == strlen (string)) && a->start->accepting)
3188 return 0;
3189
3190 result = 1;
3191 memset (&singleton_set, 0, sizeof (struct REGEX_ITERNAL_StateSet));
3192 state_set_append (&singleton_set, a->start);
3193 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3194 state_set_clear (&singleton_set);
3195
3196 str[1] = '\0';
3197 for (strp = string; NULL != strp && *strp; strp++)
3198 {
3199 str[0] = *strp;
3200 nfa_closure_set_create (&new_sset, a, &sset, str);
3201 state_set_clear (&sset);
3202 nfa_closure_set_create (&sset, a, &new_sset, 0);
3203 state_set_clear (&new_sset);
3204 }
3205
3206 for (i = 0; i < sset.off; i++)
3207 {
3208 s = sset.states[i];
3209 if ( (NULL != s) && (s->accepting) )
3210 {
3211 result = 0;
3212 break;
3213 }
3214 }
3215
3216 state_set_clear (&sset);
3217 return result;
3218}
3219
3220
3221/**
3222 * Evaluates the given 'string' against the given compiled regex
3223 *
3224 * @param a automaton
3225 * @param string string to check
3226 *
3227 * @return 0 if string matches, non 0 otherwise
3228 */
3229int
3230REGEX_ITERNAL_eval (struct REGEX_ITERNAL_Automaton *a, const char *string)
3231{
3232 int result;
3233
3234 switch (a->type)
3235 {
3236 case DFA:
3237 result = evaluate_dfa (a, string);
3238 break;
3239 case NFA:
3240 result = evaluate_nfa (a, string);
3241 break;
3242 default:
3243 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3244 "Evaluating regex failed, automaton has no type!\n");
3245 result = GNUNET_SYSERR;
3246 break;
3247 }
3248
3249 return result;
3250}
3251
3252
3253/**
3254 * Get the canonical regex of the given automaton.
3255 * When constructing the automaton a proof is computed for each state,
3256 * consisting of the regular expression leading to this state. A complete
3257 * regex for the automaton can be computed by combining these proofs.
3258 * As of now this function is only useful for testing.
3259 *
3260 * @param a automaton for which the canonical regex should be returned.
3261 *
3262 * @return
3263 */
3264const char *
3265REGEX_ITERNAL_get_canonical_regex (struct REGEX_ITERNAL_Automaton *a)
3266{
3267 if (NULL == a)
3268 return NULL;
3269
3270 return a->canonical_regex;
3271}
3272
3273
3274/**
3275 * Get the number of transitions that are contained in the given automaton.
3276 *
3277 * @param a automaton for which the number of transitions should be returned.
3278 *
3279 * @return number of transitions in the given automaton.
3280 */
3281unsigned int
3282REGEX_ITERNAL_get_transition_count (struct REGEX_ITERNAL_Automaton *a)
3283{
3284 unsigned int t_count;
3285 struct REGEX_ITERNAL_State *s;
3286
3287 if (NULL == a)
3288 return 0;
3289
3290 t_count = 0;
3291 for (s = a->states_head; NULL != s; s = s->next)
3292 t_count += s->transition_count;
3293
3294 return t_count;
3295}
3296
3297
3298/**
3299 * Get the first key for the given 'input_string'. This hashes the first x bits
3300 * of the 'input_string'.
3301 *
3302 * @param input_string string.
3303 * @param string_len length of the 'input_string'.
3304 * @param key pointer to where to write the hash code.
3305 *
3306 * @return number of bits of 'input_string' that have been consumed
3307 * to construct the key
3308 */
3309size_t
3310REGEX_ITERNAL_get_first_key (const char *input_string, size_t string_len,
3311 struct GNUNET_HashCode * key)
3312{
3313 unsigned int size;
3314
3315 size =
3316 string_len <
3317 GNUNET_REGEX_INITIAL_BYTES ? string_len : GNUNET_REGEX_INITIAL_BYTES;
3318
3319 if (NULL == input_string)
3320 {
3321 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3322 return 0;
3323 }
3324
3325 GNUNET_CRYPTO_hash (input_string, size, key);
3326
3327 return size;
3328}
3329
3330
3331/**
3332 * Check if the given 'proof' matches the given 'key'.
3333 *
3334 * @param proof partial regex of a state.
3335 * @param key hash of a state.
3336 *
3337 * @return GNUNET_OK if the proof is valid for the given key.
3338 */
3339int
3340REGEX_ITERNAL_check_proof (const char *proof, const struct GNUNET_HashCode *key)
3341{
3342 struct GNUNET_HashCode key_check;
3343
3344 if (NULL == proof || NULL == key)
3345 {
3346 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Proof check failed, was NULL.\n");
3347 return GNUNET_NO;
3348 }
3349
3350 GNUNET_CRYPTO_hash (proof, strlen (proof), &key_check);
3351 return (0 ==
3352 GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO;
3353}
3354
3355
3356/**
3357 * Recursive function that calls the iterator for each synthetic start state.
3358 *
3359 * @param min_len minimum length of the path in the graph.
3360 * @param max_len maximum length of the path in the graph.
3361 * @param consumed_string string consumed by traversing the graph till this state.
3362 * @param state current state of the automaton.
3363 * @param iterator iterator function called for each edge.
3364 * @param iterator_cls closure for the iterator function.
3365 */
3366static void
3367iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
3368 char *consumed_string, struct REGEX_ITERNAL_State *state,
3369 REGEX_ITERNAL_KeyIterator iterator, void *iterator_cls)
3370{
3371 unsigned int i;
3372 char *temp;
3373 struct REGEX_ITERNAL_Transition *t;
3374 unsigned int num_edges = state->transition_count;
3375 struct REGEX_ITERNAL_Edge edges[num_edges];
3376 struct REGEX_ITERNAL_Edge edge[1];
3377 struct GNUNET_HashCode hash;
3378 struct GNUNET_HashCode hash_new;
3379
3380 unsigned int cur_len;
3381
3382 if (NULL != consumed_string)
3383 cur_len = strlen (consumed_string);
3384 else
3385 cur_len = 0;
3386
3387 if ((cur_len >= min_len || GNUNET_YES == state->accepting) && cur_len > 0 &&
3388 NULL != consumed_string)
3389 {
3390 if (cur_len <= max_len)
3391 {
3392 if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
3393 {
3394 for (i = 0, t = state->transitions_head; NULL != t && i < num_edges;
3395 t = t->next, i++)
3396 {
3397 edges[i].label = t->label;
3398 edges[i].destination = t->to_state->hash;
3399 }
3400 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3401 iterator (iterator_cls, &hash, consumed_string, state->accepting,
3402 num_edges, edges);
3403 }
3404
3405 if (GNUNET_YES == state->accepting && cur_len > 1 &&
3406 state->transition_count < 1 && cur_len < max_len)
3407 {
3408 /* Special case for regex consisting of just a string that is shorter than
3409 * max_len */
3410 edge[0].label = &consumed_string[cur_len - 1];
3411 edge[0].destination = state->hash;
3412 temp = GNUNET_strdup (consumed_string);
3413 temp[cur_len - 1] = '\0';
3414 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3415 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3416 GNUNET_free (temp);
3417 }
3418 }
3419 else if (max_len < cur_len)
3420 {
3421 /* Case where the concatenated labels are longer than max_len, then split. */
3422 edge[0].label = &consumed_string[max_len];
3423 edge[0].destination = state->hash;
3424 temp = GNUNET_strdup (consumed_string);
3425 temp[max_len] = '\0';
3426 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3427 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3428 GNUNET_free (temp);
3429 }
3430 }
3431
3432 if (cur_len < max_len)
3433 {
3434 for (t = state->transitions_head; NULL != t; t = t->next)
3435 {
3436 if (NULL != consumed_string)
3437 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3438 else
3439 GNUNET_asprintf (&temp, "%s", t->label);
3440
3441 iterate_initial_edge (min_len, max_len, temp, t->to_state, iterator,
3442 iterator_cls);
3443 GNUNET_free (temp);
3444 }
3445 }
3446}
3447
3448
3449/**
3450 * Iterate over all edges starting from start state of automaton 'a'. Calling
3451 * iterator for each edge.
3452 *
3453 * @param a automaton.
3454 * @param iterator iterator called for each edge.
3455 * @param iterator_cls closure.
3456 */
3457void
3458REGEX_ITERNAL_iterate_all_edges (struct REGEX_ITERNAL_Automaton *a,
3459 REGEX_ITERNAL_KeyIterator iterator,
3460 void *iterator_cls)
3461{
3462 struct REGEX_ITERNAL_State *s;
3463
3464 for (s = a->states_head; NULL != s; s = s->next)
3465 {
3466 struct REGEX_ITERNAL_Edge edges[s->transition_count];
3467 unsigned int num_edges;
3468
3469 num_edges = state_get_edges (s, edges);
3470
3471 if ((NULL != s->proof && 0 < strlen (s->proof)) || s->accepting)
3472 iterator (iterator_cls, &s->hash, s->proof, s->accepting, num_edges,
3473 edges);
3474
3475 s->marked = GNUNET_NO;
3476 }
3477
3478 iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, GNUNET_REGEX_INITIAL_BYTES,
3479 NULL, a->start, iterator, iterator_cls);
3480}
3481
3482
3483
3484
3485/* end of regex.c */