aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/regex_internal.c')
-rw-r--r--src/regex/regex_internal.c3718
1 files changed, 0 insertions, 3718 deletions
diff --git a/src/regex/regex_internal.c b/src/regex/regex_internal.c
deleted file mode 100644
index aa40851a9..000000000
--- a/src/regex/regex_internal.c
+++ /dev/null
@@ -1,3718 +0,0 @@
1/*
2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
20/**
21 * @file src/regex/regex_internal.c
22 * @brief library to create Deterministic Finite Automatons (DFAs) from regular
23 * expressions (regexes).
24 * @author Maximilian Szengel
25 */
26#include "platform.h"
27#include "gnunet_util_lib.h"
28#include "gnunet_regex_service.h"
29#include "regex_internal_lib.h"
30#include "regex_internal.h"
31
32
33/**
34 * Set this to #GNUNET_YES to enable state naming. Used to debug NFA->DFA
35 * creation. Disabled by default for better performance.
36 */
37#define REGEX_DEBUG_DFA GNUNET_NO
38
39/**
40 * Set of states using MDLL API.
41 */
42struct REGEX_INTERNAL_StateSet_MDLL
43{
44 /**
45 * MDLL of states.
46 */
47 struct REGEX_INTERNAL_State *head;
48
49 /**
50 * MDLL of states.
51 */
52 struct REGEX_INTERNAL_State *tail;
53
54 /**
55 * Length of the MDLL.
56 */
57 unsigned int len;
58};
59
60
61/**
62 * Append state to the given StateSet.
63 *
64 * @param set set to be modified
65 * @param state state to be appended
66 */
67static void
68state_set_append (struct REGEX_INTERNAL_StateSet *set,
69 struct REGEX_INTERNAL_State *state)
70{
71 if (set->off == set->size)
72 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
73 set->states[set->off++] = state;
74}
75
76
77/**
78 * Compare two strings for equality. If either is NULL they are not equal.
79 *
80 * @param str1 first string for comparison.
81 * @param str2 second string for comparison.
82 *
83 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
84 */
85static int
86nullstrcmp (const char *str1, const char *str2)
87{
88 if ((NULL == str1) != (NULL == str2))
89 return -1;
90 if ((NULL == str1) && (NULL == str2))
91 return 0;
92
93 return strcmp (str1, str2);
94}
95
96
97/**
98 * Adds a transition from one state to another on @a label. Does not add
99 * duplicate states.
100 *
101 * @param ctx context
102 * @param from_state starting state for the transition
103 * @param label transition label
104 * @param to_state state to where the transition should point to
105 */
106static void
107state_add_transition (struct REGEX_INTERNAL_Context *ctx,
108 struct REGEX_INTERNAL_State *from_state,
109 const char *label,
110 struct REGEX_INTERNAL_State *to_state)
111{
112 struct REGEX_INTERNAL_Transition *t;
113 struct REGEX_INTERNAL_Transition *oth;
114
115 if (NULL == from_state)
116 {
117 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n");
118 return;
119 }
120
121 /* Do not add duplicate state transitions */
122 for (t = from_state->transitions_head; NULL != t; t = t->next)
123 {
124 if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) &&
125 (t->from_state == from_state) )
126 return;
127 }
128
129 /* sort transitions by label */
130 for (oth = from_state->transitions_head; NULL != oth; oth = oth->next)
131 {
132 if (0 < nullstrcmp (oth->label, label))
133 break;
134 }
135
136 t = GNUNET_new (struct REGEX_INTERNAL_Transition);
137 if (NULL != ctx)
138 t->id = ctx->transition_id++;
139 if (NULL != label)
140 t->label = GNUNET_strdup (label);
141 else
142 t->label = NULL;
143 t->to_state = to_state;
144 t->from_state = from_state;
145
146 /* Add outgoing transition to 'from_state' */
147 from_state->transition_count++;
148 GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head,
149 from_state->transitions_tail,
150 oth,
151 t);
152}
153
154
155/**
156 * Remove a 'transition' from 'state'.
157 *
158 * @param state state from which the to-be-removed transition originates.
159 * @param transition transition that should be removed from state 'state'.
160 */
161static void
162state_remove_transition (struct REGEX_INTERNAL_State *state,
163 struct REGEX_INTERNAL_Transition *transition)
164{
165 if ((NULL == state) || (NULL == transition))
166 return;
167
168 if (transition->from_state != state)
169 return;
170
171 GNUNET_free (transition->label);
172
173 state->transition_count--;
174 GNUNET_CONTAINER_DLL_remove (state->transitions_head,
175 state->transitions_tail,
176 transition);
177
178 GNUNET_free (transition);
179}
180
181
182/**
183 * Compare two states. Used for sorting.
184 *
185 * @param a first state
186 * @param b second state
187 *
188 * @return an integer less than, equal to, or greater than zero
189 * if the first argument is considered to be respectively
190 * less than, equal to, or greater than the second.
191 */
192static int
193state_compare (const void *a, const void *b)
194{
195 struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a;
196 struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b;
197
198 return (*s1)->id - (*s2)->id;
199}
200
201
202/**
203 * Get all edges leaving state @a s.
204 *
205 * @param s state.
206 * @param edges all edges leaving @a s, expected to be allocated and have enough
207 * space for `s->transitions_count` elements.
208 *
209 * @return number of edges.
210 */
211static unsigned int
212state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges)
213{
214 struct REGEX_INTERNAL_Transition *t;
215 unsigned int count;
216
217 if (NULL == s)
218 return 0;
219
220 count = 0;
221
222 for (t = s->transitions_head; NULL != t; t = t->next)
223 {
224 if (NULL != t->to_state)
225 {
226 edges[count].label = t->label;
227 edges[count].destination = t->to_state->hash;
228 count++;
229 }
230 }
231 return count;
232}
233
234
235/**
236 * Compare to state sets by comparing the id's of the states that are contained
237 * in each set. Both sets are expected to be sorted by id!
238 *
239 * @param sset1 first state set
240 * @param sset2 second state set
241 * @return 0 if the sets are equal, otherwise non-zero
242 */
243static int
244state_set_compare (struct REGEX_INTERNAL_StateSet *sset1,
245 struct REGEX_INTERNAL_StateSet *sset2)
246{
247 int result;
248 unsigned int i;
249
250 if ((NULL == sset1) || (NULL == sset2))
251 return 1;
252
253 result = sset1->off - sset2->off;
254 if (result < 0)
255 return -1;
256 if (result > 0)
257 return 1;
258 for (i = 0; i < sset1->off; i++)
259 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
260 break;
261 return result;
262}
263
264
265/**
266 * Clears the given StateSet 'set'
267 *
268 * @param set set to be cleared
269 */
270static void
271state_set_clear (struct REGEX_INTERNAL_StateSet *set)
272{
273 GNUNET_array_grow (set->states, set->size, 0);
274 set->off = 0;
275}
276
277
278/**
279 * Clears an automaton fragment. Does not destroy the states inside the
280 * automaton.
281 *
282 * @param a automaton to be cleared
283 */
284static void
285automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a)
286{
287 if (NULL == a)
288 return;
289
290 a->start = NULL;
291 a->end = NULL;
292 a->states_head = NULL;
293 a->states_tail = NULL;
294 a->state_count = 0;
295 GNUNET_free (a);
296}
297
298
299/**
300 * Frees the memory used by State @a s
301 *
302 * @param s state that should be destroyed
303 */
304static void
305automaton_destroy_state (struct REGEX_INTERNAL_State *s)
306{
307 struct REGEX_INTERNAL_Transition *t;
308 struct REGEX_INTERNAL_Transition *next_t;
309
310 if (NULL == s)
311 return;
312
313 GNUNET_free (s->name);
314 GNUNET_free (s->proof);
315 state_set_clear (&s->nfa_set);
316 for (t = s->transitions_head; NULL != t; t = next_t)
317 {
318 next_t = t->next;
319 state_remove_transition (s, t);
320 }
321
322 GNUNET_free (s);
323}
324
325
326/**
327 * Remove a state from the given automaton 'a'. Always use this function when
328 * altering the states of an automaton. Will also remove all transitions leading
329 * to this state, before destroying it.
330 *
331 * @param a automaton
332 * @param s state to remove
333 */
334static void
335automaton_remove_state (struct REGEX_INTERNAL_Automaton *a,
336 struct REGEX_INTERNAL_State *s)
337{
338 struct REGEX_INTERNAL_State *s_check;
339 struct REGEX_INTERNAL_Transition *t_check;
340 struct REGEX_INTERNAL_Transition *t_check_next;
341
342 if ((NULL == a) || (NULL == s))
343 return;
344
345 /* remove all transitions leading to this state */
346 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
347 {
348 for (t_check = s_check->transitions_head; NULL != t_check;
349 t_check = t_check_next)
350 {
351 t_check_next = t_check->next;
352 if (t_check->to_state == s)
353 state_remove_transition (s_check, t_check);
354 }
355 }
356
357 /* remove state */
358 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
359 a->state_count--;
360
361 automaton_destroy_state (s);
362}
363
364
365/**
366 * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy
367 * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'.
368 *
369 * @param ctx context
370 * @param a automaton
371 * @param s1 first state
372 * @param s2 second state, will be destroyed
373 */
374static void
375automaton_merge_states (struct REGEX_INTERNAL_Context *ctx,
376 struct REGEX_INTERNAL_Automaton *a,
377 struct REGEX_INTERNAL_State *s1,
378 struct REGEX_INTERNAL_State *s2)
379{
380 struct REGEX_INTERNAL_State *s_check;
381 struct REGEX_INTERNAL_Transition *t_check;
382 struct REGEX_INTERNAL_Transition *t;
383 struct REGEX_INTERNAL_Transition *t_next;
384 int is_dup;
385
386 if (s1 == s2)
387 return;
388
389 /* 1. Make all transitions pointing to s2 point to s1, unless this transition
390 * does not already exists, if it already exists remove transition. */
391 for (s_check = a->states_head; NULL != s_check; s_check = s_check->next)
392 {
393 for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next)
394 {
395 t_next = t_check->next;
396
397 if (s2 == t_check->to_state)
398 {
399 is_dup = GNUNET_NO;
400 for (t = t_check->from_state->transitions_head; NULL != t; t = t->next)
401 {
402 if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) )
403 is_dup = GNUNET_YES;
404 }
405 if (GNUNET_NO == is_dup)
406 t_check->to_state = s1;
407 else
408 state_remove_transition (t_check->from_state, t_check);
409 }
410 }
411 }
412
413 /* 2. Add all transitions from s2 to sX to s1 */
414 for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next)
415 {
416 if (t_check->to_state != s1)
417 state_add_transition (ctx, s1, t_check->label, t_check->to_state);
418 }
419
420 /* 3. Rename s1 to {s1,s2} */
421#if REGEX_DEBUG_DFA
422 char *new_name;
423
424 new_name = s1->name;
425 GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name);
426 GNUNET_free (new_name);
427#endif
428
429 /* remove state */
430 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2);
431 a->state_count--;
432 automaton_destroy_state (s2);
433}
434
435
436/**
437 * Add a state to the automaton 'a', always use this function to alter the
438 * states DLL of the automaton.
439 *
440 * @param a automaton to add the state to
441 * @param s state that should be added
442 */
443static void
444automaton_add_state (struct REGEX_INTERNAL_Automaton *a,
445 struct REGEX_INTERNAL_State *s)
446{
447 GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s);
448 a->state_count++;
449}
450
451
452/**
453 * Depth-first traversal (DFS) of all states that are reachable from state
454 * 's'. Performs 'action' on each visited state.
455 *
456 * @param s start state.
457 * @param marks an array of size a->state_count to remember which state was
458 * already visited.
459 * @param count current count of the state.
460 * @param check function that is checked before advancing on each transition
461 * in the DFS.
462 * @param check_cls closure for check.
463 * @param action action to be performed on each state.
464 * @param action_cls closure for action.
465 */
466static void
467automaton_state_traverse (struct REGEX_INTERNAL_State *s,
468 int *marks,
469 unsigned int *count,
470 REGEX_INTERNAL_traverse_check check,
471 void *check_cls,
472 REGEX_INTERNAL_traverse_action action,
473 void *action_cls)
474{
475 struct REGEX_INTERNAL_Transition *t;
476
477 if (GNUNET_YES == marks[s->traversal_id])
478 return;
479
480 marks[s->traversal_id] = GNUNET_YES;
481
482 if (NULL != action)
483 action (action_cls, *count, s);
484
485 (*count)++;
486
487 for (t = s->transitions_head; NULL != t; t = t->next)
488 {
489 if ((NULL == check) ||
490 ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) ))
491 {
492 automaton_state_traverse (t->to_state,
493 marks,
494 count,
495 check,
496 check_cls,
497 action,
498 action_cls);
499 }
500 }
501}
502
503
504/**
505 * Traverses the given automaton using depth-first-search (DFS) from it's start
506 * state, visiting all reachable states and calling 'action' on each one of
507 * them.
508 *
509 * @param a automaton to be traversed.
510 * @param start start state, pass a->start or NULL to traverse the whole automaton.
511 * @param check function that is checked before advancing on each transition
512 * in the DFS.
513 * @param check_cls closure for @a check.
514 * @param action action to be performed on each state.
515 * @param action_cls closure for @a action
516 */
517void
518REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
519 struct REGEX_INTERNAL_State *start,
520 REGEX_INTERNAL_traverse_check check,
521 void *check_cls,
522 REGEX_INTERNAL_traverse_action action,
523 void *action_cls)
524{
525 unsigned int count;
526 struct REGEX_INTERNAL_State *s;
527
528 if ((NULL == a) || (0 == a->state_count))
529 return;
530
531 int marks[a->state_count];
532
533 for (count = 0, s = a->states_head; NULL != s && count < a->state_count;
534 s = s->next, count++)
535 {
536 s->traversal_id = count;
537 marks[s->traversal_id] = GNUNET_NO;
538 }
539
540 count = 0;
541
542 if (NULL == start)
543 s = a->start;
544 else
545 s = start;
546
547 automaton_state_traverse (s,
548 marks,
549 &count,
550 check,
551 check_cls,
552 action,
553 action_cls);
554}
555
556
557/**
558 * String container for faster string operations.
559 */
560struct StringBuffer
561{
562 /**
563 * Buffer holding the string (may start in the middle!);
564 * NOT 0-terminated!
565 */
566 char *sbuf;
567
568 /**
569 * Allocated buffer.
570 */
571 char *abuf;
572
573 /**
574 * Length of the string in the buffer.
575 */
576 size_t slen;
577
578 /**
579 * Number of bytes allocated for @e sbuf
580 */
581 unsigned int blen;
582
583 /**
584 * Buffer currently represents "NULL" (not the empty string!)
585 */
586 int16_t null_flag;
587
588 /**
589 * If this entry is part of the last/current generation array,
590 * this flag is #GNUNET_YES if the last and current generation are
591 * identical (and thus copying is unnecessary if the value didn't
592 * change). This is used in an optimization that improves
593 * performance by about 1% --- if we use int16_t here. With just
594 * "int" for both flags, performance drops (on my system) significantly,
595 * most likely due to increased cache misses.
596 */
597 int16_t synced;
598};
599
600
601/**
602 * Compare two strings for equality. If either is NULL they are not equal.
603 *
604 * @param s1 first string for comparison.
605 * @param s2 second string for comparison.
606 *
607 * @return 0 if the strings are the same or both NULL, 1 or -1 if not.
608 */
609static int
610sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
611{
612 if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag))
613 return 0;
614 if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag))
615 return -1;
616 if (s1->slen != s2->slen)
617 return -1;
618 if (0 == s1->slen)
619 return 0;
620 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
621}
622
623
624/**
625 * Compare two strings for equality.
626 *
627 * @param s1 first string for comparison.
628 * @param s2 second string for comparison.
629 *
630 * @return 0 if the strings are the same, 1 or -1 if not.
631 */
632static int
633sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2)
634{
635 if (s1->slen != s2->slen)
636 return -1;
637 if (0 == s1->slen)
638 return 0;
639 return memcmp (s1->sbuf, s2->sbuf, s1->slen);
640}
641
642
643/**
644 * Reallocate the buffer of 'ret' to fit 'nlen' characters;
645 * move the existing string to the beginning of the new buffer.
646 *
647 * @param ret current buffer, to be updated
648 * @param nlen target length for the buffer, must be at least ret->slen
649 */
650static void
651sb_realloc (struct StringBuffer *ret, size_t nlen)
652{
653 char *old;
654
655 GNUNET_assert (nlen >= ret->slen);
656 old = ret->abuf;
657 ret->abuf = GNUNET_malloc (nlen);
658 ret->blen = nlen;
659 GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen);
660 ret->sbuf = ret->abuf;
661 GNUNET_free (old);
662}
663
664
665/**
666 * Append a string.
667 *
668 * @param ret where to write the result
669 * @param sarg string to append
670 */
671static void
672sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg)
673{
674 if (GNUNET_YES == ret->null_flag)
675 ret->slen = 0;
676 ret->null_flag = GNUNET_NO;
677 if (ret->blen < sarg->slen + ret->slen)
678 sb_realloc (ret, ret->blen + sarg->slen + 128);
679 GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen);
680 ret->slen += sarg->slen;
681}
682
683
684/**
685 * Append a C string.
686 *
687 * @param ret where to write the result
688 * @param cstr string to append
689 */
690static void
691sb_append_cstr (struct StringBuffer *ret, const char *cstr)
692{
693 size_t cstr_len = strlen (cstr);
694
695 if (GNUNET_YES == ret->null_flag)
696 ret->slen = 0;
697 ret->null_flag = GNUNET_NO;
698 if (ret->blen < cstr_len + ret->slen)
699 sb_realloc (ret, ret->blen + cstr_len + 128);
700 GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len);
701 ret->slen += cstr_len;
702}
703
704
705/**
706 * Wrap a string buffer, that is, set ret to the format string
707 * which contains an "%s" which is to be replaced with the original
708 * content of 'ret'. Note that optimizing this function is not
709 * really worth it, it is rarely called.
710 *
711 * @param ret where to write the result and take the input for %.*s from
712 * @param format format string, fprintf-style, with exactly one "%.*s"
713 * @param extra_chars how long will the result be, in addition to 'sarg' length
714 */
715static void
716sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars)
717{
718 char *temp;
719
720 if (GNUNET_YES == ret->null_flag)
721 ret->slen = 0;
722 ret->null_flag = GNUNET_NO;
723 temp = GNUNET_malloc (ret->slen + extra_chars + 1);
724 GNUNET_snprintf (temp,
725 ret->slen + extra_chars + 1,
726 format,
727 (int) ret->slen,
728 ret->sbuf);
729 GNUNET_free (ret->abuf);
730 ret->abuf = temp;
731 ret->sbuf = temp;
732 ret->blen = ret->slen + extra_chars + 1;
733 ret->slen = ret->slen + extra_chars;
734}
735
736
737/**
738 * Format a string buffer. Note that optimizing this function is not
739 * really worth it, it is rarely called.
740 *
741 * @param ret where to write the result
742 * @param format format string, fprintf-style, with exactly one "%.*s"
743 * @param extra_chars how long will the result be, in addition to 'sarg' length
744 * @param sarg string to print into the format
745 */
746static void
747sb_printf1 (struct StringBuffer *ret,
748 const char *format,
749 size_t extra_chars,
750 const struct StringBuffer *sarg)
751{
752 if (ret->blen < sarg->slen + extra_chars + 1)
753 sb_realloc (ret, sarg->slen + extra_chars + 1);
754 ret->null_flag = GNUNET_NO;
755 ret->sbuf = ret->abuf;
756 ret->slen = sarg->slen + extra_chars;
757 GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf);
758}
759
760
761/**
762 * Format a string buffer.
763 *
764 * @param ret where to write the result
765 * @param format format string, fprintf-style, with exactly two "%.*s"
766 * @param extra_chars how long will the result be, in addition to 'sarg1/2' length
767 * @param sarg1 first string to print into the format
768 * @param sarg2 second string to print into the format
769 */
770static void
771sb_printf2 (struct StringBuffer *ret,
772 const char *format,
773 size_t extra_chars,
774 const struct StringBuffer *sarg1,
775 const struct StringBuffer *sarg2)
776{
777 if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1)
778 sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1);
779 ret->null_flag = GNUNET_NO;
780 ret->slen = sarg1->slen + sarg2->slen + extra_chars;
781 ret->sbuf = ret->abuf;
782 GNUNET_snprintf (ret->sbuf,
783 ret->blen,
784 format,
785 (int) sarg1->slen,
786 sarg1->sbuf,
787 (int) sarg2->slen,
788 sarg2->sbuf);
789}
790
791
792/**
793 * Format a string buffer. Note that optimizing this function is not
794 * really worth it, it is rarely called.
795 *
796 * @param ret where to write the result
797 * @param format format string, fprintf-style, with exactly three "%.*s"
798 * @param extra_chars how long will the result be, in addition to 'sarg1/2/3' length
799 * @param sarg1 first string to print into the format
800 * @param sarg2 second string to print into the format
801 * @param sarg3 third string to print into the format
802 */
803static void
804sb_printf3 (struct StringBuffer *ret,
805 const char *format,
806 size_t extra_chars,
807 const struct StringBuffer *sarg1,
808 const struct StringBuffer *sarg2,
809 const struct StringBuffer *sarg3)
810{
811 if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1)
812 sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1);
813 ret->null_flag = GNUNET_NO;
814 ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars;
815 ret->sbuf = ret->abuf;
816 GNUNET_snprintf (ret->sbuf,
817 ret->blen,
818 format,
819 (int) sarg1->slen,
820 sarg1->sbuf,
821 (int) sarg2->slen,
822 sarg2->sbuf,
823 (int) sarg3->slen,
824 sarg3->sbuf);
825}
826
827
828/**
829 * Free resources of the given string buffer.
830 *
831 * @param sb buffer to free (actual pointer is not freed, as they
832 * should not be individually allocated)
833 */
834static void
835sb_free (struct StringBuffer *sb)
836{
837 GNUNET_array_grow (sb->abuf, sb->blen, 0);
838 sb->slen = 0;
839 sb->sbuf = NULL;
840 sb->null_flag = GNUNET_YES;
841}
842
843
844/**
845 * Copy the given string buffer from 'in' to 'out'.
846 *
847 * @param in input string
848 * @param out output string
849 */
850static void
851sb_strdup (struct StringBuffer *out, const struct StringBuffer *in)
852
853{
854 out->null_flag = in->null_flag;
855 if (GNUNET_YES == out->null_flag)
856 return;
857 if (out->blen < in->slen)
858 {
859 GNUNET_array_grow (out->abuf, out->blen, in->slen);
860 }
861 out->sbuf = out->abuf;
862 out->slen = in->slen;
863 GNUNET_memcpy (out->sbuf, in->sbuf, out->slen);
864}
865
866
867/**
868 * Copy the given string buffer from 'in' to 'out'.
869 *
870 * @param cstr input string
871 * @param out output string
872 */
873static void
874sb_strdup_cstr (struct StringBuffer *out, const char *cstr)
875{
876 if (NULL == cstr)
877 {
878 out->null_flag = GNUNET_YES;
879 return;
880 }
881 out->null_flag = GNUNET_NO;
882 out->slen = strlen (cstr);
883 if (out->blen < out->slen)
884 {
885 GNUNET_array_grow (out->abuf, out->blen, out->slen);
886 }
887 out->sbuf = out->abuf;
888 GNUNET_memcpy (out->sbuf, cstr, out->slen);
889}
890
891
892/**
893 * Check if the given string @a str needs parentheses around it when
894 * using it to generate a regex.
895 *
896 * @param str string
897 *
898 * @return #GNUNET_YES if parentheses are needed, #GNUNET_NO otherwise
899 */
900static int
901needs_parentheses (const struct StringBuffer *str)
902{
903 size_t slen;
904 const char *op;
905 const char *cl;
906 const char *pos;
907 const char *end;
908 unsigned int cnt;
909
910 if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2))
911 return GNUNET_NO;
912 pos = str->sbuf;
913 if ('(' != pos[0])
914 return GNUNET_YES;
915 end = str->sbuf + slen;
916 cnt = 1;
917 pos++;
918 while (cnt > 0)
919 {
920 cl = memchr (pos, ')', end - pos);
921 if (NULL == cl)
922 {
923 GNUNET_break (0);
924 return GNUNET_YES;
925 }
926 /* while '(' before ')', count opening parens */
927 while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl))
928 {
929 cnt++;
930 pos = op + 1;
931 }
932 /* got ')' first */
933 cnt--;
934 pos = cl + 1;
935 }
936 return (*pos == '\0') ? GNUNET_NO : GNUNET_YES;
937}
938
939
940/**
941 * Remove parentheses surrounding string @a str.
942 * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same.
943 * You need to #GNUNET_free() the returned string.
944 *
945 * @param str string, modified to contain a
946 * @return string without surrounding parentheses, string 'str' if no preceding
947 * epsilon could be found, NULL if 'str' was NULL
948 */
949static void
950remove_parentheses (struct StringBuffer *str)
951{
952 size_t slen;
953 const char *pos;
954 const char *end;
955 const char *sbuf;
956 const char *op;
957 const char *cp;
958 unsigned int cnt;
959
960 if (0)
961 return;
962 sbuf = str->sbuf;
963 if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) ||
964 ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1]))
965 return;
966 cnt = 0;
967 pos = &sbuf[1];
968 end = &sbuf[slen - 1];
969 op = memchr (pos, '(', end - pos);
970 cp = memchr (pos, ')', end - pos);
971 while (NULL != cp)
972 {
973 while ((NULL != op) && (op < cp))
974 {
975 cnt++;
976 pos = op + 1;
977 op = memchr (pos, '(', end - pos);
978 }
979 while ((NULL != cp) && ((NULL == op) || (cp < op)))
980 {
981 if (0 == cnt)
982 return; /* can't strip parens */
983 cnt--;
984 pos = cp + 1;
985 cp = memchr (pos, ')', end - pos);
986 }
987 }
988 if (0 != cnt)
989 {
990 GNUNET_break (0);
991 return;
992 }
993 str->sbuf++;
994 str->slen -= 2;
995}
996
997
998/**
999 * Check if the string 'str' starts with an epsilon (empty string).
1000 * Example: "(|a)" is starting with an epsilon.
1001 *
1002 * @param str string to test
1003 *
1004 * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')'
1005 */
1006static int
1007has_epsilon (const struct StringBuffer *str)
1008{
1009 return (GNUNET_YES != str->null_flag) && (0 < str->slen) &&
1010 ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1011 (')' == str->sbuf[str->slen - 1]);
1012}
1013
1014
1015/**
1016 * Remove an epsilon from the string str. Where epsilon is an empty string
1017 * Example: str = "(|a|b|c)", result: "a|b|c"
1018 * The returned string needs to be freed.
1019 *
1020 * @param str original string
1021 * @param ret where to return string without preceding epsilon, string 'str' if no preceding
1022 * epsilon could be found, NULL if 'str' was NULL
1023 */
1024static void
1025remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret)
1026{
1027 if (GNUNET_YES == str->null_flag)
1028 {
1029 ret->null_flag = GNUNET_YES;
1030 return;
1031 }
1032 if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) &&
1033 (')' == str->sbuf[str->slen - 1]))
1034 {
1035 /* remove epsilon */
1036 if (ret->blen < str->slen - 3)
1037 {
1038 GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3);
1039 }
1040 ret->sbuf = ret->abuf;
1041 ret->slen = str->slen - 3;
1042 GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen);
1043 return;
1044 }
1045 sb_strdup (ret, str);
1046}
1047
1048
1049/**
1050 * Compare n bytes of 'str1' and 'str2'
1051 *
1052 * @param str1 first string to compare
1053 * @param str2 second string for comparison
1054 * @param n number of bytes to compare
1055 *
1056 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1057 */
1058static int
1059sb_strncmp (const struct StringBuffer *str1,
1060 const struct StringBuffer *str2,
1061 size_t n)
1062{
1063 size_t max;
1064
1065 if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n)))
1066 return -1;
1067 max = GNUNET_MAX (str1->slen, str2->slen);
1068 if (max > n)
1069 max = n;
1070 return memcmp (str1->sbuf, str2->sbuf, max);
1071}
1072
1073
1074/**
1075 * Compare n bytes of 'str1' and 'str2'
1076 *
1077 * @param str1 first string to compare
1078 * @param str2 second C string for comparison
1079 * @param n number of bytes to compare (and length of str2)
1080 *
1081 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1082 */
1083static int
1084sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n)
1085{
1086 if (str1->slen < n)
1087 return -1;
1088 return memcmp (str1->sbuf, str2, n);
1089}
1090
1091
1092/**
1093 * Initialize string buffer for storing strings of up to n
1094 * characters.
1095 *
1096 * @param sb buffer to initialize
1097 * @param n desired target length
1098 */
1099static void
1100sb_init (struct StringBuffer *sb, size_t n)
1101{
1102 sb->null_flag = GNUNET_NO;
1103 sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n);
1104 sb->blen = n;
1105 sb->slen = 0;
1106}
1107
1108
1109/**
1110 * Compare 'str1', starting from position 'k', with whole 'str2'
1111 *
1112 * @param str1 first string to compare, starting from position 'k'
1113 * @param str2 second string for comparison
1114 * @param k starting position in 'str1'
1115 *
1116 * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise
1117 */
1118static int
1119sb_strkcmp (const struct StringBuffer *str1,
1120 const struct StringBuffer *str2,
1121 size_t k)
1122{
1123 if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) ||
1124 (k > str1->slen) || (str1->slen - k != str2->slen))
1125 return -1;
1126 return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen);
1127}
1128
1129
1130/**
1131 * Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse'
1132 * function to create the depth-first numbering of the states.
1133 *
1134 * @param cls states array.
1135 * @param count current state counter.
1136 * @param s current state.
1137 */
1138static void
1139number_states (void *cls,
1140 const unsigned int count,
1141 struct REGEX_INTERNAL_State *s)
1142{
1143 struct REGEX_INTERNAL_State **states = cls;
1144
1145 s->dfs_id = count;
1146 if (NULL != states)
1147 states[count] = s;
1148}
1149
1150
1151#define PRIS(a) \
1152 ((GNUNET_YES == a.null_flag) ? 6 : (int) a.slen), \
1153 ((GNUNET_YES == a.null_flag) ? "(null)" : a.sbuf)
1154
1155
1156/**
1157 * Construct the regular expression given the inductive step,
1158 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^*
1159 * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij.
1160 *
1161 * @param R_last_ij value of $R^{(k-1)_{ij}.
1162 * @param R_last_ik value of $R^{(k-1)_{ik}.
1163 * @param R_last_kk value of $R^{(k-1)_{kk}.
1164 * @param R_last_kj value of $R^{(k-1)_{kj}.
1165 * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij
1166 * is expected to be NULL when called!
1167 * @param R_cur_l optimization -- kept between iterations to avoid realloc
1168 * @param R_cur_r optimization -- kept between iterations to avoid realloc
1169 */
1170static void
1171automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij,
1172 const struct StringBuffer *R_last_ik,
1173 const struct StringBuffer *R_last_kk,
1174 const struct StringBuffer *R_last_kj,
1175 struct StringBuffer *R_cur_ij,
1176 struct StringBuffer *R_cur_l,
1177 struct StringBuffer *R_cur_r)
1178{
1179 struct StringBuffer R_temp_ij;
1180 struct StringBuffer R_temp_ik;
1181 struct StringBuffer R_temp_kj;
1182 struct StringBuffer R_temp_kk;
1183 int eps_check;
1184 int ij_ik_cmp;
1185 int ij_kj_cmp;
1186 int ik_kk_cmp;
1187 int kk_kj_cmp;
1188 int clean_ik_kk_cmp;
1189 int clean_kk_kj_cmp;
1190 size_t length;
1191 size_t length_l;
1192 size_t length_r;
1193
1194 /*
1195 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1196 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1197 * R_cur_ij = R_cur_l | R_cur_r
1198 * R_cur_l == R^{(k-1)}_{ij}
1199 * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1200 */if ((GNUNET_YES == R_last_ij->null_flag) &&
1201 ((GNUNET_YES == R_last_ik->null_flag) ||
1202 (GNUNET_YES == R_last_kj->null_flag)))
1203 {
1204 /* R^{(k)}_{ij} = N | N */
1205 R_cur_ij->null_flag = GNUNET_YES;
1206 R_cur_ij->synced = GNUNET_NO;
1207 return;
1208 }
1209
1210 if ((GNUNET_YES == R_last_ik->null_flag) ||
1211 (GNUNET_YES == R_last_kj->null_flag))
1212 {
1213 /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */
1214 if (GNUNET_YES == R_last_ij->synced)
1215 {
1216 R_cur_ij->synced = GNUNET_YES;
1217 R_cur_ij->null_flag = GNUNET_NO;
1218 return;
1219 }
1220 R_cur_ij->synced = GNUNET_YES;
1221 sb_strdup (R_cur_ij, R_last_ij);
1222 return;
1223 }
1224 R_cur_ij->synced = GNUNET_NO;
1225
1226 /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR
1227 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */
1228
1229 R_cur_r->null_flag = GNUNET_YES;
1230 R_cur_r->slen = 0;
1231 R_cur_l->null_flag = GNUNET_YES;
1232 R_cur_l->slen = 0;
1233
1234 /* cache results from strcmp, we might need these many times */
1235 ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj);
1236 ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik);
1237 ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk);
1238 kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj);
1239
1240 /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well
1241 * as parentheses, so we can better compare the contents */
1242
1243 memset (&R_temp_ij, 0, sizeof(struct StringBuffer));
1244 memset (&R_temp_ik, 0, sizeof(struct StringBuffer));
1245 memset (&R_temp_kk, 0, sizeof(struct StringBuffer));
1246 memset (&R_temp_kj, 0, sizeof(struct StringBuffer));
1247 remove_epsilon (R_last_ik, &R_temp_ik);
1248 remove_epsilon (R_last_kk, &R_temp_kk);
1249 remove_epsilon (R_last_kj, &R_temp_kj);
1250 remove_parentheses (&R_temp_ik);
1251 remove_parentheses (&R_temp_kk);
1252 remove_parentheses (&R_temp_kj);
1253 clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk);
1254 clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj);
1255
1256 /* construct R_cur_l (and, if necessary R_cur_r) */
1257 if (GNUNET_YES != R_last_ij->null_flag)
1258 {
1259 /* Assign R_temp_ij to R_last_ij and remove epsilon as well
1260 * as parentheses, so we can better compare the contents */
1261 remove_epsilon (R_last_ij, &R_temp_ij);
1262 remove_parentheses (&R_temp_ij);
1263
1264 if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) &&
1265 (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1266 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)))
1267 {
1268 if (0 == R_temp_ij.slen)
1269 {
1270 R_cur_r->null_flag = GNUNET_NO;
1271 }
1272 else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) ||
1273 ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) &&
1274 (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) ))
1275 {
1276 /*
1277 * a|(e|a)a*(e|a) = a*
1278 * a|(e|a)(e|a)*(e|a) = a*
1279 * (e|a)|aa*a = a*
1280 * (e|a)|aa*(e|a) = a*
1281 * (e|a)|(e|a)a*a = a*
1282 * (e|a)|(e|a)a*(e|a) = a*
1283 * (e|a)|(e|a)(e|a)*(e|a) = a*
1284 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1285 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij);
1286 else
1287 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij);
1288 }
1289 else
1290 {
1291 /*
1292 * a|aa*a = a+
1293 * a|(e|a)a*a = a+
1294 * a|aa*(e|a) = a+
1295 * a|(e|a)(e|a)*a = a+
1296 * a|a(e|a)*(e|a) = a+
1297 */if (GNUNET_YES == needs_parentheses (&R_temp_ij))
1298 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij);
1299 else
1300 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij);
1301 }
1302 }
1303 else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) &&
1304 (0 != clean_ik_kk_cmp))
1305 {
1306 /* a|ab*b = ab* */
1307 if (0 == R_last_kk->slen)
1308 sb_strdup (R_cur_r, R_last_ij);
1309 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1310 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1311 else
1312 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk);
1313 R_cur_l->null_flag = GNUNET_YES;
1314 }
1315 else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) &&
1316 (0 != clean_kk_kj_cmp))
1317 {
1318 /* a|bb*a = b*a */
1319 if (R_last_kk->slen < 1)
1320 {
1321 sb_strdup (R_cur_r, R_last_kj);
1322 }
1323 else if (GNUNET_YES == needs_parentheses (&R_temp_kk))
1324 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1325 else
1326 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1327
1328 R_cur_l->null_flag = GNUNET_YES;
1329 }
1330 else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) &&
1331 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1332 {
1333 /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */
1334 if (needs_parentheses (&R_temp_kk))
1335 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk);
1336 else
1337 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk);
1338 R_cur_l->null_flag = GNUNET_YES;
1339 }
1340 else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) &&
1341 (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk))
1342 {
1343 /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */
1344 if (needs_parentheses (&R_temp_kk))
1345 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij);
1346 else
1347 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij);
1348 R_cur_l->null_flag = GNUNET_YES;
1349 }
1350 else
1351 {
1352 sb_strdup (R_cur_l, R_last_ij);
1353 remove_parentheses (R_cur_l);
1354 }
1355 }
1356 else
1357 {
1358 /* we have no left side */
1359 R_cur_l->null_flag = GNUNET_YES;
1360 }
1361
1362 /* construct R_cur_r, if not already constructed */
1363 if (GNUNET_YES == R_cur_r->null_flag)
1364 {
1365 length = R_temp_kk.slen - R_last_ik->slen;
1366
1367 /* a(ba)*bx = (ab)+x */
1368 if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) &&
1369 (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) &&
1370 (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) &&
1371 (0 < R_last_ik->slen) &&
1372 (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) &&
1373 (0 == sb_strncmp (&R_temp_kk, R_last_kj, length)))
1374 {
1375 struct StringBuffer temp_a;
1376 struct StringBuffer temp_b;
1377
1378 sb_init (&temp_a, length);
1379 sb_init (&temp_b, R_last_kj->slen - length);
1380
1381 length_l = length;
1382 temp_a.sbuf = temp_a.abuf;
1383 GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l);
1384 temp_a.slen = length_l;
1385
1386 length_r = R_last_kj->slen - length;
1387 temp_b.sbuf = temp_b.abuf;
1388 GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r);
1389 temp_b.slen = length_r;
1390
1391 /* e|(ab)+ = (ab)* */
1392 if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) &&
1393 (0 == temp_b.slen))
1394 {
1395 sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a);
1396 sb_free (R_cur_l);
1397 R_cur_l->null_flag = GNUNET_YES;
1398 }
1399 else
1400 {
1401 sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b);
1402 }
1403 sb_free (&temp_a);
1404 sb_free (&temp_b);
1405 }
1406 else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) &&
1407 (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) )
1408 {
1409 /*
1410 * (e|a)a*(e|a) = a*
1411 * (e|a)(e|a)*(e|a) = a*
1412 */
1413 if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj))
1414 {
1415 if (needs_parentheses (&R_temp_kk))
1416 sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk);
1417 else
1418 sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk);
1419 }
1420 /* aa*a = a+a */
1421 else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) &&
1422 (! has_epsilon (R_last_ik)))
1423 {
1424 if (needs_parentheses (&R_temp_kk))
1425 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk);
1426 else
1427 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk);
1428 }
1429 /*
1430 * (e|a)a*a = a+
1431 * aa*(e|a) = a+
1432 * a(e|a)*(e|a) = a+
1433 * (e|a)a*a = a+
1434 */else
1435 {
1436 eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk)
1437 + has_epsilon (R_last_kj));
1438
1439 if (1 == eps_check)
1440 {
1441 if (needs_parentheses (&R_temp_kk))
1442 sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk);
1443 else
1444 sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk);
1445 }
1446 }
1447 }
1448 /*
1449 * aa*b = a+b
1450 * (e|a)(e|a)*b = a*b
1451 */
1452 else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk))
1453 {
1454 if (has_epsilon (R_last_ik))
1455 {
1456 if (needs_parentheses (&R_temp_kk))
1457 sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj);
1458 else
1459 sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj);
1460 }
1461 else
1462 {
1463 if (needs_parentheses (&R_temp_kk))
1464 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj);
1465 else
1466 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj);
1467 }
1468 }
1469 /*
1470 * ba*a = ba+
1471 * b(e|a)*(e|a) = ba*
1472 */
1473 else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))
1474 {
1475 if (has_epsilon (R_last_kj))
1476 {
1477 if (needs_parentheses (&R_temp_kk))
1478 sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk);
1479 else
1480 sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk);
1481 }
1482 else
1483 {
1484 if (needs_parentheses (&R_temp_kk))
1485 sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk);
1486 else
1487 sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk);
1488 }
1489 }
1490 else
1491 {
1492 if (0 < R_temp_kk.slen)
1493 {
1494 if (needs_parentheses (&R_temp_kk))
1495 {
1496 sb_printf3 (R_cur_r,
1497 "%.*s(%.*s)*%.*s",
1498 3,
1499 R_last_ik,
1500 &R_temp_kk,
1501 R_last_kj);
1502 }
1503 else
1504 {
1505 sb_printf3 (R_cur_r,
1506 "%.*s%.*s*%.*s",
1507 1,
1508 R_last_ik,
1509 &R_temp_kk,
1510 R_last_kj);
1511 }
1512 }
1513 else
1514 {
1515 sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj);
1516 }
1517 }
1518 }
1519 sb_free (&R_temp_ij);
1520 sb_free (&R_temp_ik);
1521 sb_free (&R_temp_kk);
1522 sb_free (&R_temp_kj);
1523
1524 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1525 {
1526 R_cur_ij->null_flag = GNUNET_YES;
1527 return;
1528 }
1529
1530 if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag))
1531 {
1532 struct StringBuffer tmp;
1533
1534 tmp = *R_cur_ij;
1535 *R_cur_ij = *R_cur_l;
1536 *R_cur_l = tmp;
1537 return;
1538 }
1539
1540 if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag))
1541 {
1542 struct StringBuffer tmp;
1543
1544 tmp = *R_cur_ij;
1545 *R_cur_ij = *R_cur_r;
1546 *R_cur_r = tmp;
1547 return;
1548 }
1549
1550 if (0 == sb_nullstrcmp (R_cur_l, R_cur_r))
1551 {
1552 struct StringBuffer tmp;
1553
1554 tmp = *R_cur_ij;
1555 *R_cur_ij = *R_cur_l;
1556 *R_cur_l = tmp;
1557 return;
1558 }
1559 sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r);
1560}
1561
1562
1563/**
1564 * Create proofs for all states in the given automaton. Implementation of the
1565 * algorithm described in chapter 3.2.1 of "Automata Theory, Languages, and
1566 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
1567 *
1568 * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the
1569 * proof) fields. The starting state will only have a valid proof/hash if it has
1570 * any incoming transitions.
1571 *
1572 * @param a automaton for which to assign proofs and hashes, must not be NULL
1573 */
1574static int
1575automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a)
1576{
1577 unsigned int n = a->state_count;
1578 struct REGEX_INTERNAL_State *states[n];
1579 struct StringBuffer *R_last;
1580 struct StringBuffer *R_cur;
1581 struct StringBuffer R_cur_r;
1582 struct StringBuffer R_cur_l;
1583 struct StringBuffer *R_swap;
1584 struct REGEX_INTERNAL_Transition *t;
1585 struct StringBuffer complete_regex;
1586 unsigned int i;
1587 unsigned int j;
1588 unsigned int k;
1589
1590 R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1591 R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n);
1592 if ((NULL == R_last) || (NULL == R_cur))
1593 {
1594 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1595 GNUNET_free (R_cur);
1596 GNUNET_free (R_last);
1597 return GNUNET_SYSERR;
1598 }
1599
1600 /* create depth-first numbering of the states, initializes 'state' */
1601 REGEX_INTERNAL_automaton_traverse (a,
1602 a->start,
1603 NULL,
1604 NULL,
1605 &number_states,
1606 states);
1607
1608 for (i = 0; i < n; i++)
1609 GNUNET_assert (NULL != states[i]);
1610 for (i = 0; i < n; i++)
1611 for (j = 0; j < n; j++)
1612 R_last[i * n + j].null_flag = GNUNET_YES;
1613
1614 /* Compute regular expressions of length "1" between each pair of states */
1615 for (i = 0; i < n; i++)
1616 {
1617 for (t = states[i]->transitions_head; NULL != t; t = t->next)
1618 {
1619 j = t->to_state->dfs_id;
1620 if (GNUNET_YES == R_last[i * n + j].null_flag)
1621 {
1622 sb_strdup_cstr (&R_last[i * n + j], t->label);
1623 }
1624 else
1625 {
1626 sb_append_cstr (&R_last[i * n + j], "|");
1627 sb_append_cstr (&R_last[i * n + j], t->label);
1628 }
1629 }
1630 /* add self-loop: i is reachable from i via epsilon-transition */
1631 if (GNUNET_YES == R_last[i * n + i].null_flag)
1632 {
1633 R_last[i * n + i].slen = 0;
1634 R_last[i * n + i].null_flag = GNUNET_NO;
1635 }
1636 else
1637 {
1638 sb_wrap (&R_last[i * n + i], "(|%.*s)", 3);
1639 }
1640 }
1641 for (i = 0; i < n; i++)
1642 for (j = 0; j < n; j++)
1643 if (needs_parentheses (&R_last[i * n + j]))
1644 sb_wrap (&R_last[i * n + j], "(%.*s)", 2);
1645 /* Compute regular expressions of length "k" between each pair of states per
1646 * induction */
1647 memset (&R_cur_l, 0, sizeof(struct StringBuffer));
1648 memset (&R_cur_r, 0, sizeof(struct StringBuffer));
1649 for (k = 0; k < n; k++)
1650 {
1651 for (i = 0; i < n; i++)
1652 {
1653 for (j = 0; j < n; j++)
1654 {
1655 /* Basis for the recursion:
1656 * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj}
1657 * R_last == R^{(k-1)}, R_cur == R^{(k)}
1658 */
1659
1660 /* Create R_cur[i][j] and simplify the expression */
1661 automaton_create_proofs_simplify (&R_last[i * n + j],
1662 &R_last[i * n + k],
1663 &R_last[k * n + k],
1664 &R_last[k * n + j],
1665 &R_cur[i * n + j],
1666 &R_cur_l,
1667 &R_cur_r);
1668 }
1669 }
1670 /* set R_last = R_cur */
1671 R_swap = R_last;
1672 R_last = R_cur;
1673 R_cur = R_swap;
1674 /* clear 'R_cur' for next iteration */
1675 for (i = 0; i < n; i++)
1676 for (j = 0; j < n; j++)
1677 R_cur[i * n + j].null_flag = GNUNET_YES;
1678 }
1679 sb_free (&R_cur_l);
1680 sb_free (&R_cur_r);
1681 /* assign proofs and hashes */
1682 for (i = 0; i < n; i++)
1683 {
1684 if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag)
1685 {
1686 states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf,
1687 R_last[a->start->dfs_id * n + i].slen);
1688 GNUNET_CRYPTO_hash (states[i]->proof,
1689 strlen (states[i]->proof),
1690 &states[i]->hash);
1691 }
1692 }
1693
1694 /* complete regex for whole DFA: union of all pairs (start state/accepting
1695 * state(s)). */
1696 sb_init (&complete_regex, 16 * n);
1697 for (i = 0; i < n; i++)
1698 {
1699 if (states[i]->accepting)
1700 {
1701 if ((0 == complete_regex.slen) &&
1702 (0 < R_last[a->start->dfs_id * n + i].slen))
1703 {
1704 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1705 }
1706 else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) &&
1707 (0 < R_last[a->start->dfs_id * n + i].slen))
1708 {
1709 sb_append_cstr (&complete_regex, "|");
1710 sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]);
1711 }
1712 }
1713 }
1714 a->canonical_regex =
1715 GNUNET_strndup (complete_regex.sbuf, complete_regex.slen);
1716
1717 /* cleanup */
1718 sb_free (&complete_regex);
1719 for (i = 0; i < n; i++)
1720 for (j = 0; j < n; j++)
1721 {
1722 sb_free (&R_cur[i * n + j]);
1723 sb_free (&R_last[i * n + j]);
1724 }
1725 GNUNET_free (R_cur);
1726 GNUNET_free (R_last);
1727 return GNUNET_OK;
1728}
1729
1730
1731/**
1732 * Creates a new DFA state based on a set of NFA states. Needs to be freed using
1733 * automaton_destroy_state.
1734 *
1735 * @param ctx context
1736 * @param nfa_states set of NFA states on which the DFA should be based on
1737 *
1738 * @return new DFA state
1739 */
1740static struct REGEX_INTERNAL_State *
1741dfa_state_create (struct REGEX_INTERNAL_Context *ctx,
1742 struct REGEX_INTERNAL_StateSet *nfa_states)
1743{
1744 struct REGEX_INTERNAL_State *s;
1745 char *pos;
1746 size_t len;
1747 struct REGEX_INTERNAL_State *cstate;
1748 struct REGEX_INTERNAL_Transition *ctran;
1749 unsigned int i;
1750
1751 s = GNUNET_new (struct REGEX_INTERNAL_State);
1752 s->id = ctx->state_id++;
1753 s->index = -1;
1754 s->lowlink = -1;
1755
1756 if (NULL == nfa_states)
1757 {
1758 GNUNET_asprintf (&s->name, "s%i", s->id);
1759 return s;
1760 }
1761
1762 s->nfa_set = *nfa_states;
1763
1764 if (nfa_states->off < 1)
1765 return s;
1766
1767 /* Create a name based on 'nfa_states' */
1768 len = nfa_states->off * 14 + 4;
1769 s->name = GNUNET_malloc (len);
1770 strcat (s->name, "{");
1771 pos = s->name + 1;
1772
1773 for (i = 0; i < nfa_states->off; i++)
1774 {
1775 cstate = nfa_states->states[i];
1776 GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id);
1777 pos += strlen (pos);
1778
1779 /* Add a transition for each distinct label to NULL state */
1780 for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next)
1781 if (NULL != ctran->label)
1782 state_add_transition (ctx, s, ctran->label, NULL);
1783
1784 /* If the nfa_states contain an accepting state, the new dfa state is also
1785 * accepting. */
1786 if (cstate->accepting)
1787 s->accepting = 1;
1788 }
1789 pos[-1] = '}';
1790 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1791
1792 memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet));
1793 return s;
1794}
1795
1796
1797/**
1798 * Move from the given state 's' to the next state on transition 'str'. Consumes
1799 * as much of the given 'str' as possible (useful for strided DFAs). On return
1800 * 's' will point to the next state, and the length of the substring used for
1801 * this transition will be returned. If no transition possible 0 is returned and
1802 * 's' points to NULL.
1803 *
1804 * @param s starting state, will point to the next state or NULL (if no
1805 * transition possible)
1806 * @param str edge label to follow (will match longest common prefix)
1807 *
1808 * @return length of the substring consumed from 'str'
1809 */
1810static unsigned int
1811dfa_move (struct REGEX_INTERNAL_State **s, const char *str)
1812{
1813 struct REGEX_INTERNAL_Transition *t;
1814 struct REGEX_INTERNAL_State *new_s;
1815 unsigned int len;
1816 unsigned int max_len;
1817
1818 if (NULL == s)
1819 return 0;
1820
1821 new_s = NULL;
1822 max_len = 0;
1823 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1824 {
1825 len = strlen (t->label);
1826
1827 if (0 == strncmp (t->label, str, len))
1828 {
1829 if (len >= max_len)
1830 {
1831 max_len = len;
1832 new_s = t->to_state;
1833 }
1834 }
1835 }
1836
1837 *s = new_s;
1838 return max_len;
1839}
1840
1841
1842/**
1843 * Set the given state 'marked' to #GNUNET_YES. Used by the
1844 * #dfa_remove_unreachable_states() function to detect unreachable states in the
1845 * automaton.
1846 *
1847 * @param cls closure, not used.
1848 * @param count count, not used.
1849 * @param s state where the marked attribute will be set to #GNUNET_YES.
1850 */
1851static void
1852mark_states (void *cls,
1853 const unsigned int count,
1854 struct REGEX_INTERNAL_State *s)
1855{
1856 s->marked = GNUNET_YES;
1857}
1858
1859
1860/**
1861 * Remove all unreachable states from DFA 'a'. Unreachable states are those
1862 * states that are not reachable from the starting state.
1863 *
1864 * @param a DFA automaton
1865 */
1866static void
1867dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a)
1868{
1869 struct REGEX_INTERNAL_State *s;
1870 struct REGEX_INTERNAL_State *s_next;
1871
1872 /* 1. unmark all states */
1873 for (s = a->states_head; NULL != s; s = s->next)
1874 s->marked = GNUNET_NO;
1875
1876 /* 2. traverse dfa from start state and mark all visited states */
1877 REGEX_INTERNAL_automaton_traverse (a,
1878 a->start,
1879 NULL,
1880 NULL,
1881 &mark_states,
1882 NULL);
1883
1884 /* 3. delete all states that were not visited */
1885 for (s = a->states_head; NULL != s; s = s_next)
1886 {
1887 s_next = s->next;
1888 if (GNUNET_NO == s->marked)
1889 automaton_remove_state (a, s);
1890 }
1891}
1892
1893
1894/**
1895 * Remove all dead states from the DFA 'a'. Dead states are those states that do
1896 * not transition to any other state but themselves.
1897 *
1898 * @param a DFA automaton
1899 */
1900static void
1901dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a)
1902{
1903 struct REGEX_INTERNAL_State *s;
1904 struct REGEX_INTERNAL_State *s_next;
1905 struct REGEX_INTERNAL_Transition *t;
1906 int dead;
1907
1908 GNUNET_assert (DFA == a->type);
1909
1910 for (s = a->states_head; NULL != s; s = s_next)
1911 {
1912 s_next = s->next;
1913
1914 if (s->accepting)
1915 continue;
1916
1917 dead = 1;
1918 for (t = s->transitions_head; NULL != t; t = t->next)
1919 {
1920 if ((NULL != t->to_state) && (t->to_state != s) )
1921 {
1922 dead = 0;
1923 break;
1924 }
1925 }
1926
1927 if (0 == dead)
1928 continue;
1929
1930 /* state s is dead, remove it */
1931 automaton_remove_state (a, s);
1932 }
1933}
1934
1935
1936/**
1937 * Merge all non distinguishable states in the DFA 'a'
1938 *
1939 * @param ctx context
1940 * @param a DFA automaton
1941 * @return #GNUNET_OK on success
1942 */
1943static int
1944dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx,
1945 struct REGEX_INTERNAL_Automaton *a)
1946{
1947 uint32_t *table;
1948 struct REGEX_INTERNAL_State *s1;
1949 struct REGEX_INTERNAL_State *s2;
1950 struct REGEX_INTERNAL_Transition *t1;
1951 struct REGEX_INTERNAL_Transition *t2;
1952 struct REGEX_INTERNAL_State *s1_next;
1953 struct REGEX_INTERNAL_State *s2_next;
1954 int change;
1955 unsigned int num_equal_edges;
1956 unsigned int i;
1957 unsigned int state_cnt;
1958 unsigned long long idx;
1959 unsigned long long idx1;
1960
1961 if ((NULL == a) || (0 == a->state_count))
1962 {
1963 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
1964 "Could not merge nondistinguishable states, automaton was NULL.\n");
1965 return GNUNET_SYSERR;
1966 }
1967
1968 state_cnt = a->state_count;
1969 table = GNUNET_malloc_large (
1970 (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t));
1971 if (NULL == table)
1972 {
1973 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc");
1974 return GNUNET_SYSERR;
1975 }
1976
1977 for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next)
1978 s1->marked = i++;
1979
1980 /* Mark all pairs of accepting/!accepting states */
1981 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1982 for (s2 = a->states_head; NULL != s2; s2 = s2->next)
1983 if ((s1->accepting && ! s2->accepting) ||
1984 (! s1->accepting && s2->accepting))
1985 {
1986 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
1987 table[idx / 32] |= (1U << (idx % 32));
1988 }
1989
1990 /* Find all equal states */
1991 change = 1;
1992 while (0 != change)
1993 {
1994 change = 0;
1995 for (s1 = a->states_head; NULL != s1; s1 = s1->next)
1996 {
1997 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next)
1998 {
1999 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2000 if (0 != (table[idx / 32] & (1U << (idx % 32))))
2001 continue;
2002 num_equal_edges = 0;
2003 for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next)
2004 {
2005 for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next)
2006 {
2007 if (0 == strcmp (t1->label, t2->label))
2008 {
2009 num_equal_edges++;
2010 /* same edge, but targets definitively different, so we're different
2011 as well */
2012 if (t1->to_state->marked > t2->to_state->marked)
2013 idx1 = (unsigned long long) t1->to_state->marked * state_cnt
2014 + t2->to_state->marked;
2015 else
2016 idx1 = (unsigned long long) t2->to_state->marked * state_cnt
2017 + t1->to_state->marked;
2018 if (0 != (table[idx1 / 32] & (1U << (idx1 % 32))))
2019 {
2020 table[idx / 32] |= (1U << (idx % 32));
2021 change = 1; /* changed a marker, need to run again */
2022 }
2023 }
2024 }
2025 }
2026 if ((num_equal_edges != s1->transition_count) ||
2027 (num_equal_edges != s2->transition_count))
2028 {
2029 /* Make sure ALL edges of possible equal states are the same */
2030 table[idx / 32] |= (1U << (idx % 32));
2031 change = 1; /* changed a marker, need to run again */
2032 }
2033 }
2034 }
2035 }
2036
2037 /* Merge states that are equal */
2038 for (s1 = a->states_head; NULL != s1; s1 = s1_next)
2039 {
2040 s1_next = s1->next;
2041 for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next)
2042 {
2043 s2_next = s2->next;
2044 idx = (unsigned long long) s1->marked * state_cnt + s2->marked;
2045 if (0 == (table[idx / 32] & (1U << (idx % 32))))
2046 automaton_merge_states (ctx, a, s1, s2);
2047 }
2048 }
2049
2050 GNUNET_free (table);
2051 return GNUNET_OK;
2052}
2053
2054
2055/**
2056 * Minimize the given DFA 'a' by removing all unreachable states, removing all
2057 * dead states and merging all non distinguishable states
2058 *
2059 * @param ctx context
2060 * @param a DFA automaton
2061 * @return GNUNET_OK on success
2062 */
2063static int
2064dfa_minimize (struct REGEX_INTERNAL_Context *ctx,
2065 struct REGEX_INTERNAL_Automaton *a)
2066{
2067 if (NULL == a)
2068 return GNUNET_SYSERR;
2069
2070 GNUNET_assert (DFA == a->type);
2071
2072 /* 1. remove unreachable states */
2073 dfa_remove_unreachable_states (a);
2074
2075 /* 2. remove dead states */
2076 dfa_remove_dead_states (a);
2077
2078 /* 3. Merge nondistinguishable states */
2079 if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a))
2080 return GNUNET_SYSERR;
2081 return GNUNET_OK;
2082}
2083
2084
2085/**
2086 * Context for adding strided transitions to a DFA.
2087 */
2088struct REGEX_INTERNAL_Strided_Context
2089{
2090 /**
2091 * Length of the strides.
2092 */
2093 const unsigned int stride;
2094
2095 /**
2096 * Strided transitions DLL. New strided transitions will be stored in this DLL
2097 * and afterwards added to the DFA.
2098 */
2099 struct REGEX_INTERNAL_Transition *transitions_head;
2100
2101 /**
2102 * Strided transitions DLL.
2103 */
2104 struct REGEX_INTERNAL_Transition *transitions_tail;
2105};
2106
2107
2108/**
2109 * Recursive helper function to add strides to a DFA.
2110 *
2111 * @param cls context, contains stride length and strided transitions DLL.
2112 * @param depth current depth of the depth-first traversal of the graph.
2113 * @param label current label, string that contains all labels on the path from
2114 * 'start' to 's'.
2115 * @param start start state for the depth-first traversal of the graph.
2116 * @param s current state in the depth-first traversal
2117 */
2118static void
2119dfa_add_multi_strides_helper (void *cls,
2120 const unsigned int depth,
2121 char *label,
2122 struct REGEX_INTERNAL_State *start,
2123 struct REGEX_INTERNAL_State *s)
2124{
2125 struct REGEX_INTERNAL_Strided_Context *ctx = cls;
2126 struct REGEX_INTERNAL_Transition *t;
2127 char *new_label;
2128
2129 if (depth == ctx->stride)
2130 {
2131 t = GNUNET_new (struct REGEX_INTERNAL_Transition);
2132 t->label = GNUNET_strdup (label);
2133 t->to_state = s;
2134 t->from_state = start;
2135 GNUNET_CONTAINER_DLL_insert (ctx->transitions_head,
2136 ctx->transitions_tail,
2137 t);
2138 }
2139 else
2140 {
2141 for (t = s->transitions_head; NULL != t; t = t->next)
2142 {
2143 /* Do not consider self-loops, because it end's up in too many
2144 * transitions */
2145 if (t->to_state == t->from_state)
2146 continue;
2147
2148 if (NULL != label)
2149 {
2150 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2151 }
2152 else
2153 new_label = GNUNET_strdup (t->label);
2154
2155 dfa_add_multi_strides_helper (cls,
2156 (depth + 1),
2157 new_label,
2158 start,
2159 t->to_state);
2160 }
2161 }
2162 GNUNET_free (label);
2163}
2164
2165
2166/**
2167 * Function called for each state in the DFA. Starts a traversal of depth set in
2168 * context starting from state 's'.
2169 *
2170 * @param cls context.
2171 * @param count not used.
2172 * @param s current state.
2173 */
2174static void
2175dfa_add_multi_strides (void *cls,
2176 const unsigned int count,
2177 struct REGEX_INTERNAL_State *s)
2178{
2179 dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
2180}
2181
2182
2183/**
2184 * Adds multi-strided transitions to the given 'dfa'.
2185 *
2186 * @param regex_ctx regex context needed to add transitions to the automaton.
2187 * @param dfa DFA to which the multi strided transitions should be added.
2188 * @param stride_len length of the strides.
2189 */
2190void
2191REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
2192 struct REGEX_INTERNAL_Automaton *dfa,
2193 const unsigned int stride_len)
2194{
2195 struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL };
2196 struct REGEX_INTERNAL_Transition *t;
2197 struct REGEX_INTERNAL_Transition *t_next;
2198
2199 if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided))
2200 return;
2201
2202 /* Compute the new transitions of given stride_len */
2203 REGEX_INTERNAL_automaton_traverse (dfa,
2204 dfa->start,
2205 NULL,
2206 NULL,
2207 &dfa_add_multi_strides,
2208 &ctx);
2209
2210 /* Add all the new transitions to the automaton. */
2211 for (t = ctx.transitions_head; NULL != t; t = t_next)
2212 {
2213 t_next = t->next;
2214 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2215 GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t);
2216 GNUNET_free (t->label);
2217 GNUNET_free (t);
2218 }
2219
2220 /* Mark this automaton as multistrided */
2221 dfa->is_multistrided = GNUNET_YES;
2222}
2223
2224
2225/**
2226 * Recursive Helper function for DFA path compression. Does DFS on the DFA graph
2227 * and adds new transitions to the given transitions DLL and marks states that
2228 * should be removed by setting state->contained to GNUNET_YES.
2229 *
2230 * @param dfa DFA for which the paths should be compressed.
2231 * @param start starting state for linear path search.
2232 * @param cur current state in the recursive DFS.
2233 * @param label current label (string of traversed labels).
2234 * @param max_len maximal path compression length.
2235 * @param transitions_head transitions DLL.
2236 * @param transitions_tail transitions DLL.
2237 */
2238void
2239dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa,
2240 struct REGEX_INTERNAL_State *start,
2241 struct REGEX_INTERNAL_State *cur,
2242 char *label,
2243 unsigned int max_len,
2244 struct REGEX_INTERNAL_Transition **transitions_head,
2245 struct REGEX_INTERNAL_Transition **transitions_tail)
2246{
2247 struct REGEX_INTERNAL_Transition *t;
2248 char *new_label;
2249
2250
2251 if ((NULL != label) &&
2252 (((cur->incoming_transition_count > 1) || (GNUNET_YES ==
2253 cur->accepting) ||
2254 (GNUNET_YES == cur->marked) ) ||
2255 ((start != dfa->start) && (max_len > 0) && (max_len == strlen (
2256 label))) ||
2257 ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen (
2258 label)))))
2259 {
2260 t = GNUNET_new (struct REGEX_INTERNAL_Transition);
2261 t->label = GNUNET_strdup (label);
2262 t->to_state = cur;
2263 t->from_state = start;
2264 GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t);
2265
2266 if (GNUNET_NO == cur->marked)
2267 {
2268 dfa_compress_paths_helper (dfa,
2269 cur,
2270 cur,
2271 NULL,
2272 max_len,
2273 transitions_head,
2274 transitions_tail);
2275 }
2276 return;
2277 }
2278 else if (cur != start)
2279 cur->contained = GNUNET_YES;
2280
2281 if ((GNUNET_YES == cur->marked) && (cur != start))
2282 return;
2283
2284 cur->marked = GNUNET_YES;
2285
2286
2287 for (t = cur->transitions_head; NULL != t; t = t->next)
2288 {
2289 if (NULL != label)
2290 GNUNET_asprintf (&new_label, "%s%s", label, t->label);
2291 else
2292 new_label = GNUNET_strdup (t->label);
2293
2294 if (t->to_state != cur)
2295 {
2296 dfa_compress_paths_helper (dfa,
2297 start,
2298 t->to_state,
2299 new_label,
2300 max_len,
2301 transitions_head,
2302 transitions_tail);
2303 }
2304 GNUNET_free (new_label);
2305 }
2306}
2307
2308
2309/**
2310 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
2311 * compressed to 0->3 by combining transitions.
2312 *
2313 * @param regex_ctx context for adding new transitions.
2314 * @param dfa DFA representation, will directly modify the given DFA.
2315 * @param max_len maximal length of the compressed paths.
2316 */
2317static void
2318dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx,
2319 struct REGEX_INTERNAL_Automaton *dfa,
2320 unsigned int max_len)
2321{
2322 struct REGEX_INTERNAL_State *s;
2323 struct REGEX_INTERNAL_State *s_next;
2324 struct REGEX_INTERNAL_Transition *t;
2325 struct REGEX_INTERNAL_Transition *t_next;
2326 struct REGEX_INTERNAL_Transition *transitions_head = NULL;
2327 struct REGEX_INTERNAL_Transition *transitions_tail = NULL;
2328
2329 if (NULL == dfa)
2330 return;
2331
2332 /* Count the incoming transitions on each state. */
2333 for (s = dfa->states_head; NULL != s; s = s->next)
2334 {
2335 for (t = s->transitions_head; NULL != t; t = t->next)
2336 {
2337 if (NULL != t->to_state)
2338 t->to_state->incoming_transition_count++;
2339 }
2340 }
2341
2342 /* Unmark all states. */
2343 for (s = dfa->states_head; NULL != s; s = s->next)
2344 {
2345 s->marked = GNUNET_NO;
2346 s->contained = GNUNET_NO;
2347 }
2348
2349 /* Add strides and mark states that can be deleted. */
2350 dfa_compress_paths_helper (dfa,
2351 dfa->start,
2352 dfa->start,
2353 NULL,
2354 max_len,
2355 &transitions_head,
2356 &transitions_tail);
2357
2358 /* Add all the new transitions to the automaton. */
2359 for (t = transitions_head; NULL != t; t = t_next)
2360 {
2361 t_next = t->next;
2362 state_add_transition (regex_ctx, t->from_state, t->label, t->to_state);
2363 GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t);
2364 GNUNET_free (t->label);
2365 GNUNET_free (t);
2366 }
2367
2368 /* Remove marked states (including their incoming and outgoing transitions). */
2369 for (s = dfa->states_head; NULL != s; s = s_next)
2370 {
2371 s_next = s->next;
2372 if (GNUNET_YES == s->contained)
2373 automaton_remove_state (dfa, s);
2374 }
2375}
2376
2377
2378/**
2379 * Creates a new NFA fragment. Needs to be cleared using
2380 * automaton_fragment_clear.
2381 *
2382 * @param start starting state
2383 * @param end end state
2384 *
2385 * @return new NFA fragment
2386 */
2387static struct REGEX_INTERNAL_Automaton *
2388nfa_fragment_create (struct REGEX_INTERNAL_State *start,
2389 struct REGEX_INTERNAL_State *end)
2390{
2391 struct REGEX_INTERNAL_Automaton *n;
2392
2393 n = GNUNET_new (struct REGEX_INTERNAL_Automaton);
2394
2395 n->type = NFA;
2396 n->start = NULL;
2397 n->end = NULL;
2398 n->state_count = 0;
2399
2400 if ((NULL == start) || (NULL == end))
2401 return n;
2402
2403 automaton_add_state (n, end);
2404 automaton_add_state (n, start);
2405
2406 n->state_count = 2;
2407
2408 n->start = start;
2409 n->end = end;
2410
2411 return n;
2412}
2413
2414
2415/**
2416 * Adds a list of states to the given automaton 'n'.
2417 *
2418 * @param n automaton to which the states should be added
2419 * @param states_head head of the DLL of states
2420 * @param states_tail tail of the DLL of states
2421 */
2422static void
2423nfa_add_states (struct REGEX_INTERNAL_Automaton *n,
2424 struct REGEX_INTERNAL_State *states_head,
2425 struct REGEX_INTERNAL_State *states_tail)
2426{
2427 struct REGEX_INTERNAL_State *s;
2428
2429 if ((NULL == n) || (NULL == states_head))
2430 {
2431 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n");
2432 return;
2433 }
2434
2435 if (NULL == n->states_head)
2436 {
2437 n->states_head = states_head;
2438 n->states_tail = states_tail;
2439 return;
2440 }
2441
2442 if (NULL != states_head)
2443 {
2444 n->states_tail->next = states_head;
2445 n->states_tail = states_tail;
2446 }
2447
2448 for (s = states_head; NULL != s; s = s->next)
2449 n->state_count++;
2450}
2451
2452
2453/**
2454 * Creates a new NFA state. Needs to be freed using automaton_destroy_state.
2455 *
2456 * @param ctx context
2457 * @param accepting is it an accepting state or not
2458 *
2459 * @return new NFA state
2460 */
2461static struct REGEX_INTERNAL_State *
2462nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting)
2463{
2464 struct REGEX_INTERNAL_State *s;
2465
2466 s = GNUNET_new (struct REGEX_INTERNAL_State);
2467 s->id = ctx->state_id++;
2468 s->accepting = accepting;
2469 s->marked = GNUNET_NO;
2470 s->contained = 0;
2471 s->index = -1;
2472 s->lowlink = -1;
2473 s->scc_id = 0;
2474 s->name = NULL;
2475 GNUNET_asprintf (&s->name, "s%i", s->id);
2476
2477 return s;
2478}
2479
2480
2481/**
2482 * Calculates the closure set for the given set of states.
2483 *
2484 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
2485 * @param nfa the NFA containing 's'
2486 * @param states list of states on which to base the closure on
2487 * @param label transitioning label for which to base the closure on,
2488 * pass NULL for epsilon transition
2489 */
2490static void
2491nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret,
2492 struct REGEX_INTERNAL_Automaton *nfa,
2493 struct REGEX_INTERNAL_StateSet *states,
2494 const char *label)
2495{
2496 struct REGEX_INTERNAL_State *s;
2497 unsigned int i;
2498 struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
2499 struct REGEX_INTERNAL_State *clsstate;
2500 struct REGEX_INTERNAL_State *currentstate;
2501 struct REGEX_INTERNAL_Transition *ctran;
2502
2503 memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
2504 if (NULL == states)
2505 return;
2506
2507 for (i = 0; i < states->off; i++)
2508 {
2509 s = states->states[i];
2510
2511 /* Add start state to closure only for epsilon closure */
2512 if (NULL == label)
2513 state_set_append (ret, s);
2514
2515 /* initialize work stack */
2516 cls_stack.head = NULL;
2517 cls_stack.tail = NULL;
2518 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
2519 cls_stack.len = 1;
2520
2521 while (NULL != (currentstate = cls_stack.tail))
2522 {
2523 GNUNET_CONTAINER_MDLL_remove (ST,
2524 cls_stack.head,
2525 cls_stack.tail,
2526 currentstate);
2527 cls_stack.len--;
2528 for (ctran = currentstate->transitions_head; NULL != ctran;
2529 ctran = ctran->next)
2530 {
2531 if (NULL == (clsstate = ctran->to_state))
2532 continue;
2533 if (0 != clsstate->contained)
2534 continue;
2535 if (0 != nullstrcmp (label, ctran->label))
2536 continue;
2537 state_set_append (ret, clsstate);
2538 GNUNET_CONTAINER_MDLL_insert_tail (ST,
2539 cls_stack.head,
2540 cls_stack.tail,
2541 clsstate);
2542 cls_stack.len++;
2543 clsstate->contained = 1;
2544 }
2545 }
2546 }
2547 for (i = 0; i < ret->off; i++)
2548 ret->states[i]->contained = 0;
2549
2550 if (ret->off > 1)
2551 qsort (ret->states,
2552 ret->off,
2553 sizeof(struct REGEX_INTERNAL_State *),
2554 &state_compare);
2555}
2556
2557
2558/**
2559 * Pops two NFA fragments (a, b) from the stack and concatenates them (ab)
2560 *
2561 * @param ctx context
2562 */
2563static void
2564nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx)
2565{
2566 struct REGEX_INTERNAL_Automaton *a;
2567 struct REGEX_INTERNAL_Automaton *b;
2568 struct REGEX_INTERNAL_Automaton *new_nfa;
2569
2570 b = ctx->stack_tail;
2571 GNUNET_assert (NULL != b);
2572 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2573 a = ctx->stack_tail;
2574 GNUNET_assert (NULL != a);
2575 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2576
2577 state_add_transition (ctx, a->end, NULL, b->start);
2578 a->end->accepting = 0;
2579 b->end->accepting = 1;
2580
2581 new_nfa = nfa_fragment_create (NULL, NULL);
2582 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2583 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2584 new_nfa->start = a->start;
2585 new_nfa->end = b->end;
2586 new_nfa->state_count += a->state_count + b->state_count;
2587 automaton_fragment_clear (a);
2588 automaton_fragment_clear (b);
2589
2590 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2591}
2592
2593
2594/**
2595 * Pops a NFA fragment from the stack (a) and adds a new fragment (a*)
2596 *
2597 * @param ctx context
2598 */
2599static void
2600nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx)
2601{
2602 struct REGEX_INTERNAL_Automaton *a;
2603 struct REGEX_INTERNAL_Automaton *new_nfa;
2604 struct REGEX_INTERNAL_State *start;
2605 struct REGEX_INTERNAL_State *end;
2606
2607 a = ctx->stack_tail;
2608
2609 if (NULL == a)
2610 {
2611 GNUNET_log (
2612 GNUNET_ERROR_TYPE_ERROR,
2613 "nfa_add_star_op failed, because there was no element on the stack");
2614 return;
2615 }
2616
2617 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2618
2619 start = nfa_state_create (ctx, 0);
2620 end = nfa_state_create (ctx, 1);
2621
2622 state_add_transition (ctx, start, NULL, a->start);
2623 state_add_transition (ctx, start, NULL, end);
2624 state_add_transition (ctx, a->end, NULL, a->start);
2625 state_add_transition (ctx, a->end, NULL, end);
2626
2627 a->end->accepting = 0;
2628 end->accepting = 1;
2629
2630 new_nfa = nfa_fragment_create (start, end);
2631 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2632 automaton_fragment_clear (a);
2633
2634 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2635}
2636
2637
2638/**
2639 * Pops an NFA fragment (a) from the stack and adds a new fragment (a+)
2640 *
2641 * @param ctx context
2642 */
2643static void
2644nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx)
2645{
2646 struct REGEX_INTERNAL_Automaton *a;
2647
2648 a = ctx->stack_tail;
2649
2650 if (NULL == a)
2651 {
2652 GNUNET_log (
2653 GNUNET_ERROR_TYPE_ERROR,
2654 "nfa_add_plus_op failed, because there was no element on the stack");
2655 return;
2656 }
2657
2658 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2659
2660 state_add_transition (ctx, a->end, NULL, a->start);
2661
2662 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a);
2663}
2664
2665
2666/**
2667 * Pops an NFA fragment (a) from the stack and adds a new fragment (a?)
2668 *
2669 * @param ctx context
2670 */
2671static void
2672nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx)
2673{
2674 struct REGEX_INTERNAL_Automaton *a;
2675 struct REGEX_INTERNAL_Automaton *new_nfa;
2676 struct REGEX_INTERNAL_State *start;
2677 struct REGEX_INTERNAL_State *end;
2678
2679 a = ctx->stack_tail;
2680 if (NULL == a)
2681 {
2682 GNUNET_log (
2683 GNUNET_ERROR_TYPE_ERROR,
2684 "nfa_add_question_op failed, because there was no element on the stack");
2685 return;
2686 }
2687
2688 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2689
2690 start = nfa_state_create (ctx, 0);
2691 end = nfa_state_create (ctx, 1);
2692
2693 state_add_transition (ctx, start, NULL, a->start);
2694 state_add_transition (ctx, start, NULL, end);
2695 state_add_transition (ctx, a->end, NULL, end);
2696
2697 a->end->accepting = 0;
2698
2699 new_nfa = nfa_fragment_create (start, end);
2700 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2701 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2702 automaton_fragment_clear (a);
2703}
2704
2705
2706/**
2707 * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that
2708 * alternates between a and b (a|b)
2709 *
2710 * @param ctx context
2711 */
2712static void
2713nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx)
2714{
2715 struct REGEX_INTERNAL_Automaton *a;
2716 struct REGEX_INTERNAL_Automaton *b;
2717 struct REGEX_INTERNAL_Automaton *new_nfa;
2718 struct REGEX_INTERNAL_State *start;
2719 struct REGEX_INTERNAL_State *end;
2720
2721 b = ctx->stack_tail;
2722 GNUNET_assert (NULL != b);
2723 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b);
2724 a = ctx->stack_tail;
2725 GNUNET_assert (NULL != a);
2726 GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a);
2727
2728 start = nfa_state_create (ctx, 0);
2729 end = nfa_state_create (ctx, 1);
2730 state_add_transition (ctx, start, NULL, a->start);
2731 state_add_transition (ctx, start, NULL, b->start);
2732
2733 state_add_transition (ctx, a->end, NULL, end);
2734 state_add_transition (ctx, b->end, NULL, end);
2735
2736 a->end->accepting = 0;
2737 b->end->accepting = 0;
2738 end->accepting = 1;
2739
2740 new_nfa = nfa_fragment_create (start, end);
2741 nfa_add_states (new_nfa, a->states_head, a->states_tail);
2742 nfa_add_states (new_nfa, b->states_head, b->states_tail);
2743 automaton_fragment_clear (a);
2744 automaton_fragment_clear (b);
2745
2746 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa);
2747}
2748
2749
2750/**
2751 * Adds a new nfa fragment to the stack
2752 *
2753 * @param ctx context
2754 * @param label label for nfa transition
2755 */
2756static void
2757nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label)
2758{
2759 struct REGEX_INTERNAL_Automaton *n;
2760 struct REGEX_INTERNAL_State *start;
2761 struct REGEX_INTERNAL_State *end;
2762
2763 GNUNET_assert (NULL != ctx);
2764
2765 start = nfa_state_create (ctx, 0);
2766 end = nfa_state_create (ctx, 1);
2767 state_add_transition (ctx, start, label, end);
2768 n = nfa_fragment_create (start, end);
2769 GNUNET_assert (NULL != n);
2770 GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n);
2771}
2772
2773
2774/**
2775 * Initialize a new context
2776 *
2777 * @param ctx context
2778 */
2779static void
2780REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx)
2781{
2782 if (NULL == ctx)
2783 {
2784 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!");
2785 return;
2786 }
2787 ctx->state_id = 0;
2788 ctx->transition_id = 0;
2789 ctx->stack_head = NULL;
2790 ctx->stack_tail = NULL;
2791}
2792
2793
2794/**
2795 * Construct an NFA by parsing the regex string of length 'len'.
2796 *
2797 * @param regex regular expression string
2798 * @param len length of the string
2799 *
2800 * @return NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton
2801 */
2802struct REGEX_INTERNAL_Automaton *
2803REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len)
2804{
2805 struct REGEX_INTERNAL_Context ctx;
2806 struct REGEX_INTERNAL_Automaton *nfa;
2807 const char *regexp;
2808 char curlabel[2];
2809 char *error_msg;
2810 unsigned int count;
2811 unsigned int altcount;
2812 unsigned int atomcount;
2813 unsigned int poff;
2814 unsigned int psize;
2815
2816 struct
2817 {
2818 int altcount;
2819 int atomcount;
2820 } *p;
2821
2822 if ((NULL == regex) || (0 == strlen (regex)) || (0 == len))
2823 {
2824 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2825 "Could not parse regex. Empty regex string provided.\n");
2826
2827 return NULL;
2828 }
2829 REGEX_INTERNAL_context_init (&ctx);
2830
2831 regexp = regex;
2832 curlabel[1] = '\0';
2833 p = NULL;
2834 error_msg = NULL;
2835 altcount = 0;
2836 atomcount = 0;
2837 poff = 0;
2838 psize = 0;
2839
2840 for (count = 0; count < len && *regexp; count++, regexp++)
2841 {
2842 switch (*regexp)
2843 {
2844 case '(':
2845 if (atomcount > 1)
2846 {
2847 --atomcount;
2848 nfa_add_concatenation (&ctx);
2849 }
2850 if (poff == psize)
2851 GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */
2852 p[poff].altcount = altcount;
2853 p[poff].atomcount = atomcount;
2854 poff++;
2855 altcount = 0;
2856 atomcount = 0;
2857 break;
2858
2859 case '|':
2860 if (0 == atomcount)
2861 {
2862 error_msg = "Cannot append '|' to nothing";
2863 goto error;
2864 }
2865 while (--atomcount > 0)
2866 nfa_add_concatenation (&ctx);
2867 altcount++;
2868 break;
2869
2870 case ')':
2871 if (0 == poff)
2872 {
2873 error_msg = "Missing opening '('";
2874 goto error;
2875 }
2876 if (0 == atomcount)
2877 {
2878 /* Ignore this: "()" */
2879 poff--;
2880 altcount = p[poff].altcount;
2881 atomcount = p[poff].atomcount;
2882 break;
2883 }
2884 while (--atomcount > 0)
2885 nfa_add_concatenation (&ctx);
2886 for (; altcount > 0; altcount--)
2887 nfa_add_alternation (&ctx);
2888 poff--;
2889 altcount = p[poff].altcount;
2890 atomcount = p[poff].atomcount;
2891 atomcount++;
2892 break;
2893
2894 case '*':
2895 if (atomcount == 0)
2896 {
2897 error_msg = "Cannot append '*' to nothing";
2898 goto error;
2899 }
2900 nfa_add_star_op (&ctx);
2901 break;
2902
2903 case '+':
2904 if (atomcount == 0)
2905 {
2906 error_msg = "Cannot append '+' to nothing";
2907 goto error;
2908 }
2909 nfa_add_plus_op (&ctx);
2910 break;
2911
2912 case '?':
2913 if (atomcount == 0)
2914 {
2915 error_msg = "Cannot append '?' to nothing";
2916 goto error;
2917 }
2918 nfa_add_question_op (&ctx);
2919 break;
2920
2921 default:
2922 if (atomcount > 1)
2923 {
2924 --atomcount;
2925 nfa_add_concatenation (&ctx);
2926 }
2927 curlabel[0] = *regexp;
2928 nfa_add_label (&ctx, curlabel);
2929 atomcount++;
2930 break;
2931 }
2932 }
2933 if (0 != poff)
2934 {
2935 error_msg = "Unbalanced parenthesis";
2936 goto error;
2937 }
2938 while (--atomcount > 0)
2939 nfa_add_concatenation (&ctx);
2940 for (; altcount > 0; altcount--)
2941 nfa_add_alternation (&ctx);
2942
2943 GNUNET_array_grow (p, psize, 0);
2944
2945 nfa = ctx.stack_tail;
2946 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2947
2948 if (NULL != ctx.stack_head)
2949 {
2950 error_msg = "Creating the NFA failed. NFA stack was not empty!";
2951 goto error;
2952 }
2953
2954 /* Remember the regex that was used to generate this NFA */
2955 nfa->regex = GNUNET_strdup (regex);
2956
2957 /* create depth-first numbering of the states for pretty printing */
2958 REGEX_INTERNAL_automaton_traverse (nfa,
2959 NULL,
2960 NULL,
2961 NULL,
2962 &number_states,
2963 NULL);
2964
2965 /* No multistriding added so far */
2966 nfa->is_multistrided = GNUNET_NO;
2967
2968 return nfa;
2969
2970error:
2971 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex);
2972 if (NULL != error_msg)
2973 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg);
2974
2975 GNUNET_free (p);
2976
2977 while (NULL != (nfa = ctx.stack_head))
2978 {
2979 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
2980 REGEX_INTERNAL_automaton_destroy (nfa);
2981 }
2982
2983 return NULL;
2984}
2985
2986
2987/**
2988 * Create DFA states based on given 'nfa' and starting with 'dfa_state'.
2989 *
2990 * @param ctx context.
2991 * @param nfa NFA automaton.
2992 * @param dfa DFA automaton.
2993 * @param dfa_state current dfa state, pass epsilon closure of first nfa state
2994 * for starting.
2995 */
2996static void
2997construct_dfa_states (struct REGEX_INTERNAL_Context *ctx,
2998 struct REGEX_INTERNAL_Automaton *nfa,
2999 struct REGEX_INTERNAL_Automaton *dfa,
3000 struct REGEX_INTERNAL_State *dfa_state)
3001{
3002 struct REGEX_INTERNAL_Transition *ctran;
3003 struct REGEX_INTERNAL_State *new_dfa_state;
3004 struct REGEX_INTERNAL_State *state_contains;
3005 struct REGEX_INTERNAL_State *state_iter;
3006 struct REGEX_INTERNAL_StateSet tmp;
3007 struct REGEX_INTERNAL_StateSet nfa_set;
3008
3009 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
3010 {
3011 if ((NULL == ctran->label) || (NULL != ctran->to_state) )
3012 continue;
3013
3014 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
3015 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
3016 state_set_clear (&tmp);
3017
3018 state_contains = NULL;
3019 for (state_iter = dfa->states_head; NULL != state_iter;
3020 state_iter = state_iter->next)
3021 {
3022 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
3023 {
3024 state_contains = state_iter;
3025 break;
3026 }
3027 }
3028 if (NULL == state_contains)
3029 {
3030 new_dfa_state = dfa_state_create (ctx, &nfa_set);
3031 automaton_add_state (dfa, new_dfa_state);
3032 ctran->to_state = new_dfa_state;
3033 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
3034 }
3035 else
3036 {
3037 ctran->to_state = state_contains;
3038 state_set_clear (&nfa_set);
3039 }
3040 }
3041}
3042
3043
3044/**
3045 * Construct DFA for the given 'regex' of length 'len'.
3046 *
3047 * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
3048 * compressed to o -> abc -> o. Note that this parameter influences the
3049 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
3050 * edges with the same label). For example for an application that stores IPv4
3051 * addresses as bitstrings it could make sense to limit the path compression to
3052 * 4 or 8.
3053 *
3054 * @param regex regular expression string.
3055 * @param len length of the regular expression.
3056 * @param max_path_len limit the path compression length to the
3057 * given value. If set to 1, no path compression is applied. Set to 0 for
3058 * maximal possible path compression (generally not desirable).
3059 * @return DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
3060 */
3061struct REGEX_INTERNAL_Automaton *
3062REGEX_INTERNAL_construct_dfa (const char *regex,
3063 const size_t len,
3064 unsigned int max_path_len)
3065{
3066 struct REGEX_INTERNAL_Context ctx;
3067 struct REGEX_INTERNAL_Automaton *dfa;
3068 struct REGEX_INTERNAL_Automaton *nfa;
3069 struct REGEX_INTERNAL_StateSet nfa_start_eps_cls;
3070 struct REGEX_INTERNAL_StateSet singleton_set;
3071
3072 REGEX_INTERNAL_context_init (&ctx);
3073
3074 /* Create NFA */
3075 nfa = REGEX_INTERNAL_construct_nfa (regex, len);
3076
3077 if (NULL == nfa)
3078 {
3079 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3080 "Could not create DFA, because NFA creation failed\n");
3081 return NULL;
3082 }
3083
3084 dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton);
3085 dfa->type = DFA;
3086 dfa->regex = GNUNET_strdup (regex);
3087
3088 /* Create DFA start state from epsilon closure */
3089 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3090 state_set_append (&singleton_set, nfa->start);
3091 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
3092 state_set_clear (&singleton_set);
3093 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
3094 automaton_add_state (dfa, dfa->start);
3095
3096 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
3097 REGEX_INTERNAL_automaton_destroy (nfa);
3098
3099 /* Minimize DFA */
3100 if (GNUNET_OK != dfa_minimize (&ctx, dfa))
3101 {
3102 REGEX_INTERNAL_automaton_destroy (dfa);
3103 return NULL;
3104 }
3105
3106 /* Create proofs and hashes for all states */
3107 if (GNUNET_OK != automaton_create_proofs (dfa))
3108 {
3109 REGEX_INTERNAL_automaton_destroy (dfa);
3110 return NULL;
3111 }
3112
3113 /* Compress linear DFA paths */
3114 if (1 != max_path_len)
3115 dfa_compress_paths (&ctx, dfa, max_path_len);
3116
3117 return dfa;
3118}
3119
3120
3121/**
3122 * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data
3123 * structure.
3124 *
3125 * @param a automaton to be destroyed
3126 */
3127void
3128REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a)
3129{
3130 struct REGEX_INTERNAL_State *s;
3131 struct REGEX_INTERNAL_State *next_state;
3132
3133 if (NULL == a)
3134 return;
3135
3136 GNUNET_free (a->regex);
3137 GNUNET_free (a->canonical_regex);
3138
3139 for (s = a->states_head; NULL != s; s = next_state)
3140 {
3141 next_state = s->next;
3142 GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s);
3143 automaton_destroy_state (s);
3144 }
3145
3146 GNUNET_free (a);
3147}
3148
3149
3150/**
3151 * Evaluates the given string using the given DFA automaton
3152 *
3153 * @param a automaton, type must be DFA
3154 * @param string string that should be evaluated
3155 *
3156 * @return 0 if string matches, non-0 otherwise
3157 */
3158static int
3159evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3160{
3161 const char *strp;
3162 struct REGEX_INTERNAL_State *s;
3163 unsigned int step_len;
3164
3165 if (DFA != a->type)
3166 {
3167 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3168 "Tried to evaluate DFA, but NFA automaton given");
3169 return -1;
3170 }
3171
3172 s = a->start;
3173
3174 /* If the string is empty but the starting state is accepting, we accept. */
3175 if (((NULL == string) || (0 == strlen (string))) && s->accepting)
3176 return 0;
3177
3178 for (strp = string; NULL != strp && *strp; strp += step_len)
3179 {
3180 step_len = dfa_move (&s, strp);
3181
3182 if (NULL == s)
3183 break;
3184 }
3185
3186 if ((NULL != s) && s->accepting)
3187 return 0;
3188
3189 return 1;
3190}
3191
3192
3193/**
3194 * Evaluates the given string using the given NFA automaton
3195 *
3196 * @param a automaton, type must be NFA
3197 * @param string string that should be evaluated
3198 * @return 0 if string matches, non-0 otherwise
3199 */
3200static int
3201evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string)
3202{
3203 const char *strp;
3204 char str[2];
3205 struct REGEX_INTERNAL_State *s;
3206 struct REGEX_INTERNAL_StateSet sset;
3207 struct REGEX_INTERNAL_StateSet new_sset;
3208 struct REGEX_INTERNAL_StateSet singleton_set;
3209 unsigned int i;
3210 int result;
3211
3212 if (NFA != a->type)
3213 {
3214 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3215 "Tried to evaluate NFA, but DFA automaton given");
3216 return -1;
3217 }
3218
3219 /* If the string is empty but the starting state is accepting, we accept. */
3220 if (((NULL == string) || (0 == strlen (string))) && a->start->accepting)
3221 return 0;
3222
3223 result = 1;
3224 memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet));
3225 state_set_append (&singleton_set, a->start);
3226 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
3227 state_set_clear (&singleton_set);
3228
3229 str[1] = '\0';
3230 for (strp = string; NULL != strp && *strp; strp++)
3231 {
3232 str[0] = *strp;
3233 nfa_closure_set_create (&new_sset, a, &sset, str);
3234 state_set_clear (&sset);
3235 nfa_closure_set_create (&sset, a, &new_sset, 0);
3236 state_set_clear (&new_sset);
3237 }
3238
3239 for (i = 0; i < sset.off; i++)
3240 {
3241 s = sset.states[i];
3242 if ((NULL != s) && (s->accepting))
3243 {
3244 result = 0;
3245 break;
3246 }
3247 }
3248
3249 state_set_clear (&sset);
3250 return result;
3251}
3252
3253
3254/**
3255 * Evaluates the given @a string against the given compiled regex @a a
3256 *
3257 * @param a automaton
3258 * @param string string to check
3259 * @return 0 if string matches, non-0 otherwise
3260 */
3261int
3262REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string)
3263{
3264 int result;
3265
3266 switch (a->type)
3267 {
3268 case DFA:
3269 result = evaluate_dfa (a, string);
3270 break;
3271
3272 case NFA:
3273 result = evaluate_nfa (a, string);
3274 break;
3275
3276 default:
3277 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
3278 "Evaluating regex failed, automaton has no type!\n");
3279 result = GNUNET_SYSERR;
3280 break;
3281 }
3282
3283 return result;
3284}
3285
3286
3287/**
3288 * Get the canonical regex of the given automaton.
3289 * When constructing the automaton a proof is computed for each state,
3290 * consisting of the regular expression leading to this state. A complete
3291 * regex for the automaton can be computed by combining these proofs.
3292 * As of now this function is only useful for testing.
3293 *
3294 * @param a automaton for which the canonical regex should be returned.
3295 *
3296 * @return
3297 */
3298const char *
3299REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a)
3300{
3301 if (NULL == a)
3302 return NULL;
3303
3304 return a->canonical_regex;
3305}
3306
3307
3308/**
3309 * Get the number of transitions that are contained in the given automaton.
3310 *
3311 * @param a automaton for which the number of transitions should be returned.
3312 *
3313 * @return number of transitions in the given automaton.
3314 */
3315unsigned int
3316REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a)
3317{
3318 unsigned int t_count;
3319 struct REGEX_INTERNAL_State *s;
3320
3321 if (NULL == a)
3322 return 0;
3323
3324 t_count = 0;
3325 for (s = a->states_head; NULL != s; s = s->next)
3326 t_count += s->transition_count;
3327
3328 return t_count;
3329}
3330
3331
3332/**
3333 * Get the first key for the given @a input_string. This hashes the first x bits
3334 * of the @a input_string.
3335 *
3336 * @param input_string string.
3337 * @param string_len length of the @a input_string.
3338 * @param key pointer to where to write the hash code.
3339 * @return number of bits of @a input_string that have been consumed
3340 * to construct the key
3341 */
3342size_t
3343REGEX_INTERNAL_get_first_key (const char *input_string,
3344 size_t string_len,
3345 struct GNUNET_HashCode *key)
3346{
3347 size_t size;
3348
3349 size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len
3350 : GNUNET_REGEX_INITIAL_BYTES;
3351 if (NULL == input_string)
3352 {
3353 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n");
3354 return 0;
3355 }
3356 GNUNET_CRYPTO_hash (input_string, size, key);
3357
3358 return size;
3359}
3360
3361
3362/**
3363 * Recursive function that calls the iterator for each synthetic start state.
3364 *
3365 * @param min_len minimum length of the path in the graph.
3366 * @param max_len maximum length of the path in the graph.
3367 * @param consumed_string string consumed by traversing the graph till this state.
3368 * @param state current state of the automaton.
3369 * @param iterator iterator function called for each edge.
3370 * @param iterator_cls closure for the @a iterator function.
3371 */
3372static void
3373iterate_initial_edge (unsigned int min_len,
3374 unsigned int max_len,
3375 char *consumed_string,
3376 struct REGEX_INTERNAL_State *state,
3377 REGEX_INTERNAL_KeyIterator iterator,
3378 void *iterator_cls)
3379{
3380 char *temp;
3381 struct REGEX_INTERNAL_Transition *t;
3382 unsigned int num_edges = state->transition_count;
3383 struct REGEX_BLOCK_Edge edges[num_edges];
3384 struct REGEX_BLOCK_Edge edge[1];
3385 struct GNUNET_HashCode hash;
3386 struct GNUNET_HashCode hash_new;
3387 unsigned int cur_len;
3388
3389 if (NULL != consumed_string)
3390 cur_len = strlen (consumed_string);
3391 else
3392 cur_len = 0;
3393
3394 if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) &&
3395 (cur_len > 0) && (NULL != consumed_string))
3396 {
3397 if (cur_len <= max_len)
3398 {
3399 if ((NULL != state->proof) &&
3400 (0 != strcmp (consumed_string, state->proof)))
3401 {
3402 (void) state_get_edges (state, edges);
3403 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3404 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3405 "Start state for string `%s' is %s\n",
3406 consumed_string,
3407 GNUNET_h2s (&hash));
3408 iterator (iterator_cls,
3409 &hash,
3410 consumed_string,
3411 state->accepting,
3412 num_edges,
3413 edges);
3414 }
3415
3416 if ((GNUNET_YES == state->accepting) && (cur_len > 1) &&
3417 (state->transition_count < 1) && (cur_len < max_len))
3418 {
3419 /* Special case for regex consisting of just a string that is shorter than
3420 * max_len */
3421 edge[0].label = &consumed_string[cur_len - 1];
3422 edge[0].destination = state->hash;
3423 temp = GNUNET_strdup (consumed_string);
3424 temp[cur_len - 1] = '\0';
3425 GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new);
3426 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3427 "Start state for short string `%s' is %s\n",
3428 temp,
3429 GNUNET_h2s (&hash_new));
3430 iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge);
3431 GNUNET_free (temp);
3432 }
3433 }
3434 else /* cur_len > max_len */
3435 {
3436 /* Case where the concatenated labels are longer than max_len, then split. */
3437 edge[0].label = &consumed_string[max_len];
3438 edge[0].destination = state->hash;
3439 temp = GNUNET_strdup (consumed_string);
3440 temp[max_len] = '\0';
3441 GNUNET_CRYPTO_hash (temp, max_len, &hash);
3442 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3443 "Start state at split edge `%s'-`%s` is %s\n",
3444 temp,
3445 edge[0].label,
3446 GNUNET_h2s (&hash_new));
3447 iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge);
3448 GNUNET_free (temp);
3449 }
3450 }
3451
3452 if (cur_len < max_len)
3453 {
3454 for (t = state->transitions_head; NULL != t; t = t->next)
3455 {
3456 if (NULL != strchr (t->label, (int) '.'))
3457 {
3458 /* Wildcards not allowed during starting states */
3459 GNUNET_break (0);
3460 continue;
3461 }
3462 if (NULL != consumed_string)
3463 GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label);
3464 else
3465 GNUNET_asprintf (&temp, "%s", t->label);
3466 iterate_initial_edge (min_len,
3467 max_len,
3468 temp,
3469 t->to_state,
3470 iterator,
3471 iterator_cls);
3472 GNUNET_free (temp);
3473 }
3474 }
3475}
3476
3477
3478/**
3479 * Iterate over all edges starting from start state of automaton 'a'. Calling
3480 * iterator for each edge.
3481 *
3482 * @param a automaton.
3483 * @param iterator iterator called for each edge.
3484 * @param iterator_cls closure.
3485 */
3486void
3487REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
3488 REGEX_INTERNAL_KeyIterator iterator,
3489 void *iterator_cls)
3490{
3491 struct REGEX_INTERNAL_State *s;
3492
3493 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n");
3494 iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
3495 GNUNET_REGEX_INITIAL_BYTES,
3496 NULL,
3497 a->start,
3498 iterator,
3499 iterator_cls);
3500 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n");
3501 for (s = a->states_head; NULL != s; s = s->next)
3502 {
3503 struct REGEX_BLOCK_Edge edges[s->transition_count];
3504 unsigned int num_edges;
3505
3506 num_edges = state_get_edges (s, edges);
3507 if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting)
3508 {
3509 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
3510 "Creating DFA edges at `%s' under key %s\n",
3511 s->proof,
3512 GNUNET_h2s (&s->hash));
3513 iterator (iterator_cls,
3514 &s->hash,
3515 s->proof,
3516 s->accepting,
3517 num_edges,
3518 edges);
3519 }
3520 s->marked = GNUNET_NO;
3521 }
3522}
3523
3524
3525/**
3526 * Struct to hold all the relevant state information in the HashMap.
3527 *
3528 * Contains the same info as the Regex Iterator parameters except the key,
3529 * which comes directly from the HashMap iterator.
3530 */
3531struct temporal_state_store
3532{
3533 int reachable;
3534 char *proof;
3535 int accepting;
3536 int num_edges;
3537 struct REGEX_BLOCK_Edge *edges;
3538};
3539
3540
3541/**
3542 * Store regex iterator and cls in one place to pass to the hashmap iterator.
3543 */
3544struct client_iterator
3545{
3546 REGEX_INTERNAL_KeyIterator iterator;
3547 void *iterator_cls;
3548};
3549
3550
3551/**
3552 * Iterator over all edges of a dfa. Stores all of them in a HashMap
3553 * for later reachability marking.
3554 *
3555 * @param cls Closure (HashMap)
3556 * @param key hash for current state.
3557 * @param proof proof for current state
3558 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
3559 * @param num_edges number of edges leaving current state.
3560 * @param edges edges leaving current state.
3561 */
3562static void
3563store_all_states (void *cls,
3564 const struct GNUNET_HashCode *key,
3565 const char *proof,
3566 int accepting,
3567 unsigned int num_edges,
3568 const struct REGEX_BLOCK_Edge *edges)
3569{
3570 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3571 struct temporal_state_store *tmp;
3572 size_t edges_size;
3573
3574 tmp = GNUNET_new (struct temporal_state_store);
3575 tmp->reachable = GNUNET_NO;
3576 tmp->proof = GNUNET_strdup (proof);
3577 tmp->accepting = accepting;
3578 tmp->num_edges = num_edges;
3579 edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges;
3580 tmp->edges = GNUNET_malloc (edges_size);
3581 GNUNET_memcpy (tmp->edges, edges, edges_size);
3582 GNUNET_assert (GNUNET_YES ==
3583 GNUNET_CONTAINER_multihashmap_put (
3584 hm,
3585 key,
3586 tmp,
3587 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST));
3588}
3589
3590
3591/**
3592 * Mark state as reachable and call recursively on all its edges.
3593 *
3594 * If already marked as reachable, do nothing.
3595 *
3596 * @param state State to mark as reachable.
3597 * @param hm HashMap which stores all the states indexed by key.
3598 */
3599static void
3600mark_as_reachable (struct temporal_state_store *state,
3601 struct GNUNET_CONTAINER_MultiHashMap *hm)
3602{
3603 struct temporal_state_store *child;
3604 unsigned int i;
3605
3606 if (GNUNET_YES == state->reachable)
3607 /* visited */
3608 return;
3609
3610 state->reachable = GNUNET_YES;
3611 for (i = 0; i < state->num_edges; i++)
3612 {
3613 child =
3614 GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination);
3615 if (NULL == child)
3616 {
3617 GNUNET_break (0);
3618 continue;
3619 }
3620 mark_as_reachable (child, hm);
3621 }
3622}
3623
3624
3625/**
3626 * Iterator over hash map entries to mark the ones that are reachable.
3627 *
3628 * @param cls closure
3629 * @param key current key code
3630 * @param value value in the hash map
3631 * @return #GNUNET_YES if we should continue to iterate,
3632 * #GNUNET_NO if not.
3633 */
3634static int
3635reachability_iterator (void *cls,
3636 const struct GNUNET_HashCode *key,
3637 void *value)
3638{
3639 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3640 struct temporal_state_store *state = value;
3641
3642 if (GNUNET_YES == state->reachable)
3643 /* already visited and marked */
3644 return GNUNET_YES;
3645
3646 if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) &&
3647 (GNUNET_NO == state->accepting) )
3648 /* not directly reachable */
3649 return GNUNET_YES;
3650
3651 mark_as_reachable (state, hm);
3652 return GNUNET_YES;
3653}
3654
3655
3656/**
3657 * Iterator over hash map entries.
3658 * Calling the callback on the ones marked as reachables.
3659 *
3660 * @param cls closure
3661 * @param key current key code
3662 * @param value value in the hash map
3663 * @return #GNUNET_YES if we should continue to iterate,
3664 * #GNUNET_NO if not.
3665 */
3666static int
3667iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value)
3668{
3669 struct client_iterator *ci = cls;
3670 struct temporal_state_store *state = value;
3671
3672 if (GNUNET_YES == state->reachable)
3673 {
3674 ci->iterator (ci->iterator_cls,
3675 key,
3676 state->proof,
3677 state->accepting,
3678 state->num_edges,
3679 state->edges);
3680 }
3681 GNUNET_free (state->edges);
3682 GNUNET_free (state->proof);
3683 GNUNET_free (state);
3684 return GNUNET_YES;
3685}
3686
3687
3688/**
3689 * Iterate over all edges of automaton 'a' that are reachable from a state with
3690 * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
3691 *
3692 * Call the iterator for each such edge.
3693 *
3694 * @param a automaton.
3695 * @param iterator iterator called for each reachable edge.
3696 * @param iterator_cls closure.
3697 */
3698void
3699REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
3700 REGEX_INTERNAL_KeyIterator iterator,
3701 void *iterator_cls)
3702{
3703 struct GNUNET_CONTAINER_MultiHashMap *hm;
3704 struct client_iterator ci;
3705
3706 hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_NO);
3707 ci.iterator = iterator;
3708 ci.iterator_cls = iterator_cls;
3709
3710 REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm);
3711 GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm);
3712 GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci);
3713
3714 GNUNET_CONTAINER_multihashmap_destroy (hm);
3715}
3716
3717
3718/* end of regex_internal.c */