diff options
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | pkgconfig/gnunetregex.pc | 12 | ||||
-rw-r--r-- | src/include/gnunet_regex_lib.h | 85 | ||||
-rw-r--r-- | src/regex/Makefile.am | 34 | ||||
-rw-r--r-- | src/regex/regex.c | 639 | ||||
-rw-r--r-- | src/regex/regex.h | 29 | ||||
-rw-r--r-- | src/regex/test_regex.c | 56 |
7 files changed, 857 insertions, 0 deletions
diff --git a/configure.ac b/configure.ac index fe5de4c12..7fb10cf32 100644 --- a/configure.ac +++ b/configure.ac | |||
@@ -898,6 +898,7 @@ src/peerinfo/peerinfo.conf | |||
898 | src/peerinfo-tool/Makefile | 898 | src/peerinfo-tool/Makefile |
899 | src/postgres/Makefile | 899 | src/postgres/Makefile |
900 | src/pt/Makefile | 900 | src/pt/Makefile |
901 | src/regex/Makefile | ||
901 | src/statistics/Makefile | 902 | src/statistics/Makefile |
902 | src/statistics/statistics.conf | 903 | src/statistics/statistics.conf |
903 | src/stream/Makefile | 904 | src/stream/Makefile |
@@ -927,6 +928,7 @@ pkgconfig/gnunethello.pc | |||
927 | pkgconfig/gnunetnat.pc | 928 | pkgconfig/gnunetnat.pc |
928 | pkgconfig/gnunetnse.pc | 929 | pkgconfig/gnunetnse.pc |
929 | pkgconfig/gnunetpeerinfo.pc | 930 | pkgconfig/gnunetpeerinfo.pc |
931 | pkgconfig/gnunetregex.pc | ||
930 | pkgconfig/gnunetstatistics.pc | 932 | pkgconfig/gnunetstatistics.pc |
931 | pkgconfig/gnunettesting.pc | 933 | pkgconfig/gnunettesting.pc |
932 | pkgconfig/gnunettransport.pc | 934 | pkgconfig/gnunettransport.pc |
diff --git a/pkgconfig/gnunetregex.pc b/pkgconfig/gnunetregex.pc new file mode 100644 index 000000000..69974dc8f --- /dev/null +++ b/pkgconfig/gnunetregex.pc | |||
@@ -0,0 +1,12 @@ | |||
1 | prefix=/usr/local | ||
2 | exec_prefix=${prefix} | ||
3 | libdir=${exec_prefix}/lib | ||
4 | includedir=${prefix}/include | ||
5 | |||
6 | Name: GNUnet regex | ||
7 | Description: Provides API for parsing regular expressions into finite automata | ||
8 | URL: http://gnunet.org | ||
9 | Version: 0.9.2 | ||
10 | Requires: | ||
11 | Libs: -L${libdir} -lgnunetregex | ||
12 | Cflags: -I${includedir} | ||
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h new file mode 100644 index 000000000..343a58901 --- /dev/null +++ b/src/include/gnunet_regex_lib.h | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file include/gnunet_regex_lib.h | ||
22 | * @brief library to parse regular expressions into dfa | ||
23 | * @author Maximilian Szengel | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #ifndef GNUNET_REGEX_LIB_H | ||
28 | #define GNUNET_REGEX_LIB_H | ||
29 | |||
30 | #include "gnunet_util_lib.h" | ||
31 | |||
32 | #ifdef __cplusplus | ||
33 | extern "C" | ||
34 | { | ||
35 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
36 | } | ||
37 | #endif | ||
38 | #endif | ||
39 | |||
40 | /** | ||
41 | * NFA representation | ||
42 | */ | ||
43 | struct GNUNET_REGEX_Nfa; | ||
44 | |||
45 | /** | ||
46 | * Construct an NFA data structure by parsing the regex string of | ||
47 | * length len. | ||
48 | * | ||
49 | * @param regex regular expression string | ||
50 | * @param len length of the string | ||
51 | * | ||
52 | * @return NFA data structure. Needs to be freed using | ||
53 | * GNUNET_REGEX_destroy_nfa | ||
54 | */ | ||
55 | struct GNUNET_REGEX_Nfa * | ||
56 | GNUNET_REGEX_construct_nfa(const char *regex, size_t len); | ||
57 | |||
58 | /** | ||
59 | * Free the memory allocated by constructing the GNUNET_REGEX_Nfa | ||
60 | * data structure. | ||
61 | * | ||
62 | * @param n NFA to be destroyed | ||
63 | */ | ||
64 | void | ||
65 | GNUNET_REGEX_destroy_nfa(struct GNUNET_REGEX_Nfa *n); | ||
66 | |||
67 | /** | ||
68 | * Save the given NFA as a GraphViz dot file | ||
69 | * | ||
70 | * @param n NFA to be saved | ||
71 | * @param filename where to save the file | ||
72 | */ | ||
73 | void | ||
74 | GNUNET_REGEX_save_nfa_graph(struct GNUNET_REGEX_Nfa *n, | ||
75 | const char *filename); | ||
76 | |||
77 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
78 | { | ||
79 | #endif | ||
80 | #ifdef __cplusplus | ||
81 | } | ||
82 | #endif | ||
83 | |||
84 | /* end of gnunet_regex_lib.h */ | ||
85 | #endif | ||
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am new file mode 100644 index 000000000..729773399 --- /dev/null +++ b/src/regex/Makefile.am | |||
@@ -0,0 +1,34 @@ | |||
1 | INCLUDES = -I$(top_srcdir)/src/include | ||
2 | |||
3 | if MINGW | ||
4 | WINFLAGS = -Wl,--no-undefined -Wl,--export-all-symbols | ||
5 | endif | ||
6 | |||
7 | if USE_COVERAGE | ||
8 | AM_CFLAGS = --coverage | ||
9 | endif | ||
10 | |||
11 | lib_LTLIBRARIES = libgnunetregex.la | ||
12 | |||
13 | libgnunetregex_la_SOURCES = \ | ||
14 | regex.c regex.h | ||
15 | libgnunetregex_la_LIBADD = -lm \ | ||
16 | $(top_builddir)/src/util/libgnunetutil.la | ||
17 | libgnunetregex_la_LDFLAGS = \ | ||
18 | $(GN_LIB_LDFLAGS) \ | ||
19 | -version-info 0:0:0 | ||
20 | |||
21 | check_PROGRAMS = \ | ||
22 | test_regex | ||
23 | |||
24 | if ENABLE_TEST_RUN | ||
25 | TESTS = $(check_PROGRAMS) | ||
26 | endif | ||
27 | |||
28 | test_regex_SOURCES = \ | ||
29 | test_regex.c | ||
30 | test_regex_LDADD = \ | ||
31 | $(top_builddir)/src/regex/libgnunetregex.la \ | ||
32 | $(top_builddir)/src/util/libgnunetutil.la | ||
33 | |||
34 | EXTRA_DIST = test_regex_data.conf | ||
diff --git a/src/regex/regex.c b/src/regex/regex.c new file mode 100644 index 000000000..b9a104fae --- /dev/null +++ b/src/regex/regex.c | |||
@@ -0,0 +1,639 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex.c | ||
22 | * @brief library to create automatons from regular expressions | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "gnunet_regex_lib.h" | ||
27 | #include "regex.h" | ||
28 | |||
29 | struct Stack | ||
30 | { | ||
31 | void *data; | ||
32 | struct Stack *next; | ||
33 | }; | ||
34 | |||
35 | static struct Stack *nfa_stack = NULL; | ||
36 | |||
37 | void | ||
38 | push (void *val, struct Stack **stack) | ||
39 | { | ||
40 | struct Stack *new = GNUNET_malloc (sizeof (struct Stack *)); | ||
41 | |||
42 | if (NULL == new) | ||
43 | { | ||
44 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not push to stack\n"); | ||
45 | return; | ||
46 | } | ||
47 | new->data = val; | ||
48 | new->next = *stack; | ||
49 | *stack = new; | ||
50 | } | ||
51 | |||
52 | int | ||
53 | empty (struct Stack **stack) | ||
54 | { | ||
55 | return (NULL == *stack || NULL == stack); | ||
56 | } | ||
57 | |||
58 | void * | ||
59 | pop (struct Stack **stack) | ||
60 | { | ||
61 | struct Stack *top; | ||
62 | void *val; | ||
63 | |||
64 | if (empty (stack)) | ||
65 | return NULL; | ||
66 | |||
67 | top = *stack; | ||
68 | val = top->data; | ||
69 | *stack = top->next; | ||
70 | GNUNET_free (top); | ||
71 | return val; | ||
72 | } | ||
73 | |||
74 | void * | ||
75 | top (struct Stack **stack) | ||
76 | { | ||
77 | if (empty (stack)) | ||
78 | return NULL; | ||
79 | |||
80 | return (*stack)->data; | ||
81 | } | ||
82 | |||
83 | struct GNUNET_REGEX_Nfa | ||
84 | { | ||
85 | struct State *start; | ||
86 | struct State *end; | ||
87 | |||
88 | unsigned int statecnt; | ||
89 | struct State **states; | ||
90 | }; | ||
91 | |||
92 | struct State | ||
93 | { | ||
94 | unsigned int id; | ||
95 | int accepting; | ||
96 | unsigned int tcnt; | ||
97 | struct Transition *transitions; | ||
98 | int visited; | ||
99 | }; | ||
100 | |||
101 | struct Transition | ||
102 | { | ||
103 | unsigned int id; | ||
104 | char literal; | ||
105 | struct State *state; | ||
106 | }; | ||
107 | |||
108 | static unsigned int state_id = 0; | ||
109 | static unsigned int transition_id = 0; | ||
110 | |||
111 | struct GNUNET_REGEX_Nfa * | ||
112 | nfa_create (struct State *start, struct State *end) | ||
113 | { | ||
114 | struct GNUNET_REGEX_Nfa *n; | ||
115 | |||
116 | n = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Nfa)); | ||
117 | |||
118 | if (NULL == start && NULL == end) | ||
119 | { | ||
120 | n->states = NULL; | ||
121 | n->statecnt = 0; | ||
122 | n->start = NULL; | ||
123 | n->end = NULL; | ||
124 | |||
125 | return n; | ||
126 | } | ||
127 | |||
128 | n->states = GNUNET_malloc ((sizeof (struct State *)) * 2); | ||
129 | n->states[0] = start; | ||
130 | n->states[1] = end; | ||
131 | n->statecnt = 2; | ||
132 | |||
133 | n->start = start; | ||
134 | n->end = end; | ||
135 | |||
136 | return n; | ||
137 | } | ||
138 | |||
139 | |||
140 | void | ||
141 | nfa_add_states (struct GNUNET_REGEX_Nfa *n, struct State **states, | ||
142 | unsigned int count) | ||
143 | { | ||
144 | unsigned int i; | ||
145 | unsigned int j; | ||
146 | |||
147 | i = n->statecnt; | ||
148 | GNUNET_array_grow (n->states, n->statecnt, n->statecnt + count); | ||
149 | for (j = 0; i < n->statecnt && j < count; i++, j++) | ||
150 | { | ||
151 | n->states[i] = states[j]; | ||
152 | } | ||
153 | } | ||
154 | |||
155 | |||
156 | struct State * | ||
157 | nfa_create_state (int accepting) | ||
158 | { | ||
159 | struct State *s; | ||
160 | |||
161 | s = GNUNET_malloc (sizeof (struct State)); | ||
162 | s->id = state_id++; | ||
163 | s->accepting = accepting; | ||
164 | s->tcnt = 0; | ||
165 | s->transitions = NULL; | ||
166 | s->visited = 0; | ||
167 | |||
168 | return s; | ||
169 | } | ||
170 | |||
171 | void | ||
172 | nfa_add_transition (struct State *from_state, const char literal, | ||
173 | struct State *to_state) | ||
174 | { | ||
175 | struct Transition t; | ||
176 | |||
177 | if (NULL == to_state) | ||
178 | { | ||
179 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
180 | "Could not create Transition. to_state was NULL.\n"); | ||
181 | return; | ||
182 | } | ||
183 | |||
184 | t.id = transition_id++; | ||
185 | t.literal = literal; | ||
186 | t.state = to_state; | ||
187 | |||
188 | if (0 == from_state->tcnt) | ||
189 | from_state->transitions = NULL; | ||
190 | |||
191 | GNUNET_array_append (from_state->transitions, from_state->tcnt, t); | ||
192 | } | ||
193 | |||
194 | void | ||
195 | mark_all_states (struct GNUNET_REGEX_Nfa *n, int visited) | ||
196 | { | ||
197 | int i; | ||
198 | |||
199 | for (i = 0; i < n->statecnt; i++) | ||
200 | { | ||
201 | n->states[i]->visited = visited; | ||
202 | } | ||
203 | } | ||
204 | |||
205 | void | ||
206 | print_states (struct GNUNET_REGEX_Nfa *n, char **out_str) | ||
207 | { | ||
208 | struct State *s; | ||
209 | int i_s; | ||
210 | int i_t; | ||
211 | char *s_all; | ||
212 | |||
213 | mark_all_states (n, 0); | ||
214 | |||
215 | s_all = GNUNET_malloc (sizeof (char)); | ||
216 | *s_all = '\0'; | ||
217 | |||
218 | for (i_s = 0; i_s < n->statecnt; i_s++) | ||
219 | { | ||
220 | struct Transition *ctran; | ||
221 | char *s_acc = NULL; | ||
222 | char *s_tran = NULL; | ||
223 | |||
224 | s = n->states[i_s]; | ||
225 | |||
226 | if (s->accepting) | ||
227 | { | ||
228 | GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id); | ||
229 | |||
230 | s_all = GNUNET_realloc (s_all, strlen (s_all) + strlen (s_acc) + 1); | ||
231 | strcat (s_all, s_acc); | ||
232 | GNUNET_free (s_acc); | ||
233 | } | ||
234 | |||
235 | ctran = s->transitions; | ||
236 | s->visited = 1; | ||
237 | |||
238 | for (i_t = 0; i_t < s->tcnt && NULL != s->transitions; i_t++) | ||
239 | { | ||
240 | if (NULL == ctran) | ||
241 | { | ||
242 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "s->transitions was NULL\n"); | ||
243 | } | ||
244 | |||
245 | if (NULL == ctran->state) | ||
246 | { | ||
247 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
248 | "Transition from State %i has has no state for transitioning\n", | ||
249 | s->id); | ||
250 | } | ||
251 | |||
252 | if (ctran->literal == 0) | ||
253 | { | ||
254 | GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id, | ||
255 | ctran->state->id); | ||
256 | } | ||
257 | else | ||
258 | { | ||
259 | GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id, | ||
260 | ctran->state->id, ctran->literal); | ||
261 | } | ||
262 | |||
263 | s_all = GNUNET_realloc (s_all, strlen (s_all) + strlen (s_tran) + 1); | ||
264 | strcat (s_all, s_tran); | ||
265 | GNUNET_free (s_tran); | ||
266 | |||
267 | ctran++; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | *out_str = s_all; | ||
272 | } | ||
273 | |||
274 | void | ||
275 | nfa_add_concatenation () | ||
276 | { | ||
277 | struct GNUNET_REGEX_Nfa *A; | ||
278 | struct GNUNET_REGEX_Nfa *B; | ||
279 | struct GNUNET_REGEX_Nfa *new; | ||
280 | |||
281 | B = pop (&nfa_stack); | ||
282 | A = pop (&nfa_stack); | ||
283 | |||
284 | if (NULL == A || NULL == B) | ||
285 | { | ||
286 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
287 | "nfa_add_concatenationenation failed, because there were not enough elements on the stack"); | ||
288 | return; | ||
289 | } | ||
290 | |||
291 | nfa_add_transition (A->end, 0, B->start); | ||
292 | A->end->accepting = 0; | ||
293 | B->end->accepting = 1; | ||
294 | |||
295 | new = nfa_create (NULL, NULL); | ||
296 | nfa_add_states (new, A->states, A->statecnt); | ||
297 | nfa_add_states (new, B->states, B->statecnt); | ||
298 | new->start = A->start; | ||
299 | new->end = B->end; | ||
300 | GNUNET_free (A); | ||
301 | GNUNET_free (B); | ||
302 | |||
303 | push (new, &nfa_stack); | ||
304 | } | ||
305 | |||
306 | void | ||
307 | nfa_add_star_op () | ||
308 | { | ||
309 | struct GNUNET_REGEX_Nfa *A; | ||
310 | struct GNUNET_REGEX_Nfa *new; | ||
311 | struct State *start; | ||
312 | struct State *end; | ||
313 | |||
314 | A = pop (&nfa_stack); | ||
315 | |||
316 | if (NULL == A) | ||
317 | { | ||
318 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
319 | "nfa_add_star_op failed, because there was no element on the stack"); | ||
320 | return; | ||
321 | } | ||
322 | |||
323 | start = nfa_create_state (0); | ||
324 | end = nfa_create_state (1); | ||
325 | |||
326 | nfa_add_transition (start, 0, A->start); | ||
327 | nfa_add_transition (start, 0, end); | ||
328 | nfa_add_transition (A->end, 0, A->start); | ||
329 | nfa_add_transition (A->end, 0, end); | ||
330 | |||
331 | A->end->accepting = 0; | ||
332 | end->accepting = 1; | ||
333 | |||
334 | new = nfa_create (start, end); | ||
335 | nfa_add_states (new, A->states, A->statecnt); | ||
336 | GNUNET_free (A); | ||
337 | |||
338 | push (new, &nfa_stack); | ||
339 | } | ||
340 | |||
341 | void | ||
342 | nfa_add_plus_op () | ||
343 | { | ||
344 | struct GNUNET_REGEX_Nfa *A; | ||
345 | |||
346 | A = pop (&nfa_stack); | ||
347 | |||
348 | if (NULL == A) | ||
349 | { | ||
350 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
351 | "nfa_add_plus_op failed, because there was no element on the stack"); | ||
352 | return; | ||
353 | } | ||
354 | |||
355 | nfa_add_transition (A->end, 0, A->start); | ||
356 | |||
357 | push (A, &nfa_stack); | ||
358 | } | ||
359 | |||
360 | void | ||
361 | nfa_add_alternation () | ||
362 | { | ||
363 | struct GNUNET_REGEX_Nfa *A; | ||
364 | struct GNUNET_REGEX_Nfa *B; | ||
365 | struct GNUNET_REGEX_Nfa *new; | ||
366 | struct State *start; | ||
367 | struct State *end; | ||
368 | |||
369 | B = pop (&nfa_stack); | ||
370 | A = pop (&nfa_stack); | ||
371 | |||
372 | if (NULL == A || NULL == B) | ||
373 | { | ||
374 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
375 | "alternation failed, because there were not enough elements on the stack"); | ||
376 | return; | ||
377 | } | ||
378 | |||
379 | start = nfa_create_state (0); | ||
380 | end = nfa_create_state (1); | ||
381 | nfa_add_transition (start, 0, A->start); | ||
382 | nfa_add_transition (start, 0, B->start); | ||
383 | |||
384 | nfa_add_transition (A->end, 0, end); | ||
385 | nfa_add_transition (B->end, 0, end); | ||
386 | |||
387 | A->end->accepting = 0; | ||
388 | B->end->accepting = 0; | ||
389 | end->accepting = 1; | ||
390 | |||
391 | new = nfa_create (start, end); | ||
392 | nfa_add_states (new, A->states, A->statecnt); | ||
393 | nfa_add_states (new, B->states, B->statecnt); | ||
394 | GNUNET_free (A); | ||
395 | GNUNET_free (B); | ||
396 | |||
397 | push (new, &nfa_stack); | ||
398 | } | ||
399 | |||
400 | void | ||
401 | nfa_add_literal (const char lit) | ||
402 | { | ||
403 | struct GNUNET_REGEX_Nfa *n; | ||
404 | struct State *start; | ||
405 | struct State *end; | ||
406 | |||
407 | start = nfa_create_state (0); | ||
408 | end = nfa_create_state (1); | ||
409 | nfa_add_transition (start, lit, end); | ||
410 | n = nfa_create (start, end); | ||
411 | push (n, &nfa_stack); | ||
412 | } | ||
413 | |||
414 | struct GNUNET_REGEX_Nfa * | ||
415 | GNUNET_REGEX_construct_nfa (const char *regex, size_t len) | ||
416 | { | ||
417 | struct GNUNET_REGEX_Nfa *nfa; | ||
418 | unsigned int count; | ||
419 | unsigned int altcount; | ||
420 | unsigned int atomcount; | ||
421 | unsigned int pcount; | ||
422 | struct p_stage | ||
423 | { | ||
424 | int altcount; | ||
425 | int atomcount; | ||
426 | }; | ||
427 | struct p_stage *p; | ||
428 | |||
429 | p = NULL; | ||
430 | |||
431 | altcount = 0; | ||
432 | atomcount = 0; | ||
433 | pcount = 0; | ||
434 | |||
435 | for (count = 0; count < len && *regex; count++, regex++) | ||
436 | { | ||
437 | |||
438 | switch (*regex) | ||
439 | { | ||
440 | case '(': | ||
441 | if (atomcount > 1) | ||
442 | { | ||
443 | --atomcount; | ||
444 | nfa_add_concatenation (); | ||
445 | } | ||
446 | GNUNET_array_grow (p, pcount, pcount + 1); | ||
447 | p[pcount - 1].altcount = altcount; | ||
448 | p[pcount - 1].atomcount = atomcount; | ||
449 | altcount = 0; | ||
450 | atomcount = 0; | ||
451 | break; | ||
452 | case '|': | ||
453 | if (0 == atomcount) | ||
454 | goto error; | ||
455 | while (--atomcount > 0) | ||
456 | nfa_add_concatenation (); | ||
457 | altcount++; | ||
458 | break; | ||
459 | case ')': | ||
460 | if (0 == pcount) | ||
461 | goto error; | ||
462 | if (atomcount == 0) | ||
463 | goto error; | ||
464 | while (--atomcount > 0) | ||
465 | nfa_add_concatenation (); | ||
466 | for (; altcount > 0; altcount--) | ||
467 | nfa_add_alternation (); | ||
468 | pcount--; | ||
469 | altcount = p[pcount].altcount; | ||
470 | atomcount = p[pcount].atomcount; | ||
471 | atomcount++; | ||
472 | break; | ||
473 | case '*': | ||
474 | if (atomcount == 0) | ||
475 | goto error; | ||
476 | nfa_add_star_op (); | ||
477 | break; | ||
478 | case '+': | ||
479 | if (atomcount == 0) | ||
480 | goto error; | ||
481 | nfa_add_plus_op (); | ||
482 | break; | ||
483 | case 92: /* escape: \ */ | ||
484 | regex++; | ||
485 | count++; | ||
486 | default: | ||
487 | if (atomcount > 1) | ||
488 | { | ||
489 | --atomcount; | ||
490 | nfa_add_concatenation (); | ||
491 | } | ||
492 | nfa_add_literal (*regex); | ||
493 | atomcount++; | ||
494 | break; | ||
495 | } | ||
496 | } | ||
497 | if (0 != pcount) | ||
498 | goto error; | ||
499 | while (--atomcount > 0) | ||
500 | nfa_add_concatenation (); | ||
501 | for (; altcount > 0; altcount--) | ||
502 | nfa_add_alternation (); | ||
503 | |||
504 | if (NULL != p) | ||
505 | GNUNET_free (p); | ||
506 | |||
507 | nfa = pop (&nfa_stack); | ||
508 | |||
509 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
510 | "Created NFA with %i States and a total of %i Transitions\n", | ||
511 | state_id, transition_id); | ||
512 | |||
513 | return nfa; | ||
514 | |||
515 | error: | ||
516 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex\n"); | ||
517 | GNUNET_free (p); | ||
518 | while (!empty (&nfa_stack)) | ||
519 | GNUNET_REGEX_destroy_nfa (pop (&nfa_stack)); | ||
520 | return NULL; | ||
521 | } | ||
522 | |||
523 | void | ||
524 | GNUNET_REGEX_destroy_nfa (struct GNUNET_REGEX_Nfa *n) | ||
525 | { | ||
526 | int i; | ||
527 | |||
528 | for (i = 0; i < n->statecnt; i++) | ||
529 | { | ||
530 | GNUNET_free (n->states[i]); | ||
531 | } | ||
532 | } | ||
533 | |||
534 | void | ||
535 | GNUNET_REGEX_save_nfa_graph (struct GNUNET_REGEX_Nfa *n, const char *filename) | ||
536 | { | ||
537 | struct State *s; | ||
538 | char *start; | ||
539 | char *end; | ||
540 | char *states; | ||
541 | FILE *p; | ||
542 | int i_s; | ||
543 | int i_t; | ||
544 | |||
545 | if (NULL == n) | ||
546 | { | ||
547 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!"); | ||
548 | return; | ||
549 | } | ||
550 | |||
551 | mark_all_states (n, 0); | ||
552 | |||
553 | states = GNUNET_malloc (sizeof (char)); | ||
554 | *states = '\0'; | ||
555 | |||
556 | for (i_s = 0; i_s < n->statecnt; i_s++) | ||
557 | { | ||
558 | struct Transition *ctran; | ||
559 | char *s_acc = NULL; | ||
560 | char *s_tran = NULL; | ||
561 | |||
562 | s = n->states[i_s]; | ||
563 | |||
564 | if (s->accepting) | ||
565 | { | ||
566 | GNUNET_asprintf (&s_acc, "s%i [shape=doublecircle];\n", s->id); | ||
567 | |||
568 | states = GNUNET_realloc (states, strlen (states) + strlen (s_acc) + 1); | ||
569 | strcat (states, s_acc); | ||
570 | GNUNET_free (s_acc); | ||
571 | } | ||
572 | |||
573 | ctran = s->transitions; | ||
574 | s->visited = 1; | ||
575 | |||
576 | for (i_t = 0; i_t < s->tcnt && NULL != s->transitions; i_t++) | ||
577 | { | ||
578 | if (NULL == ctran) | ||
579 | { | ||
580 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "s->transitions was NULL\n"); | ||
581 | } | ||
582 | |||
583 | if (NULL == ctran->state) | ||
584 | { | ||
585 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
586 | "Transition from State %i has has no state for transitioning\n", | ||
587 | s->id); | ||
588 | } | ||
589 | |||
590 | if (ctran->literal == 0) | ||
591 | { | ||
592 | GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"epsilon\"];\n", s->id, | ||
593 | ctran->state->id); | ||
594 | } | ||
595 | else | ||
596 | { | ||
597 | GNUNET_asprintf (&s_tran, "s%i -> s%i [label = \"%c\"];\n", s->id, | ||
598 | ctran->state->id, ctran->literal); | ||
599 | } | ||
600 | |||
601 | states = GNUNET_realloc (states, strlen (states) + strlen (s_tran) + 1); | ||
602 | strcat (states, s_tran); | ||
603 | GNUNET_free (s_tran); | ||
604 | |||
605 | ctran++; | ||
606 | } | ||
607 | } | ||
608 | |||
609 | if (NULL == states) | ||
610 | { | ||
611 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA"); | ||
612 | return; | ||
613 | } | ||
614 | |||
615 | if (NULL == filename || strlen (filename) < 1) | ||
616 | { | ||
617 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!"); | ||
618 | GNUNET_free (states); | ||
619 | return; | ||
620 | } | ||
621 | |||
622 | p = fopen (filename, "w"); | ||
623 | if (p == NULL) | ||
624 | { | ||
625 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s", | ||
626 | filename); | ||
627 | GNUNET_free (states); | ||
628 | return; | ||
629 | } | ||
630 | |||
631 | start = "digraph G {\nrankdir=LR\n"; | ||
632 | end = "\n}\n"; | ||
633 | fwrite (start, strlen (start), 1, p); | ||
634 | fwrite (states, strlen (states), 1, p); | ||
635 | fwrite (end, strlen (end), 1, p); | ||
636 | fclose (p); | ||
637 | |||
638 | GNUNET_free (states); | ||
639 | } | ||
diff --git a/src/regex/regex.h b/src/regex/regex.h new file mode 100644 index 000000000..4f8c3853b --- /dev/null +++ b/src/regex/regex.h | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex.h | ||
22 | * @brief library to create automatons from regular expressions | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #ifndef REGEX_H | ||
26 | #define REGEX_H | ||
27 | #include "gnunet_regex_lib.h" | ||
28 | |||
29 | #endif | ||
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c new file mode 100644 index 000000000..288dc28e6 --- /dev/null +++ b/src/regex/test_regex.c | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "gnunet_regex_lib.h" | ||
27 | |||
28 | static int err = 0; | ||
29 | |||
30 | int | ||
31 | main (int argc, char *argv[]) | ||
32 | { | ||
33 | GNUNET_log_setup ("test-regex", | ||
34 | #if VERBOSE | ||
35 | "DEBUG", | ||
36 | #else | ||
37 | "WARNING", | ||
38 | #endif | ||
39 | NULL); | ||
40 | |||
41 | struct GNUNET_REGEX_Nfa *nfa; | ||
42 | char *regex; | ||
43 | |||
44 | regex = "a\\*b(c|d)+c*(a(b|c)d)+"; | ||
45 | nfa = GNUNET_REGEX_construct_nfa (regex, strlen (regex)); | ||
46 | |||
47 | if (nfa) | ||
48 | { | ||
49 | GNUNET_REGEX_save_nfa_graph (nfa, "nfa_graph.dot"); | ||
50 | GNUNET_REGEX_destroy_nfa (nfa); | ||
51 | } | ||
52 | else | ||
53 | err = 1; | ||
54 | |||
55 | return err; | ||
56 | } | ||