aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex_test_lib.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/regex_test_lib.c')
-rw-r--r--src/regex/regex_test_lib.c646
1 files changed, 0 insertions, 646 deletions
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c
deleted file mode 100644
index d25799ea4..000000000
--- a/src/regex/regex_test_lib.c
+++ /dev/null
@@ -1,646 +0,0 @@
1/*
2 * This file is part of GNUnet
3 * Copyright (C) 2012-2017 GNUnet e.V.
4 *
5 * GNUnet is free software: you can redistribute it and/or modify it
6 * under the terms of the GNU Affero General Public License as published
7 * by the Free Software Foundation, either version 3 of the License,
8 * or (at your option) any later version.
9 *
10 * GNUnet is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Affero General Public License for more details.
14 *
15 * You should have received a copy of the GNU Affero General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18/**
19 * @file src/regex/regex_test_lib.c
20 * @brief library to read regexes representing IP networks from a file.
21 * and simplyfinying the into one big regex, in order to run
22 * tests (regex performance, cadet profiler).
23 * @author Bartlomiej Polot
24 */
25
26#include "platform.h"
27#include "gnunet_util_lib.h"
28
29
30/**
31 * Struct to hold the tree formed by prefix-combining the regexes.
32 */
33struct RegexCombineCtx
34{
35 /**
36 * Child nodes with same prefix and token.
37 */
38 struct RegexCombineCtx **children;
39
40 /**
41 * Alphabet size (how many @a children there are)
42 */
43 unsigned int size;
44
45 /**
46 * Token.
47 */
48 char *s;
49};
50
51
52/**
53 * Char 2 int
54 *
55 * Convert a character into its int value depending on the base used
56 *
57 * @param c Char
58 * @param size base (2, 8 or 16(hex))
59 *
60 * @return Int in range [0, (base-1)]
61 */
62static int
63c2i (char c, int size)
64{
65 switch (size)
66 {
67 case 2:
68 case 8:
69 return c - '0';
70 break;
71 case 16:
72 if (c >= '0' && c <= '9')
73 return c - '0';
74 else if (c >= 'A' && c <= 'F')
75 return c - 'A' + 10;
76 else if (c >= 'a' && c <= 'f')
77 return c - 'a' + 10;
78 else
79 {
80 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
81 "Cannot convert char %c in base %u\n",
82 c, size);
83 GNUNET_assert (0);
84 }
85 break;
86 default:
87 GNUNET_assert (0);
88 }
89}
90
91
92/**
93 * Printf spaces to indent the regex tree
94 *
95 * @param n Indentation level
96 */
97static void
98space (int n)
99{
100 for (int i = 0; i < n; i++)
101 fprintf (stderr, "| ");
102}
103
104
105/**
106 * Printf the combined regex ctx.
107 *
108 * @param ctx The ctx to printf
109 * @param level Indentation level to start with
110 */
111static void
112debugctx (struct RegexCombineCtx *ctx, int level)
113{
114#if DEBUG_REGEX
115 if (NULL != ctx->s)
116 {
117 space (level - 1);
118 fprintf (stderr, "%u:'%s'\n", c2i(ctx->s[0], ctx->size), ctx->s);
119 }
120 else
121 fprintf (stderr, "ROOT (base %u)\n", ctx->size);
122 for (unsigned int i = 0; i < ctx->size; i++)
123 {
124 if (NULL != ctx->children[i])
125 {
126 space (level);
127 debugctx (ctx->children[i], level + 1);
128 }
129 }
130 fflush(stderr);
131#endif
132}
133
134
135/**
136 * Add a single regex to a context, combining with exisiting regex by-prefix.
137 *
138 * @param ctx Context with 0 or more regexes.
139 * @param regex Regex to add.
140 */
141static void
142regex_add (struct RegexCombineCtx *ctx,
143 const char *regex);
144
145
146/**
147 * Create and initialize a new RegexCombineCtx.
148 *
149 * @param alphabet_size Size of the alphabet (and the Trie array)
150 */
151static struct RegexCombineCtx *
152new_regex_ctx (unsigned int alphabet_size)
153{
154 struct RegexCombineCtx *ctx;
155 size_t array_size;
156
157 array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
158 ctx = GNUNET_new (struct RegexCombineCtx);
159 ctx->children = GNUNET_malloc (array_size);
160 ctx->size = alphabet_size;
161
162 return ctx;
163}
164
165
166static void
167move_children (struct RegexCombineCtx *dst,
168 const struct RegexCombineCtx *src)
169{
170 size_t array_size;
171
172 array_size = sizeof(struct RegexCombineCtx *) * src->size;
173 GNUNET_memcpy (dst->children,
174 src->children,
175 array_size);
176 for (unsigned int i = 0; i < src->size; i++)
177 {
178 src->children[i] = NULL;
179 }
180}
181
182
183/**
184 * Extract a string from all prefix-combined regexes.
185 *
186 * @param ctx Context with 0 or more regexes.
187 *
188 * @return Regex that matches any of the added regexes.
189 */
190static char *
191regex_combine (struct RegexCombineCtx *ctx)
192{
193 struct RegexCombineCtx *p;
194 unsigned int i;
195 size_t len;
196 char *regex;
197 char *tmp;
198 char *s;
199 int opt;
200
201 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
202 regex = GNUNET_strdup ("");
203 opt = GNUNET_NO;
204 for (i = 0; i < ctx->size; i++)
205 {
206 p = ctx->children[i];
207 if (NULL == p)
208 continue;
209 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
210 "adding '%s' to innner %s\n",
211 p->s, ctx->s);
212 s = regex_combine (p);
213 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
214 if (strlen(s) == 0)
215 {
216 opt = GNUNET_YES;
217 }
218 else
219 {
220 GNUNET_asprintf (&tmp, "%s%s|", regex, s);
221 GNUNET_free_non_null (regex);
222 regex = tmp;
223 }
224 GNUNET_free_non_null (s);
225 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, ctx->s);
226 }
227
228 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex);
229 len = strlen (regex);
230 if (0 == len)
231 {
232 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n");
233 GNUNET_free (regex);
234 return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s);
235 }
236
237 if ('|' == regex[len - 1])
238 regex[len - 1] = '\0';
239
240 if (NULL != ctx->s)
241 {
242 if (opt)
243 GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
244 else
245 GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
246 GNUNET_free (regex);
247 regex = s;
248 }
249
250 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
251 return regex;
252}
253
254
255/**
256 * Get the number of matching characters on the prefix of both strings.
257 *
258 * @param s1 String 1.
259 * @param s2 String 2.
260 *
261 * @return Number of characters of matching prefix.
262 */
263static unsigned int
264get_prefix_length (const char *s1, const char *s2)
265{
266 unsigned int l1;
267 unsigned int l2;
268 unsigned int limit;
269 unsigned int i;
270
271 l1 = strlen (s1);
272 l2 = strlen (s2);
273 limit = l1 > l2 ? l2 : l1;
274
275 for (i = 0; i < limit; i++)
276 {
277 if (s1[i] != s2[i])
278 return i;
279 }
280 return limit;
281}
282
283
284/**
285 * Return the child context with the longest prefix match with the regex.
286 * Usually only one child will match, search all just in case.
287 *
288 * @param ctx Context whose children to search.
289 * @param regex String to match.
290 *
291 * @return Child with the longest prefix, NULL if no child matches.
292 */
293static struct RegexCombineCtx *
294get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
295{
296 struct RegexCombineCtx *p;
297 struct RegexCombineCtx *best;
298 unsigned int i;
299 unsigned int l;
300 unsigned int best_l;
301
302 best_l = 0;
303 best = NULL;
304
305 for (i = 0; i < ctx->size; i++)
306 {
307 p = ctx->children[i];
308 if (NULL == p)
309 continue;
310
311 l = get_prefix_length (p->s, regex);
312 if (l > best_l)
313 {
314 GNUNET_break (0 == best_l);
315 best = p;
316 best_l = l;
317 }
318 }
319 return best;
320}
321
322static void
323regex_add_multiple (struct RegexCombineCtx *ctx,
324 const char *regex,
325 struct RegexCombineCtx **children)
326{
327 char tmp[2];
328 long unsigned int i;
329 size_t l;
330 struct RegexCombineCtx *newctx;
331 unsigned int count;
332
333 if ('(' != regex[0])
334 {
335 GNUNET_assert (0);
336 }
337
338 /* Does the regex cover *all* possible children? Then don't add any,
339 * as it will be covered by the post-regex "(a-z)*"
340 */
341 l = strlen (regex);
342 count = 0;
343 for (i = 1UL; i < l; i++)
344 {
345 if (regex[i] != '|' && regex[i] != ')')
346 {
347 count++;
348 }
349 }
350 if (count == ctx->size)
351 {
352 return;
353 }
354
355 /* Add every component as a child node */
356 tmp[1] = '\0';
357 for (i = 1UL; i < l; i++)
358 {
359 if (regex[i] != '|' && regex[i] != ')')
360 {
361 tmp[0] = regex[i];
362 newctx = new_regex_ctx(ctx->size);
363 newctx->s = GNUNET_strdup (tmp);
364 if (children != NULL)
365 GNUNET_memcpy (newctx->children,
366 children,
367 sizeof (*children) * ctx->size);
368 ctx->children[c2i(tmp[0], ctx->size)] = newctx;
369 }
370 }
371}
372
373/**
374 * Add a single regex to a context, splitting the exisiting state.
375 *
376 * We only had a partial match, split existing state, truncate the current node
377 * so it only contains the prefix, add suffix(es) as children.
378 *
379 * @param ctx Context to split.
380 * @param len Lenght of ctx->s
381 * @param prefix_l Lenght of common prefix of the new regex and @a ctx->s
382 */
383static void
384regex_split (struct RegexCombineCtx *ctx,
385 unsigned int len,
386 unsigned int prefix_l)
387{
388 struct RegexCombineCtx *newctx;
389 unsigned int idx;
390 char *suffix;
391
392 suffix = GNUNET_malloc (len - prefix_l + 1);
393 strncpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
394
395 /* Suffix saved, truncate current node so it only contains the prefix,
396 * copy any children nodes to put as grandchildren and initialize new empty
397 * children array.
398 */
399 ctx->s[prefix_l] = '\0';
400
401 /* If the suffix is an OR expression, add multiple children */
402 if ('(' == suffix[0])
403 {
404 struct RegexCombineCtx **tmp;
405
406 tmp = ctx->children;
407 ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
408 regex_add_multiple (ctx, suffix, tmp);
409 GNUNET_free (suffix);
410 GNUNET_free (tmp);
411 return;
412 }
413
414 /* The suffix is a normal string, add as one node */
415 newctx = new_regex_ctx (ctx->size);
416 newctx->s = suffix;
417 move_children (newctx, ctx);
418 idx = c2i(suffix[0], ctx->size);
419 ctx->children[idx] = newctx;
420}
421
422
423/**
424 * Add a single regex to a context, combining with exisiting regex by-prefix.
425 *
426 * @param ctx Context with 0 or more regexes.
427 * @param regex Regex to add.
428 */
429static void
430regex_add (struct RegexCombineCtx *ctx, const char *regex)
431{
432 struct RegexCombineCtx *p;
433 struct RegexCombineCtx *newctx;
434 long unsigned int l;
435 unsigned int prefix_l;
436 const char *rest_r;
437 const char *rest_s;
438 size_t len;
439 int idx;
440
441 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
442 "regex_add '%s' into '%s'\n",
443 regex, ctx->s);
444 l = strlen (regex);
445 if (0UL == l)
446 return;
447
448 /* If the regex is in the form of (a|b|c), add every character separately */
449 if ('(' == regex[0])
450 {
451 regex_add_multiple (ctx, regex, NULL);
452 return;
453 }
454
455 p = get_longest_prefix (ctx, regex);
456 if (NULL != p)
457 {
458 /* There is some prefix match, reduce regex and try again */
459 prefix_l = get_prefix_length (p->s, regex);
460 rest_s = &p->s[prefix_l];
461 rest_r = &regex[prefix_l];
462 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
463 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
464 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
465 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
466 len = strlen (p->s);
467 if (prefix_l < len)
468 {
469 regex_split (p, len, prefix_l);
470 }
471 regex_add (p, rest_r);
472 return;
473 }
474
475 /* There is no prefix match, add new */
476 idx = c2i(regex[0], ctx->size);
477 if (NULL == ctx->children[idx] && NULL != ctx->s)
478 {
479 /* this was the end before, add empty string */
480 newctx = new_regex_ctx (ctx->size);
481 newctx->s = GNUNET_strdup ("");
482 ctx->children[idx] = newctx;
483 }
484 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
485 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
486 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
487 newctx = new_regex_ctx(ctx->size);
488 newctx->s = GNUNET_strdup (regex);
489 ctx->children[idx] = newctx;
490}
491
492
493/**
494 * Free all resources used by the context node and all its children.
495 *
496 * @param ctx Context to free.
497 */
498static void
499regex_ctx_destroy (struct RegexCombineCtx *ctx)
500{
501 unsigned int i;
502
503 if (NULL == ctx)
504 return;
505
506 for (i = 0; i < ctx->size; i++)
507 {
508 regex_ctx_destroy (ctx->children[i]);
509 }
510 GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
511 GNUNET_free (ctx->children);
512 GNUNET_free (ctx);
513}
514
515
516/**
517 * Combine an array of regexes into a single prefix-shared regex.
518 * Returns a prefix-combine regex that matches the same strings as
519 * any of the original regexes.
520 *
521 * WARNING: only useful for reading specific regexes for specific applications,
522 * namely the gnunet-regex-profiler / gnunet-regex-daemon.
523 * This function DOES NOT support arbitrary regex combining.
524 *
525 * @param regexes A NULL-terminated array of regexes.
526 * @param alphabet_size Size of the alphabet the regex uses.
527 *
528 * @return A string with a single regex that matches any of the original regexes
529 */
530char *
531REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size)
532{
533 unsigned int i;
534 char *combined;
535 const char *current;
536 struct RegexCombineCtx *ctx;
537
538 ctx = new_regex_ctx (alphabet_size);
539 for (i = 0; regexes[i]; i++)
540 {
541 current = regexes[i];
542 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
543 regex_add (ctx, current);
544 debugctx (ctx, 0);
545 }
546 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
547 debugctx (ctx, 0);
548
549 combined = regex_combine (ctx);
550
551 regex_ctx_destroy (ctx);
552
553 return combined;
554}
555
556
557/**
558 * Read a set of regexes from a file, one per line and return them in an array
559 * suitable for REGEX_TEST_combine.
560 * The array must be free'd using REGEX_TEST_free_from_file.
561 *
562 * @param filename Name of the file containing the regexes.
563 *
564 * @return A newly allocated, NULL terminated array of regexes.
565 */
566char **
567REGEX_TEST_read_from_file (const char *filename)
568{
569 struct GNUNET_DISK_FileHandle *f;
570 unsigned int nr;
571 unsigned int offset;
572 off_t size;
573 size_t len;
574 char *buffer;
575 char *regex;
576 char **regexes;
577
578 f = GNUNET_DISK_file_open (filename,
579 GNUNET_DISK_OPEN_READ,
580 GNUNET_DISK_PERM_NONE);
581 if (NULL == f)
582 {
583 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
584 "Can't open file %s for reading\n", filename);
585 return NULL;
586 }
587 if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size))
588 {
589 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
590 "Can't get size of file %s\n", filename);
591 GNUNET_DISK_file_close (f);
592 return NULL;
593 }
594 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
595 "using file %s, size %llu\n",
596 filename, (unsigned long long) size);
597
598 buffer = GNUNET_malloc (size + 1);
599 GNUNET_DISK_file_read (f, buffer, size);
600 GNUNET_DISK_file_close (f);
601 regexes = GNUNET_malloc (sizeof (char *));
602 nr = 1;
603 offset = 0;
604 regex = NULL;
605 do
606 {
607 if (NULL == regex)
608 regex = GNUNET_malloc (size + 1);
609 len = (size_t) sscanf (&buffer[offset], "%s", regex);
610 if (0 == len)
611 break;
612 len = strlen (regex);
613 offset += len + 1;
614 if (len < 1)
615 continue;
616 regex[len] = '\0';
617 regex = GNUNET_realloc (regex, len + 1);
618 GNUNET_array_grow (regexes, nr, nr + 1);
619 GNUNET_assert (NULL == regexes[nr - 2]);
620 regexes[nr - 2] = regex;
621 regexes[nr - 1] = NULL;
622 regex = NULL;
623 } while (offset < size);
624 GNUNET_free_non_null (regex);
625 GNUNET_free (buffer);
626
627 return regexes;
628}
629
630
631/**
632 * Free all memory reserved for a set of regexes created by read_from_file.
633 *
634 * @param regexes NULL-terminated array of regexes.
635 */
636void
637REGEX_TEST_free_from_file (char **regexes)
638{
639 unsigned int i;
640
641 for (i = 0; regexes[i]; i++)
642 GNUNET_free (regexes[i]);
643 GNUNET_free (regexes);
644}
645
646/* end of regex_test_lib.c */