aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorBart Polot <bart.polot+voyager@gmail.com>2017-05-10 20:58:28 +0200
committerBart Polot <bart.polot+voyager@gmail.com>2017-05-10 20:58:28 +0200
commit5031ce9079f9e5292468374fa8d4a95462e7168a (patch)
tree70812f66790a1775384ccd6006ac1a88fdd73a62 /src/regex
parent1c2ab4aa3b9b563ad2098984b5751e67d3267778 (diff)
downloadgnunet-5031ce9079f9e5292468374fa8d4a95462e7168a.tar.gz
gnunet-5031ce9079f9e5292468374fa8d4a95462e7168a.zip
Change regex combination, allow hex
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/gnunet-daemon-regexprofiler.c2
-rw-r--r--src/regex/gnunet-regex-profiler.c134
-rw-r--r--src/regex/perf-regex.c15
-rw-r--r--src/regex/regex_test_lib.c336
-rw-r--r--src/regex/regex_test_lib.h10
5 files changed, 400 insertions, 97 deletions
diff --git a/src/regex/gnunet-daemon-regexprofiler.c b/src/regex/gnunet-daemon-regexprofiler.c
index 42fc8ace0..24919aebe 100644
--- a/src/regex/gnunet-daemon-regexprofiler.c
+++ b/src/regex/gnunet-daemon-regexprofiler.c
@@ -340,7 +340,7 @@ run (void *cls, char *const *args GNUNET_UNUSED,
340 return; 340 return;
341 } 341 }
342 GNUNET_free (policy_dir); 342 GNUNET_free (policy_dir);
343 regex = REGEX_TEST_combine (components); 343 regex = REGEX_TEST_combine (components, 16);
344 REGEX_TEST_free_from_file (components); 344 REGEX_TEST_free_from_file (components);
345 345
346 /* Announcing regexes from policy_filename */ 346 /* Announcing regexes from policy_filename */
diff --git a/src/regex/gnunet-regex-profiler.c b/src/regex/gnunet-regex-profiler.c
index 7d907fec6..73ca7c809 100644
--- a/src/regex/gnunet-regex-profiler.c
+++ b/src/regex/gnunet-regex-profiler.c
@@ -1,6 +1,6 @@
1/* 1/*
2 This file is part of GNUnet. 2 This file is part of GNUnet.
3 Copyright (C) 2011 - 2013 GNUnet e.V. 3 Copyright (C) 2011 - 2017 GNUnet e.V.
4 4
5 GNUnet is free software; you can redistribute it and/or modify 5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published 6 it under the terms of the GNU General Public License as published
@@ -1198,6 +1198,94 @@ master_controller_cb (void *cls,
1198/*************************** TESTBED PEER SETUP *****************************/ 1198/*************************** TESTBED PEER SETUP *****************************/
1199/******************************************************************************/ 1199/******************************************************************************/
1200 1200
1201/**
1202 * Process the text buffer counting the non-empty lines and separating them
1203 * with NULL characters, for later ease of copy using (as)printf.
1204 *
1205 * @param data Memory buffer with strings.
1206 * @param data_size Size of the @a data buffer in bytes.
1207 * @param str_max Maximum number of strings to return.
1208 * @return Positive number of lines found in the buffer,
1209 * #GNUNET_SYSERR otherwise.
1210 */
1211static int
1212count_and_separate_strings (char *data,
1213 uint64_t data_size,
1214 unsigned int str_max)
1215{
1216 char *buf; // Keep track of last string to skip blank lines
1217 unsigned int offset;
1218 unsigned int str_cnt;
1219
1220 buf = data;
1221 offset = 0;
1222 str_cnt = 0;
1223 while ( (offset < (data_size - 1)) && (str_cnt < str_max) )
1224 {
1225 offset++;
1226 if ( ((data[offset] == '\n')) &&
1227 (buf != &data[offset]) )
1228 {
1229 data[offset] = '\0';
1230 str_cnt++;
1231 buf = &data[offset + 1];
1232 }
1233 else if ( (data[offset] == '\n') ||
1234 (data[offset] == '\0') )
1235 buf = &data[offset + 1];
1236 }
1237 return str_cnt;
1238}
1239
1240
1241/**
1242 * Allocate a string array and fill it with the prefixed strings
1243 * from a pre-processed, NULL-separated memory region.
1244 *
1245 * @param data Preprocessed memory with strings
1246 * @param data_size Size of the @a data buffer in bytes.
1247 * @param strings Address of the string array to be created.
1248 * Must be freed by caller if function end in success.
1249 * @param str_cnt String count. The @a data buffer should contain
1250 * at least this many NULL-separated strings.
1251 * @return #GNUNET_OK in ase of success, #GNUNET_SYSERR otherwise.
1252 * In case of error @a strings must not be freed.
1253 */
1254static int
1255create_string_array (char *data, uint64_t data_size,
1256 char ***strings, unsigned int str_cnt)
1257{
1258 uint64_t offset;
1259 uint64_t len;
1260 unsigned int i;
1261
1262 *strings = GNUNET_malloc (sizeof (char *) * str_cnt);
1263 offset = 0;
1264 for (i = 0; i < str_cnt; i++)
1265 {
1266 len = strlen (&data[offset]);
1267 if (offset + len >= data_size)
1268 {
1269 GNUNET_free (*strings);
1270 *strings = NULL;
1271 return GNUNET_SYSERR;
1272 }
1273 if (0 == len) // empty line
1274 {
1275 offset++;
1276 i--;
1277 continue;
1278 }
1279
1280 GNUNET_asprintf (&(*strings)[i],
1281 "%s%s",
1282 regex_prefix,
1283 &data[offset]);
1284 offset += len + 1;
1285 }
1286 return GNUNET_OK;
1287}
1288
1201 1289
1202/** 1290/**
1203 * Load search strings from given filename. One search string per line. 1291 * Load search strings from given filename. One search string per line.
@@ -1214,17 +1302,14 @@ load_search_strings (const char *filename,
1214 unsigned int limit) 1302 unsigned int limit)
1215{ 1303{
1216 char *data; 1304 char *data;
1217 char *buf;
1218 uint64_t filesize; 1305 uint64_t filesize;
1219 unsigned int offset;
1220 int str_cnt; 1306 int str_cnt;
1221 unsigned int i;
1222 1307
1308 /* Sanity checks */
1223 if (NULL == filename) 1309 if (NULL == filename)
1224 { 1310 {
1225 return GNUNET_SYSERR; 1311 return GNUNET_SYSERR;
1226 } 1312 }
1227
1228 if (GNUNET_YES != GNUNET_DISK_file_test (filename)) 1313 if (GNUNET_YES != GNUNET_DISK_file_test (filename))
1229 { 1314 {
1230 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, 1315 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
@@ -1236,7 +1321,12 @@ load_search_strings (const char *filename,
1236 &filesize, 1321 &filesize,
1237 GNUNET_YES, 1322 GNUNET_YES,
1238 GNUNET_YES)) 1323 GNUNET_YES))
1239 filesize = 0; 1324 {
1325 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
1326 "Search strings file %s cannot be read.\n",
1327 filename);
1328 return GNUNET_SYSERR;
1329 }
1240 if (0 == filesize) 1330 if (0 == filesize)
1241 { 1331 {
1242 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, 1332 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
@@ -1244,6 +1334,8 @@ load_search_strings (const char *filename,
1244 filename); 1334 filename);
1245 return GNUNET_SYSERR; 1335 return GNUNET_SYSERR;
1246 } 1336 }
1337
1338 /* Read data into memory */
1247 data = GNUNET_malloc (filesize + 1); 1339 data = GNUNET_malloc (filesize + 1);
1248 if (filesize != GNUNET_DISK_fn_read (filename, 1340 if (filesize != GNUNET_DISK_fn_read (filename,
1249 data, 1341 data,
@@ -1255,32 +1347,12 @@ load_search_strings (const char *filename,
1255 filename); 1347 filename);
1256 return GNUNET_SYSERR; 1348 return GNUNET_SYSERR;
1257 } 1349 }
1258 buf = data; 1350
1259 offset = 0; 1351 /* Process buffer and build array */
1260 str_cnt = 0; 1352 str_cnt = count_and_separate_strings (data, filesize, limit);
1261 while ( (offset < (filesize - 1)) && (str_cnt < limit) ) 1353 if (GNUNET_OK != create_string_array (data, filesize, strings, str_cnt))
1262 {
1263 offset++;
1264 if ( ((data[offset] == '\n')) &&
1265 (buf != &data[offset]) )
1266 {
1267 data[offset] = '\0';
1268 str_cnt++;
1269 buf = &data[offset + 1];
1270 }
1271 else if ( (data[offset] == '\n') ||
1272 (data[offset] == '\0') )
1273 buf = &data[offset + 1];
1274 }
1275 *strings = GNUNET_malloc (sizeof (char *) * str_cnt);
1276 offset = 0;
1277 for (i = 0; i < str_cnt; i++)
1278 { 1354 {
1279 GNUNET_asprintf (&(*strings)[i], 1355 str_cnt = GNUNET_SYSERR;
1280 "%s%s",
1281 regex_prefix,
1282 &data[offset]);
1283 offset += strlen (&data[offset]) + 1;
1284 } 1356 }
1285 GNUNET_free (data); 1357 GNUNET_free (data);
1286 return str_cnt; 1358 return str_cnt;
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c
index c7a5e6c5f..ec42d2625 100644
--- a/src/regex/perf-regex.c
+++ b/src/regex/perf-regex.c
@@ -80,13 +80,14 @@ main (int argc, char *const *argv)
80 char *buffer; 80 char *buffer;
81 char *regex; 81 char *regex;
82 int compression; 82 int compression;
83 unsigned int alphabet_size;
83 long size; 84 long size;
84 85
85 GNUNET_log_setup ("perf-regex", "DEBUG", NULL); 86 GNUNET_log_setup ("perf-regex", "DEBUG", NULL);
86 if (3 != argc) 87 if (4 != argc)
87 { 88 {
88 fprintf (stderr, 89 fprintf (stderr,
89 "Usage: %s REGEX_FILE COMPRESSION\n", 90 "Usage: %s REGEX_FILE ALPHABET_SIZE COMPRESSION\n",
90 argv[0]); 91 argv[0]);
91 return 1; 92 return 1;
92 } 93 }
@@ -98,9 +99,13 @@ main (int argc, char *const *argv)
98 argv[1]); 99 argv[1]);
99 return 2; 100 return 2;
100 } 101 }
101 compression = atoi (argv[2]); 102 alphabet_size = atoi (argv[2]);
102 103 compression = atoi (argv[3]);
103 buffer = REGEX_TEST_combine (regexes); 104 printf ("********* PERF-REGEX *********'\n");
105 printf ("Using:\n file '%s'\n Alphabet size %u\n compression %d\n",
106 argv[1], alphabet_size, compression);
107 fflush(stdout);
108 buffer = REGEX_TEST_combine (regexes, alphabet_size);
104 GNUNET_asprintf (&regex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer); 109 GNUNET_asprintf (&regex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer);
105 size = strlen (regex); 110 size = strlen (regex);
106 111
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c
index c42dbcef6..f4025f652 100644
--- a/src/regex/regex_test_lib.c
+++ b/src/regex/regex_test_lib.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * This file is part of GNUnet 2 * This file is part of GNUnet
3 * Copyright (C) 2012 GNUnet e.V. 3 * Copyright (C) 2012-2017 GNUnet e.V.
4 * 4 *
5 * GNUnet is free software; you can redistribute it and/or modify 5 * GNUnet is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published 6 * it under the terms of the GNU General Public License as published
@@ -32,27 +32,17 @@
32/** 32/**
33 * Struct to hold the tree formed by prefix-combining the regexes. 33 * Struct to hold the tree formed by prefix-combining the regexes.
34 */ 34 */
35struct RegexCombineCtx { 35struct RegexCombineCtx
36 36{
37 /**
38 * Next node with same prefix but different token.
39 */
40 struct RegexCombineCtx *next;
41
42 /**
43 * Prev node with same prefix but different token.
44 */
45 struct RegexCombineCtx *prev;
46
47 /** 37 /**
48 * First child node with same prefix and token. 38 * Child nodes with same prefix and token.
49 */ 39 */
50 struct RegexCombineCtx *head; 40 struct RegexCombineCtx **children;
51 41
52 /** 42 /**
53 * Last child node. 43 * Alphabet size (how many @a children there are)
54 */ 44 */
55 struct RegexCombineCtx *tail; 45 unsigned int size;
56 46
57 /** 47 /**
58 * Token. 48 * Token.
@@ -60,30 +50,133 @@ struct RegexCombineCtx {
60 char *s; 50 char *s;
61}; 51};
62 52
63/* 53
54/**
55 * Char 2 int
56 *
57 * Convert a character into its int value depending on the base used
58 *
59 * @param c Char
60 * @param size base (2, 8 or 16(hex))
61 *
62 * @return Int in range [0, (base-1)]
63 */
64static int
65c2i (char c, int size)
66{
67 switch (size)
68 {
69 case 2:
70 case 8:
71 return c - '0';
72 break;
73 case 16:
74 if (c >= '0' && c <= '9')
75 return c - '0';
76 else if (c >= 'A' && c <= 'F')
77 return c - 'A' + 10;
78 else if (c >= 'a' && c <= 'f')
79 return c - 'a' + 10;
80 else
81 {
82 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
83 "Cannot convert char %c in base %u\n",
84 c, size);
85 GNUNET_assert (0);
86 }
87 break;
88 default:
89 GNUNET_assert (0);
90 }
91}
92
93
94/**
95 * Printf spaces to indent the regex tree
96 *
97 * @param n Indentation level
98 */
64static void 99static void
65space (int n) 100space (int n)
66{ 101{
67 int i; 102 int i;
68 for (i = 0; i < n; i++) 103 for (i = 0; i < n; i++)
69 printf (" "); 104 fprintf (stderr, "| ");
70} 105}
71 106
107
108/**
109 * Printf the combined regex ctx.
110 *
111 * @param ctx The ctx to printf
112 * @param level Indentation level to start with
113 */
72static void 114static void
73debugctx (struct RegexCombineCtx *ctx, int level) 115debugctx (struct RegexCombineCtx *ctx, int level)
74{ 116{
75 struct RegexCombineCtx *p; 117 return;
76 space (level); 118 unsigned int i;
77 if (NULL != ctx->s) 119 if (NULL != ctx->s)
78 printf ("'%s'\n", ctx->s); 120 {
121 space (level - 1);
122 fprintf (stderr, "%u:'%s'\n", c2i(ctx->s[0], ctx->size), ctx->s);
123 }
79 else 124 else
80 printf ("NULL\n"); 125 fprintf (stderr, "ROOT (base %u)\n", ctx->size);
81 for (p = ctx->head; NULL != p; p = p->next) 126 for (i = 0; i < ctx->size; i++)
127 {
128 if (NULL != ctx->children[i])
129 {
130 space (level);
131 debugctx (ctx->children[i], level + 1);
132 }
133 }
134 fflush(stderr);
135}
136
137
138/**
139 * Add a single regex to a context, combining with exisiting regex by-prefix.
140 *
141 * @param ctx Context with 0 or more regexes.
142 * @param regex Regex to add.
143 */
144static void
145regex_add (struct RegexCombineCtx *ctx, const char *regex);
146
147
148/**
149 * Create and initialize a new RegexCombineCtx.
150 *
151 * @param alphabet_size Size of the alphabet (and the Trie array)
152 */
153static struct RegexCombineCtx *
154new_regex_ctx (unsigned int alphabet_size)
155{
156 struct RegexCombineCtx *ctx;
157 size_t array_size;
158
159 array_size = sizeof(struct RegexCombineCtx *) * alphabet_size;
160 ctx = GNUNET_new (struct RegexCombineCtx);
161 ctx->children = GNUNET_malloc (array_size);
162 ctx->size = alphabet_size;
163
164 return ctx;
165}
166
167static void
168move_children (struct RegexCombineCtx *dst, const struct RegexCombineCtx *src)
169{
170 size_t array_size;
171
172 array_size = sizeof(struct RegexCombineCtx *) * src->size;
173 memcpy (dst->children, src->children, array_size);
174 for (int i = 0; i < src->size; i++)
82 { 175 {
83 debugctx (p, level + 1); 176 src->children[i] = NULL;
84 } 177 }
85} 178}
86*/ 179
87 180
88/** 181/**
89 * Extract a string from all prefix-combined regexes. 182 * Extract a string from all prefix-combined regexes.
@@ -96,6 +189,7 @@ static char *
96regex_combine (struct RegexCombineCtx *ctx) 189regex_combine (struct RegexCombineCtx *ctx)
97{ 190{
98 struct RegexCombineCtx *p; 191 struct RegexCombineCtx *p;
192 unsigned int i;
99 size_t len; 193 size_t len;
100 char *regex; 194 char *regex;
101 char *tmp; 195 char *tmp;
@@ -105,9 +199,14 @@ regex_combine (struct RegexCombineCtx *ctx)
105 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s); 199 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s);
106 regex = GNUNET_strdup (""); 200 regex = GNUNET_strdup ("");
107 opt = GNUNET_NO; 201 opt = GNUNET_NO;
108 for (p = ctx->head; NULL != p; p = p->next) 202 for (i = 0; i < ctx->size; i++)
109 { 203 {
110 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding '%s' to innner %s\n", p->s, ctx->s); 204 p = ctx->children[i];
205 if (NULL == p)
206 continue;
207 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
208 "adding '%s' to innner %s\n",
209 p->s, ctx->s);
111 s = regex_combine (p); 210 s = regex_combine (p);
112 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); 211 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
113 if (strlen(s) == 0) 212 if (strlen(s) == 0)
@@ -171,10 +270,10 @@ get_prefix_length (const char *s1, const char *s2)
171 l2 = strlen (s2); 270 l2 = strlen (s2);
172 limit = l1 > l2 ? l2 : l1; 271 limit = l1 > l2 ? l2 : l1;
173 272
174 for (i = 1; i <= limit; i++) 273 for (i = 0; i < limit; i++)
175 { 274 {
176 if (0 != strncmp (s1, s2, i)) 275 if (s1[i] != s2[i])
177 return i - 1; 276 return i;
178 } 277 }
179 return limit; 278 return limit;
180} 279}
@@ -194,13 +293,19 @@ get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
194{ 293{
195 struct RegexCombineCtx *p; 294 struct RegexCombineCtx *p;
196 struct RegexCombineCtx *best; 295 struct RegexCombineCtx *best;
296 unsigned int i;
197 unsigned int l; 297 unsigned int l;
198 unsigned int best_l; 298 unsigned int best_l;
199 299
200 best_l = 0; 300 best_l = 0;
201 best = NULL; 301 best = NULL;
202 for (p = ctx->head; NULL != p; p = p->next) 302
303 for (i = 0; i < ctx->size; i++)
203 { 304 {
305 p = ctx->children[i];
306 if (NULL == p)
307 continue;
308
204 l = get_prefix_length (p->s, regex); 309 l = get_prefix_length (p->s, regex);
205 if (l > best_l) 310 if (l > best_l)
206 { 311 {
@@ -212,6 +317,103 @@ get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
212 return best; 317 return best;
213} 318}
214 319
320static void
321regex_add_multiple (struct RegexCombineCtx *ctx,
322 const char *regex,
323 struct RegexCombineCtx **children)
324{
325 char tmp[2];
326 long unsigned int i;
327 size_t l;
328 struct RegexCombineCtx *newctx;
329 unsigned int count;
330
331 if ('(' != regex[0])
332 {
333 GNUNET_assert (0);
334 }
335
336 /* Does the regex cover *all* possible children? Then don't add any,
337 * as it will be covered by the post-regex "(a-z)*"
338 */
339 l = strlen (regex);
340 count = 0;
341 for (i = 1UL; i < l; i++)
342 {
343 if (regex[i] != '|' && regex[i] != ')')
344 {
345 count++;
346 }
347 }
348 if (count == ctx->size)
349 {
350 return;
351 }
352
353 /* Add every component as a child node */
354 tmp[1] = '\0';
355 for (i = 1UL; i < l; i++)
356 {
357 if (regex[i] != '|' && regex[i] != ')')
358 {
359 tmp[0] = regex[i];
360 newctx = new_regex_ctx(ctx->size);
361 newctx->s = GNUNET_strdup (tmp);
362 if (children != NULL)
363 memcpy (newctx->children, children, sizeof (*children) * ctx->size);
364 ctx->children[c2i(tmp[0], ctx->size)] = newctx;
365 }
366 }
367}
368
369/**
370 * Add a single regex to a context, splitting the exisiting state.
371 *
372 * We only had a partial match, split existing state, truncate the current node
373 * so it only contains the prefix, add suffix(es) as children.
374 *
375 * @param ctx Context to split.
376 * @param len Lenght of ctx->s
377 * @param prefix_l Lenght of common prefix of the new regex and @a ctx->s
378 */
379static void
380regex_split (struct RegexCombineCtx *ctx,
381 unsigned int len,
382 unsigned int prefix_l)
383{
384 struct RegexCombineCtx *newctx;
385 unsigned int idx;
386 char *suffix;
387
388 suffix = GNUNET_malloc (len - prefix_l + 1);
389 strncpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1);
390
391 /* Suffix saved, truncate current node so it only contains the prefix,
392 * copy any children nodes to put as grandchildren and initialize new empty
393 * children array.
394 */
395 ctx->s[prefix_l] = '\0';
396
397 /* If the suffix is an OR expression, add multiple children */
398 if ('(' == suffix[0])
399 {
400 struct RegexCombineCtx **tmp;
401
402 tmp = ctx->children;
403 ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size);
404 regex_add_multiple (ctx, suffix, tmp);
405 GNUNET_free (tmp);
406 return;
407 }
408
409 /* The suffix is a normal string, add as one node */
410 newctx = new_regex_ctx (ctx->size);
411 newctx->s = suffix;
412 move_children (newctx, ctx);
413 idx = c2i(suffix[0], ctx->size);
414 ctx->children[idx] = newctx;
415}
416
215 417
216/** 418/**
217 * Add a single regex to a context, combining with exisiting regex by-prefix. 419 * Add a single regex to a context, combining with exisiting regex by-prefix.
@@ -224,17 +426,31 @@ regex_add (struct RegexCombineCtx *ctx, const char *regex)
224{ 426{
225 struct RegexCombineCtx *p; 427 struct RegexCombineCtx *p;
226 struct RegexCombineCtx *newctx; 428 struct RegexCombineCtx *newctx;
429 long unsigned int l;
227 unsigned int prefix_l; 430 unsigned int prefix_l;
228 const char *rest_r; 431 const char *rest_r;
229 const char *rest_s; 432 const char *rest_s;
230 size_t len; 433 size_t len;
434 int idx;
435
436 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
437 "regex_add '%s' into '%s'\n",
438 regex, ctx->s);
439 l = strlen (regex);
440 if (0UL == l)
441 return;
231 442
232 if (0 == strlen (regex)) 443 /* If the regex is in the form of (a|b|c), add every character separately */
444 if ('(' == regex[0])
445 {
446 regex_add_multiple (ctx, regex, NULL);
233 return; 447 return;
448 }
234 449
235 p = get_longest_prefix (ctx, regex); 450 p = get_longest_prefix (ctx, regex);
236 if (NULL != p) /* There is some prefix match, reduce regex and try again */ 451 if (NULL != p)
237 { 452 {
453 /* There is some prefix match, reduce regex and try again */
238 prefix_l = get_prefix_length (p->s, regex); 454 prefix_l = get_prefix_length (p->s, regex);
239 rest_s = &p->s[prefix_l]; 455 rest_s = &p->s[prefix_l];
240 rest_r = &regex[prefix_l]; 456 rest_r = &regex[prefix_l];
@@ -243,35 +459,29 @@ regex_add (struct RegexCombineCtx *ctx, const char *regex)
243 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); 459 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
244 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); 460 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
245 len = strlen (p->s); 461 len = strlen (p->s);
246 if (prefix_l < len) /* only partial match, split existing state */ 462 if (prefix_l < len)
247 { 463 {
248 newctx = GNUNET_new (struct RegexCombineCtx); 464 regex_split (p, len, prefix_l);
249 newctx->head = p->head;
250 newctx->tail = p->tail;
251 newctx->s = GNUNET_malloc(len - prefix_l + 1);
252 strncpy (newctx->s, rest_s, len - prefix_l + 1);
253
254 p->head = newctx;
255 p->tail = newctx;
256 p->s[prefix_l] = '\0';
257 } 465 }
258 regex_add (p, rest_r); 466 regex_add (p, rest_r);
259 return; 467 return;
260 } 468 }
469
261 /* There is no prefix match, add new */ 470 /* There is no prefix match, add new */
262 if (NULL == ctx->head && NULL != ctx->s) 471 idx = c2i(regex[0], ctx->size);
472 if (NULL == ctx->children[idx] && NULL != ctx->s)
263 { 473 {
264 /* this was the end before, add empty string */ 474 /* this was the end before, add empty string */
265 newctx = GNUNET_new (struct RegexCombineCtx); 475 newctx = new_regex_ctx (ctx->size);
266 newctx->s = GNUNET_strdup (""); 476 newctx->s = GNUNET_strdup ("");
267 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); 477 ctx->children[idx] = newctx;
268 } 478 }
269 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); 479 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
270 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); 480 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
271 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); 481 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
272 newctx = GNUNET_new (struct RegexCombineCtx); 482 newctx = new_regex_ctx(ctx->size);
273 newctx->s = GNUNET_strdup (regex); 483 newctx->s = GNUNET_strdup (regex);
274 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); 484 ctx->children[idx] = newctx;
275} 485}
276 486
277 487
@@ -283,45 +493,53 @@ regex_add (struct RegexCombineCtx *ctx, const char *regex)
283static void 493static void
284regex_ctx_destroy (struct RegexCombineCtx *ctx) 494regex_ctx_destroy (struct RegexCombineCtx *ctx)
285{ 495{
286 struct RegexCombineCtx *p; 496 unsigned int i;
287 struct RegexCombineCtx *next; 497
498 if (NULL == ctx)
499 return;
288 500
289 for (p = ctx->head; NULL != p; p = next) 501 for (i = 0; i < ctx->size; i++)
290 { 502 {
291 next = p->next; 503 regex_ctx_destroy (ctx->children[i]);
292 regex_ctx_destroy (p);
293 } 504 }
294 GNUNET_free_non_null (ctx->s); /* 's' on root node is null */ 505 GNUNET_free_non_null (ctx->s); /* 's' on root node is null */
506 GNUNET_free (ctx->children);
295 GNUNET_free (ctx); 507 GNUNET_free (ctx);
296} 508}
297 509
298 510
299/** 511/**
300 * Return a prefix-combine regex that matches the same strings as 512 * Combine an array of regexes into a single prefix-shared regex.
513 * Returns a prefix-combine regex that matches the same strings as
301 * any of the original regexes. 514 * any of the original regexes.
302 * 515 *
303 * WARNING: only useful for reading specific regexes for specific applications, 516 * WARNING: only useful for reading specific regexes for specific applications,
304 * namely the gnunet-regex-profiler / gnunet-regex-daemon. 517 * namely the gnunet-regex-profiler / gnunet-regex-daemon.
305 * This function DOES NOT support arbitrary regex combining. 518 * This function DOES NOT support arbitrary regex combining.
519 *
520 * @param regexes A NULL-terminated array of regexes.
521 * @param alphabet_size Size of the alphabet the regex uses.
522 *
523 * @return A string with a single regex that matches any of the original regexes
306 */ 524 */
307char * 525char *
308REGEX_TEST_combine (char * const regexes[]) 526REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size)
309{ 527{
310 unsigned int i; 528 unsigned int i;
311 char *combined; 529 char *combined;
312 const char *current; 530 const char *current;
313 struct RegexCombineCtx *ctx; 531 struct RegexCombineCtx *ctx;
314 532
315 ctx = GNUNET_new (struct RegexCombineCtx); 533 ctx = new_regex_ctx (alphabet_size);
316 for (i = 0; regexes[i]; i++) 534 for (i = 0; regexes[i]; i++)
317 { 535 {
318 current = regexes[i]; 536 current = regexes[i];
319 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); 537 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
320 regex_add (ctx, current); 538 regex_add (ctx, current);
321 /* debugctx (ctx, 0); */ 539 debugctx (ctx, 0);
322 } 540 }
323 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); 541 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
324 /* debugctx (ctx, 0); */ 542 debugctx (ctx, 0);
325 543
326 combined = regex_combine (ctx); 544 combined = regex_combine (ctx);
327 545
diff --git a/src/regex/regex_test_lib.h b/src/regex/regex_test_lib.h
index 83414e5dc..c4f7e7539 100644
--- a/src/regex/regex_test_lib.h
+++ b/src/regex/regex_test_lib.h
@@ -38,15 +38,23 @@ extern "C"
38#endif 38#endif
39#endif 39#endif
40 40
41
41/** 42/**
42 * Combine an array of regexes into a single prefix-shared regex. 43 * Combine an array of regexes into a single prefix-shared regex.
44 * Returns a prefix-combine regex that matches the same strings as
45 * any of the original regexes.
46 *
47 * WARNING: only useful for reading specific regexes for specific applications,
48 * namely the gnunet-regex-profiler / gnunet-regex-daemon.
49 * This function DOES NOT support arbitrary regex combining.
43 * 50 *
44 * @param regexes A NULL-terminated array of regexes. 51 * @param regexes A NULL-terminated array of regexes.
52 * @param alphabet_size Size of the alphabet the regex uses.
45 * 53 *
46 * @return A string with a single regex that matches any of the original regexes 54 * @return A string with a single regex that matches any of the original regexes
47 */ 55 */
48char * 56char *
49REGEX_TEST_combine(char * const regexes[]); 57REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size);
50 58
51 59
52/** 60/**