diff options
author | Bart Polot <bart.polot+voyager@gmail.com> | 2017-05-10 20:58:28 +0200 |
---|---|---|
committer | Bart Polot <bart.polot+voyager@gmail.com> | 2017-05-10 20:58:28 +0200 |
commit | 5031ce9079f9e5292468374fa8d4a95462e7168a (patch) | |
tree | 70812f66790a1775384ccd6006ac1a88fdd73a62 /src | |
parent | 1c2ab4aa3b9b563ad2098984b5751e67d3267778 (diff) | |
download | gnunet-5031ce9079f9e5292468374fa8d4a95462e7168a.tar.gz gnunet-5031ce9079f9e5292468374fa8d4a95462e7168a.zip |
Change regex combination, allow hex
Diffstat (limited to 'src')
-rw-r--r-- | src/regex/gnunet-daemon-regexprofiler.c | 2 | ||||
-rw-r--r-- | src/regex/gnunet-regex-profiler.c | 134 | ||||
-rw-r--r-- | src/regex/perf-regex.c | 15 | ||||
-rw-r--r-- | src/regex/regex_test_lib.c | 336 | ||||
-rw-r--r-- | src/regex/regex_test_lib.h | 10 |
5 files changed, 400 insertions, 97 deletions
diff --git a/src/regex/gnunet-daemon-regexprofiler.c b/src/regex/gnunet-daemon-regexprofiler.c index 42fc8ace0..24919aebe 100644 --- a/src/regex/gnunet-daemon-regexprofiler.c +++ b/src/regex/gnunet-daemon-regexprofiler.c | |||
@@ -340,7 +340,7 @@ run (void *cls, char *const *args GNUNET_UNUSED, | |||
340 | return; | 340 | return; |
341 | } | 341 | } |
342 | GNUNET_free (policy_dir); | 342 | GNUNET_free (policy_dir); |
343 | regex = REGEX_TEST_combine (components); | 343 | regex = REGEX_TEST_combine (components, 16); |
344 | REGEX_TEST_free_from_file (components); | 344 | REGEX_TEST_free_from_file (components); |
345 | 345 | ||
346 | /* Announcing regexes from policy_filename */ | 346 | /* Announcing regexes from policy_filename */ |
diff --git a/src/regex/gnunet-regex-profiler.c b/src/regex/gnunet-regex-profiler.c index 7d907fec6..73ca7c809 100644 --- a/src/regex/gnunet-regex-profiler.c +++ b/src/regex/gnunet-regex-profiler.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | This file is part of GNUnet. | 2 | This file is part of GNUnet. |
3 | Copyright (C) 2011 - 2013 GNUnet e.V. | 3 | Copyright (C) 2011 - 2017 GNUnet e.V. |
4 | 4 | ||
5 | GNUnet is free software; you can redistribute it and/or modify | 5 | GNUnet is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published | 6 | it under the terms of the GNU General Public License as published |
@@ -1198,6 +1198,94 @@ master_controller_cb (void *cls, | |||
1198 | /*************************** TESTBED PEER SETUP *****************************/ | 1198 | /*************************** TESTBED PEER SETUP *****************************/ |
1199 | /******************************************************************************/ | 1199 | /******************************************************************************/ |
1200 | 1200 | ||
1201 | /** | ||
1202 | * Process the text buffer counting the non-empty lines and separating them | ||
1203 | * with NULL characters, for later ease of copy using (as)printf. | ||
1204 | * | ||
1205 | * @param data Memory buffer with strings. | ||
1206 | * @param data_size Size of the @a data buffer in bytes. | ||
1207 | * @param str_max Maximum number of strings to return. | ||
1208 | * @return Positive number of lines found in the buffer, | ||
1209 | * #GNUNET_SYSERR otherwise. | ||
1210 | */ | ||
1211 | static int | ||
1212 | count_and_separate_strings (char *data, | ||
1213 | uint64_t data_size, | ||
1214 | unsigned int str_max) | ||
1215 | { | ||
1216 | char *buf; // Keep track of last string to skip blank lines | ||
1217 | unsigned int offset; | ||
1218 | unsigned int str_cnt; | ||
1219 | |||
1220 | buf = data; | ||
1221 | offset = 0; | ||
1222 | str_cnt = 0; | ||
1223 | while ( (offset < (data_size - 1)) && (str_cnt < str_max) ) | ||
1224 | { | ||
1225 | offset++; | ||
1226 | if ( ((data[offset] == '\n')) && | ||
1227 | (buf != &data[offset]) ) | ||
1228 | { | ||
1229 | data[offset] = '\0'; | ||
1230 | str_cnt++; | ||
1231 | buf = &data[offset + 1]; | ||
1232 | } | ||
1233 | else if ( (data[offset] == '\n') || | ||
1234 | (data[offset] == '\0') ) | ||
1235 | buf = &data[offset + 1]; | ||
1236 | } | ||
1237 | return str_cnt; | ||
1238 | } | ||
1239 | |||
1240 | |||
1241 | /** | ||
1242 | * Allocate a string array and fill it with the prefixed strings | ||
1243 | * from a pre-processed, NULL-separated memory region. | ||
1244 | * | ||
1245 | * @param data Preprocessed memory with strings | ||
1246 | * @param data_size Size of the @a data buffer in bytes. | ||
1247 | * @param strings Address of the string array to be created. | ||
1248 | * Must be freed by caller if function end in success. | ||
1249 | * @param str_cnt String count. The @a data buffer should contain | ||
1250 | * at least this many NULL-separated strings. | ||
1251 | * @return #GNUNET_OK in ase of success, #GNUNET_SYSERR otherwise. | ||
1252 | * In case of error @a strings must not be freed. | ||
1253 | */ | ||
1254 | static int | ||
1255 | create_string_array (char *data, uint64_t data_size, | ||
1256 | char ***strings, unsigned int str_cnt) | ||
1257 | { | ||
1258 | uint64_t offset; | ||
1259 | uint64_t len; | ||
1260 | unsigned int i; | ||
1261 | |||
1262 | *strings = GNUNET_malloc (sizeof (char *) * str_cnt); | ||
1263 | offset = 0; | ||
1264 | for (i = 0; i < str_cnt; i++) | ||
1265 | { | ||
1266 | len = strlen (&data[offset]); | ||
1267 | if (offset + len >= data_size) | ||
1268 | { | ||
1269 | GNUNET_free (*strings); | ||
1270 | *strings = NULL; | ||
1271 | return GNUNET_SYSERR; | ||
1272 | } | ||
1273 | if (0 == len) // empty line | ||
1274 | { | ||
1275 | offset++; | ||
1276 | i--; | ||
1277 | continue; | ||
1278 | } | ||
1279 | |||
1280 | GNUNET_asprintf (&(*strings)[i], | ||
1281 | "%s%s", | ||
1282 | regex_prefix, | ||
1283 | &data[offset]); | ||
1284 | offset += len + 1; | ||
1285 | } | ||
1286 | return GNUNET_OK; | ||
1287 | } | ||
1288 | |||
1201 | 1289 | ||
1202 | /** | 1290 | /** |
1203 | * Load search strings from given filename. One search string per line. | 1291 | * Load search strings from given filename. One search string per line. |
@@ -1214,17 +1302,14 @@ load_search_strings (const char *filename, | |||
1214 | unsigned int limit) | 1302 | unsigned int limit) |
1215 | { | 1303 | { |
1216 | char *data; | 1304 | char *data; |
1217 | char *buf; | ||
1218 | uint64_t filesize; | 1305 | uint64_t filesize; |
1219 | unsigned int offset; | ||
1220 | int str_cnt; | 1306 | int str_cnt; |
1221 | unsigned int i; | ||
1222 | 1307 | ||
1308 | /* Sanity checks */ | ||
1223 | if (NULL == filename) | 1309 | if (NULL == filename) |
1224 | { | 1310 | { |
1225 | return GNUNET_SYSERR; | 1311 | return GNUNET_SYSERR; |
1226 | } | 1312 | } |
1227 | |||
1228 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) | 1313 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) |
1229 | { | 1314 | { |
1230 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | 1315 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, |
@@ -1236,7 +1321,12 @@ load_search_strings (const char *filename, | |||
1236 | &filesize, | 1321 | &filesize, |
1237 | GNUNET_YES, | 1322 | GNUNET_YES, |
1238 | GNUNET_YES)) | 1323 | GNUNET_YES)) |
1239 | filesize = 0; | 1324 | { |
1325 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1326 | "Search strings file %s cannot be read.\n", | ||
1327 | filename); | ||
1328 | return GNUNET_SYSERR; | ||
1329 | } | ||
1240 | if (0 == filesize) | 1330 | if (0 == filesize) |
1241 | { | 1331 | { |
1242 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | 1332 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, |
@@ -1244,6 +1334,8 @@ load_search_strings (const char *filename, | |||
1244 | filename); | 1334 | filename); |
1245 | return GNUNET_SYSERR; | 1335 | return GNUNET_SYSERR; |
1246 | } | 1336 | } |
1337 | |||
1338 | /* Read data into memory */ | ||
1247 | data = GNUNET_malloc (filesize + 1); | 1339 | data = GNUNET_malloc (filesize + 1); |
1248 | if (filesize != GNUNET_DISK_fn_read (filename, | 1340 | if (filesize != GNUNET_DISK_fn_read (filename, |
1249 | data, | 1341 | data, |
@@ -1255,32 +1347,12 @@ load_search_strings (const char *filename, | |||
1255 | filename); | 1347 | filename); |
1256 | return GNUNET_SYSERR; | 1348 | return GNUNET_SYSERR; |
1257 | } | 1349 | } |
1258 | buf = data; | 1350 | |
1259 | offset = 0; | 1351 | /* Process buffer and build array */ |
1260 | str_cnt = 0; | 1352 | str_cnt = count_and_separate_strings (data, filesize, limit); |
1261 | while ( (offset < (filesize - 1)) && (str_cnt < limit) ) | 1353 | if (GNUNET_OK != create_string_array (data, filesize, strings, str_cnt)) |
1262 | { | ||
1263 | offset++; | ||
1264 | if ( ((data[offset] == '\n')) && | ||
1265 | (buf != &data[offset]) ) | ||
1266 | { | ||
1267 | data[offset] = '\0'; | ||
1268 | str_cnt++; | ||
1269 | buf = &data[offset + 1]; | ||
1270 | } | ||
1271 | else if ( (data[offset] == '\n') || | ||
1272 | (data[offset] == '\0') ) | ||
1273 | buf = &data[offset + 1]; | ||
1274 | } | ||
1275 | *strings = GNUNET_malloc (sizeof (char *) * str_cnt); | ||
1276 | offset = 0; | ||
1277 | for (i = 0; i < str_cnt; i++) | ||
1278 | { | 1354 | { |
1279 | GNUNET_asprintf (&(*strings)[i], | 1355 | str_cnt = GNUNET_SYSERR; |
1280 | "%s%s", | ||
1281 | regex_prefix, | ||
1282 | &data[offset]); | ||
1283 | offset += strlen (&data[offset]) + 1; | ||
1284 | } | 1356 | } |
1285 | GNUNET_free (data); | 1357 | GNUNET_free (data); |
1286 | return str_cnt; | 1358 | return str_cnt; |
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c index c7a5e6c5f..ec42d2625 100644 --- a/src/regex/perf-regex.c +++ b/src/regex/perf-regex.c | |||
@@ -80,13 +80,14 @@ main (int argc, char *const *argv) | |||
80 | char *buffer; | 80 | char *buffer; |
81 | char *regex; | 81 | char *regex; |
82 | int compression; | 82 | int compression; |
83 | unsigned int alphabet_size; | ||
83 | long size; | 84 | long size; |
84 | 85 | ||
85 | GNUNET_log_setup ("perf-regex", "DEBUG", NULL); | 86 | GNUNET_log_setup ("perf-regex", "DEBUG", NULL); |
86 | if (3 != argc) | 87 | if (4 != argc) |
87 | { | 88 | { |
88 | fprintf (stderr, | 89 | fprintf (stderr, |
89 | "Usage: %s REGEX_FILE COMPRESSION\n", | 90 | "Usage: %s REGEX_FILE ALPHABET_SIZE COMPRESSION\n", |
90 | argv[0]); | 91 | argv[0]); |
91 | return 1; | 92 | return 1; |
92 | } | 93 | } |
@@ -98,9 +99,13 @@ main (int argc, char *const *argv) | |||
98 | argv[1]); | 99 | argv[1]); |
99 | return 2; | 100 | return 2; |
100 | } | 101 | } |
101 | compression = atoi (argv[2]); | 102 | alphabet_size = atoi (argv[2]); |
102 | 103 | compression = atoi (argv[3]); | |
103 | buffer = REGEX_TEST_combine (regexes); | 104 | printf ("********* PERF-REGEX *********'\n"); |
105 | printf ("Using:\n file '%s'\n Alphabet size %u\n compression %d\n", | ||
106 | argv[1], alphabet_size, compression); | ||
107 | fflush(stdout); | ||
108 | buffer = REGEX_TEST_combine (regexes, alphabet_size); | ||
104 | GNUNET_asprintf (®ex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer); | 109 | GNUNET_asprintf (®ex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer); |
105 | size = strlen (regex); | 110 | size = strlen (regex); |
106 | 111 | ||
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c index c42dbcef6..f4025f652 100644 --- a/src/regex/regex_test_lib.c +++ b/src/regex/regex_test_lib.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * This file is part of GNUnet | 2 | * This file is part of GNUnet |
3 | * Copyright (C) 2012 GNUnet e.V. | 3 | * Copyright (C) 2012-2017 GNUnet e.V. |
4 | * | 4 | * |
5 | * GNUnet is free software; you can redistribute it and/or modify | 5 | * GNUnet is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published | 6 | * it under the terms of the GNU General Public License as published |
@@ -32,27 +32,17 @@ | |||
32 | /** | 32 | /** |
33 | * Struct to hold the tree formed by prefix-combining the regexes. | 33 | * Struct to hold the tree formed by prefix-combining the regexes. |
34 | */ | 34 | */ |
35 | struct RegexCombineCtx { | 35 | struct RegexCombineCtx |
36 | 36 | { | |
37 | /** | ||
38 | * Next node with same prefix but different token. | ||
39 | */ | ||
40 | struct RegexCombineCtx *next; | ||
41 | |||
42 | /** | ||
43 | * Prev node with same prefix but different token. | ||
44 | */ | ||
45 | struct RegexCombineCtx *prev; | ||
46 | |||
47 | /** | 37 | /** |
48 | * First child node with same prefix and token. | 38 | * Child nodes with same prefix and token. |
49 | */ | 39 | */ |
50 | struct RegexCombineCtx *head; | 40 | struct RegexCombineCtx **children; |
51 | 41 | ||
52 | /** | 42 | /** |
53 | * Last child node. | 43 | * Alphabet size (how many @a children there are) |
54 | */ | 44 | */ |
55 | struct RegexCombineCtx *tail; | 45 | unsigned int size; |
56 | 46 | ||
57 | /** | 47 | /** |
58 | * Token. | 48 | * Token. |
@@ -60,30 +50,133 @@ struct RegexCombineCtx { | |||
60 | char *s; | 50 | char *s; |
61 | }; | 51 | }; |
62 | 52 | ||
63 | /* | 53 | |
54 | /** | ||
55 | * Char 2 int | ||
56 | * | ||
57 | * Convert a character into its int value depending on the base used | ||
58 | * | ||
59 | * @param c Char | ||
60 | * @param size base (2, 8 or 16(hex)) | ||
61 | * | ||
62 | * @return Int in range [0, (base-1)] | ||
63 | */ | ||
64 | static int | ||
65 | c2i (char c, int size) | ||
66 | { | ||
67 | switch (size) | ||
68 | { | ||
69 | case 2: | ||
70 | case 8: | ||
71 | return c - '0'; | ||
72 | break; | ||
73 | case 16: | ||
74 | if (c >= '0' && c <= '9') | ||
75 | return c - '0'; | ||
76 | else if (c >= 'A' && c <= 'F') | ||
77 | return c - 'A' + 10; | ||
78 | else if (c >= 'a' && c <= 'f') | ||
79 | return c - 'a' + 10; | ||
80 | else | ||
81 | { | ||
82 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
83 | "Cannot convert char %c in base %u\n", | ||
84 | c, size); | ||
85 | GNUNET_assert (0); | ||
86 | } | ||
87 | break; | ||
88 | default: | ||
89 | GNUNET_assert (0); | ||
90 | } | ||
91 | } | ||
92 | |||
93 | |||
94 | /** | ||
95 | * Printf spaces to indent the regex tree | ||
96 | * | ||
97 | * @param n Indentation level | ||
98 | */ | ||
64 | static void | 99 | static void |
65 | space (int n) | 100 | space (int n) |
66 | { | 101 | { |
67 | int i; | 102 | int i; |
68 | for (i = 0; i < n; i++) | 103 | for (i = 0; i < n; i++) |
69 | printf (" "); | 104 | fprintf (stderr, "| "); |
70 | } | 105 | } |
71 | 106 | ||
107 | |||
108 | /** | ||
109 | * Printf the combined regex ctx. | ||
110 | * | ||
111 | * @param ctx The ctx to printf | ||
112 | * @param level Indentation level to start with | ||
113 | */ | ||
72 | static void | 114 | static void |
73 | debugctx (struct RegexCombineCtx *ctx, int level) | 115 | debugctx (struct RegexCombineCtx *ctx, int level) |
74 | { | 116 | { |
75 | struct RegexCombineCtx *p; | 117 | return; |
76 | space (level); | 118 | unsigned int i; |
77 | if (NULL != ctx->s) | 119 | if (NULL != ctx->s) |
78 | printf ("'%s'\n", ctx->s); | 120 | { |
121 | space (level - 1); | ||
122 | fprintf (stderr, "%u:'%s'\n", c2i(ctx->s[0], ctx->size), ctx->s); | ||
123 | } | ||
79 | else | 124 | else |
80 | printf ("NULL\n"); | 125 | fprintf (stderr, "ROOT (base %u)\n", ctx->size); |
81 | for (p = ctx->head; NULL != p; p = p->next) | 126 | for (i = 0; i < ctx->size; i++) |
127 | { | ||
128 | if (NULL != ctx->children[i]) | ||
129 | { | ||
130 | space (level); | ||
131 | debugctx (ctx->children[i], level + 1); | ||
132 | } | ||
133 | } | ||
134 | fflush(stderr); | ||
135 | } | ||
136 | |||
137 | |||
138 | /** | ||
139 | * Add a single regex to a context, combining with exisiting regex by-prefix. | ||
140 | * | ||
141 | * @param ctx Context with 0 or more regexes. | ||
142 | * @param regex Regex to add. | ||
143 | */ | ||
144 | static void | ||
145 | regex_add (struct RegexCombineCtx *ctx, const char *regex); | ||
146 | |||
147 | |||
148 | /** | ||
149 | * Create and initialize a new RegexCombineCtx. | ||
150 | * | ||
151 | * @param alphabet_size Size of the alphabet (and the Trie array) | ||
152 | */ | ||
153 | static struct RegexCombineCtx * | ||
154 | new_regex_ctx (unsigned int alphabet_size) | ||
155 | { | ||
156 | struct RegexCombineCtx *ctx; | ||
157 | size_t array_size; | ||
158 | |||
159 | array_size = sizeof(struct RegexCombineCtx *) * alphabet_size; | ||
160 | ctx = GNUNET_new (struct RegexCombineCtx); | ||
161 | ctx->children = GNUNET_malloc (array_size); | ||
162 | ctx->size = alphabet_size; | ||
163 | |||
164 | return ctx; | ||
165 | } | ||
166 | |||
167 | static void | ||
168 | move_children (struct RegexCombineCtx *dst, const struct RegexCombineCtx *src) | ||
169 | { | ||
170 | size_t array_size; | ||
171 | |||
172 | array_size = sizeof(struct RegexCombineCtx *) * src->size; | ||
173 | memcpy (dst->children, src->children, array_size); | ||
174 | for (int i = 0; i < src->size; i++) | ||
82 | { | 175 | { |
83 | debugctx (p, level + 1); | 176 | src->children[i] = NULL; |
84 | } | 177 | } |
85 | } | 178 | } |
86 | */ | 179 | |
87 | 180 | ||
88 | /** | 181 | /** |
89 | * Extract a string from all prefix-combined regexes. | 182 | * Extract a string from all prefix-combined regexes. |
@@ -96,6 +189,7 @@ static char * | |||
96 | regex_combine (struct RegexCombineCtx *ctx) | 189 | regex_combine (struct RegexCombineCtx *ctx) |
97 | { | 190 | { |
98 | struct RegexCombineCtx *p; | 191 | struct RegexCombineCtx *p; |
192 | unsigned int i; | ||
99 | size_t len; | 193 | size_t len; |
100 | char *regex; | 194 | char *regex; |
101 | char *tmp; | 195 | char *tmp; |
@@ -105,9 +199,14 @@ regex_combine (struct RegexCombineCtx *ctx) | |||
105 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s); | 199 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s); |
106 | regex = GNUNET_strdup (""); | 200 | regex = GNUNET_strdup (""); |
107 | opt = GNUNET_NO; | 201 | opt = GNUNET_NO; |
108 | for (p = ctx->head; NULL != p; p = p->next) | 202 | for (i = 0; i < ctx->size; i++) |
109 | { | 203 | { |
110 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding '%s' to innner %s\n", p->s, ctx->s); | 204 | p = ctx->children[i]; |
205 | if (NULL == p) | ||
206 | continue; | ||
207 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
208 | "adding '%s' to innner %s\n", | ||
209 | p->s, ctx->s); | ||
111 | s = regex_combine (p); | 210 | s = regex_combine (p); |
112 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); | 211 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); |
113 | if (strlen(s) == 0) | 212 | if (strlen(s) == 0) |
@@ -171,10 +270,10 @@ get_prefix_length (const char *s1, const char *s2) | |||
171 | l2 = strlen (s2); | 270 | l2 = strlen (s2); |
172 | limit = l1 > l2 ? l2 : l1; | 271 | limit = l1 > l2 ? l2 : l1; |
173 | 272 | ||
174 | for (i = 1; i <= limit; i++) | 273 | for (i = 0; i < limit; i++) |
175 | { | 274 | { |
176 | if (0 != strncmp (s1, s2, i)) | 275 | if (s1[i] != s2[i]) |
177 | return i - 1; | 276 | return i; |
178 | } | 277 | } |
179 | return limit; | 278 | return limit; |
180 | } | 279 | } |
@@ -194,13 +293,19 @@ get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex) | |||
194 | { | 293 | { |
195 | struct RegexCombineCtx *p; | 294 | struct RegexCombineCtx *p; |
196 | struct RegexCombineCtx *best; | 295 | struct RegexCombineCtx *best; |
296 | unsigned int i; | ||
197 | unsigned int l; | 297 | unsigned int l; |
198 | unsigned int best_l; | 298 | unsigned int best_l; |
199 | 299 | ||
200 | best_l = 0; | 300 | best_l = 0; |
201 | best = NULL; | 301 | best = NULL; |
202 | for (p = ctx->head; NULL != p; p = p->next) | 302 | |
303 | for (i = 0; i < ctx->size; i++) | ||
203 | { | 304 | { |
305 | p = ctx->children[i]; | ||
306 | if (NULL == p) | ||
307 | continue; | ||
308 | |||
204 | l = get_prefix_length (p->s, regex); | 309 | l = get_prefix_length (p->s, regex); |
205 | if (l > best_l) | 310 | if (l > best_l) |
206 | { | 311 | { |
@@ -212,6 +317,103 @@ get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex) | |||
212 | return best; | 317 | return best; |
213 | } | 318 | } |
214 | 319 | ||
320 | static void | ||
321 | regex_add_multiple (struct RegexCombineCtx *ctx, | ||
322 | const char *regex, | ||
323 | struct RegexCombineCtx **children) | ||
324 | { | ||
325 | char tmp[2]; | ||
326 | long unsigned int i; | ||
327 | size_t l; | ||
328 | struct RegexCombineCtx *newctx; | ||
329 | unsigned int count; | ||
330 | |||
331 | if ('(' != regex[0]) | ||
332 | { | ||
333 | GNUNET_assert (0); | ||
334 | } | ||
335 | |||
336 | /* Does the regex cover *all* possible children? Then don't add any, | ||
337 | * as it will be covered by the post-regex "(a-z)*" | ||
338 | */ | ||
339 | l = strlen (regex); | ||
340 | count = 0; | ||
341 | for (i = 1UL; i < l; i++) | ||
342 | { | ||
343 | if (regex[i] != '|' && regex[i] != ')') | ||
344 | { | ||
345 | count++; | ||
346 | } | ||
347 | } | ||
348 | if (count == ctx->size) | ||
349 | { | ||
350 | return; | ||
351 | } | ||
352 | |||
353 | /* Add every component as a child node */ | ||
354 | tmp[1] = '\0'; | ||
355 | for (i = 1UL; i < l; i++) | ||
356 | { | ||
357 | if (regex[i] != '|' && regex[i] != ')') | ||
358 | { | ||
359 | tmp[0] = regex[i]; | ||
360 | newctx = new_regex_ctx(ctx->size); | ||
361 | newctx->s = GNUNET_strdup (tmp); | ||
362 | if (children != NULL) | ||
363 | memcpy (newctx->children, children, sizeof (*children) * ctx->size); | ||
364 | ctx->children[c2i(tmp[0], ctx->size)] = newctx; | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | |||
369 | /** | ||
370 | * Add a single regex to a context, splitting the exisiting state. | ||
371 | * | ||
372 | * We only had a partial match, split existing state, truncate the current node | ||
373 | * so it only contains the prefix, add suffix(es) as children. | ||
374 | * | ||
375 | * @param ctx Context to split. | ||
376 | * @param len Lenght of ctx->s | ||
377 | * @param prefix_l Lenght of common prefix of the new regex and @a ctx->s | ||
378 | */ | ||
379 | static void | ||
380 | regex_split (struct RegexCombineCtx *ctx, | ||
381 | unsigned int len, | ||
382 | unsigned int prefix_l) | ||
383 | { | ||
384 | struct RegexCombineCtx *newctx; | ||
385 | unsigned int idx; | ||
386 | char *suffix; | ||
387 | |||
388 | suffix = GNUNET_malloc (len - prefix_l + 1); | ||
389 | strncpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1); | ||
390 | |||
391 | /* Suffix saved, truncate current node so it only contains the prefix, | ||
392 | * copy any children nodes to put as grandchildren and initialize new empty | ||
393 | * children array. | ||
394 | */ | ||
395 | ctx->s[prefix_l] = '\0'; | ||
396 | |||
397 | /* If the suffix is an OR expression, add multiple children */ | ||
398 | if ('(' == suffix[0]) | ||
399 | { | ||
400 | struct RegexCombineCtx **tmp; | ||
401 | |||
402 | tmp = ctx->children; | ||
403 | ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size); | ||
404 | regex_add_multiple (ctx, suffix, tmp); | ||
405 | GNUNET_free (tmp); | ||
406 | return; | ||
407 | } | ||
408 | |||
409 | /* The suffix is a normal string, add as one node */ | ||
410 | newctx = new_regex_ctx (ctx->size); | ||
411 | newctx->s = suffix; | ||
412 | move_children (newctx, ctx); | ||
413 | idx = c2i(suffix[0], ctx->size); | ||
414 | ctx->children[idx] = newctx; | ||
415 | } | ||
416 | |||
215 | 417 | ||
216 | /** | 418 | /** |
217 | * Add a single regex to a context, combining with exisiting regex by-prefix. | 419 | * Add a single regex to a context, combining with exisiting regex by-prefix. |
@@ -224,17 +426,31 @@ regex_add (struct RegexCombineCtx *ctx, const char *regex) | |||
224 | { | 426 | { |
225 | struct RegexCombineCtx *p; | 427 | struct RegexCombineCtx *p; |
226 | struct RegexCombineCtx *newctx; | 428 | struct RegexCombineCtx *newctx; |
429 | long unsigned int l; | ||
227 | unsigned int prefix_l; | 430 | unsigned int prefix_l; |
228 | const char *rest_r; | 431 | const char *rest_r; |
229 | const char *rest_s; | 432 | const char *rest_s; |
230 | size_t len; | 433 | size_t len; |
434 | int idx; | ||
435 | |||
436 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
437 | "regex_add '%s' into '%s'\n", | ||
438 | regex, ctx->s); | ||
439 | l = strlen (regex); | ||
440 | if (0UL == l) | ||
441 | return; | ||
231 | 442 | ||
232 | if (0 == strlen (regex)) | 443 | /* If the regex is in the form of (a|b|c), add every character separately */ |
444 | if ('(' == regex[0]) | ||
445 | { | ||
446 | regex_add_multiple (ctx, regex, NULL); | ||
233 | return; | 447 | return; |
448 | } | ||
234 | 449 | ||
235 | p = get_longest_prefix (ctx, regex); | 450 | p = get_longest_prefix (ctx, regex); |
236 | if (NULL != p) /* There is some prefix match, reduce regex and try again */ | 451 | if (NULL != p) |
237 | { | 452 | { |
453 | /* There is some prefix match, reduce regex and try again */ | ||
238 | prefix_l = get_prefix_length (p->s, regex); | 454 | prefix_l = get_prefix_length (p->s, regex); |
239 | rest_s = &p->s[prefix_l]; | 455 | rest_s = &p->s[prefix_l]; |
240 | rest_r = ®ex[prefix_l]; | 456 | rest_r = ®ex[prefix_l]; |
@@ -243,35 +459,29 @@ regex_add (struct RegexCombineCtx *ctx, const char *regex) | |||
243 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); | 459 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); |
244 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); | 460 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); |
245 | len = strlen (p->s); | 461 | len = strlen (p->s); |
246 | if (prefix_l < len) /* only partial match, split existing state */ | 462 | if (prefix_l < len) |
247 | { | 463 | { |
248 | newctx = GNUNET_new (struct RegexCombineCtx); | 464 | regex_split (p, len, prefix_l); |
249 | newctx->head = p->head; | ||
250 | newctx->tail = p->tail; | ||
251 | newctx->s = GNUNET_malloc(len - prefix_l + 1); | ||
252 | strncpy (newctx->s, rest_s, len - prefix_l + 1); | ||
253 | |||
254 | p->head = newctx; | ||
255 | p->tail = newctx; | ||
256 | p->s[prefix_l] = '\0'; | ||
257 | } | 465 | } |
258 | regex_add (p, rest_r); | 466 | regex_add (p, rest_r); |
259 | return; | 467 | return; |
260 | } | 468 | } |
469 | |||
261 | /* There is no prefix match, add new */ | 470 | /* There is no prefix match, add new */ |
262 | if (NULL == ctx->head && NULL != ctx->s) | 471 | idx = c2i(regex[0], ctx->size); |
472 | if (NULL == ctx->children[idx] && NULL != ctx->s) | ||
263 | { | 473 | { |
264 | /* this was the end before, add empty string */ | 474 | /* this was the end before, add empty string */ |
265 | newctx = GNUNET_new (struct RegexCombineCtx); | 475 | newctx = new_regex_ctx (ctx->size); |
266 | newctx->s = GNUNET_strdup (""); | 476 | newctx->s = GNUNET_strdup (""); |
267 | GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); | 477 | ctx->children[idx] = newctx; |
268 | } | 478 | } |
269 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); | 479 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); |
270 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); | 480 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); |
271 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); | 481 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); |
272 | newctx = GNUNET_new (struct RegexCombineCtx); | 482 | newctx = new_regex_ctx(ctx->size); |
273 | newctx->s = GNUNET_strdup (regex); | 483 | newctx->s = GNUNET_strdup (regex); |
274 | GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); | 484 | ctx->children[idx] = newctx; |
275 | } | 485 | } |
276 | 486 | ||
277 | 487 | ||
@@ -283,45 +493,53 @@ regex_add (struct RegexCombineCtx *ctx, const char *regex) | |||
283 | static void | 493 | static void |
284 | regex_ctx_destroy (struct RegexCombineCtx *ctx) | 494 | regex_ctx_destroy (struct RegexCombineCtx *ctx) |
285 | { | 495 | { |
286 | struct RegexCombineCtx *p; | 496 | unsigned int i; |
287 | struct RegexCombineCtx *next; | 497 | |
498 | if (NULL == ctx) | ||
499 | return; | ||
288 | 500 | ||
289 | for (p = ctx->head; NULL != p; p = next) | 501 | for (i = 0; i < ctx->size; i++) |
290 | { | 502 | { |
291 | next = p->next; | 503 | regex_ctx_destroy (ctx->children[i]); |
292 | regex_ctx_destroy (p); | ||
293 | } | 504 | } |
294 | GNUNET_free_non_null (ctx->s); /* 's' on root node is null */ | 505 | GNUNET_free_non_null (ctx->s); /* 's' on root node is null */ |
506 | GNUNET_free (ctx->children); | ||
295 | GNUNET_free (ctx); | 507 | GNUNET_free (ctx); |
296 | } | 508 | } |
297 | 509 | ||
298 | 510 | ||
299 | /** | 511 | /** |
300 | * Return a prefix-combine regex that matches the same strings as | 512 | * Combine an array of regexes into a single prefix-shared regex. |
513 | * Returns a prefix-combine regex that matches the same strings as | ||
301 | * any of the original regexes. | 514 | * any of the original regexes. |
302 | * | 515 | * |
303 | * WARNING: only useful for reading specific regexes for specific applications, | 516 | * WARNING: only useful for reading specific regexes for specific applications, |
304 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. | 517 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. |
305 | * This function DOES NOT support arbitrary regex combining. | 518 | * This function DOES NOT support arbitrary regex combining. |
519 | * | ||
520 | * @param regexes A NULL-terminated array of regexes. | ||
521 | * @param alphabet_size Size of the alphabet the regex uses. | ||
522 | * | ||
523 | * @return A string with a single regex that matches any of the original regexes | ||
306 | */ | 524 | */ |
307 | char * | 525 | char * |
308 | REGEX_TEST_combine (char * const regexes[]) | 526 | REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size) |
309 | { | 527 | { |
310 | unsigned int i; | 528 | unsigned int i; |
311 | char *combined; | 529 | char *combined; |
312 | const char *current; | 530 | const char *current; |
313 | struct RegexCombineCtx *ctx; | 531 | struct RegexCombineCtx *ctx; |
314 | 532 | ||
315 | ctx = GNUNET_new (struct RegexCombineCtx); | 533 | ctx = new_regex_ctx (alphabet_size); |
316 | for (i = 0; regexes[i]; i++) | 534 | for (i = 0; regexes[i]; i++) |
317 | { | 535 | { |
318 | current = regexes[i]; | 536 | current = regexes[i]; |
319 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); | 537 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); |
320 | regex_add (ctx, current); | 538 | regex_add (ctx, current); |
321 | /* debugctx (ctx, 0); */ | 539 | debugctx (ctx, 0); |
322 | } | 540 | } |
323 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); | 541 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); |
324 | /* debugctx (ctx, 0); */ | 542 | debugctx (ctx, 0); |
325 | 543 | ||
326 | combined = regex_combine (ctx); | 544 | combined = regex_combine (ctx); |
327 | 545 | ||
diff --git a/src/regex/regex_test_lib.h b/src/regex/regex_test_lib.h index 83414e5dc..c4f7e7539 100644 --- a/src/regex/regex_test_lib.h +++ b/src/regex/regex_test_lib.h | |||
@@ -38,15 +38,23 @@ extern "C" | |||
38 | #endif | 38 | #endif |
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | |||
41 | /** | 42 | /** |
42 | * Combine an array of regexes into a single prefix-shared regex. | 43 | * Combine an array of regexes into a single prefix-shared regex. |
44 | * Returns a prefix-combine regex that matches the same strings as | ||
45 | * any of the original regexes. | ||
46 | * | ||
47 | * WARNING: only useful for reading specific regexes for specific applications, | ||
48 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. | ||
49 | * This function DOES NOT support arbitrary regex combining. | ||
43 | * | 50 | * |
44 | * @param regexes A NULL-terminated array of regexes. | 51 | * @param regexes A NULL-terminated array of regexes. |
52 | * @param alphabet_size Size of the alphabet the regex uses. | ||
45 | * | 53 | * |
46 | * @return A string with a single regex that matches any of the original regexes | 54 | * @return A string with a single regex that matches any of the original regexes |
47 | */ | 55 | */ |
48 | char * | 56 | char * |
49 | REGEX_TEST_combine(char * const regexes[]); | 57 | REGEX_TEST_combine (char * const regexes[], unsigned int alphabet_size); |
50 | 58 | ||
51 | 59 | ||
52 | /** | 60 | /** |