diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-10-26 14:33:59 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-10-26 14:33:59 +0000 |
commit | 7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b (patch) | |
tree | bd6c9d2d20fe9fb8f405075e929b3056cdd67736 /src/regex | |
parent | 993a74c191842519ad6c82216fb7a0ee2bb09456 (diff) | |
download | gnunet-7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b.tar.gz gnunet-7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b.zip |
- Added path compression parameter to DFA construction API
- Moved NFA construction to internal header
- Added regex simulation profiler (for profiling the NFA, that results by merging
several DFAs in the DHT, in a database)
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/Makefile.am | 14 | ||||
-rw-r--r-- | src/regex/gnunet-regex-simulation-profiler.c | 519 | ||||
-rw-r--r-- | src/regex/regex.c | 30 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 12 | ||||
-rw-r--r-- | src/regex/regex_simulation_profiler_test.conf | 6 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 8 | ||||
-rw-r--r-- | src/regex/test_regex_graph_api.c | 9 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 4 | ||||
-rw-r--r-- | src/regex/test_regex_proofs.c | 8 |
9 files changed, 586 insertions, 24 deletions
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am index 212e2ffcd..fe882a1e9 100644 --- a/src/regex/Makefile.am +++ b/src/regex/Makefile.am | |||
@@ -19,6 +19,20 @@ libgnunetregex_la_LDFLAGS = \ | |||
19 | $(GN_LIB_LDFLAGS) \ | 19 | $(GN_LIB_LDFLAGS) \ |
20 | -version-info 0:0:0 | 20 | -version-info 0:0:0 |
21 | 21 | ||
22 | if HAVE_MYSQL | ||
23 | noinst_PROGRAMS = \ | ||
24 | gnunet-regex-simulation-profiler | ||
25 | |||
26 | gnunet_regex_simulation_profiler_SOURCES = \ | ||
27 | gnunet-regex-simulation-profiler.c | ||
28 | gnunet_regex_simulation_profiler_LDADD = \ | ||
29 | $(top_builddir)/src/util/libgnunetutil.la \ | ||
30 | $(top_builddir)/src/regex/libgnunetregex.la \ | ||
31 | $(top_builddir)/src/mysql/libgnunetmysql.la | ||
32 | gnunet_regex_simulation_profiler_DEPENDENCIES = \ | ||
33 | libgnunetregex.la | ||
34 | endif | ||
35 | |||
22 | check_PROGRAMS = \ | 36 | check_PROGRAMS = \ |
23 | test_regex_eval_api \ | 37 | test_regex_eval_api \ |
24 | test_regex_iterate_api \ | 38 | test_regex_iterate_api \ |
diff --git a/src/regex/gnunet-regex-simulation-profiler.c b/src/regex/gnunet-regex-simulation-profiler.c new file mode 100644 index 000000000..14d0d6b07 --- /dev/null +++ b/src/regex/gnunet-regex-simulation-profiler.c | |||
@@ -0,0 +1,519 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | (C) 2011, 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-regex-simulation-profiler.c | ||
23 | * @brief Regex profiler that dumps all DFAs into a database instead of | ||
24 | * using the DHT (with mesh). | ||
25 | * @author Maximilian Szengel | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include "platform.h" | ||
30 | #include "gnunet_util_lib.h" | ||
31 | #include "gnunet_regex_lib.h" | ||
32 | #include "gnunet_mysql_lib.h" | ||
33 | |||
34 | /** | ||
35 | * Simple struct to keep track of progress, and print a | ||
36 | * nice little percentage meter for long running tasks. | ||
37 | */ | ||
38 | struct ProgressMeter | ||
39 | { | ||
40 | unsigned int total; | ||
41 | |||
42 | unsigned int modnum; | ||
43 | |||
44 | unsigned int dotnum; | ||
45 | |||
46 | unsigned int completed; | ||
47 | |||
48 | int print; | ||
49 | |||
50 | char *startup_string; | ||
51 | }; | ||
52 | |||
53 | |||
54 | /** | ||
55 | * Handle for the progress meter | ||
56 | */ | ||
57 | static struct ProgressMeter *meter; | ||
58 | |||
59 | /** | ||
60 | * Abort task identifier. | ||
61 | */ | ||
62 | static GNUNET_SCHEDULER_TaskIdentifier abort_task; | ||
63 | |||
64 | /** | ||
65 | * Scan task identifier; | ||
66 | */ | ||
67 | static GNUNET_SCHEDULER_TaskIdentifier scan_task; | ||
68 | |||
69 | /** | ||
70 | * Global testing status. | ||
71 | */ | ||
72 | static int result; | ||
73 | |||
74 | /** | ||
75 | * MySQL context. | ||
76 | */ | ||
77 | static struct GNUNET_MYSQL_Context *mysql_ctx; | ||
78 | |||
79 | /** | ||
80 | * MySQL table name. | ||
81 | */ | ||
82 | static char *table_name; | ||
83 | |||
84 | /** | ||
85 | * Policy dir containing files that contain policies. | ||
86 | */ | ||
87 | static char *policy_dir; | ||
88 | |||
89 | /** | ||
90 | * Number of policy files. | ||
91 | */ | ||
92 | static unsigned int num_policy_files; | ||
93 | |||
94 | /** | ||
95 | * Number of policies. | ||
96 | */ | ||
97 | static unsigned int num_policies; | ||
98 | |||
99 | /** | ||
100 | * Maximal path compression length. | ||
101 | */ | ||
102 | static unsigned int max_path_compression; | ||
103 | |||
104 | |||
105 | /** | ||
106 | * Create a meter to keep track of the progress of some task. | ||
107 | * | ||
108 | * @param total the total number of items to complete | ||
109 | * @param start_string a string to prefix the meter with (if printing) | ||
110 | * @param print GNUNET_YES to print the meter, GNUNET_NO to count | ||
111 | * internally only | ||
112 | * | ||
113 | * @return the progress meter | ||
114 | */ | ||
115 | static struct ProgressMeter * | ||
116 | create_meter (unsigned int total, char *start_string, int print) | ||
117 | { | ||
118 | struct ProgressMeter *ret; | ||
119 | |||
120 | ret = GNUNET_malloc (sizeof (struct ProgressMeter)); | ||
121 | ret->print = print; | ||
122 | ret->total = total; | ||
123 | ret->modnum = total / 4; | ||
124 | if (ret->modnum == 0) /* Divide by zero check */ | ||
125 | ret->modnum = 1; | ||
126 | ret->dotnum = (total / 50) + 1; | ||
127 | if (start_string != NULL) | ||
128 | ret->startup_string = GNUNET_strdup (start_string); | ||
129 | else | ||
130 | ret->startup_string = GNUNET_strdup (""); | ||
131 | |||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | |||
136 | /** | ||
137 | * Update progress meter (increment by one). | ||
138 | * | ||
139 | * @param meter the meter to update and print info for | ||
140 | * | ||
141 | * @return GNUNET_YES if called the total requested, | ||
142 | * GNUNET_NO if more items expected | ||
143 | */ | ||
144 | static int | ||
145 | update_meter (struct ProgressMeter *meter) | ||
146 | { | ||
147 | if (meter->print == GNUNET_YES) | ||
148 | { | ||
149 | if (meter->completed % meter->modnum == 0) | ||
150 | { | ||
151 | if (meter->completed == 0) | ||
152 | { | ||
153 | FPRINTF (stdout, "%sProgress: [0%%", meter->startup_string); | ||
154 | } | ||
155 | else | ||
156 | FPRINTF (stdout, "%d%%", | ||
157 | (int) (((float) meter->completed / meter->total) * 100)); | ||
158 | } | ||
159 | else if (meter->completed % meter->dotnum == 0) | ||
160 | FPRINTF (stdout, "%s", "."); | ||
161 | |||
162 | if (meter->completed + 1 == meter->total) | ||
163 | FPRINTF (stdout, "%d%%]\n", 100); | ||
164 | fflush (stdout); | ||
165 | } | ||
166 | meter->completed++; | ||
167 | |||
168 | if (meter->completed == meter->total) | ||
169 | return GNUNET_YES; | ||
170 | if (meter->completed > meter->total) | ||
171 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Progress meter overflow!!\n"); | ||
172 | return GNUNET_NO; | ||
173 | } | ||
174 | |||
175 | |||
176 | /** | ||
177 | * Reset progress meter. | ||
178 | * | ||
179 | * @param meter the meter to reset | ||
180 | * | ||
181 | * @return GNUNET_YES if meter reset, | ||
182 | * GNUNET_SYSERR on error | ||
183 | */ | ||
184 | static int | ||
185 | reset_meter (struct ProgressMeter *meter) | ||
186 | { | ||
187 | if (meter == NULL) | ||
188 | return GNUNET_SYSERR; | ||
189 | |||
190 | meter->completed = 0; | ||
191 | return GNUNET_YES; | ||
192 | } | ||
193 | |||
194 | |||
195 | /** | ||
196 | * Release resources for meter | ||
197 | * | ||
198 | * @param meter the meter to free | ||
199 | */ | ||
200 | static void | ||
201 | free_meter (struct ProgressMeter *meter) | ||
202 | { | ||
203 | GNUNET_free_non_null (meter->startup_string); | ||
204 | GNUNET_free (meter); | ||
205 | } | ||
206 | |||
207 | |||
208 | /** | ||
209 | * Shutdown task. | ||
210 | * | ||
211 | * @param cls NULL | ||
212 | * @param tc the task context | ||
213 | */ | ||
214 | static void | ||
215 | do_shutdown (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
216 | { | ||
217 | if (NULL != mysql_ctx) | ||
218 | GNUNET_MYSQL_context_destroy (mysql_ctx); | ||
219 | if (NULL != meter) | ||
220 | free_meter (meter); | ||
221 | |||
222 | GNUNET_SCHEDULER_shutdown (); /* Stop scheduler to shutdown testbed run */ | ||
223 | } | ||
224 | |||
225 | |||
226 | /** | ||
227 | * abort task to run on test timed out | ||
228 | * | ||
229 | * @param cls NULL | ||
230 | * @param tc the task context | ||
231 | */ | ||
232 | static void | ||
233 | do_abort (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
234 | { | ||
235 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Aborting\n"); | ||
236 | abort_task = GNUNET_SCHEDULER_NO_TASK; | ||
237 | GNUNET_SCHEDULER_cancel (scan_task); | ||
238 | scan_task = GNUNET_SCHEDULER_NO_TASK; | ||
239 | result = GNUNET_SYSERR; | ||
240 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
241 | } | ||
242 | |||
243 | |||
244 | /** | ||
245 | * Iterator over all states that inserts each state into the MySQL db. | ||
246 | * | ||
247 | * @param cls closure. | ||
248 | * @param key hash for current state. | ||
249 | * @param proof proof for current state. | ||
250 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
251 | * @param num_edges number of edges leaving current state. | ||
252 | * @param edges edges leaving current state. | ||
253 | */ | ||
254 | static void | ||
255 | regex_iterator (void *cls, | ||
256 | const struct GNUNET_HashCode *key, | ||
257 | const char *proof, | ||
258 | int accepting, | ||
259 | unsigned int num_edges, | ||
260 | const struct GNUNET_REGEX_Edge *edges) | ||
261 | { | ||
262 | char *stmt; | ||
263 | unsigned int i; | ||
264 | |||
265 | GNUNET_assert (NULL != mysql_ctx); | ||
266 | |||
267 | for (i = 0; i < num_edges; i++) | ||
268 | { | ||
269 | GNUNET_asprintf (&stmt, | ||
270 | "INSERT IGNORE INTO `%s` (`key`, `label`, `to_key`, `accepting`) VALUES ('%s', '%s', '%s', '%d');", | ||
271 | table_name, GNUNET_h2s_full (key), edges[i].label, | ||
272 | GNUNET_h2s_full (&edges[i].destination), accepting); | ||
273 | |||
274 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Insert statement: %s\n", stmt); | ||
275 | |||
276 | if (GNUNET_OK != GNUNET_MYSQL_statement_run (mysql_ctx, stmt)) | ||
277 | { | ||
278 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
279 | "Error executing mysql statement: %s\n", stmt); | ||
280 | } | ||
281 | |||
282 | GNUNET_free (stmt); | ||
283 | } | ||
284 | } | ||
285 | |||
286 | |||
287 | /** | ||
288 | * Announce a regex by creating the DFA and iterating over each state, inserting | ||
289 | * each state into a MySQL database. | ||
290 | * | ||
291 | * @param regex regular expression. | ||
292 | * @return GNUNET_OK on success, GNUNET_SYSERR on failure. | ||
293 | */ | ||
294 | static int | ||
295 | announce_regex (const char *regex) | ||
296 | { | ||
297 | struct GNUNET_REGEX_Automaton *dfa; | ||
298 | |||
299 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex), max_path_compression); | ||
300 | |||
301 | if (NULL == dfa) | ||
302 | { | ||
303 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
304 | "Failed to create DFA for regex %s\n", regex); | ||
305 | abort_task = GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
306 | return GNUNET_SYSERR; | ||
307 | } | ||
308 | |||
309 | GNUNET_REGEX_iterate_all_edges (dfa, ®ex_iterator, NULL); | ||
310 | |||
311 | GNUNET_REGEX_automaton_destroy (dfa); | ||
312 | |||
313 | return GNUNET_OK; | ||
314 | } | ||
315 | |||
316 | |||
317 | /** | ||
318 | * Function called with a filename. | ||
319 | * | ||
320 | * @param cls closure | ||
321 | * @param filename complete filename (absolute path) | ||
322 | * @return GNUNET_OK to continue to iterate, | ||
323 | * GNUNET_SYSERR to abort iteration with error! | ||
324 | */ | ||
325 | int | ||
326 | policy_filename_cb (void *cls, const char *filename) | ||
327 | { | ||
328 | char *regex; | ||
329 | char *data; | ||
330 | char *buf; | ||
331 | uint64_t filesize; | ||
332 | unsigned int offset; | ||
333 | |||
334 | GNUNET_assert (NULL != filename); | ||
335 | |||
336 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
337 | "Announcing regexes from file %s\n", | ||
338 | filename); | ||
339 | |||
340 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) | ||
341 | { | ||
342 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
343 | "Could not find policy file %s\n", filename); | ||
344 | return GNUNET_OK; | ||
345 | } | ||
346 | if (GNUNET_OK != GNUNET_DISK_file_size (filename, &filesize, GNUNET_YES, GNUNET_YES)) | ||
347 | filesize = 0; | ||
348 | if (0 == filesize) | ||
349 | { | ||
350 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Policy file %s is empty.\n", filename); | ||
351 | return GNUNET_OK; | ||
352 | } | ||
353 | data = GNUNET_malloc (filesize); | ||
354 | if (filesize != GNUNET_DISK_fn_read (filename, data, filesize)) | ||
355 | { | ||
356 | GNUNET_free (data); | ||
357 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not read policy file %s.\n", | ||
358 | filename); | ||
359 | return GNUNET_OK; | ||
360 | } | ||
361 | |||
362 | update_meter (meter); | ||
363 | |||
364 | buf = data; | ||
365 | offset = 0; | ||
366 | regex = NULL; | ||
367 | while (offset < (filesize - 1)) | ||
368 | { | ||
369 | offset++; | ||
370 | if (((data[offset] == '\n')) && (buf != &data[offset])) | ||
371 | { | ||
372 | data[offset] = '\0'; | ||
373 | regex = buf; | ||
374 | GNUNET_assert (NULL != regex); | ||
375 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n", | ||
376 | regex); | ||
377 | num_policies++; | ||
378 | |||
379 | if (GNUNET_OK != announce_regex (regex)) | ||
380 | { | ||
381 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
382 | "Could not announce regex %s\n", regex); | ||
383 | } | ||
384 | |||
385 | buf = &data[offset + 1]; | ||
386 | } | ||
387 | else if ((data[offset] == '\n') || (data[offset] == '\0')) | ||
388 | buf = &data[offset + 1]; | ||
389 | } | ||
390 | GNUNET_free (data); | ||
391 | return GNUNET_OK; | ||
392 | } | ||
393 | |||
394 | |||
395 | /** | ||
396 | * Iterate over files contained in policy_dir. | ||
397 | * | ||
398 | * @param cls NULL | ||
399 | * @param tc the task context | ||
400 | */ | ||
401 | static void | ||
402 | do_directory_scan (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
403 | { | ||
404 | struct GNUNET_TIME_Absolute start_time; | ||
405 | struct GNUNET_TIME_Relative duration; | ||
406 | |||
407 | if (GNUNET_SCHEDULER_NO_TASK != abort_task) | ||
408 | GNUNET_SCHEDULER_cancel (abort_task); | ||
409 | |||
410 | meter = create_meter (num_policy_files, "Announcing policy files\n", GNUNET_YES); | ||
411 | start_time = GNUNET_TIME_absolute_get (); | ||
412 | GNUNET_DISK_directory_scan (policy_dir, | ||
413 | &policy_filename_cb, | ||
414 | NULL); | ||
415 | duration = GNUNET_TIME_absolute_get_duration (start_time); | ||
416 | reset_meter (meter); | ||
417 | free_meter (meter); | ||
418 | meter = NULL; | ||
419 | |||
420 | printf ("Announced %u files containing %u policies in %s\n", | ||
421 | num_policy_files, num_policies, | ||
422 | GNUNET_STRINGS_relative_time_to_string (duration, GNUNET_NO)); | ||
423 | |||
424 | result = GNUNET_OK; | ||
425 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
426 | } | ||
427 | |||
428 | |||
429 | /** | ||
430 | * Main function that will be run by the scheduler. | ||
431 | * | ||
432 | * @param cls closure | ||
433 | * @param args remaining command-line arguments | ||
434 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
435 | * @param config configuration | ||
436 | */ | ||
437 | static void | ||
438 | run (void *cls, char *const *args, const char *cfgfile, | ||
439 | const struct GNUNET_CONFIGURATION_Handle *config) | ||
440 | { | ||
441 | if (NULL == args[0]) | ||
442 | { | ||
443 | fprintf (stderr, _("No policy directory specified on command line. Exiting.\n")); | ||
444 | result = GNUNET_SYSERR; | ||
445 | return; | ||
446 | } | ||
447 | if (GNUNET_YES != GNUNET_DISK_directory_test (args[0])) | ||
448 | { | ||
449 | fprintf (stderr, _("Specified policies directory does not exist. Exiting.\n")); | ||
450 | result = GNUNET_SYSERR; | ||
451 | return; | ||
452 | } | ||
453 | policy_dir = args[0]; | ||
454 | |||
455 | num_policy_files = GNUNET_DISK_directory_scan (policy_dir, | ||
456 | NULL, | ||
457 | NULL); | ||
458 | meter = NULL; | ||
459 | |||
460 | if (NULL == table_name) | ||
461 | { | ||
462 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "No table name specified, using default \"NFA\".\n"); | ||
463 | table_name = "NFA"; | ||
464 | } | ||
465 | |||
466 | mysql_ctx = GNUNET_MYSQL_context_create (config, "regex-mysql"); | ||
467 | if (NULL == mysql_ctx) | ||
468 | { | ||
469 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Failed to create mysql context\n"); | ||
470 | result = GNUNET_SYSERR; | ||
471 | return; | ||
472 | } | ||
473 | |||
474 | result = GNUNET_OK; | ||
475 | |||
476 | scan_task = GNUNET_SCHEDULER_add_now (&do_directory_scan, NULL); | ||
477 | |||
478 | abort_task = | ||
479 | GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_relative_multiply | ||
480 | (GNUNET_TIME_UNIT_SECONDS, 10), &do_abort, | ||
481 | NULL); | ||
482 | } | ||
483 | |||
484 | |||
485 | /** | ||
486 | * Main function. | ||
487 | * | ||
488 | * @param argc argument count | ||
489 | * @param argv argument values | ||
490 | * @return 0 on success | ||
491 | */ | ||
492 | int | ||
493 | main (int argc, char *const *argv) | ||
494 | { | ||
495 | static const struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
496 | {'t', "table", "TABLENAME", | ||
497 | gettext_noop ("name of the table to write DFAs"), | ||
498 | 1, &GNUNET_GETOPT_set_string, &table_name}, | ||
499 | {'p', "max-path-compression", "MAX_PATH_COMPRESSION", | ||
500 | gettext_noop ("maximum path compression length"), | ||
501 | 1, &GNUNET_GETOPT_set_uint, &max_path_compression}, | ||
502 | GNUNET_GETOPT_OPTION_END | ||
503 | }; | ||
504 | int ret; | ||
505 | |||
506 | if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv)) | ||
507 | return 2; | ||
508 | |||
509 | result = GNUNET_SYSERR; | ||
510 | ret = | ||
511 | GNUNET_PROGRAM_run (argc, argv, "gnunet-regex-simulationprofiler [OPTIONS] policy-dir", | ||
512 | _("Profiler for regex library"), | ||
513 | options, &run, NULL); | ||
514 | if (GNUNET_OK != ret) | ||
515 | return ret; | ||
516 | if (GNUNET_OK != result) | ||
517 | return 1; | ||
518 | return 0; | ||
519 | } | ||
diff --git a/src/regex/regex.c b/src/regex/regex.c index 637eac00d..3281ff469 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -1687,7 +1687,7 @@ dfa_compress_paths_helper (struct GNUNET_REGEX_Automaton *dfa, | |||
1687 | // || cur->transition_count > 1 | 1687 | // || cur->transition_count > 1 |
1688 | || GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 && | 1688 | || GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 && |
1689 | max_len == strlen (label)) || | 1689 | max_len == strlen (label)) || |
1690 | (start == dfa->start && GNUNET_REGEX_INITIAL_BITS == strlen (label)))) | 1690 | (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label)))) |
1691 | { | 1691 | { |
1692 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); | 1692 | t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); |
1693 | t->label = GNUNET_strdup (label); | 1693 | t->label = GNUNET_strdup (label); |
@@ -2482,17 +2482,26 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2482 | } | 2482 | } |
2483 | } | 2483 | } |
2484 | 2484 | ||
2485 | |||
2486 | /** | 2485 | /** |
2487 | * Construct DFA for the given 'regex' of length 'len' | 2486 | * Construct DFA for the given 'regex' of length 'len'. |
2488 | * | 2487 | * |
2489 | * @param regex regular expression string | 2488 | * Path compression means, that for example a DFA o -> a -> b -> c -> o will be |
2490 | * @param len length of the regular expression | 2489 | * compressed to o -> abc -> o. Note that this parameter influences the |
2490 | * non-determinism of states of the resulting NFA in the DHT (number of outgoing | ||
2491 | * edges with the same label). For example for an application that stores IPv4 | ||
2492 | * addresses as bitstrings it could make sense to limit the path compression to | ||
2493 | * 4 or 8. | ||
2491 | * | 2494 | * |
2492 | * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton | 2495 | * @param regex regular expression string. |
2496 | * @param len length of the regular expression. | ||
2497 | * @param max_path_len limit the path compression length to the | ||
2498 | * given value. If set to 1, no path compression is applied. Set to 0 for | ||
2499 | * maximal possible path compression (generally not desireable). | ||
2500 | * @return DFA, needs to be freed using GNUNET_REGEX_automaton_destroy. | ||
2493 | */ | 2501 | */ |
2494 | struct GNUNET_REGEX_Automaton * | 2502 | struct GNUNET_REGEX_Automaton * |
2495 | GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | 2503 | GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, |
2504 | int max_path_len) | ||
2496 | { | 2505 | { |
2497 | struct GNUNET_REGEX_Context ctx; | 2506 | struct GNUNET_REGEX_Context ctx; |
2498 | struct GNUNET_REGEX_Automaton *dfa; | 2507 | struct GNUNET_REGEX_Automaton *dfa; |
@@ -2535,7 +2544,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
2535 | automaton_create_proofs (dfa); | 2544 | automaton_create_proofs (dfa); |
2536 | 2545 | ||
2537 | // Compress DFA paths | 2546 | // Compress DFA paths |
2538 | dfa_compress_paths (&ctx, dfa, 8); | 2547 | if (1 != max_path_len) |
2548 | dfa_compress_paths (&ctx, dfa, max_path_len); | ||
2539 | 2549 | ||
2540 | // Add strides to DFA | 2550 | // Add strides to DFA |
2541 | //GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2); | 2551 | //GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2); |
@@ -2770,7 +2780,7 @@ GNUNET_REGEX_get_first_key (const char *input_string, size_t string_len, | |||
2770 | 2780 | ||
2771 | size = | 2781 | size = |
2772 | string_len < | 2782 | string_len < |
2773 | GNUNET_REGEX_INITIAL_BITS ? string_len : GNUNET_REGEX_INITIAL_BITS; | 2783 | GNUNET_REGEX_INITIAL_BYTES ? string_len : GNUNET_REGEX_INITIAL_BYTES; |
2774 | 2784 | ||
2775 | if (NULL == input_string) | 2785 | if (NULL == input_string) |
2776 | { | 2786 | { |
@@ -2931,7 +2941,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, | |||
2931 | s->marked = GNUNET_NO; | 2941 | s->marked = GNUNET_NO; |
2932 | } | 2942 | } |
2933 | 2943 | ||
2934 | iterate_initial_edge (GNUNET_REGEX_INITIAL_BITS, GNUNET_REGEX_INITIAL_BITS, | 2944 | iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, GNUNET_REGEX_INITIAL_BYTES, |
2935 | NULL, a->start, iterator, iterator_cls); | 2945 | NULL, a->start, iterator, iterator_cls); |
2936 | } | 2946 | } |
2937 | 2947 | ||
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index 49e5dd022..21c498145 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -267,6 +267,18 @@ struct GNUNET_REGEX_Automaton | |||
267 | 267 | ||
268 | 268 | ||
269 | /** | 269 | /** |
270 | * Construct an NFA by parsing the regex string of length 'len'. | ||
271 | * | ||
272 | * @param regex regular expression string. | ||
273 | * @param len length of the string. | ||
274 | * | ||
275 | * @return NFA, needs to be freed using GNUNET_REGEX_automaton_destroy. | ||
276 | */ | ||
277 | struct GNUNET_REGEX_Automaton * | ||
278 | GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); | ||
279 | |||
280 | |||
281 | /** | ||
270 | * Function that get's passed to automaton traversal and is called before each | 282 | * Function that get's passed to automaton traversal and is called before each |
271 | * next traversal from state 's' using transition 't' to check if traversal | 283 | * next traversal from state 's' using transition 't' to check if traversal |
272 | * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue. | 284 | * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue. |
diff --git a/src/regex/regex_simulation_profiler_test.conf b/src/regex/regex_simulation_profiler_test.conf new file mode 100644 index 000000000..738fe7a81 --- /dev/null +++ b/src/regex/regex_simulation_profiler_test.conf | |||
@@ -0,0 +1,6 @@ | |||
1 | [regex-mysql] | ||
2 | DATABASE = regex | ||
3 | USER = gnunet | ||
4 | PASSWORD = | ||
5 | HOST = localhost | ||
6 | PORT = 3306 | ||
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index ffe6d74bf..02aa1d01c 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -92,7 +92,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
92 | } | 92 | } |
93 | 93 | ||
94 | /* Match string using DFA */ | 94 | /* Match string using DFA */ |
95 | dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); | 95 | dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx), 0); |
96 | if (NULL == dfa) | 96 | if (NULL == dfa) |
97 | { | 97 | { |
98 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | 98 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); |
@@ -123,7 +123,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
123 | 123 | ||
124 | /* Match canonical regex */ | 124 | /* Match canonical regex */ |
125 | dfa = | 125 | dfa = |
126 | GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex)); | 126 | GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex), 0); |
127 | if (NULL == dfa) | 127 | if (NULL == dfa) |
128 | { | 128 | { |
129 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | 129 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); |
@@ -333,12 +333,12 @@ main (int argc, char *argv[]) | |||
333 | GNUNET_REGEX_automaton_destroy (a); | 333 | GNUNET_REGEX_automaton_destroy (a); |
334 | 334 | ||
335 | /* DFA test */ | 335 | /* DFA test */ |
336 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 336 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); |
337 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 337 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
338 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a)); | 338 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a)); |
339 | GNUNET_REGEX_automaton_destroy (a); | 339 | GNUNET_REGEX_automaton_destroy (a); |
340 | 340 | ||
341 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); | 341 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof), 0); |
342 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 342 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
343 | GNUNET_REGEX_automaton_destroy (a); | 343 | GNUNET_REGEX_automaton_destroy (a); |
344 | if (0 != check_dfa) | 344 | if (0 != check_dfa) |
diff --git a/src/regex/test_regex_graph_api.c b/src/regex/test_regex_graph_api.c index b57c79edd..d429025d9 100644 --- a/src/regex/test_regex_graph_api.c +++ b/src/regex/test_regex_graph_api.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <time.h> | 26 | #include <time.h> |
27 | #include "platform.h" | 27 | #include "platform.h" |
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | #include "regex_internal.h" | ||
29 | 30 | ||
30 | #define KEEP_FILES 0 | 31 | #define KEEP_FILES 0 |
31 | 32 | ||
@@ -134,19 +135,19 @@ main (int argc, char *argv[]) | |||
134 | 135 | ||
135 | 136 | ||
136 | // Check DFA graph creation | 137 | // Check DFA graph creation |
137 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 138 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0); |
138 | GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); | 139 | GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); |
139 | GNUNET_REGEX_automaton_destroy (a); | 140 | GNUNET_REGEX_automaton_destroy (a); |
140 | error += filecheck (filename); | 141 | error += filecheck (filename); |
141 | 142 | ||
142 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 143 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0); |
143 | GNUNET_REGEX_automaton_save_graph (a, filename, | 144 | GNUNET_REGEX_automaton_save_graph (a, filename, |
144 | GNUNET_REGEX_GRAPH_DEFAULT | | 145 | GNUNET_REGEX_GRAPH_DEFAULT | |
145 | GNUNET_REGEX_GRAPH_VERBOSE); | 146 | GNUNET_REGEX_GRAPH_VERBOSE); |
146 | GNUNET_REGEX_automaton_destroy (a); | 147 | GNUNET_REGEX_automaton_destroy (a); |
147 | error += filecheck (filename); | 148 | error += filecheck (filename); |
148 | 149 | ||
149 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 150 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0); |
150 | GNUNET_REGEX_automaton_save_graph (a, filename, | 151 | GNUNET_REGEX_automaton_save_graph (a, filename, |
151 | GNUNET_REGEX_GRAPH_DEFAULT | | 152 | GNUNET_REGEX_GRAPH_DEFAULT | |
152 | GNUNET_REGEX_GRAPH_COLORING); | 153 | GNUNET_REGEX_GRAPH_COLORING); |
@@ -154,7 +155,7 @@ main (int argc, char *argv[]) | |||
154 | error += filecheck (filename); | 155 | error += filecheck (filename); |
155 | 156 | ||
156 | 157 | ||
157 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 158 | a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0); |
158 | GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); //| | 159 | GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); //| |
159 | // GNUNET_REGEX_GRAPH_VERBOSE | | 160 | // GNUNET_REGEX_GRAPH_VERBOSE | |
160 | //GNUNET_REGEX_GRAPH_COLORING); | 161 | //GNUNET_REGEX_GRAPH_COLORING); |
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index 72847476b..09abdb7c7 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c | |||
@@ -188,7 +188,7 @@ main (int argc, char *argv[]) | |||
188 | ctx.string_count = rxstr[i].string_count; | 188 | ctx.string_count = rxstr[i].string_count; |
189 | ctx.strings = rxstr[i].strings; | 189 | ctx.strings = rxstr[i].strings; |
190 | ctx.match_count = 0; | 190 | ctx.match_count = 0; |
191 | dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 191 | dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); |
192 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); | 192 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); |
193 | num_transitions = | 193 | num_transitions = |
194 | GNUNET_REGEX_get_transition_count (dfa) - dfa->start->transition_count; | 194 | GNUNET_REGEX_get_transition_count (dfa) - dfa->start->transition_count; |
@@ -234,7 +234,7 @@ main (int argc, char *argv[]) | |||
234 | ctx.strings = rxstr[i].strings; | 234 | ctx.strings = rxstr[i].strings; |
235 | ctx.match_count = 0; | 235 | ctx.match_count = 0; |
236 | 236 | ||
237 | dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 237 | dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); |
238 | GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2); | 238 | GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2); |
239 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); | 239 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); |
240 | 240 | ||
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c index 8ccbe00ad..a5e049fba 100644 --- a/src/regex/test_regex_proofs.c +++ b/src/regex/test_regex_proofs.c | |||
@@ -46,10 +46,10 @@ test_proof (const char *regex) | |||
46 | char *c_rx1; | 46 | char *c_rx1; |
47 | const char *c_rx2; | 47 | const char *c_rx2; |
48 | 48 | ||
49 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); | 49 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex), 1); |
50 | c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); | 50 | c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); |
51 | GNUNET_REGEX_automaton_destroy (dfa); | 51 | GNUNET_REGEX_automaton_destroy (dfa); |
52 | dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1)); | 52 | dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1), 1); |
53 | c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa); | 53 | c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa); |
54 | 54 | ||
55 | error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; | 55 | error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; |
@@ -126,8 +126,8 @@ test_proofs_static (void) | |||
126 | 126 | ||
127 | for (i = 0; i < 8; i += 2) | 127 | for (i = 0; i < 8; i += 2) |
128 | { | 128 | { |
129 | dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 129 | dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 1); |
130 | dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); | 130 | dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]), 1); |
131 | 131 | ||
132 | canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1); | 132 | canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1); |
133 | canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2); | 133 | canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2); |