aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-10-26 14:33:59 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-10-26 14:33:59 +0000
commit7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b (patch)
treebd6c9d2d20fe9fb8f405075e929b3056cdd67736
parent993a74c191842519ad6c82216fb7a0ee2bb09456 (diff)
downloadgnunet-7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b.tar.gz
gnunet-7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b.zip
- Added path compression parameter to DFA construction API
- Moved NFA construction to internal header - Added regex simulation profiler (for profiling the NFA, that results by merging several DFAs in the DHT, in a database)
-rw-r--r--src/include/gnunet_regex_lib.h30
-rw-r--r--src/regex/Makefile.am14
-rw-r--r--src/regex/gnunet-regex-simulation-profiler.c519
-rw-r--r--src/regex/regex.c30
-rw-r--r--src/regex/regex_internal.h12
-rw-r--r--src/regex/regex_simulation_profiler_test.conf6
-rw-r--r--src/regex/test_regex_eval_api.c8
-rw-r--r--src/regex/test_regex_graph_api.c9
-rw-r--r--src/regex/test_regex_iterate_api.c4
-rw-r--r--src/regex/test_regex_proofs.c8
10 files changed, 600 insertions, 40 deletions
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h
index 87718b2f0..d7815461c 100644
--- a/src/include/gnunet_regex_lib.h
+++ b/src/include/gnunet_regex_lib.h
@@ -41,7 +41,7 @@ extern "C"
41/** 41/**
42 * Constant for how many bits the initial string regex should have. 42 * Constant for how many bits the initial string regex should have.
43 */ 43 */
44#define GNUNET_REGEX_INITIAL_BITS 24 44#define GNUNET_REGEX_INITIAL_BYTES 24
45 45
46 46
47/** 47/**
@@ -80,27 +80,25 @@ struct GNUNET_REGEX_Edge
80 80
81 81
82/** 82/**
83 * Construct an NFA by parsing the regex string of length 'len'.
84 *
85 * @param regex regular expression string.
86 * @param len length of the string.
87 *
88 * @return NFA, needs to be freed using GNUNET_REGEX_destroy_automaton.
89 */
90struct GNUNET_REGEX_Automaton *
91GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
92
93
94/**
95 * Construct DFA for the given 'regex' of length 'len'. 83 * Construct DFA for the given 'regex' of length 'len'.
96 * 84 *
85 * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
86 * compressed to o -> abc -> o. Note that this parameter influences the
87 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
88 * edges with the same label). For example for an application that stores IPv4
89 * addresses as bitstrings it could make sense to limit the path compression to
90 * 4 or 8.
91 *
97 * @param regex regular expression string. 92 * @param regex regular expression string.
98 * @param len length of the regular expression. 93 * @param len length of the regular expression.
99 * 94 * @param max_path_len limit the path compression length to the
100 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton. 95 * given value. If set to 1, no path compression is applied. Set to 0 for
96 * maximal possible path compression (generally not desireable).
97 * @return DFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
101 */ 98 */
102struct GNUNET_REGEX_Automaton * 99struct GNUNET_REGEX_Automaton *
103GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); 100GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
101 int max_path_len);
104 102
105 103
106/** 104/**
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am
index 212e2ffcd..fe882a1e9 100644
--- a/src/regex/Makefile.am
+++ b/src/regex/Makefile.am
@@ -19,6 +19,20 @@ libgnunetregex_la_LDFLAGS = \
19 $(GN_LIB_LDFLAGS) \ 19 $(GN_LIB_LDFLAGS) \
20 -version-info 0:0:0 20 -version-info 0:0:0
21 21
22if HAVE_MYSQL
23noinst_PROGRAMS = \
24gnunet-regex-simulation-profiler
25
26gnunet_regex_simulation_profiler_SOURCES = \
27 gnunet-regex-simulation-profiler.c
28gnunet_regex_simulation_profiler_LDADD = \
29 $(top_builddir)/src/util/libgnunetutil.la \
30 $(top_builddir)/src/regex/libgnunetregex.la \
31 $(top_builddir)/src/mysql/libgnunetmysql.la
32gnunet_regex_simulation_profiler_DEPENDENCIES = \
33 libgnunetregex.la
34endif
35
22check_PROGRAMS = \ 36check_PROGRAMS = \
23 test_regex_eval_api \ 37 test_regex_eval_api \
24 test_regex_iterate_api \ 38 test_regex_iterate_api \
diff --git a/src/regex/gnunet-regex-simulation-profiler.c b/src/regex/gnunet-regex-simulation-profiler.c
new file mode 100644
index 000000000..14d0d6b07
--- /dev/null
+++ b/src/regex/gnunet-regex-simulation-profiler.c
@@ -0,0 +1,519 @@
1/*
2 This file is part of GNUnet.
3 (C) 2011, 2012 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20
21/**
22 * @file regex/gnunet-regex-simulation-profiler.c
23 * @brief Regex profiler that dumps all DFAs into a database instead of
24 * using the DHT (with mesh).
25 * @author Maximilian Szengel
26 *
27 */
28
29#include "platform.h"
30#include "gnunet_util_lib.h"
31#include "gnunet_regex_lib.h"
32#include "gnunet_mysql_lib.h"
33
34/**
35 * Simple struct to keep track of progress, and print a
36 * nice little percentage meter for long running tasks.
37 */
38struct ProgressMeter
39{
40 unsigned int total;
41
42 unsigned int modnum;
43
44 unsigned int dotnum;
45
46 unsigned int completed;
47
48 int print;
49
50 char *startup_string;
51};
52
53
54/**
55 * Handle for the progress meter
56 */
57static struct ProgressMeter *meter;
58
59/**
60 * Abort task identifier.
61 */
62static GNUNET_SCHEDULER_TaskIdentifier abort_task;
63
64/**
65 * Scan task identifier;
66 */
67static GNUNET_SCHEDULER_TaskIdentifier scan_task;
68
69/**
70 * Global testing status.
71 */
72static int result;
73
74/**
75 * MySQL context.
76 */
77static struct GNUNET_MYSQL_Context *mysql_ctx;
78
79/**
80 * MySQL table name.
81 */
82static char *table_name;
83
84/**
85 * Policy dir containing files that contain policies.
86 */
87static char *policy_dir;
88
89/**
90 * Number of policy files.
91 */
92static unsigned int num_policy_files;
93
94/**
95 * Number of policies.
96 */
97static unsigned int num_policies;
98
99/**
100 * Maximal path compression length.
101 */
102static unsigned int max_path_compression;
103
104
105/**
106 * Create a meter to keep track of the progress of some task.
107 *
108 * @param total the total number of items to complete
109 * @param start_string a string to prefix the meter with (if printing)
110 * @param print GNUNET_YES to print the meter, GNUNET_NO to count
111 * internally only
112 *
113 * @return the progress meter
114 */
115static struct ProgressMeter *
116create_meter (unsigned int total, char *start_string, int print)
117{
118 struct ProgressMeter *ret;
119
120 ret = GNUNET_malloc (sizeof (struct ProgressMeter));
121 ret->print = print;
122 ret->total = total;
123 ret->modnum = total / 4;
124 if (ret->modnum == 0) /* Divide by zero check */
125 ret->modnum = 1;
126 ret->dotnum = (total / 50) + 1;
127 if (start_string != NULL)
128 ret->startup_string = GNUNET_strdup (start_string);
129 else
130 ret->startup_string = GNUNET_strdup ("");
131
132 return ret;
133}
134
135
136/**
137 * Update progress meter (increment by one).
138 *
139 * @param meter the meter to update and print info for
140 *
141 * @return GNUNET_YES if called the total requested,
142 * GNUNET_NO if more items expected
143 */
144static int
145update_meter (struct ProgressMeter *meter)
146{
147 if (meter->print == GNUNET_YES)
148 {
149 if (meter->completed % meter->modnum == 0)
150 {
151 if (meter->completed == 0)
152 {
153 FPRINTF (stdout, "%sProgress: [0%%", meter->startup_string);
154 }
155 else
156 FPRINTF (stdout, "%d%%",
157 (int) (((float) meter->completed / meter->total) * 100));
158 }
159 else if (meter->completed % meter->dotnum == 0)
160 FPRINTF (stdout, "%s", ".");
161
162 if (meter->completed + 1 == meter->total)
163 FPRINTF (stdout, "%d%%]\n", 100);
164 fflush (stdout);
165 }
166 meter->completed++;
167
168 if (meter->completed == meter->total)
169 return GNUNET_YES;
170 if (meter->completed > meter->total)
171 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Progress meter overflow!!\n");
172 return GNUNET_NO;
173}
174
175
176/**
177 * Reset progress meter.
178 *
179 * @param meter the meter to reset
180 *
181 * @return GNUNET_YES if meter reset,
182 * GNUNET_SYSERR on error
183 */
184static int
185reset_meter (struct ProgressMeter *meter)
186{
187 if (meter == NULL)
188 return GNUNET_SYSERR;
189
190 meter->completed = 0;
191 return GNUNET_YES;
192}
193
194
195/**
196 * Release resources for meter
197 *
198 * @param meter the meter to free
199 */
200static void
201free_meter (struct ProgressMeter *meter)
202{
203 GNUNET_free_non_null (meter->startup_string);
204 GNUNET_free (meter);
205}
206
207
208/**
209 * Shutdown task.
210 *
211 * @param cls NULL
212 * @param tc the task context
213 */
214static void
215do_shutdown (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
216{
217 if (NULL != mysql_ctx)
218 GNUNET_MYSQL_context_destroy (mysql_ctx);
219 if (NULL != meter)
220 free_meter (meter);
221
222 GNUNET_SCHEDULER_shutdown (); /* Stop scheduler to shutdown testbed run */
223}
224
225
226/**
227 * abort task to run on test timed out
228 *
229 * @param cls NULL
230 * @param tc the task context
231 */
232static void
233do_abort (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
234{
235 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Aborting\n");
236 abort_task = GNUNET_SCHEDULER_NO_TASK;
237 GNUNET_SCHEDULER_cancel (scan_task);
238 scan_task = GNUNET_SCHEDULER_NO_TASK;
239 result = GNUNET_SYSERR;
240 GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
241}
242
243
244/**
245 * Iterator over all states that inserts each state into the MySQL db.
246 *
247 * @param cls closure.
248 * @param key hash for current state.
249 * @param proof proof for current state.
250 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
251 * @param num_edges number of edges leaving current state.
252 * @param edges edges leaving current state.
253 */
254static void
255regex_iterator (void *cls,
256 const struct GNUNET_HashCode *key,
257 const char *proof,
258 int accepting,
259 unsigned int num_edges,
260 const struct GNUNET_REGEX_Edge *edges)
261{
262 char *stmt;
263 unsigned int i;
264
265 GNUNET_assert (NULL != mysql_ctx);
266
267 for (i = 0; i < num_edges; i++)
268 {
269 GNUNET_asprintf (&stmt,
270 "INSERT IGNORE INTO `%s` (`key`, `label`, `to_key`, `accepting`) VALUES ('%s', '%s', '%s', '%d');",
271 table_name, GNUNET_h2s_full (key), edges[i].label,
272 GNUNET_h2s_full (&edges[i].destination), accepting);
273
274 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Insert statement: %s\n", stmt);
275
276 if (GNUNET_OK != GNUNET_MYSQL_statement_run (mysql_ctx, stmt))
277 {
278 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
279 "Error executing mysql statement: %s\n", stmt);
280 }
281
282 GNUNET_free (stmt);
283 }
284}
285
286
287/**
288 * Announce a regex by creating the DFA and iterating over each state, inserting
289 * each state into a MySQL database.
290 *
291 * @param regex regular expression.
292 * @return GNUNET_OK on success, GNUNET_SYSERR on failure.
293 */
294static int
295announce_regex (const char *regex)
296{
297 struct GNUNET_REGEX_Automaton *dfa;
298
299 dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex), max_path_compression);
300
301 if (NULL == dfa)
302 {
303 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
304 "Failed to create DFA for regex %s\n", regex);
305 abort_task = GNUNET_SCHEDULER_add_now (&do_abort, NULL);
306 return GNUNET_SYSERR;
307 }
308
309 GNUNET_REGEX_iterate_all_edges (dfa, &regex_iterator, NULL);
310
311 GNUNET_REGEX_automaton_destroy (dfa);
312
313 return GNUNET_OK;
314}
315
316
317/**
318 * Function called with a filename.
319 *
320 * @param cls closure
321 * @param filename complete filename (absolute path)
322 * @return GNUNET_OK to continue to iterate,
323 * GNUNET_SYSERR to abort iteration with error!
324 */
325int
326policy_filename_cb (void *cls, const char *filename)
327{
328 char *regex;
329 char *data;
330 char *buf;
331 uint64_t filesize;
332 unsigned int offset;
333
334 GNUNET_assert (NULL != filename);
335
336 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
337 "Announcing regexes from file %s\n",
338 filename);
339
340 if (GNUNET_YES != GNUNET_DISK_file_test (filename))
341 {
342 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
343 "Could not find policy file %s\n", filename);
344 return GNUNET_OK;
345 }
346 if (GNUNET_OK != GNUNET_DISK_file_size (filename, &filesize, GNUNET_YES, GNUNET_YES))
347 filesize = 0;
348 if (0 == filesize)
349 {
350 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Policy file %s is empty.\n", filename);
351 return GNUNET_OK;
352 }
353 data = GNUNET_malloc (filesize);
354 if (filesize != GNUNET_DISK_fn_read (filename, data, filesize))
355 {
356 GNUNET_free (data);
357 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not read policy file %s.\n",
358 filename);
359 return GNUNET_OK;
360 }
361
362 update_meter (meter);
363
364 buf = data;
365 offset = 0;
366 regex = NULL;
367 while (offset < (filesize - 1))
368 {
369 offset++;
370 if (((data[offset] == '\n')) && (buf != &data[offset]))
371 {
372 data[offset] = '\0';
373 regex = buf;
374 GNUNET_assert (NULL != regex);
375 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n",
376 regex);
377 num_policies++;
378
379 if (GNUNET_OK != announce_regex (regex))
380 {
381 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
382 "Could not announce regex %s\n", regex);
383 }
384
385 buf = &data[offset + 1];
386 }
387 else if ((data[offset] == '\n') || (data[offset] == '\0'))
388 buf = &data[offset + 1];
389 }
390 GNUNET_free (data);
391 return GNUNET_OK;
392}
393
394
395/**
396 * Iterate over files contained in policy_dir.
397 *
398 * @param cls NULL
399 * @param tc the task context
400 */
401static void
402do_directory_scan (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
403{
404 struct GNUNET_TIME_Absolute start_time;
405 struct GNUNET_TIME_Relative duration;
406
407 if (GNUNET_SCHEDULER_NO_TASK != abort_task)
408 GNUNET_SCHEDULER_cancel (abort_task);
409
410 meter = create_meter (num_policy_files, "Announcing policy files\n", GNUNET_YES);
411 start_time = GNUNET_TIME_absolute_get ();
412 GNUNET_DISK_directory_scan (policy_dir,
413 &policy_filename_cb,
414 NULL);
415 duration = GNUNET_TIME_absolute_get_duration (start_time);
416 reset_meter (meter);
417 free_meter (meter);
418 meter = NULL;
419
420 printf ("Announced %u files containing %u policies in %s\n",
421 num_policy_files, num_policies,
422 GNUNET_STRINGS_relative_time_to_string (duration, GNUNET_NO));
423
424 result = GNUNET_OK;
425 GNUNET_SCHEDULER_add_now (&do_shutdown, NULL);
426}
427
428
429/**
430 * Main function that will be run by the scheduler.
431 *
432 * @param cls closure
433 * @param args remaining command-line arguments
434 * @param cfgfile name of the configuration file used (for saving, can be NULL!)
435 * @param config configuration
436 */
437static void
438run (void *cls, char *const *args, const char *cfgfile,
439 const struct GNUNET_CONFIGURATION_Handle *config)
440{
441 if (NULL == args[0])
442 {
443 fprintf (stderr, _("No policy directory specified on command line. Exiting.\n"));
444 result = GNUNET_SYSERR;
445 return;
446 }
447 if (GNUNET_YES != GNUNET_DISK_directory_test (args[0]))
448 {
449 fprintf (stderr, _("Specified policies directory does not exist. Exiting.\n"));
450 result = GNUNET_SYSERR;
451 return;
452 }
453 policy_dir = args[0];
454
455 num_policy_files = GNUNET_DISK_directory_scan (policy_dir,
456 NULL,
457 NULL);
458 meter = NULL;
459
460 if (NULL == table_name)
461 {
462 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "No table name specified, using default \"NFA\".\n");
463 table_name = "NFA";
464 }
465
466 mysql_ctx = GNUNET_MYSQL_context_create (config, "regex-mysql");
467 if (NULL == mysql_ctx)
468 {
469 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Failed to create mysql context\n");
470 result = GNUNET_SYSERR;
471 return;
472 }
473
474 result = GNUNET_OK;
475
476 scan_task = GNUNET_SCHEDULER_add_now (&do_directory_scan, NULL);
477
478 abort_task =
479 GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_relative_multiply
480 (GNUNET_TIME_UNIT_SECONDS, 10), &do_abort,
481 NULL);
482}
483
484
485/**
486 * Main function.
487 *
488 * @param argc argument count
489 * @param argv argument values
490 * @return 0 on success
491 */
492int
493main (int argc, char *const *argv)
494{
495 static const struct GNUNET_GETOPT_CommandLineOption options[] = {
496 {'t', "table", "TABLENAME",
497 gettext_noop ("name of the table to write DFAs"),
498 1, &GNUNET_GETOPT_set_string, &table_name},
499 {'p', "max-path-compression", "MAX_PATH_COMPRESSION",
500 gettext_noop ("maximum path compression length"),
501 1, &GNUNET_GETOPT_set_uint, &max_path_compression},
502 GNUNET_GETOPT_OPTION_END
503 };
504 int ret;
505
506 if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv))
507 return 2;
508
509 result = GNUNET_SYSERR;
510 ret =
511 GNUNET_PROGRAM_run (argc, argv, "gnunet-regex-simulationprofiler [OPTIONS] policy-dir",
512 _("Profiler for regex library"),
513 options, &run, NULL);
514 if (GNUNET_OK != ret)
515 return ret;
516 if (GNUNET_OK != result)
517 return 1;
518 return 0;
519}
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 637eac00d..3281ff469 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -1687,7 +1687,7 @@ dfa_compress_paths_helper (struct GNUNET_REGEX_Automaton *dfa,
1687// || cur->transition_count > 1 1687// || cur->transition_count > 1
1688 || GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 && 1688 || GNUNET_YES == cur->marked) || (start != dfa->start && max_len > 0 &&
1689 max_len == strlen (label)) || 1689 max_len == strlen (label)) ||
1690 (start == dfa->start && GNUNET_REGEX_INITIAL_BITS == strlen (label)))) 1690 (start == dfa->start && GNUNET_REGEX_INITIAL_BYTES == strlen (label))))
1691 { 1691 {
1692 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition)); 1692 t = GNUNET_malloc (sizeof (struct GNUNET_REGEX_Transition));
1693 t->label = GNUNET_strdup (label); 1693 t->label = GNUNET_strdup (label);
@@ -2482,17 +2482,26 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
2482 } 2482 }
2483} 2483}
2484 2484
2485
2486/** 2485/**
2487 * Construct DFA for the given 'regex' of length 'len' 2486 * Construct DFA for the given 'regex' of length 'len'.
2488 * 2487 *
2489 * @param regex regular expression string 2488 * Path compression means, that for example a DFA o -> a -> b -> c -> o will be
2490 * @param len length of the regular expression 2489 * compressed to o -> abc -> o. Note that this parameter influences the
2490 * non-determinism of states of the resulting NFA in the DHT (number of outgoing
2491 * edges with the same label). For example for an application that stores IPv4
2492 * addresses as bitstrings it could make sense to limit the path compression to
2493 * 4 or 8.
2491 * 2494 *
2492 * @return DFA, needs to be freed using GNUNET_REGEX_destroy_automaton 2495 * @param regex regular expression string.
2496 * @param len length of the regular expression.
2497 * @param max_path_len limit the path compression length to the
2498 * given value. If set to 1, no path compression is applied. Set to 0 for
2499 * maximal possible path compression (generally not desireable).
2500 * @return DFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
2493 */ 2501 */
2494struct GNUNET_REGEX_Automaton * 2502struct GNUNET_REGEX_Automaton *
2495GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) 2503GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2504 int max_path_len)
2496{ 2505{
2497 struct GNUNET_REGEX_Context ctx; 2506 struct GNUNET_REGEX_Context ctx;
2498 struct GNUNET_REGEX_Automaton *dfa; 2507 struct GNUNET_REGEX_Automaton *dfa;
@@ -2535,7 +2544,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
2535 automaton_create_proofs (dfa); 2544 automaton_create_proofs (dfa);
2536 2545
2537 // Compress DFA paths 2546 // Compress DFA paths
2538 dfa_compress_paths (&ctx, dfa, 8); 2547 if (1 != max_path_len)
2548 dfa_compress_paths (&ctx, dfa, max_path_len);
2539 2549
2540 // Add strides to DFA 2550 // Add strides to DFA
2541 //GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2); 2551 //GNUNET_REGEX_dfa_add_multi_strides (&ctx, dfa, 2);
@@ -2770,7 +2780,7 @@ GNUNET_REGEX_get_first_key (const char *input_string, size_t string_len,
2770 2780
2771 size = 2781 size =
2772 string_len < 2782 string_len <
2773 GNUNET_REGEX_INITIAL_BITS ? string_len : GNUNET_REGEX_INITIAL_BITS; 2783 GNUNET_REGEX_INITIAL_BYTES ? string_len : GNUNET_REGEX_INITIAL_BYTES;
2774 2784
2775 if (NULL == input_string) 2785 if (NULL == input_string)
2776 { 2786 {
@@ -2931,7 +2941,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
2931 s->marked = GNUNET_NO; 2941 s->marked = GNUNET_NO;
2932 } 2942 }
2933 2943
2934 iterate_initial_edge (GNUNET_REGEX_INITIAL_BITS, GNUNET_REGEX_INITIAL_BITS, 2944 iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, GNUNET_REGEX_INITIAL_BYTES,
2935 NULL, a->start, iterator, iterator_cls); 2945 NULL, a->start, iterator, iterator_cls);
2936} 2946}
2937 2947
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
index 49e5dd022..21c498145 100644
--- a/src/regex/regex_internal.h
+++ b/src/regex/regex_internal.h
@@ -267,6 +267,18 @@ struct GNUNET_REGEX_Automaton
267 267
268 268
269/** 269/**
270 * Construct an NFA by parsing the regex string of length 'len'.
271 *
272 * @param regex regular expression string.
273 * @param len length of the string.
274 *
275 * @return NFA, needs to be freed using GNUNET_REGEX_automaton_destroy.
276 */
277struct GNUNET_REGEX_Automaton *
278GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
279
280
281/**
270 * Function that get's passed to automaton traversal and is called before each 282 * Function that get's passed to automaton traversal and is called before each
271 * next traversal from state 's' using transition 't' to check if traversal 283 * next traversal from state 's' using transition 't' to check if traversal
272 * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue. 284 * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue.
diff --git a/src/regex/regex_simulation_profiler_test.conf b/src/regex/regex_simulation_profiler_test.conf
new file mode 100644
index 000000000..738fe7a81
--- /dev/null
+++ b/src/regex/regex_simulation_profiler_test.conf
@@ -0,0 +1,6 @@
1[regex-mysql]
2DATABASE = regex
3USER = gnunet
4PASSWORD =
5HOST = localhost
6PORT = 3306
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c
index ffe6d74bf..02aa1d01c 100644
--- a/src/regex/test_regex_eval_api.c
+++ b/src/regex/test_regex_eval_api.c
@@ -92,7 +92,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
92 } 92 }
93 93
94 /* Match string using DFA */ 94 /* Match string using DFA */
95 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); 95 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx), 0);
96 if (NULL == dfa) 96 if (NULL == dfa)
97 { 97 {
98 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); 98 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
@@ -123,7 +123,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
123 123
124 /* Match canonical regex */ 124 /* Match canonical regex */
125 dfa = 125 dfa =
126 GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex)); 126 GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex), 0);
127 if (NULL == dfa) 127 if (NULL == dfa)
128 { 128 {
129 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); 129 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
@@ -333,12 +333,12 @@ main (int argc, char *argv[])
333 GNUNET_REGEX_automaton_destroy (a); 333 GNUNET_REGEX_automaton_destroy (a);
334 334
335 /* DFA test */ 335 /* DFA test */
336 a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); 336 a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0);
337 check_dfa += test_automaton (a, &rx, &rxstr[i]); 337 check_dfa += test_automaton (a, &rx, &rxstr[i]);
338 check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a)); 338 check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a));
339 GNUNET_REGEX_automaton_destroy (a); 339 GNUNET_REGEX_automaton_destroy (a);
340 340
341 a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); 341 a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof), 0);
342 check_dfa += test_automaton (a, &rx, &rxstr[i]); 342 check_dfa += test_automaton (a, &rx, &rxstr[i]);
343 GNUNET_REGEX_automaton_destroy (a); 343 GNUNET_REGEX_automaton_destroy (a);
344 if (0 != check_dfa) 344 if (0 != check_dfa)
diff --git a/src/regex/test_regex_graph_api.c b/src/regex/test_regex_graph_api.c
index b57c79edd..d429025d9 100644
--- a/src/regex/test_regex_graph_api.c
+++ b/src/regex/test_regex_graph_api.c
@@ -26,6 +26,7 @@
26#include <time.h> 26#include <time.h>
27#include "platform.h" 27#include "platform.h"
28#include "gnunet_regex_lib.h" 28#include "gnunet_regex_lib.h"
29#include "regex_internal.h"
29 30
30#define KEEP_FILES 0 31#define KEEP_FILES 0
31 32
@@ -134,19 +135,19 @@ main (int argc, char *argv[])
134 135
135 136
136 // Check DFA graph creation 137 // Check DFA graph creation
137 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); 138 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
138 GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); 139 GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT);
139 GNUNET_REGEX_automaton_destroy (a); 140 GNUNET_REGEX_automaton_destroy (a);
140 error += filecheck (filename); 141 error += filecheck (filename);
141 142
142 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); 143 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
143 GNUNET_REGEX_automaton_save_graph (a, filename, 144 GNUNET_REGEX_automaton_save_graph (a, filename,
144 GNUNET_REGEX_GRAPH_DEFAULT | 145 GNUNET_REGEX_GRAPH_DEFAULT |
145 GNUNET_REGEX_GRAPH_VERBOSE); 146 GNUNET_REGEX_GRAPH_VERBOSE);
146 GNUNET_REGEX_automaton_destroy (a); 147 GNUNET_REGEX_automaton_destroy (a);
147 error += filecheck (filename); 148 error += filecheck (filename);
148 149
149 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); 150 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
150 GNUNET_REGEX_automaton_save_graph (a, filename, 151 GNUNET_REGEX_automaton_save_graph (a, filename,
151 GNUNET_REGEX_GRAPH_DEFAULT | 152 GNUNET_REGEX_GRAPH_DEFAULT |
152 GNUNET_REGEX_GRAPH_COLORING); 153 GNUNET_REGEX_GRAPH_COLORING);
@@ -154,7 +155,7 @@ main (int argc, char *argv[])
154 error += filecheck (filename); 155 error += filecheck (filename);
155 156
156 157
157 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); 158 a = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 0);
158 GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); //| 159 GNUNET_REGEX_automaton_save_graph (a, filename, GNUNET_REGEX_GRAPH_DEFAULT); //|
159 // GNUNET_REGEX_GRAPH_VERBOSE | 160 // GNUNET_REGEX_GRAPH_VERBOSE |
160 //GNUNET_REGEX_GRAPH_COLORING); 161 //GNUNET_REGEX_GRAPH_COLORING);
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c
index 72847476b..09abdb7c7 100644
--- a/src/regex/test_regex_iterate_api.c
+++ b/src/regex/test_regex_iterate_api.c
@@ -188,7 +188,7 @@ main (int argc, char *argv[])
188 ctx.string_count = rxstr[i].string_count; 188 ctx.string_count = rxstr[i].string_count;
189 ctx.strings = rxstr[i].strings; 189 ctx.strings = rxstr[i].strings;
190 ctx.match_count = 0; 190 ctx.match_count = 0;
191 dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); 191 dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0);
192 GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); 192 GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx);
193 num_transitions = 193 num_transitions =
194 GNUNET_REGEX_get_transition_count (dfa) - dfa->start->transition_count; 194 GNUNET_REGEX_get_transition_count (dfa) - dfa->start->transition_count;
@@ -234,7 +234,7 @@ main (int argc, char *argv[])
234 ctx.strings = rxstr[i].strings; 234 ctx.strings = rxstr[i].strings;
235 ctx.match_count = 0; 235 ctx.match_count = 0;
236 236
237 dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); 237 dfa = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0);
238 GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2); 238 GNUNET_REGEX_dfa_add_multi_strides (NULL, dfa, 2);
239 GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx); 239 GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, &ctx);
240 240
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c
index 8ccbe00ad..a5e049fba 100644
--- a/src/regex/test_regex_proofs.c
+++ b/src/regex/test_regex_proofs.c
@@ -46,10 +46,10 @@ test_proof (const char *regex)
46 char *c_rx1; 46 char *c_rx1;
47 const char *c_rx2; 47 const char *c_rx2;
48 48
49 dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); 49 dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex), 1);
50 c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); 50 c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
51 GNUNET_REGEX_automaton_destroy (dfa); 51 GNUNET_REGEX_automaton_destroy (dfa);
52 dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1)); 52 dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1), 1);
53 c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa); 53 c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa);
54 54
55 error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; 55 error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1;
@@ -126,8 +126,8 @@ test_proofs_static (void)
126 126
127 for (i = 0; i < 8; i += 2) 127 for (i = 0; i < 8; i += 2)
128 { 128 {
129 dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); 129 dfa1 = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]), 1);
130 dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); 130 dfa2 = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]), 1);
131 131
132 canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1); 132 canon_rx1 = GNUNET_REGEX_get_canonical_regex (dfa1);
133 canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2); 133 canon_rx2 = GNUNET_REGEX_get_canonical_regex (dfa2);