diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-10-26 14:33:59 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-10-26 14:33:59 +0000 |
commit | 7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b (patch) | |
tree | bd6c9d2d20fe9fb8f405075e929b3056cdd67736 /src/regex/gnunet-regex-simulation-profiler.c | |
parent | 993a74c191842519ad6c82216fb7a0ee2bb09456 (diff) | |
download | gnunet-7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b.tar.gz gnunet-7a741b2fa8c96076f8b05a2d08e4b6b3ba78360b.zip |
- Added path compression parameter to DFA construction API
- Moved NFA construction to internal header
- Added regex simulation profiler (for profiling the NFA, that results by merging
several DFAs in the DHT, in a database)
Diffstat (limited to 'src/regex/gnunet-regex-simulation-profiler.c')
-rw-r--r-- | src/regex/gnunet-regex-simulation-profiler.c | 519 |
1 files changed, 519 insertions, 0 deletions
diff --git a/src/regex/gnunet-regex-simulation-profiler.c b/src/regex/gnunet-regex-simulation-profiler.c new file mode 100644 index 000000000..14d0d6b07 --- /dev/null +++ b/src/regex/gnunet-regex-simulation-profiler.c | |||
@@ -0,0 +1,519 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | (C) 2011, 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-regex-simulation-profiler.c | ||
23 | * @brief Regex profiler that dumps all DFAs into a database instead of | ||
24 | * using the DHT (with mesh). | ||
25 | * @author Maximilian Szengel | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include "platform.h" | ||
30 | #include "gnunet_util_lib.h" | ||
31 | #include "gnunet_regex_lib.h" | ||
32 | #include "gnunet_mysql_lib.h" | ||
33 | |||
34 | /** | ||
35 | * Simple struct to keep track of progress, and print a | ||
36 | * nice little percentage meter for long running tasks. | ||
37 | */ | ||
38 | struct ProgressMeter | ||
39 | { | ||
40 | unsigned int total; | ||
41 | |||
42 | unsigned int modnum; | ||
43 | |||
44 | unsigned int dotnum; | ||
45 | |||
46 | unsigned int completed; | ||
47 | |||
48 | int print; | ||
49 | |||
50 | char *startup_string; | ||
51 | }; | ||
52 | |||
53 | |||
54 | /** | ||
55 | * Handle for the progress meter | ||
56 | */ | ||
57 | static struct ProgressMeter *meter; | ||
58 | |||
59 | /** | ||
60 | * Abort task identifier. | ||
61 | */ | ||
62 | static GNUNET_SCHEDULER_TaskIdentifier abort_task; | ||
63 | |||
64 | /** | ||
65 | * Scan task identifier; | ||
66 | */ | ||
67 | static GNUNET_SCHEDULER_TaskIdentifier scan_task; | ||
68 | |||
69 | /** | ||
70 | * Global testing status. | ||
71 | */ | ||
72 | static int result; | ||
73 | |||
74 | /** | ||
75 | * MySQL context. | ||
76 | */ | ||
77 | static struct GNUNET_MYSQL_Context *mysql_ctx; | ||
78 | |||
79 | /** | ||
80 | * MySQL table name. | ||
81 | */ | ||
82 | static char *table_name; | ||
83 | |||
84 | /** | ||
85 | * Policy dir containing files that contain policies. | ||
86 | */ | ||
87 | static char *policy_dir; | ||
88 | |||
89 | /** | ||
90 | * Number of policy files. | ||
91 | */ | ||
92 | static unsigned int num_policy_files; | ||
93 | |||
94 | /** | ||
95 | * Number of policies. | ||
96 | */ | ||
97 | static unsigned int num_policies; | ||
98 | |||
99 | /** | ||
100 | * Maximal path compression length. | ||
101 | */ | ||
102 | static unsigned int max_path_compression; | ||
103 | |||
104 | |||
105 | /** | ||
106 | * Create a meter to keep track of the progress of some task. | ||
107 | * | ||
108 | * @param total the total number of items to complete | ||
109 | * @param start_string a string to prefix the meter with (if printing) | ||
110 | * @param print GNUNET_YES to print the meter, GNUNET_NO to count | ||
111 | * internally only | ||
112 | * | ||
113 | * @return the progress meter | ||
114 | */ | ||
115 | static struct ProgressMeter * | ||
116 | create_meter (unsigned int total, char *start_string, int print) | ||
117 | { | ||
118 | struct ProgressMeter *ret; | ||
119 | |||
120 | ret = GNUNET_malloc (sizeof (struct ProgressMeter)); | ||
121 | ret->print = print; | ||
122 | ret->total = total; | ||
123 | ret->modnum = total / 4; | ||
124 | if (ret->modnum == 0) /* Divide by zero check */ | ||
125 | ret->modnum = 1; | ||
126 | ret->dotnum = (total / 50) + 1; | ||
127 | if (start_string != NULL) | ||
128 | ret->startup_string = GNUNET_strdup (start_string); | ||
129 | else | ||
130 | ret->startup_string = GNUNET_strdup (""); | ||
131 | |||
132 | return ret; | ||
133 | } | ||
134 | |||
135 | |||
136 | /** | ||
137 | * Update progress meter (increment by one). | ||
138 | * | ||
139 | * @param meter the meter to update and print info for | ||
140 | * | ||
141 | * @return GNUNET_YES if called the total requested, | ||
142 | * GNUNET_NO if more items expected | ||
143 | */ | ||
144 | static int | ||
145 | update_meter (struct ProgressMeter *meter) | ||
146 | { | ||
147 | if (meter->print == GNUNET_YES) | ||
148 | { | ||
149 | if (meter->completed % meter->modnum == 0) | ||
150 | { | ||
151 | if (meter->completed == 0) | ||
152 | { | ||
153 | FPRINTF (stdout, "%sProgress: [0%%", meter->startup_string); | ||
154 | } | ||
155 | else | ||
156 | FPRINTF (stdout, "%d%%", | ||
157 | (int) (((float) meter->completed / meter->total) * 100)); | ||
158 | } | ||
159 | else if (meter->completed % meter->dotnum == 0) | ||
160 | FPRINTF (stdout, "%s", "."); | ||
161 | |||
162 | if (meter->completed + 1 == meter->total) | ||
163 | FPRINTF (stdout, "%d%%]\n", 100); | ||
164 | fflush (stdout); | ||
165 | } | ||
166 | meter->completed++; | ||
167 | |||
168 | if (meter->completed == meter->total) | ||
169 | return GNUNET_YES; | ||
170 | if (meter->completed > meter->total) | ||
171 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Progress meter overflow!!\n"); | ||
172 | return GNUNET_NO; | ||
173 | } | ||
174 | |||
175 | |||
176 | /** | ||
177 | * Reset progress meter. | ||
178 | * | ||
179 | * @param meter the meter to reset | ||
180 | * | ||
181 | * @return GNUNET_YES if meter reset, | ||
182 | * GNUNET_SYSERR on error | ||
183 | */ | ||
184 | static int | ||
185 | reset_meter (struct ProgressMeter *meter) | ||
186 | { | ||
187 | if (meter == NULL) | ||
188 | return GNUNET_SYSERR; | ||
189 | |||
190 | meter->completed = 0; | ||
191 | return GNUNET_YES; | ||
192 | } | ||
193 | |||
194 | |||
195 | /** | ||
196 | * Release resources for meter | ||
197 | * | ||
198 | * @param meter the meter to free | ||
199 | */ | ||
200 | static void | ||
201 | free_meter (struct ProgressMeter *meter) | ||
202 | { | ||
203 | GNUNET_free_non_null (meter->startup_string); | ||
204 | GNUNET_free (meter); | ||
205 | } | ||
206 | |||
207 | |||
208 | /** | ||
209 | * Shutdown task. | ||
210 | * | ||
211 | * @param cls NULL | ||
212 | * @param tc the task context | ||
213 | */ | ||
214 | static void | ||
215 | do_shutdown (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
216 | { | ||
217 | if (NULL != mysql_ctx) | ||
218 | GNUNET_MYSQL_context_destroy (mysql_ctx); | ||
219 | if (NULL != meter) | ||
220 | free_meter (meter); | ||
221 | |||
222 | GNUNET_SCHEDULER_shutdown (); /* Stop scheduler to shutdown testbed run */ | ||
223 | } | ||
224 | |||
225 | |||
226 | /** | ||
227 | * abort task to run on test timed out | ||
228 | * | ||
229 | * @param cls NULL | ||
230 | * @param tc the task context | ||
231 | */ | ||
232 | static void | ||
233 | do_abort (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
234 | { | ||
235 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Aborting\n"); | ||
236 | abort_task = GNUNET_SCHEDULER_NO_TASK; | ||
237 | GNUNET_SCHEDULER_cancel (scan_task); | ||
238 | scan_task = GNUNET_SCHEDULER_NO_TASK; | ||
239 | result = GNUNET_SYSERR; | ||
240 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
241 | } | ||
242 | |||
243 | |||
244 | /** | ||
245 | * Iterator over all states that inserts each state into the MySQL db. | ||
246 | * | ||
247 | * @param cls closure. | ||
248 | * @param key hash for current state. | ||
249 | * @param proof proof for current state. | ||
250 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
251 | * @param num_edges number of edges leaving current state. | ||
252 | * @param edges edges leaving current state. | ||
253 | */ | ||
254 | static void | ||
255 | regex_iterator (void *cls, | ||
256 | const struct GNUNET_HashCode *key, | ||
257 | const char *proof, | ||
258 | int accepting, | ||
259 | unsigned int num_edges, | ||
260 | const struct GNUNET_REGEX_Edge *edges) | ||
261 | { | ||
262 | char *stmt; | ||
263 | unsigned int i; | ||
264 | |||
265 | GNUNET_assert (NULL != mysql_ctx); | ||
266 | |||
267 | for (i = 0; i < num_edges; i++) | ||
268 | { | ||
269 | GNUNET_asprintf (&stmt, | ||
270 | "INSERT IGNORE INTO `%s` (`key`, `label`, `to_key`, `accepting`) VALUES ('%s', '%s', '%s', '%d');", | ||
271 | table_name, GNUNET_h2s_full (key), edges[i].label, | ||
272 | GNUNET_h2s_full (&edges[i].destination), accepting); | ||
273 | |||
274 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Insert statement: %s\n", stmt); | ||
275 | |||
276 | if (GNUNET_OK != GNUNET_MYSQL_statement_run (mysql_ctx, stmt)) | ||
277 | { | ||
278 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
279 | "Error executing mysql statement: %s\n", stmt); | ||
280 | } | ||
281 | |||
282 | GNUNET_free (stmt); | ||
283 | } | ||
284 | } | ||
285 | |||
286 | |||
287 | /** | ||
288 | * Announce a regex by creating the DFA and iterating over each state, inserting | ||
289 | * each state into a MySQL database. | ||
290 | * | ||
291 | * @param regex regular expression. | ||
292 | * @return GNUNET_OK on success, GNUNET_SYSERR on failure. | ||
293 | */ | ||
294 | static int | ||
295 | announce_regex (const char *regex) | ||
296 | { | ||
297 | struct GNUNET_REGEX_Automaton *dfa; | ||
298 | |||
299 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex), max_path_compression); | ||
300 | |||
301 | if (NULL == dfa) | ||
302 | { | ||
303 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
304 | "Failed to create DFA for regex %s\n", regex); | ||
305 | abort_task = GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
306 | return GNUNET_SYSERR; | ||
307 | } | ||
308 | |||
309 | GNUNET_REGEX_iterate_all_edges (dfa, ®ex_iterator, NULL); | ||
310 | |||
311 | GNUNET_REGEX_automaton_destroy (dfa); | ||
312 | |||
313 | return GNUNET_OK; | ||
314 | } | ||
315 | |||
316 | |||
317 | /** | ||
318 | * Function called with a filename. | ||
319 | * | ||
320 | * @param cls closure | ||
321 | * @param filename complete filename (absolute path) | ||
322 | * @return GNUNET_OK to continue to iterate, | ||
323 | * GNUNET_SYSERR to abort iteration with error! | ||
324 | */ | ||
325 | int | ||
326 | policy_filename_cb (void *cls, const char *filename) | ||
327 | { | ||
328 | char *regex; | ||
329 | char *data; | ||
330 | char *buf; | ||
331 | uint64_t filesize; | ||
332 | unsigned int offset; | ||
333 | |||
334 | GNUNET_assert (NULL != filename); | ||
335 | |||
336 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
337 | "Announcing regexes from file %s\n", | ||
338 | filename); | ||
339 | |||
340 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) | ||
341 | { | ||
342 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
343 | "Could not find policy file %s\n", filename); | ||
344 | return GNUNET_OK; | ||
345 | } | ||
346 | if (GNUNET_OK != GNUNET_DISK_file_size (filename, &filesize, GNUNET_YES, GNUNET_YES)) | ||
347 | filesize = 0; | ||
348 | if (0 == filesize) | ||
349 | { | ||
350 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Policy file %s is empty.\n", filename); | ||
351 | return GNUNET_OK; | ||
352 | } | ||
353 | data = GNUNET_malloc (filesize); | ||
354 | if (filesize != GNUNET_DISK_fn_read (filename, data, filesize)) | ||
355 | { | ||
356 | GNUNET_free (data); | ||
357 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Could not read policy file %s.\n", | ||
358 | filename); | ||
359 | return GNUNET_OK; | ||
360 | } | ||
361 | |||
362 | update_meter (meter); | ||
363 | |||
364 | buf = data; | ||
365 | offset = 0; | ||
366 | regex = NULL; | ||
367 | while (offset < (filesize - 1)) | ||
368 | { | ||
369 | offset++; | ||
370 | if (((data[offset] == '\n')) && (buf != &data[offset])) | ||
371 | { | ||
372 | data[offset] = '\0'; | ||
373 | regex = buf; | ||
374 | GNUNET_assert (NULL != regex); | ||
375 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n", | ||
376 | regex); | ||
377 | num_policies++; | ||
378 | |||
379 | if (GNUNET_OK != announce_regex (regex)) | ||
380 | { | ||
381 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
382 | "Could not announce regex %s\n", regex); | ||
383 | } | ||
384 | |||
385 | buf = &data[offset + 1]; | ||
386 | } | ||
387 | else if ((data[offset] == '\n') || (data[offset] == '\0')) | ||
388 | buf = &data[offset + 1]; | ||
389 | } | ||
390 | GNUNET_free (data); | ||
391 | return GNUNET_OK; | ||
392 | } | ||
393 | |||
394 | |||
395 | /** | ||
396 | * Iterate over files contained in policy_dir. | ||
397 | * | ||
398 | * @param cls NULL | ||
399 | * @param tc the task context | ||
400 | */ | ||
401 | static void | ||
402 | do_directory_scan (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
403 | { | ||
404 | struct GNUNET_TIME_Absolute start_time; | ||
405 | struct GNUNET_TIME_Relative duration; | ||
406 | |||
407 | if (GNUNET_SCHEDULER_NO_TASK != abort_task) | ||
408 | GNUNET_SCHEDULER_cancel (abort_task); | ||
409 | |||
410 | meter = create_meter (num_policy_files, "Announcing policy files\n", GNUNET_YES); | ||
411 | start_time = GNUNET_TIME_absolute_get (); | ||
412 | GNUNET_DISK_directory_scan (policy_dir, | ||
413 | &policy_filename_cb, | ||
414 | NULL); | ||
415 | duration = GNUNET_TIME_absolute_get_duration (start_time); | ||
416 | reset_meter (meter); | ||
417 | free_meter (meter); | ||
418 | meter = NULL; | ||
419 | |||
420 | printf ("Announced %u files containing %u policies in %s\n", | ||
421 | num_policy_files, num_policies, | ||
422 | GNUNET_STRINGS_relative_time_to_string (duration, GNUNET_NO)); | ||
423 | |||
424 | result = GNUNET_OK; | ||
425 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
426 | } | ||
427 | |||
428 | |||
429 | /** | ||
430 | * Main function that will be run by the scheduler. | ||
431 | * | ||
432 | * @param cls closure | ||
433 | * @param args remaining command-line arguments | ||
434 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
435 | * @param config configuration | ||
436 | */ | ||
437 | static void | ||
438 | run (void *cls, char *const *args, const char *cfgfile, | ||
439 | const struct GNUNET_CONFIGURATION_Handle *config) | ||
440 | { | ||
441 | if (NULL == args[0]) | ||
442 | { | ||
443 | fprintf (stderr, _("No policy directory specified on command line. Exiting.\n")); | ||
444 | result = GNUNET_SYSERR; | ||
445 | return; | ||
446 | } | ||
447 | if (GNUNET_YES != GNUNET_DISK_directory_test (args[0])) | ||
448 | { | ||
449 | fprintf (stderr, _("Specified policies directory does not exist. Exiting.\n")); | ||
450 | result = GNUNET_SYSERR; | ||
451 | return; | ||
452 | } | ||
453 | policy_dir = args[0]; | ||
454 | |||
455 | num_policy_files = GNUNET_DISK_directory_scan (policy_dir, | ||
456 | NULL, | ||
457 | NULL); | ||
458 | meter = NULL; | ||
459 | |||
460 | if (NULL == table_name) | ||
461 | { | ||
462 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "No table name specified, using default \"NFA\".\n"); | ||
463 | table_name = "NFA"; | ||
464 | } | ||
465 | |||
466 | mysql_ctx = GNUNET_MYSQL_context_create (config, "regex-mysql"); | ||
467 | if (NULL == mysql_ctx) | ||
468 | { | ||
469 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Failed to create mysql context\n"); | ||
470 | result = GNUNET_SYSERR; | ||
471 | return; | ||
472 | } | ||
473 | |||
474 | result = GNUNET_OK; | ||
475 | |||
476 | scan_task = GNUNET_SCHEDULER_add_now (&do_directory_scan, NULL); | ||
477 | |||
478 | abort_task = | ||
479 | GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_relative_multiply | ||
480 | (GNUNET_TIME_UNIT_SECONDS, 10), &do_abort, | ||
481 | NULL); | ||
482 | } | ||
483 | |||
484 | |||
485 | /** | ||
486 | * Main function. | ||
487 | * | ||
488 | * @param argc argument count | ||
489 | * @param argv argument values | ||
490 | * @return 0 on success | ||
491 | */ | ||
492 | int | ||
493 | main (int argc, char *const *argv) | ||
494 | { | ||
495 | static const struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
496 | {'t', "table", "TABLENAME", | ||
497 | gettext_noop ("name of the table to write DFAs"), | ||
498 | 1, &GNUNET_GETOPT_set_string, &table_name}, | ||
499 | {'p', "max-path-compression", "MAX_PATH_COMPRESSION", | ||
500 | gettext_noop ("maximum path compression length"), | ||
501 | 1, &GNUNET_GETOPT_set_uint, &max_path_compression}, | ||
502 | GNUNET_GETOPT_OPTION_END | ||
503 | }; | ||
504 | int ret; | ||
505 | |||
506 | if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv)) | ||
507 | return 2; | ||
508 | |||
509 | result = GNUNET_SYSERR; | ||
510 | ret = | ||
511 | GNUNET_PROGRAM_run (argc, argv, "gnunet-regex-simulationprofiler [OPTIONS] policy-dir", | ||
512 | _("Profiler for regex library"), | ||
513 | options, &run, NULL); | ||
514 | if (GNUNET_OK != ret) | ||
515 | return ret; | ||
516 | if (GNUNET_OK != result) | ||
517 | return 1; | ||
518 | return 0; | ||
519 | } | ||