diff options
Diffstat (limited to 'src/service/regex')
27 files changed, 11147 insertions, 0 deletions
diff --git a/src/service/regex/.gitignore b/src/service/regex/.gitignore new file mode 100644 index 000000000..39dc89c88 --- /dev/null +++ b/src/service/regex/.gitignore | |||
@@ -0,0 +1,12 @@ | |||
1 | perf-regex | ||
2 | gnunet-daemon-regexprofiler | ||
3 | gnunet-regex-profiler | ||
4 | gnunet-regex-simulation-profiler | ||
5 | gnunet-service-regex | ||
6 | test_graph.dot | ||
7 | test_regex_api | ||
8 | test_regex_eval_api | ||
9 | test_regex_graph_api | ||
10 | test_regex_integration | ||
11 | test_regex_iterate_api | ||
12 | test_regex_proofs | ||
diff --git a/src/service/regex/Makefile.am b/src/service/regex/Makefile.am new file mode 100644 index 000000000..63c166a23 --- /dev/null +++ b/src/service/regex/Makefile.am | |||
@@ -0,0 +1,139 @@ | |||
1 | # This Makefile.am is in the public domain | ||
2 | AM_CPPFLAGS = -I$(top_srcdir)/src/include | ||
3 | |||
4 | if USE_COVERAGE | ||
5 | AM_CFLAGS = --coverage | ||
6 | endif | ||
7 | |||
8 | pkgcfgdir= $(pkgdatadir)/config.d/ | ||
9 | |||
10 | libexecdir= $(pkglibdir)/libexec/ | ||
11 | |||
12 | plugindir = $(libdir)/gnunet | ||
13 | |||
14 | pkgcfg_DATA = \ | ||
15 | regex.conf | ||
16 | |||
17 | libexec_PROGRAMS = \ | ||
18 | gnunet-service-regex \ | ||
19 | gnunet-daemon-regexprofiler | ||
20 | |||
21 | REGEX_INTERNAL = \ | ||
22 | regex_internal_lib.h \ | ||
23 | regex_internal.h regex_internal.c \ | ||
24 | regex_internal_dht.c | ||
25 | REGEX_INTERNAL_TEST = \ | ||
26 | $(REGEX_INTERNAL) \ | ||
27 | regex_test_lib.c regex_test_lib.h \ | ||
28 | regex_test_graph.c \ | ||
29 | regex_test_random.c | ||
30 | |||
31 | gnunet_service_regex_SOURCES = \ | ||
32 | $(REGEX_INTERNAL) gnunet-service-regex.c | ||
33 | gnunet_service_regex_LDADD = -lm \ | ||
34 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
35 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
36 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la \ | ||
37 | $(top_builddir)/src/lib/util/libgnunetutil.la \ | ||
38 | $(GN_LIBINTL) | ||
39 | |||
40 | lib_LTLIBRARIES = \ | ||
41 | libgnunetregex.la | ||
42 | |||
43 | libgnunetregex_la_SOURCES = \ | ||
44 | regex_api_announce.c \ | ||
45 | regex_api_search.c \ | ||
46 | regex_ipc.h | ||
47 | libgnunetregex_la_LIBADD = \ | ||
48 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
49 | libgnunetregex_la_LDFLAGS = \ | ||
50 | $(GN_LIBINTL) \ | ||
51 | $(GN_LIB_LDFLAGS) \ | ||
52 | -version-info 3:1:0 | ||
53 | |||
54 | |||
55 | noinst_PROGRAMS = $(noinst_mysql_progs) \ | ||
56 | perf-regex | ||
57 | |||
58 | perf_regex_SOURCES = \ | ||
59 | $(REGEX_INTERNAL_TEST) perf-regex.c | ||
60 | perf_regex_LDADD = -lm \ | ||
61 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
62 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
63 | $(top_builddir)/src/lib/util/libgnunetutil.la \ | ||
64 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la | ||
65 | perf_regex_LDFLAGS = \ | ||
66 | $(GN_LIBINTL) | ||
67 | |||
68 | gnunet_daemon_regexprofiler_SOURCES = \ | ||
69 | $(REGEX_INTERNAL_TEST) gnunet-daemon-regexprofiler.c | ||
70 | gnunet_daemon_regexprofiler_LDADD = -lm \ | ||
71 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
72 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
73 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la \ | ||
74 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
75 | gnunet_daemon_regexprofiler_LDFLAGS = \ | ||
76 | $(GN_LIBINTL) | ||
77 | |||
# NOTE: the check_PROGRAMS list is commented out, so $(check_PROGRAMS)
# used for TESTS below expands to nothing here.  The test_regex_*
# _SOURCES/_LDADD definitions further down are presumably kept so the
# list can be re-enabled later -- TODO confirm.
#check_PROGRAMS = \
#  test_regex_integration \
#  test_regex_eval_api \
#  test_regex_iterate_api \
#  test_regex_proofs \
#  test_regex_graph_api \
#  test_regex_api

if ENABLE_TEST_RUN
AM_TESTS_ENVIRONMENT=export GNUNET_PREFIX=$${GNUNET_PREFIX:-@libdir@};export PATH=$${GNUNET_PREFIX:-@prefix@}/bin:$$PATH;unset XDG_DATA_HOME;unset XDG_CONFIG_HOME;
TESTS = $(check_PROGRAMS)
endif
90 | |||
91 | test_regex_eval_api_SOURCES = \ | ||
92 | $(REGEX_INTERNAL_TEST) test_regex_eval_api.c | ||
93 | test_regex_eval_api_LDADD = -lm \ | ||
94 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
95 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
96 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la \ | ||
97 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
98 | |||
99 | test_regex_integration_SOURCES = \ | ||
100 | test_regex_integration.c | ||
101 | test_regex_integration_LDADD = -lm \ | ||
102 | libgnunetregex.la \ | ||
103 | $(top_builddir)/src/service/testing/libgnunettesting.la \ | ||
104 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
105 | |||
106 | test_regex_api_SOURCES = \ | ||
107 | test_regex_api.c | ||
108 | test_regex_api_LDADD = -lm \ | ||
109 | libgnunetregex.la \ | ||
110 | $(top_builddir)/src/service/testing/libgnunettesting.la \ | ||
111 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
112 | |||
113 | test_regex_iterate_api_SOURCES = \ | ||
114 | $(REGEX_INTERNAL) test_regex_iterate_api.c | ||
115 | test_regex_iterate_api_LDADD = -lm \ | ||
116 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
117 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
118 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la \ | ||
119 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
120 | |||
121 | test_regex_proofs_SOURCES = \ | ||
122 | $(REGEX_INTERNAL_TEST) test_regex_proofs.c | ||
123 | test_regex_proofs_LDADD = -lm \ | ||
124 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
125 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
126 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la \ | ||
127 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
128 | |||
129 | test_regex_graph_api_SOURCES = \ | ||
130 | $(REGEX_INTERNAL_TEST) test_regex_graph_api.c | ||
131 | test_regex_graph_api_LDADD = -lm \ | ||
132 | $(top_builddir)/src/service/dht/libgnunetdht.la \ | ||
133 | $(top_builddir)/src/plugin/regex/libgnunetregexblock.la \ | ||
134 | $(top_builddir)/src/service/statistics/libgnunetstatistics.la \ | ||
135 | $(top_builddir)/src/lib/util/libgnunetutil.la | ||
136 | |||
137 | |||
138 | EXTRA_DIST = \ | ||
139 | test_regex_api_data.conf | ||
diff --git a/src/service/regex/gnunet-daemon-regexprofiler.c b/src/service/regex/gnunet-daemon-regexprofiler.c new file mode 100644 index 000000000..8aa2a2a30 --- /dev/null +++ b/src/service/regex/gnunet-daemon-regexprofiler.c | |||
@@ -0,0 +1,407 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012, 2013 Christian Grothoff | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
/**
 * @file regex/gnunet-daemon-regexprofiler.c
 * @brief daemon that announces a regular expression via the DHT. Used in
 *        conjunction with gnunet-regex-profiler to announce regexes on
 *        several peers without the profiler itself having to connect to the
 *        services running on each peer.
 * @author Maximilian Szengel
 * @author Bartlomiej Polot
 */
30 | #include "platform.h" | ||
31 | #include "gnunet_util_lib.h" | ||
32 | #include "regex_internal_lib.h" | ||
33 | #include "regex_test_lib.h" | ||
34 | #include "gnunet_dht_service.h" | ||
35 | #include "gnunet_statistics_service.h" | ||
36 | |||
/**
 * Return value from 'main'.  Set to 0 once all announce rounds are done
 * and to #GNUNET_SYSERR when setup in run() fails.
 */
static int global_ret;

/**
 * Configuration we use (assigned at the start of run()).
 */
static const struct GNUNET_CONFIGURATION_Handle *cfg;

/**
 * Handle to the statistics service, created in run() under the
 * subsystem name "regexprofiler".
 */
static struct GNUNET_STATISTICS_Handle *stats_handle;

/**
 * Peer's dht handle, obtained in run() and released in shutdown_task().
 */
static struct GNUNET_DHT_Handle *dht_handle;

/**
 * Peer's regex announce handle.  NULL until the first invocation of
 * reannounce_regex() creates it.
 */
static struct REGEX_INTERNAL_Announcement *announce_handle;

/**
 * Periodically reannounce regex.  The task's closure is the
 * heap-allocated copy of the regex made in announce_regex().
 */
static struct GNUNET_SCHEDULER_Task *reannounce_task;

/**
 * What's the maximum reannounce period.  Read from
 * REGEXPROFILER/REANNOUNCE_PERIOD_MAX; run() falls back to 10 minutes
 * when the option is missing.
 */
static struct GNUNET_TIME_Relative reannounce_period_max;

/**
 * Maximal path compression length for regex announcing
 * (REGEXPROFILER/MAX_PATH_COMPRESSION).
 */
static unsigned long long max_path_compression;

/**
 * Name of the file containing policies that this peer should announce. One
 * policy per line.  Assigned by scan() while iterating over POLICY_DIR.
 */
static char *policy_filename;

/**
 * Prefix to add before every regex we're announcing
 * (REGEXPROFILER/REGEX_PREFIX).
 */
static char *regex_prefix;

/**
 * Regex with prefix.  Built in run() and freed there again once
 * announce_regex() has taken its own copy.
 */
static char *rx_with_pfx;

/**
 * How many put rounds should we do.  reannounce_regex() decrements this
 * on every invocation and triggers shutdown once it finds it at zero.
 */
static unsigned int rounds = 3;

/**
 * Private key for this peer, loaded from the configuration in run().
 */
static struct GNUNET_CRYPTO_EddsaPrivateKey *my_private_key;
102 | |||
103 | |||
104 | /** | ||
105 | * Task run during shutdown. | ||
106 | * | ||
107 | * @param cls unused | ||
108 | */ | ||
109 | static void | ||
110 | shutdown_task (void *cls) | ||
111 | { | ||
112 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "shutting down\n"); | ||
113 | |||
114 | if (NULL != announce_handle) | ||
115 | { | ||
116 | REGEX_INTERNAL_announce_cancel (announce_handle); | ||
117 | announce_handle = NULL; | ||
118 | } | ||
119 | if (NULL != reannounce_task) | ||
120 | { | ||
121 | GNUNET_free_nz (GNUNET_SCHEDULER_cancel (reannounce_task)); | ||
122 | reannounce_task = NULL; | ||
123 | } | ||
124 | if (NULL != dht_handle) | ||
125 | { | ||
126 | GNUNET_DHT_disconnect (dht_handle); | ||
127 | dht_handle = NULL; | ||
128 | } | ||
129 | GNUNET_free (my_private_key); | ||
130 | my_private_key = NULL; | ||
131 | |||
132 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
133 | "Daemon for %s shutting down\n", | ||
134 | policy_filename); | ||
135 | } | ||
136 | |||
137 | |||
138 | /** | ||
139 | * Announce a previously announced regex re-using cached data. | ||
140 | * | ||
141 | * @param cls Closure (regex to announce if needed). | ||
142 | */ | ||
143 | static void | ||
144 | reannounce_regex (void *cls) | ||
145 | { | ||
146 | char *regex = cls; | ||
147 | struct GNUNET_TIME_Relative random_delay; | ||
148 | |||
149 | reannounce_task = NULL; | ||
150 | if (0 == rounds--) | ||
151 | { | ||
152 | global_ret = 0; | ||
153 | GNUNET_SCHEDULER_shutdown (); | ||
154 | GNUNET_free (regex); | ||
155 | return; | ||
156 | } | ||
157 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n", regex); | ||
158 | GNUNET_STATISTICS_update (stats_handle, "# regexes announced", 1, GNUNET_NO); | ||
159 | if ((NULL == announce_handle) && (NULL != regex)) | ||
160 | { | ||
161 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
162 | "First time, creating regex: %s\n", | ||
163 | regex); | ||
164 | announce_handle = REGEX_INTERNAL_announce (dht_handle, | ||
165 | my_private_key, | ||
166 | regex, | ||
167 | (unsigned | ||
168 | int) max_path_compression, | ||
169 | stats_handle); | ||
170 | } | ||
171 | else | ||
172 | { | ||
173 | GNUNET_assert (NULL != announce_handle); | ||
174 | REGEX_INTERNAL_reannounce (announce_handle); | ||
175 | } | ||
176 | |||
177 | random_delay = | ||
178 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, | ||
179 | GNUNET_CRYPTO_random_u32 ( | ||
180 | GNUNET_CRYPTO_QUALITY_WEAK, | ||
181 | reannounce_period_max.rel_value_us)); | ||
182 | reannounce_task = GNUNET_SCHEDULER_add_delayed (random_delay, | ||
183 | &reannounce_regex, cls); | ||
184 | } | ||
185 | |||
186 | |||
187 | /** | ||
188 | * Announce the given regular expression using regex and the path compression | ||
189 | * length read from config. | ||
190 | * | ||
191 | * @param regex regular expression to announce on this peer's cadet. | ||
192 | */ | ||
193 | static void | ||
194 | announce_regex (const char *regex) | ||
195 | { | ||
196 | char *copy; | ||
197 | |||
198 | if ((NULL == regex) || (0 == strlen (regex))) | ||
199 | { | ||
200 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Cannot announce empty regex\n"); | ||
201 | return; | ||
202 | } | ||
203 | |||
204 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
205 | "Daemon for %s starting\n", | ||
206 | policy_filename); | ||
207 | GNUNET_assert (NULL == reannounce_task); | ||
208 | copy = GNUNET_strdup (regex); | ||
209 | reannounce_task = GNUNET_SCHEDULER_add_now (&reannounce_regex, | ||
210 | (void *) copy); | ||
211 | } | ||
212 | |||
213 | |||
214 | /** | ||
215 | * Scan through the policy_dir looking for the n-th filename. | ||
216 | * | ||
217 | * @param cls Closure (target number n). | ||
218 | * @param filename complete filename (absolute path). | ||
219 | * @return GNUNET_OK to continue to iterate, | ||
220 | * GNUNET_NO to stop when found | ||
221 | */ | ||
222 | static int | ||
223 | scan (void *cls, const char *filename) | ||
224 | { | ||
225 | long n = (long) cls; | ||
226 | static long c = 0; | ||
227 | |||
228 | if (c == n) | ||
229 | { | ||
230 | policy_filename = GNUNET_strdup (filename); | ||
231 | return GNUNET_NO; | ||
232 | } | ||
233 | c++; | ||
234 | return GNUNET_OK; | ||
235 | } | ||
236 | |||
237 | |||
238 | /** | ||
239 | * @brief Main function that will be run by the scheduler. | ||
240 | * | ||
241 | * @param cls closure | ||
242 | * @param args remaining command-line arguments | ||
243 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
244 | * @param cfg_ configuration | ||
245 | */ | ||
246 | static void | ||
247 | run (void *cls, char *const *args GNUNET_UNUSED, | ||
248 | const char *cfgfile GNUNET_UNUSED, | ||
249 | const struct GNUNET_CONFIGURATION_Handle *cfg_) | ||
250 | { | ||
251 | char *regex = NULL; | ||
252 | char **components; | ||
253 | char *policy_dir; | ||
254 | long long unsigned int peer_id; | ||
255 | |||
256 | cfg = cfg_; | ||
257 | |||
258 | my_private_key = GNUNET_CRYPTO_eddsa_key_create_from_configuration (cfg); | ||
259 | GNUNET_assert (NULL != my_private_key); | ||
260 | if (GNUNET_OK != | ||
261 | GNUNET_CONFIGURATION_get_value_number (cfg, "REGEXPROFILER", | ||
262 | "MAX_PATH_COMPRESSION", | ||
263 | &max_path_compression)) | ||
264 | { | ||
265 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
266 | _ | ||
267 | ( | ||
268 | "%s service is lacking key configuration settings (%s). Exiting.\n"), | ||
269 | "regexprofiler", "max_path_compression"); | ||
270 | global_ret = GNUNET_SYSERR; | ||
271 | GNUNET_SCHEDULER_shutdown (); | ||
272 | return; | ||
273 | } | ||
274 | if (GNUNET_OK != | ||
275 | GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER", | ||
276 | "POLICY_DIR", &policy_dir)) | ||
277 | { | ||
278 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, "REGEXPROFILER", | ||
279 | "POLICY_DIR"); | ||
280 | global_ret = GNUNET_SYSERR; | ||
281 | GNUNET_SCHEDULER_shutdown (); | ||
282 | return; | ||
283 | } | ||
284 | if (GNUNET_OK != | ||
285 | GNUNET_CONFIGURATION_get_value_number (cfg, "TESTBED", | ||
286 | "PEERID", &peer_id)) | ||
287 | { | ||
288 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, "TESTBED", "PEERID"); | ||
289 | global_ret = GNUNET_SYSERR; | ||
290 | GNUNET_free (policy_dir); | ||
291 | GNUNET_SCHEDULER_shutdown (); | ||
292 | return; | ||
293 | } | ||
294 | |||
295 | if (GNUNET_OK != | ||
296 | GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER", | ||
297 | "REGEX_PREFIX", ®ex_prefix)) | ||
298 | { | ||
299 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, "REGEXPROFILER", | ||
300 | "REGEX_PREFIX"); | ||
301 | global_ret = GNUNET_SYSERR; | ||
302 | GNUNET_free (policy_dir); | ||
303 | GNUNET_SCHEDULER_shutdown (); | ||
304 | return; | ||
305 | } | ||
306 | |||
307 | if (GNUNET_OK != | ||
308 | GNUNET_CONFIGURATION_get_value_time (cfg, "REGEXPROFILER", | ||
309 | "REANNOUNCE_PERIOD_MAX", | ||
310 | &reannounce_period_max)) | ||
311 | { | ||
312 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
313 | "reannounce_period_max not given. Using 10 minutes.\n"); | ||
314 | reannounce_period_max = | ||
315 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 10); | ||
316 | } | ||
317 | |||
318 | stats_handle = GNUNET_STATISTICS_create ("regexprofiler", cfg); | ||
319 | |||
320 | dht_handle = GNUNET_DHT_connect (cfg, 1); | ||
321 | |||
322 | if (NULL == dht_handle) | ||
323 | { | ||
324 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
325 | "Could not acquire dht handle. Exiting.\n"); | ||
326 | global_ret = GNUNET_SYSERR; | ||
327 | GNUNET_free (policy_dir); | ||
328 | GNUNET_SCHEDULER_shutdown (); | ||
329 | return; | ||
330 | } | ||
331 | |||
332 | /* Read regexes from policy files */ | ||
333 | GNUNET_assert (-1 != GNUNET_DISK_directory_scan (policy_dir, &scan, | ||
334 | (void *) (long) peer_id)); | ||
335 | if (NULL == (components = REGEX_TEST_read_from_file (policy_filename))) | ||
336 | { | ||
337 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
338 | "Policy file %s contains no policies. Exiting.\n", | ||
339 | policy_filename); | ||
340 | global_ret = GNUNET_SYSERR; | ||
341 | GNUNET_free (policy_dir); | ||
342 | GNUNET_SCHEDULER_shutdown (); | ||
343 | return; | ||
344 | } | ||
345 | GNUNET_free (policy_dir); | ||
346 | regex = REGEX_TEST_combine (components, 16); | ||
347 | REGEX_TEST_free_from_file (components); | ||
348 | |||
349 | /* Announcing regexes from policy_filename */ | ||
350 | GNUNET_asprintf (&rx_with_pfx, | ||
351 | "%s(%s)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*", | ||
352 | regex_prefix, | ||
353 | regex); | ||
354 | announce_regex (rx_with_pfx); | ||
355 | GNUNET_free (regex); | ||
356 | GNUNET_free (rx_with_pfx); | ||
357 | |||
358 | /* Scheduled the task to clean up when shutdown is called */ | ||
359 | GNUNET_SCHEDULER_add_shutdown (&shutdown_task, | ||
360 | NULL); | ||
361 | } | ||
362 | |||
363 | |||
364 | /** | ||
365 | * The main function of the regexprofiler service. | ||
366 | * | ||
367 | * @param argc number of arguments from the command line | ||
368 | * @param argv command line arguments | ||
369 | * @return 0 ok, 1 on error | ||
370 | */ | ||
371 | int | ||
372 | main (int argc, char *const *argv) | ||
373 | { | ||
374 | static const struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
375 | GNUNET_GETOPT_OPTION_END | ||
376 | }; | ||
377 | |||
378 | if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv)) | ||
379 | return 2; | ||
380 | return (GNUNET_OK == | ||
381 | GNUNET_PROGRAM_run (argc, argv, "regexprofiler", | ||
382 | gettext_noop | ||
383 | ( | ||
384 | "Daemon to announce regular expressions for the peer using cadet."), | ||
385 | options, &run, NULL)) ? global_ret : 1; | ||
386 | } | ||
387 | |||
388 | |||
#if defined(__linux__) && defined(__GLIBC__)
#include <malloc.h>

/**
 * MINIMIZE heap size (way below 128k) since this process doesn't need much.
 *
 * Declared with the constructor attribute, so it is invoked before main():
 * lowers the malloc trim threshold to 4 KiB and the top padding to 1 KiB,
 * then asks the allocator to trim.
 */
void __attribute__ ((constructor))
GNUNET_REGEX_memory_init (void)
{
  mallopt (M_TRIM_THRESHOLD, 4 * 1024);
  mallopt (M_TOP_PAD, 1 * 1024);
  malloc_trim (0);
}


#endif
405 | |||
406 | |||
407 | /* end of gnunet-daemon-regexprofiler.c */ | ||
diff --git a/src/service/regex/gnunet-regex-profiler.c b/src/service/regex/gnunet-regex-profiler.c new file mode 100644 index 000000000..8238ad3df --- /dev/null +++ b/src/service/regex/gnunet-regex-profiler.c | |||
@@ -0,0 +1,1589 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2011 - 2017 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-regex-profiler.c | ||
23 | * @brief Regex profiler for testing distributed regex use. | ||
24 | * @author Bartlomiej Polot | ||
25 | * @author Maximilian Szengel | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_applications.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "regex_internal_lib.h" | ||
31 | #include "gnunet_arm_service.h" | ||
32 | #include "gnunet_dht_service.h" | ||
33 | #include "gnunet_testbed_service.h" | ||
34 | |||
/**
 * Relative time of 90 seconds.  NOTE(review): presumably the timeout for a
 * single string search; its users are not in this part of the file.
 */
#define FIND_TIMEOUT \
  GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 90)
37 | |||
/**
 * DLL of operations: one node of a doubly-linked list that pairs a
 * testbed operation with a caller-supplied closure.
 */
struct DLLOperation
{
  /**
   * The testbed operation handle
   */
  struct GNUNET_TESTBED_Operation *op;

  /**
   * Closure associated with the operation
   */
  void *cls;

  /**
   * The next pointer for DLL
   */
  struct DLLOperation *next;

  /**
   * The prev pointer for DLL
   */
  struct DLLOperation *prev;
};
63 | |||
64 | |||
/**
 * Available states during profiling, listed in the order in which a
 * profiler run is expected to pass through them.
 */
enum State
{
  /**
   * Initial state (explicitly 0, i.e. the value of a zero-initialized
   * variable of this type)
   */
  STATE_INIT = 0,

  /**
   * Starting slaves
   */
  STATE_SLAVES_STARTING,

  /**
   * Creating peers
   */
  STATE_PEERS_CREATING,

  /**
   * Starting peers
   */
  STATE_PEERS_STARTING,

  /**
   * Linking peers
   */
  STATE_PEERS_LINKING,

  /**
   * Matching strings against announced regexes
   */
  STATE_SEARCH_REGEX,

  /**
   * Destroying peers; we can do this as the controller takes care of stopping a
   * peer if it is running
   */
  STATE_PEERS_DESTROYING
};
106 | |||
107 | |||
/**
 * Peer handles: per-peer bookkeeping of the profiler.  One entry per
 * testbed peer is kept in the global peers array.
 */
struct RegexPeer
{
  /**
   * Peer id.
   */
  unsigned int id;

  /**
   * Peer configuration handle.
   */
  struct GNUNET_CONFIGURATION_Handle *cfg;

  /**
   * The actual testbed peer handle.
   */
  struct GNUNET_TESTBED_Peer *peer_handle;

  /**
   * Peer's search string.
   */
  const char *search_str;

  /**
   * Set to GNUNET_YES if the peer successfully matched the above
   * search string. GNUNET_NO if the string could not be matched
   * during the profiler run. GNUNET_SYSERR if the string matching
   * timed out. Undefined if search_str is NULL.
   * do_shutdown() reports every peer whose value is not GNUNET_YES.
   */
  int search_str_matched;

  /**
   * Peer's DHT handle.
   */
  struct GNUNET_DHT_Handle *dht_handle;

  /**
   * Handle to a running regex search.
   */
  struct REGEX_INTERNAL_Search *search_handle;

  /**
   * Testbed operation handle for DHT.  Completed via
   * GNUNET_TESTBED_operation_done() in do_shutdown() if still set.
   */
  struct GNUNET_TESTBED_Operation *op_handle;

  /**
   * Peer's statistics handle.
   */
  struct GNUNET_STATISTICS_Handle *stats_handle;

  /**
   * The starting time of a profiling step.  do_shutdown() measures the
   * time elapsed since this point for the "not found" report.
   */
  struct GNUNET_TIME_Absolute prof_start_time;

  /**
   * Operation timeout
   */
  struct GNUNET_SCHEDULER_Task *timeout;

  /**
   * Daemon start
   */
  struct GNUNET_TESTBED_Operation *daemon_op;
};
176 | |||
/**
 * Set when shutting down to avoid making more queries.
 */
static int in_shutdown;

/**
 * The array of peers; we fill this as the peers are given to us by the testbed.
 * Holds num_peers entries.
 */
static struct RegexPeer *peers;

/**
 * Host registration handle
 */
static struct GNUNET_TESTBED_HostRegistrationHandle *reg_handle;

/**
 * Handle to the master controller process
 */
static struct GNUNET_TESTBED_ControllerProc *mc_proc;

/**
 * Handle to the master controller
 */
static struct GNUNET_TESTBED_Controller *mc;

/**
 * Handle to global configuration
 */
static struct GNUNET_CONFIGURATION_Handle *cfg;

/**
 * Abort task identifier (cancelled in do_shutdown() if still pending)
 */
static struct GNUNET_SCHEDULER_Task *abort_task;

/**
 * Host registration task identifier (cancelled in do_shutdown() if still
 * pending)
 */
static struct GNUNET_SCHEDULER_Task *register_hosts_task;

/**
 * Global event mask for all testbed events
 */
static uint64_t event_mask;

/**
 * The starting time of a profiling step
 */
static struct GNUNET_TIME_Absolute prof_start_time;

/**
 * Duration profiling step has taken.  Also used by do_shutdown() as a
 * scratch value for the per-peer elapsed time.
 */
static struct GNUNET_TIME_Relative prof_time;

/**
 * Number of peers to be started by the profiler
 */
static unsigned int num_peers;

/**
 * Global testing status
 */
static int result;

/**
 * current state of profiling.
 * NOTE(review): unlike the other file-scope variables this one is not
 * 'static' and therefore has external linkage -- confirm that is intended.
 */
enum State state;

/**
 * Folder where policy files are stored.
 */
static char *policy_dir;

/**
 * File with hostnames where to execute the test.
 */
static char *hosts_file;

/**
 * File with the strings to look for.
 */
static char *strings_file;

/**
 * Search strings (num_peers of them).
 */
static char **search_strings;

/**
 * How many searches are we going to start in parallel
 */
static long long unsigned int init_parallel_searches;

/**
 * How many searches are running in parallel
 */
static unsigned int parallel_searches;

/**
 * Number of strings found in the published regexes.
 */
static unsigned int strings_found;

/**
 * Index of peer to start next announce/search.
 */
static unsigned int next_search;

/**
 * Search timeout task identifier.
 */
static struct GNUNET_SCHEDULER_Task *search_timeout_task;

/**
 * Search timeout.
 * NOTE(review): the initializer sets the first member of the struct to
 * 60000.  If that member is rel_value_us (microseconds) the default is
 * 60 ms rather than 60 s -- presumably overridden before use; confirm.
 */
static struct GNUNET_TIME_Relative search_timeout_time = { 60000 };

/**
 * File to log statistics to.  do_shutdown() writes the reports for
 * unmatched search strings here and closes it.
 */
static struct GNUNET_DISK_FileHandle *data_file;

/**
 * Filename to log statistics to.
 */
static char *data_filename;

/**
 * Prefix used for regex announcing. We need to prefix the search
 * strings with it, in order to find something.
 */
static char *regex_prefix;

/**
 * What's the maximum regex reannounce period.
 */
static struct GNUNET_TIME_Relative reannounce_period_max;
317 | |||
318 | |||
319 | /******************************************************************************/ | ||
320 | /****************************** DECLARATIONS ********************************/ | ||
321 | /******************************************************************************/ | ||
322 | |||
/**
 * DHT connect callback. Called when we are connected to the dht service for
 * the peer in 'cls'. If successful we connect to the stats service of this
 * peer and then try to match the search string of this peer.
 *
 * @param cls internal peer id.
 * @param op operation handle.
 * @param ca_result connect adapter result.
 * @param emsg error message.
 */
static void
dht_connect_cb (void *cls, struct GNUNET_TESTBED_Operation *op,
                void *ca_result, const char *emsg);

/**
 * DHT connect adapter. Opens a connection to the DHT service.
 *
 * Note that the @a cfg parameter shadows the file-scope variable of the
 * same name.
 *
 * @param cls Closure (peer).
 * @param cfg Configuration handle.
 *
 * @return presumably the DHT service handle that is later passed to
 *         dht_da() as op_result -- the definition is further down in
 *         the file; TODO confirm
 */
static void *
dht_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg);


/**
 * Adapter function called to destroy a connection to
 * the DHT service.
 *
 * @param cls Closure
 * @param op_result Service handle returned from the connect adapter.
 */
static void
dht_da (void *cls, void *op_result);


/**
 * Function called by testbed once we are connected to stats
 * service. Get the statistics for the services of interest.
 *
 * @param cls the 'struct RegexPeer' for which we connected to stats
 * @param op connect operation handle
 * @param ca_result handle to stats service
 * @param emsg error message on failure
 */
static void
stats_connect_cb (void *cls,
                  struct GNUNET_TESTBED_Operation *op,
                  void *ca_result,
                  const char *emsg);


/**
 * Start announcing the next regex in the DHT.
 *
 * @param cls Index of the next peer in the peers array.
 */
static void
announce_next_regex (void *cls);
383 | |||
384 | |||
385 | /******************************************************************************/ | ||
386 | /******************************** SHUTDOWN **********************************/ | ||
387 | /******************************************************************************/ | ||
388 | |||
389 | |||
390 | /** | ||
391 | * Shutdown nicely | ||
392 | * | ||
393 | * @param cls NULL | ||
394 | */ | ||
395 | static void | ||
396 | do_shutdown (void *cls) | ||
397 | { | ||
398 | struct RegexPeer *peer; | ||
399 | unsigned int peer_cnt; | ||
400 | unsigned int search_str_cnt; | ||
401 | char output_buffer[512]; | ||
402 | size_t size; | ||
403 | |||
404 | if (NULL != abort_task) | ||
405 | { | ||
406 | GNUNET_SCHEDULER_cancel (abort_task); | ||
407 | abort_task = NULL; | ||
408 | } | ||
409 | if (NULL != register_hosts_task) | ||
410 | { | ||
411 | GNUNET_SCHEDULER_cancel (register_hosts_task); | ||
412 | register_hosts_task = NULL; | ||
413 | } | ||
414 | for (peer_cnt = 0; peer_cnt < num_peers; peer_cnt++) | ||
415 | { | ||
416 | peer = &peers[peer_cnt]; | ||
417 | |||
418 | if ((GNUNET_YES != peer->search_str_matched) && (NULL != data_file) ) | ||
419 | { | ||
420 | prof_time = GNUNET_TIME_absolute_get_duration (peer->prof_start_time); | ||
421 | size = | ||
422 | GNUNET_snprintf (output_buffer, | ||
423 | sizeof(output_buffer), | ||
424 | "%p Search string not found: %s (%d)\n" | ||
425 | "%p On peer: %u (%p)\n" | ||
426 | "%p After: %s\n", | ||
427 | peer, peer->search_str, peer->search_str_matched, | ||
428 | peer, peer->id, peer, | ||
429 | peer, | ||
430 | GNUNET_STRINGS_relative_time_to_string (prof_time, | ||
431 | GNUNET_NO)); | ||
432 | if (size != GNUNET_DISK_file_write (data_file, output_buffer, size)) | ||
433 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n"); | ||
434 | } | ||
435 | |||
436 | if (NULL != peers[peer_cnt].op_handle) | ||
437 | GNUNET_TESTBED_operation_done (peers[peer_cnt].op_handle); | ||
438 | } | ||
439 | |||
440 | if (NULL != data_file) | ||
441 | { | ||
442 | GNUNET_DISK_file_close (data_file); | ||
443 | data_file = NULL; | ||
444 | } | ||
445 | for (search_str_cnt = 0; | ||
446 | search_str_cnt < num_peers && NULL != search_strings; | ||
447 | search_str_cnt++) | ||
448 | { | ||
449 | GNUNET_free (search_strings[search_str_cnt]); | ||
450 | } | ||
451 | GNUNET_free (search_strings); | ||
452 | search_strings = NULL; | ||
453 | |||
454 | if (NULL != reg_handle) | ||
455 | { | ||
456 | GNUNET_TESTBED_cancel_registration (reg_handle); | ||
457 | reg_handle = NULL; | ||
458 | } | ||
459 | if (NULL != mc) | ||
460 | { | ||
461 | GNUNET_TESTBED_controller_disconnect (mc); | ||
462 | mc = NULL; | ||
463 | } | ||
464 | if (NULL != mc_proc) | ||
465 | { | ||
466 | GNUNET_TESTBED_controller_stop (mc_proc); | ||
467 | mc_proc = NULL; | ||
468 | } | ||
469 | if (NULL != cfg) | ||
470 | { | ||
471 | GNUNET_CONFIGURATION_destroy (cfg); | ||
472 | cfg = NULL; | ||
473 | } | ||
474 | } | ||
475 | |||
476 | |||
477 | /** | ||
478 | * abort task to run on test timed out | ||
479 | * | ||
480 | * @param cls NULL | ||
481 | */ | ||
482 | static void | ||
483 | do_abort (void *cls) | ||
484 | { | ||
485 | unsigned long i = (unsigned long) cls; | ||
486 | |||
487 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
488 | "Aborting from line %lu...\n", i); | ||
489 | abort_task = NULL; | ||
490 | result = GNUNET_SYSERR; | ||
491 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
492 | } | ||
493 | |||
494 | |||
495 | /******************************************************************************/ | ||
496 | /********************* STATISTICS SERVICE CONNECTIONS ***********************/ | ||
497 | /******************************************************************************/ | ||
498 | |||
/**
 * Connect adapter for the statistics service.
 *
 * @param cls closure (unused)
 * @param cfg configuration of the peer to connect to; will be available until
 *          GNUNET_TESTBED_operation_done() is called on the operation returned
 *          from GNUNET_TESTBED_service_connect()
 * @return statistics handle to hand out as 'op_result', NULL on error
 */
static void *
stats_ca (void *cls,
          const struct GNUNET_CONFIGURATION_Handle *cfg)
{
  void *stats;

  stats = GNUNET_STATISTICS_create ("<driver>", cfg);
  return stats;
}
515 | |||
516 | |||
517 | /** | ||
518 | * Adapter function called to destroy a connection to | ||
519 | * statistics service. | ||
520 | * | ||
521 | * @param cls closure | ||
522 | * @param op_result service handle returned from the connect adapter | ||
523 | */ | ||
524 | static void | ||
525 | stats_da (void *cls, void *op_result) | ||
526 | { | ||
527 | struct RegexPeer *peer = cls; | ||
528 | |||
529 | GNUNET_assert (op_result == peer->stats_handle); | ||
530 | |||
531 | GNUNET_STATISTICS_destroy (peer->stats_handle, GNUNET_NO); | ||
532 | peer->stats_handle = NULL; | ||
533 | } | ||
534 | |||
535 | |||
536 | /** | ||
537 | * Process statistic values. Write all values to global 'data_file', if present. | ||
538 | * | ||
539 | * @param cls closure | ||
540 | * @param subsystem name of subsystem that created the statistic | ||
541 | * @param name the name of the datum | ||
542 | * @param value the current value | ||
543 | * @param is_persistent GNUNET_YES if the value is persistent, GNUNET_NO if not | ||
544 | * @return #GNUNET_OK to continue, #GNUNET_SYSERR to abort iteration | ||
545 | */ | ||
546 | static int | ||
547 | stats_iterator (void *cls, | ||
548 | const char *subsystem, | ||
549 | const char *name, | ||
550 | uint64_t value, int is_persistent) | ||
551 | { | ||
552 | struct RegexPeer *peer = cls; | ||
553 | char output_buffer[512]; | ||
554 | size_t size; | ||
555 | |||
556 | if (NULL == data_file) | ||
557 | { | ||
558 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
559 | "%p -> %s [%s]: %llu\n", | ||
560 | peer, | ||
561 | subsystem, | ||
562 | name, | ||
563 | (unsigned long long) value); | ||
564 | return GNUNET_OK; | ||
565 | } | ||
566 | size = | ||
567 | GNUNET_snprintf (output_buffer, | ||
568 | sizeof(output_buffer), | ||
569 | "%p [%s] %llu %s\n", | ||
570 | peer, | ||
571 | subsystem, | ||
572 | (unsigned long long) value, | ||
573 | name); | ||
574 | if (size != GNUNET_DISK_file_write (data_file, output_buffer, size)) | ||
575 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
576 | "Unable to write to file!\n"); | ||
577 | |||
578 | return GNUNET_OK; | ||
579 | } | ||
580 | |||
581 | |||
582 | /** | ||
583 | * Stats callback. Finish the stats testbed operation and when all stats have | ||
584 | * been iterated, shutdown the profiler. | ||
585 | * | ||
586 | * @param cls closure | ||
587 | * @param success GNUNET_OK if statistics were | ||
588 | * successfully obtained, GNUNET_SYSERR if not. | ||
589 | */ | ||
590 | static void | ||
591 | stats_cb (void *cls, | ||
592 | int success) | ||
593 | { | ||
594 | static unsigned int peer_cnt; | ||
595 | struct RegexPeer *peer = cls; | ||
596 | |||
597 | if (GNUNET_OK != success) | ||
598 | { | ||
599 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
600 | "Getting statistics for peer %u failed!\n", | ||
601 | peer->id); | ||
602 | return; | ||
603 | } | ||
604 | |||
605 | GNUNET_assert (NULL != peer->op_handle); | ||
606 | |||
607 | GNUNET_TESTBED_operation_done (peer->op_handle); | ||
608 | peer->op_handle = NULL; | ||
609 | |||
610 | peer_cnt++; | ||
611 | peer = &peers[peer_cnt]; | ||
612 | |||
613 | fprintf (stderr, "s"); | ||
614 | if (peer_cnt == num_peers) | ||
615 | { | ||
616 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
617 | "\nCollecting stats finished. Shutting down.\n"); | ||
618 | GNUNET_SCHEDULER_shutdown (); | ||
619 | result = GNUNET_OK; | ||
620 | } | ||
621 | else | ||
622 | { | ||
623 | peer->op_handle = | ||
624 | GNUNET_TESTBED_service_connect (NULL, | ||
625 | peer->peer_handle, | ||
626 | "statistics", | ||
627 | &stats_connect_cb, | ||
628 | peer, | ||
629 | &stats_ca, | ||
630 | &stats_da, | ||
631 | peer); | ||
632 | } | ||
633 | } | ||
634 | |||
635 | |||
636 | /** | ||
637 | * Function called by testbed once we are connected to stats | ||
638 | * service. Get the statistics for the services of interest. | ||
639 | * | ||
640 | * @param cls the 'struct RegexPeer' for which we connected to stats | ||
641 | * @param op connect operation handle | ||
642 | * @param ca_result handle to stats service | ||
643 | * @param emsg error message on failure | ||
644 | */ | ||
645 | static void | ||
646 | stats_connect_cb (void *cls, | ||
647 | struct GNUNET_TESTBED_Operation *op, | ||
648 | void *ca_result, | ||
649 | const char *emsg) | ||
650 | { | ||
651 | struct RegexPeer *peer = cls; | ||
652 | |||
653 | if ((NULL == ca_result) || (NULL != emsg)) | ||
654 | { | ||
655 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
656 | "Failed to connect to statistics service on peer %u: %s\n", | ||
657 | peer->id, emsg); | ||
658 | |||
659 | peer->stats_handle = NULL; | ||
660 | return; | ||
661 | } | ||
662 | |||
663 | peer->stats_handle = ca_result; | ||
664 | |||
665 | if (NULL == GNUNET_STATISTICS_get (peer->stats_handle, NULL, NULL, | ||
666 | &stats_cb, | ||
667 | &stats_iterator, peer)) | ||
668 | { | ||
669 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
670 | "Could not get statistics of peer %u!\n", peer->id); | ||
671 | } | ||
672 | } | ||
673 | |||
674 | |||
675 | /** | ||
676 | * Task to collect all statistics from all peers, will shutdown the | ||
677 | * profiler, when done. | ||
678 | * | ||
679 | * @param cls NULL | ||
680 | */ | ||
681 | static void | ||
682 | do_collect_stats (void *cls) | ||
683 | { | ||
684 | struct RegexPeer *peer = &peers[0]; | ||
685 | |||
686 | GNUNET_assert (NULL != peer->peer_handle); | ||
687 | |||
688 | peer->op_handle = | ||
689 | GNUNET_TESTBED_service_connect (NULL, | ||
690 | peer->peer_handle, | ||
691 | "statistics", | ||
692 | &stats_connect_cb, | ||
693 | peer, | ||
694 | &stats_ca, | ||
695 | &stats_da, | ||
696 | peer); | ||
697 | } | ||
698 | |||
699 | |||
700 | /******************************************************************************/ | ||
701 | /************************ REGEX FIND CONNECTIONS **************************/ | ||
702 | /******************************************************************************/ | ||
703 | |||
704 | |||
/**
 * Start searching the DHT for the string assigned to a peer.
 *
 * @param cls index of the searching peer in the peers array, cast to a
 *        pointer
 */
static void
find_string (void *cls);
712 | |||
713 | |||
714 | /** | ||
715 | * Method called when we've found a peer that announced a regex | ||
716 | * that matches our search string. Now get the statistics. | ||
717 | * | ||
718 | * @param cls Closure provided in REGEX_INTERNAL_search. | ||
719 | * @param id Peer providing a regex that matches the string. | ||
720 | * @param get_path Path of the get request. | ||
721 | * @param get_path_length Length of get_path. | ||
722 | * @param put_path Path of the put request. | ||
723 | * @param put_path_length Length of the put_path. | ||
724 | */ | ||
725 | static void | ||
726 | regex_found_handler (void *cls, | ||
727 | const struct GNUNET_PeerIdentity *id, | ||
728 | const struct GNUNET_DHT_PathElement *get_path, | ||
729 | unsigned int get_path_length, | ||
730 | const struct GNUNET_DHT_PathElement *put_path, | ||
731 | unsigned int put_path_length) | ||
732 | { | ||
733 | struct RegexPeer *peer = cls; | ||
734 | char output_buffer[512]; | ||
735 | size_t size; | ||
736 | |||
737 | if (GNUNET_YES == peer->search_str_matched) | ||
738 | { | ||
739 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
740 | "String %s on peer %u already matched!\n", | ||
741 | peer->search_str, peer->id); | ||
742 | return; | ||
743 | } | ||
744 | |||
745 | strings_found++; | ||
746 | parallel_searches--; | ||
747 | |||
748 | if (NULL != peer->timeout) | ||
749 | { | ||
750 | GNUNET_SCHEDULER_cancel (peer->timeout); | ||
751 | peer->timeout = NULL; | ||
752 | if (GNUNET_NO == in_shutdown) | ||
753 | GNUNET_SCHEDULER_add_now (&announce_next_regex, NULL); | ||
754 | } | ||
755 | |||
756 | if (NULL == id) | ||
757 | { | ||
758 | // FIXME not possible right now | ||
759 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
760 | "String matching timed out for string %s on peer %u (%i/%i)\n", | ||
761 | peer->search_str, peer->id, strings_found, num_peers); | ||
762 | peer->search_str_matched = GNUNET_SYSERR; | ||
763 | } | ||
764 | else | ||
765 | { | ||
766 | prof_time = GNUNET_TIME_absolute_get_duration (peer->prof_start_time); | ||
767 | |||
768 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
769 | "String %s found on peer %u after %s (%i/%i) (%u||)\n", | ||
770 | peer->search_str, peer->id, | ||
771 | GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO), | ||
772 | strings_found, num_peers, parallel_searches); | ||
773 | |||
774 | peer->search_str_matched = GNUNET_YES; | ||
775 | |||
776 | if (NULL != data_file) | ||
777 | { | ||
778 | size = | ||
779 | GNUNET_snprintf (output_buffer, | ||
780 | sizeof(output_buffer), | ||
781 | "%p Peer: %u\n" | ||
782 | "%p Search string: %s\n" | ||
783 | "%p Search duration: %s\n\n", | ||
784 | peer, peer->id, | ||
785 | peer, peer->search_str, | ||
786 | peer, | ||
787 | GNUNET_STRINGS_relative_time_to_string (prof_time, | ||
788 | GNUNET_NO)); | ||
789 | |||
790 | if (size != GNUNET_DISK_file_write (data_file, output_buffer, size)) | ||
791 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n"); | ||
792 | } | ||
793 | } | ||
794 | |||
795 | GNUNET_TESTBED_operation_done (peer->op_handle); | ||
796 | peer->op_handle = NULL; | ||
797 | |||
798 | if (strings_found == num_peers) | ||
799 | { | ||
800 | prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time); | ||
801 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
802 | "All strings successfully matched in %s\n", | ||
803 | GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO)); | ||
804 | |||
805 | if (NULL != search_timeout_task) | ||
806 | { | ||
807 | GNUNET_SCHEDULER_cancel (search_timeout_task); | ||
808 | search_timeout_task = NULL; | ||
809 | } | ||
810 | |||
811 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Collecting stats.\n"); | ||
812 | GNUNET_SCHEDULER_add_now (&do_collect_stats, NULL); | ||
813 | } | ||
814 | } | ||
815 | |||
816 | |||
817 | /** | ||
818 | * Connect by string timeout task. This will cancel the profiler after the | ||
819 | * specified timeout 'search_timeout'. | ||
820 | * | ||
821 | * @param cls NULL | ||
822 | */ | ||
823 | static void | ||
824 | search_timed_out (void *cls) | ||
825 | { | ||
826 | unsigned int i; | ||
827 | |||
828 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
829 | "Finding matches to all strings did not succeed after %s.\n", | ||
830 | GNUNET_STRINGS_relative_time_to_string (search_timeout_time, | ||
831 | GNUNET_NO)); | ||
832 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
833 | "Found %i of %i strings\n", strings_found, num_peers); | ||
834 | |||
835 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
836 | "Search timed out after %s." | ||
837 | "Collecting stats and shutting down.\n", | ||
838 | GNUNET_STRINGS_relative_time_to_string (search_timeout_time, | ||
839 | GNUNET_NO)); | ||
840 | |||
841 | in_shutdown = GNUNET_YES; | ||
842 | for (i = 0; i < num_peers; i++) | ||
843 | { | ||
844 | if (NULL != peers[i].op_handle) | ||
845 | { | ||
846 | GNUNET_TESTBED_operation_done (peers[i].op_handle); | ||
847 | peers[i].op_handle = NULL; | ||
848 | } | ||
849 | } | ||
850 | GNUNET_SCHEDULER_add_now (&do_collect_stats, NULL); | ||
851 | } | ||
852 | |||
853 | |||
854 | /** | ||
855 | * Search timed out. It might still complete in the future, | ||
856 | * but we should start another one. | ||
857 | * | ||
858 | * @param cls Index of the next peer in the peers array. | ||
859 | */ | ||
860 | static void | ||
861 | find_timed_out (void *cls) | ||
862 | { | ||
863 | struct RegexPeer *p = cls; | ||
864 | |||
865 | p->timeout = NULL; | ||
866 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
867 | "Searching for string \"%s\" on peer %d timed out.\n", | ||
868 | p->search_str, | ||
869 | p->id); | ||
870 | if (GNUNET_NO == in_shutdown) | ||
871 | GNUNET_SCHEDULER_add_now (&announce_next_regex, NULL); | ||
872 | } | ||
873 | |||
874 | |||
875 | /** | ||
876 | * Start searching for a string in the DHT. | ||
877 | * | ||
878 | * @param cls Index of the next peer in the peers array. | ||
879 | */ | ||
880 | static void | ||
881 | find_string (void *cls) | ||
882 | { | ||
883 | unsigned int search_peer = (unsigned int) (long) cls; | ||
884 | |||
885 | if ((search_peer >= num_peers) || | ||
886 | (GNUNET_YES == in_shutdown)) | ||
887 | return; | ||
888 | |||
889 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
890 | "Searching for string \"%s\" on peer %d (%u||)\n", | ||
891 | peers[search_peer].search_str, | ||
892 | search_peer, | ||
893 | parallel_searches); | ||
894 | |||
895 | peers[search_peer].op_handle = | ||
896 | GNUNET_TESTBED_service_connect (NULL, | ||
897 | peers[search_peer].peer_handle, | ||
898 | "dht", | ||
899 | &dht_connect_cb, | ||
900 | &peers[search_peer], | ||
901 | &dht_ca, | ||
902 | &dht_da, | ||
903 | &peers[search_peer]); | ||
904 | GNUNET_assert (NULL != peers[search_peer].op_handle); | ||
905 | peers[search_peer].timeout | ||
906 | = GNUNET_SCHEDULER_add_delayed (FIND_TIMEOUT, | ||
907 | &find_timed_out, | ||
908 | &peers[search_peer]); | ||
909 | } | ||
910 | |||
911 | |||
912 | /** | ||
913 | * Callback called when testbed has started the daemon we asked for. | ||
914 | * | ||
915 | * @param cls NULL | ||
916 | * @param op the operation handle | ||
917 | * @param emsg NULL on success; otherwise an error description | ||
918 | */ | ||
919 | static void | ||
920 | daemon_started (void *cls, | ||
921 | struct GNUNET_TESTBED_Operation *op, | ||
922 | const char *emsg) | ||
923 | { | ||
924 | struct RegexPeer *peer = (struct RegexPeer *) cls; | ||
925 | unsigned long search_peer; | ||
926 | unsigned int i; | ||
927 | |||
928 | GNUNET_TESTBED_operation_done (peer->daemon_op); | ||
929 | peer->daemon_op = NULL; | ||
930 | if (NULL != emsg) | ||
931 | { | ||
932 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
933 | "Failed to start/stop daemon at peer %u: %s\n", peer->id, emsg); | ||
934 | GNUNET_assert (0); | ||
935 | } | ||
936 | else | ||
937 | { | ||
938 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
939 | "Daemon %u started successfully\n", peer->id); | ||
940 | } | ||
941 | |||
942 | /* Find a peer to look for a string matching the regex announced */ | ||
943 | search_peer = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | ||
944 | num_peers); | ||
945 | for (i = 0; peers[search_peer].search_str != NULL; i++) | ||
946 | { | ||
947 | search_peer = (search_peer + 1) % num_peers; | ||
948 | if (i > num_peers) | ||
949 | GNUNET_assert (0); /* we ran out of peers, must be a bug */ | ||
950 | } | ||
951 | peers[search_peer].search_str = search_strings[peer->id]; | ||
952 | peers[search_peer].search_str_matched = GNUNET_NO; | ||
953 | GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_relative_saturating_multiply ( | ||
954 | reannounce_period_max, | ||
955 | 2), | ||
956 | &find_string, | ||
957 | (void *) search_peer); | ||
958 | } | ||
959 | |||
960 | |||
961 | /** | ||
962 | * Task to start the daemons on each peer so that the regexes are announced | ||
963 | * into the DHT. | ||
964 | * | ||
965 | * @param cls NULL | ||
966 | */ | ||
967 | static void | ||
968 | do_announce (void *cls) | ||
969 | { | ||
970 | unsigned int i; | ||
971 | |||
972 | if (GNUNET_YES == in_shutdown) | ||
973 | return; | ||
974 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
975 | "Starting announce.\n"); | ||
976 | for (i = 0; i < init_parallel_searches; i++) | ||
977 | { | ||
978 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
979 | " scheduling announce %u\n", | ||
980 | i); | ||
981 | (void) GNUNET_SCHEDULER_add_now (&announce_next_regex, NULL); | ||
982 | } | ||
983 | } | ||
984 | |||
985 | |||
986 | /** | ||
987 | * Start announcing the next regex in the DHT. | ||
988 | * | ||
989 | * @param cls Closure (unused). | ||
990 | */ | ||
991 | static void | ||
992 | announce_next_regex (void *cls) | ||
993 | { | ||
994 | struct RegexPeer *peer; | ||
995 | |||
996 | if (GNUNET_YES == in_shutdown) | ||
997 | return; | ||
998 | if (next_search >= num_peers) | ||
999 | { | ||
1000 | if (strings_found != num_peers) | ||
1001 | { | ||
1002 | struct GNUNET_TIME_Relative new_delay; | ||
1003 | if (NULL != search_timeout_task) | ||
1004 | GNUNET_SCHEDULER_cancel (search_timeout_task); | ||
1005 | new_delay = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 15); | ||
1006 | search_timeout_task = GNUNET_SCHEDULER_add_delayed (new_delay, | ||
1007 | &search_timed_out, | ||
1008 | NULL); | ||
1009 | } | ||
1010 | return; | ||
1011 | } | ||
1012 | |||
1013 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Starting daemon %u\n", next_search); | ||
1014 | peer = &peers[next_search]; | ||
1015 | peer->daemon_op = | ||
1016 | GNUNET_TESTBED_peer_manage_service (NULL, | ||
1017 | peer->peer_handle, | ||
1018 | "regexprofiler", | ||
1019 | &daemon_started, | ||
1020 | peer, | ||
1021 | 1); | ||
1022 | next_search++; | ||
1023 | parallel_searches++; | ||
1024 | } | ||
1025 | |||
1026 | |||
1027 | static void | ||
1028 | dht_connect_cb (void *cls, | ||
1029 | struct GNUNET_TESTBED_Operation *op, | ||
1030 | void *ca_result, | ||
1031 | const char *emsg) | ||
1032 | { | ||
1033 | struct RegexPeer *peer = (struct RegexPeer *) cls; | ||
1034 | |||
1035 | if ((NULL != emsg) || (NULL == op) || (NULL == ca_result)) | ||
1036 | { | ||
1037 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "DHT connect failed: %s\n", emsg); | ||
1038 | GNUNET_assert (0); | ||
1039 | } | ||
1040 | |||
1041 | GNUNET_assert (NULL != peer->dht_handle); | ||
1042 | GNUNET_assert (peer->op_handle == op); | ||
1043 | GNUNET_assert (peer->dht_handle == ca_result); | ||
1044 | |||
1045 | peer->search_str_matched = GNUNET_NO; | ||
1046 | peer->search_handle = REGEX_INTERNAL_search (peer->dht_handle, | ||
1047 | peer->search_str, | ||
1048 | ®ex_found_handler, peer, | ||
1049 | NULL); | ||
1050 | peer->prof_start_time = GNUNET_TIME_absolute_get (); | ||
1051 | } | ||
1052 | |||
1053 | |||
1054 | static void * | ||
1055 | dht_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg) | ||
1056 | { | ||
1057 | struct RegexPeer *peer = cls; | ||
1058 | |||
1059 | peer->dht_handle = GNUNET_DHT_connect (cfg, 32); | ||
1060 | |||
1061 | return peer->dht_handle; | ||
1062 | } | ||
1063 | |||
1064 | |||
1065 | static void | ||
1066 | dht_da (void *cls, void *op_result) | ||
1067 | { | ||
1068 | struct RegexPeer *peer = (struct RegexPeer *) cls; | ||
1069 | |||
1070 | GNUNET_assert (peer->dht_handle == op_result); | ||
1071 | |||
1072 | if (NULL != peer->search_handle) | ||
1073 | { | ||
1074 | REGEX_INTERNAL_search_cancel (peer->search_handle); | ||
1075 | peer->search_handle = NULL; | ||
1076 | } | ||
1077 | |||
1078 | if (NULL != peer->dht_handle) | ||
1079 | { | ||
1080 | GNUNET_DHT_disconnect (peer->dht_handle); | ||
1081 | peer->dht_handle = NULL; | ||
1082 | } | ||
1083 | } | ||
1084 | |||
1085 | |||
1086 | /** | ||
1087 | * Signature of a main function for a testcase. | ||
1088 | * | ||
1089 | * @param cls NULL | ||
1090 | * @param h the run handle | ||
1091 | * @param num_peers_ number of peers in 'peers' | ||
1092 | * @param testbed_peers handle to peers run in the testbed. NULL upon timeout (see | ||
1093 | * GNUNET_TESTBED_test_run()). | ||
1094 | * @param links_succeeded the number of overlay link connection attempts that | ||
1095 | * succeeded | ||
1096 | * @param links_failed the number of overlay link connection attempts that | ||
1097 | * failed | ||
1098 | */ | ||
1099 | static void | ||
1100 | test_master (void *cls, | ||
1101 | struct GNUNET_TESTBED_RunHandle *h, | ||
1102 | unsigned int num_peers_, | ||
1103 | struct GNUNET_TESTBED_Peer **testbed_peers, | ||
1104 | unsigned int links_succeeded, | ||
1105 | unsigned int links_failed) | ||
1106 | { | ||
1107 | unsigned int i; | ||
1108 | |||
1109 | GNUNET_assert (num_peers_ == num_peers); | ||
1110 | |||
1111 | prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time); | ||
1112 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
1113 | "Testbed started in %s\n", | ||
1114 | GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO)); | ||
1115 | |||
1116 | if (NULL != abort_task) | ||
1117 | { | ||
1118 | GNUNET_SCHEDULER_cancel (abort_task); | ||
1119 | abort_task = NULL; | ||
1120 | } | ||
1121 | |||
1122 | for (i = 0; i < num_peers; i++) | ||
1123 | { | ||
1124 | peers[i].peer_handle = testbed_peers[i]; | ||
1125 | } | ||
1126 | if (GNUNET_NO == | ||
1127 | GNUNET_CONFIGURATION_get_value_yesno (cfg, "DHT", "DISABLE_TRY_CONNECT")) | ||
1128 | { | ||
1129 | struct GNUNET_TIME_Relative settle_time; | ||
1130 | |||
1131 | settle_time = | ||
1132 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, | ||
1133 | 10 * num_peers); | ||
1134 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
1135 | "Waiting for DHT for %s to settle new connections.\n\n", | ||
1136 | GNUNET_STRINGS_relative_time_to_string (settle_time, | ||
1137 | GNUNET_NO)); | ||
1138 | GNUNET_SCHEDULER_add_delayed (settle_time, &do_announce, NULL); | ||
1139 | } | ||
1140 | else | ||
1141 | { | ||
1142 | GNUNET_SCHEDULER_add_now (&do_announce, NULL); | ||
1143 | } | ||
1144 | search_timeout_task = | ||
1145 | GNUNET_SCHEDULER_add_delayed (search_timeout_time, &search_timed_out, NULL); | ||
1146 | } | ||
1147 | |||
1148 | |||
1149 | /** | ||
1150 | * Function that will be called whenever something in the testbed changes. | ||
1151 | * | ||
1152 | * @param cls closure, NULL | ||
1153 | * @param event information on what is happening | ||
1154 | */ | ||
1155 | static void | ||
1156 | master_controller_cb (void *cls, | ||
1157 | const struct GNUNET_TESTBED_EventInformation *event) | ||
1158 | { | ||
1159 | switch (event->type) | ||
1160 | { | ||
1161 | case GNUNET_TESTBED_ET_CONNECT: | ||
1162 | printf ("."); | ||
1163 | break; | ||
1164 | |||
1165 | case GNUNET_TESTBED_ET_PEER_START: | ||
1166 | printf ("#"); | ||
1167 | break; | ||
1168 | |||
1169 | default: | ||
1170 | break; | ||
1171 | } | ||
1172 | fflush (stdout); | ||
1173 | } | ||
1174 | |||
1175 | |||
1176 | /******************************************************************************/ | ||
1177 | /*************************** TESTBED PEER SETUP *****************************/ | ||
1178 | /******************************************************************************/ | ||
1179 | |||
/**
 * Process the text buffer counting the non-empty lines and separating them
 * with NUL characters, for later ease of copy using (as)printf.
 *
 * Every '\n' that is visited is overwritten with '\0', including the one
 * of a blank line; blank lines therefore show up as empty strings, which
 * is what create_string_array() skips.  A last line that is not terminated
 * by '\n' is not counted.  Scanning stops as soon as @a str_max lines have
 * been counted; the rest of the buffer is left untouched.
 *
 * @param data Memory buffer with strings.
 * @param data_size Size of the @a data buffer in bytes.
 * @param str_max Maximum number of strings to return.
 * @return Number of non-empty lines found in the buffer (0 for an empty
 *         buffer).
 */
static int
count_and_separate_strings (char *data,
                            uint64_t data_size,
                            unsigned int str_max)
{
  uint64_t line_start; /* offset of the first byte of the current line */
  uint64_t offset;
  unsigned int str_cnt;

  line_start = 0;
  str_cnt = 0;
  /* Start at offset 0 so that the very first byte is examined as well, and
     compare against data_size itself so that a size of 0 cannot wrap. */
  for (offset = 0; (offset < data_size) && (str_cnt < str_max); offset++)
  {
    if ('\0' == data[offset])
    {
      /* Embedded NUL: already a separator, never counted as a line end. */
      line_start = offset + 1;
      continue;
    }
    if ('\n' != data[offset])
      continue;
    if (offset != line_start)
      str_cnt++;          /* end of a non-empty line */
    data[offset] = '\0';  /* also for blank lines, see above */
    line_start = offset + 1;
  }
  return (int) str_cnt;
}
1218 | |||
1219 | |||
1220 | /** | ||
1221 | * Allocate a string array and fill it with the prefixed strings | ||
1222 | * from a pre-processed, NULL-separated memory region. | ||
1223 | * | ||
1224 | * @param data Preprocessed memory with strings | ||
1225 | * @param data_size Size of the @a data buffer in bytes. | ||
1226 | * @param strings Address of the string array to be created. | ||
1227 | * Must be freed by caller if function end in success. | ||
1228 | * @param str_cnt String count. The @a data buffer should contain | ||
1229 | * at least this many NULL-separated strings. | ||
1230 | * @return #GNUNET_OK in ase of success, #GNUNET_SYSERR otherwise. | ||
1231 | * In case of error @a strings must not be freed. | ||
1232 | */ | ||
1233 | static int | ||
1234 | create_string_array (char *data, uint64_t data_size, | ||
1235 | char ***strings, unsigned int str_cnt) | ||
1236 | { | ||
1237 | uint64_t offset; | ||
1238 | uint64_t len; | ||
1239 | unsigned int i; | ||
1240 | |||
1241 | *strings = GNUNET_malloc (sizeof(char *) * str_cnt); | ||
1242 | offset = 0; | ||
1243 | for (i = 0; i < str_cnt; i++) | ||
1244 | { | ||
1245 | len = strlen (&data[offset]); | ||
1246 | if (offset + len >= data_size) | ||
1247 | { | ||
1248 | GNUNET_free (*strings); | ||
1249 | *strings = NULL; | ||
1250 | return GNUNET_SYSERR; | ||
1251 | } | ||
1252 | if (0 == len) // empty line | ||
1253 | { | ||
1254 | offset++; | ||
1255 | i--; | ||
1256 | continue; | ||
1257 | } | ||
1258 | |||
1259 | GNUNET_asprintf (&(*strings)[i], | ||
1260 | "%s%s", | ||
1261 | regex_prefix, | ||
1262 | &data[offset]); | ||
1263 | offset += len + 1; | ||
1264 | } | ||
1265 | return GNUNET_OK; | ||
1266 | } | ||
1267 | |||
1268 | |||
1269 | /** | ||
1270 | * Load search strings from given filename. One search string per line. | ||
1271 | * | ||
1272 | * @param filename filename of the file containing the search strings. | ||
1273 | * @param strings set of strings loaded from file. Caller needs to free this | ||
1274 | * if number returned is greater than zero. | ||
1275 | * @param limit upper limit on the number of strings read from the file | ||
1276 | * @return number of strings found in the file. #GNUNET_SYSERR on error. | ||
1277 | */ | ||
1278 | static int | ||
1279 | load_search_strings (const char *filename, | ||
1280 | char ***strings, | ||
1281 | unsigned int limit) | ||
1282 | { | ||
1283 | char *data; | ||
1284 | uint64_t filesize; | ||
1285 | int str_cnt; | ||
1286 | |||
1287 | /* Sanity checks */ | ||
1288 | if (NULL == filename) | ||
1289 | { | ||
1290 | return GNUNET_SYSERR; | ||
1291 | } | ||
1292 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) | ||
1293 | { | ||
1294 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1295 | "Could not find search strings file %s\n", filename); | ||
1296 | return GNUNET_SYSERR; | ||
1297 | } | ||
1298 | if (GNUNET_OK != | ||
1299 | GNUNET_DISK_file_size (filename, | ||
1300 | &filesize, | ||
1301 | GNUNET_YES, | ||
1302 | GNUNET_YES)) | ||
1303 | { | ||
1304 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1305 | "Search strings file %s cannot be read.\n", | ||
1306 | filename); | ||
1307 | return GNUNET_SYSERR; | ||
1308 | } | ||
1309 | if (0 == filesize) | ||
1310 | { | ||
1311 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1312 | "Search strings file %s is empty.\n", | ||
1313 | filename); | ||
1314 | return GNUNET_SYSERR; | ||
1315 | } | ||
1316 | |||
1317 | /* Read data into memory */ | ||
1318 | data = GNUNET_malloc (filesize + 1); | ||
1319 | if (filesize != GNUNET_DISK_fn_read (filename, | ||
1320 | data, | ||
1321 | filesize)) | ||
1322 | { | ||
1323 | GNUNET_free (data); | ||
1324 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1325 | "Could not read search strings file %s.\n", | ||
1326 | filename); | ||
1327 | return GNUNET_SYSERR; | ||
1328 | } | ||
1329 | |||
1330 | /* Process buffer and build array */ | ||
1331 | str_cnt = count_and_separate_strings (data, filesize, limit); | ||
1332 | if (GNUNET_OK != create_string_array (data, filesize, strings, str_cnt)) | ||
1333 | { | ||
1334 | str_cnt = GNUNET_SYSERR; | ||
1335 | } | ||
1336 | GNUNET_free (data); | ||
1337 | return str_cnt; | ||
1338 | } | ||
1339 | |||
1340 | |||
1341 | /** | ||
1342 | * Main function that will be run by the scheduler. | ||
1343 | * | ||
1344 | * @param cls closure | ||
1345 | * @param args remaining command-line arguments | ||
1346 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
1347 | * @param config configuration | ||
1348 | */ | ||
1349 | static void | ||
1350 | run (void *cls, | ||
1351 | char *const *args, | ||
1352 | const char *cfgfile, | ||
1353 | const struct GNUNET_CONFIGURATION_Handle *config) | ||
1354 | { | ||
1355 | unsigned int nsearchstrs; | ||
1356 | unsigned int i; | ||
1357 | struct GNUNET_TIME_Relative abort_time; | ||
1358 | |||
1359 | in_shutdown = GNUNET_NO; | ||
1360 | |||
1361 | /* Check config */ | ||
1362 | if (NULL == config) | ||
1363 | { | ||
1364 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1365 | _ ("No configuration file given. Exiting\n")); | ||
1366 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1367 | return; | ||
1368 | } | ||
1369 | cfg = GNUNET_CONFIGURATION_dup (config); | ||
1370 | if (GNUNET_OK != | ||
1371 | GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER", | ||
1372 | "REGEX_PREFIX", | ||
1373 | ®ex_prefix)) | ||
1374 | { | ||
1375 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, | ||
1376 | "regexprofiler", | ||
1377 | "regex_prefix"); | ||
1378 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1379 | return; | ||
1380 | } | ||
1381 | if (GNUNET_OK != | ||
1382 | GNUNET_CONFIGURATION_get_value_number (cfg, "REGEXPROFILER", | ||
1383 | "PARALLEL_SEARCHES", | ||
1384 | &init_parallel_searches)) | ||
1385 | { | ||
1386 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1387 | "Configuration option \"PARALLEL_SEARCHES\" missing." | ||
1388 | " Using default (%d)\n", 10); | ||
1389 | init_parallel_searches = 10; | ||
1390 | } | ||
1391 | if (GNUNET_OK != | ||
1392 | GNUNET_CONFIGURATION_get_value_time (cfg, "REGEXPROFILER", | ||
1393 | "REANNOUNCE_PERIOD_MAX", | ||
1394 | &reannounce_period_max)) | ||
1395 | { | ||
1396 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1397 | "reannounce_period_max not given. Using 10 minutes.\n"); | ||
1398 | reannounce_period_max = | ||
1399 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 10); | ||
1400 | } | ||
1401 | |||
1402 | /* Check arguments */ | ||
1403 | if (NULL == policy_dir) | ||
1404 | { | ||
1405 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1406 | _ ( | ||
1407 | "No policy directory specified on command line. Exiting.\n")); | ||
1408 | return; | ||
1409 | } | ||
1410 | if (GNUNET_YES != GNUNET_DISK_directory_test (policy_dir, GNUNET_YES)) | ||
1411 | { | ||
1412 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1413 | _ ("Specified policies directory does not exist. Exiting.\n")); | ||
1414 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1415 | return; | ||
1416 | } | ||
1417 | if (0 >= (int) (num_peers = GNUNET_DISK_directory_scan (policy_dir, NULL, | ||
1418 | NULL))) | ||
1419 | { | ||
1420 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1421 | _ ("No files found in `%s'\n"), | ||
1422 | policy_dir); | ||
1423 | return; | ||
1424 | } | ||
1425 | GNUNET_CONFIGURATION_set_value_string (cfg, "REGEXPROFILER", | ||
1426 | "POLICY_DIR", policy_dir); | ||
1427 | if (GNUNET_YES != GNUNET_DISK_file_test (strings_file)) | ||
1428 | { | ||
1429 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1430 | _ ("No search strings file given. Exiting.\n")); | ||
1431 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1432 | return; | ||
1433 | } | ||
1434 | nsearchstrs = load_search_strings (strings_file, | ||
1435 | &search_strings, | ||
1436 | num_peers); | ||
1437 | if (num_peers != nsearchstrs) | ||
1438 | { | ||
1439 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1440 | "Error loading search strings.\n"); | ||
1441 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1442 | "File (%s) does not contain enough strings (%u/%u).\n", | ||
1443 | strings_file, nsearchstrs, num_peers); | ||
1444 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1445 | return; | ||
1446 | } | ||
1447 | if ((0 == num_peers) || (NULL == search_strings)) | ||
1448 | { | ||
1449 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1450 | _ ("Error loading search strings. Exiting.\n")); | ||
1451 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1452 | return; | ||
1453 | } | ||
1454 | for (i = 0; i < num_peers; i++) | ||
1455 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1456 | "search string: %s\n", | ||
1457 | search_strings[i]); | ||
1458 | |||
1459 | /* Check logfile */ | ||
1460 | if ((NULL != data_filename) && | ||
1461 | (NULL == (data_file = | ||
1462 | GNUNET_DISK_file_open (data_filename, | ||
1463 | GNUNET_DISK_OPEN_READWRITE | ||
1464 | | GNUNET_DISK_OPEN_TRUNCATE | ||
1465 | | GNUNET_DISK_OPEN_CREATE, | ||
1466 | GNUNET_DISK_PERM_USER_READ | ||
1467 | | GNUNET_DISK_PERM_USER_WRITE)))) | ||
1468 | { | ||
1469 | GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_ERROR, | ||
1470 | "open", | ||
1471 | data_filename); | ||
1472 | return; | ||
1473 | } | ||
1474 | |||
1475 | /* Initialize peers */ | ||
1476 | peers = GNUNET_malloc (sizeof(struct RegexPeer) * num_peers); | ||
1477 | for (i = 0; i < num_peers; i++) | ||
1478 | peers[i].id = i; | ||
1479 | |||
1480 | GNUNET_CONFIGURATION_set_value_number (cfg, | ||
1481 | "TESTBED", "OVERLAY_RANDOM_LINKS", | ||
1482 | num_peers * 20); | ||
1483 | GNUNET_CONFIGURATION_set_value_number (cfg, | ||
1484 | "DHT", "FORCE_NSE", | ||
1485 | (long long unsigned) | ||
1486 | (log (num_peers) / log (2.0))); | ||
1487 | event_mask = 0LL; | ||
1488 | /* For feedback about the start process activate these and pass master_cb */ | ||
1489 | event_mask |= (1LL << GNUNET_TESTBED_ET_PEER_START); | ||
1490 | // event_mask |= (1LL << GNUNET_TESTBED_ET_PEER_STOP); | ||
1491 | event_mask |= (1LL << GNUNET_TESTBED_ET_CONNECT); | ||
1492 | // event_mask |= (1LL << GNUNET_TESTBED_ET_DISCONNECT); | ||
1493 | prof_start_time = GNUNET_TIME_absolute_get (); | ||
1494 | GNUNET_TESTBED_run (hosts_file, | ||
1495 | cfg, | ||
1496 | num_peers, | ||
1497 | event_mask, | ||
1498 | &master_controller_cb, | ||
1499 | NULL, /* master_controller_cb cls */ | ||
1500 | &test_master, | ||
1501 | NULL); /* test_master cls */ | ||
1502 | if (GNUNET_OK != | ||
1503 | GNUNET_CONFIGURATION_get_value_time (cfg, "TESTBED", | ||
1504 | "SETUP_TIMEOUT", | ||
1505 | &abort_time)) | ||
1506 | { | ||
1507 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1508 | "SETUP_TIMEOUT not given. Using 15 minutes.\n"); | ||
1509 | abort_time = | ||
1510 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 15); | ||
1511 | } | ||
1512 | abort_time = GNUNET_TIME_relative_add (abort_time, GNUNET_TIME_UNIT_MINUTES); | ||
1513 | abort_task = | ||
1514 | GNUNET_SCHEDULER_add_delayed (abort_time, | ||
1515 | &do_abort, | ||
1516 | (void *) __LINE__); | ||
1517 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1518 | "setup_timeout: %s\n", | ||
1519 | GNUNET_STRINGS_relative_time_to_string (abort_time, GNUNET_YES)); | ||
1520 | } | ||
1521 | |||
1522 | |||
1523 | /** | ||
1524 | * Main function. | ||
1525 | * | ||
1526 | * @param argc argument count | ||
1527 | * @param argv argument values | ||
1528 | * @return 0 on success | ||
1529 | */ | ||
1530 | int | ||
1531 | main (int argc, char *const *argv) | ||
1532 | { | ||
1533 | struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
1534 | GNUNET_GETOPT_option_filename ( | ||
1535 | 'o', | ||
1536 | "output-file", | ||
1537 | "FILENAME", | ||
1538 | gettext_noop ( | ||
1539 | "name of the file for writing statistics"), | ||
1540 | &data_filename), | ||
1541 | GNUNET_GETOPT_option_relative_time ( | ||
1542 | 't', | ||
1543 | "matching-timeout", | ||
1544 | "TIMEOUT", | ||
1545 | gettext_noop ( | ||
1546 | "wait TIMEOUT before ending the experiment"), | ||
1547 | &search_timeout_time), | ||
1548 | GNUNET_GETOPT_option_filename ( | ||
1549 | 'p', | ||
1550 | "policy-dir", | ||
1551 | "DIRECTORY", | ||
1552 | gettext_noop ("directory with policy files"), | ||
1553 | &policy_dir), | ||
1554 | GNUNET_GETOPT_option_filename ( | ||
1555 | 's', | ||
1556 | "strings-file", | ||
1557 | "FILENAME", | ||
1558 | gettext_noop ( | ||
1559 | "name of file with input strings"), | ||
1560 | &strings_file), | ||
1561 | GNUNET_GETOPT_option_filename ( | ||
1562 | 'H', | ||
1563 | "hosts-file", | ||
1564 | "FILENAME", | ||
1565 | gettext_noop ( | ||
1566 | "name of file with hosts' names"), | ||
1567 | &hosts_file), | ||
1568 | |||
1569 | GNUNET_GETOPT_OPTION_END | ||
1570 | }; | ||
1571 | int ret; | ||
1572 | |||
1573 | if (GNUNET_OK != | ||
1574 | GNUNET_STRINGS_get_utf8_args (argc, argv, | ||
1575 | &argc, &argv)) | ||
1576 | return 2; | ||
1577 | result = GNUNET_SYSERR; | ||
1578 | ret = | ||
1579 | GNUNET_PROGRAM_run (argc, argv, | ||
1580 | "gnunet-regex-profiler", | ||
1581 | _ ("Profiler for regex"), | ||
1582 | options, | ||
1583 | &run, NULL); | ||
1584 | if (GNUNET_OK != ret) | ||
1585 | return ret; | ||
1586 | if (GNUNET_OK != result) | ||
1587 | return 1; | ||
1588 | return 0; | ||
1589 | } | ||
diff --git a/src/service/regex/gnunet-service-regex.c b/src/service/regex/gnunet-service-regex.c new file mode 100644 index 000000000..c25ef9eda --- /dev/null +++ b/src/service/regex/gnunet-service-regex.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-service-regex.c | ||
23 | * @brief service to advertise capabilities described as regex and to | ||
24 | * lookup capabilities by regex | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_util_lib.h" | ||
29 | #include "regex_internal_lib.h" | ||
30 | #include "regex_ipc.h" | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Information about one of our clients. | ||
35 | */ | ||
36 | struct ClientEntry | ||
37 | { | ||
38 | /** | ||
39 | * Queue for transmissions to @e client. | ||
40 | */ | ||
41 | struct GNUNET_MQ_Handle *mq; | ||
42 | |||
43 | /** | ||
44 | * Handle identifying the client. | ||
45 | */ | ||
46 | struct GNUNET_SERVICE_Client *client; | ||
47 | |||
48 | /** | ||
49 | * Search handle (if this client is searching). | ||
50 | */ | ||
51 | struct REGEX_INTERNAL_Search *sh; | ||
52 | |||
53 | /** | ||
54 | * Announcement handle (if this client is announcing). | ||
55 | */ | ||
56 | struct REGEX_INTERNAL_Announcement *ah; | ||
57 | |||
58 | /** | ||
59 | * Refresh frequency for announcements. | ||
60 | */ | ||
61 | struct GNUNET_TIME_Relative frequency; | ||
62 | |||
63 | /** | ||
64 | * Task for re-announcing. | ||
65 | */ | ||
66 | struct GNUNET_SCHEDULER_Task *refresh_task; | ||
67 | }; | ||
68 | |||
69 | |||
/**
 * Our handle to the DHT service.
 */
static struct GNUNET_DHT_Handle *dht;

/**
 * Our handle to the statistics service.
 */
static struct GNUNET_STATISTICS_Handle *stats;

/**
 * EdDSA private key of this peer, loaded from the configuration.
 */
static struct GNUNET_CRYPTO_EddsaPrivateKey *my_private_key;
84 | |||
85 | |||
86 | /** | ||
87 | * Task run during shutdown. | ||
88 | * | ||
89 | * @param cls unused | ||
90 | */ | ||
91 | static void | ||
92 | cleanup_task (void *cls) | ||
93 | { | ||
94 | GNUNET_DHT_disconnect (dht); | ||
95 | dht = NULL; | ||
96 | GNUNET_STATISTICS_destroy (stats, | ||
97 | GNUNET_NO); | ||
98 | stats = NULL; | ||
99 | GNUNET_free (my_private_key); | ||
100 | my_private_key = NULL; | ||
101 | } | ||
102 | |||
103 | |||
104 | /** | ||
105 | * Periodic task to refresh our announcement of the regex. | ||
106 | * | ||
107 | * @param cls the `struct ClientEntry *` of the client that triggered the | ||
108 | * announcement | ||
109 | */ | ||
110 | static void | ||
111 | reannounce (void *cls) | ||
112 | { | ||
113 | struct ClientEntry *ce = cls; | ||
114 | |||
115 | REGEX_INTERNAL_reannounce (ce->ah); | ||
116 | ce->refresh_task = GNUNET_SCHEDULER_add_delayed (ce->frequency, | ||
117 | &reannounce, | ||
118 | ce); | ||
119 | } | ||
120 | |||
121 | |||
122 | /** | ||
123 | * Check ANNOUNCE message. | ||
124 | * | ||
125 | * @param cls identification of the client | ||
126 | * @param am the actual message | ||
127 | * @return #GNUNET_OK if @a am is well-formed | ||
128 | */ | ||
129 | static int | ||
130 | check_announce (void *cls, | ||
131 | const struct AnnounceMessage *am) | ||
132 | { | ||
133 | struct ClientEntry *ce = cls; | ||
134 | |||
135 | GNUNET_MQ_check_zero_termination (am); | ||
136 | if (NULL != ce->ah) | ||
137 | { | ||
138 | /* only one announcement per client allowed */ | ||
139 | GNUNET_break (0); | ||
140 | return GNUNET_SYSERR; | ||
141 | } | ||
142 | return GNUNET_OK; | ||
143 | } | ||
144 | |||
145 | |||
146 | /** | ||
147 | * Handle ANNOUNCE message. | ||
148 | * | ||
149 | * @param cls identification of the client | ||
150 | * @param am the actual message | ||
151 | */ | ||
152 | static void | ||
153 | handle_announce (void *cls, | ||
154 | const struct AnnounceMessage *am) | ||
155 | { | ||
156 | struct ClientEntry *ce = cls; | ||
157 | const char *regex; | ||
158 | |||
159 | regex = (const char *) &am[1]; | ||
160 | ce->frequency = GNUNET_TIME_relative_ntoh (am->refresh_delay); | ||
161 | ce->refresh_task = GNUNET_SCHEDULER_add_delayed (ce->frequency, | ||
162 | &reannounce, | ||
163 | ce); | ||
164 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
165 | "Starting to announce regex `%s' every %s\n", | ||
166 | regex, | ||
167 | GNUNET_STRINGS_relative_time_to_string (ce->frequency, | ||
168 | GNUNET_NO)); | ||
169 | ce->ah = REGEX_INTERNAL_announce (dht, | ||
170 | my_private_key, | ||
171 | regex, | ||
172 | ntohs (am->compression), | ||
173 | stats); | ||
174 | if (NULL == ce->ah) | ||
175 | { | ||
176 | GNUNET_break (0); | ||
177 | GNUNET_SCHEDULER_cancel (ce->refresh_task); | ||
178 | ce->refresh_task = NULL; | ||
179 | GNUNET_SERVICE_client_drop (ce->client); | ||
180 | return; | ||
181 | } | ||
182 | GNUNET_SERVICE_client_continue (ce->client); | ||
183 | } | ||
184 | |||
185 | |||
186 | /** | ||
187 | * Handle result, pass it back to the client. | ||
188 | * | ||
189 | * @param cls the struct ClientEntry of the client searching | ||
190 | * @param id Peer providing a regex that matches the string. | ||
191 | * @param get_path Path of the get request. | ||
192 | * @param get_path_length Length of @a get_path. | ||
193 | * @param put_path Path of the put request. | ||
194 | * @param put_path_length Length of the @a put_path. | ||
195 | */ | ||
196 | static void | ||
197 | handle_search_result (void *cls, | ||
198 | const struct GNUNET_PeerIdentity *id, | ||
199 | const struct GNUNET_DHT_PathElement *get_path, | ||
200 | unsigned int get_path_length, | ||
201 | const struct GNUNET_DHT_PathElement *put_path, | ||
202 | unsigned int put_path_length) | ||
203 | { | ||
204 | struct ClientEntry *ce = cls; | ||
205 | struct GNUNET_MQ_Envelope *env; | ||
206 | struct ResultMessage *result; | ||
207 | struct GNUNET_PeerIdentity *gp; | ||
208 | uint16_t size; | ||
209 | |||
210 | if ((get_path_length >= 65536) || | ||
211 | (put_path_length >= 65536) || | ||
212 | ( ((get_path_length + put_path_length) | ||
213 | * sizeof(struct GNUNET_PeerIdentity)) | ||
214 | + sizeof(struct ResultMessage) >= GNUNET_MAX_MESSAGE_SIZE) ) | ||
215 | { | ||
216 | GNUNET_break (0); | ||
217 | return; | ||
218 | } | ||
219 | size = (get_path_length + put_path_length) | ||
220 | * sizeof(struct GNUNET_PeerIdentity); | ||
221 | env = GNUNET_MQ_msg_extra (result, | ||
222 | size, | ||
223 | GNUNET_MESSAGE_TYPE_REGEX_RESULT); | ||
224 | result->get_path_length = htons ((uint16_t) get_path_length); | ||
225 | result->put_path_length = htons ((uint16_t) put_path_length); | ||
226 | result->id = *id; | ||
227 | gp = &result->id; | ||
228 | for (unsigned int i = 0; i<get_path_length; i++) | ||
229 | gp[i + 1] = get_path[i].pred; | ||
230 | for (unsigned int i = 0; i<put_path_length; i++) | ||
231 | gp[i + get_path_length + 1] = put_path[i].pred; | ||
232 | GNUNET_MQ_send (ce->mq, | ||
233 | env); | ||
234 | } | ||
235 | |||
236 | |||
237 | /** | ||
238 | * Check SEARCH message. | ||
239 | * | ||
240 | * @param cls identification of the client | ||
241 | * @param sm the actual message | ||
242 | */ | ||
243 | static int | ||
244 | check_search (void *cls, | ||
245 | const struct RegexSearchMessage *sm) | ||
246 | { | ||
247 | struct ClientEntry *ce = cls; | ||
248 | const char *string; | ||
249 | uint16_t size; | ||
250 | |||
251 | size = ntohs (sm->header.size) - sizeof(*sm); | ||
252 | string = (const char *) &sm[1]; | ||
253 | if ('\0' != string[size - 1]) | ||
254 | { | ||
255 | GNUNET_break (0); | ||
256 | return GNUNET_SYSERR; | ||
257 | } | ||
258 | if (NULL != ce->sh) | ||
259 | { | ||
260 | /* only one search allowed per client */ | ||
261 | GNUNET_break (0); | ||
262 | return GNUNET_SYSERR; | ||
263 | } | ||
264 | return GNUNET_OK; | ||
265 | } | ||
266 | |||
267 | |||
268 | /** | ||
269 | * Handle SEARCH message. | ||
270 | * | ||
271 | * @param cls identification of the client | ||
272 | * @param sm the actual message | ||
273 | */ | ||
274 | static void | ||
275 | handle_search (void *cls, | ||
276 | const struct RegexSearchMessage *sm) | ||
277 | { | ||
278 | struct ClientEntry *ce = cls; | ||
279 | const char *string; | ||
280 | |||
281 | string = (const char *) &sm[1]; | ||
282 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
283 | "Starting to search for `%s'\n", | ||
284 | string); | ||
285 | ce->sh = REGEX_INTERNAL_search (dht, | ||
286 | string, | ||
287 | &handle_search_result, | ||
288 | ce, | ||
289 | stats); | ||
290 | if (NULL == ce->sh) | ||
291 | { | ||
292 | GNUNET_break (0); | ||
293 | GNUNET_SERVICE_client_drop (ce->client); | ||
294 | return; | ||
295 | } | ||
296 | GNUNET_SERVICE_client_continue (ce->client); | ||
297 | } | ||
298 | |||
299 | |||
300 | /** | ||
301 | * Process regex requests. | ||
302 | * | ||
303 | * @param cls closure | ||
304 | * @param cfg configuration to use | ||
305 | * @param service the initialized service | ||
306 | */ | ||
307 | static void | ||
308 | run (void *cls, | ||
309 | const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
310 | struct GNUNET_SERVICE_Handle *service) | ||
311 | { | ||
312 | my_private_key = GNUNET_CRYPTO_eddsa_key_create_from_configuration (cfg); | ||
313 | if (NULL == my_private_key) | ||
314 | { | ||
315 | GNUNET_SCHEDULER_shutdown (); | ||
316 | return; | ||
317 | } | ||
318 | dht = GNUNET_DHT_connect (cfg, 1024); | ||
319 | if (NULL == dht) | ||
320 | { | ||
321 | GNUNET_free (my_private_key); | ||
322 | my_private_key = NULL; | ||
323 | GNUNET_SCHEDULER_shutdown (); | ||
324 | return; | ||
325 | } | ||
326 | GNUNET_SCHEDULER_add_shutdown (&cleanup_task, | ||
327 | NULL); | ||
328 | stats = GNUNET_STATISTICS_create ("regex", cfg); | ||
329 | } | ||
330 | |||
331 | |||
332 | /** | ||
333 | * Callback called when a client connects to the service. | ||
334 | * | ||
335 | * @param cls closure for the service | ||
336 | * @param c the new client that connected to the service | ||
337 | * @param mq the message queue used to send messages to the client | ||
338 | * @return @a c | ||
339 | */ | ||
340 | static void * | ||
341 | client_connect_cb (void *cls, | ||
342 | struct GNUNET_SERVICE_Client *c, | ||
343 | struct GNUNET_MQ_Handle *mq) | ||
344 | { | ||
345 | struct ClientEntry *ce; | ||
346 | |||
347 | ce = GNUNET_new (struct ClientEntry); | ||
348 | ce->client = c; | ||
349 | ce->mq = mq; | ||
350 | return ce; | ||
351 | } | ||
352 | |||
353 | |||
354 | /** | ||
355 | * Callback called when a client disconnected from the service | ||
356 | * | ||
357 | * @param cls closure for the service | ||
358 | * @param c the client that disconnected | ||
359 | * @param internal_cls should be equal to @a c | ||
360 | */ | ||
361 | static void | ||
362 | client_disconnect_cb (void *cls, | ||
363 | struct GNUNET_SERVICE_Client *c, | ||
364 | void *internal_cls) | ||
365 | { | ||
366 | struct ClientEntry *ce = internal_cls; | ||
367 | |||
368 | if (NULL != ce->refresh_task) | ||
369 | { | ||
370 | GNUNET_SCHEDULER_cancel (ce->refresh_task); | ||
371 | ce->refresh_task = NULL; | ||
372 | } | ||
373 | if (NULL != ce->ah) | ||
374 | { | ||
375 | REGEX_INTERNAL_announce_cancel (ce->ah); | ||
376 | ce->ah = NULL; | ||
377 | } | ||
378 | if (NULL != ce->sh) | ||
379 | { | ||
380 | REGEX_INTERNAL_search_cancel (ce->sh); | ||
381 | ce->sh = NULL; | ||
382 | } | ||
383 | GNUNET_free (ce); | ||
384 | } | ||
385 | |||
386 | |||
387 | /** | ||
388 | * Define "main" method using service macro. | ||
389 | */ | ||
390 | GNUNET_SERVICE_MAIN | ||
391 | ("regex", | ||
392 | GNUNET_SERVICE_OPTION_NONE, | ||
393 | &run, | ||
394 | &client_connect_cb, | ||
395 | &client_disconnect_cb, | ||
396 | NULL, | ||
397 | GNUNET_MQ_hd_var_size (announce, | ||
398 | GNUNET_MESSAGE_TYPE_REGEX_ANNOUNCE, | ||
399 | struct AnnounceMessage, | ||
400 | NULL), | ||
401 | GNUNET_MQ_hd_var_size (search, | ||
402 | GNUNET_MESSAGE_TYPE_REGEX_SEARCH, | ||
403 | struct RegexSearchMessage, | ||
404 | NULL), | ||
405 | GNUNET_MQ_handler_end ()); | ||
406 | |||
407 | |||
408 | /* end of gnunet-service-regex.c */ | ||
diff --git a/src/service/regex/meson.build b/src/service/regex/meson.build new file mode 100644 index 000000000..3c25a5148 --- /dev/null +++ b/src/service/regex/meson.build | |||
@@ -0,0 +1,57 @@ | |||
# Sources of the client library and of the service binary.
libgnunetregex_src = [
  'regex_api_announce.c',
  'regex_api_search.c',
]
gnunetserviceregex_src = [
  'gnunet-service-regex.c',
  'regex_internal_dht.c',
  'regex_internal.c',
]

# Generate and install the service configuration.
configure_file(
  input : 'regex.conf.in',
  output : 'regex.conf',
  configuration : cdata,
  install: true,
  install_dir: pkgcfgdir,
)

# In a monolithic build all sources are also added to the global list.
if get_option('monolith')
  foreach p : libgnunetregex_src + gnunetserviceregex_src
    gnunet_src += 'regex/' + p
  endforeach
endif

libgnunetregex = library(
  'gnunetregex',
  libgnunetregex_src,
  soversion: '3',
  version: '3.0.1',
  dependencies: libgnunetutil_dep,
  include_directories: [incdir, configuration_inc],
  install: true,
  install_dir: get_option('libdir'),
)
pkg.generate(
  libgnunetregex,
  url: 'https://www.gnunet.org',
  description : 'Provides API for accessing the regex service',
)
libgnunetregex_dep = declare_dependency(link_with : libgnunetregex)
31 | |||
# The regex service binary; each dependency is listed exactly once.
executable ('gnunet-service-regex',
            gnunetserviceregex_src,
            dependencies: [libgnunetregex_dep, libgnunetutil_dep,
                           libgnunetdht_dep,
                           libgnunetstatistics_dep,
                           libgnunetregexblock_dep],
            include_directories: [incdir, configuration_inc],
            install: true,
            install_dir: get_option('libdir')/'gnunet'/'libexec')
# The regex profiler daemon; each dependency is listed exactly once.
executable ('gnunet-daemon-regexprofiler',
            ['gnunet-daemon-regexprofiler.c',
             'regex_internal.c',
             'regex_internal_dht.c',
             'regex_test_lib.c',
             'regex_test_graph.c',
             'regex_test_random.c'],
            dependencies: [libgnunetregex_dep, libgnunetutil_dep,
                           libgnunetdht_dep,
                           libgnunetstatistics_dep,
                           libgnunetregexblock_dep],
            include_directories: [incdir, configuration_inc],
            install: true,
            install_dir: get_option('libdir')/'gnunet'/'libexec')
57 | |||
diff --git a/src/service/regex/perf-data.tar.gz b/src/service/regex/perf-data.tar.gz new file mode 100644 index 000000000..9e909e58e --- /dev/null +++ b/src/service/regex/perf-data.tar.gz | |||
Binary files differ | |||
diff --git a/src/service/regex/perf-regex.c b/src/service/regex/perf-regex.c new file mode 100644 index 000000000..fbddc58eb --- /dev/null +++ b/src/service/regex/perf-regex.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file src/regex/perf-regex.c | ||
23 | * @brief Test how long it takes to create an automaton from a string regex. | ||
24 | * @author Bartlomiej Polot | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include <regex.h> | ||
28 | #include <time.h> | ||
29 | #include "regex_internal_lib.h" | ||
30 | #include "regex_test_lib.h" | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Print information about the given node and its edges | ||
35 | * to stdout. | ||
36 | * | ||
37 | * @param cls closure, unused. | ||
38 | * @param key hash for current state. | ||
39 | * @param proof proof for current state. | ||
40 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
41 | * @param num_edges number of edges leaving current state. | ||
42 | * @param edges edges leaving current state. | ||
43 | */ | ||
44 | static void | ||
45 | print_edge (void *cls, | ||
46 | const struct GNUNET_HashCode *key, | ||
47 | const char *proof, | ||
48 | int accepting, | ||
49 | unsigned int num_edges, | ||
50 | const struct REGEX_BLOCK_Edge *edges) | ||
51 | { | ||
52 | unsigned int i; | ||
53 | |||
54 | printf ("%s: %s, proof: `%s'\n", | ||
55 | GNUNET_h2s (key), | ||
56 | accepting ? "ACCEPTING" : "", | ||
57 | proof); | ||
58 | for (i = 0; i < num_edges; i++) | ||
59 | printf (" `%s': %s\n", | ||
60 | edges[i].label, | ||
61 | GNUNET_h2s (&edges[i].destination)); | ||
62 | } | ||
63 | |||
64 | |||
65 | /** | ||
66 | * The main function of the regex performance test. | ||
67 | * | ||
68 | * Read a set of regex from a file, combine them and create a DFA from the | ||
69 | * resulting combined regex. | ||
70 | * | ||
71 | * @param argc number of arguments from the command line | ||
72 | * @param argv command line arguments | ||
73 | * @return 0 ok, 1 on error | ||
74 | */ | ||
75 | int | ||
76 | main (int argc, char *const *argv) | ||
77 | { | ||
78 | struct REGEX_INTERNAL_Automaton*dfa; | ||
79 | char **regexes; | ||
80 | char *buffer; | ||
81 | char *regex; | ||
82 | int compression; | ||
83 | unsigned int alphabet_size; | ||
84 | long size; | ||
85 | |||
86 | GNUNET_log_setup ("perf-regex", "DEBUG", NULL); | ||
87 | if (4 != argc) | ||
88 | { | ||
89 | fprintf (stderr, | ||
90 | "Usage: %s REGEX_FILE ALPHABET_SIZE COMPRESSION\n", | ||
91 | argv[0]); | ||
92 | return 1; | ||
93 | } | ||
94 | regexes = REGEX_TEST_read_from_file (argv[1]); | ||
95 | if (NULL == regexes) | ||
96 | { | ||
97 | fprintf (stderr, | ||
98 | "Failed to read regexes from `%s'\n", | ||
99 | argv[1]); | ||
100 | return 2; | ||
101 | } | ||
102 | alphabet_size = atoi (argv[2]); | ||
103 | compression = atoi (argv[3]); | ||
104 | printf ("********* PERF-REGEX *********'\n"); | ||
105 | printf ("Using:\n file '%s'\n Alphabet size %u\n compression %d\n", | ||
106 | argv[1], alphabet_size, compression); | ||
107 | fflush (stdout); | ||
108 | buffer = REGEX_TEST_combine (regexes, alphabet_size); | ||
109 | GNUNET_asprintf (®ex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer); | ||
110 | size = strlen (regex); | ||
111 | |||
112 | fprintf (stderr, | ||
113 | "Combined regex (%ld bytes):\n%s\n", | ||
114 | size, | ||
115 | regex); | ||
116 | dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression); | ||
117 | printf ("********* ALL EDGES *********'\n"); | ||
118 | REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL); | ||
119 | printf ("\n\n********* REACHABLE EDGES *********'\n"); | ||
120 | REGEX_INTERNAL_iterate_reachable_edges (dfa, &print_edge, NULL); | ||
121 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
122 | GNUNET_free (buffer); | ||
123 | REGEX_TEST_free_from_file (regexes); | ||
124 | GNUNET_free (regex); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | |||
129 | /* end of perf-regex.c */ | ||
diff --git a/src/service/regex/regex.conf.in b/src/service/regex/regex.conf.in new file mode 100644 index 000000000..5e68a43da --- /dev/null +++ b/src/service/regex/regex.conf.in | |||
@@ -0,0 +1,8 @@ | |||
1 | [regex] | ||
2 | START_ON_DEMAND = @START_ON_DEMAND@ | ||
3 | @UNIXONLY@ PORT = 2107 | ||
4 | UNIXPATH = $GNUNET_RUNTIME_DIR/gnunet-service-regex.sock | ||
5 | HOSTNAME = localhost | ||
6 | BINARY = gnunet-service-regex | ||
7 | ACCEPT_FROM = 127.0.0.1; | ||
8 | ACCEPT_FROM6 = ::1; | ||
diff --git a/src/service/regex/regex_api_announce.c b/src/service/regex/regex_api_announce.c new file mode 100644 index 000000000..12f173476 --- /dev/null +++ b/src/service/regex/regex_api_announce.c | |||
@@ -0,0 +1,181 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013, 2016 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_api_announce.c | ||
22 | * @brief access regex service to advertise capabilities via regex | ||
23 | * @author Maximilian Szengel | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_protocols.h" | ||
28 | #include "gnunet_util_lib.h" | ||
29 | #include "gnunet_regex_service.h" | ||
30 | #include "regex_ipc.h" | ||
31 | |||
32 | #define LOG(kind, ...) GNUNET_log_from (kind, "regex-api", __VA_ARGS__) | ||
33 | |||
34 | /** | ||
35 | * Handle to store cached data about a regex announce. | ||
36 | */ | ||
37 | struct GNUNET_REGEX_Announcement | ||
38 | { | ||
39 | /** | ||
40 | * Connection to the regex service. | ||
41 | */ | ||
42 | struct GNUNET_MQ_Handle *mq; | ||
43 | |||
44 | /** | ||
45 | * Our configuration. | ||
46 | */ | ||
47 | const struct GNUNET_CONFIGURATION_Handle *cfg; | ||
48 | |||
49 | /** | ||
50 | * Message we're sending to the service. | ||
51 | */ | ||
52 | char *regex; | ||
53 | |||
54 | /** | ||
55 | * Frequency of announcements. | ||
56 | */ | ||
57 | struct GNUNET_TIME_Relative refresh_delay; | ||
58 | |||
59 | /** | ||
60 | * Number of characters per edge. | ||
61 | */ | ||
62 | uint16_t compression; | ||
63 | }; | ||
64 | |||
65 | |||
66 | /** | ||
67 | * (Re)connect to the REGEX service with the given announcement @a a. | ||
68 | * | ||
69 | * @param a REGEX to announce. | ||
70 | */ | ||
71 | static void | ||
72 | announce_reconnect (struct GNUNET_REGEX_Announcement *a); | ||
73 | |||
74 | |||
75 | /** | ||
76 | * We got a disconnect after asking regex to do the announcement. | ||
77 | * Retry. | ||
78 | * | ||
79 | * @param cls the `struct GNUNET_REGEX_Announcement` to retry | ||
80 | * @param error error code | ||
81 | */ | ||
82 | static void | ||
83 | announce_mq_error_handler (void *cls, | ||
84 | enum GNUNET_MQ_Error error) | ||
85 | { | ||
86 | struct GNUNET_REGEX_Announcement *a = cls; | ||
87 | |||
88 | GNUNET_MQ_destroy (a->mq); | ||
89 | a->mq = NULL; | ||
90 | announce_reconnect (a); | ||
91 | } | ||
92 | |||
93 | |||
94 | /** | ||
95 | * (Re)connect to the REGEX service with the given announcement @a a. | ||
96 | * | ||
97 | * @param a REGEX to announce. | ||
98 | */ | ||
99 | static void | ||
100 | announce_reconnect (struct GNUNET_REGEX_Announcement *a) | ||
101 | { | ||
102 | struct GNUNET_MQ_Envelope *env; | ||
103 | struct AnnounceMessage *am; | ||
104 | size_t slen; | ||
105 | |||
106 | a->mq = GNUNET_CLIENT_connect (a->cfg, | ||
107 | "regex", | ||
108 | NULL, | ||
109 | &announce_mq_error_handler, | ||
110 | a); | ||
111 | if (NULL == a->mq) | ||
112 | return; | ||
113 | slen = strlen (a->regex) + 1; | ||
114 | env = GNUNET_MQ_msg_extra (am, | ||
115 | slen, | ||
116 | GNUNET_MESSAGE_TYPE_REGEX_ANNOUNCE); | ||
117 | am->compression = htons (a->compression); | ||
118 | am->reserved = htons (0); | ||
119 | am->refresh_delay = GNUNET_TIME_relative_hton (a->refresh_delay); | ||
120 | GNUNET_memcpy (&am[1], | ||
121 | a->regex, | ||
122 | slen); | ||
123 | GNUNET_MQ_send (a->mq, | ||
124 | env); | ||
125 | } | ||
126 | |||
127 | |||
128 | /** | ||
129 | * Announce the given peer under the given regular expression. | ||
130 | * | ||
131 | * @param cfg configuration to use | ||
132 | * @param regex Regular expression to announce. | ||
133 | * @param refresh_delay after what delay should the announcement be repeated? | ||
134 | * @param compression How many characters per edge can we squeeze? | ||
135 | * @return Handle to reuse o free cached resources. | ||
136 | * Must be freed by calling #GNUNET_REGEX_announce_cancel(). | ||
137 | */ | ||
138 | struct GNUNET_REGEX_Announcement * | ||
139 | GNUNET_REGEX_announce (const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
140 | const char *regex, | ||
141 | struct GNUNET_TIME_Relative refresh_delay, | ||
142 | uint16_t compression) | ||
143 | { | ||
144 | struct GNUNET_REGEX_Announcement *a; | ||
145 | size_t slen; | ||
146 | |||
147 | slen = strlen (regex) + 1; | ||
148 | if (slen + sizeof(struct AnnounceMessage) >= GNUNET_MAX_MESSAGE_SIZE) | ||
149 | { | ||
150 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
151 | _ ("Regex `%s' is too long!\n"), | ||
152 | regex); | ||
153 | GNUNET_break (0); | ||
154 | return NULL; | ||
155 | } | ||
156 | a = GNUNET_new (struct GNUNET_REGEX_Announcement); | ||
157 | a->cfg = cfg; | ||
158 | a->refresh_delay = refresh_delay; | ||
159 | a->compression = compression; | ||
160 | a->regex = GNUNET_strdup (regex); | ||
161 | announce_reconnect (a); | ||
162 | if (NULL == a->mq) | ||
163 | { | ||
164 | GNUNET_free (a->regex); | ||
165 | GNUNET_free (a); | ||
166 | return NULL; | ||
167 | } | ||
168 | return a; | ||
169 | } | ||
170 | |||
171 | |||
172 | void | ||
173 | GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_Announcement *a) | ||
174 | { | ||
175 | GNUNET_MQ_destroy (a->mq); | ||
176 | GNUNET_free (a->regex); | ||
177 | GNUNET_free (a); | ||
178 | } | ||
179 | |||
180 | |||
181 | /* end of regex_api_announce.c */ | ||
diff --git a/src/service/regex/regex_api_search.c b/src/service/regex/regex_api_search.c new file mode 100644 index 000000000..71219cbdd --- /dev/null +++ b/src/service/regex/regex_api_search.c | |||
@@ -0,0 +1,233 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013, 2016 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_api_search.c | ||
22 | * @brief access regex service to discover | ||
23 | * peers using matching strings | ||
24 | * @author Maximilian Szengel | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_protocols.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "gnunet_regex_service.h" | ||
31 | #include "regex_ipc.h" | ||
32 | |||
33 | #define LOG(kind, ...) GNUNET_log_from (kind, "regex-api", __VA_ARGS__) | ||
34 | |||
35 | |||
36 | /** | ||
37 | * Handle to store data about a regex search. | ||
38 | */ | ||
39 | struct GNUNET_REGEX_Search | ||
40 | { | ||
41 | /** | ||
42 | * Connection to the regex service. | ||
43 | */ | ||
44 | struct GNUNET_MQ_Handle *mq; | ||
45 | |||
46 | /** | ||
47 | * Our configuration. | ||
48 | */ | ||
49 | const struct GNUNET_CONFIGURATION_Handle *cfg; | ||
50 | |||
51 | /** | ||
52 | * Function to call with results. | ||
53 | */ | ||
54 | GNUNET_REGEX_Found callback; | ||
55 | |||
56 | /** | ||
57 | * Closure for @e callback. | ||
58 | */ | ||
59 | void *callback_cls; | ||
60 | |||
61 | /** | ||
62 | * Search string to transmit to the service. | ||
63 | */ | ||
64 | char *string; | ||
65 | }; | ||
66 | |||
67 | |||
68 | /** | ||
69 | * (Re)connect to the REGEX service for the given search @a s. | ||
70 | * | ||
71 | * @param s context for the search | ||
72 | */ | ||
73 | static void | ||
74 | search_reconnect (struct GNUNET_REGEX_Search *s); | ||
75 | |||
76 | |||
77 | /** | ||
78 | * We got a response or disconnect after asking regex | ||
79 | * to do the search. Check it is well-formed. | ||
80 | * | ||
81 | * @param cls the `struct GNUNET_REGEX_Search` to handle reply for | ||
82 | * @param result the message | ||
83 | * @return #GNUNET_SYSERR if @a rm is not well-formed. | ||
84 | */ | ||
85 | static int | ||
86 | check_search_response (void *cls, | ||
87 | const struct ResultMessage *result) | ||
88 | { | ||
89 | uint16_t size = ntohs (result->header.size) - sizeof(*result); | ||
90 | uint16_t gpl = ntohs (result->get_path_length); | ||
91 | uint16_t ppl = ntohs (result->put_path_length); | ||
92 | |||
93 | if (size != (gpl + ppl) * sizeof(struct GNUNET_PeerIdentity)) | ||
94 | { | ||
95 | GNUNET_break (0); | ||
96 | return GNUNET_SYSERR; | ||
97 | } | ||
98 | return GNUNET_OK; | ||
99 | } | ||
100 | |||
101 | |||
102 | /** | ||
103 | * We got a response or disconnect after asking regex | ||
104 | * to do the search. Handle it. | ||
105 | * | ||
106 | * @param cls the `struct GNUNET_REGEX_Search` to handle reply for | ||
107 | * @param result the message | ||
108 | */ | ||
109 | static void | ||
110 | handle_search_response (void *cls, | ||
111 | const struct ResultMessage *result) | ||
112 | { | ||
113 | struct GNUNET_REGEX_Search *s = cls; | ||
114 | uint16_t gpl = ntohs (result->get_path_length); | ||
115 | uint16_t ppl = ntohs (result->put_path_length); | ||
116 | const struct GNUNET_PeerIdentity *pid; | ||
117 | |||
118 | pid = &result->id; | ||
119 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
120 | "Got regex result %s\n", | ||
121 | GNUNET_i2s (pid)); | ||
122 | s->callback (s->callback_cls, | ||
123 | pid, | ||
124 | &pid[1], | ||
125 | gpl, | ||
126 | &pid[1 + gpl], | ||
127 | ppl); | ||
128 | } | ||
129 | |||
130 | |||
131 | /** | ||
132 | * We got a disconnect after asking regex to do the announcement. | ||
133 | * Retry. | ||
134 | * | ||
135 | * @param cls the `struct GNUNET_REGEX_Search` to retry | ||
136 | * @param error error code | ||
137 | */ | ||
138 | static void | ||
139 | mq_error_handler (void *cls, | ||
140 | enum GNUNET_MQ_Error error) | ||
141 | { | ||
142 | struct GNUNET_REGEX_Search *s = cls; | ||
143 | |||
144 | GNUNET_MQ_destroy (s->mq); | ||
145 | s->mq = NULL; | ||
146 | search_reconnect (s); | ||
147 | } | ||
148 | |||
149 | |||
150 | /** | ||
151 | * (Re)connect to the REGEX service for the given search @a s. | ||
152 | * | ||
153 | * @param s context for the search search for | ||
154 | */ | ||
155 | static void | ||
156 | search_reconnect (struct GNUNET_REGEX_Search *s) | ||
157 | { | ||
158 | struct GNUNET_MQ_MessageHandler handlers[] = { | ||
159 | GNUNET_MQ_hd_var_size (search_response, | ||
160 | GNUNET_MESSAGE_TYPE_REGEX_RESULT, | ||
161 | struct ResultMessage, | ||
162 | s), | ||
163 | GNUNET_MQ_handler_end () | ||
164 | }; | ||
165 | size_t slen = strlen (s->string) + 1; | ||
166 | struct GNUNET_MQ_Envelope *env; | ||
167 | struct RegexSearchMessage *rsm; | ||
168 | |||
169 | GNUNET_assert (NULL == s->mq); | ||
170 | s->mq = GNUNET_CLIENT_connect (s->cfg, | ||
171 | "regex", | ||
172 | handlers, | ||
173 | &mq_error_handler, | ||
174 | s); | ||
175 | if (NULL == s->mq) | ||
176 | return; | ||
177 | env = GNUNET_MQ_msg_extra (rsm, | ||
178 | slen, | ||
179 | GNUNET_MESSAGE_TYPE_REGEX_SEARCH); | ||
180 | GNUNET_memcpy (&rsm[1], | ||
181 | s->string, | ||
182 | slen); | ||
183 | GNUNET_MQ_send (s->mq, | ||
184 | env); | ||
185 | } | ||
186 | |||
187 | |||
188 | struct GNUNET_REGEX_Search * | ||
189 | GNUNET_REGEX_search (const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
190 | const char *string, | ||
191 | GNUNET_REGEX_Found callback, | ||
192 | void *callback_cls) | ||
193 | { | ||
194 | struct GNUNET_REGEX_Search *s; | ||
195 | size_t slen = strlen (string) + 1; | ||
196 | |||
197 | if (slen + sizeof(struct RegexSearchMessage) >= GNUNET_MAX_MESSAGE_SIZE) | ||
198 | { | ||
199 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
200 | _ ("Search string `%s' is too long!\n"), | ||
201 | string); | ||
202 | GNUNET_break (0); | ||
203 | return NULL; | ||
204 | } | ||
205 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
206 | "Starting regex search for %s\n", | ||
207 | string); | ||
208 | s = GNUNET_new (struct GNUNET_REGEX_Search); | ||
209 | s->cfg = cfg; | ||
210 | s->string = GNUNET_strdup (string); | ||
211 | s->callback = callback; | ||
212 | s->callback_cls = callback_cls; | ||
213 | search_reconnect (s); | ||
214 | if (NULL == s->mq) | ||
215 | { | ||
216 | GNUNET_free (s->string); | ||
217 | GNUNET_free (s); | ||
218 | return NULL; | ||
219 | } | ||
220 | return s; | ||
221 | } | ||
222 | |||
223 | |||
224 | void | ||
225 | GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_Search *s) | ||
226 | { | ||
227 | GNUNET_MQ_destroy (s->mq); | ||
228 | GNUNET_free (s->string); | ||
229 | GNUNET_free (s); | ||
230 | } | ||
231 | |||
232 | |||
233 | /* end of regex_api_search.c */ | ||
diff --git a/src/service/regex/regex_internal.c b/src/service/regex/regex_internal.c new file mode 100644 index 000000000..c4fab6b56 --- /dev/null +++ b/src/service/regex/regex_internal.c | |||
@@ -0,0 +1,3654 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_internal.c | ||
22 | * @brief library to create Deterministic Finite Automatons (DFAs) from regular | ||
23 | * expressions (regexes). | ||
24 | * @author Maximilian Szengel | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_util_lib.h" | ||
28 | #include "gnunet_regex_service.h" | ||
29 | #include "regex_internal_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Set this to #GNUNET_YES to enable state naming. Used to debug NFA->DFA | ||
35 | * creation. Disabled by default for better performance. | ||
36 | */ | ||
37 | #define REGEX_DEBUG_DFA GNUNET_NO | ||
38 | |||
/**
 * Set of states kept as a doubly-linked list (MDLL API).
 */
struct REGEX_INTERNAL_StateSet_MDLL
{
  /**
   * First state of the MDLL.
   */
  struct REGEX_INTERNAL_State *head;

  /**
   * Last state of the MDLL.
   */
  struct REGEX_INTERNAL_State *tail;

  /**
   * Number of states in the MDLL.
   */
  unsigned int len;
};
59 | |||
60 | |||
61 | /** | ||
62 | * Append state to the given StateSet. | ||
63 | * | ||
64 | * @param set set to be modified | ||
65 | * @param state state to be appended | ||
66 | */ | ||
67 | static void | ||
68 | state_set_append (struct REGEX_INTERNAL_StateSet *set, | ||
69 | struct REGEX_INTERNAL_State *state) | ||
70 | { | ||
71 | if (set->off == set->size) | ||
72 | GNUNET_array_grow (set->states, set->size, set->size * 2 + 4); | ||
73 | set->states[set->off++] = state; | ||
74 | } | ||
75 | |||
76 | |||
/**
 * NULL-tolerant string comparison.  Two NULL pointers compare equal; if
 * exactly one argument is NULL the result is -1 (regardless of which one
 * it is); otherwise the result is that of strcmp().
 *
 * @param str1 first string for comparison, may be NULL.
 * @param str2 second string for comparison, may be NULL.
 *
 * @return 0 if the strings are the same or both NULL, non-zero if not.
 */
static int
nullstrcmp (const char *str1, const char *str2)
{
  if (NULL == str1)
    return (NULL == str2) ? 0 : -1;
  if (NULL == str2)
    return -1;
  return strcmp (str1, str2);
}
95 | |||
96 | |||
97 | /** | ||
98 | * Adds a transition from one state to another on @a label. Does not add | ||
99 | * duplicate states. | ||
100 | * | ||
101 | * @param ctx context | ||
102 | * @param from_state starting state for the transition | ||
103 | * @param label transition label | ||
104 | * @param to_state state to where the transition should point to | ||
105 | */ | ||
106 | static void | ||
107 | state_add_transition (struct REGEX_INTERNAL_Context *ctx, | ||
108 | struct REGEX_INTERNAL_State *from_state, | ||
109 | const char *label, | ||
110 | struct REGEX_INTERNAL_State *to_state) | ||
111 | { | ||
112 | struct REGEX_INTERNAL_Transition *t; | ||
113 | struct REGEX_INTERNAL_Transition *oth; | ||
114 | |||
115 | if (NULL == from_state) | ||
116 | { | ||
117 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n"); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | /* Do not add duplicate state transitions */ | ||
122 | for (t = from_state->transitions_head; NULL != t; t = t->next) | ||
123 | { | ||
124 | if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) && | ||
125 | (t->from_state == from_state) ) | ||
126 | return; | ||
127 | } | ||
128 | |||
129 | /* sort transitions by label */ | ||
130 | for (oth = from_state->transitions_head; NULL != oth; oth = oth->next) | ||
131 | { | ||
132 | if (0 < nullstrcmp (oth->label, label)) | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | t = GNUNET_new (struct REGEX_INTERNAL_Transition); | ||
137 | if (NULL != ctx) | ||
138 | t->id = ctx->transition_id++; | ||
139 | if (NULL != label) | ||
140 | t->label = GNUNET_strdup (label); | ||
141 | else | ||
142 | t->label = NULL; | ||
143 | t->to_state = to_state; | ||
144 | t->from_state = from_state; | ||
145 | |||
146 | /* Add outgoing transition to 'from_state' */ | ||
147 | from_state->transition_count++; | ||
148 | GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head, | ||
149 | from_state->transitions_tail, | ||
150 | oth, | ||
151 | t); | ||
152 | } | ||
153 | |||
154 | |||
155 | /** | ||
156 | * Remove a 'transition' from 'state'. | ||
157 | * | ||
158 | * @param state state from which the to-be-removed transition originates. | ||
159 | * @param transition transition that should be removed from state 'state'. | ||
160 | */ | ||
161 | static void | ||
162 | state_remove_transition (struct REGEX_INTERNAL_State *state, | ||
163 | struct REGEX_INTERNAL_Transition *transition) | ||
164 | { | ||
165 | if ((NULL == state) || (NULL == transition)) | ||
166 | return; | ||
167 | |||
168 | if (transition->from_state != state) | ||
169 | return; | ||
170 | |||
171 | GNUNET_free (transition->label); | ||
172 | |||
173 | state->transition_count--; | ||
174 | GNUNET_CONTAINER_DLL_remove (state->transitions_head, | ||
175 | state->transitions_tail, | ||
176 | transition); | ||
177 | |||
178 | GNUNET_free (transition); | ||
179 | } | ||
180 | |||
181 | |||
182 | /** | ||
183 | * Compare two states. Used for sorting. | ||
184 | * | ||
185 | * @param a first state | ||
186 | * @param b second state | ||
187 | * | ||
188 | * @return an integer less than, equal to, or greater than zero | ||
189 | * if the first argument is considered to be respectively | ||
190 | * less than, equal to, or greater than the second. | ||
191 | */ | ||
192 | static int | ||
193 | state_compare (const void *a, const void *b) | ||
194 | { | ||
195 | struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a; | ||
196 | struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b; | ||
197 | |||
198 | return (*s1)->id - (*s2)->id; | ||
199 | } | ||
200 | |||
201 | |||
202 | /** | ||
203 | * Get all edges leaving state @a s. | ||
204 | * | ||
205 | * @param s state. | ||
206 | * @param edges all edges leaving @a s, expected to be allocated and have enough | ||
207 | * space for `s->transitions_count` elements. | ||
208 | * | ||
209 | * @return number of edges. | ||
210 | */ | ||
211 | static unsigned int | ||
212 | state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges) | ||
213 | { | ||
214 | struct REGEX_INTERNAL_Transition *t; | ||
215 | unsigned int count; | ||
216 | |||
217 | if (NULL == s) | ||
218 | return 0; | ||
219 | |||
220 | count = 0; | ||
221 | |||
222 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
223 | { | ||
224 | if (NULL != t->to_state) | ||
225 | { | ||
226 | edges[count].label = t->label; | ||
227 | edges[count].destination = t->to_state->hash; | ||
228 | count++; | ||
229 | } | ||
230 | } | ||
231 | return count; | ||
232 | } | ||
233 | |||
234 | |||
235 | /** | ||
236 | * Compare to state sets by comparing the id's of the states that are contained | ||
237 | * in each set. Both sets are expected to be sorted by id! | ||
238 | * | ||
239 | * @param sset1 first state set | ||
240 | * @param sset2 second state set | ||
241 | * @return 0 if the sets are equal, otherwise non-zero | ||
242 | */ | ||
243 | static int | ||
244 | state_set_compare (struct REGEX_INTERNAL_StateSet *sset1, | ||
245 | struct REGEX_INTERNAL_StateSet *sset2) | ||
246 | { | ||
247 | int result; | ||
248 | unsigned int i; | ||
249 | |||
250 | if ((NULL == sset1) || (NULL == sset2)) | ||
251 | return 1; | ||
252 | |||
253 | result = sset1->off - sset2->off; | ||
254 | if (result < 0) | ||
255 | return -1; | ||
256 | if (result > 0) | ||
257 | return 1; | ||
258 | for (i = 0; i < sset1->off; i++) | ||
259 | if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i]))) | ||
260 | break; | ||
261 | return result; | ||
262 | } | ||
263 | |||
264 | |||
265 | /** | ||
266 | * Clears the given StateSet 'set' | ||
267 | * | ||
268 | * @param set set to be cleared | ||
269 | */ | ||
270 | static void | ||
271 | state_set_clear (struct REGEX_INTERNAL_StateSet *set) | ||
272 | { | ||
273 | GNUNET_array_grow (set->states, set->size, 0); | ||
274 | set->off = 0; | ||
275 | } | ||
276 | |||
277 | |||
278 | /** | ||
279 | * Clears an automaton fragment. Does not destroy the states inside the | ||
280 | * automaton. | ||
281 | * | ||
282 | * @param a automaton to be cleared | ||
283 | */ | ||
284 | static void | ||
285 | automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a) | ||
286 | { | ||
287 | if (NULL == a) | ||
288 | return; | ||
289 | |||
290 | a->start = NULL; | ||
291 | a->end = NULL; | ||
292 | a->states_head = NULL; | ||
293 | a->states_tail = NULL; | ||
294 | a->state_count = 0; | ||
295 | GNUNET_free (a); | ||
296 | } | ||
297 | |||
298 | |||
299 | /** | ||
300 | * Frees the memory used by State @a s | ||
301 | * | ||
302 | * @param s state that should be destroyed | ||
303 | */ | ||
304 | static void | ||
305 | automaton_destroy_state (struct REGEX_INTERNAL_State *s) | ||
306 | { | ||
307 | struct REGEX_INTERNAL_Transition *t; | ||
308 | struct REGEX_INTERNAL_Transition *next_t; | ||
309 | |||
310 | if (NULL == s) | ||
311 | return; | ||
312 | |||
313 | GNUNET_free (s->name); | ||
314 | GNUNET_free (s->proof); | ||
315 | state_set_clear (&s->nfa_set); | ||
316 | for (t = s->transitions_head; NULL != t; t = next_t) | ||
317 | { | ||
318 | next_t = t->next; | ||
319 | state_remove_transition (s, t); | ||
320 | } | ||
321 | |||
322 | GNUNET_free (s); | ||
323 | } | ||
324 | |||
325 | |||
326 | /** | ||
327 | * Remove a state from the given automaton 'a'. Always use this function when | ||
328 | * altering the states of an automaton. Will also remove all transitions leading | ||
329 | * to this state, before destroying it. | ||
330 | * | ||
331 | * @param a automaton | ||
332 | * @param s state to remove | ||
333 | */ | ||
334 | static void | ||
335 | automaton_remove_state (struct REGEX_INTERNAL_Automaton *a, | ||
336 | struct REGEX_INTERNAL_State *s) | ||
337 | { | ||
338 | struct REGEX_INTERNAL_State *s_check; | ||
339 | struct REGEX_INTERNAL_Transition *t_check; | ||
340 | struct REGEX_INTERNAL_Transition *t_check_next; | ||
341 | |||
342 | if ((NULL == a) || (NULL == s)) | ||
343 | return; | ||
344 | |||
345 | /* remove all transitions leading to this state */ | ||
346 | for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) | ||
347 | { | ||
348 | for (t_check = s_check->transitions_head; NULL != t_check; | ||
349 | t_check = t_check_next) | ||
350 | { | ||
351 | t_check_next = t_check->next; | ||
352 | if (t_check->to_state == s) | ||
353 | state_remove_transition (s_check, t_check); | ||
354 | } | ||
355 | } | ||
356 | |||
357 | /* remove state */ | ||
358 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); | ||
359 | a->state_count--; | ||
360 | |||
361 | automaton_destroy_state (s); | ||
362 | } | ||
363 | |||
364 | |||
365 | /** | ||
366 | * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy | ||
367 | * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'. | ||
368 | * | ||
369 | * @param ctx context | ||
370 | * @param a automaton | ||
371 | * @param s1 first state | ||
372 | * @param s2 second state, will be destroyed | ||
373 | */ | ||
374 | static void | ||
375 | automaton_merge_states (struct REGEX_INTERNAL_Context *ctx, | ||
376 | struct REGEX_INTERNAL_Automaton *a, | ||
377 | struct REGEX_INTERNAL_State *s1, | ||
378 | struct REGEX_INTERNAL_State *s2) | ||
379 | { | ||
380 | struct REGEX_INTERNAL_State *s_check; | ||
381 | struct REGEX_INTERNAL_Transition *t_check; | ||
382 | struct REGEX_INTERNAL_Transition *t; | ||
383 | struct REGEX_INTERNAL_Transition *t_next; | ||
384 | int is_dup; | ||
385 | |||
386 | if (s1 == s2) | ||
387 | return; | ||
388 | |||
389 | /* 1. Make all transitions pointing to s2 point to s1, unless this transition | ||
390 | * does not already exists, if it already exists remove transition. */ | ||
391 | for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) | ||
392 | { | ||
393 | for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next) | ||
394 | { | ||
395 | t_next = t_check->next; | ||
396 | |||
397 | if (s2 == t_check->to_state) | ||
398 | { | ||
399 | is_dup = GNUNET_NO; | ||
400 | for (t = t_check->from_state->transitions_head; NULL != t; t = t->next) | ||
401 | { | ||
402 | if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) ) | ||
403 | is_dup = GNUNET_YES; | ||
404 | } | ||
405 | if (GNUNET_NO == is_dup) | ||
406 | t_check->to_state = s1; | ||
407 | else | ||
408 | state_remove_transition (t_check->from_state, t_check); | ||
409 | } | ||
410 | } | ||
411 | } | ||
412 | |||
413 | /* 2. Add all transitions from s2 to sX to s1 */ | ||
414 | for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) | ||
415 | { | ||
416 | if (t_check->to_state != s1) | ||
417 | state_add_transition (ctx, s1, t_check->label, t_check->to_state); | ||
418 | } | ||
419 | |||
420 | /* 3. Rename s1 to {s1,s2} */ | ||
421 | #if REGEX_DEBUG_DFA | ||
422 | char *new_name; | ||
423 | |||
424 | new_name = s1->name; | ||
425 | GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name); | ||
426 | GNUNET_free (new_name); | ||
427 | #endif | ||
428 | |||
429 | /* remove state */ | ||
430 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2); | ||
431 | a->state_count--; | ||
432 | automaton_destroy_state (s2); | ||
433 | } | ||
434 | |||
435 | |||
436 | /** | ||
437 | * Add a state to the automaton 'a', always use this function to alter the | ||
438 | * states DLL of the automaton. | ||
439 | * | ||
440 | * @param a automaton to add the state to | ||
441 | * @param s state that should be added | ||
442 | */ | ||
443 | static void | ||
444 | automaton_add_state (struct REGEX_INTERNAL_Automaton *a, | ||
445 | struct REGEX_INTERNAL_State *s) | ||
446 | { | ||
447 | GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s); | ||
448 | a->state_count++; | ||
449 | } | ||
450 | |||
451 | |||
452 | /** | ||
453 | * Depth-first traversal (DFS) of all states that are reachable from state | ||
454 | * 's'. Performs 'action' on each visited state. | ||
455 | * | ||
456 | * @param s start state. | ||
457 | * @param marks an array of size a->state_count to remember which state was | ||
458 | * already visited. | ||
459 | * @param count current count of the state. | ||
460 | * @param check function that is checked before advancing on each transition | ||
461 | * in the DFS. | ||
462 | * @param check_cls closure for check. | ||
463 | * @param action action to be performed on each state. | ||
464 | * @param action_cls closure for action. | ||
465 | */ | ||
466 | static void | ||
467 | automaton_state_traverse (struct REGEX_INTERNAL_State *s, | ||
468 | int *marks, | ||
469 | unsigned int *count, | ||
470 | REGEX_INTERNAL_traverse_check check, | ||
471 | void *check_cls, | ||
472 | REGEX_INTERNAL_traverse_action action, | ||
473 | void *action_cls) | ||
474 | { | ||
475 | struct REGEX_INTERNAL_Transition *t; | ||
476 | |||
477 | if (GNUNET_YES == marks[s->traversal_id]) | ||
478 | return; | ||
479 | |||
480 | marks[s->traversal_id] = GNUNET_YES; | ||
481 | |||
482 | if (NULL != action) | ||
483 | action (action_cls, *count, s); | ||
484 | |||
485 | (*count)++; | ||
486 | |||
487 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
488 | { | ||
489 | if ((NULL == check) || | ||
490 | ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) )) | ||
491 | { | ||
492 | automaton_state_traverse (t->to_state, | ||
493 | marks, | ||
494 | count, | ||
495 | check, | ||
496 | check_cls, | ||
497 | action, | ||
498 | action_cls); | ||
499 | } | ||
500 | } | ||
501 | } | ||
502 | |||
503 | |||
504 | void | ||
505 | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, | ||
506 | struct REGEX_INTERNAL_State *start, | ||
507 | REGEX_INTERNAL_traverse_check check, | ||
508 | void *check_cls, | ||
509 | REGEX_INTERNAL_traverse_action action, | ||
510 | void *action_cls) | ||
511 | { | ||
512 | unsigned int count; | ||
513 | struct REGEX_INTERNAL_State *s; | ||
514 | |||
515 | if ((NULL == a) || (0 == a->state_count)) | ||
516 | return; | ||
517 | |||
518 | int marks[a->state_count]; | ||
519 | |||
520 | for (count = 0, s = a->states_head; NULL != s && count < a->state_count; | ||
521 | s = s->next, count++) | ||
522 | { | ||
523 | s->traversal_id = count; | ||
524 | marks[s->traversal_id] = GNUNET_NO; | ||
525 | } | ||
526 | |||
527 | count = 0; | ||
528 | |||
529 | if (NULL == start) | ||
530 | s = a->start; | ||
531 | else | ||
532 | s = start; | ||
533 | |||
534 | automaton_state_traverse (s, | ||
535 | marks, | ||
536 | &count, | ||
537 | check, | ||
538 | check_cls, | ||
539 | action, | ||
540 | action_cls); | ||
541 | } | ||
542 | |||
543 | |||
/**
 * Length-tracked string container used to speed up string operations.
 */
struct StringBuffer
{
  /**
   * Start of the string data.  May point into the middle of the
   * allocation and is NOT 0-terminated; use @e slen for the length.
   */
  char *sbuf;

  /**
   * Allocated buffer.
   */
  char *abuf;

  /**
   * Number of bytes of string data at @e sbuf.
   */
  size_t slen;

  /**
   * Number of bytes allocated for @e sbuf.
   */
  unsigned int blen;

  /**
   * #GNUNET_YES if the buffer currently represents "NULL"
   * (which is distinct from the empty string!).
   */
  int16_t null_flag;

  /**
   * If this entry is part of the last/current generation array, this
   * flag is #GNUNET_YES when the last and current generation are
   * identical, so that copying can be skipped while the value did not
   * change.  Both flags are deliberately int16_t: per the original
   * author's measurements the optimization gains about 1% this way,
   * while plain "int" flags degraded performance significantly on their
   * system, most likely due to increased cache misses.
   */
  int16_t synced;
};
586 | |||
587 | |||
588 | /** | ||
589 | * Compare two strings for equality. If either is NULL they are not equal. | ||
590 | * | ||
591 | * @param s1 first string for comparison. | ||
592 | * @param s2 second string for comparison. | ||
593 | * | ||
594 | * @return 0 if the strings are the same or both NULL, 1 or -1 if not. | ||
595 | */ | ||
596 | static int | ||
597 | sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) | ||
598 | { | ||
599 | if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag)) | ||
600 | return 0; | ||
601 | if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag)) | ||
602 | return -1; | ||
603 | if (s1->slen != s2->slen) | ||
604 | return -1; | ||
605 | if (0 == s1->slen) | ||
606 | return 0; | ||
607 | return memcmp (s1->sbuf, s2->sbuf, s1->slen); | ||
608 | } | ||
609 | |||
610 | |||
611 | /** | ||
612 | * Compare two strings for equality. | ||
613 | * | ||
614 | * @param s1 first string for comparison. | ||
615 | * @param s2 second string for comparison. | ||
616 | * | ||
617 | * @return 0 if the strings are the same, 1 or -1 if not. | ||
618 | */ | ||
619 | static int | ||
620 | sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) | ||
621 | { | ||
622 | if (s1->slen != s2->slen) | ||
623 | return -1; | ||
624 | if (0 == s1->slen) | ||
625 | return 0; | ||
626 | return memcmp (s1->sbuf, s2->sbuf, s1->slen); | ||
627 | } | ||
628 | |||
629 | |||
630 | /** | ||
631 | * Reallocate the buffer of 'ret' to fit 'nlen' characters; | ||
632 | * move the existing string to the beginning of the new buffer. | ||
633 | * | ||
634 | * @param ret current buffer, to be updated | ||
635 | * @param nlen target length for the buffer, must be at least ret->slen | ||
636 | */ | ||
637 | static void | ||
638 | sb_realloc (struct StringBuffer *ret, size_t nlen) | ||
639 | { | ||
640 | char *old; | ||
641 | |||
642 | GNUNET_assert (nlen >= ret->slen); | ||
643 | old = ret->abuf; | ||
644 | ret->abuf = GNUNET_malloc (nlen); | ||
645 | ret->blen = nlen; | ||
646 | GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen); | ||
647 | ret->sbuf = ret->abuf; | ||
648 | GNUNET_free (old); | ||
649 | } | ||
650 | |||
651 | |||
652 | /** | ||
653 | * Append a string. | ||
654 | * | ||
655 | * @param ret where to write the result | ||
656 | * @param sarg string to append | ||
657 | */ | ||
658 | static void | ||
659 | sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg) | ||
660 | { | ||
661 | if (GNUNET_YES == ret->null_flag) | ||
662 | ret->slen = 0; | ||
663 | ret->null_flag = GNUNET_NO; | ||
664 | if (ret->blen < sarg->slen + ret->slen) | ||
665 | sb_realloc (ret, ret->blen + sarg->slen + 128); | ||
666 | GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen); | ||
667 | ret->slen += sarg->slen; | ||
668 | } | ||
669 | |||
670 | |||
671 | /** | ||
672 | * Append a C string. | ||
673 | * | ||
674 | * @param ret where to write the result | ||
675 | * @param cstr string to append | ||
676 | */ | ||
677 | static void | ||
678 | sb_append_cstr (struct StringBuffer *ret, const char *cstr) | ||
679 | { | ||
680 | size_t cstr_len = strlen (cstr); | ||
681 | |||
682 | if (GNUNET_YES == ret->null_flag) | ||
683 | ret->slen = 0; | ||
684 | ret->null_flag = GNUNET_NO; | ||
685 | if (ret->blen < cstr_len + ret->slen) | ||
686 | sb_realloc (ret, ret->blen + cstr_len + 128); | ||
687 | GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len); | ||
688 | ret->slen += cstr_len; | ||
689 | } | ||
690 | |||
691 | |||
692 | /** | ||
693 | * Wrap a string buffer, that is, set ret to the format string | ||
694 | * which contains an "%s" which is to be replaced with the original | ||
695 | * content of 'ret'. Note that optimizing this function is not | ||
696 | * really worth it, it is rarely called. | ||
697 | * | ||
698 | * @param ret where to write the result and take the input for %.*s from | ||
699 | * @param format format string, fprintf-style, with exactly one "%.*s" | ||
700 | * @param extra_chars how long will the result be, in addition to 'sarg' length | ||
701 | */ | ||
702 | static void | ||
703 | sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars) | ||
704 | { | ||
705 | char *temp; | ||
706 | |||
707 | if (GNUNET_YES == ret->null_flag) | ||
708 | ret->slen = 0; | ||
709 | ret->null_flag = GNUNET_NO; | ||
710 | temp = GNUNET_malloc (ret->slen + extra_chars + 1); | ||
711 | GNUNET_snprintf (temp, | ||
712 | ret->slen + extra_chars + 1, | ||
713 | format, | ||
714 | (int) ret->slen, | ||
715 | ret->sbuf); | ||
716 | GNUNET_free (ret->abuf); | ||
717 | ret->abuf = temp; | ||
718 | ret->sbuf = temp; | ||
719 | ret->blen = ret->slen + extra_chars + 1; | ||
720 | ret->slen = ret->slen + extra_chars; | ||
721 | } | ||
722 | |||
723 | |||
724 | /** | ||
725 | * Format a string buffer. Note that optimizing this function is not | ||
726 | * really worth it, it is rarely called. | ||
727 | * | ||
728 | * @param ret where to write the result | ||
729 | * @param format format string, fprintf-style, with exactly one "%.*s" | ||
730 | * @param extra_chars how long will the result be, in addition to 'sarg' length | ||
731 | * @param sarg string to print into the format | ||
732 | */ | ||
733 | static void | ||
734 | sb_printf1 (struct StringBuffer *ret, | ||
735 | const char *format, | ||
736 | size_t extra_chars, | ||
737 | const struct StringBuffer *sarg) | ||
738 | { | ||
739 | if (ret->blen < sarg->slen + extra_chars + 1) | ||
740 | sb_realloc (ret, sarg->slen + extra_chars + 1); | ||
741 | ret->null_flag = GNUNET_NO; | ||
742 | ret->sbuf = ret->abuf; | ||
743 | ret->slen = sarg->slen + extra_chars; | ||
744 | GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf); | ||
745 | } | ||
746 | |||
747 | |||
748 | /** | ||
749 | * Format a string buffer. | ||
750 | * | ||
751 | * @param ret where to write the result | ||
752 | * @param format format string, fprintf-style, with exactly two "%.*s" | ||
753 | * @param extra_chars how long will the result be, in addition to 'sarg1/2' length | ||
754 | * @param sarg1 first string to print into the format | ||
755 | * @param sarg2 second string to print into the format | ||
756 | */ | ||
757 | static void | ||
758 | sb_printf2 (struct StringBuffer *ret, | ||
759 | const char *format, | ||
760 | size_t extra_chars, | ||
761 | const struct StringBuffer *sarg1, | ||
762 | const struct StringBuffer *sarg2) | ||
763 | { | ||
764 | if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1) | ||
765 | sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1); | ||
766 | ret->null_flag = GNUNET_NO; | ||
767 | ret->slen = sarg1->slen + sarg2->slen + extra_chars; | ||
768 | ret->sbuf = ret->abuf; | ||
769 | GNUNET_snprintf (ret->sbuf, | ||
770 | ret->blen, | ||
771 | format, | ||
772 | (int) sarg1->slen, | ||
773 | sarg1->sbuf, | ||
774 | (int) sarg2->slen, | ||
775 | sarg2->sbuf); | ||
776 | } | ||
777 | |||
778 | |||
779 | /** | ||
780 | * Format a string buffer. Note that optimizing this function is not | ||
781 | * really worth it, it is rarely called. | ||
782 | * | ||
783 | * @param ret where to write the result | ||
784 | * @param format format string, fprintf-style, with exactly three "%.*s" | ||
785 | * @param extra_chars how long will the result be, in addition to 'sarg1/2/3' length | ||
786 | * @param sarg1 first string to print into the format | ||
787 | * @param sarg2 second string to print into the format | ||
788 | * @param sarg3 third string to print into the format | ||
789 | */ | ||
790 | static void | ||
791 | sb_printf3 (struct StringBuffer *ret, | ||
792 | const char *format, | ||
793 | size_t extra_chars, | ||
794 | const struct StringBuffer *sarg1, | ||
795 | const struct StringBuffer *sarg2, | ||
796 | const struct StringBuffer *sarg3) | ||
797 | { | ||
798 | if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1) | ||
799 | sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1); | ||
800 | ret->null_flag = GNUNET_NO; | ||
801 | ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars; | ||
802 | ret->sbuf = ret->abuf; | ||
803 | GNUNET_snprintf (ret->sbuf, | ||
804 | ret->blen, | ||
805 | format, | ||
806 | (int) sarg1->slen, | ||
807 | sarg1->sbuf, | ||
808 | (int) sarg2->slen, | ||
809 | sarg2->sbuf, | ||
810 | (int) sarg3->slen, | ||
811 | sarg3->sbuf); | ||
812 | } | ||
813 | |||
814 | |||
815 | /** | ||
816 | * Free resources of the given string buffer. | ||
817 | * | ||
818 | * @param sb buffer to free (actual pointer is not freed, as they | ||
819 | * should not be individually allocated) | ||
820 | */ | ||
821 | static void | ||
822 | sb_free (struct StringBuffer *sb) | ||
823 | { | ||
824 | GNUNET_array_grow (sb->abuf, sb->blen, 0); | ||
825 | sb->slen = 0; | ||
826 | sb->sbuf = NULL; | ||
827 | sb->null_flag = GNUNET_YES; | ||
828 | } | ||
829 | |||
830 | |||
831 | /** | ||
832 | * Copy the given string buffer from 'in' to 'out'. | ||
833 | * | ||
834 | * @param in input string | ||
835 | * @param out output string | ||
836 | */ | ||
837 | static void | ||
838 | sb_strdup (struct StringBuffer *out, const struct StringBuffer *in) | ||
839 | |||
840 | { | ||
841 | out->null_flag = in->null_flag; | ||
842 | if (GNUNET_YES == out->null_flag) | ||
843 | return; | ||
844 | if (out->blen < in->slen) | ||
845 | { | ||
846 | GNUNET_array_grow (out->abuf, out->blen, in->slen); | ||
847 | } | ||
848 | out->sbuf = out->abuf; | ||
849 | out->slen = in->slen; | ||
850 | GNUNET_memcpy (out->sbuf, in->sbuf, out->slen); | ||
851 | } | ||
852 | |||
853 | |||
854 | /** | ||
855 | * Copy the given string buffer from 'in' to 'out'. | ||
856 | * | ||
857 | * @param cstr input string | ||
858 | * @param out output string | ||
859 | */ | ||
860 | static void | ||
861 | sb_strdup_cstr (struct StringBuffer *out, const char *cstr) | ||
862 | { | ||
863 | if (NULL == cstr) | ||
864 | { | ||
865 | out->null_flag = GNUNET_YES; | ||
866 | return; | ||
867 | } | ||
868 | out->null_flag = GNUNET_NO; | ||
869 | out->slen = strlen (cstr); | ||
870 | if (out->blen < out->slen) | ||
871 | { | ||
872 | GNUNET_array_grow (out->abuf, out->blen, out->slen); | ||
873 | } | ||
874 | out->sbuf = out->abuf; | ||
875 | GNUNET_memcpy (out->sbuf, cstr, out->slen); | ||
876 | } | ||
877 | |||
878 | |||
879 | /** | ||
880 | * Check if the given string @a str needs parentheses around it when | ||
881 | * using it to generate a regex. | ||
882 | * | ||
883 | * @param str string | ||
884 | * | ||
885 | * @return #GNUNET_YES if parentheses are needed, #GNUNET_NO otherwise | ||
886 | */ | ||
887 | static int | ||
888 | needs_parentheses (const struct StringBuffer *str) | ||
889 | { | ||
890 | size_t slen; | ||
891 | const char *op; | ||
892 | const char *cl; | ||
893 | const char *pos; | ||
894 | const char *end; | ||
895 | unsigned int cnt; | ||
896 | |||
897 | if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2)) | ||
898 | return GNUNET_NO; | ||
899 | pos = str->sbuf; | ||
900 | if ('(' != pos[0]) | ||
901 | return GNUNET_YES; | ||
902 | end = str->sbuf + slen; | ||
903 | cnt = 1; | ||
904 | pos++; | ||
905 | while (cnt > 0) | ||
906 | { | ||
907 | cl = memchr (pos, ')', end - pos); | ||
908 | if (NULL == cl) | ||
909 | { | ||
910 | GNUNET_break (0); | ||
911 | return GNUNET_YES; | ||
912 | } | ||
913 | /* while '(' before ')', count opening parens */ | ||
914 | while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl)) | ||
915 | { | ||
916 | cnt++; | ||
917 | pos = op + 1; | ||
918 | } | ||
919 | /* got ')' first */ | ||
920 | cnt--; | ||
921 | pos = cl + 1; | ||
922 | } | ||
923 | return (*pos == '\0') ? GNUNET_NO : GNUNET_YES; | ||
924 | } | ||
925 | |||
926 | |||
927 | /** | ||
928 | * Remove parentheses surrounding string @a str. | ||
929 | * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. | ||
930 | * You need to #GNUNET_free() the returned string. | ||
931 | * | ||
932 | * @param str string, modified to contain a | ||
933 | * @return string without surrounding parentheses, string 'str' if no preceding | ||
934 | * epsilon could be found, NULL if 'str' was NULL | ||
935 | */ | ||
936 | static void | ||
937 | remove_parentheses (struct StringBuffer *str) | ||
938 | { | ||
939 | size_t slen; | ||
940 | const char *pos; | ||
941 | const char *end; | ||
942 | const char *sbuf; | ||
943 | const char *op; | ||
944 | const char *cp; | ||
945 | unsigned int cnt; | ||
946 | |||
947 | if (0) | ||
948 | return; | ||
949 | sbuf = str->sbuf; | ||
950 | if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) || | ||
951 | ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1])) | ||
952 | return; | ||
953 | cnt = 0; | ||
954 | pos = &sbuf[1]; | ||
955 | end = &sbuf[slen - 1]; | ||
956 | op = memchr (pos, '(', end - pos); | ||
957 | cp = memchr (pos, ')', end - pos); | ||
958 | while (NULL != cp) | ||
959 | { | ||
960 | while ((NULL != op) && (op < cp)) | ||
961 | { | ||
962 | cnt++; | ||
963 | pos = op + 1; | ||
964 | op = memchr (pos, '(', end - pos); | ||
965 | } | ||
966 | while ((NULL != cp) && ((NULL == op) || (cp < op))) | ||
967 | { | ||
968 | if (0 == cnt) | ||
969 | return; /* can't strip parens */ | ||
970 | cnt--; | ||
971 | pos = cp + 1; | ||
972 | cp = memchr (pos, ')', end - pos); | ||
973 | } | ||
974 | } | ||
975 | if (0 != cnt) | ||
976 | { | ||
977 | GNUNET_break (0); | ||
978 | return; | ||
979 | } | ||
980 | str->sbuf++; | ||
981 | str->slen -= 2; | ||
982 | } | ||
983 | |||
984 | |||
985 | /** | ||
986 | * Check if the string 'str' starts with an epsilon (empty string). | ||
987 | * Example: "(|a)" is starting with an epsilon. | ||
988 | * | ||
989 | * @param str string to test | ||
990 | * | ||
991 | * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')' | ||
992 | */ | ||
993 | static int | ||
994 | has_epsilon (const struct StringBuffer *str) | ||
995 | { | ||
996 | return (GNUNET_YES != str->null_flag) && (0 < str->slen) && | ||
997 | ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) && | ||
998 | (')' == str->sbuf[str->slen - 1]); | ||
999 | } | ||
1000 | |||
1001 | |||
1002 | /** | ||
1003 | * Remove an epsilon from the string str. Where epsilon is an empty string | ||
1004 | * Example: str = "(|a|b|c)", result: "a|b|c" | ||
1005 | * The returned string needs to be freed. | ||
1006 | * | ||
1007 | * @param str original string | ||
1008 | * @param ret where to return string without preceding epsilon, string 'str' if no preceding | ||
1009 | * epsilon could be found, NULL if 'str' was NULL | ||
1010 | */ | ||
1011 | static void | ||
1012 | remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret) | ||
1013 | { | ||
1014 | if (GNUNET_YES == str->null_flag) | ||
1015 | { | ||
1016 | ret->null_flag = GNUNET_YES; | ||
1017 | return; | ||
1018 | } | ||
1019 | if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) && | ||
1020 | (')' == str->sbuf[str->slen - 1])) | ||
1021 | { | ||
1022 | /* remove epsilon */ | ||
1023 | if (ret->blen < str->slen - 3) | ||
1024 | { | ||
1025 | GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3); | ||
1026 | } | ||
1027 | ret->sbuf = ret->abuf; | ||
1028 | ret->slen = str->slen - 3; | ||
1029 | GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen); | ||
1030 | return; | ||
1031 | } | ||
1032 | sb_strdup (ret, str); | ||
1033 | } | ||
1034 | |||
1035 | |||
1036 | /** | ||
1037 | * Compare n bytes of 'str1' and 'str2' | ||
1038 | * | ||
1039 | * @param str1 first string to compare | ||
1040 | * @param str2 second string for comparison | ||
1041 | * @param n number of bytes to compare | ||
1042 | * | ||
1043 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | ||
1044 | */ | ||
1045 | static int | ||
1046 | sb_strncmp (const struct StringBuffer *str1, | ||
1047 | const struct StringBuffer *str2, | ||
1048 | size_t n) | ||
1049 | { | ||
1050 | size_t max; | ||
1051 | |||
1052 | if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n))) | ||
1053 | return -1; | ||
1054 | max = GNUNET_MAX (str1->slen, str2->slen); | ||
1055 | if (max > n) | ||
1056 | max = n; | ||
1057 | return memcmp (str1->sbuf, str2->sbuf, max); | ||
1058 | } | ||
1059 | |||
1060 | |||
1061 | /** | ||
1062 | * Compare n bytes of 'str1' and 'str2' | ||
1063 | * | ||
1064 | * @param str1 first string to compare | ||
1065 | * @param str2 second C string for comparison | ||
1066 | * @param n number of bytes to compare (and length of str2) | ||
1067 | * | ||
1068 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | ||
1069 | */ | ||
1070 | static int | ||
1071 | sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n) | ||
1072 | { | ||
1073 | if (str1->slen < n) | ||
1074 | return -1; | ||
1075 | return memcmp (str1->sbuf, str2, n); | ||
1076 | } | ||
1077 | |||
1078 | |||
1079 | /** | ||
1080 | * Initialize string buffer for storing strings of up to n | ||
1081 | * characters. | ||
1082 | * | ||
1083 | * @param sb buffer to initialize | ||
1084 | * @param n desired target length | ||
1085 | */ | ||
1086 | static void | ||
1087 | sb_init (struct StringBuffer *sb, size_t n) | ||
1088 | { | ||
1089 | sb->null_flag = GNUNET_NO; | ||
1090 | sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n); | ||
1091 | sb->blen = n; | ||
1092 | sb->slen = 0; | ||
1093 | } | ||
1094 | |||
1095 | |||
1096 | /** | ||
1097 | * Compare 'str1', starting from position 'k', with whole 'str2' | ||
1098 | * | ||
1099 | * @param str1 first string to compare, starting from position 'k' | ||
1100 | * @param str2 second string for comparison | ||
1101 | * @param k starting position in 'str1' | ||
1102 | * | ||
1103 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | ||
1104 | */ | ||
1105 | static int | ||
1106 | sb_strkcmp (const struct StringBuffer *str1, | ||
1107 | const struct StringBuffer *str2, | ||
1108 | size_t k) | ||
1109 | { | ||
1110 | if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) || | ||
1111 | (k > str1->slen) || (str1->slen - k != str2->slen)) | ||
1112 | return -1; | ||
1113 | return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen); | ||
1114 | } | ||
1115 | |||
1116 | |||
1117 | /** | ||
1118 | * Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' | ||
1119 | * function to create the depth-first numbering of the states. | ||
1120 | * | ||
1121 | * @param cls states array. | ||
1122 | * @param count current state counter. | ||
1123 | * @param s current state. | ||
1124 | */ | ||
1125 | static void | ||
1126 | number_states (void *cls, | ||
1127 | const unsigned int count, | ||
1128 | struct REGEX_INTERNAL_State *s) | ||
1129 | { | ||
1130 | struct REGEX_INTERNAL_State **states = cls; | ||
1131 | |||
1132 | s->dfs_id = count; | ||
1133 | if (NULL != states) | ||
1134 | states[count] = s; | ||
1135 | } | ||
1136 | |||
1137 | |||
/**
 * Expand string buffer @a a into the two arguments (precision, pointer)
 * expected by a "%.*s" conversion.  A buffer representing "NULL" is
 * printed as the 6-character text "(null)".  The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define PRIS(a)                                         \
  ((GNUNET_YES == (a).null_flag) ? 6 : (int) (a).slen), \
  ((GNUNET_YES == (a).null_flag) ? "(null)" : (a).sbuf)
1141 | |||
1142 | |||
1143 | /** | ||
1144 | * Construct the regular expression given the inductive step, | ||
1145 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* | ||
1146 | * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. | ||
1147 | * | ||
1148 | * @param R_last_ij value of $R^{(k-1)}_{ij}$. | ||
1149 | * @param R_last_ik value of $R^{(k-1)}_{ik}$. | ||
1150 | * @param R_last_kk value of $R^{(k-1)}_{kk}$. | ||
1151 | * @param R_last_kj value of $R^{(k-1)}_{kj}$. | ||
1152 | * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij | ||
1153 | * is expected to be NULL when called! | ||
1154 | * @param R_cur_l optimization -- kept between iterations to avoid realloc | ||
1155 | * @param R_cur_r optimization -- kept between iterations to avoid realloc | ||
1156 | */ | ||
1157 | static void | ||
1158 | automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij, | ||
1159 | const struct StringBuffer *R_last_ik, | ||
1160 | const struct StringBuffer *R_last_kk, | ||
1161 | const struct StringBuffer *R_last_kj, | ||
1162 | struct StringBuffer *R_cur_ij, | ||
1163 | struct StringBuffer *R_cur_l, | ||
1164 | struct StringBuffer *R_cur_r) | ||
1165 | { | ||
1166 | struct StringBuffer R_temp_ij; | ||
1167 | struct StringBuffer R_temp_ik; | ||
1168 | struct StringBuffer R_temp_kj; | ||
1169 | struct StringBuffer R_temp_kk; | ||
1170 | int eps_check; | ||
1171 | int ij_ik_cmp; | ||
1172 | int ij_kj_cmp; | ||
1173 | int ik_kk_cmp; | ||
1174 | int kk_kj_cmp; | ||
1175 | int clean_ik_kk_cmp; | ||
1176 | int clean_kk_kj_cmp; | ||
1177 | size_t length; | ||
1178 | size_t length_l; | ||
1179 | size_t length_r; | ||
1180 | |||
1181 | /* | ||
1182 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1183 | * R_last == R^{(k-1)}, R_cur == R^{(k)} | ||
1184 | * R_cur_ij = R_cur_l | R_cur_r | ||
1185 | * R_cur_l == R^{(k-1)}_{ij} | ||
1186 | * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1187 | */if ((GNUNET_YES == R_last_ij->null_flag) && | ||
1188 | ((GNUNET_YES == R_last_ik->null_flag) || | ||
1189 | (GNUNET_YES == R_last_kj->null_flag))) | ||
1190 | { | ||
1191 | /* R^{(k)}_{ij} = N | N */ | ||
1192 | R_cur_ij->null_flag = GNUNET_YES; | ||
1193 | R_cur_ij->synced = GNUNET_NO; | ||
1194 | return; | ||
1195 | } | ||
1196 | |||
1197 | if ((GNUNET_YES == R_last_ik->null_flag) || | ||
1198 | (GNUNET_YES == R_last_kj->null_flag)) | ||
1199 | { | ||
1200 | /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */ | ||
1201 | if (GNUNET_YES == R_last_ij->synced) | ||
1202 | { | ||
1203 | R_cur_ij->synced = GNUNET_YES; | ||
1204 | R_cur_ij->null_flag = GNUNET_NO; | ||
1205 | return; | ||
1206 | } | ||
1207 | R_cur_ij->synced = GNUNET_YES; | ||
1208 | sb_strdup (R_cur_ij, R_last_ij); | ||
1209 | return; | ||
1210 | } | ||
1211 | R_cur_ij->synced = GNUNET_NO; | ||
1212 | |||
1213 | /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR | ||
1214 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */ | ||
1215 | |||
1216 | R_cur_r->null_flag = GNUNET_YES; | ||
1217 | R_cur_r->slen = 0; | ||
1218 | R_cur_l->null_flag = GNUNET_YES; | ||
1219 | R_cur_l->slen = 0; | ||
1220 | |||
1221 | /* cache results from strcmp, we might need these many times */ | ||
1222 | ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj); | ||
1223 | ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik); | ||
1224 | ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk); | ||
1225 | kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj); | ||
1226 | |||
1227 | /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well | ||
1228 | * as parentheses, so we can better compare the contents */ | ||
1229 | |||
1230 | memset (&R_temp_ij, 0, sizeof(struct StringBuffer)); | ||
1231 | memset (&R_temp_ik, 0, sizeof(struct StringBuffer)); | ||
1232 | memset (&R_temp_kk, 0, sizeof(struct StringBuffer)); | ||
1233 | memset (&R_temp_kj, 0, sizeof(struct StringBuffer)); | ||
1234 | remove_epsilon (R_last_ik, &R_temp_ik); | ||
1235 | remove_epsilon (R_last_kk, &R_temp_kk); | ||
1236 | remove_epsilon (R_last_kj, &R_temp_kj); | ||
1237 | remove_parentheses (&R_temp_ik); | ||
1238 | remove_parentheses (&R_temp_kk); | ||
1239 | remove_parentheses (&R_temp_kj); | ||
1240 | clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk); | ||
1241 | clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj); | ||
1242 | |||
1243 | /* construct R_cur_l (and, if necessary R_cur_r) */ | ||
1244 | if (GNUNET_YES != R_last_ij->null_flag) | ||
1245 | { | ||
1246 | /* Assign R_temp_ij to R_last_ij and remove epsilon as well | ||
1247 | * as parentheses, so we can better compare the contents */ | ||
1248 | remove_epsilon (R_last_ij, &R_temp_ij); | ||
1249 | remove_parentheses (&R_temp_ij); | ||
1250 | |||
1251 | if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) && | ||
1252 | (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) && | ||
1253 | (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))) | ||
1254 | { | ||
1255 | if (0 == R_temp_ij.slen) | ||
1256 | { | ||
1257 | R_cur_r->null_flag = GNUNET_NO; | ||
1258 | } | ||
1259 | else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) || | ||
1260 | ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) && | ||
1261 | (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) )) | ||
1262 | { | ||
1263 | /* | ||
1264 | * a|(e|a)a*(e|a) = a* | ||
1265 | * a|(e|a)(e|a)*(e|a) = a* | ||
1266 | * (e|a)|aa*a = a* | ||
1267 | * (e|a)|aa*(e|a) = a* | ||
1268 | * (e|a)|(e|a)a*a = a* | ||
1269 | * (e|a)|(e|a)a*(e|a) = a* | ||
1270 | * (e|a)|(e|a)(e|a)*(e|a) = a* | ||
1271 | */if (GNUNET_YES == needs_parentheses (&R_temp_ij)) | ||
1272 | sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij); | ||
1273 | else | ||
1274 | sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij); | ||
1275 | } | ||
1276 | else | ||
1277 | { | ||
1278 | /* | ||
1279 | * a|aa*a = a+ | ||
1280 | * a|(e|a)a*a = a+ | ||
1281 | * a|aa*(e|a) = a+ | ||
1282 | * a|(e|a)(e|a)*a = a+ | ||
1283 | * a|a(e|a)*(e|a) = a+ | ||
1284 | */if (GNUNET_YES == needs_parentheses (&R_temp_ij)) | ||
1285 | sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij); | ||
1286 | else | ||
1287 | sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij); | ||
1288 | } | ||
1289 | } | ||
1290 | else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) && | ||
1291 | (0 != clean_ik_kk_cmp)) | ||
1292 | { | ||
1293 | /* a|ab*b = ab* */ | ||
1294 | if (0 == R_last_kk->slen) | ||
1295 | sb_strdup (R_cur_r, R_last_ij); | ||
1296 | else if (GNUNET_YES == needs_parentheses (&R_temp_kk)) | ||
1297 | sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk); | ||
1298 | else | ||
1299 | sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk); | ||
1300 | R_cur_l->null_flag = GNUNET_YES; | ||
1301 | } | ||
1302 | else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) && | ||
1303 | (0 != clean_kk_kj_cmp)) | ||
1304 | { | ||
1305 | /* a|bb*a = b*a */ | ||
1306 | if (R_last_kk->slen < 1) | ||
1307 | { | ||
1308 | sb_strdup (R_cur_r, R_last_kj); | ||
1309 | } | ||
1310 | else if (GNUNET_YES == needs_parentheses (&R_temp_kk)) | ||
1311 | sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj); | ||
1312 | else | ||
1313 | sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj); | ||
1314 | |||
1315 | R_cur_l->null_flag = GNUNET_YES; | ||
1316 | } | ||
1317 | else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) && | ||
1318 | (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk)) | ||
1319 | { | ||
1320 | /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */ | ||
1321 | if (needs_parentheses (&R_temp_kk)) | ||
1322 | sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk); | ||
1323 | else | ||
1324 | sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk); | ||
1325 | R_cur_l->null_flag = GNUNET_YES; | ||
1326 | } | ||
1327 | else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) && | ||
1328 | (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk)) | ||
1329 | { | ||
1330 | /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */ | ||
1331 | if (needs_parentheses (&R_temp_kk)) | ||
1332 | sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij); | ||
1333 | else | ||
1334 | sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij); | ||
1335 | R_cur_l->null_flag = GNUNET_YES; | ||
1336 | } | ||
1337 | else | ||
1338 | { | ||
1339 | sb_strdup (R_cur_l, R_last_ij); | ||
1340 | remove_parentheses (R_cur_l); | ||
1341 | } | ||
1342 | } | ||
1343 | else | ||
1344 | { | ||
1345 | /* we have no left side */ | ||
1346 | R_cur_l->null_flag = GNUNET_YES; | ||
1347 | } | ||
1348 | |||
1349 | /* construct R_cur_r, if not already constructed */ | ||
1350 | if (GNUNET_YES == R_cur_r->null_flag) | ||
1351 | { | ||
1352 | length = R_temp_kk.slen - R_last_ik->slen; | ||
1353 | |||
1354 | /* a(ba)*bx = (ab)+x */ | ||
1355 | if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) && | ||
1356 | (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) && | ||
1357 | (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) && | ||
1358 | (0 < R_last_ik->slen) && | ||
1359 | (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) && | ||
1360 | (0 == sb_strncmp (&R_temp_kk, R_last_kj, length))) | ||
1361 | { | ||
1362 | struct StringBuffer temp_a; | ||
1363 | struct StringBuffer temp_b; | ||
1364 | |||
1365 | sb_init (&temp_a, length); | ||
1366 | sb_init (&temp_b, R_last_kj->slen - length); | ||
1367 | |||
1368 | length_l = length; | ||
1369 | temp_a.sbuf = temp_a.abuf; | ||
1370 | GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l); | ||
1371 | temp_a.slen = length_l; | ||
1372 | |||
1373 | length_r = R_last_kj->slen - length; | ||
1374 | temp_b.sbuf = temp_b.abuf; | ||
1375 | GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r); | ||
1376 | temp_b.slen = length_r; | ||
1377 | |||
1378 | /* e|(ab)+ = (ab)* */ | ||
1379 | if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) && | ||
1380 | (0 == temp_b.slen)) | ||
1381 | { | ||
1382 | sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a); | ||
1383 | sb_free (R_cur_l); | ||
1384 | R_cur_l->null_flag = GNUNET_YES; | ||
1385 | } | ||
1386 | else | ||
1387 | { | ||
1388 | sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b); | ||
1389 | } | ||
1390 | sb_free (&temp_a); | ||
1391 | sb_free (&temp_b); | ||
1392 | } | ||
1393 | else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) && | ||
1394 | (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) ) | ||
1395 | { | ||
1396 | /* | ||
1397 | * (e|a)a*(e|a) = a* | ||
1398 | * (e|a)(e|a)*(e|a) = a* | ||
1399 | */ | ||
1400 | if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj)) | ||
1401 | { | ||
1402 | if (needs_parentheses (&R_temp_kk)) | ||
1403 | sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk); | ||
1404 | else | ||
1405 | sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk); | ||
1406 | } | ||
1407 | /* aa*a = a+a */ | ||
1408 | else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) && | ||
1409 | (! has_epsilon (R_last_ik))) | ||
1410 | { | ||
1411 | if (needs_parentheses (&R_temp_kk)) | ||
1412 | sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk); | ||
1413 | else | ||
1414 | sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk); | ||
1415 | } | ||
1416 | /* | ||
1417 | * (e|a)a*a = a+ | ||
1418 | * aa*(e|a) = a+ | ||
1419 | * a(e|a)*(e|a) = a+ | ||
1420 | * (e|a)a*a = a+ | ||
1421 | */else | ||
1422 | { | ||
1423 | eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) | ||
1424 | + has_epsilon (R_last_kj)); | ||
1425 | |||
1426 | if (1 == eps_check) | ||
1427 | { | ||
1428 | if (needs_parentheses (&R_temp_kk)) | ||
1429 | sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk); | ||
1430 | else | ||
1431 | sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk); | ||
1432 | } | ||
1433 | } | ||
1434 | } | ||
1435 | /* | ||
1436 | * aa*b = a+b | ||
1437 | * (e|a)(e|a)*b = a*b | ||
1438 | */ | ||
1439 | else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) | ||
1440 | { | ||
1441 | if (has_epsilon (R_last_ik)) | ||
1442 | { | ||
1443 | if (needs_parentheses (&R_temp_kk)) | ||
1444 | sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj); | ||
1445 | else | ||
1446 | sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj); | ||
1447 | } | ||
1448 | else | ||
1449 | { | ||
1450 | if (needs_parentheses (&R_temp_kk)) | ||
1451 | sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj); | ||
1452 | else | ||
1453 | sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj); | ||
1454 | } | ||
1455 | } | ||
1456 | /* | ||
1457 | * ba*a = ba+ | ||
1458 | * b(e|a)*(e|a) = ba* | ||
1459 | */ | ||
1460 | else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) | ||
1461 | { | ||
1462 | if (has_epsilon (R_last_kj)) | ||
1463 | { | ||
1464 | if (needs_parentheses (&R_temp_kk)) | ||
1465 | sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk); | ||
1466 | else | ||
1467 | sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk); | ||
1468 | } | ||
1469 | else | ||
1470 | { | ||
1471 | if (needs_parentheses (&R_temp_kk)) | ||
1472 | sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk); | ||
1473 | else | ||
1474 | sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk); | ||
1475 | } | ||
1476 | } | ||
1477 | else | ||
1478 | { | ||
1479 | if (0 < R_temp_kk.slen) | ||
1480 | { | ||
1481 | if (needs_parentheses (&R_temp_kk)) | ||
1482 | { | ||
1483 | sb_printf3 (R_cur_r, | ||
1484 | "%.*s(%.*s)*%.*s", | ||
1485 | 3, | ||
1486 | R_last_ik, | ||
1487 | &R_temp_kk, | ||
1488 | R_last_kj); | ||
1489 | } | ||
1490 | else | ||
1491 | { | ||
1492 | sb_printf3 (R_cur_r, | ||
1493 | "%.*s%.*s*%.*s", | ||
1494 | 1, | ||
1495 | R_last_ik, | ||
1496 | &R_temp_kk, | ||
1497 | R_last_kj); | ||
1498 | } | ||
1499 | } | ||
1500 | else | ||
1501 | { | ||
1502 | sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj); | ||
1503 | } | ||
1504 | } | ||
1505 | } | ||
1506 | sb_free (&R_temp_ij); | ||
1507 | sb_free (&R_temp_ik); | ||
1508 | sb_free (&R_temp_kk); | ||
1509 | sb_free (&R_temp_kj); | ||
1510 | |||
1511 | if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag)) | ||
1512 | { | ||
1513 | R_cur_ij->null_flag = GNUNET_YES; | ||
1514 | return; | ||
1515 | } | ||
1516 | |||
1517 | if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag)) | ||
1518 | { | ||
1519 | struct StringBuffer tmp; | ||
1520 | |||
1521 | tmp = *R_cur_ij; | ||
1522 | *R_cur_ij = *R_cur_l; | ||
1523 | *R_cur_l = tmp; | ||
1524 | return; | ||
1525 | } | ||
1526 | |||
1527 | if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag)) | ||
1528 | { | ||
1529 | struct StringBuffer tmp; | ||
1530 | |||
1531 | tmp = *R_cur_ij; | ||
1532 | *R_cur_ij = *R_cur_r; | ||
1533 | *R_cur_r = tmp; | ||
1534 | return; | ||
1535 | } | ||
1536 | |||
1537 | if (0 == sb_nullstrcmp (R_cur_l, R_cur_r)) | ||
1538 | { | ||
1539 | struct StringBuffer tmp; | ||
1540 | |||
1541 | tmp = *R_cur_ij; | ||
1542 | *R_cur_ij = *R_cur_l; | ||
1543 | *R_cur_l = tmp; | ||
1544 | return; | ||
1545 | } | ||
1546 | sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r); | ||
1547 | } | ||
1548 | |||
1549 | |||
1550 | /** | ||
1551 | * Create proofs for all states in the given automaton. Implementation of the | ||
1552 | * algorithm described in chapter 3.2.1 of "Automata Theory, Languages, and | ||
1553 | * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. | ||
1554 | * | ||
1555 | * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the | ||
1556 | * proof) fields. The starting state will only have a valid proof/hash if it has | ||
1557 | * any incoming transitions. | ||
1558 | * | ||
1559 | * @param a automaton for which to assign proofs and hashes, must not be NULL | ||
1560 | */ | ||
1561 | static int | ||
1562 | automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a) | ||
1563 | { | ||
1564 | unsigned int n = a->state_count; | ||
1565 | struct REGEX_INTERNAL_State *states[n]; | ||
1566 | struct StringBuffer *R_last; | ||
1567 | struct StringBuffer *R_cur; | ||
1568 | struct StringBuffer R_cur_r; | ||
1569 | struct StringBuffer R_cur_l; | ||
1570 | struct StringBuffer *R_swap; | ||
1571 | struct REGEX_INTERNAL_Transition *t; | ||
1572 | struct StringBuffer complete_regex; | ||
1573 | unsigned int i; | ||
1574 | unsigned int j; | ||
1575 | unsigned int k; | ||
1576 | |||
1577 | R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n); | ||
1578 | R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n); | ||
1579 | if ((NULL == R_last) || (NULL == R_cur)) | ||
1580 | { | ||
1581 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc"); | ||
1582 | GNUNET_free (R_cur); | ||
1583 | GNUNET_free (R_last); | ||
1584 | return GNUNET_SYSERR; | ||
1585 | } | ||
1586 | |||
1587 | /* create depth-first numbering of the states, initializes 'state' */ | ||
1588 | REGEX_INTERNAL_automaton_traverse (a, | ||
1589 | a->start, | ||
1590 | NULL, | ||
1591 | NULL, | ||
1592 | &number_states, | ||
1593 | states); | ||
1594 | |||
1595 | for (i = 0; i < n; i++) | ||
1596 | GNUNET_assert (NULL != states[i]); | ||
1597 | for (i = 0; i < n; i++) | ||
1598 | for (j = 0; j < n; j++) | ||
1599 | R_last[i * n + j].null_flag = GNUNET_YES; | ||
1600 | |||
1601 | /* Compute regular expressions of length "1" between each pair of states */ | ||
1602 | for (i = 0; i < n; i++) | ||
1603 | { | ||
1604 | for (t = states[i]->transitions_head; NULL != t; t = t->next) | ||
1605 | { | ||
1606 | j = t->to_state->dfs_id; | ||
1607 | if (GNUNET_YES == R_last[i * n + j].null_flag) | ||
1608 | { | ||
1609 | sb_strdup_cstr (&R_last[i * n + j], t->label); | ||
1610 | } | ||
1611 | else | ||
1612 | { | ||
1613 | sb_append_cstr (&R_last[i * n + j], "|"); | ||
1614 | sb_append_cstr (&R_last[i * n + j], t->label); | ||
1615 | } | ||
1616 | } | ||
1617 | /* add self-loop: i is reachable from i via epsilon-transition */ | ||
1618 | if (GNUNET_YES == R_last[i * n + i].null_flag) | ||
1619 | { | ||
1620 | R_last[i * n + i].slen = 0; | ||
1621 | R_last[i * n + i].null_flag = GNUNET_NO; | ||
1622 | } | ||
1623 | else | ||
1624 | { | ||
1625 | sb_wrap (&R_last[i * n + i], "(|%.*s)", 3); | ||
1626 | } | ||
1627 | } | ||
1628 | for (i = 0; i < n; i++) | ||
1629 | for (j = 0; j < n; j++) | ||
1630 | if (needs_parentheses (&R_last[i * n + j])) | ||
1631 | sb_wrap (&R_last[i * n + j], "(%.*s)", 2); | ||
1632 | /* Compute regular expressions of length "k" between each pair of states per | ||
1633 | * induction */ | ||
1634 | memset (&R_cur_l, 0, sizeof(struct StringBuffer)); | ||
1635 | memset (&R_cur_r, 0, sizeof(struct StringBuffer)); | ||
1636 | for (k = 0; k < n; k++) | ||
1637 | { | ||
1638 | for (i = 0; i < n; i++) | ||
1639 | { | ||
1640 | for (j = 0; j < n; j++) | ||
1641 | { | ||
1642 | /* Basis for the recursion: | ||
1643 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1644 | * R_last == R^{(k-1)}, R_cur == R^{(k)} | ||
1645 | */ | ||
1646 | |||
1647 | /* Create R_cur[i][j] and simplify the expression */ | ||
1648 | automaton_create_proofs_simplify (&R_last[i * n + j], | ||
1649 | &R_last[i * n + k], | ||
1650 | &R_last[k * n + k], | ||
1651 | &R_last[k * n + j], | ||
1652 | &R_cur[i * n + j], | ||
1653 | &R_cur_l, | ||
1654 | &R_cur_r); | ||
1655 | } | ||
1656 | } | ||
1657 | /* set R_last = R_cur */ | ||
1658 | R_swap = R_last; | ||
1659 | R_last = R_cur; | ||
1660 | R_cur = R_swap; | ||
1661 | /* clear 'R_cur' for next iteration */ | ||
1662 | for (i = 0; i < n; i++) | ||
1663 | for (j = 0; j < n; j++) | ||
1664 | R_cur[i * n + j].null_flag = GNUNET_YES; | ||
1665 | } | ||
1666 | sb_free (&R_cur_l); | ||
1667 | sb_free (&R_cur_r); | ||
1668 | /* assign proofs and hashes */ | ||
1669 | for (i = 0; i < n; i++) | ||
1670 | { | ||
1671 | if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) | ||
1672 | { | ||
1673 | states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf, | ||
1674 | R_last[a->start->dfs_id * n + i].slen); | ||
1675 | GNUNET_CRYPTO_hash (states[i]->proof, | ||
1676 | strlen (states[i]->proof), | ||
1677 | &states[i]->hash); | ||
1678 | } | ||
1679 | } | ||
1680 | |||
1681 | /* complete regex for whole DFA: union of all pairs (start state/accepting | ||
1682 | * state(s)). */ | ||
1683 | sb_init (&complete_regex, 16 * n); | ||
1684 | for (i = 0; i < n; i++) | ||
1685 | { | ||
1686 | if (states[i]->accepting) | ||
1687 | { | ||
1688 | if ((0 == complete_regex.slen) && | ||
1689 | (0 < R_last[a->start->dfs_id * n + i].slen)) | ||
1690 | { | ||
1691 | sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]); | ||
1692 | } | ||
1693 | else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) && | ||
1694 | (0 < R_last[a->start->dfs_id * n + i].slen)) | ||
1695 | { | ||
1696 | sb_append_cstr (&complete_regex, "|"); | ||
1697 | sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]); | ||
1698 | } | ||
1699 | } | ||
1700 | } | ||
1701 | a->canonical_regex = | ||
1702 | GNUNET_strndup (complete_regex.sbuf, complete_regex.slen); | ||
1703 | |||
1704 | /* cleanup */ | ||
1705 | sb_free (&complete_regex); | ||
1706 | for (i = 0; i < n; i++) | ||
1707 | for (j = 0; j < n; j++) | ||
1708 | { | ||
1709 | sb_free (&R_cur[i * n + j]); | ||
1710 | sb_free (&R_last[i * n + j]); | ||
1711 | } | ||
1712 | GNUNET_free (R_cur); | ||
1713 | GNUNET_free (R_last); | ||
1714 | return GNUNET_OK; | ||
1715 | } | ||
1716 | |||
1717 | |||
1718 | /** | ||
1719 | * Creates a new DFA state based on a set of NFA states. Needs to be freed using | ||
1720 | * automaton_destroy_state. | ||
1721 | * | ||
1722 | * @param ctx context | ||
1723 | * @param nfa_states set of NFA states on which the DFA should be based on | ||
1724 | * | ||
1725 | * @return new DFA state | ||
1726 | */ | ||
1727 | static struct REGEX_INTERNAL_State * | ||
1728 | dfa_state_create (struct REGEX_INTERNAL_Context *ctx, | ||
1729 | struct REGEX_INTERNAL_StateSet *nfa_states) | ||
1730 | { | ||
1731 | struct REGEX_INTERNAL_State *s; | ||
1732 | char *pos; | ||
1733 | size_t len; | ||
1734 | struct REGEX_INTERNAL_State *cstate; | ||
1735 | struct REGEX_INTERNAL_Transition *ctran; | ||
1736 | unsigned int i; | ||
1737 | |||
1738 | s = GNUNET_new (struct REGEX_INTERNAL_State); | ||
1739 | s->id = ctx->state_id++; | ||
1740 | s->index = -1; | ||
1741 | s->lowlink = -1; | ||
1742 | |||
1743 | if (NULL == nfa_states) | ||
1744 | { | ||
1745 | GNUNET_asprintf (&s->name, "s%i", s->id); | ||
1746 | return s; | ||
1747 | } | ||
1748 | |||
1749 | s->nfa_set = *nfa_states; | ||
1750 | |||
1751 | if (nfa_states->off < 1) | ||
1752 | return s; | ||
1753 | |||
1754 | /* Create a name based on 'nfa_states' */ | ||
1755 | len = nfa_states->off * 14 + 4; | ||
1756 | s->name = GNUNET_malloc (len); | ||
1757 | strcat (s->name, "{"); | ||
1758 | pos = s->name + 1; | ||
1759 | |||
1760 | for (i = 0; i < nfa_states->off; i++) | ||
1761 | { | ||
1762 | cstate = nfa_states->states[i]; | ||
1763 | GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id); | ||
1764 | pos += strlen (pos); | ||
1765 | |||
1766 | /* Add a transition for each distinct label to NULL state */ | ||
1767 | for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next) | ||
1768 | if (NULL != ctran->label) | ||
1769 | state_add_transition (ctx, s, ctran->label, NULL); | ||
1770 | |||
1771 | /* If the nfa_states contain an accepting state, the new dfa state is also | ||
1772 | * accepting. */ | ||
1773 | if (cstate->accepting) | ||
1774 | s->accepting = 1; | ||
1775 | } | ||
1776 | pos[-1] = '}'; | ||
1777 | s->name = GNUNET_realloc (s->name, strlen (s->name) + 1); | ||
1778 | |||
1779 | memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
1780 | return s; | ||
1781 | } | ||
1782 | |||
1783 | |||
1784 | /** | ||
1785 | * Move from the given state 's' to the next state on transition 'str'. Consumes | ||
1786 | * as much of the given 'str' as possible (useful for strided DFAs). On return | ||
1787 | * 's' will point to the next state, and the length of the substring used for | ||
1788 | * this transition will be returned. If no transition possible 0 is returned and | ||
1789 | * 's' points to NULL. | ||
1790 | * | ||
1791 | * @param s starting state, will point to the next state or NULL (if no | ||
1792 | * transition possible) | ||
1793 | * @param str edge label to follow (will match longest common prefix) | ||
1794 | * | ||
1795 | * @return length of the substring consumed from 'str' | ||
1796 | */ | ||
1797 | static unsigned int | ||
1798 | dfa_move (struct REGEX_INTERNAL_State **s, const char *str) | ||
1799 | { | ||
1800 | struct REGEX_INTERNAL_Transition *t; | ||
1801 | struct REGEX_INTERNAL_State *new_s; | ||
1802 | unsigned int len; | ||
1803 | unsigned int max_len; | ||
1804 | |||
1805 | if (NULL == s) | ||
1806 | return 0; | ||
1807 | |||
1808 | new_s = NULL; | ||
1809 | max_len = 0; | ||
1810 | for (t = (*s)->transitions_head; NULL != t; t = t->next) | ||
1811 | { | ||
1812 | len = strlen (t->label); | ||
1813 | |||
1814 | if (0 == strncmp (t->label, str, len)) | ||
1815 | { | ||
1816 | if (len >= max_len) | ||
1817 | { | ||
1818 | max_len = len; | ||
1819 | new_s = t->to_state; | ||
1820 | } | ||
1821 | } | ||
1822 | } | ||
1823 | |||
1824 | *s = new_s; | ||
1825 | return max_len; | ||
1826 | } | ||
1827 | |||
1828 | |||
1829 | /** | ||
1830 | * Set the given state 'marked' to #GNUNET_YES. Used by the | ||
1831 | * #dfa_remove_unreachable_states() function to detect unreachable states in the | ||
1832 | * automaton. | ||
1833 | * | ||
1834 | * @param cls closure, not used. | ||
1835 | * @param count count, not used. | ||
1836 | * @param s state where the marked attribute will be set to #GNUNET_YES. | ||
1837 | */ | ||
1838 | static void | ||
1839 | mark_states (void *cls, | ||
1840 | const unsigned int count, | ||
1841 | struct REGEX_INTERNAL_State *s) | ||
1842 | { | ||
1843 | s->marked = GNUNET_YES; | ||
1844 | } | ||
1845 | |||
1846 | |||
1847 | /** | ||
1848 | * Remove all unreachable states from DFA 'a'. Unreachable states are those | ||
1849 | * states that are not reachable from the starting state. | ||
1850 | * | ||
1851 | * @param a DFA automaton | ||
1852 | */ | ||
1853 | static void | ||
1854 | dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a) | ||
1855 | { | ||
1856 | struct REGEX_INTERNAL_State *s; | ||
1857 | struct REGEX_INTERNAL_State *s_next; | ||
1858 | |||
1859 | /* 1. unmark all states */ | ||
1860 | for (s = a->states_head; NULL != s; s = s->next) | ||
1861 | s->marked = GNUNET_NO; | ||
1862 | |||
1863 | /* 2. traverse dfa from start state and mark all visited states */ | ||
1864 | REGEX_INTERNAL_automaton_traverse (a, | ||
1865 | a->start, | ||
1866 | NULL, | ||
1867 | NULL, | ||
1868 | &mark_states, | ||
1869 | NULL); | ||
1870 | |||
1871 | /* 3. delete all states that were not visited */ | ||
1872 | for (s = a->states_head; NULL != s; s = s_next) | ||
1873 | { | ||
1874 | s_next = s->next; | ||
1875 | if (GNUNET_NO == s->marked) | ||
1876 | automaton_remove_state (a, s); | ||
1877 | } | ||
1878 | } | ||
1879 | |||
1880 | |||
1881 | /** | ||
1882 | * Remove all dead states from the DFA 'a'. Dead states are those states that do | ||
1883 | * not transition to any other state but themselves. | ||
1884 | * | ||
1885 | * @param a DFA automaton | ||
1886 | */ | ||
1887 | static void | ||
1888 | dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a) | ||
1889 | { | ||
1890 | struct REGEX_INTERNAL_State *s; | ||
1891 | struct REGEX_INTERNAL_State *s_next; | ||
1892 | struct REGEX_INTERNAL_Transition *t; | ||
1893 | int dead; | ||
1894 | |||
1895 | GNUNET_assert (DFA == a->type); | ||
1896 | |||
1897 | for (s = a->states_head; NULL != s; s = s_next) | ||
1898 | { | ||
1899 | s_next = s->next; | ||
1900 | |||
1901 | if (s->accepting) | ||
1902 | continue; | ||
1903 | |||
1904 | dead = 1; | ||
1905 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
1906 | { | ||
1907 | if ((NULL != t->to_state) && (t->to_state != s) ) | ||
1908 | { | ||
1909 | dead = 0; | ||
1910 | break; | ||
1911 | } | ||
1912 | } | ||
1913 | |||
1914 | if (0 == dead) | ||
1915 | continue; | ||
1916 | |||
1917 | /* state s is dead, remove it */ | ||
1918 | automaton_remove_state (a, s); | ||
1919 | } | ||
1920 | } | ||
1921 | |||
1922 | |||
1923 | /** | ||
1924 | * Merge all non distinguishable states in the DFA 'a' | ||
1925 | * | ||
1926 | * @param ctx context | ||
1927 | * @param a DFA automaton | ||
1928 | * @return #GNUNET_OK on success | ||
1929 | */ | ||
1930 | static int | ||
1931 | dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx, | ||
1932 | struct REGEX_INTERNAL_Automaton *a) | ||
1933 | { | ||
1934 | uint32_t *table; | ||
1935 | struct REGEX_INTERNAL_State *s1; | ||
1936 | struct REGEX_INTERNAL_State *s2; | ||
1937 | struct REGEX_INTERNAL_Transition *t1; | ||
1938 | struct REGEX_INTERNAL_Transition *t2; | ||
1939 | struct REGEX_INTERNAL_State *s1_next; | ||
1940 | struct REGEX_INTERNAL_State *s2_next; | ||
1941 | int change; | ||
1942 | unsigned int num_equal_edges; | ||
1943 | unsigned int i; | ||
1944 | unsigned int state_cnt; | ||
1945 | unsigned long long idx; | ||
1946 | unsigned long long idx1; | ||
1947 | |||
1948 | if ((NULL == a) || (0 == a->state_count)) | ||
1949 | { | ||
1950 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1951 | "Could not merge nondistinguishable states, automaton was NULL.\n"); | ||
1952 | return GNUNET_SYSERR; | ||
1953 | } | ||
1954 | |||
1955 | state_cnt = a->state_count; | ||
1956 | table = GNUNET_malloc_large ( | ||
1957 | (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t)); | ||
1958 | if (NULL == table) | ||
1959 | { | ||
1960 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc"); | ||
1961 | return GNUNET_SYSERR; | ||
1962 | } | ||
1963 | |||
1964 | for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next) | ||
1965 | s1->marked = i++; | ||
1966 | |||
1967 | /* Mark all pairs of accepting/!accepting states */ | ||
1968 | for (s1 = a->states_head; NULL != s1; s1 = s1->next) | ||
1969 | for (s2 = a->states_head; NULL != s2; s2 = s2->next) | ||
1970 | if ((s1->accepting && ! s2->accepting) || | ||
1971 | (! s1->accepting && s2->accepting)) | ||
1972 | { | ||
1973 | idx = (unsigned long long) s1->marked * state_cnt + s2->marked; | ||
1974 | table[idx / 32] |= (1U << (idx % 32)); | ||
1975 | } | ||
1976 | |||
1977 | /* Find all equal states */ | ||
1978 | change = 1; | ||
1979 | while (0 != change) | ||
1980 | { | ||
1981 | change = 0; | ||
1982 | for (s1 = a->states_head; NULL != s1; s1 = s1->next) | ||
1983 | { | ||
1984 | for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next) | ||
1985 | { | ||
1986 | idx = (unsigned long long) s1->marked * state_cnt + s2->marked; | ||
1987 | if (0 != (table[idx / 32] & (1U << (idx % 32)))) | ||
1988 | continue; | ||
1989 | num_equal_edges = 0; | ||
1990 | for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next) | ||
1991 | { | ||
1992 | for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next) | ||
1993 | { | ||
1994 | if (0 == strcmp (t1->label, t2->label)) | ||
1995 | { | ||
1996 | num_equal_edges++; | ||
1997 | /* same edge, but targets definitively different, so we're different | ||
1998 | as well */ | ||
1999 | if (t1->to_state->marked > t2->to_state->marked) | ||
2000 | idx1 = (unsigned long long) t1->to_state->marked * state_cnt | ||
2001 | + t2->to_state->marked; | ||
2002 | else | ||
2003 | idx1 = (unsigned long long) t2->to_state->marked * state_cnt | ||
2004 | + t1->to_state->marked; | ||
2005 | if (0 != (table[idx1 / 32] & (1U << (idx1 % 32)))) | ||
2006 | { | ||
2007 | table[idx / 32] |= (1U << (idx % 32)); | ||
2008 | change = 1; /* changed a marker, need to run again */ | ||
2009 | } | ||
2010 | } | ||
2011 | } | ||
2012 | } | ||
2013 | if ((num_equal_edges != s1->transition_count) || | ||
2014 | (num_equal_edges != s2->transition_count)) | ||
2015 | { | ||
2016 | /* Make sure ALL edges of possible equal states are the same */ | ||
2017 | table[idx / 32] |= (1U << (idx % 32)); | ||
2018 | change = 1; /* changed a marker, need to run again */ | ||
2019 | } | ||
2020 | } | ||
2021 | } | ||
2022 | } | ||
2023 | |||
2024 | /* Merge states that are equal */ | ||
2025 | for (s1 = a->states_head; NULL != s1; s1 = s1_next) | ||
2026 | { | ||
2027 | s1_next = s1->next; | ||
2028 | for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next) | ||
2029 | { | ||
2030 | s2_next = s2->next; | ||
2031 | idx = (unsigned long long) s1->marked * state_cnt + s2->marked; | ||
2032 | if (0 == (table[idx / 32] & (1U << (idx % 32)))) | ||
2033 | automaton_merge_states (ctx, a, s1, s2); | ||
2034 | } | ||
2035 | } | ||
2036 | |||
2037 | GNUNET_free (table); | ||
2038 | return GNUNET_OK; | ||
2039 | } | ||
2040 | |||
2041 | |||
2042 | /** | ||
2043 | * Minimize the given DFA 'a' by removing all unreachable states, removing all | ||
2044 | * dead states and merging all non distinguishable states | ||
2045 | * | ||
2046 | * @param ctx context | ||
2047 | * @param a DFA automaton | ||
2048 | * @return GNUNET_OK on success | ||
2049 | */ | ||
2050 | static int | ||
2051 | dfa_minimize (struct REGEX_INTERNAL_Context *ctx, | ||
2052 | struct REGEX_INTERNAL_Automaton *a) | ||
2053 | { | ||
2054 | if (NULL == a) | ||
2055 | return GNUNET_SYSERR; | ||
2056 | |||
2057 | GNUNET_assert (DFA == a->type); | ||
2058 | |||
2059 | /* 1. remove unreachable states */ | ||
2060 | dfa_remove_unreachable_states (a); | ||
2061 | |||
2062 | /* 2. remove dead states */ | ||
2063 | dfa_remove_dead_states (a); | ||
2064 | |||
2065 | /* 3. Merge nondistinguishable states */ | ||
2066 | if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a)) | ||
2067 | return GNUNET_SYSERR; | ||
2068 | return GNUNET_OK; | ||
2069 | } | ||
2070 | |||
2071 | |||
/**
 * Context for adding strided transitions to a DFA. It is handed as closure to
 * the traversal callbacks, which collect the new transitions in the DLL below.
 */
struct REGEX_INTERNAL_Strided_Context
{
  /**
   * Length of the strides, i.e. the traversal depth at which a new transition
   * is recorded. Being const, it has to be set when the context is
   * initialized.
   */
  const unsigned int stride;

  /**
   * Head of the strided transitions DLL. New strided transitions will be
   * stored in this DLL and afterwards added to the DFA.
   */
  struct REGEX_INTERNAL_Transition *transitions_head;

  /**
   * Tail of the strided transitions DLL.
   */
  struct REGEX_INTERNAL_Transition *transitions_tail;
};
2093 | |||
2094 | |||
2095 | /** | ||
2096 | * Recursive helper function to add strides to a DFA. | ||
2097 | * | ||
2098 | * @param cls context, contains stride length and strided transitions DLL. | ||
2099 | * @param depth current depth of the depth-first traversal of the graph. | ||
2100 | * @param label current label, string that contains all labels on the path from | ||
2101 | * 'start' to 's'. | ||
2102 | * @param start start state for the depth-first traversal of the graph. | ||
2103 | * @param s current state in the depth-first traversal | ||
2104 | */ | ||
2105 | static void | ||
2106 | dfa_add_multi_strides_helper (void *cls, | ||
2107 | const unsigned int depth, | ||
2108 | char *label, | ||
2109 | struct REGEX_INTERNAL_State *start, | ||
2110 | struct REGEX_INTERNAL_State *s) | ||
2111 | { | ||
2112 | struct REGEX_INTERNAL_Strided_Context *ctx = cls; | ||
2113 | struct REGEX_INTERNAL_Transition *t; | ||
2114 | char *new_label; | ||
2115 | |||
2116 | if (depth == ctx->stride) | ||
2117 | { | ||
2118 | t = GNUNET_new (struct REGEX_INTERNAL_Transition); | ||
2119 | t->label = GNUNET_strdup (label); | ||
2120 | t->to_state = s; | ||
2121 | t->from_state = start; | ||
2122 | GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, | ||
2123 | ctx->transitions_tail, | ||
2124 | t); | ||
2125 | } | ||
2126 | else | ||
2127 | { | ||
2128 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
2129 | { | ||
2130 | /* Do not consider self-loops, because it end's up in too many | ||
2131 | * transitions */ | ||
2132 | if (t->to_state == t->from_state) | ||
2133 | continue; | ||
2134 | |||
2135 | if (NULL != label) | ||
2136 | { | ||
2137 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
2138 | } | ||
2139 | else | ||
2140 | new_label = GNUNET_strdup (t->label); | ||
2141 | |||
2142 | dfa_add_multi_strides_helper (cls, | ||
2143 | (depth + 1), | ||
2144 | new_label, | ||
2145 | start, | ||
2146 | t->to_state); | ||
2147 | } | ||
2148 | } | ||
2149 | GNUNET_free (label); | ||
2150 | } | ||
2151 | |||
2152 | |||
2153 | /** | ||
2154 | * Function called for each state in the DFA. Starts a traversal of depth set in | ||
2155 | * context starting from state 's'. | ||
2156 | * | ||
2157 | * @param cls context. | ||
2158 | * @param count not used. | ||
2159 | * @param s current state. | ||
2160 | */ | ||
2161 | static void | ||
2162 | dfa_add_multi_strides (void *cls, | ||
2163 | const unsigned int count, | ||
2164 | struct REGEX_INTERNAL_State *s) | ||
2165 | { | ||
2166 | dfa_add_multi_strides_helper (cls, 0, NULL, s, s); | ||
2167 | } | ||
2168 | |||
2169 | |||
2170 | /** | ||
2171 | * Adds multi-strided transitions to the given 'dfa'. | ||
2172 | * | ||
2173 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
2174 | * @param dfa DFA to which the multi strided transitions should be added. | ||
2175 | * @param stride_len length of the strides. | ||
2176 | */ | ||
2177 | void | ||
2178 | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, | ||
2179 | struct REGEX_INTERNAL_Automaton *dfa, | ||
2180 | const unsigned int stride_len) | ||
2181 | { | ||
2182 | struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL }; | ||
2183 | struct REGEX_INTERNAL_Transition *t; | ||
2184 | struct REGEX_INTERNAL_Transition *t_next; | ||
2185 | |||
2186 | if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided)) | ||
2187 | return; | ||
2188 | |||
2189 | /* Compute the new transitions of given stride_len */ | ||
2190 | REGEX_INTERNAL_automaton_traverse (dfa, | ||
2191 | dfa->start, | ||
2192 | NULL, | ||
2193 | NULL, | ||
2194 | &dfa_add_multi_strides, | ||
2195 | &ctx); | ||
2196 | |||
2197 | /* Add all the new transitions to the automaton. */ | ||
2198 | for (t = ctx.transitions_head; NULL != t; t = t_next) | ||
2199 | { | ||
2200 | t_next = t->next; | ||
2201 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
2202 | GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t); | ||
2203 | GNUNET_free (t->label); | ||
2204 | GNUNET_free (t); | ||
2205 | } | ||
2206 | |||
2207 | /* Mark this automaton as multistrided */ | ||
2208 | dfa->is_multistrided = GNUNET_YES; | ||
2209 | } | ||
2210 | |||
2211 | |||
2212 | /** | ||
2213 | * Recursive Helper function for DFA path compression. Does DFS on the DFA graph | ||
2214 | * and adds new transitions to the given transitions DLL and marks states that | ||
2215 | * should be removed by setting state->contained to GNUNET_YES. | ||
2216 | * | ||
2217 | * @param dfa DFA for which the paths should be compressed. | ||
2218 | * @param start starting state for linear path search. | ||
2219 | * @param cur current state in the recursive DFS. | ||
2220 | * @param label current label (string of traversed labels). | ||
2221 | * @param max_len maximal path compression length. | ||
2222 | * @param transitions_head transitions DLL. | ||
2223 | * @param transitions_tail transitions DLL. | ||
2224 | */ | ||
2225 | void | ||
2226 | dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa, | ||
2227 | struct REGEX_INTERNAL_State *start, | ||
2228 | struct REGEX_INTERNAL_State *cur, | ||
2229 | char *label, | ||
2230 | unsigned int max_len, | ||
2231 | struct REGEX_INTERNAL_Transition **transitions_head, | ||
2232 | struct REGEX_INTERNAL_Transition **transitions_tail) | ||
2233 | { | ||
2234 | struct REGEX_INTERNAL_Transition *t; | ||
2235 | char *new_label; | ||
2236 | |||
2237 | |||
2238 | if ((NULL != label) && | ||
2239 | (((cur->incoming_transition_count > 1) || (GNUNET_YES == | ||
2240 | cur->accepting) || | ||
2241 | (GNUNET_YES == cur->marked) ) || | ||
2242 | ((start != dfa->start) && (max_len > 0) && (max_len == strlen ( | ||
2243 | label))) || | ||
2244 | ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen ( | ||
2245 | label))))) | ||
2246 | { | ||
2247 | t = GNUNET_new (struct REGEX_INTERNAL_Transition); | ||
2248 | t->label = GNUNET_strdup (label); | ||
2249 | t->to_state = cur; | ||
2250 | t->from_state = start; | ||
2251 | GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t); | ||
2252 | |||
2253 | if (GNUNET_NO == cur->marked) | ||
2254 | { | ||
2255 | dfa_compress_paths_helper (dfa, | ||
2256 | cur, | ||
2257 | cur, | ||
2258 | NULL, | ||
2259 | max_len, | ||
2260 | transitions_head, | ||
2261 | transitions_tail); | ||
2262 | } | ||
2263 | return; | ||
2264 | } | ||
2265 | else if (cur != start) | ||
2266 | cur->contained = GNUNET_YES; | ||
2267 | |||
2268 | if ((GNUNET_YES == cur->marked) && (cur != start)) | ||
2269 | return; | ||
2270 | |||
2271 | cur->marked = GNUNET_YES; | ||
2272 | |||
2273 | |||
2274 | for (t = cur->transitions_head; NULL != t; t = t->next) | ||
2275 | { | ||
2276 | if (NULL != label) | ||
2277 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
2278 | else | ||
2279 | new_label = GNUNET_strdup (t->label); | ||
2280 | |||
2281 | if (t->to_state != cur) | ||
2282 | { | ||
2283 | dfa_compress_paths_helper (dfa, | ||
2284 | start, | ||
2285 | t->to_state, | ||
2286 | new_label, | ||
2287 | max_len, | ||
2288 | transitions_head, | ||
2289 | transitions_tail); | ||
2290 | } | ||
2291 | GNUNET_free (new_label); | ||
2292 | } | ||
2293 | } | ||
2294 | |||
2295 | |||
2296 | /** | ||
2297 | * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be | ||
2298 | * compressed to 0->3 by combining transitions. | ||
2299 | * | ||
2300 | * @param regex_ctx context for adding new transitions. | ||
2301 | * @param dfa DFA representation, will directly modify the given DFA. | ||
2302 | * @param max_len maximal length of the compressed paths. | ||
2303 | */ | ||
2304 | static void | ||
2305 | dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx, | ||
2306 | struct REGEX_INTERNAL_Automaton *dfa, | ||
2307 | unsigned int max_len) | ||
2308 | { | ||
2309 | struct REGEX_INTERNAL_State *s; | ||
2310 | struct REGEX_INTERNAL_State *s_next; | ||
2311 | struct REGEX_INTERNAL_Transition *t; | ||
2312 | struct REGEX_INTERNAL_Transition *t_next; | ||
2313 | struct REGEX_INTERNAL_Transition *transitions_head = NULL; | ||
2314 | struct REGEX_INTERNAL_Transition *transitions_tail = NULL; | ||
2315 | |||
2316 | if (NULL == dfa) | ||
2317 | return; | ||
2318 | |||
2319 | /* Count the incoming transitions on each state. */ | ||
2320 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
2321 | { | ||
2322 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
2323 | { | ||
2324 | if (NULL != t->to_state) | ||
2325 | t->to_state->incoming_transition_count++; | ||
2326 | } | ||
2327 | } | ||
2328 | |||
2329 | /* Unmark all states. */ | ||
2330 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
2331 | { | ||
2332 | s->marked = GNUNET_NO; | ||
2333 | s->contained = GNUNET_NO; | ||
2334 | } | ||
2335 | |||
2336 | /* Add strides and mark states that can be deleted. */ | ||
2337 | dfa_compress_paths_helper (dfa, | ||
2338 | dfa->start, | ||
2339 | dfa->start, | ||
2340 | NULL, | ||
2341 | max_len, | ||
2342 | &transitions_head, | ||
2343 | &transitions_tail); | ||
2344 | |||
2345 | /* Add all the new transitions to the automaton. */ | ||
2346 | for (t = transitions_head; NULL != t; t = t_next) | ||
2347 | { | ||
2348 | t_next = t->next; | ||
2349 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
2350 | GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t); | ||
2351 | GNUNET_free (t->label); | ||
2352 | GNUNET_free (t); | ||
2353 | } | ||
2354 | |||
2355 | /* Remove marked states (including their incoming and outgoing transitions). */ | ||
2356 | for (s = dfa->states_head; NULL != s; s = s_next) | ||
2357 | { | ||
2358 | s_next = s->next; | ||
2359 | if (GNUNET_YES == s->contained) | ||
2360 | automaton_remove_state (dfa, s); | ||
2361 | } | ||
2362 | } | ||
2363 | |||
2364 | |||
2365 | /** | ||
2366 | * Creates a new NFA fragment. Needs to be cleared using | ||
2367 | * automaton_fragment_clear. | ||
2368 | * | ||
2369 | * @param start starting state | ||
2370 | * @param end end state | ||
2371 | * | ||
2372 | * @return new NFA fragment | ||
2373 | */ | ||
2374 | static struct REGEX_INTERNAL_Automaton * | ||
2375 | nfa_fragment_create (struct REGEX_INTERNAL_State *start, | ||
2376 | struct REGEX_INTERNAL_State *end) | ||
2377 | { | ||
2378 | struct REGEX_INTERNAL_Automaton *n; | ||
2379 | |||
2380 | n = GNUNET_new (struct REGEX_INTERNAL_Automaton); | ||
2381 | |||
2382 | n->type = NFA; | ||
2383 | n->start = NULL; | ||
2384 | n->end = NULL; | ||
2385 | n->state_count = 0; | ||
2386 | |||
2387 | if ((NULL == start) || (NULL == end)) | ||
2388 | return n; | ||
2389 | |||
2390 | automaton_add_state (n, end); | ||
2391 | automaton_add_state (n, start); | ||
2392 | |||
2393 | n->state_count = 2; | ||
2394 | |||
2395 | n->start = start; | ||
2396 | n->end = end; | ||
2397 | |||
2398 | return n; | ||
2399 | } | ||
2400 | |||
2401 | |||
2402 | /** | ||
2403 | * Adds a list of states to the given automaton 'n'. | ||
2404 | * | ||
2405 | * @param n automaton to which the states should be added | ||
2406 | * @param states_head head of the DLL of states | ||
2407 | * @param states_tail tail of the DLL of states | ||
2408 | */ | ||
2409 | static void | ||
2410 | nfa_add_states (struct REGEX_INTERNAL_Automaton *n, | ||
2411 | struct REGEX_INTERNAL_State *states_head, | ||
2412 | struct REGEX_INTERNAL_State *states_tail) | ||
2413 | { | ||
2414 | struct REGEX_INTERNAL_State *s; | ||
2415 | |||
2416 | if ((NULL == n) || (NULL == states_head)) | ||
2417 | { | ||
2418 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n"); | ||
2419 | return; | ||
2420 | } | ||
2421 | |||
2422 | if (NULL == n->states_head) | ||
2423 | { | ||
2424 | n->states_head = states_head; | ||
2425 | n->states_tail = states_tail; | ||
2426 | return; | ||
2427 | } | ||
2428 | |||
2429 | if (NULL != states_head) | ||
2430 | { | ||
2431 | n->states_tail->next = states_head; | ||
2432 | n->states_tail = states_tail; | ||
2433 | } | ||
2434 | |||
2435 | for (s = states_head; NULL != s; s = s->next) | ||
2436 | n->state_count++; | ||
2437 | } | ||
2438 | |||
2439 | |||
2440 | /** | ||
2441 | * Creates a new NFA state. Needs to be freed using automaton_destroy_state. | ||
2442 | * | ||
2443 | * @param ctx context | ||
2444 | * @param accepting is it an accepting state or not | ||
2445 | * | ||
2446 | * @return new NFA state | ||
2447 | */ | ||
2448 | static struct REGEX_INTERNAL_State * | ||
2449 | nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting) | ||
2450 | { | ||
2451 | struct REGEX_INTERNAL_State *s; | ||
2452 | |||
2453 | s = GNUNET_new (struct REGEX_INTERNAL_State); | ||
2454 | s->id = ctx->state_id++; | ||
2455 | s->accepting = accepting; | ||
2456 | s->marked = GNUNET_NO; | ||
2457 | s->contained = 0; | ||
2458 | s->index = -1; | ||
2459 | s->lowlink = -1; | ||
2460 | s->scc_id = 0; | ||
2461 | s->name = NULL; | ||
2462 | GNUNET_asprintf (&s->name, "s%i", s->id); | ||
2463 | |||
2464 | return s; | ||
2465 | } | ||
2466 | |||
2467 | |||
/**
 * Calculates the closure set for the given set of states.
 *
 * For every state in 'states' a work-list traversal follows all transitions
 * whose label equals 'label' (compared with nullstrcmp, so a NULL 'label'
 * selects epsilon transitions) and collects every state reached that way.
 * The 'contained' flag of a state is used as a temporary "already
 * collected" marker and is reset to 0 for all collected states before
 * returning. The resulting set is sorted with state_compare.
 *
 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL);
 *            always zeroed first, so it is empty if 'states' is NULL
 * @param nfa the NFA containing 's' (not referenced in this function's body)
 * @param states list of states on which to base the closure on
 * @param label transitioning label for which to base the closure on,
 *                pass NULL for epsilon transition
 */
static void
nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret,
                        struct REGEX_INTERNAL_Automaton *nfa,
                        struct REGEX_INTERNAL_StateSet *states,
                        const char *label)
{
  struct REGEX_INTERNAL_State *s;
  unsigned int i;
  struct REGEX_INTERNAL_StateSet_MDLL cls_stack;
  struct REGEX_INTERNAL_State *clsstate;
  struct REGEX_INTERNAL_State *currentstate;
  struct REGEX_INTERNAL_Transition *ctran;

  memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet));
  if (NULL == states)
    return;

  for (i = 0; i < states->off; i++)
  {
    s = states->states[i];

    /* Add start state to closure only for epsilon closure */
    /* NOTE(review): 's' is appended without setting its 'contained' flag,
       so it looks like 's' could be appended a second time if a matching
       transition leads back to it -- confirm whether duplicates matter. */
    if (NULL == label)
      state_set_append (ret, s);

    /* initialize work stack */
    cls_stack.head = NULL;
    cls_stack.tail = NULL;
    GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
    cls_stack.len = 1;

    /* Process the work list from its tail until it is empty. */
    while (NULL != (currentstate = cls_stack.tail))
    {
      GNUNET_CONTAINER_MDLL_remove (ST,
                                    cls_stack.head,
                                    cls_stack.tail,
                                    currentstate);
      cls_stack.len--;
      for (ctran = currentstate->transitions_head; NULL != ctran;
           ctran = ctran->next)
      {
        /* Skip transitions without a target ... */
        if (NULL == (clsstate = ctran->to_state))
          continue;
        /* ... targets that were already collected ... */
        if (0 != clsstate->contained)
          continue;
        /* ... and transitions with a different label. */
        if (0 != nullstrcmp (label, ctran->label))
          continue;
        state_set_append (ret, clsstate);
        GNUNET_CONTAINER_MDLL_insert_tail (ST,
                                           cls_stack.head,
                                           cls_stack.tail,
                                           clsstate);
        cls_stack.len++;
        /* Mark as collected so it is neither appended nor queued again. */
        clsstate->contained = 1;
      }
    }
  }
  /* Reset the temporary markers on everything that was collected. */
  for (i = 0; i < ret->off; i++)
    ret->states[i]->contained = 0;

  /* Sorting is only needed when there is more than one element. */
  if (ret->off > 1)
    qsort (ret->states,
           ret->off,
           sizeof(struct REGEX_INTERNAL_State *),
           &state_compare);
}
2543 | |||
2544 | |||
2545 | /** | ||
2546 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) | ||
2547 | * | ||
2548 | * @param ctx context | ||
2549 | */ | ||
2550 | static void | ||
2551 | nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx) | ||
2552 | { | ||
2553 | struct REGEX_INTERNAL_Automaton *a; | ||
2554 | struct REGEX_INTERNAL_Automaton *b; | ||
2555 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2556 | |||
2557 | b = ctx->stack_tail; | ||
2558 | GNUNET_assert (NULL != b); | ||
2559 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b); | ||
2560 | a = ctx->stack_tail; | ||
2561 | GNUNET_assert (NULL != a); | ||
2562 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2563 | |||
2564 | state_add_transition (ctx, a->end, NULL, b->start); | ||
2565 | a->end->accepting = 0; | ||
2566 | b->end->accepting = 1; | ||
2567 | |||
2568 | new_nfa = nfa_fragment_create (NULL, NULL); | ||
2569 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2570 | nfa_add_states (new_nfa, b->states_head, b->states_tail); | ||
2571 | new_nfa->start = a->start; | ||
2572 | new_nfa->end = b->end; | ||
2573 | new_nfa->state_count += a->state_count + b->state_count; | ||
2574 | automaton_fragment_clear (a); | ||
2575 | automaton_fragment_clear (b); | ||
2576 | |||
2577 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2578 | } | ||
2579 | |||
2580 | |||
2581 | /** | ||
2582 | * Pops a NFA fragment from the stack (a) and adds a new fragment (a*) | ||
2583 | * | ||
2584 | * @param ctx context | ||
2585 | */ | ||
2586 | static void | ||
2587 | nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx) | ||
2588 | { | ||
2589 | struct REGEX_INTERNAL_Automaton *a; | ||
2590 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2591 | struct REGEX_INTERNAL_State *start; | ||
2592 | struct REGEX_INTERNAL_State *end; | ||
2593 | |||
2594 | a = ctx->stack_tail; | ||
2595 | |||
2596 | if (NULL == a) | ||
2597 | { | ||
2598 | GNUNET_log ( | ||
2599 | GNUNET_ERROR_TYPE_ERROR, | ||
2600 | "nfa_add_star_op failed, because there was no element on the stack"); | ||
2601 | return; | ||
2602 | } | ||
2603 | |||
2604 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2605 | |||
2606 | start = nfa_state_create (ctx, 0); | ||
2607 | end = nfa_state_create (ctx, 1); | ||
2608 | |||
2609 | state_add_transition (ctx, start, NULL, a->start); | ||
2610 | state_add_transition (ctx, start, NULL, end); | ||
2611 | state_add_transition (ctx, a->end, NULL, a->start); | ||
2612 | state_add_transition (ctx, a->end, NULL, end); | ||
2613 | |||
2614 | a->end->accepting = 0; | ||
2615 | end->accepting = 1; | ||
2616 | |||
2617 | new_nfa = nfa_fragment_create (start, end); | ||
2618 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2619 | automaton_fragment_clear (a); | ||
2620 | |||
2621 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2622 | } | ||
2623 | |||
2624 | |||
2625 | /** | ||
2626 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a+) | ||
2627 | * | ||
2628 | * @param ctx context | ||
2629 | */ | ||
2630 | static void | ||
2631 | nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx) | ||
2632 | { | ||
2633 | struct REGEX_INTERNAL_Automaton *a; | ||
2634 | |||
2635 | a = ctx->stack_tail; | ||
2636 | |||
2637 | if (NULL == a) | ||
2638 | { | ||
2639 | GNUNET_log ( | ||
2640 | GNUNET_ERROR_TYPE_ERROR, | ||
2641 | "nfa_add_plus_op failed, because there was no element on the stack"); | ||
2642 | return; | ||
2643 | } | ||
2644 | |||
2645 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2646 | |||
2647 | state_add_transition (ctx, a->end, NULL, a->start); | ||
2648 | |||
2649 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a); | ||
2650 | } | ||
2651 | |||
2652 | |||
2653 | /** | ||
2654 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a?) | ||
2655 | * | ||
2656 | * @param ctx context | ||
2657 | */ | ||
2658 | static void | ||
2659 | nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx) | ||
2660 | { | ||
2661 | struct REGEX_INTERNAL_Automaton *a; | ||
2662 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2663 | struct REGEX_INTERNAL_State *start; | ||
2664 | struct REGEX_INTERNAL_State *end; | ||
2665 | |||
2666 | a = ctx->stack_tail; | ||
2667 | if (NULL == a) | ||
2668 | { | ||
2669 | GNUNET_log ( | ||
2670 | GNUNET_ERROR_TYPE_ERROR, | ||
2671 | "nfa_add_question_op failed, because there was no element on the stack"); | ||
2672 | return; | ||
2673 | } | ||
2674 | |||
2675 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2676 | |||
2677 | start = nfa_state_create (ctx, 0); | ||
2678 | end = nfa_state_create (ctx, 1); | ||
2679 | |||
2680 | state_add_transition (ctx, start, NULL, a->start); | ||
2681 | state_add_transition (ctx, start, NULL, end); | ||
2682 | state_add_transition (ctx, a->end, NULL, end); | ||
2683 | |||
2684 | a->end->accepting = 0; | ||
2685 | |||
2686 | new_nfa = nfa_fragment_create (start, end); | ||
2687 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2688 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2689 | automaton_fragment_clear (a); | ||
2690 | } | ||
2691 | |||
2692 | |||
2693 | /** | ||
2694 | * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that | ||
2695 | * alternates between a and b (a|b) | ||
2696 | * | ||
2697 | * @param ctx context | ||
2698 | */ | ||
2699 | static void | ||
2700 | nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx) | ||
2701 | { | ||
2702 | struct REGEX_INTERNAL_Automaton *a; | ||
2703 | struct REGEX_INTERNAL_Automaton *b; | ||
2704 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2705 | struct REGEX_INTERNAL_State *start; | ||
2706 | struct REGEX_INTERNAL_State *end; | ||
2707 | |||
2708 | b = ctx->stack_tail; | ||
2709 | GNUNET_assert (NULL != b); | ||
2710 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b); | ||
2711 | a = ctx->stack_tail; | ||
2712 | GNUNET_assert (NULL != a); | ||
2713 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2714 | |||
2715 | start = nfa_state_create (ctx, 0); | ||
2716 | end = nfa_state_create (ctx, 1); | ||
2717 | state_add_transition (ctx, start, NULL, a->start); | ||
2718 | state_add_transition (ctx, start, NULL, b->start); | ||
2719 | |||
2720 | state_add_transition (ctx, a->end, NULL, end); | ||
2721 | state_add_transition (ctx, b->end, NULL, end); | ||
2722 | |||
2723 | a->end->accepting = 0; | ||
2724 | b->end->accepting = 0; | ||
2725 | end->accepting = 1; | ||
2726 | |||
2727 | new_nfa = nfa_fragment_create (start, end); | ||
2728 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2729 | nfa_add_states (new_nfa, b->states_head, b->states_tail); | ||
2730 | automaton_fragment_clear (a); | ||
2731 | automaton_fragment_clear (b); | ||
2732 | |||
2733 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2734 | } | ||
2735 | |||
2736 | |||
2737 | /** | ||
2738 | * Adds a new nfa fragment to the stack | ||
2739 | * | ||
2740 | * @param ctx context | ||
2741 | * @param label label for nfa transition | ||
2742 | */ | ||
2743 | static void | ||
2744 | nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label) | ||
2745 | { | ||
2746 | struct REGEX_INTERNAL_Automaton *n; | ||
2747 | struct REGEX_INTERNAL_State *start; | ||
2748 | struct REGEX_INTERNAL_State *end; | ||
2749 | |||
2750 | GNUNET_assert (NULL != ctx); | ||
2751 | |||
2752 | start = nfa_state_create (ctx, 0); | ||
2753 | end = nfa_state_create (ctx, 1); | ||
2754 | state_add_transition (ctx, start, label, end); | ||
2755 | n = nfa_fragment_create (start, end); | ||
2756 | GNUNET_assert (NULL != n); | ||
2757 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n); | ||
2758 | } | ||
2759 | |||
2760 | |||
2761 | /** | ||
2762 | * Initialize a new context | ||
2763 | * | ||
2764 | * @param ctx context | ||
2765 | */ | ||
2766 | static void | ||
2767 | REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx) | ||
2768 | { | ||
2769 | if (NULL == ctx) | ||
2770 | { | ||
2771 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!"); | ||
2772 | return; | ||
2773 | } | ||
2774 | ctx->state_id = 0; | ||
2775 | ctx->transition_id = 0; | ||
2776 | ctx->stack_head = NULL; | ||
2777 | ctx->stack_tail = NULL; | ||
2778 | } | ||
2779 | |||
2780 | |||
2781 | /** | ||
2782 | * Construct an NFA by parsing the regex string of length 'len'. | ||
2783 | * | ||
2784 | * @param regex regular expression string | ||
2785 | * @param len length of the string | ||
2786 | * | ||
2787 | * @return NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton | ||
2788 | */ | ||
2789 | struct REGEX_INTERNAL_Automaton * | ||
2790 | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len) | ||
2791 | { | ||
2792 | struct REGEX_INTERNAL_Context ctx; | ||
2793 | struct REGEX_INTERNAL_Automaton *nfa; | ||
2794 | const char *regexp; | ||
2795 | char curlabel[2]; | ||
2796 | char *error_msg; | ||
2797 | unsigned int count; | ||
2798 | unsigned int altcount; | ||
2799 | unsigned int atomcount; | ||
2800 | unsigned int poff; | ||
2801 | unsigned int psize; | ||
2802 | |||
2803 | struct | ||
2804 | { | ||
2805 | int altcount; | ||
2806 | int atomcount; | ||
2807 | } *p; | ||
2808 | |||
2809 | if ((NULL == regex) || (0 == strlen (regex)) || (0 == len)) | ||
2810 | { | ||
2811 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
2812 | "Could not parse regex. Empty regex string provided.\n"); | ||
2813 | |||
2814 | return NULL; | ||
2815 | } | ||
2816 | REGEX_INTERNAL_context_init (&ctx); | ||
2817 | |||
2818 | regexp = regex; | ||
2819 | curlabel[1] = '\0'; | ||
2820 | p = NULL; | ||
2821 | error_msg = NULL; | ||
2822 | altcount = 0; | ||
2823 | atomcount = 0; | ||
2824 | poff = 0; | ||
2825 | psize = 0; | ||
2826 | |||
2827 | for (count = 0; count < len && *regexp; count++, regexp++) | ||
2828 | { | ||
2829 | switch (*regexp) | ||
2830 | { | ||
2831 | case '(': | ||
2832 | if (atomcount > 1) | ||
2833 | { | ||
2834 | --atomcount; | ||
2835 | nfa_add_concatenation (&ctx); | ||
2836 | } | ||
2837 | if (poff == psize) | ||
2838 | GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */ | ||
2839 | p[poff].altcount = altcount; | ||
2840 | p[poff].atomcount = atomcount; | ||
2841 | poff++; | ||
2842 | altcount = 0; | ||
2843 | atomcount = 0; | ||
2844 | break; | ||
2845 | |||
2846 | case '|': | ||
2847 | if (0 == atomcount) | ||
2848 | { | ||
2849 | error_msg = "Cannot append '|' to nothing"; | ||
2850 | goto error; | ||
2851 | } | ||
2852 | while (--atomcount > 0) | ||
2853 | nfa_add_concatenation (&ctx); | ||
2854 | altcount++; | ||
2855 | break; | ||
2856 | |||
2857 | case ')': | ||
2858 | if (0 == poff) | ||
2859 | { | ||
2860 | error_msg = "Missing opening '('"; | ||
2861 | goto error; | ||
2862 | } | ||
2863 | if (0 == atomcount) | ||
2864 | { | ||
2865 | /* Ignore this: "()" */ | ||
2866 | poff--; | ||
2867 | altcount = p[poff].altcount; | ||
2868 | atomcount = p[poff].atomcount; | ||
2869 | break; | ||
2870 | } | ||
2871 | while (--atomcount > 0) | ||
2872 | nfa_add_concatenation (&ctx); | ||
2873 | for (; altcount > 0; altcount--) | ||
2874 | nfa_add_alternation (&ctx); | ||
2875 | poff--; | ||
2876 | altcount = p[poff].altcount; | ||
2877 | atomcount = p[poff].atomcount; | ||
2878 | atomcount++; | ||
2879 | break; | ||
2880 | |||
2881 | case '*': | ||
2882 | if (atomcount == 0) | ||
2883 | { | ||
2884 | error_msg = "Cannot append '*' to nothing"; | ||
2885 | goto error; | ||
2886 | } | ||
2887 | nfa_add_star_op (&ctx); | ||
2888 | break; | ||
2889 | |||
2890 | case '+': | ||
2891 | if (atomcount == 0) | ||
2892 | { | ||
2893 | error_msg = "Cannot append '+' to nothing"; | ||
2894 | goto error; | ||
2895 | } | ||
2896 | nfa_add_plus_op (&ctx); | ||
2897 | break; | ||
2898 | |||
2899 | case '?': | ||
2900 | if (atomcount == 0) | ||
2901 | { | ||
2902 | error_msg = "Cannot append '?' to nothing"; | ||
2903 | goto error; | ||
2904 | } | ||
2905 | nfa_add_question_op (&ctx); | ||
2906 | break; | ||
2907 | |||
2908 | default: | ||
2909 | if (atomcount > 1) | ||
2910 | { | ||
2911 | --atomcount; | ||
2912 | nfa_add_concatenation (&ctx); | ||
2913 | } | ||
2914 | curlabel[0] = *regexp; | ||
2915 | nfa_add_label (&ctx, curlabel); | ||
2916 | atomcount++; | ||
2917 | break; | ||
2918 | } | ||
2919 | } | ||
2920 | if (0 != poff) | ||
2921 | { | ||
2922 | error_msg = "Unbalanced parenthesis"; | ||
2923 | goto error; | ||
2924 | } | ||
2925 | while (--atomcount > 0) | ||
2926 | nfa_add_concatenation (&ctx); | ||
2927 | for (; altcount > 0; altcount--) | ||
2928 | nfa_add_alternation (&ctx); | ||
2929 | |||
2930 | GNUNET_array_grow (p, psize, 0); | ||
2931 | |||
2932 | nfa = ctx.stack_tail; | ||
2933 | GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); | ||
2934 | |||
2935 | if (NULL != ctx.stack_head) | ||
2936 | { | ||
2937 | error_msg = "Creating the NFA failed. NFA stack was not empty!"; | ||
2938 | goto error; | ||
2939 | } | ||
2940 | |||
2941 | /* Remember the regex that was used to generate this NFA */ | ||
2942 | nfa->regex = GNUNET_strdup (regex); | ||
2943 | |||
2944 | /* create depth-first numbering of the states for pretty printing */ | ||
2945 | REGEX_INTERNAL_automaton_traverse (nfa, | ||
2946 | NULL, | ||
2947 | NULL, | ||
2948 | NULL, | ||
2949 | &number_states, | ||
2950 | NULL); | ||
2951 | |||
2952 | /* No multistriding added so far */ | ||
2953 | nfa->is_multistrided = GNUNET_NO; | ||
2954 | |||
2955 | return nfa; | ||
2956 | |||
2957 | error: | ||
2958 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex); | ||
2959 | if (NULL != error_msg) | ||
2960 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg); | ||
2961 | |||
2962 | GNUNET_free (p); | ||
2963 | |||
2964 | while (NULL != (nfa = ctx.stack_head)) | ||
2965 | { | ||
2966 | GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); | ||
2967 | REGEX_INTERNAL_automaton_destroy (nfa); | ||
2968 | } | ||
2969 | |||
2970 | return NULL; | ||
2971 | } | ||
2972 | |||
2973 | |||
2974 | /** | ||
2975 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. | ||
2976 | * | ||
2977 | * @param ctx context. | ||
2978 | * @param nfa NFA automaton. | ||
2979 | * @param dfa DFA automaton. | ||
2980 | * @param dfa_state current dfa state, pass epsilon closure of first nfa state | ||
2981 | * for starting. | ||
2982 | */ | ||
2983 | static void | ||
2984 | construct_dfa_states (struct REGEX_INTERNAL_Context *ctx, | ||
2985 | struct REGEX_INTERNAL_Automaton *nfa, | ||
2986 | struct REGEX_INTERNAL_Automaton *dfa, | ||
2987 | struct REGEX_INTERNAL_State *dfa_state) | ||
2988 | { | ||
2989 | struct REGEX_INTERNAL_Transition *ctran; | ||
2990 | struct REGEX_INTERNAL_State *new_dfa_state; | ||
2991 | struct REGEX_INTERNAL_State *state_contains; | ||
2992 | struct REGEX_INTERNAL_State *state_iter; | ||
2993 | struct REGEX_INTERNAL_StateSet tmp; | ||
2994 | struct REGEX_INTERNAL_StateSet nfa_set; | ||
2995 | |||
2996 | for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next) | ||
2997 | { | ||
2998 | if ((NULL == ctran->label) || (NULL != ctran->to_state) ) | ||
2999 | continue; | ||
3000 | |||
3001 | nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label); | ||
3002 | nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL); | ||
3003 | state_set_clear (&tmp); | ||
3004 | |||
3005 | state_contains = NULL; | ||
3006 | for (state_iter = dfa->states_head; NULL != state_iter; | ||
3007 | state_iter = state_iter->next) | ||
3008 | { | ||
3009 | if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set)) | ||
3010 | { | ||
3011 | state_contains = state_iter; | ||
3012 | break; | ||
3013 | } | ||
3014 | } | ||
3015 | if (NULL == state_contains) | ||
3016 | { | ||
3017 | new_dfa_state = dfa_state_create (ctx, &nfa_set); | ||
3018 | automaton_add_state (dfa, new_dfa_state); | ||
3019 | ctran->to_state = new_dfa_state; | ||
3020 | construct_dfa_states (ctx, nfa, dfa, new_dfa_state); | ||
3021 | } | ||
3022 | else | ||
3023 | { | ||
3024 | ctran->to_state = state_contains; | ||
3025 | state_set_clear (&nfa_set); | ||
3026 | } | ||
3027 | } | ||
3028 | } | ||
3029 | |||
3030 | |||
3031 | struct REGEX_INTERNAL_Automaton * | ||
3032 | REGEX_INTERNAL_construct_dfa (const char *regex, | ||
3033 | const size_t len, | ||
3034 | unsigned int max_path_len) | ||
3035 | { | ||
3036 | struct REGEX_INTERNAL_Context ctx; | ||
3037 | struct REGEX_INTERNAL_Automaton *dfa; | ||
3038 | struct REGEX_INTERNAL_Automaton *nfa; | ||
3039 | struct REGEX_INTERNAL_StateSet nfa_start_eps_cls; | ||
3040 | struct REGEX_INTERNAL_StateSet singleton_set; | ||
3041 | |||
3042 | REGEX_INTERNAL_context_init (&ctx); | ||
3043 | |||
3044 | /* Create NFA */ | ||
3045 | nfa = REGEX_INTERNAL_construct_nfa (regex, len); | ||
3046 | |||
3047 | if (NULL == nfa) | ||
3048 | { | ||
3049 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3050 | "Could not create DFA, because NFA creation failed\n"); | ||
3051 | return NULL; | ||
3052 | } | ||
3053 | |||
3054 | dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton); | ||
3055 | dfa->type = DFA; | ||
3056 | dfa->regex = GNUNET_strdup (regex); | ||
3057 | |||
3058 | /* Create DFA start state from epsilon closure */ | ||
3059 | memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
3060 | state_set_append (&singleton_set, nfa->start); | ||
3061 | nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL); | ||
3062 | state_set_clear (&singleton_set); | ||
3063 | dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls); | ||
3064 | automaton_add_state (dfa, dfa->start); | ||
3065 | |||
3066 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); | ||
3067 | REGEX_INTERNAL_automaton_destroy (nfa); | ||
3068 | |||
3069 | /* Minimize DFA */ | ||
3070 | if (GNUNET_OK != dfa_minimize (&ctx, dfa)) | ||
3071 | { | ||
3072 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
3073 | return NULL; | ||
3074 | } | ||
3075 | |||
3076 | /* Create proofs and hashes for all states */ | ||
3077 | if (GNUNET_OK != automaton_create_proofs (dfa)) | ||
3078 | { | ||
3079 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
3080 | return NULL; | ||
3081 | } | ||
3082 | |||
3083 | /* Compress linear DFA paths */ | ||
3084 | if (1 != max_path_len) | ||
3085 | dfa_compress_paths (&ctx, dfa, max_path_len); | ||
3086 | |||
3087 | return dfa; | ||
3088 | } | ||
3089 | |||
3090 | |||
3091 | void | ||
3092 | REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a) | ||
3093 | { | ||
3094 | struct REGEX_INTERNAL_State *s; | ||
3095 | struct REGEX_INTERNAL_State *next_state; | ||
3096 | |||
3097 | if (NULL == a) | ||
3098 | return; | ||
3099 | |||
3100 | GNUNET_free (a->regex); | ||
3101 | GNUNET_free (a->canonical_regex); | ||
3102 | |||
3103 | for (s = a->states_head; NULL != s; s = next_state) | ||
3104 | { | ||
3105 | next_state = s->next; | ||
3106 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); | ||
3107 | automaton_destroy_state (s); | ||
3108 | } | ||
3109 | |||
3110 | GNUNET_free (a); | ||
3111 | } | ||
3112 | |||
3113 | |||
3114 | /** | ||
3115 | * Evaluates the given string using the given DFA automaton | ||
3116 | * | ||
3117 | * @param a automaton, type must be DFA | ||
3118 | * @param string string that should be evaluated | ||
3119 | * | ||
3120 | * @return 0 if string matches, non-0 otherwise | ||
3121 | */ | ||
3122 | static int | ||
3123 | evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string) | ||
3124 | { | ||
3125 | const char *strp; | ||
3126 | struct REGEX_INTERNAL_State *s; | ||
3127 | unsigned int step_len; | ||
3128 | |||
3129 | if (DFA != a->type) | ||
3130 | { | ||
3131 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3132 | "Tried to evaluate DFA, but NFA automaton given"); | ||
3133 | return -1; | ||
3134 | } | ||
3135 | |||
3136 | s = a->start; | ||
3137 | |||
3138 | /* If the string is empty but the starting state is accepting, we accept. */ | ||
3139 | if (((NULL == string) || (0 == strlen (string))) && s->accepting) | ||
3140 | return 0; | ||
3141 | |||
3142 | for (strp = string; NULL != strp && *strp; strp += step_len) | ||
3143 | { | ||
3144 | step_len = dfa_move (&s, strp); | ||
3145 | |||
3146 | if (NULL == s) | ||
3147 | break; | ||
3148 | } | ||
3149 | |||
3150 | if ((NULL != s) && s->accepting) | ||
3151 | return 0; | ||
3152 | |||
3153 | return 1; | ||
3154 | } | ||
3155 | |||
3156 | |||
3157 | /** | ||
3158 | * Evaluates the given string using the given NFA automaton | ||
3159 | * | ||
3160 | * @param a automaton, type must be NFA | ||
3161 | * @param string string that should be evaluated | ||
3162 | * @return 0 if string matches, non-0 otherwise | ||
3163 | */ | ||
3164 | static int | ||
3165 | evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string) | ||
3166 | { | ||
3167 | const char *strp; | ||
3168 | char str[2]; | ||
3169 | struct REGEX_INTERNAL_State *s; | ||
3170 | struct REGEX_INTERNAL_StateSet sset; | ||
3171 | struct REGEX_INTERNAL_StateSet new_sset; | ||
3172 | struct REGEX_INTERNAL_StateSet singleton_set; | ||
3173 | unsigned int i; | ||
3174 | int result; | ||
3175 | |||
3176 | if (NFA != a->type) | ||
3177 | { | ||
3178 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3179 | "Tried to evaluate NFA, but DFA automaton given"); | ||
3180 | return -1; | ||
3181 | } | ||
3182 | |||
3183 | /* If the string is empty but the starting state is accepting, we accept. */ | ||
3184 | if (((NULL == string) || (0 == strlen (string))) && a->start->accepting) | ||
3185 | return 0; | ||
3186 | |||
3187 | result = 1; | ||
3188 | memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
3189 | state_set_append (&singleton_set, a->start); | ||
3190 | nfa_closure_set_create (&sset, a, &singleton_set, NULL); | ||
3191 | state_set_clear (&singleton_set); | ||
3192 | |||
3193 | str[1] = '\0'; | ||
3194 | for (strp = string; NULL != strp && *strp; strp++) | ||
3195 | { | ||
3196 | str[0] = *strp; | ||
3197 | nfa_closure_set_create (&new_sset, a, &sset, str); | ||
3198 | state_set_clear (&sset); | ||
3199 | nfa_closure_set_create (&sset, a, &new_sset, 0); | ||
3200 | state_set_clear (&new_sset); | ||
3201 | } | ||
3202 | |||
3203 | for (i = 0; i < sset.off; i++) | ||
3204 | { | ||
3205 | s = sset.states[i]; | ||
3206 | if ((NULL != s) && (s->accepting)) | ||
3207 | { | ||
3208 | result = 0; | ||
3209 | break; | ||
3210 | } | ||
3211 | } | ||
3212 | |||
3213 | state_set_clear (&sset); | ||
3214 | return result; | ||
3215 | } | ||
3216 | |||
3217 | |||
3218 | int | ||
3219 | REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string) | ||
3220 | { | ||
3221 | int result; | ||
3222 | |||
3223 | switch (a->type) | ||
3224 | { | ||
3225 | case DFA: | ||
3226 | result = evaluate_dfa (a, string); | ||
3227 | break; | ||
3228 | |||
3229 | case NFA: | ||
3230 | result = evaluate_nfa (a, string); | ||
3231 | break; | ||
3232 | |||
3233 | default: | ||
3234 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3235 | "Evaluating regex failed, automaton has no type!\n"); | ||
3236 | result = GNUNET_SYSERR; | ||
3237 | break; | ||
3238 | } | ||
3239 | |||
3240 | return result; | ||
3241 | } | ||
3242 | |||
3243 | |||
3244 | const char * | ||
3245 | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a) | ||
3246 | { | ||
3247 | if (NULL == a) | ||
3248 | return NULL; | ||
3249 | |||
3250 | return a->canonical_regex; | ||
3251 | } | ||
3252 | |||
3253 | |||
3254 | /** | ||
3255 | * Get the number of transitions that are contained in the given automaton. | ||
3256 | * | ||
3257 | * @param a automaton for which the number of transitions should be returned. | ||
3258 | * | ||
3259 | * @return number of transitions in the given automaton. | ||
3260 | */ | ||
3261 | unsigned int | ||
3262 | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a) | ||
3263 | { | ||
3264 | unsigned int t_count; | ||
3265 | struct REGEX_INTERNAL_State *s; | ||
3266 | |||
3267 | if (NULL == a) | ||
3268 | return 0; | ||
3269 | |||
3270 | t_count = 0; | ||
3271 | for (s = a->states_head; NULL != s; s = s->next) | ||
3272 | t_count += s->transition_count; | ||
3273 | |||
3274 | return t_count; | ||
3275 | } | ||
3276 | |||
3277 | |||
3278 | /** | ||
3279 | * Get the first key for the given @a input_string. This hashes the first x bits | ||
3280 | * of the @a input_string. | ||
3281 | * | ||
3282 | * @param input_string string. | ||
3283 | * @param string_len length of the @a input_string. | ||
3284 | * @param key pointer to where to write the hash code. | ||
3285 | * @return number of bits of @a input_string that have been consumed | ||
3286 | * to construct the key | ||
3287 | */ | ||
3288 | size_t | ||
3289 | REGEX_INTERNAL_get_first_key (const char *input_string, | ||
3290 | size_t string_len, | ||
3291 | struct GNUNET_HashCode *key) | ||
3292 | { | ||
3293 | size_t size; | ||
3294 | |||
3295 | size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len | ||
3296 | : GNUNET_REGEX_INITIAL_BYTES; | ||
3297 | if (NULL == input_string) | ||
3298 | { | ||
3299 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n"); | ||
3300 | return 0; | ||
3301 | } | ||
3302 | GNUNET_CRYPTO_hash (input_string, size, key); | ||
3303 | |||
3304 | return size; | ||
3305 | } | ||
3306 | |||
3307 | |||
3308 | /** | ||
3309 | * Recursive function that calls the iterator for each synthetic start state. | ||
3310 | * | ||
3311 | * @param min_len minimum length of the path in the graph. | ||
3312 | * @param max_len maximum length of the path in the graph. | ||
3313 | * @param consumed_string string consumed by traversing the graph till this state. | ||
3314 | * @param state current state of the automaton. | ||
3315 | * @param iterator iterator function called for each edge. | ||
3316 | * @param iterator_cls closure for the @a iterator function. | ||
3317 | */ | ||
3318 | static void | ||
3319 | iterate_initial_edge (unsigned int min_len, | ||
3320 | unsigned int max_len, | ||
3321 | char *consumed_string, | ||
3322 | struct REGEX_INTERNAL_State *state, | ||
3323 | REGEX_INTERNAL_KeyIterator iterator, | ||
3324 | void *iterator_cls) | ||
3325 | { | ||
3326 | char *temp; | ||
3327 | struct REGEX_INTERNAL_Transition *t; | ||
3328 | unsigned int num_edges = state->transition_count; | ||
3329 | struct REGEX_BLOCK_Edge edges[num_edges]; | ||
3330 | struct REGEX_BLOCK_Edge edge[1]; | ||
3331 | struct GNUNET_HashCode hash; | ||
3332 | struct GNUNET_HashCode hash_new; | ||
3333 | unsigned int cur_len; | ||
3334 | |||
3335 | if (NULL != consumed_string) | ||
3336 | cur_len = strlen (consumed_string); | ||
3337 | else | ||
3338 | cur_len = 0; | ||
3339 | |||
3340 | if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) && | ||
3341 | (cur_len > 0) && (NULL != consumed_string)) | ||
3342 | { | ||
3343 | if (cur_len <= max_len) | ||
3344 | { | ||
3345 | if ((NULL != state->proof) && | ||
3346 | (0 != strcmp (consumed_string, state->proof))) | ||
3347 | { | ||
3348 | (void) state_get_edges (state, edges); | ||
3349 | GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); | ||
3350 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3351 | "Start state for string `%s' is %s\n", | ||
3352 | consumed_string, | ||
3353 | GNUNET_h2s (&hash)); | ||
3354 | iterator (iterator_cls, | ||
3355 | &hash, | ||
3356 | consumed_string, | ||
3357 | state->accepting, | ||
3358 | num_edges, | ||
3359 | edges); | ||
3360 | } | ||
3361 | |||
3362 | if ((GNUNET_YES == state->accepting) && (cur_len > 1) && | ||
3363 | (state->transition_count < 1) && (cur_len < max_len)) | ||
3364 | { | ||
3365 | /* Special case for regex consisting of just a string that is shorter than | ||
3366 | * max_len */ | ||
3367 | edge[0].label = &consumed_string[cur_len - 1]; | ||
3368 | edge[0].destination = state->hash; | ||
3369 | temp = GNUNET_strdup (consumed_string); | ||
3370 | temp[cur_len - 1] = '\0'; | ||
3371 | GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new); | ||
3372 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3373 | "Start state for short string `%s' is %s\n", | ||
3374 | temp, | ||
3375 | GNUNET_h2s (&hash_new)); | ||
3376 | iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge); | ||
3377 | GNUNET_free (temp); | ||
3378 | } | ||
3379 | } | ||
3380 | else /* cur_len > max_len */ | ||
3381 | { | ||
3382 | /* Case where the concatenated labels are longer than max_len, then split. */ | ||
3383 | edge[0].label = &consumed_string[max_len]; | ||
3384 | edge[0].destination = state->hash; | ||
3385 | temp = GNUNET_strdup (consumed_string); | ||
3386 | temp[max_len] = '\0'; | ||
3387 | GNUNET_CRYPTO_hash (temp, max_len, &hash); | ||
3388 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3389 | "Start state at split edge `%s'-`%s` is %s\n", | ||
3390 | temp, | ||
3391 | edge[0].label, | ||
3392 | GNUNET_h2s (&hash_new)); | ||
3393 | iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge); | ||
3394 | GNUNET_free (temp); | ||
3395 | } | ||
3396 | } | ||
3397 | |||
3398 | if (cur_len < max_len) | ||
3399 | { | ||
3400 | for (t = state->transitions_head; NULL != t; t = t->next) | ||
3401 | { | ||
3402 | if (NULL != strchr (t->label, (int) '.')) | ||
3403 | { | ||
3404 | /* Wildcards not allowed during starting states */ | ||
3405 | GNUNET_break (0); | ||
3406 | continue; | ||
3407 | } | ||
3408 | if (NULL != consumed_string) | ||
3409 | GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label); | ||
3410 | else | ||
3411 | GNUNET_asprintf (&temp, "%s", t->label); | ||
3412 | iterate_initial_edge (min_len, | ||
3413 | max_len, | ||
3414 | temp, | ||
3415 | t->to_state, | ||
3416 | iterator, | ||
3417 | iterator_cls); | ||
3418 | GNUNET_free (temp); | ||
3419 | } | ||
3420 | } | ||
3421 | } | ||
3422 | |||
3423 | |||
3424 | /** | ||
3425 | * Iterate over all edges starting from start state of automaton 'a'. Calling | ||
3426 | * iterator for each edge. | ||
3427 | * | ||
3428 | * @param a automaton. | ||
3429 | * @param iterator iterator called for each edge. | ||
3430 | * @param iterator_cls closure. | ||
3431 | */ | ||
3432 | void | ||
3433 | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | ||
3434 | REGEX_INTERNAL_KeyIterator iterator, | ||
3435 | void *iterator_cls) | ||
3436 | { | ||
3437 | struct REGEX_INTERNAL_State *s; | ||
3438 | |||
3439 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n"); | ||
3440 | iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, | ||
3441 | GNUNET_REGEX_INITIAL_BYTES, | ||
3442 | NULL, | ||
3443 | a->start, | ||
3444 | iterator, | ||
3445 | iterator_cls); | ||
3446 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n"); | ||
3447 | for (s = a->states_head; NULL != s; s = s->next) | ||
3448 | { | ||
3449 | struct REGEX_BLOCK_Edge edges[s->transition_count]; | ||
3450 | unsigned int num_edges; | ||
3451 | |||
3452 | num_edges = state_get_edges (s, edges); | ||
3453 | if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting) | ||
3454 | { | ||
3455 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3456 | "Creating DFA edges at `%s' under key %s\n", | ||
3457 | s->proof, | ||
3458 | GNUNET_h2s (&s->hash)); | ||
3459 | iterator (iterator_cls, | ||
3460 | &s->hash, | ||
3461 | s->proof, | ||
3462 | s->accepting, | ||
3463 | num_edges, | ||
3464 | edges); | ||
3465 | } | ||
3466 | s->marked = GNUNET_NO; | ||
3467 | } | ||
3468 | } | ||
3469 | |||
3470 | |||
/**
 * Struct to hold all the relevant state information in the HashMap.
 *
 * Contains the same info as the Regex Iterator parameters except the key,
 * which comes directly from the HashMap iterator.
 */
struct temporal_state_store
{
  /**
   * GNUNET_YES once the state has been marked as reachable.
   */
  int reachable;

  /**
   * Copy of the proof of the state.
   */
  char *proof;

  /**
   * GNUNET_YES if this is an accepting state, GNUNET_NO if not.
   */
  int accepting;

  /**
   * Number of entries in @e edges. Unsigned, like the edge count that the
   * regex iterator callback receives.
   */
  unsigned int num_edges;

  /**
   * Copy of the edges leaving the state.
   */
  struct REGEX_BLOCK_Edge *edges;
};
3485 | |||
3486 | |||
3487 | /** | ||
3488 | * Bundles the client's regex iterator and its closure in one place, so that both can be passed to the hashmap iterator through a single pointer. | ||
3489 | */ | ||
3490 | struct client_iterator | ||
3491 | { | ||
3492 | REGEX_INTERNAL_KeyIterator iterator; /* callback of the client */ | ||
3493 | void *iterator_cls; /* closure handed to 'iterator' as first argument */ | ||
3494 | }; | ||
3495 | |||
3496 | |||
3497 | /** | ||
3498 | * Iterator over all edges of a dfa. Stores all of them in a HashMap | ||
3499 | * for later reachability marking. | ||
3500 | * | ||
3501 | * @param cls Closure (HashMap) | ||
3502 | * @param key hash for current state. | ||
3503 | * @param proof proof for current state | ||
3504 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
3505 | * @param num_edges number of edges leaving current state. | ||
3506 | * @param edges edges leaving current state. | ||
3507 | */ | ||
3508 | static void | ||
3509 | store_all_states (void *cls, | ||
3510 | const struct GNUNET_HashCode *key, | ||
3511 | const char *proof, | ||
3512 | int accepting, | ||
3513 | unsigned int num_edges, | ||
3514 | const struct REGEX_BLOCK_Edge *edges) | ||
3515 | { | ||
3516 | struct GNUNET_CONTAINER_MultiHashMap *hm = cls; | ||
3517 | struct temporal_state_store *tmp; | ||
3518 | size_t edges_size; | ||
3519 | |||
3520 | tmp = GNUNET_new (struct temporal_state_store); | ||
3521 | tmp->reachable = GNUNET_NO; | ||
3522 | tmp->proof = GNUNET_strdup (proof); | ||
3523 | tmp->accepting = accepting; | ||
3524 | tmp->num_edges = num_edges; | ||
3525 | edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges; | ||
3526 | tmp->edges = GNUNET_malloc (edges_size); | ||
3527 | GNUNET_memcpy (tmp->edges, edges, edges_size); | ||
3528 | GNUNET_assert (GNUNET_YES == | ||
3529 | GNUNET_CONTAINER_multihashmap_put ( | ||
3530 | hm, | ||
3531 | key, | ||
3532 | tmp, | ||
3533 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)); | ||
3534 | } | ||
3535 | |||
3536 | |||
3537 | /** | ||
3538 | * Mark state as reachable and call recursively on all its edges. | ||
3539 | * | ||
3540 | * If already marked as reachable, do nothing. | ||
3541 | * | ||
3542 | * @param state State to mark as reachable. | ||
3543 | * @param hm HashMap which stores all the states indexed by key. | ||
3544 | */ | ||
3545 | static void | ||
3546 | mark_as_reachable (struct temporal_state_store *state, | ||
3547 | struct GNUNET_CONTAINER_MultiHashMap *hm) | ||
3548 | { | ||
3549 | struct temporal_state_store *child; | ||
3550 | unsigned int i; | ||
3551 | |||
3552 | if (GNUNET_YES == state->reachable) | ||
3553 | /* visited */ | ||
3554 | return; | ||
3555 | |||
3556 | state->reachable = GNUNET_YES; | ||
3557 | for (i = 0; i < state->num_edges; i++) | ||
3558 | { | ||
3559 | child = | ||
3560 | GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination); | ||
3561 | if (NULL == child) | ||
3562 | { | ||
3563 | GNUNET_break (0); | ||
3564 | continue; | ||
3565 | } | ||
3566 | mark_as_reachable (child, hm); | ||
3567 | } | ||
3568 | } | ||
3569 | |||
3570 | |||
3571 | /** | ||
3572 | * Iterator over hash map entries to mark the ones that are reachable. | ||
3573 | * | ||
3574 | * @param cls closure | ||
3575 | * @param key current key code | ||
3576 | * @param value value in the hash map | ||
3577 | * @return #GNUNET_YES if we should continue to iterate, | ||
3578 | * #GNUNET_NO if not. | ||
3579 | */ | ||
3580 | static int | ||
3581 | reachability_iterator (void *cls, | ||
3582 | const struct GNUNET_HashCode *key, | ||
3583 | void *value) | ||
3584 | { | ||
3585 | struct GNUNET_CONTAINER_MultiHashMap *hm = cls; | ||
3586 | struct temporal_state_store *state = value; | ||
3587 | |||
3588 | if (GNUNET_YES == state->reachable) | ||
3589 | /* already visited and marked */ | ||
3590 | return GNUNET_YES; | ||
3591 | |||
3592 | if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) && | ||
3593 | (GNUNET_NO == state->accepting) ) | ||
3594 | /* not directly reachable */ | ||
3595 | return GNUNET_YES; | ||
3596 | |||
3597 | mark_as_reachable (state, hm); | ||
3598 | return GNUNET_YES; | ||
3599 | } | ||
3600 | |||
3601 | |||
3602 | /** | ||
3603 | * Iterator over hash map entries. | ||
3604 | * Calling the callback on the ones marked as reachables. | ||
3605 | * | ||
3606 | * @param cls closure | ||
3607 | * @param key current key code | ||
3608 | * @param value value in the hash map | ||
3609 | * @return #GNUNET_YES if we should continue to iterate, | ||
3610 | * #GNUNET_NO if not. | ||
3611 | */ | ||
3612 | static int | ||
3613 | iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value) | ||
3614 | { | ||
3615 | struct client_iterator *ci = cls; | ||
3616 | struct temporal_state_store *state = value; | ||
3617 | |||
3618 | if (GNUNET_YES == state->reachable) | ||
3619 | { | ||
3620 | ci->iterator (ci->iterator_cls, | ||
3621 | key, | ||
3622 | state->proof, | ||
3623 | state->accepting, | ||
3624 | state->num_edges, | ||
3625 | state->edges); | ||
3626 | } | ||
3627 | GNUNET_free (state->edges); | ||
3628 | GNUNET_free (state->proof); | ||
3629 | GNUNET_free (state); | ||
3630 | return GNUNET_YES; | ||
3631 | } | ||
3632 | |||
3633 | |||
3634 | void | ||
3635 | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, | ||
3636 | REGEX_INTERNAL_KeyIterator iterator, | ||
3637 | void *iterator_cls) | ||
3638 | { | ||
3639 | struct GNUNET_CONTAINER_MultiHashMap *hm; | ||
3640 | struct client_iterator ci; | ||
3641 | |||
3642 | hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_NO); | ||
3643 | ci.iterator = iterator; | ||
3644 | ci.iterator_cls = iterator_cls; | ||
3645 | |||
3646 | REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm); | ||
3647 | GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm); | ||
3648 | GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci); | ||
3649 | |||
3650 | GNUNET_CONTAINER_multihashmap_destroy (hm); | ||
3651 | } | ||
3652 | |||
3653 | |||
3654 | /* end of regex_internal.c */ | ||
diff --git a/src/service/regex/regex_internal.h b/src/service/regex/regex_internal.h new file mode 100644 index 000000000..8d4526d1f --- /dev/null +++ b/src/service/regex/regex_internal.h | |||
@@ -0,0 +1,457 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/service/regex/regex_internal.h | ||
22 | * @brief common internal definitions for regex library. | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #ifndef REGEX_INTERNAL_H | ||
26 | #define REGEX_INTERNAL_H | ||
27 | |||
28 | #include "regex_internal_lib.h" | ||
29 | |||
30 | #ifdef __cplusplus | ||
31 | extern "C" | ||
32 | { | ||
33 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
34 | } | ||
35 | #endif | ||
36 | #endif | ||
37 | |||
38 | /** | ||
39 | * String of the literal characters that are allowed inside a regex (apart from the | ||
40 | * operators): the digits 0-9 followed by the letters A-Z and a-z. | ||
41 | */ | ||
42 | #define ALLOWED_LITERALS \ | ||
43 | "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | ||
44 | |||
45 | |||
46 | /** | ||
47 | * Transition between two states. Transitions are stored at the states from | ||
48 | * which they originate ('from_state'). Each state can have 0-n transitions. | ||
49 | * If label is NULL, this is considered to be an epsilon transition. | ||
50 | */ | ||
51 | struct REGEX_INTERNAL_Transition | ||
52 | { | ||
53 | /** | ||
54 | * This is a linked list: previous transition. | ||
55 | */ | ||
56 | struct REGEX_INTERNAL_Transition *prev; | ||
57 | |||
58 | /** | ||
59 | * This is a linked list: next transition. | ||
60 | */ | ||
61 | struct REGEX_INTERNAL_Transition *next; | ||
62 | |||
63 | /** | ||
64 | * Unique id of this transition. | ||
65 | */ | ||
66 | unsigned int id; | ||
67 | |||
68 | /** | ||
69 | * Label for this transition. This is basically the edge label for the graph. NULL for an epsilon transition. | ||
70 | */ | ||
71 | char *label; | ||
72 | |||
73 | /** | ||
74 | * State to which this transition leads. | ||
75 | */ | ||
76 | struct REGEX_INTERNAL_State *to_state; | ||
77 | |||
78 | /** | ||
79 | * State from which this transition originates. | ||
80 | */ | ||
81 | struct REGEX_INTERNAL_State *from_state; | ||
82 | }; | ||
83 | |||
84 | |||
85 | /** | ||
86 | * A state. Can be used in DFA and NFA automatons. | ||
87 | */ | ||
88 | struct REGEX_INTERNAL_State; | ||
89 | |||
90 | |||
91 | /** | ||
92 | * Set of states, kept as an array of state pointers ('states') that has 'size' slots of which 'off' are in use. | ||
93 | */ | ||
94 | struct REGEX_INTERNAL_StateSet | ||
95 | { | ||
96 | /** | ||
97 | * Array of states. | ||
98 | */ | ||
99 | struct REGEX_INTERNAL_State **states; | ||
100 | |||
101 | /** | ||
102 | * Number of entries in *use* in the 'states' array. | ||
103 | */ | ||
104 | unsigned int off; | ||
105 | |||
106 | /** | ||
107 | * Length of the 'states' array (number of slots, used or not). | ||
108 | */ | ||
109 | unsigned int size; | ||
110 | }; | ||
111 | |||
112 | |||
113 | /** | ||
114 | * A state of an automaton. Can be used in DFA and NFA automatons. | ||
115 | */ | ||
116 | struct REGEX_INTERNAL_State | ||
117 | { | ||
118 | /** | ||
119 | * This is a linked list to keep states in an automaton: previous state. | ||
120 | */ | ||
121 | struct REGEX_INTERNAL_State *prev; | ||
122 | |||
123 | /** | ||
124 | * This is a linked list to keep states in an automaton: next state. | ||
125 | */ | ||
126 | struct REGEX_INTERNAL_State *next; | ||
127 | |||
128 | /** | ||
129 | * This is a multi DLL for StateSet_MDLL. | ||
130 | */ | ||
131 | struct REGEX_INTERNAL_State *prev_SS; | ||
132 | |||
133 | /** | ||
134 | * This is a multi DLL for StateSet_MDLL. | ||
135 | */ | ||
136 | struct REGEX_INTERNAL_State *next_SS; | ||
137 | |||
138 | /** | ||
139 | * This is a multi DLL for StateSet_MDLL Stack. | ||
140 | */ | ||
141 | struct REGEX_INTERNAL_State *prev_ST; | ||
142 | |||
143 | /** | ||
144 | * This is a multi DLL for StateSet_MDLL Stack. | ||
145 | */ | ||
146 | struct REGEX_INTERNAL_State *next_ST; | ||
147 | |||
148 | /** | ||
149 | * Unique state id. | ||
150 | */ | ||
151 | unsigned int id; | ||
152 | |||
153 | /** | ||
154 | * Unique state id that is used for traversing the automaton. It is guaranteed | ||
155 | * to be > 0 and < state_count. NOTE(review): the lower bound may be meant as '>= 0' -- confirm against the code that assigns this field. | ||
156 | */ | ||
157 | unsigned int traversal_id; | ||
158 | |||
159 | /** | ||
160 | * GNUNET_YES if this is an accepting state. | ||
161 | */ | ||
162 | int accepting; | ||
163 | |||
164 | /** | ||
165 | * Marking of the state. This is used for marking all visited states when | ||
166 | * traversing all states of an automaton and for cases where the state id | ||
167 | * cannot be used (dfa minimization). | ||
168 | */ | ||
169 | int marked; | ||
170 | |||
171 | /** | ||
172 | * Marking the state as contained. This is used for checking, if the state is | ||
173 | * contained in a set in constant time. | ||
174 | */ | ||
175 | int contained; | ||
176 | |||
177 | /** | ||
178 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
179 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state | ||
180 | * is not a part of any SCC. | ||
181 | */ | ||
182 | unsigned int scc_id; | ||
183 | |||
184 | /** | ||
185 | * Used for SCC detection. | ||
186 | */ | ||
187 | int index; | ||
188 | |||
189 | /** | ||
190 | * Used for SCC detection. | ||
191 | */ | ||
192 | int lowlink; | ||
193 | |||
194 | /** | ||
195 | * Human readable name of the state. Used for debugging and graph | ||
196 | * creation. | ||
197 | */ | ||
198 | char *name; | ||
199 | |||
200 | /** | ||
201 | * Hash of the state. | ||
202 | */ | ||
203 | struct GNUNET_HashCode hash; | ||
204 | |||
205 | /** | ||
206 | * Linear state ID acquired by depth-first-search. This ID should be used for | ||
207 | * storing information about the state in an array, because the 'id' of the | ||
208 | * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0 | ||
209 | * and < 'state_count'. NOTE(review): the lower bound may be meant as '>= 0' -- confirm against the code that assigns this field. | ||
210 | */ | ||
211 | unsigned int dfs_id; | ||
212 | |||
213 | /** | ||
214 | * Proof for this state. | ||
215 | */ | ||
216 | char *proof; | ||
217 | |||
218 | /** | ||
219 | * Number of transitions from this state to other states. | ||
220 | */ | ||
221 | unsigned int transition_count; | ||
222 | |||
223 | /** | ||
224 | * DLL of transitions (head). | ||
225 | */ | ||
226 | struct REGEX_INTERNAL_Transition *transitions_head; | ||
227 | |||
228 | /** | ||
229 | * DLL of transitions (tail). | ||
230 | */ | ||
231 | struct REGEX_INTERNAL_Transition *transitions_tail; | ||
232 | |||
233 | /** | ||
234 | * Number of incoming transitions. Used for compressing DFA paths. | ||
235 | */ | ||
236 | unsigned int incoming_transition_count; | ||
237 | |||
238 | /** | ||
239 | * Set of states on which this state is based. Used when creating a DFA out | ||
240 | * of several NFA states. | ||
241 | */ | ||
242 | struct REGEX_INTERNAL_StateSet nfa_set; | ||
243 | }; | ||
244 | |||
245 | |||
246 | /** | ||
247 | * Type of an automaton; stored in the 'type' field of struct REGEX_INTERNAL_Automaton. | ||
248 | */ | ||
249 | enum REGEX_INTERNAL_AutomatonType | ||
250 | { | ||
251 | NFA, /* nondeterministic finite automaton */ | ||
252 | DFA /* deterministic finite automaton */ | ||
253 | }; | ||
254 | |||
255 | |||
256 | /** | ||
257 | * Automaton representation, used for both NFAs and DFAs (see 'type'). | ||
258 | */ | ||
259 | struct REGEX_INTERNAL_Automaton | ||
260 | { | ||
261 | /** | ||
262 | * Linked list of NFAs used for partial NFA creation: previous automaton. | ||
263 | */ | ||
264 | struct REGEX_INTERNAL_Automaton *prev; | ||
265 | |||
266 | /** | ||
267 | * Linked list of NFAs used for partial NFA creation: next automaton. | ||
268 | */ | ||
269 | struct REGEX_INTERNAL_Automaton *next; | ||
270 | |||
271 | /** | ||
272 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
273 | * where each NFA itself consists of one or more NFAs linked together. | ||
274 | */ | ||
275 | struct REGEX_INTERNAL_State *start; | ||
276 | |||
277 | /** | ||
278 | * End state of the partial NFA. This is undefined for DFAs. | ||
279 | */ | ||
280 | struct REGEX_INTERNAL_State *end; | ||
281 | |||
282 | /** | ||
283 | * Number of states in the automaton. | ||
284 | */ | ||
285 | unsigned int state_count; | ||
286 | |||
287 | /** | ||
288 | * DLL of states (head). | ||
289 | */ | ||
290 | struct REGEX_INTERNAL_State *states_head; | ||
291 | |||
292 | /** | ||
293 | * DLL of states (tail). | ||
294 | */ | ||
295 | struct REGEX_INTERNAL_State *states_tail; | ||
296 | |||
297 | /** | ||
298 | * Type of the automaton. | ||
299 | */ | ||
300 | enum REGEX_INTERNAL_AutomatonType type; | ||
301 | |||
302 | /** | ||
303 | * Regex string associated with this automaton. | ||
304 | */ | ||
305 | char *regex; | ||
306 | |||
307 | /** | ||
308 | * Canonical regex (result of RX->NFA->DFA->RX) | ||
309 | */ | ||
310 | char *canonical_regex; | ||
311 | |||
312 | /** | ||
313 | * GNUNET_YES, if multi strides have been added to the Automaton. | ||
314 | */ | ||
315 | int is_multistrided; | ||
316 | }; | ||
317 | |||
318 | |||
319 | /** | ||
320 | * Construct an NFA by parsing the regex string of length 'len'. | ||
321 | * | ||
322 | * @param regex regular expression string. | ||
323 | * @param len length of the string. | ||
324 | * | ||
325 | * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy. | ||
326 | */ | ||
327 | struct REGEX_INTERNAL_Automaton * | ||
328 | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len); | ||
329 | |||
330 | |||
331 | /** | ||
332 | * Function that gets passed to automaton traversal and is called before each | ||
333 | * next traversal from state 's' using transition 't' to check if traversal | ||
334 | * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue. | ||
335 | * | ||
336 | * @param cls closure for the check. | ||
337 | * @param s current state in the traversal. | ||
338 | * @param t current transition from state 's' that will be used for the next | ||
339 | * step. | ||
340 | * | ||
341 | * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop. | ||
342 | */ | ||
343 | typedef int | ||
344 | (*REGEX_INTERNAL_traverse_check) (void *cls, | ||
345 | struct REGEX_INTERNAL_State *s, | ||
346 | struct REGEX_INTERNAL_Transition *t); | ||
347 | |||
348 | |||
349 | /** | ||
350 | * Function that is called with each state, when traversing an automaton. | ||
351 | * | ||
352 | * @param cls closure. | ||
353 | * @param count current count of the state, from 0 to a->state_count -1. | ||
354 | * @param s state. | ||
355 | */ | ||
356 | typedef void | ||
357 | (*REGEX_INTERNAL_traverse_action) (void *cls, | ||
358 | const unsigned int count, | ||
359 | struct REGEX_INTERNAL_State *s); | ||
360 | |||
361 | |||
362 | /** | ||
363 | * Traverses the given automaton using depth-first-search (DFS) from its start | ||
364 | * state, visiting all reachable states and calling 'action' on each one of | ||
365 | * them. | ||
366 | * | ||
367 | * @param a automaton to be traversed. | ||
368 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
369 | * @param check function that is checked before advancing on each transition | ||
370 | * in the DFS. | ||
371 | * @param check_cls closure for @a check. | ||
372 | * @param action action to be performed on each state. | ||
373 | * @param action_cls closure for @a action | ||
374 | */ | ||
375 | void | ||
376 | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, | ||
377 | struct REGEX_INTERNAL_State *start, | ||
378 | REGEX_INTERNAL_traverse_check check, | ||
379 | void *check_cls, | ||
380 | REGEX_INTERNAL_traverse_action action, | ||
381 | void *action_cls); | ||
382 | |||
383 | /** | ||
384 | * Get the canonical regex of the given automaton. | ||
385 | * When constructing the automaton a proof is computed for each state, | ||
386 | * consisting of the regular expression leading to this state. A complete | ||
387 | * regex for the automaton can be computed by combining these proofs. | ||
388 | * As of now this function is only useful for testing. | ||
389 | * | ||
390 | * @param a automaton for which the canonical regex should be returned. | ||
391 | * | ||
392 | * @return canonical regex string. | ||
393 | */ | ||
394 | const char * | ||
395 | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a); | ||
396 | |||
397 | |||
398 | /** | ||
399 | * Get the number of transitions that are contained in the given automaton. | ||
400 | * | ||
401 | * @param a automaton for which the number of transitions should be returned. | ||
402 | * | ||
403 | * @return number of transitions in the given automaton. | ||
404 | */ | ||
405 | unsigned int | ||
406 | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a); | ||
407 | |||
408 | |||
409 | /** | ||
410 | * Context that contains an id counter for states and transitions as well as a | ||
411 | * DLL of automatons used as a stack for NFA construction. | ||
412 | */ | ||
413 | struct REGEX_INTERNAL_Context | ||
414 | { | ||
415 | /** | ||
416 | * Counter for unique state ids. | ||
417 | */ | ||
418 | unsigned int state_id; | ||
419 | |||
420 | /** | ||
421 | * Counter for unique transition ids. | ||
422 | */ | ||
423 | unsigned int transition_id; | ||
424 | |||
425 | /** | ||
426 | * DLL of REGEX_INTERNAL_Automaton's used as a stack (head). | ||
427 | */ | ||
428 | struct REGEX_INTERNAL_Automaton *stack_head; | ||
429 | |||
430 | /** | ||
431 | * DLL of REGEX_INTERNAL_Automaton's used as a stack (tail). | ||
432 | */ | ||
433 | struct REGEX_INTERNAL_Automaton *stack_tail; | ||
434 | }; | ||
435 | |||
436 | |||
437 | /** | ||
438 | * Adds multi-strided transitions to the given 'dfa'. | ||
439 | * | ||
440 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
441 | * @param dfa DFA to which the multi strided transitions should be added. | ||
442 | * @param stride_len length of the strides. | ||
443 | */ | ||
444 | void | ||
445 | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, | ||
446 | struct REGEX_INTERNAL_Automaton *dfa, | ||
447 | const unsigned int stride_len); | ||
448 | |||
449 | |||
450 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
451 | { | ||
452 | #endif | ||
453 | #ifdef __cplusplus | ||
454 | } | ||
455 | #endif | ||
456 | |||
457 | #endif | ||
diff --git a/src/service/regex/regex_internal_dht.c b/src/service/regex/regex_internal_dht.c new file mode 100644 index 000000000..48850f87f --- /dev/null +++ b/src/service/regex/regex_internal_dht.c | |||
@@ -0,0 +1,830 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2015 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/service/regex/regex_internal_dht.c | ||
22 | * @brief library to announce regexes in the network and match strings | ||
23 | * against published regexes. | ||
24 | * @author Bartlomiej Polot | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "regex_internal_lib.h" | ||
28 | #include "gnunet_dht_service.h" | ||
29 | #include "gnunet_statistics_service.h" | ||
30 | #include "gnunet_constants.h" | ||
31 | #include "gnunet_signatures.h" | ||
32 | |||
33 | |||
/**
 * Log a message under the "regex-dht" component name.
 */
#define LOG(kind, ...) GNUNET_log_from (kind, "regex-dht", __VA_ARGS__)

/**
 * Replication level handed to the DHT for our PUT and GET requests.
 */
#define DHT_REPLICATION 5

/**
 * Lifetime (relative to the time of the PUT) of the records we store
 * in the DHT.
 */
#define DHT_TTL GNUNET_TIME_UNIT_HOURS

/**
 * Route options used for our DHT requests.
 */
#define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
50 | |||
51 | |||
/**
 * State kept for one announced regular expression, so that it can be
 * re-announced without rebuilding the automaton.
 */
struct REGEX_INTERNAL_Announcement
{
  /**
   * Handle to the DHT service; supplied by the caller, not owned by us.
   */
  struct GNUNET_DHT_Handle *dht;

  /**
   * The announced regular expression.  This is the pointer that was handed
   * to #REGEX_INTERNAL_announce(); no copy is made.
   */
  const char *regex;

  /**
   * DFA built from @e regex (expensive to construct, hence cached).
   */
  struct REGEX_INTERNAL_Automaton *dfa;

  /**
   * Private key used to sign the accept blocks we publish.
   */
  const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;

  /**
   * Statistics handle used to report usage, may be NULL.
   */
  struct GNUNET_STATISTICS_Handle *stats;
};
82 | |||
83 | |||
84 | /** | ||
85 | * Regex callback iterator to store own service description in the DHT. | ||
86 | * | ||
87 | * @param cls closure. | ||
88 | * @param key hash for current state. | ||
89 | * @param proof proof for current state. | ||
90 | * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not. | ||
91 | * @param num_edges number of edges leaving current state. | ||
92 | * @param edges edges leaving current state. | ||
93 | */ | ||
94 | static void | ||
95 | regex_iterator (void *cls, | ||
96 | const struct GNUNET_HashCode *key, | ||
97 | const char *proof, | ||
98 | int accepting, | ||
99 | unsigned int num_edges, | ||
100 | const struct REGEX_BLOCK_Edge *edges) | ||
101 | { | ||
102 | struct REGEX_INTERNAL_Announcement *h = cls; | ||
103 | struct RegexBlock *block; | ||
104 | size_t size; | ||
105 | unsigned int i; | ||
106 | |||
107 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
108 | "DHT PUT for state %s with proof `%s' and %u edges:\n", | ||
109 | GNUNET_h2s (key), | ||
110 | proof, | ||
111 | num_edges); | ||
112 | for (i = 0; i < num_edges; i++) | ||
113 | { | ||
114 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
115 | "Edge %u `%s' towards %s\n", | ||
116 | i, | ||
117 | edges[i].label, | ||
118 | GNUNET_h2s (&edges[i].destination)); | ||
119 | } | ||
120 | if (GNUNET_YES == accepting) | ||
121 | { | ||
122 | struct RegexAcceptBlock ab; | ||
123 | |||
124 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
125 | "State %s is accepting, putting own id\n", | ||
126 | GNUNET_h2s (key)); | ||
127 | size = sizeof(struct RegexAcceptBlock); | ||
128 | ab.purpose.size = ntohl (sizeof(struct GNUNET_CRYPTO_EccSignaturePurpose) | ||
129 | + sizeof(struct GNUNET_TIME_AbsoluteNBO) | ||
130 | + sizeof(struct GNUNET_HashCode)); | ||
131 | ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT); | ||
132 | ab.expiration_time = GNUNET_TIME_absolute_hton ( | ||
133 | GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION)); | ||
134 | ab.key = *key; | ||
135 | GNUNET_CRYPTO_eddsa_key_get_public (h->priv, | ||
136 | &ab.peer.public_key); | ||
137 | GNUNET_assert (GNUNET_OK == | ||
138 | GNUNET_CRYPTO_eddsa_sign_ (h->priv, | ||
139 | &ab.purpose, | ||
140 | &ab.signature)); | ||
141 | |||
142 | GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored", | ||
143 | 1, GNUNET_NO); | ||
144 | GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored", | ||
145 | sizeof(struct RegexAcceptBlock), GNUNET_NO); | ||
146 | (void) | ||
147 | GNUNET_DHT_put (h->dht, key, | ||
148 | DHT_REPLICATION, | ||
149 | DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE, | ||
150 | GNUNET_BLOCK_TYPE_REGEX_ACCEPT, | ||
151 | size, | ||
152 | &ab, | ||
153 | GNUNET_TIME_relative_to_absolute (DHT_TTL), | ||
154 | NULL, NULL); | ||
155 | } | ||
156 | block = REGEX_BLOCK_create (proof, | ||
157 | num_edges, | ||
158 | edges, | ||
159 | accepting, | ||
160 | &size); | ||
161 | if (NULL == block) | ||
162 | return; | ||
163 | (void) GNUNET_DHT_put (h->dht, | ||
164 | key, | ||
165 | DHT_REPLICATION, | ||
166 | DHT_OPT, | ||
167 | GNUNET_BLOCK_TYPE_REGEX, | ||
168 | size, | ||
169 | block, | ||
170 | GNUNET_TIME_relative_to_absolute (DHT_TTL), | ||
171 | NULL, | ||
172 | NULL); | ||
173 | GNUNET_STATISTICS_update (h->stats, | ||
174 | "# regex blocks stored", | ||
175 | 1, | ||
176 | GNUNET_NO); | ||
177 | GNUNET_STATISTICS_update (h->stats, | ||
178 | "# regex block bytes stored", | ||
179 | size, | ||
180 | GNUNET_NO); | ||
181 | GNUNET_free (block); | ||
182 | } | ||
183 | |||
184 | |||
185 | /** | ||
186 | * Announce a regular expression: put all states of the automaton in the DHT. | ||
187 | * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that. | ||
188 | * | ||
189 | * @param dht An existing and valid DHT service handle. CANNOT be NULL. | ||
190 | * @param priv our private key, must remain valid until the announcement is cancelled | ||
191 | * @param regex Regular expression to announce. | ||
192 | * @param compression How many characters per edge can we squeeze? | ||
193 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
194 | * @return Handle to reuse o free cached resources. | ||
195 | * Must be freed by calling #REGEX_INTERNAL_announce_cancel(). | ||
196 | */ | ||
197 | struct REGEX_INTERNAL_Announcement * | ||
198 | REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht, | ||
199 | const struct GNUNET_CRYPTO_EddsaPrivateKey *priv, | ||
200 | const char *regex, | ||
201 | uint16_t compression, | ||
202 | struct GNUNET_STATISTICS_Handle *stats) | ||
203 | { | ||
204 | struct REGEX_INTERNAL_Announcement *h; | ||
205 | |||
206 | GNUNET_assert (NULL != dht); | ||
207 | h = GNUNET_new (struct REGEX_INTERNAL_Announcement); | ||
208 | h->regex = regex; | ||
209 | h->dht = dht; | ||
210 | h->stats = stats; | ||
211 | h->priv = priv; | ||
212 | h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression); | ||
213 | REGEX_INTERNAL_reannounce (h); | ||
214 | return h; | ||
215 | } | ||
216 | |||
217 | |||
218 | void | ||
219 | REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h) | ||
220 | { | ||
221 | GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */ | ||
222 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
223 | "REGEX_INTERNAL_reannounce: %s\n", | ||
224 | h->regex); | ||
225 | REGEX_INTERNAL_iterate_reachable_edges (h->dfa, | ||
226 | ®ex_iterator, | ||
227 | h); | ||
228 | } | ||
229 | |||
230 | |||
231 | /** | ||
232 | * Clear all cached data used by a regex announce. | ||
233 | * Does not close DHT connection. | ||
234 | * | ||
235 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
236 | */ | ||
237 | void | ||
238 | REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h) | ||
239 | { | ||
240 | REGEX_INTERNAL_automaton_destroy (h->dfa); | ||
241 | GNUNET_free (h); | ||
242 | } | ||
243 | |||
244 | |||
245 | /******************************************************************************/ | ||
246 | |||
247 | |||
248 | /** | ||
249 | * Struct to keep state of running searches that have consumed a part of | ||
250 | * the initial string. | ||
251 | */ | ||
252 | struct RegexSearchContext | ||
253 | { | ||
254 | /** | ||
255 | * Part of the description already consumed by | ||
256 | * this particular search branch. | ||
257 | */ | ||
258 | size_t position; | ||
259 | |||
260 | /** | ||
261 | * Information about the search. | ||
262 | */ | ||
263 | struct REGEX_INTERNAL_Search *info; | ||
264 | |||
265 | /** | ||
266 | * We just want to look for one edge, the longer the better. | ||
267 | * Keep its length. | ||
268 | */ | ||
269 | unsigned int longest_match; | ||
270 | |||
271 | /** | ||
272 | * Destination hash of the longest match. | ||
273 | */ | ||
274 | struct GNUNET_HashCode hash; | ||
275 | }; | ||
276 | |||
277 | |||
/**
 * Value type stored in the `dht_get_results` map: one block received
 * from the DHT.
 */
struct Result
{
  /**
   * Length of @e data in bytes.
   */
  size_t size;

  /**
   * The raw block.  The result handler allocates the bytes directly
   * behind this struct and points this member at them.
   */
  const void *data;
};
293 | |||
294 | |||
295 | /** | ||
296 | * Struct to keep information of searches of services described by a regex | ||
297 | * using a user-provided string service description. | ||
298 | */ | ||
299 | struct REGEX_INTERNAL_Search | ||
300 | { | ||
301 | /** | ||
302 | * DHT handle to use, must be initialized externally. | ||
303 | */ | ||
304 | struct GNUNET_DHT_Handle *dht; | ||
305 | |||
306 | /** | ||
307 | * Optional statistics handle to report usage. Can be NULL. | ||
308 | */ | ||
309 | struct GNUNET_STATISTICS_Handle *stats; | ||
310 | |||
311 | /** | ||
312 | * User provided description of the searched service. | ||
313 | */ | ||
314 | char *description; | ||
315 | |||
316 | /** | ||
317 | * Running DHT GETs. | ||
318 | */ | ||
319 | struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles; | ||
320 | |||
321 | /** | ||
322 | * Results from running DHT GETs, values are of type | ||
323 | * 'struct Result'. | ||
324 | */ | ||
325 | struct GNUNET_CONTAINER_MultiHashMap *dht_get_results; | ||
326 | |||
327 | /** | ||
328 | * Contexts, for each running DHT GET. Free all on end of search. | ||
329 | */ | ||
330 | struct RegexSearchContext **contexts; | ||
331 | |||
332 | /** | ||
333 | * Number of contexts (branches/steps in search). | ||
334 | */ | ||
335 | unsigned int n_contexts; | ||
336 | |||
337 | /** | ||
338 | * @param callback Callback for found peers. | ||
339 | */ | ||
340 | REGEX_INTERNAL_Found callback; | ||
341 | |||
342 | /** | ||
343 | * @param callback_cls Closure for @c callback. | ||
344 | */ | ||
345 | void *callback_cls; | ||
346 | }; | ||
347 | |||
348 | |||
/**
 * Follow the edge with the longest matching token to the next state
 * (forward declaration, the handlers below need it).
 *
 * @param block Block found in the DHT.
 * @param size Size of the block.
 * @param ctx Context of the search.
 */
static void
regex_next_edge (const struct RegexBlock *block,
                 size_t size,
                 struct RegexSearchContext *ctx);
360 | |||
361 | |||
362 | /** | ||
363 | * Function to process DHT string to regex matching. | ||
364 | * Called on each result obtained for the DHT search. | ||
365 | * | ||
366 | * @param cls Closure (search context). | ||
367 | * @param exp When will this value expire. | ||
368 | * @param trunc_peer truncated peer, or NULL if none was truncated | ||
369 | * @param key Key of the result. | ||
370 | * @param get_path Path of the get request. | ||
371 | * @param get_path_length Length of get_path. | ||
372 | * @param put_path Path of the put request. | ||
373 | * @param put_path_length Length of the put_path. | ||
374 | * @param type Type of the result. | ||
375 | * @param size Number of bytes in data. | ||
376 | * @param data Pointer to the result data. | ||
377 | */ | ||
378 | static void | ||
379 | dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp, | ||
380 | const struct GNUNET_HashCode *key, | ||
381 | const struct GNUNET_PeerIdentity *trunc_peer, | ||
382 | const struct GNUNET_DHT_PathElement *get_path, | ||
383 | unsigned int get_path_length, | ||
384 | const struct GNUNET_DHT_PathElement *put_path, | ||
385 | unsigned int put_path_length, | ||
386 | enum GNUNET_BLOCK_Type type, | ||
387 | size_t size, const void *data) | ||
388 | { | ||
389 | const struct RegexAcceptBlock *block = data; | ||
390 | struct RegexSearchContext *ctx = cls; | ||
391 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
392 | |||
393 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
394 | "Regex result accept for %s (key %s)\n", | ||
395 | info->description, GNUNET_h2s (key)); | ||
396 | |||
397 | GNUNET_STATISTICS_update (info->stats, | ||
398 | "# regex accepting blocks found", | ||
399 | 1, GNUNET_NO); | ||
400 | GNUNET_STATISTICS_update (info->stats, | ||
401 | "# regex accepting block bytes found", | ||
402 | size, GNUNET_NO); | ||
403 | info->callback (info->callback_cls, | ||
404 | &block->peer, | ||
405 | get_path, get_path_length, | ||
406 | put_path, put_path_length); | ||
407 | } | ||
408 | |||
409 | |||
410 | /** | ||
411 | * Find a path to a peer that offers a regex service compatible | ||
412 | * with a given string. | ||
413 | * | ||
414 | * @param key The key of the accepting state. | ||
415 | * @param ctx Context containing info about the string, tunnel, etc. | ||
416 | */ | ||
417 | static void | ||
418 | regex_find_path (const struct GNUNET_HashCode *key, | ||
419 | struct RegexSearchContext *ctx) | ||
420 | { | ||
421 | struct GNUNET_DHT_GetHandle *get_h; | ||
422 | |||
423 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
424 | "Accept state found, now searching for paths to %s\n", | ||
425 | GNUNET_h2s (key)); | ||
426 | get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */ | ||
427 | GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */ | ||
428 | key, /* key to search */ | ||
429 | DHT_REPLICATION, /* replication level */ | ||
430 | DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE, | ||
431 | NULL, /* xquery */ // FIXME BLOOMFILTER | ||
432 | 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE | ||
433 | &dht_get_string_accept_handler, ctx); | ||
434 | GNUNET_break (GNUNET_OK == | ||
435 | GNUNET_CONTAINER_multihashmap_put (ctx->info->dht_get_handles, | ||
436 | key, | ||
437 | get_h, | ||
438 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)); | ||
439 | } | ||
440 | |||
441 | |||
442 | /** | ||
443 | * Function to process DHT string to regex matching. | ||
444 | * Called on each result obtained for the DHT search. | ||
445 | * | ||
446 | * @param cls closure (search context) | ||
447 | * @param exp when will this value expire | ||
448 | * @param key key of the result | ||
449 | * @param trunc_peer NULL if not truncated | ||
450 | * @param get_path path of the get request (not used) | ||
451 | * @param get_path_length length of @a get_path (not used) | ||
452 | * @param put_path path of the put request (not used) | ||
453 | * @param put_path_length length of the @a put_path (not used) | ||
454 | * @param type type of the result | ||
455 | * @param size number of bytes in data | ||
456 | * @param data pointer to the result data | ||
457 | * | ||
458 | * TODO: re-issue the request after certain time? cancel after X results? | ||
459 | */ | ||
460 | static void | ||
461 | dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp, | ||
462 | const struct GNUNET_HashCode *key, | ||
463 | const struct GNUNET_PeerIdentity *trunc_peer, | ||
464 | const struct GNUNET_DHT_PathElement *get_path, | ||
465 | unsigned int get_path_length, | ||
466 | const struct GNUNET_DHT_PathElement *put_path, | ||
467 | unsigned int put_path_length, | ||
468 | enum GNUNET_BLOCK_Type type, | ||
469 | size_t size, const void *data) | ||
470 | { | ||
471 | const struct RegexBlock *block = data; | ||
472 | struct RegexSearchContext *ctx = cls; | ||
473 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
474 | size_t len; | ||
475 | struct Result *copy; | ||
476 | |||
477 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
478 | "DHT GET result for %s (%s)\n", | ||
479 | GNUNET_h2s (key), ctx->info->description); | ||
480 | copy = GNUNET_malloc (sizeof(struct Result) + size); | ||
481 | copy->size = size; | ||
482 | copy->data = ©[1]; | ||
483 | GNUNET_memcpy (©[1], block, size); | ||
484 | GNUNET_break (GNUNET_OK == | ||
485 | GNUNET_CONTAINER_multihashmap_put (info->dht_get_results, | ||
486 | key, copy, | ||
487 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)); | ||
488 | len = strlen (info->description); | ||
489 | if (len == ctx->position) // String processed | ||
490 | { | ||
491 | if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size)) | ||
492 | { | ||
493 | regex_find_path (key, ctx); | ||
494 | } | ||
495 | else | ||
496 | { | ||
497 | LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n"); | ||
498 | /* FIXME REGEX this block not successful, wait for more? start timeout? */ | ||
499 | } | ||
500 | return; | ||
501 | } | ||
502 | regex_next_edge (block, size, ctx); | ||
503 | } | ||
504 | |||
505 | |||
506 | /** | ||
507 | * Iterator over found existing cadet regex blocks that match an ongoing search. | ||
508 | * | ||
509 | * @param cls Closure (current context)- | ||
510 | * @param key Current key code (key for cached block). | ||
511 | * @param value Value in the hash map (cached RegexBlock). | ||
512 | * @return #GNUNET_YES: we should always continue to iterate. | ||
513 | */ | ||
514 | static int | ||
515 | regex_result_iterator (void *cls, | ||
516 | const struct GNUNET_HashCode *key, | ||
517 | void *value) | ||
518 | { | ||
519 | struct Result *result = value; | ||
520 | const struct RegexBlock *block = result->data; | ||
521 | struct RegexSearchContext *ctx = cls; | ||
522 | |||
523 | if ((GNUNET_YES == | ||
524 | GNUNET_BLOCK_is_accepting (block, result->size)) && | ||
525 | (ctx->position == strlen (ctx->info->description))) | ||
526 | { | ||
527 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
528 | "Found accepting known block\n"); | ||
529 | regex_find_path (key, ctx); | ||
530 | return GNUNET_YES; // We found an accept state! | ||
531 | } | ||
532 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
533 | "* %llu, %llu, [%u]\n", | ||
534 | (unsigned long long) ctx->position, | ||
535 | (unsigned long long) strlen (ctx->info->description), | ||
536 | GNUNET_BLOCK_is_accepting (block, | ||
537 | result->size)); | ||
538 | regex_next_edge (block, result->size, ctx); | ||
539 | |||
540 | GNUNET_STATISTICS_update (ctx->info->stats, | ||
541 | "# regex cadet blocks iterated", | ||
542 | 1, GNUNET_NO); | ||
543 | |||
544 | return GNUNET_YES; | ||
545 | } | ||
546 | |||
547 | |||
548 | /** | ||
549 | * Iterator over edges in a regex block retrieved from the DHT. | ||
550 | * | ||
551 | * @param cls Closure (context of the search). | ||
552 | * @param token Token that follows to next state. | ||
553 | * @param len Length of token. | ||
554 | * @param key Hash of next state. | ||
555 | * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise. | ||
556 | */ | ||
557 | static int | ||
558 | regex_edge_iterator (void *cls, | ||
559 | const char *token, | ||
560 | size_t len, | ||
561 | const struct GNUNET_HashCode *key) | ||
562 | { | ||
563 | struct RegexSearchContext *ctx = cls; | ||
564 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
565 | const char *current; | ||
566 | size_t current_len; | ||
567 | |||
568 | GNUNET_STATISTICS_update (info->stats, "# regex edges iterated", | ||
569 | 1, GNUNET_NO); | ||
570 | current = &info->description[ctx->position]; | ||
571 | current_len = strlen (info->description) - ctx->position; | ||
572 | if (len > current_len) | ||
573 | { | ||
574 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n"); | ||
575 | return GNUNET_YES; | ||
576 | } | ||
577 | if (0 != strncmp (current, token, len)) | ||
578 | { | ||
579 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n"); | ||
580 | return GNUNET_YES; | ||
581 | } | ||
582 | |||
583 | if (len > ctx->longest_match) | ||
584 | { | ||
585 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n"); | ||
586 | ctx->longest_match = len; | ||
587 | ctx->hash = *key; | ||
588 | } | ||
589 | else | ||
590 | { | ||
591 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n"); | ||
592 | } | ||
593 | |||
594 | LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n"); | ||
595 | return GNUNET_YES; | ||
596 | } | ||
597 | |||
598 | |||
599 | /** | ||
600 | * Jump to the next edge, with the longest matching token. | ||
601 | * | ||
602 | * @param block Block found in the DHT. | ||
603 | * @param size Size of the block. | ||
604 | * @param ctx Context of the search. | ||
605 | */ | ||
606 | static void | ||
607 | regex_next_edge (const struct RegexBlock *block, | ||
608 | size_t size, | ||
609 | struct RegexSearchContext *ctx) | ||
610 | { | ||
611 | struct RegexSearchContext *new_ctx; | ||
612 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
613 | struct GNUNET_DHT_GetHandle *get_h; | ||
614 | struct GNUNET_HashCode *hash; | ||
615 | const char *rest; | ||
616 | int result; | ||
617 | |||
618 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n"); | ||
619 | /* Find the longest match for the current string position, | ||
620 | * among tokens in the given block */ | ||
621 | ctx->longest_match = 0; | ||
622 | result = REGEX_BLOCK_iterate (block, size, | ||
623 | ®ex_edge_iterator, ctx); | ||
624 | GNUNET_break (GNUNET_OK == result); | ||
625 | |||
626 | /* Did anything match? */ | ||
627 | if (0 == ctx->longest_match) | ||
628 | { | ||
629 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
630 | "no match in block\n"); | ||
631 | return; | ||
632 | } | ||
633 | |||
634 | hash = &ctx->hash; | ||
635 | new_ctx = GNUNET_new (struct RegexSearchContext); | ||
636 | new_ctx->info = info; | ||
637 | new_ctx->position = ctx->position + ctx->longest_match; | ||
638 | GNUNET_array_append (info->contexts, info->n_contexts, new_ctx); | ||
639 | |||
640 | /* Check whether we already have a DHT GET running for it */ | ||
641 | if (GNUNET_YES == | ||
642 | GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash)) | ||
643 | { | ||
644 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
645 | "GET for %s running, END\n", | ||
646 | GNUNET_h2s (hash)); | ||
647 | GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results, | ||
648 | hash, | ||
649 | ®ex_result_iterator, | ||
650 | new_ctx); | ||
651 | return; /* We are already looking for it */ | ||
652 | } | ||
653 | |||
654 | GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed", | ||
655 | 1, GNUNET_NO); | ||
656 | |||
657 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
658 | "Following edges at %s for offset %u in `%s'\n", | ||
659 | GNUNET_h2s (hash), | ||
660 | (unsigned int) ctx->position, | ||
661 | info->description); | ||
662 | rest = &new_ctx->info->description[new_ctx->position]; | ||
663 | get_h = | ||
664 | GNUNET_DHT_get_start (info->dht, /* handle */ | ||
665 | GNUNET_BLOCK_TYPE_REGEX, /* type */ | ||
666 | hash, /* key to search */ | ||
667 | DHT_REPLICATION, /* replication level */ | ||
668 | DHT_OPT, | ||
669 | rest, /* xquery */ | ||
670 | strlen (rest) + 1, /* xquery bits */ | ||
671 | &dht_get_string_handler, new_ctx); | ||
672 | if (GNUNET_OK != | ||
673 | GNUNET_CONTAINER_multihashmap_put (info->dht_get_handles, | ||
674 | hash, | ||
675 | get_h, | ||
676 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)) | ||
677 | { | ||
678 | GNUNET_break (0); | ||
679 | return; | ||
680 | } | ||
681 | } | ||
682 | |||
683 | |||
684 | /** | ||
685 | * Search for a peer offering a regex matching certain string in the DHT. | ||
686 | * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results | ||
687 | * are returned. | ||
688 | * | ||
689 | * @param dht An existing and valid DHT service handle. | ||
690 | * @param string String to match against the regexes in the DHT. | ||
691 | * @param callback Callback for found peers. | ||
692 | * @param callback_cls Closure for @c callback. | ||
693 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
694 | * @return Handle to stop search and free resources. | ||
695 | * Must be freed by calling #REGEX_INTERNAL_search_cancel(). | ||
696 | */ | ||
697 | struct REGEX_INTERNAL_Search * | ||
698 | REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht, | ||
699 | const char *string, | ||
700 | REGEX_INTERNAL_Found callback, | ||
701 | void *callback_cls, | ||
702 | struct GNUNET_STATISTICS_Handle *stats) | ||
703 | { | ||
704 | struct REGEX_INTERNAL_Search *h; | ||
705 | struct GNUNET_DHT_GetHandle *get_h; | ||
706 | struct RegexSearchContext *ctx; | ||
707 | struct GNUNET_HashCode key; | ||
708 | size_t size; | ||
709 | size_t len; | ||
710 | |||
711 | /* Initialize handle */ | ||
712 | GNUNET_assert (NULL != dht); | ||
713 | GNUNET_assert (NULL != callback); | ||
714 | h = GNUNET_new (struct REGEX_INTERNAL_Search); | ||
715 | h->dht = dht; | ||
716 | h->description = GNUNET_strdup (string); | ||
717 | h->callback = callback; | ||
718 | h->callback_cls = callback_cls; | ||
719 | h->stats = stats; | ||
720 | h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO); | ||
721 | h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO); | ||
722 | |||
723 | /* Initialize context */ | ||
724 | len = strlen (string); | ||
725 | size = REGEX_INTERNAL_get_first_key (string, len, &key); | ||
726 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
727 | "Initial key for `%s' is %s (based on `%.*s')\n", | ||
728 | string, | ||
729 | GNUNET_h2s (&key), | ||
730 | (int) size, | ||
731 | string); | ||
732 | ctx = GNUNET_new (struct RegexSearchContext); | ||
733 | ctx->position = size; | ||
734 | ctx->info = h; | ||
735 | GNUNET_array_append (h->contexts, | ||
736 | h->n_contexts, | ||
737 | ctx); | ||
738 | /* Start search in DHT */ | ||
739 | get_h = GNUNET_DHT_get_start (h->dht, /* handle */ | ||
740 | GNUNET_BLOCK_TYPE_REGEX, /* type */ | ||
741 | &key, /* key to search */ | ||
742 | DHT_REPLICATION, /* replication level */ | ||
743 | DHT_OPT, | ||
744 | &h->description[size], /* xquery */ | ||
745 | // FIXME add BLOOMFILTER to exclude filtered peers | ||
746 | len + 1 - size, /* xquery bits */ | ||
747 | // FIXME add BLOOMFILTER SIZE | ||
748 | &dht_get_string_handler, ctx); | ||
749 | GNUNET_break ( | ||
750 | GNUNET_OK == | ||
751 | GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles, | ||
752 | &key, | ||
753 | get_h, | ||
754 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST) | ||
755 | ); | ||
756 | |||
757 | return h; | ||
758 | } | ||
759 | |||
760 | |||
761 | /** | ||
762 | * Iterator over hash map entries to cancel DHT GET requests after a | ||
763 | * successful connect_by_string. | ||
764 | * | ||
765 | * @param cls Closure (unused). | ||
766 | * @param key Current key code (unused). | ||
767 | * @param value Value in the hash map (get handle). | ||
768 | * @return #GNUNET_YES if we should continue to iterate, | ||
769 | * #GNUNET_NO if not. | ||
770 | */ | ||
771 | static int | ||
772 | regex_cancel_dht_get (void *cls, | ||
773 | const struct GNUNET_HashCode *key, | ||
774 | void *value) | ||
775 | { | ||
776 | struct GNUNET_DHT_GetHandle *h = value; | ||
777 | |||
778 | GNUNET_DHT_get_stop (h); | ||
779 | return GNUNET_YES; | ||
780 | } | ||
781 | |||
782 | |||
783 | /** | ||
784 | * Iterator over hash map entries to free CadetRegexBlocks stored during the | ||
785 | * search for connect_by_string. | ||
786 | * | ||
787 | * @param cls Closure (unused). | ||
788 | * @param key Current key code (unused). | ||
789 | * @param value CadetRegexBlock in the hash map. | ||
790 | * @return #GNUNET_YES if we should continue to iterate, | ||
791 | * #GNUNET_NO if not. | ||
792 | */ | ||
793 | static int | ||
794 | regex_free_result (void *cls, | ||
795 | const struct GNUNET_HashCode *key, | ||
796 | void *value) | ||
797 | { | ||
798 | GNUNET_free (value); | ||
799 | return GNUNET_YES; | ||
800 | } | ||
801 | |||
802 | |||
803 | /** | ||
804 | * Cancel an ongoing regex search in the DHT and free all resources. | ||
805 | * | ||
806 | * @param h the search context. | ||
807 | */ | ||
808 | void | ||
809 | REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h) | ||
810 | { | ||
811 | unsigned int i; | ||
812 | |||
813 | GNUNET_free (h->description); | ||
814 | GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles, | ||
815 | ®ex_cancel_dht_get, NULL); | ||
816 | GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results, | ||
817 | ®ex_free_result, NULL); | ||
818 | GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results); | ||
819 | GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles); | ||
820 | if (0 < h->n_contexts) | ||
821 | { | ||
822 | for (i = 0; i < h->n_contexts; i++) | ||
823 | GNUNET_free (h->contexts[i]); | ||
824 | GNUNET_free (h->contexts); | ||
825 | } | ||
826 | GNUNET_free (h); | ||
827 | } | ||
828 | |||
829 | |||
830 | /* end of regex_internal_dht.c */ | ||
diff --git a/src/service/regex/regex_internal_lib.h b/src/service/regex/regex_internal_lib.h new file mode 100644 index 000000000..bfa3fc97d --- /dev/null +++ b/src/service/regex/regex_internal_lib.h | |||
@@ -0,0 +1,268 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_internal_lib.h | ||
22 | * @brief library to parse regular expressions into dfa | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | |||
26 | #ifndef REGEX_INTERNAL_LIB_H | ||
27 | #define REGEX_INTERNAL_LIB_H | ||
28 | |||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "gnunet_dht_service.h" | ||
31 | #include "gnunet_statistics_service.h" | ||
32 | #include "../../plugin/regex/regex_block_lib.h" | ||
33 | |||
34 | #ifdef __cplusplus | ||
35 | extern "C" | ||
36 | { | ||
37 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
38 | } | ||
39 | #endif | ||
40 | #endif | ||
41 | |||
42 | |||
43 | /** | ||
44 | * Automaton (NFA/DFA) representation. | ||
45 | */ | ||
46 | struct REGEX_INTERNAL_Automaton; | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Construct DFA for the given 'regex' of length 'len'. | ||
51 | * | ||
52 | * Path compression means, that for example a DFA o -> a -> b -> c -> o will be | ||
53 | * compressed to o -> abc -> o. Note that this parameter influences the | ||
54 | * non-determinism of states of the resulting NFA in the DHT (number of outgoing | ||
55 | * edges with the same label). For example for an application that stores IPv4 | ||
56 | * addresses as bitstrings it could make sense to limit the path compression to | ||
57 | * 4 or 8. | ||
58 | * | ||
59 | * @param regex regular expression string. | ||
60 | * @param len length of the regular expression. | ||
61 | * @param max_path_len limit the path compression length to the | ||
62 | * given value. If set to 1, no path compression is applied. Set to 0 for | ||
63 | * maximal possible path compression (generally not desirable). | ||
64 | * @return DFA, needs to be freed using #REGEX_INTERNAL_automaton_destroy(). | ||
65 | */ | ||
66 | struct REGEX_INTERNAL_Automaton * | ||
67 | REGEX_INTERNAL_construct_dfa (const char *regex, | ||
68 | const size_t len, | ||
69 | unsigned int max_path_len); | ||
70 | |||
71 | |||
72 | /** | ||
73 | * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton | ||
74 | * data structure. | ||
75 | * | ||
76 | * @param a automaton to be destroyed. | ||
77 | */ | ||
78 | void | ||
79 | REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a); | ||
80 | |||
81 | |||
82 | /** | ||
83 | * Evaluates the given 'string' against the given compiled regex. | ||
84 | * | ||
85 | * @param a automaton. | ||
86 | * @param string string to check. | ||
87 | * | ||
88 | * @return 0 if string matches, non 0 otherwise. | ||
89 | */ | ||
90 | int | ||
91 | REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, | ||
92 | const char *string); | ||
93 | |||
94 | |||
95 | /** | ||
96 | * Get the first key for the given @a input_string. This hashes | ||
97 | * the first x bits of the @a input_string. | ||
98 | * | ||
99 | * @param input_string string. | ||
100 | * @param string_len length of the @a input_string. | ||
101 | * @param key pointer to where to write the hash code. | ||
102 | * @return number of bits of @a input_string that have been consumed | ||
103 | * to construct the key | ||
104 | */ | ||
105 | size_t | ||
106 | REGEX_INTERNAL_get_first_key (const char *input_string, | ||
107 | size_t string_len, | ||
108 | struct GNUNET_HashCode *key); | ||
109 | |||
110 | |||
111 | /** | ||
112 | * Iterator callback function. | ||
113 | * | ||
114 | * @param cls closure. | ||
115 | * @param key hash for current state. | ||
116 | * @param proof proof for current state | ||
117 | * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not. | ||
118 | * @param num_edges number of edges leaving current state. | ||
119 | * @param edges edges leaving current state. | ||
120 | */ | ||
121 | typedef void | ||
122 | (*REGEX_INTERNAL_KeyIterator)(void *cls, | ||
123 | const struct GNUNET_HashCode *key, | ||
124 | const char *proof, | ||
125 | int accepting, | ||
126 | unsigned int num_edges, | ||
127 | const struct REGEX_BLOCK_Edge *edges); | ||
128 | |||
129 | |||
130 | /** | ||
131 | * Iterate over all edges starting from start state of automaton 'a'. Calling | ||
132 | * iterator for each edge. | ||
133 | * | ||
134 | * @param a automaton. | ||
135 | * @param iterator iterator called for each edge. | ||
136 | * @param iterator_cls closure. | ||
137 | */ | ||
138 | void | ||
139 | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | ||
140 | REGEX_INTERNAL_KeyIterator iterator, | ||
141 | void *iterator_cls); | ||
142 | |||
143 | |||
144 | /** | ||
145 | * Iterate over all edges of automaton 'a' that are reachable from a state with | ||
146 | * a proof of at least #GNUNET_REGEX_INITIAL_BYTES characters. | ||
147 | * | ||
148 | * Call the iterator for each such edge. | ||
149 | * | ||
150 | * @param a automaton. | ||
151 | * @param iterator iterator called for each reachable edge. | ||
152 | * @param iterator_cls closure. | ||
153 | */ | ||
154 | void | ||
155 | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, | ||
156 | REGEX_INTERNAL_KeyIterator iterator, | ||
157 | void *iterator_cls); | ||
158 | |||
159 | |||
160 | /** | ||
161 | * Handle to store cached data about a regex announce. | ||
162 | */ | ||
163 | struct REGEX_INTERNAL_Announcement; | ||
164 | |||
165 | /** | ||
166 | * Handle to store data about a regex search. | ||
167 | */ | ||
168 | struct REGEX_INTERNAL_Search; | ||
169 | |||
170 | |||
171 | /** | ||
172 | * Announce a regular expression: put all states of the automaton in the DHT. | ||
173 | * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that. | ||
174 | * | ||
175 | * @param dht An existing and valid DHT service handle. CANNOT be NULL. | ||
176 | * @param priv our private key, must remain valid until the announcement is cancelled | ||
177 | * @param regex Regular expression to announce. | ||
178 | * @param compression How many characters per edge can we squeeze? | ||
179 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
180 | * @return Handle to reuse or free cached resources. | ||
181 | * Must be freed by calling #REGEX_INTERNAL_announce_cancel(). | ||
182 | */ | ||
183 | struct REGEX_INTERNAL_Announcement * | ||
184 | REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht, | ||
185 | const struct GNUNET_CRYPTO_EddsaPrivateKey *priv, | ||
186 | const char *regex, | ||
187 | uint16_t compression, | ||
188 | struct GNUNET_STATISTICS_Handle *stats); | ||
189 | |||
190 | |||
191 | /** | ||
192 | * Announce again a regular expression previously announced. | ||
193 | * Does use caching to speed up process. | ||
194 | * | ||
195 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
196 | */ | ||
197 | void | ||
198 | REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h); | ||
199 | |||
200 | |||
201 | /** | ||
202 | * Clear all cached data used by a regex announce. | ||
203 | * Does not close DHT connection. | ||
204 | * | ||
205 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
206 | */ | ||
207 | void | ||
208 | REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h); | ||
209 | |||
210 | |||
211 | /** | ||
212 | * Search callback function. | ||
213 | * | ||
214 | * @param cls Closure provided in #REGEX_INTERNAL_search(). | ||
215 | * @param id Peer providing a regex that matches the string. | ||
216 | * @param get_path Path of the get request. | ||
217 | * @param get_path_length Length of @a get_path. | ||
218 | * @param put_path Path of the put request. | ||
219 | * @param put_path_length Length of the @a put_path. | ||
220 | */ | ||
221 | typedef void | ||
222 | (*REGEX_INTERNAL_Found)(void *cls, | ||
223 | const struct GNUNET_PeerIdentity *id, | ||
224 | const struct GNUNET_DHT_PathElement *get_path, | ||
225 | unsigned int get_path_length, | ||
226 | const struct GNUNET_DHT_PathElement *put_path, | ||
227 | unsigned int put_path_length); | ||
228 | |||
229 | |||
230 | /** | ||
231 | * Search for a peer offering a regex matching certain string in the DHT. | ||
232 | * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results | ||
233 | * are returned. | ||
234 | * | ||
235 | * @param dht An existing and valid DHT service handle. | ||
236 | * @param string String to match against the regexes in the DHT. | ||
237 | * @param callback Callback for found peers. | ||
238 | * @param callback_cls Closure for @c callback. | ||
239 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
240 | * @return Handle to stop search and free resources. | ||
241 | * Must be freed by calling #REGEX_INTERNAL_search_cancel(). | ||
242 | */ | ||
243 | struct REGEX_INTERNAL_Search * | ||
244 | REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht, | ||
245 | const char *string, | ||
246 | REGEX_INTERNAL_Found callback, | ||
247 | void *callback_cls, | ||
248 | struct GNUNET_STATISTICS_Handle *stats); | ||
249 | |||
250 | /** | ||
251 | * Stop search and free all data used by a #REGEX_INTERNAL_search() call. | ||
252 | * Does not close DHT connection. | ||
253 | * | ||
254 | * @param h Handle returned by a previous #REGEX_INTERNAL_search() call. | ||
255 | */ | ||
256 | void | ||
257 | REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h); | ||
258 | |||
259 | |||
260 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
261 | { | ||
262 | #endif | ||
263 | #ifdef __cplusplus | ||
264 | } | ||
265 | #endif | ||
266 | |||
267 | /* end of regex_internal_lib.h */ | ||
268 | #endif | ||
diff --git a/src/service/regex/regex_ipc.h b/src/service/regex/regex_ipc.h new file mode 100644 index 000000000..b5a474d56 --- /dev/null +++ b/src/service/regex/regex_ipc.h | |||
@@ -0,0 +1,104 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_ipc.h | ||
22 | * @brief regex IPC messages (not called 'regex.h' due to conflict with | ||
23 | * system headers) | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #ifndef REGEX_IPC_H | ||
27 | #define REGEX_IPC_H | ||
28 | |||
29 | #include "gnunet_util_lib.h" | ||
30 | |||
31 | /** | ||
32 | * Request for regex service to announce capability. | ||
33 | */ | ||
34 | struct AnnounceMessage | ||
35 | { | ||
36 | /** | ||
37 | * Type is GNUNET_MESSAGE_TYPE_REGEX_ANNOUNCE | ||
38 | */ | ||
39 | struct GNUNET_MessageHeader header; | ||
40 | |||
41 | /** | ||
42 | * How many characters can we squeeze per edge? | ||
43 | */ | ||
44 | uint16_t compression; | ||
45 | |||
46 | /** | ||
47 | * Always zero. | ||
48 | */ | ||
49 | uint16_t reserved; | ||
50 | |||
51 | /** | ||
52 | * Delay between repeated announcements. | ||
53 | */ | ||
54 | struct GNUNET_TIME_RelativeNBO refresh_delay; | ||
55 | |||
56 | /* followed by 0-terminated regex as string */ | ||
57 | }; | ||
58 | |||
59 | |||
60 | /** | ||
61 | * Message to initiate regex search. | ||
62 | */ | ||
63 | struct RegexSearchMessage | ||
64 | { | ||
65 | /** | ||
66 | * Type is GNUNET_MESSAGE_TYPE_REGEX_SEARCH | ||
67 | */ | ||
68 | struct GNUNET_MessageHeader header; | ||
69 | |||
70 | /* followed by 0-terminated search string */ | ||
71 | }; | ||
72 | |||
73 | |||
74 | /** | ||
75 | * Result from regex search. | ||
76 | */ | ||
77 | struct ResultMessage | ||
78 | { | ||
79 | /** | ||
80 | * Type is GNUNET_MESSAGE_TYPE_REGEX_RESULT | ||
81 | */ | ||
82 | struct GNUNET_MessageHeader header; | ||
83 | |||
84 | /** | ||
85 | * Number of entries in the GET path. | ||
86 | */ | ||
87 | uint16_t get_path_length; | ||
88 | |||
89 | /** | ||
90 | * Number of entries in the PUT path. | ||
91 | */ | ||
92 | uint16_t put_path_length; | ||
93 | |||
94 | /** | ||
95 | * Identity of the peer that was found. | ||
96 | */ | ||
97 | struct GNUNET_PeerIdentity id; | ||
98 | |||
99 | /* followed by GET path and PUT path arrays */ | ||
100 | }; | ||
101 | |||
102 | |||
103 | /* end of regex_ipc.h */ | ||
104 | #endif | ||
diff --git a/src/service/regex/regex_test_graph.c b/src/service/regex/regex_test_graph.c new file mode 100644 index 000000000..c8efae772 --- /dev/null +++ b/src/service/regex/regex_test_graph.c | |||
@@ -0,0 +1,317 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_graph.c | ||
22 | * @brief functions for creating .dot graphs from regexes | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "regex_internal_lib.h" | ||
27 | #include "regex_test_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
/**
 * State shared by all steps of one graph export.  A pointer to this
 * struct is the cls handed to REGEX_TEST_automaton_save_graph_step.
 */
struct REGEX_TEST_Graph_Context
{
  /**
   * Open dot file that receives the output.
   */
  FILE *filep;

  /**
   * GNUNET_YES to print additional information for each state in the
   * graph.
   */
  int verbose;

  /**
   * GNUNET_YES to color the SCCs.
   */
  int coloring;
};
52 | |||
53 | |||
54 | /** | ||
55 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside | ||
56 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all | ||
57 | * SCCs inside an automaton. | ||
58 | * | ||
59 | * @param scc_counter counter for numbering the sccs | ||
60 | * @param v start vertex | ||
61 | * @param index current index | ||
62 | * @param stack stack for saving all SCCs | ||
63 | * @param stack_size current size of the stack | ||
64 | */ | ||
65 | static void | ||
66 | scc_tarjan_strongconnect (unsigned int *scc_counter, | ||
67 | struct REGEX_INTERNAL_State *v, unsigned int *index, | ||
68 | struct REGEX_INTERNAL_State **stack, | ||
69 | unsigned int *stack_size) | ||
70 | { | ||
71 | struct REGEX_INTERNAL_State *w; | ||
72 | struct REGEX_INTERNAL_Transition *t; | ||
73 | |||
74 | v->index = *index; | ||
75 | v->lowlink = *index; | ||
76 | (*index)++; | ||
77 | stack[(*stack_size)++] = v; | ||
78 | v->contained = 1; | ||
79 | |||
80 | for (t = v->transitions_head; NULL != t; t = t->next) | ||
81 | { | ||
82 | w = t->to_state; | ||
83 | |||
84 | if (NULL == w) | ||
85 | continue; | ||
86 | |||
87 | if (w->index < 0) | ||
88 | { | ||
89 | scc_tarjan_strongconnect (scc_counter, w, index, stack, stack_size); | ||
90 | v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink; | ||
91 | } | ||
92 | else if (1 == w->contained) | ||
93 | v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink; | ||
94 | } | ||
95 | |||
96 | if (v->lowlink == v->index) | ||
97 | { | ||
98 | (*scc_counter)++; | ||
99 | do | ||
100 | { | ||
101 | w = stack[--(*stack_size)]; | ||
102 | w->contained = 0; | ||
103 | w->scc_id = *scc_counter; | ||
104 | } | ||
105 | while (w != v); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | |||
110 | /** | ||
111 | * Detect all SCCs (Strongly Connected Components) inside the given automaton. | ||
112 | * SCCs will be marked using the scc_id on each state. | ||
113 | * | ||
114 | * @param a the automaton for which SCCs should be computed and assigned. | ||
115 | */ | ||
116 | static void | ||
117 | scc_tarjan (struct REGEX_INTERNAL_Automaton *a) | ||
118 | { | ||
119 | unsigned int index; | ||
120 | unsigned int scc_counter; | ||
121 | struct REGEX_INTERNAL_State *v; | ||
122 | struct REGEX_INTERNAL_State *stack[a->state_count]; | ||
123 | unsigned int stack_size; | ||
124 | |||
125 | for (v = a->states_head; NULL != v; v = v->next) | ||
126 | { | ||
127 | v->contained = 0; | ||
128 | v->index = -1; | ||
129 | v->lowlink = -1; | ||
130 | } | ||
131 | |||
132 | stack_size = 0; | ||
133 | index = 0; | ||
134 | scc_counter = 0; | ||
135 | |||
136 | for (v = a->states_head; NULL != v; v = v->next) | ||
137 | { | ||
138 | if (v->index < 0) | ||
139 | scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size); | ||
140 | } | ||
141 | } | ||
142 | |||
143 | |||
144 | /** | ||
145 | * Save a state to an open file pointer. cls is expected to be a file pointer to | ||
146 | * an open file. Used only in conjunction with | ||
147 | * REGEX_TEST_automaton_save_graph. | ||
148 | * | ||
149 | * @param cls file pointer. | ||
150 | * @param count current count of the state, not used. | ||
151 | * @param s state. | ||
152 | */ | ||
153 | void | ||
154 | REGEX_TEST_automaton_save_graph_step (void *cls, unsigned int count, | ||
155 | struct REGEX_INTERNAL_State *s) | ||
156 | { | ||
157 | struct REGEX_TEST_Graph_Context *ctx = cls; | ||
158 | struct REGEX_INTERNAL_Transition *ctran; | ||
159 | char *s_acc = NULL; | ||
160 | char *s_tran = NULL; | ||
161 | char *name; | ||
162 | char *to_name; | ||
163 | |||
164 | if (GNUNET_YES == ctx->verbose) | ||
165 | GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->dfs_id, s->name, s->proof, | ||
166 | GNUNET_h2s (&s->hash)); | ||
167 | else | ||
168 | GNUNET_asprintf (&name, "%i", s->dfs_id); | ||
169 | |||
170 | if (s->accepting) | ||
171 | { | ||
172 | if (GNUNET_YES == ctx->coloring) | ||
173 | { | ||
174 | GNUNET_asprintf (&s_acc, | ||
175 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | ||
176 | name, s->scc_id * s->scc_id); | ||
177 | } | ||
178 | else | ||
179 | { | ||
180 | GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", name); | ||
181 | } | ||
182 | } | ||
183 | else if (GNUNET_YES == ctx->coloring) | ||
184 | { | ||
185 | GNUNET_asprintf (&s_acc, | ||
186 | "\"%s\" [shape=circle, color=\"0.%i 0.8 0.95\"];\n", name, | ||
187 | s->scc_id * s->scc_id); | ||
188 | } | ||
189 | else | ||
190 | { | ||
191 | GNUNET_asprintf (&s_acc, "\"%s\" [shape=circle];\n", name); | ||
192 | } | ||
193 | |||
194 | GNUNET_assert (NULL != s_acc); | ||
195 | |||
196 | fwrite (s_acc, strlen (s_acc), 1, ctx->filep); | ||
197 | GNUNET_free (s_acc); | ||
198 | s_acc = NULL; | ||
199 | |||
200 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | ||
201 | { | ||
202 | if (NULL == ctran->to_state) | ||
203 | { | ||
204 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
205 | "Transition from State %i has no state for transitioning\n", | ||
206 | s->id); | ||
207 | continue; | ||
208 | } | ||
209 | |||
210 | if (GNUNET_YES == ctx->verbose) | ||
211 | { | ||
212 | GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->dfs_id, | ||
213 | ctran->to_state->name, ctran->to_state->proof, | ||
214 | GNUNET_h2s (&ctran->to_state->hash)); | ||
215 | } | ||
216 | else | ||
217 | GNUNET_asprintf (&to_name, "%i", ctran->to_state->dfs_id); | ||
218 | |||
219 | if (NULL == ctran->label) | ||
220 | { | ||
221 | if (GNUNET_YES == ctx->coloring) | ||
222 | { | ||
223 | GNUNET_asprintf (&s_tran, | ||
224 | "\"%s\" -> \"%s\" [label = \"ε\", color=\"0.%i 0.8 0.95\"];\n", | ||
225 | name, to_name, s->scc_id * s->scc_id); | ||
226 | } | ||
227 | else | ||
228 | { | ||
229 | GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"ε\"];\n", name, | ||
230 | to_name); | ||
231 | } | ||
232 | } | ||
233 | else | ||
234 | { | ||
235 | if (GNUNET_YES == ctx->coloring) | ||
236 | { | ||
237 | GNUNET_asprintf (&s_tran, | ||
238 | "\"%s\" -> \"%s\" [label = \"%s\", color=\"0.%i 0.8 0.95\"];\n", | ||
239 | name, to_name, ctran->label, s->scc_id * s->scc_id); | ||
240 | } | ||
241 | else | ||
242 | { | ||
243 | GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%s\"];\n", name, | ||
244 | to_name, ctran->label); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | GNUNET_free (to_name); | ||
249 | |||
250 | GNUNET_assert (NULL != s_tran); | ||
251 | |||
252 | fwrite (s_tran, strlen (s_tran), 1, ctx->filep); | ||
253 | GNUNET_free (s_tran); | ||
254 | s_tran = NULL; | ||
255 | } | ||
256 | |||
257 | GNUNET_free (name); | ||
258 | } | ||
259 | |||
260 | |||
261 | /** | ||
262 | * Save the given automaton as a GraphViz dot file. | ||
263 | * | ||
264 | * @param a the automaton to be saved. | ||
265 | * @param filename where to save the file. | ||
266 | * @param options options for graph generation that include coloring or verbose | ||
267 | * mode | ||
268 | */ | ||
269 | void | ||
270 | REGEX_TEST_automaton_save_graph (struct REGEX_INTERNAL_Automaton *a, | ||
271 | const char *filename, | ||
272 | enum REGEX_TEST_GraphSavingOptions options) | ||
273 | { | ||
274 | char *start; | ||
275 | char *end; | ||
276 | struct REGEX_TEST_Graph_Context ctx; | ||
277 | |||
278 | if (NULL == a) | ||
279 | { | ||
280 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!"); | ||
281 | return; | ||
282 | } | ||
283 | |||
284 | if ((NULL == filename) || (strlen (filename) < 1)) | ||
285 | { | ||
286 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!"); | ||
287 | return; | ||
288 | } | ||
289 | |||
290 | ctx.filep = fopen (filename, "w"); | ||
291 | ctx.verbose = | ||
292 | (0 == (options & REGEX_TEST_GRAPH_VERBOSE)) ? GNUNET_NO : GNUNET_YES; | ||
293 | ctx.coloring = | ||
294 | (0 == (options & REGEX_TEST_GRAPH_COLORING)) ? GNUNET_NO : GNUNET_YES; | ||
295 | |||
296 | if (NULL == ctx.filep) | ||
297 | { | ||
298 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s", | ||
299 | filename); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | /* First add the SCCs to the automaton, so we can color them nicely */ | ||
304 | if (GNUNET_YES == ctx.coloring) | ||
305 | scc_tarjan (a); | ||
306 | |||
307 | start = "digraph G {\nrankdir=LR\n"; | ||
308 | fwrite (start, strlen (start), 1, ctx.filep); | ||
309 | |||
310 | REGEX_INTERNAL_automaton_traverse (a, a->start, NULL, NULL, | ||
311 | ®EX_TEST_automaton_save_graph_step, | ||
312 | &ctx); | ||
313 | |||
314 | end = "\n}\n"; | ||
315 | fwrite (end, strlen (end), 1, ctx.filep); | ||
316 | fclose (ctx.filep); | ||
317 | } | ||
diff --git a/src/service/regex/regex_test_lib.c b/src/service/regex/regex_test_lib.c new file mode 100644 index 000000000..2fece6bff --- /dev/null +++ b/src/service/regex/regex_test_lib.c | |||
@@ -0,0 +1,662 @@ | |||
1 | /* | ||
2 | * This file is part of GNUnet | ||
3 | * Copyright (C) 2012-2017 GNUnet e.V. | ||
4 | * | ||
5 | * GNUnet is free software: you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU Affero General Public License as published | ||
7 | * by the Free Software Foundation, either version 3 of the License, | ||
8 | * or (at your option) any later version. | ||
9 | * | ||
10 | * GNUnet is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Affero General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Affero General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_lib.c | ||
22 | * @brief library to read regexes representing IP networks from a file. | ||
23 | * and simplifying them into one big regex, in order to run | ||
24 | * tests (regex performance, cadet profiler). | ||
25 | * @author Bartlomiej Polot | ||
26 | */ | ||
27 | |||
28 | #include "platform.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | |||
31 | |||
/**
 * Node of the tree that results from prefix-combining the regexes.
 */
struct RegexCombineCtx
{
  /**
   * Array of child nodes that share this node's prefix and token.
   */
  struct RegexCombineCtx **children;

  /**
   * Size of the alphabet, i.e. the number of slots in @a children.
   */
  unsigned int size;

  /**
   * Token of this node.
   */
  char *s;
};
52 | |||
53 | |||
54 | /** | ||
55 | * Char 2 int | ||
56 | * | ||
57 | * Convert a character into its int value depending on the base used | ||
58 | * | ||
59 | * @param c Char | ||
60 | * @param size base (2, 8 or 16(hex)) | ||
61 | * | ||
62 | * @return Int in range [0, (base-1)] | ||
63 | */ | ||
64 | static int | ||
65 | c2i (char c, int size) | ||
66 | { | ||
67 | switch (size) | ||
68 | { | ||
69 | case 2: | ||
70 | case 8: | ||
71 | return c - '0'; | ||
72 | break; | ||
73 | |||
74 | case 16: | ||
75 | if ((c >= '0') && (c <= '9') ) | ||
76 | return c - '0'; | ||
77 | else if ((c >= 'A') && (c <= 'F') ) | ||
78 | return c - 'A' + 10; | ||
79 | else if ((c >= 'a') && (c <= 'f') ) | ||
80 | return c - 'a' + 10; | ||
81 | else | ||
82 | { | ||
83 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
84 | "Cannot convert char %c in base %u\n", | ||
85 | c, size); | ||
86 | GNUNET_assert (0); | ||
87 | } | ||
88 | break; | ||
89 | |||
90 | default: | ||
91 | GNUNET_assert (0); | ||
92 | } | ||
93 | } | ||
94 | |||
95 | |||
96 | #if DEBUG_REGEX | ||
/**
 * Print the indentation prefix for one line of the regex tree to stderr.
 *
 * @param n Indentation level; nothing is printed if it is not positive.
 */
static void
space (int n)
{
  int remaining = n;

  while (remaining-- > 0)
    fprintf (stderr, "| ");
}
108 | |||
109 | |||
110 | #endif | ||
111 | |||
112 | |||
/**
 * Dump the combined regex ctx and, recursively, all of its children to
 * stderr.  Does nothing unless DEBUG_REGEX is enabled.
 *
 * @param ctx The ctx to print
 * @param level Indentation level to start with
 */
static void
debugctx (struct RegexCombineCtx *ctx, int level)
{
#if DEBUG_REGEX
  if (NULL == ctx->s)
  {
    /* Only a node without a token is printed as the root. */
    fprintf (stderr, "ROOT (base %u)\n", ctx->size);
  }
  else
  {
    space (level - 1);
    fprintf (stderr, "%u:'%s'\n", c2i (ctx->s[0], ctx->size), ctx->s);
  }
  for (unsigned int i = 0; i < ctx->size; i++)
  {
    struct RegexCombineCtx *child = ctx->children[i];

    if (NULL == child)
      continue;
    space (level);
    debugctx (child, level + 1);
  }
  fflush (stderr);
#endif
}
141 | |||
142 | |||
143 | /** | ||
144 | * Add a single regex to a context, combining with existing regex by-prefix. | ||
145 | * | ||
146 | * @param ctx Context with 0 or more regexes. | ||
147 | * @param regex Regex to add. | ||
148 | */ | ||
149 | static void | ||
150 | regex_add (struct RegexCombineCtx *ctx, | ||
151 | const char *regex); | ||
152 | |||
153 | |||
154 | /** | ||
155 | * Create and initialize a new RegexCombineCtx. | ||
156 | * | ||
157 | * @param alphabet_size Size of the alphabet (and the Trie array) | ||
158 | */ | ||
159 | static struct RegexCombineCtx * | ||
160 | new_regex_ctx (unsigned int alphabet_size) | ||
161 | { | ||
162 | struct RegexCombineCtx *ctx; | ||
163 | size_t array_size; | ||
164 | |||
165 | array_size = sizeof(struct RegexCombineCtx *) * alphabet_size; | ||
166 | ctx = GNUNET_new (struct RegexCombineCtx); | ||
167 | ctx->children = GNUNET_malloc (array_size); | ||
168 | ctx->size = alphabet_size; | ||
169 | |||
170 | return ctx; | ||
171 | } | ||
172 | |||
173 | |||
174 | static void | ||
175 | move_children (struct RegexCombineCtx *dst, | ||
176 | const struct RegexCombineCtx *src) | ||
177 | { | ||
178 | size_t array_size; | ||
179 | |||
180 | array_size = sizeof(struct RegexCombineCtx *) * src->size; | ||
181 | GNUNET_memcpy (dst->children, | ||
182 | src->children, | ||
183 | array_size); | ||
184 | for (unsigned int i = 0; i < src->size; i++) | ||
185 | { | ||
186 | src->children[i] = NULL; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | |||
191 | /** | ||
192 | * Extract a string from all prefix-combined regexes. | ||
193 | * | ||
194 | * @param ctx Context with 0 or more regexes. | ||
195 | * | ||
196 | * @return Regex that matches any of the added regexes. | ||
197 | */ | ||
198 | static char * | ||
199 | regex_combine (struct RegexCombineCtx *ctx) | ||
200 | { | ||
201 | struct RegexCombineCtx *p; | ||
202 | unsigned int i; | ||
203 | size_t len; | ||
204 | char *regex; | ||
205 | char *tmp; | ||
206 | char *s; | ||
207 | int opt; | ||
208 | |||
209 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s); | ||
210 | regex = GNUNET_strdup (""); | ||
211 | opt = GNUNET_NO; | ||
212 | for (i = 0; i < ctx->size; i++) | ||
213 | { | ||
214 | p = ctx->children[i]; | ||
215 | if (NULL == p) | ||
216 | continue; | ||
217 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
218 | "adding '%s' to innner %s\n", | ||
219 | p->s, ctx->s); | ||
220 | s = regex_combine (p); | ||
221 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); | ||
222 | if (strlen (s) == 0) | ||
223 | { | ||
224 | opt = GNUNET_YES; | ||
225 | } | ||
226 | else | ||
227 | { | ||
228 | GNUNET_asprintf (&tmp, "%s%s|", regex, s); | ||
229 | GNUNET_free (regex); | ||
230 | regex = tmp; | ||
231 | } | ||
232 | GNUNET_free (s); | ||
233 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, | ||
234 | ctx->s); | ||
235 | } | ||
236 | |||
237 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex); | ||
238 | len = strlen (regex); | ||
239 | if (0 == len) | ||
240 | { | ||
241 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n"); | ||
242 | GNUNET_free (regex); | ||
243 | return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s); | ||
244 | } | ||
245 | |||
246 | if ('|' == regex[len - 1]) | ||
247 | regex[len - 1] = '\0'; | ||
248 | |||
249 | if (NULL != ctx->s) | ||
250 | { | ||
251 | if (opt) | ||
252 | GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex); | ||
253 | else | ||
254 | GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex); | ||
255 | GNUNET_free (regex); | ||
256 | regex = s; | ||
257 | } | ||
258 | |||
259 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex); | ||
260 | return regex; | ||
261 | } | ||
262 | |||
263 | |||
/**
 * Count how many leading characters two strings have in common.
 *
 * @param s1 String 1 (0-terminated).
 * @param s2 String 2 (0-terminated).
 *
 * @return Number of characters of matching prefix.
 */
static unsigned int
get_prefix_length (const char *s1, const char *s2)
{
  unsigned int matched = 0;

  /* Equal characters that are not the terminator of s1 cannot be the
   * terminator of s2 either, so one check covers the end of both strings. */
  while (('\0' != s1[matched]) && (s1[matched] == s2[matched]))
    matched++;
  return matched;
}
291 | |||
292 | |||
293 | /** | ||
294 | * Return the child context with the longest prefix match with the regex. | ||
295 | * Usually only one child will match, search all just in case. | ||
296 | * | ||
297 | * @param ctx Context whose children to search. | ||
298 | * @param regex String to match. | ||
299 | * | ||
300 | * @return Child with the longest prefix, NULL if no child matches. | ||
301 | */ | ||
302 | static struct RegexCombineCtx * | ||
303 | get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex) | ||
304 | { | ||
305 | struct RegexCombineCtx *p; | ||
306 | struct RegexCombineCtx *best; | ||
307 | unsigned int i; | ||
308 | unsigned int l; | ||
309 | unsigned int best_l; | ||
310 | |||
311 | best_l = 0; | ||
312 | best = NULL; | ||
313 | |||
314 | for (i = 0; i < ctx->size; i++) | ||
315 | { | ||
316 | p = ctx->children[i]; | ||
317 | if (NULL == p) | ||
318 | continue; | ||
319 | |||
320 | l = get_prefix_length (p->s, regex); | ||
321 | if (l > best_l) | ||
322 | { | ||
323 | GNUNET_break (0 == best_l); | ||
324 | best = p; | ||
325 | best_l = l; | ||
326 | } | ||
327 | } | ||
328 | return best; | ||
329 | } | ||
330 | |||
331 | |||
332 | static void | ||
333 | regex_add_multiple (struct RegexCombineCtx *ctx, | ||
334 | const char *regex, | ||
335 | struct RegexCombineCtx **children) | ||
336 | { | ||
337 | char tmp[2]; | ||
338 | long unsigned int i; | ||
339 | size_t l; | ||
340 | struct RegexCombineCtx *newctx; | ||
341 | unsigned int count; | ||
342 | |||
343 | if ('(' != regex[0]) | ||
344 | { | ||
345 | GNUNET_assert (0); | ||
346 | } | ||
347 | |||
348 | /* Does the regex cover *all* possible children? Then don't add any, | ||
349 | * as it will be covered by the post-regex "(a-z)*" | ||
350 | */ | ||
351 | l = strlen (regex); | ||
352 | count = 0; | ||
353 | for (i = 1UL; i < l; i++) | ||
354 | { | ||
355 | if ((regex[i] != '|') && (regex[i] != ')') ) | ||
356 | { | ||
357 | count++; | ||
358 | } | ||
359 | } | ||
360 | if (count == ctx->size) | ||
361 | { | ||
362 | return; | ||
363 | } | ||
364 | |||
365 | /* Add every component as a child node */ | ||
366 | tmp[1] = '\0'; | ||
367 | for (i = 1UL; i < l; i++) | ||
368 | { | ||
369 | if ((regex[i] != '|') && (regex[i] != ')') ) | ||
370 | { | ||
371 | tmp[0] = regex[i]; | ||
372 | newctx = new_regex_ctx (ctx->size); | ||
373 | newctx->s = GNUNET_strdup (tmp); | ||
374 | if (children != NULL) | ||
375 | GNUNET_memcpy (newctx->children, | ||
376 | children, | ||
377 | sizeof(*children) * ctx->size); | ||
378 | ctx->children[c2i (tmp[0], ctx->size)] = newctx; | ||
379 | } | ||
380 | } | ||
381 | } | ||
382 | |||
383 | |||
384 | /** | ||
385 | * Add a single regex to a context, splitting the existing state. | ||
386 | * | ||
387 | * We only had a partial match, split existing state, truncate the current node | ||
388 | * so it only contains the prefix, add suffix(es) as children. | ||
389 | * | ||
390 | * @param ctx Context to split. | ||
391 | * @param len Length of ctx->s | ||
392 | * @param prefix_l Length of common prefix of the new regex and @a ctx->s | ||
393 | */ | ||
394 | static void | ||
395 | regex_split (struct RegexCombineCtx *ctx, | ||
396 | unsigned int len, | ||
397 | unsigned int prefix_l) | ||
398 | { | ||
399 | struct RegexCombineCtx *newctx; | ||
400 | unsigned int idx; | ||
401 | char *suffix; | ||
402 | |||
403 | suffix = GNUNET_malloc (len - prefix_l + 1); | ||
404 | /* | ||
405 | * We can use GNUNET_strlcpy because ctx->s is null-terminated | ||
406 | */ | ||
407 | GNUNET_strlcpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1); | ||
408 | |||
409 | /* Suffix saved, truncate current node so it only contains the prefix, | ||
410 | * copy any children nodes to put as grandchildren and initialize new empty | ||
411 | * children array. | ||
412 | */ | ||
413 | ctx->s[prefix_l] = '\0'; | ||
414 | |||
415 | /* If the suffix is an OR expression, add multiple children */ | ||
416 | if ('(' == suffix[0]) | ||
417 | { | ||
418 | struct RegexCombineCtx **tmp; | ||
419 | |||
420 | tmp = ctx->children; | ||
421 | ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size); | ||
422 | regex_add_multiple (ctx, suffix, tmp); | ||
423 | GNUNET_free (suffix); | ||
424 | GNUNET_free (tmp); | ||
425 | return; | ||
426 | } | ||
427 | |||
428 | /* The suffix is a normal string, add as one node */ | ||
429 | newctx = new_regex_ctx (ctx->size); | ||
430 | newctx->s = suffix; | ||
431 | move_children (newctx, ctx); | ||
432 | idx = c2i (suffix[0], ctx->size); | ||
433 | ctx->children[idx] = newctx; | ||
434 | } | ||
435 | |||
436 | |||
437 | /** | ||
438 | * Add a single regex to a context, combining with existing regex by-prefix. | ||
439 | * | ||
440 | * @param ctx Context with 0 or more regexes. | ||
441 | * @param regex Regex to add. | ||
442 | */ | ||
443 | static void | ||
444 | regex_add (struct RegexCombineCtx *ctx, const char *regex) | ||
445 | { | ||
446 | struct RegexCombineCtx *p; | ||
447 | struct RegexCombineCtx *newctx; | ||
448 | long unsigned int l; | ||
449 | unsigned int prefix_l; | ||
450 | const char *rest_r; | ||
451 | const char *rest_s; | ||
452 | size_t len; | ||
453 | int idx; | ||
454 | |||
455 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
456 | "regex_add '%s' into '%s'\n", | ||
457 | regex, ctx->s); | ||
458 | l = strlen (regex); | ||
459 | if (0UL == l) | ||
460 | return; | ||
461 | |||
462 | /* If the regex is in the form of (a|b|c), add every character separately */ | ||
463 | if ('(' == regex[0]) | ||
464 | { | ||
465 | regex_add_multiple (ctx, regex, NULL); | ||
466 | return; | ||
467 | } | ||
468 | |||
469 | p = get_longest_prefix (ctx, regex); | ||
470 | if (NULL != p) | ||
471 | { | ||
472 | /* There is some prefix match, reduce regex and try again */ | ||
473 | prefix_l = get_prefix_length (p->s, regex); | ||
474 | rest_s = &p->s[prefix_l]; | ||
475 | rest_r = ®ex[prefix_l]; | ||
476 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l); | ||
477 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s); | ||
478 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); | ||
479 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); | ||
480 | len = strlen (p->s); | ||
481 | if (prefix_l < len) | ||
482 | { | ||
483 | regex_split (p, len, prefix_l); | ||
484 | } | ||
485 | regex_add (p, rest_r); | ||
486 | return; | ||
487 | } | ||
488 | |||
489 | /* There is no prefix match, add new */ | ||
490 | idx = c2i (regex[0], ctx->size); | ||
491 | if ((NULL == ctx->children[idx]) && (NULL != ctx->s)) | ||
492 | { | ||
493 | /* this was the end before, add empty string */ | ||
494 | newctx = new_regex_ctx (ctx->size); | ||
495 | newctx->s = GNUNET_strdup (""); | ||
496 | ctx->children[idx] = newctx; | ||
497 | } | ||
498 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); | ||
499 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); | ||
500 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); | ||
501 | newctx = new_regex_ctx (ctx->size); | ||
502 | newctx->s = GNUNET_strdup (regex); | ||
503 | ctx->children[idx] = newctx; | ||
504 | } | ||
505 | |||
506 | |||
507 | /** | ||
508 | * Free all resources used by the context node and all its children. | ||
509 | * | ||
510 | * @param ctx Context to free. | ||
511 | */ | ||
512 | static void | ||
513 | regex_ctx_destroy (struct RegexCombineCtx *ctx) | ||
514 | { | ||
515 | unsigned int i; | ||
516 | |||
517 | if (NULL == ctx) | ||
518 | return; | ||
519 | |||
520 | for (i = 0; i < ctx->size; i++) | ||
521 | { | ||
522 | regex_ctx_destroy (ctx->children[i]); | ||
523 | } | ||
524 | GNUNET_free (ctx->s); /* 's' on root node is null */ | ||
525 | GNUNET_free (ctx->children); | ||
526 | GNUNET_free (ctx); | ||
527 | } | ||
528 | |||
529 | |||
530 | /** | ||
531 | * Combine an array of regexes into a single prefix-shared regex. | ||
532 | * Returns a prefix-combine regex that matches the same strings as | ||
533 | * any of the original regexes. | ||
534 | * | ||
535 | * WARNING: only useful for reading specific regexes for specific applications, | ||
536 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. | ||
537 | * This function DOES NOT support arbitrary regex combining. | ||
538 | * | ||
539 | * @param regexes A NULL-terminated array of regexes. | ||
540 | * @param alphabet_size Size of the alphabet the regex uses. | ||
541 | * | ||
542 | * @return A string with a single regex that matches any of the original regexes | ||
543 | */ | ||
544 | char * | ||
545 | REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size) | ||
546 | { | ||
547 | unsigned int i; | ||
548 | char *combined; | ||
549 | const char *current; | ||
550 | struct RegexCombineCtx *ctx; | ||
551 | |||
552 | ctx = new_regex_ctx (alphabet_size); | ||
553 | for (i = 0; regexes[i]; i++) | ||
554 | { | ||
555 | current = regexes[i]; | ||
556 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); | ||
557 | regex_add (ctx, current); | ||
558 | debugctx (ctx, 0); | ||
559 | } | ||
560 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); | ||
561 | debugctx (ctx, 0); | ||
562 | |||
563 | combined = regex_combine (ctx); | ||
564 | |||
565 | regex_ctx_destroy (ctx); | ||
566 | |||
567 | return combined; | ||
568 | } | ||
569 | |||
570 | |||
571 | /** | ||
572 | * Read a set of regexes from a file, one per line and return them in an array | ||
573 | * suitable for REGEX_TEST_combine. | ||
574 | * The array must be free'd using REGEX_TEST_free_from_file. | ||
575 | * | ||
576 | * @param filename Name of the file containing the regexes. | ||
577 | * | ||
578 | * @return A newly allocated, NULL terminated array of regexes. | ||
579 | */ | ||
580 | char ** | ||
581 | REGEX_TEST_read_from_file (const char *filename) | ||
582 | { | ||
583 | struct GNUNET_DISK_FileHandle *f; | ||
584 | unsigned int nr; | ||
585 | unsigned int offset; | ||
586 | off_t size; | ||
587 | size_t len; | ||
588 | char *buffer; | ||
589 | char *regex; | ||
590 | char **regexes; | ||
591 | |||
592 | f = GNUNET_DISK_file_open (filename, | ||
593 | GNUNET_DISK_OPEN_READ, | ||
594 | GNUNET_DISK_PERM_NONE); | ||
595 | if (NULL == f) | ||
596 | { | ||
597 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
598 | "Can't open file %s for reading\n", filename); | ||
599 | return NULL; | ||
600 | } | ||
601 | if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size)) | ||
602 | { | ||
603 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
604 | "Can't get size of file %s\n", filename); | ||
605 | GNUNET_DISK_file_close (f); | ||
606 | return NULL; | ||
607 | } | ||
608 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
609 | "using file %s, size %llu\n", | ||
610 | filename, (unsigned long long) size); | ||
611 | |||
612 | buffer = GNUNET_malloc (size + 1); | ||
613 | GNUNET_DISK_file_read (f, buffer, size); | ||
614 | GNUNET_DISK_file_close (f); | ||
615 | regexes = GNUNET_malloc (sizeof(char *)); | ||
616 | nr = 1; | ||
617 | offset = 0; | ||
618 | regex = NULL; | ||
619 | do | ||
620 | { | ||
621 | if (NULL == regex) | ||
622 | regex = GNUNET_malloc (size + 1); | ||
623 | len = (size_t) sscanf (&buffer[offset], "%s", regex); | ||
624 | if (0 == len) | ||
625 | break; | ||
626 | len = strlen (regex); | ||
627 | offset += len + 1; | ||
628 | if (len < 1) | ||
629 | continue; | ||
630 | regex[len] = '\0'; | ||
631 | regex = GNUNET_realloc (regex, len + 1); | ||
632 | GNUNET_array_grow (regexes, nr, nr + 1); | ||
633 | GNUNET_assert (NULL == regexes[nr - 2]); | ||
634 | regexes[nr - 2] = regex; | ||
635 | regexes[nr - 1] = NULL; | ||
636 | regex = NULL; | ||
637 | } | ||
638 | while (offset < size); | ||
639 | GNUNET_free (regex); | ||
640 | GNUNET_free (buffer); | ||
641 | |||
642 | return regexes; | ||
643 | } | ||
644 | |||
645 | |||
/**
 * Free all memory reserved for a set of regexes created by read_from_file.
 *
 * @param regexes NULL-terminated array of regexes.  NULL (the value that
 *                REGEX_TEST_read_from_file returns on failure) is accepted
 *                and ignored.
 */
void
REGEX_TEST_free_from_file (char **regexes)
{
  if (NULL == regexes)
    return;

  for (unsigned int i = 0; NULL != regexes[i]; i++)
    GNUNET_free (regexes[i]);
  GNUNET_free (regexes);
}
660 | |||
661 | |||
662 | /* end of regex_test_lib.c */ | ||
diff --git a/src/service/regex/regex_test_lib.h b/src/service/regex/regex_test_lib.h new file mode 100644 index 000000000..98f60d317 --- /dev/null +++ b/src/service/regex/regex_test_lib.h | |||
@@ -0,0 +1,158 @@ | |||
1 | /* | ||
2 | * This file is part of GNUnet | ||
3 | * Copyright (C) 2012 GNUnet e.V. | ||
4 | * | ||
5 | * GNUnet is free software: you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU Affero General Public License as published | ||
7 | * by the Free Software Foundation, either version 3 of the License, | ||
8 | * or (at your option) any later version. | ||
9 | * | ||
10 | * GNUnet is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Affero General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Affero General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_lib.h | ||
22 | * @brief library to read regexes representing IP networks from a file | ||
23 | * and simplify them into one big regex, in order to run | ||
24 | * tests (regex performance, regex profiler). | ||
25 | * @author Bartlomiej Polot | ||
26 | */ | ||
27 | |||
28 | #ifndef REGEX_INTERNAL_TEST_LIB_H | ||
29 | #define REGEX_INTERNAL_TEST_LIB_H | ||
30 | |||
31 | #include "regex_internal_lib.h" | ||
32 | |||
33 | #ifdef __cplusplus | ||
34 | extern "C" | ||
35 | { | ||
36 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
37 | } | ||
38 | #endif | ||
39 | #endif | ||
40 | |||
41 | |||
42 | /** | ||
43 | * Combine an array of regexes into a single prefix-shared regex. | ||
44 | * Returns a prefix-combine regex that matches the same strings as | ||
45 | * any of the original regexes. | ||
46 | * | ||
47 | * WARNING: only useful for reading specific regexes for specific applications, | ||
48 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. | ||
49 | * This function DOES NOT support arbitrary regex combining. | ||
50 | * | ||
51 | * @param regexes A NULL-terminated array of regexes. | ||
52 | * @param alphabet_size Size of the alphabet the regex uses. | ||
53 | * | ||
54 | * @return A string with a single regex that matches any of the original regexes | ||
55 | */ | ||
56 | char * | ||
57 | REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size); | ||
58 | |||
59 | |||
60 | /** | ||
61 | * Read a set of regexes from a file, one per line and return them in an array | ||
62 | * suitable for REGEX_TEST_combine. | ||
63 | * The array must be free'd using REGEX_TEST_free_from_file. | ||
64 | * | ||
65 | * @param filename Name of the file containing the regexes. | ||
66 | * | ||
67 | * @return A newly allocated, NULL terminated array of regexes. | ||
68 | */ | ||
69 | char ** | ||
70 | REGEX_TEST_read_from_file (const char *filename); | ||
71 | |||
72 | |||
73 | /** | ||
74 | * Free all memory reserved for a set of regexes created by read_from_file. | ||
75 | * | ||
76 | * @param regexes NULL-terminated array of regexes. | ||
77 | */ | ||
78 | void | ||
79 | REGEX_TEST_free_from_file (char **regexes); | ||
80 | |||
81 | |||
82 | /** | ||
83 | * Generate a (pseudo) random regular expression of length 'rx_length', as well | ||
84 | * as a (optional) string that will be matched by the generated regex. The | ||
85 | * returned regex needs to be freed. | ||
86 | * | ||
87 | * @param rx_length length of the random regex. | ||
88 | * @param matching_str (optional) pointer to a string that will contain a string | ||
89 | * that will be matched by the generated regex, if not NULL; | ||
90 | * the caller must provide at least rx_length+1 bytes. | ||
91 | * | ||
92 | * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which | ||
93 | * needs to be freed, otherwise. | ||
94 | */ | ||
95 | char * | ||
96 | REGEX_TEST_generate_random_regex (size_t rx_length, char *matching_str); | ||
97 | |||
98 | |||
99 | /** | ||
100 | * Generate a random string of maximum length 'max_len' that only contains literals allowed | ||
101 | * in a regular expression. The string might be 0 chars long but is guaranteed | ||
102 | * to be shorter or equal to 'max_len'. | ||
103 | * | ||
104 | * @param max_len maximum length of the string that should be generated. | ||
105 | * | ||
106 | * @return random string that needs to be freed. | ||
107 | */ | ||
108 | char * | ||
109 | REGEX_TEST_generate_random_string (size_t max_len); | ||
110 | |||
111 | |||
/**
 * Options accepted by the graph creation function
 * REGEX_TEST_automaton_save_graph.
 */
enum REGEX_TEST_GraphSavingOptions
{
  /**
   * Default. Do nothing special.
   */
  REGEX_TEST_GRAPH_DEFAULT = 0,

  /**
   * Verbose output: the generated graph additionally contains information
   * such as the NFA states that were used to generate the DFA state.
   */
  REGEX_TEST_GRAPH_VERBOSE = 1,

  /**
   * Enable graph coloring. Will color each SCC in a different color.
   */
  REGEX_TEST_GRAPH_COLORING = 2
};
134 | |||
135 | |||
136 | /** | ||
137 | * Save the given automaton as a GraphViz dot file. | ||
138 | * | ||
139 | * @param a the automaton to be saved. | ||
140 | * @param filename where to save the file. | ||
141 | * @param options options for graph generation that include coloring or verbose | ||
142 | * mode | ||
143 | */ | ||
144 | void | ||
145 | REGEX_TEST_automaton_save_graph (struct REGEX_INTERNAL_Automaton *a, | ||
146 | const char *filename, | ||
147 | enum REGEX_TEST_GraphSavingOptions options); | ||
148 | |||
149 | |||
150 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
151 | { | ||
152 | #endif | ||
153 | #ifdef __cplusplus | ||
154 | } | ||
155 | #endif | ||
156 | |||
157 | /* end of regex_test_lib.h */ | ||
158 | #endif | ||
diff --git a/src/service/regex/regex_test_random.c b/src/service/regex/regex_test_random.c new file mode 100644 index 000000000..56508b3b4 --- /dev/null +++ b/src/service/regex/regex_test_random.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_random.c | ||
22 | * @brief functions for creating random regular expressions and strings | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "regex_test_lib.h" | ||
27 | #include "gnunet_util_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
30 | |||
31 | /** | ||
32 | * Get a (pseudo) random valid literal for building a regular expression. | ||
33 | * | ||
34 | * @return random valid literal | ||
35 | */ | ||
36 | static char | ||
37 | get_random_literal () | ||
38 | { | ||
39 | uint32_t ridx; | ||
40 | |||
41 | ridx = | ||
42 | GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | ||
43 | (uint32_t) strlen (ALLOWED_LITERALS)); | ||
44 | |||
45 | return ALLOWED_LITERALS[ridx]; | ||
46 | } | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Generate a (pseudo) random regular expression of length 'rx_length', as well | ||
51 | * as a (optional) string that will be matched by the generated regex. The | ||
52 | * returned regex needs to be freed. | ||
53 | * | ||
54 | * @param rx_length length of the random regex. | ||
55 | * @param matching_str (optional) pointer to a string that will contain a string | ||
56 | * that will be matched by the generated regex, if | ||
57 | * 'matching_str' pointer was not NULL. Make sure you | ||
58 | * allocated at least rx_length+1 bytes for this string. | ||
59 | * | ||
60 | * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which | ||
61 | * needs to be freed, otherwise. | ||
62 | */ | ||
63 | char * | ||
64 | REGEX_TEST_generate_random_regex (size_t rx_length, char *matching_str) | ||
65 | { | ||
66 | char *rx; | ||
67 | char *rx_p; | ||
68 | char *matching_strp; | ||
69 | unsigned int i; | ||
70 | unsigned int char_op_switch; | ||
71 | unsigned int last_was_op; | ||
72 | int rx_op; | ||
73 | char current_char; | ||
74 | |||
75 | if (0 == rx_length) | ||
76 | return NULL; | ||
77 | |||
78 | if (NULL != matching_str) | ||
79 | matching_strp = matching_str; | ||
80 | else | ||
81 | matching_strp = NULL; | ||
82 | |||
83 | rx = GNUNET_malloc (rx_length + 1); | ||
84 | rx_p = rx; | ||
85 | current_char = 0; | ||
86 | last_was_op = 1; | ||
87 | |||
88 | for (i = 0; i < rx_length; i++) | ||
89 | { | ||
90 | char_op_switch = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2); | ||
91 | |||
92 | if ((0 == char_op_switch) && ! last_was_op) | ||
93 | { | ||
94 | last_was_op = 1; | ||
95 | rx_op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4); | ||
96 | |||
97 | switch (rx_op) | ||
98 | { | ||
99 | case 0: | ||
100 | current_char = '+'; | ||
101 | break; | ||
102 | |||
103 | case 1: | ||
104 | current_char = '*'; | ||
105 | break; | ||
106 | |||
107 | case 2: | ||
108 | current_char = '?'; | ||
109 | break; | ||
110 | |||
111 | case 3: | ||
112 | if (i < rx_length - 1) /* '|' cannot be at the end */ | ||
113 | current_char = '|'; | ||
114 | else | ||
115 | current_char = get_random_literal (); | ||
116 | break; | ||
117 | } | ||
118 | } | ||
119 | else | ||
120 | { | ||
121 | current_char = get_random_literal (); | ||
122 | last_was_op = 0; | ||
123 | } | ||
124 | |||
125 | if ((NULL != matching_strp) && | ||
126 | ((current_char != '+') && (current_char != '*') && (current_char != | ||
127 | '?') && | ||
128 | (current_char != '|') )) | ||
129 | { | ||
130 | *matching_strp = current_char; | ||
131 | matching_strp++; | ||
132 | } | ||
133 | |||
134 | *rx_p = current_char; | ||
135 | rx_p++; | ||
136 | } | ||
137 | *rx_p = '\0'; | ||
138 | if (NULL != matching_strp) | ||
139 | *matching_strp = '\0'; | ||
140 | |||
141 | return rx; | ||
142 | } | ||
143 | |||
144 | |||
145 | /** | ||
146 | * Generate a random string of maximum length 'max_len' that only contains literals allowed | ||
147 | * in a regular expression. The string might be 0 chars long but is garantueed | ||
148 | * to be shorter or equal to 'max_len'. | ||
149 | * | ||
150 | * @param max_len maximum length of the string that should be generated. | ||
151 | * | ||
152 | * @return random string that needs to be freed. | ||
153 | */ | ||
154 | char * | ||
155 | REGEX_TEST_generate_random_string (size_t max_len) | ||
156 | { | ||
157 | unsigned int i; | ||
158 | char *str; | ||
159 | size_t len; | ||
160 | |||
161 | if (1 > max_len) | ||
162 | return GNUNET_strdup (""); | ||
163 | |||
164 | len = (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len); | ||
165 | str = GNUNET_malloc (len + 1); | ||
166 | |||
167 | for (i = 0; i < len; i++) | ||
168 | { | ||
169 | str[i] = get_random_literal (); | ||
170 | } | ||
171 | |||
172 | str[i] = '\0'; | ||
173 | |||
174 | return str; | ||
175 | } | ||
diff --git a/src/service/regex/test_regex_api.c b/src/service/regex/test_regex_api.c new file mode 100644 index 000000000..9761e80f9 --- /dev/null +++ b/src/service/regex/test_regex_api.c | |||
@@ -0,0 +1,132 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_api.c | ||
22 | * @brief base test case for regex api (and DHT functions) | ||
23 | * @author Christian Grothoff | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "gnunet_util_lib.h" | ||
27 | #include "gnunet_testing_lib.h" | ||
28 | #include "gnunet_regex_service.h" | ||
29 | |||
30 | |||
31 | /** | ||
32 | * How long until we really give up on a particular testcase portion? | ||
33 | */ | ||
34 | #define TOTAL_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, \ | ||
35 | 600) | ||
36 | |||
37 | /** | ||
38 | * How long until we give up on any particular operation (and retry)? | ||
39 | */ | ||
40 | #define BASE_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 3) | ||
41 | |||
42 | |||
43 | static struct GNUNET_REGEX_Announcement *a; | ||
44 | |||
45 | static struct GNUNET_REGEX_Search *s; | ||
46 | |||
47 | static int ok = 1; | ||
48 | |||
49 | static struct GNUNET_SCHEDULER_Task *die_task; | ||
50 | |||
51 | |||
52 | static void | ||
53 | end (void *cls) | ||
54 | { | ||
55 | die_task = NULL; | ||
56 | GNUNET_REGEX_announce_cancel (a); | ||
57 | a = NULL; | ||
58 | GNUNET_REGEX_search_cancel (s); | ||
59 | s = NULL; | ||
60 | ok = 0; | ||
61 | } | ||
62 | |||
63 | |||
64 | static void | ||
65 | end_badly () | ||
66 | { | ||
67 | die_task = NULL; | ||
68 | fprintf (stderr, "%s", "Testcase failed (timeout).\n"); | ||
69 | GNUNET_REGEX_announce_cancel (a); | ||
70 | a = NULL; | ||
71 | GNUNET_REGEX_search_cancel (s); | ||
72 | s = NULL; | ||
73 | ok = 1; | ||
74 | } | ||
75 | |||
76 | |||
77 | /** | ||
78 | * Search callback function, invoked for every result that was found. | ||
79 | * | ||
80 | * @param cls Closure provided in GNUNET_REGEX_search. | ||
81 | * @param id Peer providing a regex that matches the string. | ||
82 | * @param get_path Path of the get request. | ||
83 | * @param get_path_length Length of get_path. | ||
84 | * @param put_path Path of the put request. | ||
85 | * @param put_path_length Length of the put_path. | ||
86 | */ | ||
87 | static void | ||
88 | found_cb (void *cls, | ||
89 | const struct GNUNET_PeerIdentity *id, | ||
90 | const struct GNUNET_PeerIdentity *get_path, | ||
91 | unsigned int get_path_length, | ||
92 | const struct GNUNET_PeerIdentity *put_path, | ||
93 | unsigned int put_path_length) | ||
94 | { | ||
95 | GNUNET_SCHEDULER_cancel (die_task); | ||
96 | die_task = | ||
97 | GNUNET_SCHEDULER_add_now (&end, NULL); | ||
98 | } | ||
99 | |||
100 | |||
101 | static void | ||
102 | run (void *cls, | ||
103 | const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
104 | struct GNUNET_TESTING_Peer *peer) | ||
105 | { | ||
106 | die_task = | ||
107 | GNUNET_SCHEDULER_add_delayed (TOTAL_TIMEOUT, | ||
108 | &end_badly, NULL); | ||
109 | a = GNUNET_REGEX_announce (cfg, | ||
110 | "my long prefix - hello world(0|1)*", | ||
111 | GNUNET_TIME_relative_multiply ( | ||
112 | GNUNET_TIME_UNIT_SECONDS, | ||
113 | 5), | ||
114 | 1); | ||
115 | s = GNUNET_REGEX_search (cfg, | ||
116 | "my long prefix - hello world0101", | ||
117 | &found_cb, NULL); | ||
118 | } | ||
119 | |||
120 | |||
121 | int | ||
122 | main (int argc, char *argv[]) | ||
123 | { | ||
124 | if (0 != GNUNET_TESTING_peer_run ("test-regex-api", | ||
125 | "test_regex_api_data.conf", | ||
126 | &run, NULL)) | ||
127 | return 1; | ||
128 | return ok; | ||
129 | } | ||
130 | |||
131 | |||
132 | /* end of test_regex_api.c */ | ||
diff --git a/src/service/regex/test_regex_api_data.conf b/src/service/regex/test_regex_api_data.conf new file mode 100644 index 000000000..acc622cbe --- /dev/null +++ b/src/service/regex/test_regex_api_data.conf | |||
@@ -0,0 +1,39 @@ | |||
1 | @INLINE@ ../../../contrib/conf/gnunet/no_forcestart.conf | ||
2 | @INLINE@ ../../../contrib/conf/gnunet/no_autostart_above_core.conf | ||
3 | |||
4 | [PATHS] | ||
5 | GNUNET_TEST_HOME = $GNUNET_TMP/test-regex-api/ | ||
6 | |||
7 | [dhtcache] | ||
8 | QUOTA = 1 MB | ||
9 | DATABASE = heap | ||
10 | |||
11 | [topology] | ||
12 | TARGET-CONNECTION-COUNT = 16 | ||
13 | AUTOCONNECT = YES | ||
14 | FRIENDS-ONLY = NO | ||
15 | MINIMUM-FRIENDS = 0 | ||
16 | |||
17 | [ats] | ||
18 | WAN_QUOTA_IN = 1 GB | ||
19 | WAN_QUOTA_OUT = 1 GB | ||
20 | |||
21 | [dht] | ||
22 | START_ON_DEMAND = YES | ||
23 | PORT = 12370 | ||
24 | |||
25 | [regex] | ||
26 | START_ON_DEMAND = YES | ||
27 | |||
28 | [transport] | ||
29 | plugins = tcp | ||
30 | NEIGHBOUR_LIMIT = 50 | ||
31 | |||
32 | [nat] | ||
33 | DISABLEV6 = YES | ||
34 | BINDTO = 127.0.0.1 | ||
35 | ENABLE_UPNP = NO | ||
36 | BEHIND_NAT = NO | ||
37 | ALLOW_NAT = NO | ||
38 | INTERNAL_ADDRESS = 127.0.0.1 | ||
39 | EXTERNAL_ADDRESS = 127.0.0.1 | ||
diff --git a/src/service/regex/test_regex_eval_api.c b/src/service/regex/test_regex_eval_api.c new file mode 100644 index 000000000..475bfb7c8 --- /dev/null +++ b/src/service/regex/test_regex_eval_api.c | |||
@@ -0,0 +1,385 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_eval_api.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include <regex.h> | ||
27 | #include <time.h> | ||
28 | #include "regex_internal_lib.h" | ||
29 | #include "regex_test_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
/**
 * Expected outcome of matching a string against a regular expression.
 * 'match' is 0, the same value regexec() returns for a successful match.
 */
enum Match_Result
{
  match = 0,
  nomatch = 1
};

/**
 * A regular expression together with a list of test strings and the
 * outcome expected for each of them.
 */
struct Regex_String_Pair
{
  /**
   * The regular expression; const because the table entries are
   * string literals, which must never be written through.
   */
  const char *regex;

  /**
   * Number of entries of 'strings' / 'expected_results' that are
   * actually evaluated (at most 20).
   */
  int string_count;

  /**
   * Strings to match against 'regex'.
   */
  const char *strings[20];

  /**
   * Expected result for the string at the same index in 'strings'.
   */
  enum Match_Result expected_results[20];
};
45 | |||
46 | |||
47 | /** | ||
48 | * Random regex test. Generate a random regex as well as 'str_count' strings to | ||
49 | * match it against. Will match using GNUNET_REGEX implementation and compare | ||
50 | * the result to glibc regex result. 'rx_length' has to be smaller then | ||
51 | * 'max_str_len'. | ||
52 | * | ||
53 | * @param rx_length length of the regular expression. | ||
54 | * @param max_str_len maximum length of the random strings. | ||
55 | * @param str_count number of generated random strings. | ||
56 | * | ||
57 | * @return 0 on success, non 0 otherwise. | ||
58 | */ | ||
59 | int | ||
60 | test_random (unsigned int rx_length, unsigned int max_str_len, | ||
61 | unsigned int str_count) | ||
62 | { | ||
63 | unsigned int i; | ||
64 | char *rand_rx; | ||
65 | char *matching_str; | ||
66 | int eval; | ||
67 | int eval_check; | ||
68 | int eval_canonical; | ||
69 | int eval_canonical_check; | ||
70 | struct REGEX_INTERNAL_Automaton *dfa; | ||
71 | regex_t rx; | ||
72 | regmatch_t matchptr[1]; | ||
73 | char error[200]; | ||
74 | int result; | ||
75 | char *canonical_regex = NULL; | ||
76 | |||
77 | /* At least one string is needed for matching */ | ||
78 | GNUNET_assert (str_count > 0); | ||
79 | /* The string should be at least as long as the regex itself */ | ||
80 | GNUNET_assert (max_str_len >= rx_length); | ||
81 | |||
82 | /* Generate random regex and a string that matches the regex */ | ||
83 | matching_str = GNUNET_malloc (rx_length + 1); | ||
84 | rand_rx = REGEX_TEST_generate_random_regex (rx_length, matching_str); | ||
85 | |||
86 | /* Now match */ | ||
87 | result = 0; | ||
88 | for (i = 0; i < str_count; i++) | ||
89 | { | ||
90 | if (0 < i) | ||
91 | { | ||
92 | matching_str = REGEX_TEST_generate_random_string (max_str_len); | ||
93 | } | ||
94 | |||
95 | /* Match string using DFA */ | ||
96 | dfa = REGEX_INTERNAL_construct_dfa (rand_rx, strlen (rand_rx), 0); | ||
97 | if (NULL == dfa) | ||
98 | { | ||
99 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | ||
100 | goto error; | ||
101 | } | ||
102 | |||
103 | eval = REGEX_INTERNAL_eval (dfa, matching_str); | ||
104 | /* save the canonical regex for later comparison */ | ||
105 | canonical_regex = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (dfa)); | ||
106 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
107 | |||
108 | /* Match string using glibc regex */ | ||
109 | if (0 != regcomp (&rx, rand_rx, REG_EXTENDED)) | ||
110 | { | ||
111 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
112 | "Could not compile regex using regcomp: %s\n", rand_rx); | ||
113 | goto error; | ||
114 | } | ||
115 | |||
116 | eval_check = regexec (&rx, matching_str, 1, matchptr, 0); | ||
117 | regfree (&rx); | ||
118 | |||
119 | /* We only want to match the whole string, because that's what our DFA does, | ||
120 | * too. */ | ||
121 | if ((eval_check == 0) && | ||
122 | ((matchptr[0].rm_so != 0) || (matchptr[0].rm_eo != strlen ( | ||
123 | matching_str)) )) | ||
124 | eval_check = 1; | ||
125 | |||
126 | /* Match canonical regex */ | ||
127 | dfa = | ||
128 | REGEX_INTERNAL_construct_dfa (canonical_regex, strlen (canonical_regex), | ||
129 | 0); | ||
130 | if (NULL == dfa) | ||
131 | { | ||
132 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | ||
133 | goto error; | ||
134 | } | ||
135 | |||
136 | eval_canonical = REGEX_INTERNAL_eval (dfa, matching_str); | ||
137 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
138 | |||
139 | if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED)) | ||
140 | { | ||
141 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
142 | "Could not compile regex using regcomp: %s\n", | ||
143 | canonical_regex); | ||
144 | goto error; | ||
145 | } | ||
146 | |||
147 | eval_canonical_check = regexec (&rx, matching_str, 1, matchptr, 0); | ||
148 | regfree (&rx); | ||
149 | |||
150 | /* We only want to match the whole string, because that's what our DFA does, | ||
151 | * too. */ | ||
152 | if ((eval_canonical_check == 0) && | ||
153 | ((matchptr[0].rm_so != 0) || (matchptr[0].rm_eo != strlen ( | ||
154 | matching_str)) )) | ||
155 | eval_canonical_check = 1; | ||
156 | |||
157 | /* compare results */ | ||
158 | if ((eval_check != eval) || (eval_canonical != eval_canonical_check) ) | ||
159 | { | ||
160 | regerror (eval_check, &rx, error, sizeof error); | ||
161 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
162 | "Unexpected result:\nregex: %s\ncanonical_regex: %s\n\ | ||
163 | string: %s\ngnunet regex: %i\nglibc regex: %i\n\ | ||
164 | canonical regex: %i\ncanonical regex glibc: %i\n\ | ||
165 | glibc error: %s\n\n", rand_rx, canonical_regex, matching_str, | ||
166 | eval, eval_check, eval_canonical, eval_canonical_check, | ||
167 | error); | ||
168 | result += 1; | ||
169 | } | ||
170 | GNUNET_free (canonical_regex); | ||
171 | GNUNET_free (matching_str); | ||
172 | canonical_regex = NULL; | ||
173 | matching_str = NULL; | ||
174 | } | ||
175 | |||
176 | GNUNET_free (rand_rx); | ||
177 | |||
178 | return result; | ||
179 | |||
180 | error: | ||
181 | GNUNET_free (matching_str); | ||
182 | GNUNET_free (rand_rx); | ||
183 | GNUNET_free (canonical_regex); | ||
184 | return -1; | ||
185 | } | ||
186 | |||
187 | |||
188 | /** | ||
189 | * Automaton test that compares the result of matching regular expression 'rx' | ||
190 | * with the strings and expected results in 'rxstr' with the result of matching | ||
191 | * the same strings with glibc regex. | ||
192 | * | ||
193 | * @param a automaton. | ||
194 | * @param rx compiled glibc regex. | ||
195 | * @param rxstr regular expression and strings with expected results to | ||
196 | * match against. | ||
197 | * | ||
198 | * @return 0 on successful, non 0 otherwise | ||
199 | */ | ||
200 | int | ||
201 | test_automaton (struct REGEX_INTERNAL_Automaton *a, regex_t *rx, | ||
202 | struct Regex_String_Pair *rxstr) | ||
203 | { | ||
204 | int result; | ||
205 | int eval; | ||
206 | int eval_check; | ||
207 | char error[200]; | ||
208 | regmatch_t matchptr[1]; | ||
209 | int i; | ||
210 | |||
211 | if (NULL == a) | ||
212 | { | ||
213 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n"); | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | result = 0; | ||
218 | |||
219 | for (i = 0; i < rxstr->string_count; i++) | ||
220 | { | ||
221 | eval = REGEX_INTERNAL_eval (a, rxstr->strings[i]); | ||
222 | eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0); | ||
223 | |||
224 | /* We only want to match the whole string, because that's what our DFA does, | ||
225 | * too. */ | ||
226 | if ((eval_check == 0) && | ||
227 | ((matchptr[0].rm_so != 0) || | ||
228 | (matchptr[0].rm_eo != strlen (rxstr->strings[i])) )) | ||
229 | eval_check = 1; | ||
230 | |||
231 | if (((rxstr->expected_results[i] == match) && ((0 != eval) || (0 != | ||
232 | eval_check) )) | ||
233 | || ((rxstr->expected_results[i] == nomatch) && | ||
234 | ((0 == eval) || (0 == eval_check) ))) | ||
235 | { | ||
236 | result = 1; | ||
237 | regerror (eval_check, rx, error, sizeof error); | ||
238 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
239 | "Unexpected result:\nregex: %s\ncanonical_regex: %s\n" | ||
240 | "string: %s\nexpected result: %i\n" | ||
241 | "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\n" | ||
242 | "rm_so: %i\nrm_eo: %i\n\n", rxstr->regex, | ||
243 | REGEX_INTERNAL_get_canonical_regex (a), rxstr->strings[i], | ||
244 | rxstr->expected_results[i], eval, eval_check, error, | ||
245 | matchptr[0].rm_so, matchptr[0].rm_eo); | ||
246 | } | ||
247 | } | ||
248 | return result; | ||
249 | } | ||
250 | |||
251 | |||
252 | int | ||
253 | main (int argc, char *argv[]) | ||
254 | { | ||
255 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
256 | |||
257 | struct REGEX_INTERNAL_Automaton *a; | ||
258 | regex_t rx; | ||
259 | int i; | ||
260 | int check_nfa; | ||
261 | int check_dfa; | ||
262 | int check_rand; | ||
263 | char *check_proof; | ||
264 | |||
265 | struct Regex_String_Pair rxstr[19] = { | ||
266 | { "ab?(abcd)?", 5, | ||
267 | { "ababcd", "abab", "aabcd", "a", "abb" }, | ||
268 | { match, nomatch, match, match, nomatch } }, | ||
269 | { "ab(c|d)+c*(a(b|c)d)+", 5, | ||
270 | { "abcdcdcdcdddddabd", "abcd", | ||
271 | "abcddddddccccccccccccccccccccccccabdacdabd", | ||
272 | "abccccca", "abcdcdcdccdabdabd" }, | ||
273 | { match, nomatch, match, nomatch, match } }, | ||
274 | { "ab+c*(a(bx|c)d)+", 5, | ||
275 | { "abcdcdcdcdddddabd", "abcd", | ||
276 | "abcddddddccccccccccccccccccccccccabdacdabd", | ||
277 | "abccccca", "abcdcdcdccdabdabd" }, | ||
278 | { nomatch, nomatch, nomatch, nomatch, nomatch } }, | ||
279 | { "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1, | ||
280 | { "kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg" }, | ||
281 | { nomatch } }, | ||
282 | { | ||
283 | "k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", | ||
284 | 1, | ||
285 | { "kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg" }, | ||
286 | { nomatch } | ||
287 | }, | ||
288 | { | ||
289 | "F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", | ||
290 | 1, | ||
291 | { "osfjsodfonONONOnosndfsdnfsd" }, | ||
292 | { nomatch } | ||
293 | }, | ||
294 | { | ||
295 | "V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", | ||
296 | 1, | ||
297 | { "VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C" }, | ||
298 | { nomatch } | ||
299 | }, | ||
300 | { "(bla)*", 8, | ||
301 | { "", "bla", "blabla", "bl", "la", "b", "l", "a" }, | ||
302 | { match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch } }, | ||
303 | { "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8, | ||
304 | { "ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", | ||
305 | "l", | ||
306 | "a" }, | ||
307 | { nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch } }, | ||
308 | { "a|aa*a", 6, | ||
309 | { "", "a", "aa", "aaa", "aaaa", "aaaaa" }, | ||
310 | { nomatch, match, match, match, match, match } }, | ||
311 | { "ab(c|d)+c*(a(b|c)+d)+(bla)+", 1, | ||
312 | { "abcabdblaacdbla" }, | ||
313 | { nomatch } }, | ||
314 | { "(ac|b)+", 8, | ||
315 | { "b", "bb", "ac", "", "acb", "bacbacac", "acacac", "abc" }, | ||
316 | { match, match, match, nomatch, match, match, match, nomatch } }, | ||
317 | { "(ab|c)+", 7, | ||
318 | { "", "ab", "c", "abc", "ababcc", "acc", "abac" }, | ||
319 | { nomatch, match, match, match, match, nomatch, nomatch } }, | ||
320 | { "((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1, | ||
321 | { "", "2j2jADK", "j2jADK" }, | ||
322 | { nomatch, match, match } }, | ||
323 | { "((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2, | ||
324 | { "", "2j2jjADK", "j2jADK" }, | ||
325 | { nomatch, match, match } }, | ||
326 | { "ab(c|d)+c*(a(b|c)d)+", 1, | ||
327 | { "abacd" }, | ||
328 | { nomatch } }, | ||
329 | { "d|5kl", 1, | ||
330 | { "d5kl" }, | ||
331 | { nomatch } }, | ||
332 | { "a()b", 1, | ||
333 | { "ab" }, | ||
334 | { match } }, | ||
335 | { | ||
336 | "GNVPN-0001-PAD(001110101001001010(0|1)*|001110101001001010000(0|1)*|001110101001001010001(0|1)*|001110101001001010010(0|1)*|001110101001001010011(0|1)*|001110101001001010100(0|1)*|001110101001001010101(0|1)*|001110101001001010110(0|1)*|001110101001001010111(0|1)*|0011101010110110(0|1)*|001110101011011000000(0|1)*|001110101011011000001(0|1)*|001110101011011000010(0|1)*|001110101011011000011(0|1)*|001110101011011000100(0|1)*|001110101011011000101(0|1)*|001110101011011000110(0|1)*|001110101011011000111(0|1)*|001110101011011001000(0|1)*|001110101011011001001(0|1)*|001110101011011001010(0|1)*|001110101011011001011(0|1)*|001110101011011001100(0|1)*|001110101011011001101(0|1)*|001110101011011001110(0|1)*|001110101011011001111(0|1)*|001110101011011010000(0|1)*|001110101011011010001(0|1)*|001110101011011010010(0|1)*|001110101011011010011(0|1)*|001110101011011010100(0|1)*|001110101011011010101(0|1)*|001110101011011010110(0|1)*|001110101011011010111(0|1)*|001110101011011011000(0|1)*|001110101011011011001(0|1)*|001110101011011011010(0|1)*|001110101011011011011(0|1)*|001110101011011011100(0|1)*|001110101011011011101(0|1)*|001110101011011011110(0|1)*|001110101011011011111(0|1)*|0011101110111101(0|1)*|001110111011110100000(0|1)*|001110111011110100001(0|1)*|001110111011110100010(0|1)*|001110111011110100011(0|1)*|001110111011110100100(0|1)*|001110111011110100101(0|1)*|001110111011110100110(0|1)*|001110111011110100111(0|1)*|001110111011110101000(0|1)*|001110111011110101001(0|1)*|001110111011110101010(0|1)*|001110111011110101011(0|1)*|001110111011110101100(0|1)*|001110111011110101101(0|1)*|001110111011110101110(0|1)*|001110111011110101111(0|1)*|001110111011110110000(0|1)*|001110111011110110001(0|1)*|001110111011110110010(0|1)*|001110111011110110011(0|1)*|001110111011110110100(0|1)*|001110111011110110101(0|1)*|001110111011110110110(0|1)*|001110111011110110111(0|1)*|001110111011110111000(0|1)*|001110111011110111001(0|1)*|001110111011110111010(0|1)*|001110111011110111011(0|1)*|001
110111011110111100(0|1)*|001110111011110111101(0|1)*|001110111011110111110(0|1)*|0111010001010110(0|1)*|011101000101011000000(0|1)*|011101000101011000001(0|1)*|011101000101011000010(0|1)*|011101000101011000011(0|1)*|011101000101011000100(0|1)*|011101000101011000101(0|1)*|011101000101011000110(0|1)*|011101000101011000111(0|1)*|011101000101011001000(0|1)*|011101000101011001001(0|1)*|011101000101011001010(0|1)*|011101000101011001011(0|1)*|011101000101011001100(0|1)*|011101000101011001101(0|1)*|011101000101011001110(0|1)*|011101000101011001111(0|1)*|011101000101011010000(0|1)*|011101000101011010001(0|1)*|011101000101011010010(0|1)*|011101000101011010011(0|1)*|011101000101011010100(0|1)*|011101000101011010101(0|1)*|011101000101011010110(0|1)*|011101000101011010111(0|1)*|011101000101011011000(0|1)*|011101000101011011001(0|1)*|011101000101011011010(0|1)*|011101000101011011011(0|1)*|011101000101011011100(0|1)*|011101000101011011101(0|1)*|011101000101011011110(0|1)*|011101000101011011111(0|1)*|0111010001010111(0|1)*|011101000101011100000(0|1)*|011101000101011100001(0|1)*|011101000101011100010(0|1)*|011101000101011100011(0|1)*|011101000101011100100(0|1)*|011101000101011100101(0|1)*|011101000101011100110(0|1)*|011101000101011100111(0|1)*|011101000101011101000(0|1)*|011101000101011101001(0|1)*|011101000101011101010(0|1)*|011101000101011101011(0|1)*|011101000101011101100(0|1)*|011101000101011101101(0|1)*|011101000101011101110(0|1)*|011101000101011101111(0|1)*|011101000101011110000(0|1)*|011101000101011110001(0|1)*|011101000101011110010(0|1)*|011101000101011110011(0|1)*|011101000101011110100(0|1)*|011101000101011110101(0|1)*|011101000101011110110(0|1)*|011101000101011110111(0|1)*|011101000101011111000(0|1)*|011101000101011111001(0|1)*|011101000101011111010(0|1)*|011101000101011111011(0|1)*|011101000101011111100(0|1)*|011101000101011111101(0|1)*|011101000101011111110(0|1)*|011101000101011111111(0|1)*|0111010001011000(0|1)*|011101000101100000000(0|1)*|011101000101100000001(0|1)*|01
1101000101100000010(0|1)*|011101000101100000011(0|1)*|011101000101100000100(0|1)*|011101000101100000101(0|1)*|011101000101100000110(0|1)*|011101000101100000111(0|1)*|011101000101100001000(0|1)*|011101000101100001001(0|1)*|011101000101100001010(0|1)*|011101000101100001011(0|1)*|011101000101100001100(0|1)*|011101000101100001101(0|1)*|011101000101100001110(0|1)*|011101000101100001111(0|1)*|011101000101100010000(0|1)*|011101000101100010001(0|1)*|011101000101100010010(0|1)*|011101000101100010011(0|1)*|011101000101100010100(0|1)*|011101000101100010101(0|1)*|011101000101100010110(0|1)*|011101000101100010111(0|1)*|011101000101100011000(0|1)*|011101000101100011001(0|1)*|011101000101100011010(0|1)*|011101000101100011011(0|1)*|011101000101100011100(0|1)*|011101000101100011101(0|1)*|011101000101100011110(0|1)*|011101000101100011111(0|1)*|01110100010110010(0|1)*|011101000101100100000(0|1)*|011101000101100100001(0|1)*|011101000101100100010(0|1)*|011101000101100100011(0|1)*|011101000101100100100(0|1)*|011101000101100100101(0|1)*|011101000101100100110(0|1)*|011101000101100100111(0|1)*|011101000101100101000(0|1)*|011101000101100101001(0|1)*|011101000101100101010(0|1)*|011101000101100101011(0|1)*|011101000101100101100(0|1)*|011101000101100101101(0|1)*|011101000101100101110(0|1)*|011101000101100101111(0|1)*|011101000101100101111000(0|1)*|1100101010011100(0|1)*|110010101001110000000(0|1)*|110010101001110000000001(0|1)*|110010101001110000000010(0|1)*|110010101001110000000110(0|1)*|110010101001110000001(0|1)*|110010101001110000001000(0|1)*|110010101001110000001001(0|1)*|110010101001110000001010(0|1)*|110010101001110000001011(0|1)*|110010101001110000001101(0|1)*|110010101001110000001110(0|1)*|110010101001110000010(0|1)*|110010101001110000011(0|1)*|110010101001110000100(0|1)*|110010101001110000101(0|1)*|110010101001110000110(0|1)*|110010101001110000111(0|1)*|110010101001110001000(0|1)*|110010101001110001001(0|1)*|110010101001110001010(0|1)*|110010101001110001011(0|1)*|110010101001110001100
(0|1)*|110010101001110001101(0|1)*|110010101001110001110(0|1)*|110010101001110001111(0|1)*|110010101001110010000(0|1)*|110010101001110010001(0|1)*|110010101001110010010(0|1)*|110010101001110010011(0|1)*|110010101001110010100(0|1)*|110010101001110010101(0|1)*|110010101001110010110(0|1)*|110010101001110010111(0|1)*|110010101001110011000(0|1)*|110010101001110011001(0|1)*|110010101001110011010(0|1)*|110010101001110011011(0|1)*|110010101001110011100(0|1)*|110010101001110011101(0|1)*|110010101001110011110(0|1)*|110010101001110011111(0|1)*|1101101010111010(0|1)*|110110101011101000000(0|1)*|110110101011101000000001(0|1)*|110110101011101000001000(0|1)*|110110101011101000001001(0|1)*|110110101011101000001010(0|1)*|110110101011101000001011(0|1)*|110110101011101000001100(0|1)*|110110101011101000001110(0|1)*|110110101011101000001111(0|1)*|110110101011101000010(0|1)*|110110101011101000010000(0|1)*|110110101011101000010001(0|1)*|110110101011101000010010(0|1)*|110110101011101000010011(0|1)*|110110101011101000011(0|1)*|110110101011101000100(0|1)*|110110101011101000101(0|1)*|110110101011101000110(0|1)*|110110101011101000111(0|1)*|110110101011101001000(0|1)*|110110101011101001001(0|1)*|110110101011101001010(0|1)*|110110101011101001011(0|1)*|110110101011101001100(0|1)*|110110101011101001101(0|1)*|110110101011101001110(0|1)*|110110101011101001111(0|1)*|110110101011101010000(0|1)*|110110101011101010001(0|1)*|110110101011101010010(0|1)*|110110101011101010011(0|1)*|110110101011101010100(0|1)*|110110101011101010101(0|1)*|110110101011101010110(0|1)*|110110101011101010111(0|1)*|110110101011101011000(0|1)*|110110101011101011001(0|1)*|110110101011101011010(0|1)*|110110101011101011011(0|1)*|110110101011101011100(0|1)*|110110101011101011101(0|1)*|110110101011101011110(0|1)*|110110101011101011111(0|1)*|1101101011010100(0|1)*|110110101101010000000(0|1)*|110110101101010000001(0|1)*|110110101101010000010(0|1)*|110110101101010000011(0|1)*|110110101101010000100(0|1)*|110110101101010000101(0|1)*|1101101
01101010000110(0|1)*|110110101101010000111(0|1)*|110110101101010001000(0|1)*|110110101101010001001(0|1)*|110110101101010001010(0|1)*|110110101101010001011(0|1)*|110110101101010001100(0|1)*|110110101101010001101(0|1)*|110110101101010001110(0|1)*|110110101101010001111(0|1)*|110110101101010010000(0|1)*|110110101101010010001(0|1)*|110110101101010010010(0|1)*|110110101101010010011(0|1)*|110110101101010010100(0|1)*|1101101011010100101000(0|1)*|110110101101010010101(0|1)*|110110101101010010110(0|1)*|110110101101010010111(0|1)*|110110101101010011000(0|1)*|110110101101010011010(0|1)*|110110101101010011011(0|1)*|110110101101010011100(0|1)*|110110101101010011101(0|1)*|110110101101010011110(0|1)*|110110101101010011111(0|1)*|1101111010100100(0|1)*|110111101010010000000(0|1)*|110111101010010000001(0|1)*|110111101010010000010(0|1)*|110111101010010000011(0|1)*|110111101010010000100(0|1)*|110111101010010000101(0|1)*|110111101010010000110(0|1)*|110111101010010000111(0|1)*|110111101010010001000(0|1)*|110111101010010001001(0|1)*|110111101010010001010(0|1)*|110111101010010001011(0|1)*|110111101010010001100(0|1)*|110111101010010001101(0|1)*|110111101010010001110(0|1)*|110111101010010001111(0|1)*|110111101010010010000(0|1)*|110111101010010010001(0|1)*|110111101010010010010(0|1)*|110111101010010010011(0|1)*|110111101010010010100(0|1)*|110111101010010010101(0|1)*|110111101010010010110(0|1)*|110111101010010010111(0|1)*|110111101010010011000(0|1)*|110111101010010011001(0|1)*|110111101010010011010(0|1)*|110111101010010011011(0|1)*|110111101010010011100(0|1)*|110111101010010011101(0|1)*|110111101010010011110(0|1)*|110111101010010011111(0|1)*|11011110101001010(0|1)*|110111101010010100000(0|1)*|110111101010010100001(0|1)*|110111101010010100010(0|1)*|110111101010010100011(0|1)*|110111101010010100100(0|1)*|110111101010010100101(0|1)*|110111101010010100110(0|1)*|110111101010010100111(0|1)*|110111101010010101000(0|1)*|110111101010010101001(0|1)*|110111101010010101010(0|1)*|110111101010010101011(0|1)*
|110111101010010101100(0|1)*|110111101010010101101(0|1)*|110111101010010101110(0|1)*|110111101010010101111(0|1)*)", | ||
337 | 2, | ||
338 | { "GNVPN-0001-PAD1101111010100101011101010101010101", | ||
339 | "GNVPN-0001-PAD11001010100111000101101010101" }, | ||
340 | { match, match } | ||
341 | } | ||
342 | }; | ||
343 | |||
344 | check_nfa = 0; | ||
345 | check_dfa = 0; | ||
346 | check_rand = 0; | ||
347 | |||
348 | for (i = 0; i < 19; i++) | ||
349 | { | ||
350 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) | ||
351 | { | ||
352 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
353 | "Could not compile regex using regcomp()\n"); | ||
354 | return 1; | ||
355 | } | ||
356 | |||
357 | /* NFA test */ | ||
358 | a = REGEX_INTERNAL_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex)); | ||
359 | check_nfa += test_automaton (a, &rx, &rxstr[i]); | ||
360 | REGEX_INTERNAL_automaton_destroy (a); | ||
361 | |||
362 | /* DFA test */ | ||
363 | a = REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), | ||
364 | 0); | ||
365 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | ||
366 | check_proof = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (a)); | ||
367 | REGEX_INTERNAL_automaton_destroy (a); | ||
368 | |||
369 | a = REGEX_INTERNAL_construct_dfa (check_proof, strlen (check_proof), 0); | ||
370 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | ||
371 | REGEX_INTERNAL_automaton_destroy (a); | ||
372 | if (0 != check_dfa) | ||
373 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof); | ||
374 | GNUNET_free (check_proof); | ||
375 | |||
376 | regfree (&rx); | ||
377 | } | ||
378 | |||
379 | /* Random tests */ | ||
380 | srand (time (NULL)); | ||
381 | for (i = 0; i < 20; i++) | ||
382 | check_rand += test_random (50, 60, 10); | ||
383 | |||
384 | return check_nfa + check_dfa + check_rand; | ||
385 | } | ||
diff --git a/src/service/regex/test_regex_graph_api.c b/src/service/regex/test_regex_graph_api.c new file mode 100644 index 000000000..5c12ea229 --- /dev/null +++ b/src/service/regex/test_regex_graph_api.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_graph_api.c | ||
22 | * @brief test for regex_graph.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include <regex.h> | ||
27 | #include <time.h> | ||
28 | #include "regex_internal_lib.h" | ||
29 | #include "regex_test_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
32 | #define KEEP_FILES 1 | ||
33 | |||
34 | /** | ||
35 | * Check if 'filename' exists and is not empty. | ||
36 | * | ||
37 | * @param filename name of the file that should be checked | ||
38 | * | ||
39 | * @return 0 if ok, non 0 on error. | ||
40 | */ | ||
41 | static int | ||
42 | filecheck (const char *filename) | ||
43 | { | ||
44 | int error = 0; | ||
45 | FILE *fp; | ||
46 | |||
47 | /* Check if file was created and delete it again */ | ||
48 | if (NULL == (fp = fopen (filename, "r"))) | ||
49 | { | ||
50 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not find graph %s\n", filename); | ||
51 | return 1; | ||
52 | } | ||
53 | |||
54 | GNUNET_break (0 == fseek (fp, 0L, SEEK_END)); | ||
55 | if (1 > ftell (fp)) | ||
56 | { | ||
57 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
58 | "Graph writing failed, got empty file (%s)!\n", filename); | ||
59 | error = 2; | ||
60 | } | ||
61 | |||
62 | GNUNET_assert (0 == fclose (fp)); | ||
63 | |||
64 | if (! KEEP_FILES) | ||
65 | { | ||
66 | if (0 != unlink (filename)) | ||
67 | GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_ERROR, "unlink", filename); | ||
68 | } | ||
69 | return error; | ||
70 | } | ||
71 | |||
72 | |||
73 | int | ||
74 | main (int argc, char *argv[]) | ||
75 | { | ||
76 | int error; | ||
77 | struct REGEX_INTERNAL_Automaton *a; | ||
78 | unsigned int i; | ||
79 | const char *filename = "test_graph.dot"; | ||
80 | |||
81 | const char *regex[12] = { | ||
82 | "ab(c|d)+c*(a(b|c)+d)+(bla)+", | ||
83 | "(bla)*", | ||
84 | "b(lab)*la", | ||
85 | "(ab)*", | ||
86 | "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", | ||
87 | "z(abc|def)?xyz", | ||
88 | "1*0(0|1)*", | ||
89 | "a*b*", | ||
90 | "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", | ||
91 | "a", | ||
92 | "a|b", | ||
93 | "PADPADPADPADPADPabcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd" | ||
94 | }; | ||
95 | |||
96 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
97 | error = 0; | ||
98 | for (i = 0; i < 12; i++) | ||
99 | { | ||
100 | /* Check NFA graph creation */ | ||
101 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
102 | REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); | ||
103 | REGEX_INTERNAL_automaton_destroy (a); | ||
104 | error += filecheck (filename); | ||
105 | |||
106 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
107 | REGEX_TEST_automaton_save_graph (a, filename, | ||
108 | REGEX_TEST_GRAPH_DEFAULT | ||
109 | | REGEX_TEST_GRAPH_VERBOSE); | ||
110 | REGEX_INTERNAL_automaton_destroy (a); | ||
111 | error += filecheck (filename); | ||
112 | |||
113 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
114 | REGEX_TEST_automaton_save_graph (a, filename, | ||
115 | REGEX_TEST_GRAPH_DEFAULT | ||
116 | | REGEX_TEST_GRAPH_COLORING); | ||
117 | REGEX_INTERNAL_automaton_destroy (a); | ||
118 | error += filecheck (filename); | ||
119 | |||
120 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
121 | REGEX_TEST_automaton_save_graph (a, filename, | ||
122 | REGEX_TEST_GRAPH_DEFAULT | ||
123 | | REGEX_TEST_GRAPH_VERBOSE | ||
124 | | REGEX_TEST_GRAPH_COLORING); | ||
125 | REGEX_INTERNAL_automaton_destroy (a); | ||
126 | error += filecheck (filename); | ||
127 | |||
128 | |||
129 | /* Check DFA graph creation */ | ||
130 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); | ||
131 | REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); | ||
132 | REGEX_INTERNAL_automaton_destroy (a); | ||
133 | error += filecheck (filename); | ||
134 | |||
135 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); | ||
136 | REGEX_TEST_automaton_save_graph (a, filename, | ||
137 | REGEX_TEST_GRAPH_DEFAULT | ||
138 | | REGEX_TEST_GRAPH_VERBOSE); | ||
139 | REGEX_INTERNAL_automaton_destroy (a); | ||
140 | error += filecheck (filename); | ||
141 | |||
142 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); | ||
143 | REGEX_TEST_automaton_save_graph (a, filename, | ||
144 | REGEX_TEST_GRAPH_DEFAULT | ||
145 | | REGEX_TEST_GRAPH_COLORING); | ||
146 | REGEX_INTERNAL_automaton_destroy (a); | ||
147 | error += filecheck (filename); | ||
148 | |||
149 | |||
150 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 4); | ||
151 | REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); | ||
152 | REGEX_INTERNAL_automaton_destroy (a); | ||
153 | error += filecheck (filename); | ||
154 | } | ||
155 | |||
156 | return error; | ||
157 | } | ||
diff --git a/src/service/regex/test_regex_integration.c b/src/service/regex/test_regex_integration.c new file mode 100644 index 000000000..1f422e5a3 --- /dev/null +++ b/src/service/regex/test_regex_integration.c | |||
@@ -0,0 +1,211 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2013, 2015 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_integration.c | ||
22 | * @brief base test case for regex integration with VPN; | ||
23 | * tests that the regexes generated by the TUN API | ||
24 | * for IP addresses work (for some simple cases) | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_applications.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "gnunet_testing_lib.h" | ||
31 | #include "gnunet_regex_service.h" | ||
32 | |||
33 | |||
/**
 * How long until we really give up on a particular testcase portion?
 */
#define TOTAL_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, \
                                                     600)

/**
 * How long until we give up on any particular operation (and retry)?
 */
#define BASE_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 3)


/**
 * Announcement handle created in run() from the IPv4 policy regex;
 * cancelled and reset to NULL in end().
 */
static struct GNUNET_REGEX_Announcement *a4;

/**
 * Search handle created in run() from the IPv4 search string;
 * cancelled and reset to NULL in end().
 */
static struct GNUNET_REGEX_Search *s4;

/**
 * Announcement handle created in run() from the IPv6 policy regex;
 * cancelled and reset to NULL in end().
 */
static struct GNUNET_REGEX_Announcement *a6;

/**
 * Search handle created in run() from the IPv6 search string;
 * cancelled and reset to NULL in end().
 */
static struct GNUNET_REGEX_Search *s6;

/**
 * Value returned by main(): starts at 1, is set to 0 by end() and
 * set back to 1 by end_badly().
 */
static int ok = 1;

/**
 * Scheduler task: assigned in run() (delayed end_badly() after
 * #TOTAL_TIMEOUT), replaced in found_cb() by an immediate end() task,
 * and reset to NULL by end().
 */
static struct GNUNET_SCHEDULER_Task *die_task;
57 | |||
58 | |||
59 | static void | ||
60 | end (void *cls) | ||
61 | { | ||
62 | die_task = NULL; | ||
63 | GNUNET_REGEX_announce_cancel (a4); | ||
64 | a4 = NULL; | ||
65 | GNUNET_REGEX_search_cancel (s4); | ||
66 | s4 = NULL; | ||
67 | GNUNET_REGEX_announce_cancel (a6); | ||
68 | a6 = NULL; | ||
69 | GNUNET_REGEX_search_cancel (s6); | ||
70 | s6 = NULL; | ||
71 | ok = 0; | ||
72 | } | ||
73 | |||
74 | |||
75 | static void | ||
76 | end_badly () | ||
77 | { | ||
78 | fprintf (stderr, "%s", "Testcase failed (timeout).\n"); | ||
79 | end (NULL); | ||
80 | ok = 1; | ||
81 | } | ||
82 | |||
83 | |||
84 | /** | ||
85 | * Search callback function, invoked for every result that was found. | ||
86 | * | ||
87 | * @param cls Closure provided in #GNUNET_REGEX_search(). | ||
88 | * @param id Peer providing a regex that matches the string. | ||
89 | * @param get_path Path of the get request. | ||
90 | * @param get_path_length Length of @a get_path. | ||
91 | * @param put_path Path of the put request. | ||
92 | * @param put_path_length Length of the @a put_path. | ||
93 | */ | ||
94 | static void | ||
95 | found_cb (void *cls, | ||
96 | const struct GNUNET_PeerIdentity *id, | ||
97 | const struct GNUNET_PeerIdentity *get_path, | ||
98 | unsigned int get_path_length, | ||
99 | const struct GNUNET_PeerIdentity *put_path, | ||
100 | unsigned int put_path_length) | ||
101 | { | ||
102 | const char *str = cls; | ||
103 | static int found; | ||
104 | |||
105 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
106 | "IPv%s-exit found\n", | ||
107 | str); | ||
108 | if (0 == strcmp (str, "4")) | ||
109 | found |= 4; | ||
110 | if (0 == strcmp (str, "6")) | ||
111 | found |= 2; | ||
112 | if ((4 | 2) == found) | ||
113 | { | ||
114 | GNUNET_SCHEDULER_cancel (die_task); | ||
115 | die_task = | ||
116 | GNUNET_SCHEDULER_add_now (&end, NULL); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | |||
121 | static void | ||
122 | run (void *cls, | ||
123 | const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
124 | struct GNUNET_TESTING_Peer *peer) | ||
125 | { | ||
126 | char rxstr4[GNUNET_TUN_IPV4_REGEXLEN]; | ||
127 | char rxstr6[GNUNET_TUN_IPV6_REGEXLEN]; | ||
128 | char *p4r; | ||
129 | char *p6r; | ||
130 | char *p4; | ||
131 | char *p6; | ||
132 | char *ss4; | ||
133 | char *ss6; | ||
134 | struct in_addr i4; | ||
135 | struct in6_addr i6; | ||
136 | |||
137 | die_task = | ||
138 | GNUNET_SCHEDULER_add_delayed (TOTAL_TIMEOUT, | ||
139 | &end_badly, NULL); | ||
140 | GNUNET_assert (1 == | ||
141 | inet_pton (AF_INET, | ||
142 | "127.0.0.1", | ||
143 | &i4)); | ||
144 | GNUNET_assert (1 == | ||
145 | inet_pton (AF_INET6, | ||
146 | "::1:5", | ||
147 | &i6)); | ||
148 | GNUNET_TUN_ipv4toregexsearch (&i4, | ||
149 | 8080, | ||
150 | rxstr4); | ||
151 | GNUNET_TUN_ipv6toregexsearch (&i6, | ||
152 | 8686, | ||
153 | rxstr6); | ||
154 | GNUNET_asprintf (&ss4, | ||
155 | "%s%s", | ||
156 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
157 | rxstr4); | ||
158 | GNUNET_asprintf (&ss6, | ||
159 | "%s%s", | ||
160 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
161 | rxstr6); | ||
162 | p4r = GNUNET_TUN_ipv4policy2regex ("0.0.0.0/0:!25;"); | ||
163 | p6r = GNUNET_TUN_ipv6policy2regex ("::/0:!25;"); | ||
164 | GNUNET_asprintf (&p4, | ||
165 | "%s%s", | ||
166 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
167 | p4r); | ||
168 | GNUNET_asprintf (&p6, | ||
169 | "%s%s", | ||
170 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
171 | p6r); | ||
172 | GNUNET_free (p4r); | ||
173 | GNUNET_free (p6r); | ||
174 | a4 = GNUNET_REGEX_announce (cfg, | ||
175 | p4, | ||
176 | GNUNET_TIME_relative_multiply ( | ||
177 | GNUNET_TIME_UNIT_SECONDS, | ||
178 | 5), | ||
179 | 1); | ||
180 | a6 = GNUNET_REGEX_announce (cfg, | ||
181 | p6, | ||
182 | GNUNET_TIME_relative_multiply ( | ||
183 | GNUNET_TIME_UNIT_SECONDS, | ||
184 | 5), | ||
185 | 1); | ||
186 | GNUNET_free (p4); | ||
187 | GNUNET_free (p6); | ||
188 | |||
189 | s4 = GNUNET_REGEX_search (cfg, | ||
190 | ss4, | ||
191 | &found_cb, "4"); | ||
192 | s6 = GNUNET_REGEX_search (cfg, | ||
193 | ss6, | ||
194 | &found_cb, "6"); | ||
195 | GNUNET_free (ss4); | ||
196 | GNUNET_free (ss6); | ||
197 | } | ||
198 | |||
199 | |||
200 | int | ||
201 | main (int argc, char *argv[]) | ||
202 | { | ||
203 | if (0 != GNUNET_TESTING_peer_run ("test-regex-integration", | ||
204 | "test_regex_api_data.conf", | ||
205 | &run, NULL)) | ||
206 | return 1; | ||
207 | return ok; | ||
208 | } | ||
209 | |||
210 | |||
211 | /* end of test_regex_integration.c */ | ||
diff --git a/src/service/regex/test_regex_iterate_api.c b/src/service/regex/test_regex_iterate_api.c new file mode 100644 index 000000000..6355e8218 --- /dev/null +++ b/src/service/regex/test_regex_iterate_api.c | |||
@@ -0,0 +1,262 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_iterate_api.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include <regex.h> | ||
27 | #include <time.h> | ||
28 | #include "regex_internal_lib.h" | ||
29 | #include "../../plugin/regex/regex_block_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
/**
 * Regex initial padding.
 */
#define INITIAL_PADDING "PADPADPADPADPADP"

/**
 * Set to GNUNET_YES to save a debug graph.
 */
#define REGEX_INTERNAL_ITERATE_SAVE_DEBUG_GRAPH GNUNET_NO

/**
 * Number of edges seen by key_iterator(); reset to 0 in main() before
 * each automaton of the first test loop is iterated.
 */
static unsigned int transition_counter;

/**
 * Closure passed to key_iterator().
 */
struct IteratorContext
{
  /* Incremented by key_iterator() whenever a proof check fails. */
  int error;
  /* GNUNET_YES if key_iterator() should write graph output to graph_filep. */
  int should_save_graph;
  /* Output file for the graph; only written when should_save_graph is set. */
  FILE *graph_filep;
  /* Number of entries in strings. */
  unsigned int string_count;
  /* Strings that proofs are compared against. */
  char *const *strings;
  /* Incremented each time a proof equals one of the strings. */
  unsigned int match_count;
};

/**
 * A test regex together with the strings that main() hands to the
 * iterator context for that regex.
 */
struct RegexStringPair
{
  /* The regular expression. */
  char *regex;
  /* Number of used entries in strings. */
  unsigned int string_count;
  /* Strings to look for among the proofs (at most 20). */
  char *strings[20];
};
60 | |||
61 | |||
62 | static void | ||
63 | key_iterator (void *cls, const struct GNUNET_HashCode *key, | ||
64 | const char *proof, | ||
65 | int accepting, unsigned int num_edges, | ||
66 | const struct REGEX_BLOCK_Edge *edges) | ||
67 | { | ||
68 | unsigned int i; | ||
69 | struct IteratorContext *ctx = cls; | ||
70 | char *out_str; | ||
71 | char *state_id = GNUNET_strdup (GNUNET_h2s (key)); | ||
72 | |||
73 | GNUNET_assert (NULL != proof); | ||
74 | if (GNUNET_YES == ctx->should_save_graph) | ||
75 | { | ||
76 | if (GNUNET_YES == accepting) | ||
77 | GNUNET_asprintf (&out_str, "\"%s\" [shape=doublecircle]\n", state_id); | ||
78 | else | ||
79 | GNUNET_asprintf (&out_str, "\"%s\" [shape=circle]\n", state_id); | ||
80 | fwrite (out_str, strlen (out_str), 1, ctx->graph_filep); | ||
81 | GNUNET_free (out_str); | ||
82 | |||
83 | for (i = 0; i < num_edges; i++) | ||
84 | { | ||
85 | transition_counter++; | ||
86 | GNUNET_asprintf (&out_str, "\"%s\" -> \"%s\" [label = \"%s (%s)\"]\n", | ||
87 | state_id, GNUNET_h2s (&edges[i].destination), | ||
88 | edges[i].label, proof); | ||
89 | fwrite (out_str, strlen (out_str), 1, ctx->graph_filep); | ||
90 | |||
91 | GNUNET_free (out_str); | ||
92 | } | ||
93 | } | ||
94 | else | ||
95 | { | ||
96 | for (i = 0; i < num_edges; i++) | ||
97 | transition_counter++; | ||
98 | } | ||
99 | |||
100 | for (i = 0; i < ctx->string_count; i++) | ||
101 | { | ||
102 | if (0 == strcmp (proof, ctx->strings[i])) | ||
103 | ctx->match_count++; | ||
104 | } | ||
105 | |||
106 | if (GNUNET_OK != REGEX_BLOCK_check_proof (proof, strlen (proof), key)) | ||
107 | { | ||
108 | ctx->error++; | ||
109 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
110 | "Proof check failed: proof: %s key: %s\n", proof, state_id); | ||
111 | } | ||
112 | GNUNET_free (state_id); | ||
113 | } | ||
114 | |||
115 | |||
116 | int | ||
117 | main (int argc, char *argv[]) | ||
118 | { | ||
119 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
120 | |||
121 | int error; | ||
122 | struct REGEX_INTERNAL_Automaton *dfa; | ||
123 | unsigned int i; | ||
124 | unsigned int num_transitions; | ||
125 | char *filename = NULL; | ||
126 | struct IteratorContext ctx = { 0, 0, NULL, 0, NULL, 0 }; | ||
127 | |||
128 | error = 0; | ||
129 | |||
130 | const struct RegexStringPair rxstr[13] = { | ||
131 | { INITIAL_PADDING "ab(c|d)+c*(a(b|c)+d)+(bla)+", 2, | ||
132 | { INITIAL_PADDING "abcdcdca", INITIAL_PADDING "abcabdbl" } }, | ||
133 | { INITIAL_PADDING | ||
134 | "abcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd", 1, | ||
135 | { INITIAL_PADDING "abcdefgh" } }, | ||
136 | { INITIAL_PADDING "VPN-4-1(0|1)*", 2, | ||
137 | { INITIAL_PADDING "VPN-4-10", INITIAL_PADDING "VPN-4-11" } }, | ||
138 | { INITIAL_PADDING "(a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*)", 2, | ||
139 | { INITIAL_PADDING "aaaaaaaa", INITIAL_PADDING "aaXXyyyc" } }, | ||
140 | { INITIAL_PADDING "a*", 1, { INITIAL_PADDING "aaaaaaaa" } }, | ||
141 | { INITIAL_PADDING "xzxzxzxzxz", 1, { INITIAL_PADDING "xzxzxzxz" } }, | ||
142 | { INITIAL_PADDING "xyz*", 1, { INITIAL_PADDING "xyzzzzzz" } }, | ||
143 | { INITIAL_PADDING | ||
144 | "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", | ||
145 | 2, { INITIAL_PADDING "abcd:000", INITIAL_PADDING "abcd:101" } }, | ||
146 | { INITIAL_PADDING "(x*|(0|1|2)(a|b|c|d)+)", 2, | ||
147 | { INITIAL_PADDING "xxxxxxxx", INITIAL_PADDING "0abcdbad" } }, | ||
148 | { INITIAL_PADDING "(0|1)(0|1)23456789ABC", 1, | ||
149 | { INITIAL_PADDING "11234567" } }, | ||
150 | { INITIAL_PADDING "0*123456789ABC*", 3, | ||
151 | { INITIAL_PADDING "00123456", INITIAL_PADDING "00000000", | ||
152 | INITIAL_PADDING "12345678" } }, | ||
153 | { INITIAL_PADDING "0123456789A*BC", 1, { INITIAL_PADDING "01234567" } }, | ||
154 | { "GNUNETVPN000100000IPEX6-fc5a:4e1:c2ba::1", 1, | ||
155 | { "GNUNETVPN000100000IPEX6-" } } | ||
156 | }; | ||
157 | |||
158 | const char *graph_start_str = "digraph G {\nrankdir=LR\n"; | ||
159 | const char *graph_end_str = "\n}\n"; | ||
160 | |||
161 | for (i = 0; i < 13; i++) | ||
162 | { | ||
163 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating DFA for regex %s\n", | ||
164 | rxstr[i].regex); | ||
165 | |||
166 | |||
167 | /* Create graph */ | ||
168 | if (GNUNET_YES == REGEX_INTERNAL_ITERATE_SAVE_DEBUG_GRAPH) | ||
169 | { | ||
170 | GNUNET_asprintf (&filename, "iteration_graph_%u.dot", i); | ||
171 | ctx.graph_filep = fopen (filename, "w"); | ||
172 | if (NULL == ctx.graph_filep) | ||
173 | { | ||
174 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
175 | "Could not open file %s for saving iteration graph.\n", | ||
176 | filename); | ||
177 | ctx.should_save_graph = GNUNET_NO; | ||
178 | } | ||
179 | else | ||
180 | { | ||
181 | ctx.should_save_graph = GNUNET_YES; | ||
182 | fwrite (graph_start_str, strlen (graph_start_str), 1, ctx.graph_filep); | ||
183 | } | ||
184 | GNUNET_free (filename); | ||
185 | } | ||
186 | else | ||
187 | { | ||
188 | ctx.should_save_graph = GNUNET_NO; | ||
189 | ctx.graph_filep = NULL; | ||
190 | } | ||
191 | |||
192 | /* Iterate over DFA edges */ | ||
193 | transition_counter = 0; | ||
194 | ctx.string_count = rxstr[i].string_count; | ||
195 | ctx.strings = rxstr[i].strings; | ||
196 | ctx.match_count = 0; | ||
197 | dfa = | ||
198 | REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); | ||
199 | REGEX_INTERNAL_iterate_all_edges (dfa, key_iterator, &ctx); | ||
200 | num_transitions = | ||
201 | REGEX_INTERNAL_get_transition_count (dfa) - dfa->start->transition_count; | ||
202 | |||
203 | if (transition_counter < num_transitions) | ||
204 | { | ||
205 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
206 | "Automaton has %d transitions, iterated over %d transitions\n", | ||
207 | num_transitions, transition_counter); | ||
208 | error += 1; | ||
209 | } | ||
210 | |||
211 | if (ctx.match_count < ctx.string_count) | ||
212 | { | ||
213 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
214 | "Missing initial states for regex %s\n", rxstr[i].regex); | ||
215 | error += (ctx.string_count - ctx.match_count); | ||
216 | } | ||
217 | else if (ctx.match_count > ctx.string_count) | ||
218 | { | ||
219 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
220 | "Duplicate initial transitions for regex %s\n", | ||
221 | rxstr[i].regex); | ||
222 | error += (ctx.string_count - ctx.match_count); | ||
223 | } | ||
224 | |||
225 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
226 | |||
227 | /* Finish graph */ | ||
228 | if (GNUNET_YES == ctx.should_save_graph) | ||
229 | { | ||
230 | fwrite (graph_end_str, strlen (graph_end_str), 1, ctx.graph_filep); | ||
231 | fclose (ctx.graph_filep); | ||
232 | ctx.graph_filep = NULL; | ||
233 | ctx.should_save_graph = GNUNET_NO; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | |||
238 | for (i = 0; i < 13; i++) | ||
239 | { | ||
240 | ctx.string_count = rxstr[i].string_count; | ||
241 | ctx.strings = rxstr[i].strings; | ||
242 | ctx.match_count = 0; | ||
243 | |||
244 | dfa = | ||
245 | REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); | ||
246 | REGEX_INTERNAL_dfa_add_multi_strides (NULL, dfa, 2); | ||
247 | REGEX_INTERNAL_iterate_all_edges (dfa, key_iterator, &ctx); | ||
248 | |||
249 | if (ctx.match_count < ctx.string_count) | ||
250 | { | ||
251 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
252 | "Missing initial states for regex %s\n", rxstr[i].regex); | ||
253 | error += (ctx.string_count - ctx.match_count); | ||
254 | } | ||
255 | |||
256 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
257 | } | ||
258 | |||
259 | error += ctx.error; | ||
260 | |||
261 | return error; | ||
262 | } | ||
diff --git a/src/service/regex/test_regex_proofs.c b/src/service/regex/test_regex_proofs.c new file mode 100644 index 000000000..289b1183b --- /dev/null +++ b/src/service/regex/test_regex_proofs.c | |||
@@ -0,0 +1,173 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_proofs.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "regex_internal_lib.h" | ||
27 | #include "regex_test_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
30 | |||
31 | /** | ||
32 | * Test if the given regex's canonical regex is the same as this canonical | ||
33 | * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the | ||
34 | * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from | ||
35 | * this canonical regex 3. compare the canonical regex of dfa A with the | ||
36 | * canonical regex of dfa B. | ||
37 | * | ||
38 | * @param regex regular expression used for this test (see above). | ||
39 | * | ||
40 | * @return 0 on success, 1 on failure | ||
41 | */ | ||
42 | static unsigned int | ||
43 | test_proof (const char *regex) | ||
44 | { | ||
45 | unsigned int error; | ||
46 | struct REGEX_INTERNAL_Automaton *dfa; | ||
47 | char *c_rx1; | ||
48 | const char *c_rx2; | ||
49 | |||
50 | dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), 1); | ||
51 | GNUNET_assert (NULL != dfa); | ||
52 | c_rx1 = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (dfa)); | ||
53 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
54 | dfa = REGEX_INTERNAL_construct_dfa (c_rx1, strlen (c_rx1), 1); | ||
55 | GNUNET_assert (NULL != dfa); | ||
56 | c_rx2 = REGEX_INTERNAL_get_canonical_regex (dfa); | ||
57 | |||
58 | error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; | ||
59 | |||
60 | if (error > 0) | ||
61 | { | ||
62 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
63 | "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n", | ||
64 | regex, c_rx1, c_rx2); | ||
65 | } | ||
66 | |||
67 | GNUNET_free (c_rx1); | ||
68 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
69 | |||
70 | return error; | ||
71 | } | ||
72 | |||
73 | |||
/**
 * Run test_proof() on @a count randomly generated regular expressions,
 * each generated with length @a rx_length.
 *
 * @param count number of random regular expressions to test.
 * @param rx_length length of the random regular expressions.
 *
 * @return 0 on success, number of failures otherwise.
 */
static unsigned int
test_proofs_random (unsigned int count, size_t rx_length)
{
  unsigned int failed = 0;
  unsigned int remaining;

  for (remaining = count; remaining > 0; remaining--)
  {
    char *candidate = REGEX_TEST_generate_random_regex (rx_length, NULL);

    failed += test_proof (candidate);
    GNUNET_free (candidate);
  }

  return failed;
}
101 | |||
102 | |||
103 | /** | ||
104 | * Test a number of known examples of regexes for proper canonicalization. | ||
105 | * | ||
106 | * @return 0 on success, number of failures otherwise. | ||
107 | */ | ||
108 | static unsigned int | ||
109 | test_proofs_static () | ||
110 | { | ||
111 | unsigned int i; | ||
112 | unsigned int error; | ||
113 | |||
114 | const char *regex[8] = { | ||
115 | "a|aa*a", | ||
116 | "a+", | ||
117 | "a*", | ||
118 | "a*a*", | ||
119 | "(F*C|WfPf|y+F*C)", | ||
120 | "y*F*C|WfPf", | ||
121 | "((a|b)c|(a|b)(d|(a|b)e))", | ||
122 | "((a|b)(c|d)|(a|b)(a|b)e)" | ||
123 | }; | ||
124 | |||
125 | const char *canon_rx1; | ||
126 | const char *canon_rx2; | ||
127 | struct REGEX_INTERNAL_Automaton *dfa1; | ||
128 | struct REGEX_INTERNAL_Automaton *dfa2; | ||
129 | |||
130 | error = 0; | ||
131 | |||
132 | for (i = 0; i < 8; i += 2) | ||
133 | { | ||
134 | dfa1 = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 1); | ||
135 | dfa2 = REGEX_INTERNAL_construct_dfa (regex[i + 1], strlen (regex[i + 1]), | ||
136 | 1); | ||
137 | GNUNET_assert (NULL != dfa1); | ||
138 | GNUNET_assert (NULL != dfa2); | ||
139 | |||
140 | canon_rx1 = REGEX_INTERNAL_get_canonical_regex (dfa1); | ||
141 | canon_rx2 = REGEX_INTERNAL_get_canonical_regex (dfa2); | ||
142 | |||
143 | error += (0 == strcmp (canon_rx1, canon_rx2)) ? 0 : 1; | ||
144 | |||
145 | if (error > 0) | ||
146 | { | ||
147 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
148 | "Comparing canonical regex failed:\nrx1:\t%s\ncrx1:\t%s\nrx2:\t%s\ncrx2:\t%s\n", | ||
149 | regex[i], canon_rx1, regex[i + 1], canon_rx2); | ||
150 | } | ||
151 | |||
152 | REGEX_INTERNAL_automaton_destroy (dfa1); | ||
153 | REGEX_INTERNAL_automaton_destroy (dfa2); | ||
154 | } | ||
155 | |||
156 | return error; | ||
157 | } | ||
158 | |||
159 | |||
/**
 * Entry point of the proof test: runs the static pair checks followed by
 * 100 random regexes of length 30.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success, total number of failures otherwise
 */
int
main (int argc, char *argv[])
{
  unsigned int static_failures;
  unsigned int random_failures;

  GNUNET_log_setup ("test-regex", "WARNING", NULL);

  static_failures = test_proofs_static ();
  random_failures = test_proofs_random (100, 30);

  return (int) (static_failures + random_failures);
}