diff options
Diffstat (limited to 'src/regex')
31 files changed, 0 insertions, 13088 deletions
diff --git a/src/regex/.gitignore b/src/regex/.gitignore deleted file mode 100644 index 39dc89c88..000000000 --- a/src/regex/.gitignore +++ /dev/null | |||
@@ -1,12 +0,0 @@ | |||
1 | perf-regex | ||
2 | gnunet-daemon-regexprofiler | ||
3 | gnunet-regex-profiler | ||
4 | gnunet-regex-simulation-profiler | ||
5 | gnunet-service-regex | ||
6 | test_graph.dot | ||
7 | test_regex_api | ||
8 | test_regex_eval_api | ||
9 | test_regex_graph_api | ||
10 | test_regex_integration | ||
11 | test_regex_iterate_api | ||
12 | test_regex_proofs | ||
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am deleted file mode 100644 index 2dbf3a3f9..000000000 --- a/src/regex/Makefile.am +++ /dev/null | |||
@@ -1,212 +0,0 @@ | |||
1 | # This Makefile.am is in the public domain | ||
2 | AM_CPPFLAGS = -I$(top_srcdir)/src/include | ||
3 | |||
4 | if USE_COVERAGE | ||
5 | AM_CFLAGS = --coverage | ||
6 | endif | ||
7 | |||
8 | pkgcfgdir= $(pkgdatadir)/config.d/ | ||
9 | |||
10 | libexecdir= $(pkglibdir)/libexec/ | ||
11 | |||
12 | plugindir = $(libdir)/gnunet | ||
13 | |||
14 | pkgcfg_DATA = \ | ||
15 | regex.conf | ||
16 | |||
17 | libexec_PROGRAMS = \ | ||
18 | gnunet-service-regex \ | ||
19 | gnunet-daemon-regexprofiler | ||
20 | |||
21 | |||
22 | gnunet_service_regex_SOURCES = \ | ||
23 | gnunet-service-regex.c | ||
24 | gnunet_service_regex_LDADD = -lm \ | ||
25 | libgnunetregex_internal.a \ | ||
26 | libgnunetregexblock.la \ | ||
27 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
28 | $(top_builddir)/src/statistics/libgnunetstatistics.la \ | ||
29 | $(top_builddir)/src/util/libgnunetutil.la \ | ||
30 | $(GN_LIBINTL) | ||
31 | |||
32 | noinst_LIBRARIES = \ | ||
33 | libgnunetregex_internal.a \ | ||
34 | libgnunetregextest.a | ||
35 | |||
36 | lib_LTLIBRARIES = \ | ||
37 | libgnunetregexblock.la \ | ||
38 | libgnunetregex.la | ||
39 | |||
40 | |||
41 | libgnunetregexblock_la_SOURCES = \ | ||
42 | regex_block_lib.c regex_block_lib.h | ||
43 | libgnunetregexblock_la_LIBADD = \ | ||
44 | $(top_builddir)/src/util/libgnunetutil.la \ | ||
45 | $(XLIB) \ | ||
46 | $(LTLIBINTL) | ||
47 | libgnunetregexblock_la_LDFLAGS = \ | ||
48 | $(GN_LIB_LDFLAGS) \ | ||
49 | -version-info 1:0:0 | ||
50 | |||
51 | |||
52 | libgnunetregex_internal_a_SOURCES = \ | ||
53 | regex_internal_lib.h \ | ||
54 | regex_internal.h regex_internal.c \ | ||
55 | regex_internal_dht.c | ||
56 | |||
57 | libgnunetregex_la_SOURCES = \ | ||
58 | regex_api_announce.c \ | ||
59 | regex_api_search.c \ | ||
60 | regex_ipc.h | ||
61 | libgnunetregex_la_LIBADD = \ | ||
62 | $(top_builddir)/src/util/libgnunetutil.la | ||
63 | libgnunetregex_la_LDFLAGS = \ | ||
64 | $(GN_LIBINTL) \ | ||
65 | $(GN_LIB_LDFLAGS) \ | ||
66 | -version-info 3:1:0 | ||
67 | |||
68 | |||
69 | plugin_LTLIBRARIES = \ | ||
70 | libgnunet_plugin_block_regex.la | ||
71 | |||
72 | libgnunet_plugin_block_regex_la_SOURCES = \ | ||
73 | plugin_block_regex.c | ||
74 | libgnunet_plugin_block_regex_la_LIBADD = \ | ||
75 | libgnunetregexblock.la \ | ||
76 | $(top_builddir)/src/block/libgnunetblock.la \ | ||
77 | $(top_builddir)/src/block/libgnunetblockgroup.la \ | ||
78 | $(top_builddir)/src/util/libgnunetutil.la | ||
79 | libgnunet_plugin_block_regex_la_LDFLAGS = \ | ||
80 | $(GN_LIBINTL) \ | ||
81 | $(GN_PLUGIN_LDFLAGS) | ||
82 | |||
83 | if HAVE_MYSQL | ||
84 | noinst_mysql_progs = \ | ||
85 | gnunet-regex-simulation-profiler | ||
86 | |||
87 | gnunet_regex_simulation_profiler_SOURCES = \ | ||
88 | gnunet-regex-simulation-profiler.c | ||
89 | gnunet_regex_simulation_profiler_LDADD = \ | ||
90 | $(top_builddir)/src/util/libgnunetutil.la \ | ||
91 | libgnunetregex_internal.a \ | ||
92 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
93 | $(top_builddir)/src/my/libgnunetmy.la \ | ||
94 | $(top_builddir)/src/mysql/libgnunetmysql.la | ||
95 | endif | ||
96 | |||
97 | libgnunetregextest_a_SOURCES = \ | ||
98 | regex_test_lib.c regex_test_lib.h \ | ||
99 | regex_test_graph.c \ | ||
100 | regex_test_random.c | ||
101 | libgnunetregextest_a_LIBADD = \ | ||
102 | $(top_builddir)/src/util/libgnunetutil.la \ | ||
103 | libgnunetregex_internal.a | ||
104 | |||
105 | noinst_PROGRAMS = $(noinst_mysql_progs) \ | ||
106 | perf-regex \ | ||
107 | gnunet-regex-profiler | ||
108 | |||
109 | perf_regex_SOURCES = \ | ||
110 | perf-regex.c | ||
111 | perf_regex_LDADD = -lm \ | ||
112 | libgnunetregex_internal.a \ | ||
113 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
114 | libgnunetregexblock.la \ | ||
115 | libgnunetregextest.a \ | ||
116 | $(top_builddir)/src/util/libgnunetutil.la | ||
117 | perf_regex_LDFLAGS = \ | ||
118 | $(GN_LIBINTL) | ||
119 | |||
120 | gnunet_regex_profiler_SOURCES = \ | ||
121 | gnunet-regex-profiler.c | ||
122 | gnunet_regex_profiler_LDADD = -lm \ | ||
123 | $(top_builddir)/src/arm/libgnunetarm.la \ | ||
124 | $(top_builddir)/src/testing/libgnunettesting.la \ | ||
125 | $(top_builddir)/src/testbed/libgnunettestbed.la \ | ||
126 | libgnunetregex_internal.a \ | ||
127 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
128 | libgnunetregexblock.la \ | ||
129 | libgnunetregextest.a \ | ||
130 | $(top_builddir)/src/statistics/libgnunetstatistics.la \ | ||
131 | $(top_builddir)/src/util/libgnunetutil.la | ||
132 | gnunet_regex_profiler_LDFLAGS = \ | ||
133 | $(GN_LIBINTL) | ||
134 | |||
135 | gnunet_daemon_regexprofiler_SOURCES = \ | ||
136 | gnunet-daemon-regexprofiler.c | ||
137 | gnunet_daemon_regexprofiler_LDADD = -lm \ | ||
138 | libgnunetregex_internal.a \ | ||
139 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
140 | libgnunetregexblock.la \ | ||
141 | libgnunetregextest.a \ | ||
142 | $(top_builddir)/src/statistics/libgnunetstatistics.la \ | ||
143 | $(top_builddir)/src/util/libgnunetutil.la | ||
144 | gnunet_daemon_regexprofiler_LDFLAGS = \ | ||
145 | $(GN_LIBINTL) | ||
146 | |||
147 | check_PROGRAMS = \ | ||
148 | test_regex_integration \ | ||
149 | test_regex_eval_api \ | ||
150 | test_regex_iterate_api \ | ||
151 | test_regex_proofs \ | ||
152 | test_regex_graph_api \ | ||
153 | test_regex_api | ||
154 | |||
155 | if ENABLE_TEST_RUN | ||
156 | AM_TESTS_ENVIRONMENT=export GNUNET_PREFIX=$${GNUNET_PREFIX:-@libdir@};export PATH=$${GNUNET_PREFIX:-@prefix@}/bin:$$PATH;unset XDG_DATA_HOME;unset XDG_CONFIG_HOME; | ||
157 | TESTS = $(check_PROGRAMS) | ||
158 | endif | ||
159 | |||
160 | test_regex_eval_api_SOURCES = \ | ||
161 | test_regex_eval_api.c | ||
162 | test_regex_eval_api_LDADD = -lm \ | ||
163 | libgnunetregex_internal.a \ | ||
164 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
165 | libgnunetregextest.a \ | ||
166 | libgnunetregexblock.la \ | ||
167 | $(top_builddir)/src/util/libgnunetutil.la | ||
168 | |||
169 | test_regex_integration_SOURCES = \ | ||
170 | test_regex_integration.c | ||
171 | test_regex_integration_LDADD = -lm \ | ||
172 | libgnunetregex.la \ | ||
173 | $(top_builddir)/src/testing/libgnunettesting.la \ | ||
174 | $(top_builddir)/src/util/libgnunetutil.la | ||
175 | |||
176 | test_regex_api_SOURCES = \ | ||
177 | test_regex_api.c | ||
178 | test_regex_api_LDADD = -lm \ | ||
179 | libgnunetregex.la \ | ||
180 | $(top_builddir)/src/testing/libgnunettesting.la \ | ||
181 | $(top_builddir)/src/util/libgnunetutil.la | ||
182 | |||
183 | test_regex_iterate_api_SOURCES = \ | ||
184 | test_regex_iterate_api.c | ||
185 | test_regex_iterate_api_LDADD = -lm \ | ||
186 | libgnunetregex_internal.a \ | ||
187 | libgnunetregexblock.la \ | ||
188 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
189 | $(top_builddir)/src/util/libgnunetutil.la | ||
190 | |||
191 | test_regex_proofs_SOURCES = \ | ||
192 | test_regex_proofs.c | ||
193 | test_regex_proofs_LDADD = -lm \ | ||
194 | libgnunetregex_internal.a \ | ||
195 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
196 | libgnunetregextest.a \ | ||
197 | libgnunetregexblock.la \ | ||
198 | $(top_builddir)/src/util/libgnunetutil.la | ||
199 | |||
200 | test_regex_graph_api_SOURCES = \ | ||
201 | test_regex_graph_api.c | ||
202 | test_regex_graph_api_LDADD = -lm \ | ||
203 | libgnunetregex_internal.a \ | ||
204 | $(top_builddir)/src/dht/libgnunetdht.la \ | ||
205 | libgnunetregextest.a \ | ||
206 | libgnunetregexblock.la \ | ||
207 | $(top_builddir)/src/util/libgnunetutil.la | ||
208 | |||
209 | |||
210 | EXTRA_DIST = \ | ||
211 | regex_simulation_profiler_test.conf \ | ||
212 | test_regex_api_data.conf | ||
diff --git a/src/regex/gnunet-daemon-regexprofiler.c b/src/regex/gnunet-daemon-regexprofiler.c deleted file mode 100644 index 70301338f..000000000 --- a/src/regex/gnunet-daemon-regexprofiler.c +++ /dev/null | |||
@@ -1,407 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012, 2013 Christian Grothoff | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-daemon-regexprofiler.c | ||
23 | * @brief daemon that uses cadet to announce a regular expression. Used in | ||
24 | * conjunction with gnunet-regex-profiler to announce regexes on several peers | ||
25 | * without the need to explicitly connect to the cadet service running on the | ||
26 | * peer from within the profiler. | ||
27 | * @author Maximilian Szengel | ||
28 | * @author Bartlomiej Polot | ||
29 | */ | ||
30 | #include "platform.h" | ||
31 | #include "gnunet_util_lib.h" | ||
32 | #include "regex_internal_lib.h" | ||
33 | #include "regex_test_lib.h" | ||
34 | #include "gnunet_dht_service.h" | ||
35 | #include "gnunet_statistics_service.h" | ||
36 | |||
37 | /** | ||
38 | * Return value from 'main'. | ||
39 | */ | ||
40 | static int global_ret; | ||
41 | |||
42 | /** | ||
43 | * Configuration we use. | ||
44 | */ | ||
45 | static const struct GNUNET_CONFIGURATION_Handle *cfg; | ||
46 | |||
47 | /** | ||
48 | * Handle to the statistics service. | ||
49 | */ | ||
50 | static struct GNUNET_STATISTICS_Handle *stats_handle; | ||
51 | |||
52 | /** | ||
53 | * Peer's dht handle. | ||
54 | */ | ||
55 | static struct GNUNET_DHT_Handle *dht_handle; | ||
56 | |||
57 | /** | ||
58 | * Peer's regex announce handle. | ||
59 | */ | ||
60 | static struct REGEX_INTERNAL_Announcement *announce_handle; | ||
61 | |||
62 | /** | ||
63 | * Periodically reannounce regex. | ||
64 | */ | ||
65 | static struct GNUNET_SCHEDULER_Task *reannounce_task; | ||
66 | |||
67 | /** | ||
68 | * What's the maximum reannounce period. | ||
69 | */ | ||
70 | static struct GNUNET_TIME_Relative reannounce_period_max; | ||
71 | |||
72 | /** | ||
73 | * Maximal path compression length for regex announcing. | ||
74 | */ | ||
75 | static unsigned long long max_path_compression; | ||
76 | |||
77 | /** | ||
78 | * Name of the file containing policies that this peer should announce. One | ||
79 | * policy per line. | ||
80 | */ | ||
81 | static char *policy_filename; | ||
82 | |||
83 | /** | ||
84 | * Prefix to add before every regex we're announcing. | ||
85 | */ | ||
86 | static char *regex_prefix; | ||
87 | |||
88 | /** | ||
89 | * Regex with prefix. | ||
90 | */ | ||
91 | static char *rx_with_pfx; | ||
92 | |||
93 | /** | ||
94 | * How many put rounds should we do. | ||
95 | */ | ||
96 | static unsigned int rounds = 3; | ||
97 | |||
98 | /** | ||
99 | * Private key for this peer. | ||
100 | */ | ||
101 | static struct GNUNET_CRYPTO_EddsaPrivateKey *my_private_key; | ||
102 | |||
103 | |||
104 | /** | ||
105 | * Task run during shutdown. | ||
106 | * | ||
107 | * @param cls unused | ||
108 | */ | ||
109 | static void | ||
110 | shutdown_task (void *cls) | ||
111 | { | ||
112 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "shutting down\n"); | ||
113 | |||
114 | if (NULL != announce_handle) | ||
115 | { | ||
116 | REGEX_INTERNAL_announce_cancel (announce_handle); | ||
117 | announce_handle = NULL; | ||
118 | } | ||
119 | if (NULL != reannounce_task) | ||
120 | { | ||
121 | GNUNET_free_nz (GNUNET_SCHEDULER_cancel (reannounce_task)); | ||
122 | reannounce_task = NULL; | ||
123 | } | ||
124 | if (NULL != dht_handle) | ||
125 | { | ||
126 | GNUNET_DHT_disconnect (dht_handle); | ||
127 | dht_handle = NULL; | ||
128 | } | ||
129 | GNUNET_free (my_private_key); | ||
130 | my_private_key = NULL; | ||
131 | |||
132 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
133 | "Daemon for %s shutting down\n", | ||
134 | policy_filename); | ||
135 | } | ||
136 | |||
137 | |||
138 | /** | ||
139 | * Announce a previously announced regex re-using cached data. | ||
140 | * | ||
141 | * @param cls Closure (regex to announce if needed). | ||
142 | */ | ||
143 | static void | ||
144 | reannounce_regex (void *cls) | ||
145 | { | ||
146 | char *regex = cls; | ||
147 | struct GNUNET_TIME_Relative random_delay; | ||
148 | |||
149 | reannounce_task = NULL; | ||
150 | if (0 == rounds--) | ||
151 | { | ||
152 | global_ret = 0; | ||
153 | GNUNET_SCHEDULER_shutdown (); | ||
154 | GNUNET_free (regex); | ||
155 | return; | ||
156 | } | ||
157 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Announcing regex: %s\n", regex); | ||
158 | GNUNET_STATISTICS_update (stats_handle, "# regexes announced", 1, GNUNET_NO); | ||
159 | if ((NULL == announce_handle) && (NULL != regex)) | ||
160 | { | ||
161 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
162 | "First time, creating regex: %s\n", | ||
163 | regex); | ||
164 | announce_handle = REGEX_INTERNAL_announce (dht_handle, | ||
165 | my_private_key, | ||
166 | regex, | ||
167 | (unsigned | ||
168 | int) max_path_compression, | ||
169 | stats_handle); | ||
170 | } | ||
171 | else | ||
172 | { | ||
173 | GNUNET_assert (NULL != announce_handle); | ||
174 | REGEX_INTERNAL_reannounce (announce_handle); | ||
175 | } | ||
176 | |||
177 | random_delay = | ||
178 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, | ||
179 | GNUNET_CRYPTO_random_u32 ( | ||
180 | GNUNET_CRYPTO_QUALITY_WEAK, | ||
181 | reannounce_period_max.rel_value_us)); | ||
182 | reannounce_task = GNUNET_SCHEDULER_add_delayed (random_delay, | ||
183 | &reannounce_regex, cls); | ||
184 | } | ||
185 | |||
186 | |||
187 | /** | ||
188 | * Announce the given regular expression using regex and the path compression | ||
189 | * length read from config. | ||
190 | * | ||
191 | * @param regex regular expression to announce on this peer's cadet. | ||
192 | */ | ||
193 | static void | ||
194 | announce_regex (const char *regex) | ||
195 | { | ||
196 | char *copy; | ||
197 | |||
198 | if ((NULL == regex) || (0 == strlen (regex))) | ||
199 | { | ||
200 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Cannot announce empty regex\n"); | ||
201 | return; | ||
202 | } | ||
203 | |||
204 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
205 | "Daemon for %s starting\n", | ||
206 | policy_filename); | ||
207 | GNUNET_assert (NULL == reannounce_task); | ||
208 | copy = GNUNET_strdup (regex); | ||
209 | reannounce_task = GNUNET_SCHEDULER_add_now (&reannounce_regex, | ||
210 | (void *) copy); | ||
211 | } | ||
212 | |||
213 | |||
214 | /** | ||
215 | * Scan through the policy_dir looking for the n-th filename. | ||
216 | * | ||
217 | * @param cls Closure (target number n). | ||
218 | * @param filename complete filename (absolute path). | ||
219 | * @return GNUNET_OK to continue to iterate, | ||
220 | * GNUNET_NO to stop when found | ||
221 | */ | ||
222 | static int | ||
223 | scan (void *cls, const char *filename) | ||
224 | { | ||
225 | long n = (long) cls; | ||
226 | static long c = 0; | ||
227 | |||
228 | if (c == n) | ||
229 | { | ||
230 | policy_filename = GNUNET_strdup (filename); | ||
231 | return GNUNET_NO; | ||
232 | } | ||
233 | c++; | ||
234 | return GNUNET_OK; | ||
235 | } | ||
236 | |||
237 | |||
238 | /** | ||
239 | * @brief Main function that will be run by the scheduler. | ||
240 | * | ||
241 | * @param cls closure | ||
242 | * @param args remaining command-line arguments | ||
243 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
244 | * @param cfg_ configuration | ||
245 | */ | ||
246 | static void | ||
247 | run (void *cls, char *const *args GNUNET_UNUSED, | ||
248 | const char *cfgfile GNUNET_UNUSED, | ||
249 | const struct GNUNET_CONFIGURATION_Handle *cfg_) | ||
250 | { | ||
251 | char *regex = NULL; | ||
252 | char **components; | ||
253 | char *policy_dir; | ||
254 | long long unsigned int peer_id; | ||
255 | |||
256 | cfg = cfg_; | ||
257 | |||
258 | my_private_key = GNUNET_CRYPTO_eddsa_key_create_from_configuration (cfg); | ||
259 | GNUNET_assert (NULL != my_private_key); | ||
260 | if (GNUNET_OK != | ||
261 | GNUNET_CONFIGURATION_get_value_number (cfg, "REGEXPROFILER", | ||
262 | "MAX_PATH_COMPRESSION", | ||
263 | &max_path_compression)) | ||
264 | { | ||
265 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
266 | _ | ||
267 | ( | ||
268 | "%s service is lacking key configuration settings (%s). Exiting.\n"), | ||
269 | "regexprofiler", "max_path_compression"); | ||
270 | global_ret = GNUNET_SYSERR; | ||
271 | GNUNET_SCHEDULER_shutdown (); | ||
272 | return; | ||
273 | } | ||
274 | if (GNUNET_OK != | ||
275 | GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER", | ||
276 | "POLICY_DIR", &policy_dir)) | ||
277 | { | ||
278 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, "REGEXPROFILER", | ||
279 | "POLICY_DIR"); | ||
280 | global_ret = GNUNET_SYSERR; | ||
281 | GNUNET_SCHEDULER_shutdown (); | ||
282 | return; | ||
283 | } | ||
284 | if (GNUNET_OK != | ||
285 | GNUNET_CONFIGURATION_get_value_number (cfg, "TESTBED", | ||
286 | "PEERID", &peer_id)) | ||
287 | { | ||
288 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, "TESTBED", "PEERID"); | ||
289 | global_ret = GNUNET_SYSERR; | ||
290 | GNUNET_free (policy_dir); | ||
291 | GNUNET_SCHEDULER_shutdown (); | ||
292 | return; | ||
293 | } | ||
294 | |||
295 | if (GNUNET_OK != | ||
296 | GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER", | ||
297 | "REGEX_PREFIX", ®ex_prefix)) | ||
298 | { | ||
299 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, "REGEXPROFILER", | ||
300 | "REGEX_PREFIX"); | ||
301 | global_ret = GNUNET_SYSERR; | ||
302 | GNUNET_free (policy_dir); | ||
303 | GNUNET_SCHEDULER_shutdown (); | ||
304 | return; | ||
305 | } | ||
306 | |||
307 | if (GNUNET_OK != | ||
308 | GNUNET_CONFIGURATION_get_value_time (cfg, "REGEXPROFILER", | ||
309 | "REANNOUNCE_PERIOD_MAX", | ||
310 | &reannounce_period_max)) | ||
311 | { | ||
312 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
313 | "reannounce_period_max not given. Using 10 minutes.\n"); | ||
314 | reannounce_period_max = | ||
315 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 10); | ||
316 | } | ||
317 | |||
318 | stats_handle = GNUNET_STATISTICS_create ("regexprofiler", cfg); | ||
319 | |||
320 | dht_handle = GNUNET_DHT_connect (cfg, 1); | ||
321 | |||
322 | if (NULL == dht_handle) | ||
323 | { | ||
324 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
325 | "Could not acquire dht handle. Exiting.\n"); | ||
326 | global_ret = GNUNET_SYSERR; | ||
327 | GNUNET_free (policy_dir); | ||
328 | GNUNET_SCHEDULER_shutdown (); | ||
329 | return; | ||
330 | } | ||
331 | |||
332 | /* Read regexes from policy files */ | ||
333 | GNUNET_assert (-1 != GNUNET_DISK_directory_scan (policy_dir, &scan, | ||
334 | (void *) (long) peer_id)); | ||
335 | if (NULL == (components = REGEX_TEST_read_from_file (policy_filename))) | ||
336 | { | ||
337 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
338 | "Policy file %s contains no policies. Exiting.\n", | ||
339 | policy_filename); | ||
340 | global_ret = GNUNET_SYSERR; | ||
341 | GNUNET_free (policy_dir); | ||
342 | GNUNET_SCHEDULER_shutdown (); | ||
343 | return; | ||
344 | } | ||
345 | GNUNET_free (policy_dir); | ||
346 | regex = REGEX_TEST_combine (components, 16); | ||
347 | REGEX_TEST_free_from_file (components); | ||
348 | |||
349 | /* Announcing regexes from policy_filename */ | ||
350 | GNUNET_asprintf (&rx_with_pfx, | ||
351 | "%s(%s)(0|1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)*", | ||
352 | regex_prefix, | ||
353 | regex); | ||
354 | announce_regex (rx_with_pfx); | ||
355 | GNUNET_free (regex); | ||
356 | GNUNET_free (rx_with_pfx); | ||
357 | |||
358 | /* Scheduled the task to clean up when shutdown is called */ | ||
359 | GNUNET_SCHEDULER_add_shutdown (&shutdown_task, | ||
360 | NULL); | ||
361 | } | ||
362 | |||
363 | |||
364 | /** | ||
365 | * The main function of the regexprofiler service. | ||
366 | * | ||
367 | * @param argc number of arguments from the command line | ||
368 | * @param argv command line arguments | ||
369 | * @return 0 ok, 1 on error | ||
370 | */ | ||
371 | int | ||
372 | main (int argc, char *const *argv) | ||
373 | { | ||
374 | static const struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
375 | GNUNET_GETOPT_OPTION_END | ||
376 | }; | ||
377 | |||
378 | if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv)) | ||
379 | return 2; | ||
380 | return (GNUNET_OK == | ||
381 | GNUNET_PROGRAM_run (argc, argv, "regexprofiler", | ||
382 | gettext_noop | ||
383 | ( | ||
384 | "Daemon to announce regular expressions for the peer using cadet."), | ||
385 | options, &run, NULL)) ? global_ret : 1; | ||
386 | } | ||
387 | |||
388 | |||
389 | #if defined(__linux__) && defined(__GLIBC__) | ||
390 | #include <malloc.h> | ||
391 | |||
/**
 * MINIMIZE heap size (way below 128k) since this process doesn't need much.
 * Declared as a constructor; it sets the allocator's trim threshold and
 * top padding and then asks it to trim.
 */
void __attribute__ ((constructor))
GNUNET_ARM_memory_init ()
{
  enum
  {
    TRIM_THRESHOLD_BYTES = 4 * 1024,
    TOP_PAD_BYTES = 1 * 1024
  };

  mallopt (M_TRIM_THRESHOLD, TRIM_THRESHOLD_BYTES);
  mallopt (M_TOP_PAD, TOP_PAD_BYTES);
  malloc_trim (0);
}
402 | |||
403 | |||
404 | #endif | ||
405 | |||
406 | |||
407 | /* end of gnunet-daemon-regexprofiler.c */ | ||
diff --git a/src/regex/gnunet-regex-profiler.c b/src/regex/gnunet-regex-profiler.c deleted file mode 100644 index bd223ae8e..000000000 --- a/src/regex/gnunet-regex-profiler.c +++ /dev/null | |||
@@ -1,1613 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2011 - 2017 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-regex-profiler.c | ||
23 | * @brief Regex profiler for testing distributed regex use. | ||
24 | * @author Bartlomiej Polot | ||
25 | * @author Maximilian Szengel | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #include <string.h> | ||
30 | |||
31 | #include "platform.h" | ||
32 | #include "gnunet_applications.h" | ||
33 | #include "gnunet_util_lib.h" | ||
34 | #include "regex_internal_lib.h" | ||
35 | #include "gnunet_arm_service.h" | ||
36 | #include "gnunet_dht_service.h" | ||
37 | #include "gnunet_testbed_service.h" | ||
38 | |||
/**
 * Relative time of 90 seconds.
 */
#define FIND_TIMEOUT \
  GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 90)
41 | |||
/**
 * Element of a doubly-linked list (DLL) of testbed operations.
 */
struct DLLOperation
{
  /** Handle of the testbed operation. */
  struct GNUNET_TESTBED_Operation *op;

  /** Closure that belongs to the operation. */
  void *cls;

  /** Following element in the DLL. */
  struct DLLOperation *next;

  /** Preceding element in the DLL. */
  struct DLLOperation *prev;
};
67 | |||
68 | |||
/**
 * Phases the profiler can be in while it runs.
 */
enum State
{
  /** Initial state */
  STATE_INIT = 0,

  /** Slaves are being started */
  STATE_SLAVES_STARTING,

  /** Peers are being created */
  STATE_PEERS_CREATING,

  /** Peers are being started */
  STATE_PEERS_STARTING,

  /** Peers are being linked */
  STATE_PEERS_LINKING,

  /** Strings are being matched against the announced regexes */
  STATE_SEARCH_REGEX,

  /** Peers are being destroyed; we can do this as the controller takes
      care of stopping a peer if it is running */
  STATE_PEERS_DESTROYING
};
110 | |||
111 | |||
112 | /** | ||
113 | * Peer handles. | ||
114 | */ | ||
115 | struct RegexPeer | ||
116 | { | ||
117 | /** | ||
118 | * Peer id. | ||
119 | */ | ||
120 | unsigned int id; | ||
121 | |||
122 | /** | ||
123 | * Peer configuration handle. | ||
124 | */ | ||
125 | struct GNUNET_CONFIGURATION_Handle *cfg; | ||
126 | |||
127 | /** | ||
128 | * The actual testbed peer handle. | ||
129 | */ | ||
130 | struct GNUNET_TESTBED_Peer *peer_handle; | ||
131 | |||
132 | /** | ||
133 | * Peer's search string. | ||
134 | */ | ||
135 | const char *search_str; | ||
136 | |||
137 | /** | ||
138 | * Set to GNUNET_YES if the peer successfully matched the above | ||
139 | * search string. GNUNET_NO if the string could not be matched | ||
140 | * during the profiler run. GNUNET_SYSERR if the string matching | ||
141 | * timed out. Undefined if search_str is NULL | ||
142 | */ | ||
143 | int search_str_matched; | ||
144 | |||
145 | /** | ||
146 | * Peer's DHT handle. | ||
147 | */ | ||
148 | struct GNUNET_DHT_Handle *dht_handle; | ||
149 | |||
150 | /** | ||
151 | * Handle to a running regex search. | ||
152 | */ | ||
153 | struct REGEX_INTERNAL_Search *search_handle; | ||
154 | |||
155 | /** | ||
156 | * Testbed operation handle for DHT. | ||
157 | */ | ||
158 | struct GNUNET_TESTBED_Operation *op_handle; | ||
159 | |||
160 | /** | ||
161 | * Peers's statistics handle. | ||
162 | */ | ||
163 | struct GNUNET_STATISTICS_Handle *stats_handle; | ||
164 | |||
165 | /** | ||
166 | * The starting time of a profiling step. | ||
167 | */ | ||
168 | struct GNUNET_TIME_Absolute prof_start_time; | ||
169 | |||
170 | /** | ||
171 | * Operation timeout | ||
172 | */ | ||
173 | struct GNUNET_SCHEDULER_Task *timeout; | ||
174 | |||
175 | /** | ||
176 | * Daemon start | ||
177 | */ | ||
178 | struct GNUNET_TESTBED_Operation *daemon_op; | ||
179 | }; | ||
180 | |||
181 | /** | ||
182 | * Set when shutting down to avoid making more queries. | ||
183 | */ | ||
184 | static int in_shutdown; | ||
185 | |||
186 | /** | ||
187 | * The array of peers; we fill this as the peers are given to us by the testbed | ||
188 | */ | ||
189 | static struct RegexPeer *peers; | ||
190 | |||
191 | /** | ||
192 | * Host registration handle | ||
193 | */ | ||
194 | static struct GNUNET_TESTBED_HostRegistrationHandle *reg_handle; | ||
195 | |||
196 | /** | ||
197 | * Handle to the master controller process | ||
198 | */ | ||
199 | static struct GNUNET_TESTBED_ControllerProc *mc_proc; | ||
200 | |||
201 | /** | ||
202 | * Handle to the master controller | ||
203 | */ | ||
204 | static struct GNUNET_TESTBED_Controller *mc; | ||
205 | |||
206 | /** | ||
207 | * Handle to global configuration | ||
208 | */ | ||
209 | static struct GNUNET_CONFIGURATION_Handle *cfg; | ||
210 | |||
211 | /** | ||
212 | * Abort task identifier | ||
213 | */ | ||
214 | static struct GNUNET_SCHEDULER_Task *abort_task; | ||
215 | |||
216 | /** | ||
217 | * Host registration task identifier | ||
218 | */ | ||
219 | static struct GNUNET_SCHEDULER_Task *register_hosts_task; | ||
220 | |||
221 | /** | ||
222 | * Global event mask for all testbed events | ||
223 | */ | ||
224 | static uint64_t event_mask; | ||
225 | |||
226 | /** | ||
227 | * The starting time of a profiling step | ||
228 | */ | ||
229 | static struct GNUNET_TIME_Absolute prof_start_time; | ||
230 | |||
231 | /** | ||
232 | * Duration profiling step has taken | ||
233 | */ | ||
234 | static struct GNUNET_TIME_Relative prof_time; | ||
235 | |||
236 | /** | ||
237 | * Number of peers to be started by the profiler | ||
238 | */ | ||
239 | static unsigned int num_peers; | ||
240 | |||
241 | /** | ||
242 | * Global testing status | ||
243 | */ | ||
244 | static int result; | ||
245 | |||
246 | /** | ||
247 | * current state of profiling | ||
248 | */ | ||
249 | enum State state; | ||
250 | |||
/** Folder in which the policy files are stored. */
static char *policy_dir;

/** File listing the hostnames on which to execute the test. */
static char *hosts_file;

/** File containing the strings to look for. */
static char *strings_file;

/** Search strings (num_peers of them). */
static char **search_strings;

/** Number of searches we are going to start in parallel. */
static long long unsigned int init_parallel_searches;

/** Number of searches currently running in parallel. */
static unsigned int parallel_searches;

/** Number of strings found in the published regexes. */
static unsigned int strings_found;

/** Index of the peer that starts the next announce/search. */
static unsigned int next_search;

/** Task identifier of the search timeout task. */
static struct GNUNET_SCHEDULER_Task *search_timeout_task;
295 | |||
296 | /** | ||
297 | * Search timeout in seconds. | ||
298 | */ | ||
299 | static struct GNUNET_TIME_Relative search_timeout_time = { 60000 }; | ||
300 | |||
301 | /** | ||
302 | * File to log statistics to. | ||
303 | */ | ||
304 | static struct GNUNET_DISK_FileHandle *data_file; | ||
305 | |||
306 | /** | ||
307 | * Filename to log statistics to. | ||
308 | */ | ||
309 | static char *data_filename; | ||
310 | |||
311 | /** | ||
312 | * Prefix used for regex announcing. We need to prefix the search | ||
313 | * strings with it, in order to find something. | ||
314 | */ | ||
315 | static char *regex_prefix; | ||
316 | |||
317 | /** | ||
318 | * What's the maximum regex reannounce period. | ||
319 | */ | ||
320 | static struct GNUNET_TIME_Relative reannounce_period_max; | ||
321 | |||
322 | |||
323 | /******************************************************************************/ | ||
324 | /****************************** DECLARATIONS ********************************/ | ||
325 | /******************************************************************************/ | ||
326 | |||
/**
 * Callback invoked for the DHT connect operation.
 *
 * @param cls internal peer id.
 * @param op operation handle.
 * @param ca_result connect adapter result.
 * @param emsg error message.
 */
static void
dht_connect_cb (void *cls,
                struct GNUNET_TESTBED_Operation *op,
                void *ca_result,
                const char *emsg);


/**
 * Connect adapter for the DHT.
 *
 * @param cls not used.
 * @param cfg configuration handle.
 * @return not documented here; presumably the DHT service handle that
 *         dht_da() later receives -- TODO confirm against the definition
 */
static void *
dht_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg);


/**
 * Disconnect adapter: called to destroy a connection to the DHT service.
 *
 * @param cls closure
 * @param op_result service handle returned from the connect adapter
 */
static void
dht_da (void *cls, void *op_result);


/**
 * Called by testbed once we are connected to the stats service.
 * Gets the statistics for the services of interest.
 *
 * @param cls the 'struct RegexPeer' for which we connected to stats
 * @param op connect operation handle
 * @param ca_result handle to stats service
 * @param emsg error message on failure
 */
static void
stats_connect_cb (void *cls,
                  struct GNUNET_TESTBED_Operation *op,
                  void *ca_result,
                  const char *emsg);


/**
 * Start announcing the next regex in the DHT.
 *
 * @param cls Index of the next peer in the peers array.
 */
static void
announce_next_regex (void *cls);
385 | |||
386 | |||
387 | /******************************************************************************/ | ||
388 | /******************************** SHUTDOWN **********************************/ | ||
389 | /******************************************************************************/ | ||
390 | |||
391 | |||
392 | /** | ||
393 | * Shutdown nicely | ||
394 | * | ||
395 | * @param cls NULL | ||
396 | */ | ||
397 | static void | ||
398 | do_shutdown (void *cls) | ||
399 | { | ||
400 | struct RegexPeer *peer; | ||
401 | unsigned int peer_cnt; | ||
402 | unsigned int search_str_cnt; | ||
403 | char output_buffer[512]; | ||
404 | size_t size; | ||
405 | |||
406 | if (NULL != abort_task) | ||
407 | { | ||
408 | GNUNET_SCHEDULER_cancel (abort_task); | ||
409 | abort_task = NULL; | ||
410 | } | ||
411 | if (NULL != register_hosts_task) | ||
412 | { | ||
413 | GNUNET_SCHEDULER_cancel (register_hosts_task); | ||
414 | register_hosts_task = NULL; | ||
415 | } | ||
416 | for (peer_cnt = 0; peer_cnt < num_peers; peer_cnt++) | ||
417 | { | ||
418 | peer = &peers[peer_cnt]; | ||
419 | |||
420 | if ((GNUNET_YES != peer->search_str_matched) && (NULL != data_file) ) | ||
421 | { | ||
422 | prof_time = GNUNET_TIME_absolute_get_duration (peer->prof_start_time); | ||
423 | size = | ||
424 | GNUNET_snprintf (output_buffer, | ||
425 | sizeof(output_buffer), | ||
426 | "%p Search string not found: %s (%d)\n" | ||
427 | "%p On peer: %u (%p)\n" | ||
428 | "%p After: %s\n", | ||
429 | peer, peer->search_str, peer->search_str_matched, | ||
430 | peer, peer->id, peer, | ||
431 | peer, | ||
432 | GNUNET_STRINGS_relative_time_to_string (prof_time, | ||
433 | GNUNET_NO)); | ||
434 | if (size != GNUNET_DISK_file_write (data_file, output_buffer, size)) | ||
435 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n"); | ||
436 | } | ||
437 | |||
438 | if (NULL != peers[peer_cnt].op_handle) | ||
439 | GNUNET_TESTBED_operation_done (peers[peer_cnt].op_handle); | ||
440 | } | ||
441 | |||
442 | if (NULL != data_file) | ||
443 | { | ||
444 | GNUNET_DISK_file_close (data_file); | ||
445 | data_file = NULL; | ||
446 | } | ||
447 | for (search_str_cnt = 0; | ||
448 | search_str_cnt < num_peers && NULL != search_strings; | ||
449 | search_str_cnt++) | ||
450 | { | ||
451 | GNUNET_free (search_strings[search_str_cnt]); | ||
452 | } | ||
453 | GNUNET_free (search_strings); | ||
454 | search_strings = NULL; | ||
455 | |||
456 | if (NULL != reg_handle) | ||
457 | { | ||
458 | GNUNET_TESTBED_cancel_registration (reg_handle); | ||
459 | reg_handle = NULL; | ||
460 | } | ||
461 | if (NULL != mc) | ||
462 | { | ||
463 | GNUNET_TESTBED_controller_disconnect (mc); | ||
464 | mc = NULL; | ||
465 | } | ||
466 | if (NULL != mc_proc) | ||
467 | { | ||
468 | GNUNET_TESTBED_controller_stop (mc_proc); | ||
469 | mc_proc = NULL; | ||
470 | } | ||
471 | if (NULL != cfg) | ||
472 | { | ||
473 | GNUNET_CONFIGURATION_destroy (cfg); | ||
474 | cfg = NULL; | ||
475 | } | ||
476 | } | ||
477 | |||
478 | |||
479 | /** | ||
480 | * abort task to run on test timed out | ||
481 | * | ||
482 | * @param cls NULL | ||
483 | */ | ||
484 | static void | ||
485 | do_abort (void *cls) | ||
486 | { | ||
487 | unsigned long i = (unsigned long) cls; | ||
488 | |||
489 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
490 | "Aborting from line %lu...\n", i); | ||
491 | abort_task = NULL; | ||
492 | result = GNUNET_SYSERR; | ||
493 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
494 | } | ||
495 | |||
496 | |||
497 | /******************************************************************************/ | ||
498 | /********************* STATISTICS SERVICE CONNECTIONS ***********************/ | ||
499 | /******************************************************************************/ | ||
500 | |||
/**
 * Connect adapter for the statistics service.
 *
 * @param cls closure (not used here)
 * @param cfg configuration of the peer to connect to; will be available until
 *          GNUNET_TESTBED_operation_done() is called on the operation returned
 *          from GNUNET_TESTBED_service_connect()
 * @return the statistics handle to hand back as 'op_result', NULL on error
 */
static void *
stats_ca (void *cls,
          const struct GNUNET_CONFIGURATION_Handle *cfg)
{
  void *stats;

  stats = GNUNET_STATISTICS_create ("<driver>", cfg);
  return stats;
}
517 | |||
518 | |||
519 | /** | ||
520 | * Adapter function called to destroy a connection to | ||
521 | * statistics service. | ||
522 | * | ||
523 | * @param cls closure | ||
524 | * @param op_result service handle returned from the connect adapter | ||
525 | */ | ||
526 | static void | ||
527 | stats_da (void *cls, void *op_result) | ||
528 | { | ||
529 | struct RegexPeer *peer = cls; | ||
530 | |||
531 | GNUNET_assert (op_result == peer->stats_handle); | ||
532 | |||
533 | GNUNET_STATISTICS_destroy (peer->stats_handle, GNUNET_NO); | ||
534 | peer->stats_handle = NULL; | ||
535 | } | ||
536 | |||
537 | |||
538 | /** | ||
539 | * Process statistic values. Write all values to global 'data_file', if present. | ||
540 | * | ||
541 | * @param cls closure | ||
542 | * @param subsystem name of subsystem that created the statistic | ||
543 | * @param name the name of the datum | ||
544 | * @param value the current value | ||
545 | * @param is_persistent GNUNET_YES if the value is persistent, GNUNET_NO if not | ||
546 | * @return #GNUNET_OK to continue, #GNUNET_SYSERR to abort iteration | ||
547 | */ | ||
548 | static int | ||
549 | stats_iterator (void *cls, | ||
550 | const char *subsystem, | ||
551 | const char *name, | ||
552 | uint64_t value, int is_persistent) | ||
553 | { | ||
554 | struct RegexPeer *peer = cls; | ||
555 | char output_buffer[512]; | ||
556 | size_t size; | ||
557 | |||
558 | if (NULL == data_file) | ||
559 | { | ||
560 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
561 | "%p -> %s [%s]: %llu\n", | ||
562 | peer, | ||
563 | subsystem, | ||
564 | name, | ||
565 | (unsigned long long) value); | ||
566 | return GNUNET_OK; | ||
567 | } | ||
568 | size = | ||
569 | GNUNET_snprintf (output_buffer, | ||
570 | sizeof(output_buffer), | ||
571 | "%p [%s] %llu %s\n", | ||
572 | peer, | ||
573 | subsystem, | ||
574 | (unsigned long long) value, | ||
575 | name); | ||
576 | if (size != GNUNET_DISK_file_write (data_file, output_buffer, size)) | ||
577 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
578 | "Unable to write to file!\n"); | ||
579 | |||
580 | return GNUNET_OK; | ||
581 | } | ||
582 | |||
583 | |||
584 | /** | ||
585 | * Stats callback. Finish the stats testbed operation and when all stats have | ||
586 | * been iterated, shutdown the profiler. | ||
587 | * | ||
588 | * @param cls closure | ||
589 | * @param success GNUNET_OK if statistics were | ||
590 | * successfully obtained, GNUNET_SYSERR if not. | ||
591 | */ | ||
592 | static void | ||
593 | stats_cb (void *cls, | ||
594 | int success) | ||
595 | { | ||
596 | static unsigned int peer_cnt; | ||
597 | struct RegexPeer *peer = cls; | ||
598 | |||
599 | if (GNUNET_OK != success) | ||
600 | { | ||
601 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
602 | "Getting statistics for peer %u failed!\n", | ||
603 | peer->id); | ||
604 | return; | ||
605 | } | ||
606 | |||
607 | GNUNET_assert (NULL != peer->op_handle); | ||
608 | |||
609 | GNUNET_TESTBED_operation_done (peer->op_handle); | ||
610 | peer->op_handle = NULL; | ||
611 | |||
612 | peer_cnt++; | ||
613 | peer = &peers[peer_cnt]; | ||
614 | |||
615 | fprintf (stderr, "s"); | ||
616 | if (peer_cnt == num_peers) | ||
617 | { | ||
618 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
619 | "\nCollecting stats finished. Shutting down.\n"); | ||
620 | GNUNET_SCHEDULER_shutdown (); | ||
621 | result = GNUNET_OK; | ||
622 | } | ||
623 | else | ||
624 | { | ||
625 | peer->op_handle = | ||
626 | GNUNET_TESTBED_service_connect (NULL, | ||
627 | peer->peer_handle, | ||
628 | "statistics", | ||
629 | &stats_connect_cb, | ||
630 | peer, | ||
631 | &stats_ca, | ||
632 | &stats_da, | ||
633 | peer); | ||
634 | } | ||
635 | } | ||
636 | |||
637 | |||
638 | /** | ||
639 | * Function called by testbed once we are connected to stats | ||
640 | * service. Get the statistics for the services of interest. | ||
641 | * | ||
642 | * @param cls the 'struct RegexPeer' for which we connected to stats | ||
643 | * @param op connect operation handle | ||
644 | * @param ca_result handle to stats service | ||
645 | * @param emsg error message on failure | ||
646 | */ | ||
647 | static void | ||
648 | stats_connect_cb (void *cls, | ||
649 | struct GNUNET_TESTBED_Operation *op, | ||
650 | void *ca_result, | ||
651 | const char *emsg) | ||
652 | { | ||
653 | struct RegexPeer *peer = cls; | ||
654 | |||
655 | if ((NULL == ca_result) || (NULL != emsg)) | ||
656 | { | ||
657 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
658 | "Failed to connect to statistics service on peer %u: %s\n", | ||
659 | peer->id, emsg); | ||
660 | |||
661 | peer->stats_handle = NULL; | ||
662 | return; | ||
663 | } | ||
664 | |||
665 | peer->stats_handle = ca_result; | ||
666 | |||
667 | if (NULL == GNUNET_STATISTICS_get (peer->stats_handle, NULL, NULL, | ||
668 | &stats_cb, | ||
669 | &stats_iterator, peer)) | ||
670 | { | ||
671 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
672 | "Could not get statistics of peer %u!\n", peer->id); | ||
673 | } | ||
674 | } | ||
675 | |||
676 | |||
677 | /** | ||
678 | * Task to collect all statistics from all peers, will shutdown the | ||
679 | * profiler, when done. | ||
680 | * | ||
681 | * @param cls NULL | ||
682 | */ | ||
683 | static void | ||
684 | do_collect_stats (void *cls) | ||
685 | { | ||
686 | struct RegexPeer *peer = &peers[0]; | ||
687 | |||
688 | GNUNET_assert (NULL != peer->peer_handle); | ||
689 | |||
690 | peer->op_handle = | ||
691 | GNUNET_TESTBED_service_connect (NULL, | ||
692 | peer->peer_handle, | ||
693 | "statistics", | ||
694 | &stats_connect_cb, | ||
695 | peer, | ||
696 | &stats_ca, | ||
697 | &stats_da, | ||
698 | peer); | ||
699 | } | ||
700 | |||
701 | |||
702 | /******************************************************************************/ | ||
703 | /************************ REGEX FIND CONNECTIONS **************************/ | ||
704 | /******************************************************************************/ | ||
705 | |||
706 | |||
/**
 * Start searching for a string in the DHT.
 *
 * @param cls index of the searching peer in the peers array, carried
 *            as an integer inside the closure pointer.
 */
static void
find_string (void *cls);
714 | |||
715 | |||
716 | /** | ||
717 | * Method called when we've found a peer that announced a regex | ||
718 | * that matches our search string. Now get the statistics. | ||
719 | * | ||
720 | * @param cls Closure provided in REGEX_INTERNAL_search. | ||
721 | * @param id Peer providing a regex that matches the string. | ||
722 | * @param get_path Path of the get request. | ||
723 | * @param get_path_length Length of get_path. | ||
724 | * @param put_path Path of the put request. | ||
725 | * @param put_path_length Length of the put_path. | ||
726 | */ | ||
727 | static void | ||
728 | regex_found_handler (void *cls, | ||
729 | const struct GNUNET_PeerIdentity *id, | ||
730 | const struct GNUNET_PeerIdentity *get_path, | ||
731 | unsigned int get_path_length, | ||
732 | const struct GNUNET_PeerIdentity *put_path, | ||
733 | unsigned int put_path_length) | ||
734 | { | ||
735 | struct RegexPeer *peer = cls; | ||
736 | char output_buffer[512]; | ||
737 | size_t size; | ||
738 | |||
739 | if (GNUNET_YES == peer->search_str_matched) | ||
740 | { | ||
741 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
742 | "String %s on peer %u already matched!\n", | ||
743 | peer->search_str, peer->id); | ||
744 | return; | ||
745 | } | ||
746 | |||
747 | strings_found++; | ||
748 | parallel_searches--; | ||
749 | |||
750 | if (NULL != peer->timeout) | ||
751 | { | ||
752 | GNUNET_SCHEDULER_cancel (peer->timeout); | ||
753 | peer->timeout = NULL; | ||
754 | if (GNUNET_NO == in_shutdown) | ||
755 | GNUNET_SCHEDULER_add_now (&announce_next_regex, NULL); | ||
756 | } | ||
757 | |||
758 | if (NULL == id) | ||
759 | { | ||
760 | // FIXME not possible right now | ||
761 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
762 | "String matching timed out for string %s on peer %u (%i/%i)\n", | ||
763 | peer->search_str, peer->id, strings_found, num_peers); | ||
764 | peer->search_str_matched = GNUNET_SYSERR; | ||
765 | } | ||
766 | else | ||
767 | { | ||
768 | prof_time = GNUNET_TIME_absolute_get_duration (peer->prof_start_time); | ||
769 | |||
770 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
771 | "String %s found on peer %u after %s (%i/%i) (%u||)\n", | ||
772 | peer->search_str, peer->id, | ||
773 | GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO), | ||
774 | strings_found, num_peers, parallel_searches); | ||
775 | |||
776 | peer->search_str_matched = GNUNET_YES; | ||
777 | |||
778 | if (NULL != data_file) | ||
779 | { | ||
780 | size = | ||
781 | GNUNET_snprintf (output_buffer, | ||
782 | sizeof(output_buffer), | ||
783 | "%p Peer: %u\n" | ||
784 | "%p Search string: %s\n" | ||
785 | "%p Search duration: %s\n\n", | ||
786 | peer, peer->id, | ||
787 | peer, peer->search_str, | ||
788 | peer, | ||
789 | GNUNET_STRINGS_relative_time_to_string (prof_time, | ||
790 | GNUNET_NO)); | ||
791 | |||
792 | if (size != GNUNET_DISK_file_write (data_file, output_buffer, size)) | ||
793 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Unable to write to file!\n"); | ||
794 | } | ||
795 | } | ||
796 | |||
797 | GNUNET_TESTBED_operation_done (peer->op_handle); | ||
798 | peer->op_handle = NULL; | ||
799 | |||
800 | if (strings_found == num_peers) | ||
801 | { | ||
802 | prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time); | ||
803 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
804 | "All strings successfully matched in %s\n", | ||
805 | GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO)); | ||
806 | |||
807 | if (NULL != search_timeout_task) | ||
808 | { | ||
809 | GNUNET_SCHEDULER_cancel (search_timeout_task); | ||
810 | search_timeout_task = NULL; | ||
811 | } | ||
812 | |||
813 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Collecting stats.\n"); | ||
814 | GNUNET_SCHEDULER_add_now (&do_collect_stats, NULL); | ||
815 | } | ||
816 | } | ||
817 | |||
818 | |||
819 | /** | ||
820 | * Connect by string timeout task. This will cancel the profiler after the | ||
821 | * specified timeout 'search_timeout'. | ||
822 | * | ||
823 | * @param cls NULL | ||
824 | */ | ||
825 | static void | ||
826 | search_timed_out (void *cls) | ||
827 | { | ||
828 | unsigned int i; | ||
829 | |||
830 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
831 | "Finding matches to all strings did not succeed after %s.\n", | ||
832 | GNUNET_STRINGS_relative_time_to_string (search_timeout_time, | ||
833 | GNUNET_NO)); | ||
834 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
835 | "Found %i of %i strings\n", strings_found, num_peers); | ||
836 | |||
837 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
838 | "Search timed out after %s." | ||
839 | "Collecting stats and shutting down.\n", | ||
840 | GNUNET_STRINGS_relative_time_to_string (search_timeout_time, | ||
841 | GNUNET_NO)); | ||
842 | |||
843 | in_shutdown = GNUNET_YES; | ||
844 | for (i = 0; i < num_peers; i++) | ||
845 | { | ||
846 | if (NULL != peers[i].op_handle) | ||
847 | { | ||
848 | GNUNET_TESTBED_operation_done (peers[i].op_handle); | ||
849 | peers[i].op_handle = NULL; | ||
850 | } | ||
851 | } | ||
852 | GNUNET_SCHEDULER_add_now (&do_collect_stats, NULL); | ||
853 | } | ||
854 | |||
855 | |||
856 | /** | ||
857 | * Search timed out. It might still complete in the future, | ||
858 | * but we should start another one. | ||
859 | * | ||
860 | * @param cls Index of the next peer in the peers array. | ||
861 | */ | ||
862 | static void | ||
863 | find_timed_out (void *cls) | ||
864 | { | ||
865 | struct RegexPeer *p = cls; | ||
866 | |||
867 | p->timeout = NULL; | ||
868 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
869 | "Searching for string \"%s\" on peer %d timed out.\n", | ||
870 | p->search_str, | ||
871 | p->id); | ||
872 | if (GNUNET_NO == in_shutdown) | ||
873 | GNUNET_SCHEDULER_add_now (&announce_next_regex, NULL); | ||
874 | } | ||
875 | |||
876 | |||
877 | /** | ||
878 | * Start searching for a string in the DHT. | ||
879 | * | ||
880 | * @param cls Index of the next peer in the peers array. | ||
881 | */ | ||
882 | static void | ||
883 | find_string (void *cls) | ||
884 | { | ||
885 | unsigned int search_peer = (unsigned int) (long) cls; | ||
886 | |||
887 | if ((search_peer >= num_peers) || | ||
888 | (GNUNET_YES == in_shutdown)) | ||
889 | return; | ||
890 | |||
891 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
892 | "Searching for string \"%s\" on peer %d (%u||)\n", | ||
893 | peers[search_peer].search_str, | ||
894 | search_peer, | ||
895 | parallel_searches); | ||
896 | |||
897 | peers[search_peer].op_handle = | ||
898 | GNUNET_TESTBED_service_connect (NULL, | ||
899 | peers[search_peer].peer_handle, | ||
900 | "dht", | ||
901 | &dht_connect_cb, | ||
902 | &peers[search_peer], | ||
903 | &dht_ca, | ||
904 | &dht_da, | ||
905 | &peers[search_peer]); | ||
906 | GNUNET_assert (NULL != peers[search_peer].op_handle); | ||
907 | peers[search_peer].timeout | ||
908 | = GNUNET_SCHEDULER_add_delayed (FIND_TIMEOUT, | ||
909 | &find_timed_out, | ||
910 | &peers[search_peer]); | ||
911 | } | ||
912 | |||
913 | |||
914 | /** | ||
915 | * Callback called when testbed has started the daemon we asked for. | ||
916 | * | ||
917 | * @param cls NULL | ||
918 | * @param op the operation handle | ||
919 | * @param emsg NULL on success; otherwise an error description | ||
920 | */ | ||
921 | static void | ||
922 | daemon_started (void *cls, | ||
923 | struct GNUNET_TESTBED_Operation *op, | ||
924 | const char *emsg) | ||
925 | { | ||
926 | struct RegexPeer *peer = (struct RegexPeer *) cls; | ||
927 | unsigned long search_peer; | ||
928 | unsigned int i; | ||
929 | |||
930 | GNUNET_TESTBED_operation_done (peer->daemon_op); | ||
931 | peer->daemon_op = NULL; | ||
932 | if (NULL != emsg) | ||
933 | { | ||
934 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
935 | "Failed to start/stop daemon at peer %u: %s\n", peer->id, emsg); | ||
936 | GNUNET_assert (0); | ||
937 | } | ||
938 | else | ||
939 | { | ||
940 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
941 | "Daemon %u started successfully\n", peer->id); | ||
942 | } | ||
943 | |||
944 | /* Find a peer to look for a string matching the regex announced */ | ||
945 | search_peer = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | ||
946 | num_peers); | ||
947 | for (i = 0; peers[search_peer].search_str != NULL; i++) | ||
948 | { | ||
949 | search_peer = (search_peer + 1) % num_peers; | ||
950 | if (i > num_peers) | ||
951 | GNUNET_assert (0); /* we ran out of peers, must be a bug */ | ||
952 | } | ||
953 | peers[search_peer].search_str = search_strings[peer->id]; | ||
954 | peers[search_peer].search_str_matched = GNUNET_NO; | ||
955 | GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_relative_saturating_multiply ( | ||
956 | reannounce_period_max, | ||
957 | 2), | ||
958 | &find_string, | ||
959 | (void *) search_peer); | ||
960 | } | ||
961 | |||
962 | |||
963 | /** | ||
964 | * Task to start the daemons on each peer so that the regexes are announced | ||
965 | * into the DHT. | ||
966 | * | ||
967 | * @param cls NULL | ||
968 | * @param tc the task context | ||
969 | */ | ||
970 | static void | ||
971 | do_announce (void *cls) | ||
972 | { | ||
973 | unsigned int i; | ||
974 | |||
975 | if (GNUNET_YES == in_shutdown) | ||
976 | return; | ||
977 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
978 | "Starting announce.\n"); | ||
979 | for (i = 0; i < init_parallel_searches; i++) | ||
980 | { | ||
981 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
982 | " scheduling announce %u\n", | ||
983 | i); | ||
984 | (void) GNUNET_SCHEDULER_add_now (&announce_next_regex, NULL); | ||
985 | } | ||
986 | } | ||
987 | |||
988 | |||
989 | /** | ||
990 | * Start announcing the next regex in the DHT. | ||
991 | * | ||
992 | * @param cls Closure (unused). | ||
993 | */ | ||
994 | static void | ||
995 | announce_next_regex (void *cls) | ||
996 | { | ||
997 | struct RegexPeer *peer; | ||
998 | |||
999 | if (GNUNET_YES == in_shutdown) | ||
1000 | return; | ||
1001 | if (next_search >= num_peers) | ||
1002 | { | ||
1003 | if (strings_found != num_peers) | ||
1004 | { | ||
1005 | struct GNUNET_TIME_Relative new_delay; | ||
1006 | if (NULL != search_timeout_task) | ||
1007 | GNUNET_SCHEDULER_cancel (search_timeout_task); | ||
1008 | new_delay = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 15); | ||
1009 | search_timeout_task = GNUNET_SCHEDULER_add_delayed (new_delay, | ||
1010 | &search_timed_out, | ||
1011 | NULL); | ||
1012 | } | ||
1013 | return; | ||
1014 | } | ||
1015 | |||
1016 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Starting daemon %u\n", next_search); | ||
1017 | peer = &peers[next_search]; | ||
1018 | peer->daemon_op = | ||
1019 | GNUNET_TESTBED_peer_manage_service (NULL, | ||
1020 | peer->peer_handle, | ||
1021 | "regexprofiler", | ||
1022 | &daemon_started, | ||
1023 | peer, | ||
1024 | 1); | ||
1025 | next_search++; | ||
1026 | parallel_searches++; | ||
1027 | } | ||
1028 | |||
1029 | |||
1030 | /** | ||
1031 | * DHT connect callback. Called when we are connected to the dht service for | ||
1032 | * the peer in 'cls'. If successful we connect to the stats service of this | ||
1033 | * peer and then try to match the search string of this peer. | ||
1034 | * | ||
1035 | * @param cls internal peer id. | ||
1036 | * @param op operation handle. | ||
1037 | * @param ca_result connect adapter result. | ||
1038 | * @param emsg error message. | ||
1039 | */ | ||
1040 | static void | ||
1041 | dht_connect_cb (void *cls, | ||
1042 | struct GNUNET_TESTBED_Operation *op, | ||
1043 | void *ca_result, | ||
1044 | const char *emsg) | ||
1045 | { | ||
1046 | struct RegexPeer *peer = (struct RegexPeer *) cls; | ||
1047 | |||
1048 | if ((NULL != emsg) || (NULL == op) || (NULL == ca_result)) | ||
1049 | { | ||
1050 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "DHT connect failed: %s\n", emsg); | ||
1051 | GNUNET_assert (0); | ||
1052 | } | ||
1053 | |||
1054 | GNUNET_assert (NULL != peer->dht_handle); | ||
1055 | GNUNET_assert (peer->op_handle == op); | ||
1056 | GNUNET_assert (peer->dht_handle == ca_result); | ||
1057 | |||
1058 | peer->search_str_matched = GNUNET_NO; | ||
1059 | peer->search_handle = REGEX_INTERNAL_search (peer->dht_handle, | ||
1060 | peer->search_str, | ||
1061 | ®ex_found_handler, peer, | ||
1062 | NULL); | ||
1063 | peer->prof_start_time = GNUNET_TIME_absolute_get (); | ||
1064 | } | ||
1065 | |||
1066 | |||
1067 | /** | ||
1068 | * DHT connect adapter. Opens a connection to the dht service. | ||
1069 | * | ||
1070 | * @param cls Closure (peer). | ||
1071 | * @param cfg Configuration handle. | ||
1072 | * | ||
1073 | * @return | ||
1074 | */ | ||
1075 | static void * | ||
1076 | dht_ca (void *cls, const struct GNUNET_CONFIGURATION_Handle *cfg) | ||
1077 | { | ||
1078 | struct RegexPeer *peer = cls; | ||
1079 | |||
1080 | peer->dht_handle = GNUNET_DHT_connect (cfg, 32); | ||
1081 | |||
1082 | return peer->dht_handle; | ||
1083 | } | ||
1084 | |||
1085 | |||
1086 | /** | ||
1087 | * Adapter function called to destroy a connection to the dht service. | ||
1088 | * | ||
1089 | * @param cls Closure (peer). | ||
1090 | * @param op_result Service handle returned from the connect adapter. | ||
1091 | */ | ||
1092 | static void | ||
1093 | dht_da (void *cls, void *op_result) | ||
1094 | { | ||
1095 | struct RegexPeer *peer = (struct RegexPeer *) cls; | ||
1096 | |||
1097 | GNUNET_assert (peer->dht_handle == op_result); | ||
1098 | |||
1099 | if (NULL != peer->search_handle) | ||
1100 | { | ||
1101 | REGEX_INTERNAL_search_cancel (peer->search_handle); | ||
1102 | peer->search_handle = NULL; | ||
1103 | } | ||
1104 | |||
1105 | if (NULL != peer->dht_handle) | ||
1106 | { | ||
1107 | GNUNET_DHT_disconnect (peer->dht_handle); | ||
1108 | peer->dht_handle = NULL; | ||
1109 | } | ||
1110 | } | ||
1111 | |||
1112 | |||
1113 | /** | ||
1114 | * Signature of a main function for a testcase. | ||
1115 | * | ||
1116 | * @param cls NULL | ||
1117 | * @param h the run handle | ||
1118 | * @param num_peers_ number of peers in 'peers' | ||
1119 | * @param testbed_peers handle to peers run in the testbed. NULL upon timeout (see | ||
1120 | * GNUNET_TESTBED_test_run()). | ||
1121 | * @param links_succeeded the number of overlay link connection attempts that | ||
1122 | * succeeded | ||
1123 | * @param links_failed the number of overlay link connection attempts that | ||
1124 | * failed | ||
1125 | */ | ||
1126 | static void | ||
1127 | test_master (void *cls, | ||
1128 | struct GNUNET_TESTBED_RunHandle *h, | ||
1129 | unsigned int num_peers_, | ||
1130 | struct GNUNET_TESTBED_Peer **testbed_peers, | ||
1131 | unsigned int links_succeeded, | ||
1132 | unsigned int links_failed) | ||
1133 | { | ||
1134 | unsigned int i; | ||
1135 | |||
1136 | GNUNET_assert (num_peers_ == num_peers); | ||
1137 | |||
1138 | prof_time = GNUNET_TIME_absolute_get_duration (prof_start_time); | ||
1139 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
1140 | "Testbed started in %s\n", | ||
1141 | GNUNET_STRINGS_relative_time_to_string (prof_time, GNUNET_NO)); | ||
1142 | |||
1143 | if (NULL != abort_task) | ||
1144 | { | ||
1145 | GNUNET_SCHEDULER_cancel (abort_task); | ||
1146 | abort_task = NULL; | ||
1147 | } | ||
1148 | |||
1149 | for (i = 0; i < num_peers; i++) | ||
1150 | { | ||
1151 | peers[i].peer_handle = testbed_peers[i]; | ||
1152 | } | ||
1153 | if (GNUNET_NO == | ||
1154 | GNUNET_CONFIGURATION_get_value_yesno (cfg, "DHT", "DISABLE_TRY_CONNECT")) | ||
1155 | { | ||
1156 | struct GNUNET_TIME_Relative settle_time; | ||
1157 | |||
1158 | settle_time = | ||
1159 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, | ||
1160 | 10 * num_peers); | ||
1161 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
1162 | "Waiting for DHT for %s to settle new connections.\n\n", | ||
1163 | GNUNET_STRINGS_relative_time_to_string (settle_time, | ||
1164 | GNUNET_NO)); | ||
1165 | GNUNET_SCHEDULER_add_delayed (settle_time, &do_announce, NULL); | ||
1166 | } | ||
1167 | else | ||
1168 | { | ||
1169 | GNUNET_SCHEDULER_add_now (&do_announce, NULL); | ||
1170 | } | ||
1171 | search_timeout_task = | ||
1172 | GNUNET_SCHEDULER_add_delayed (search_timeout_time, &search_timed_out, NULL); | ||
1173 | } | ||
1174 | |||
1175 | |||
1176 | /** | ||
1177 | * Function that will be called whenever something in the testbed changes. | ||
1178 | * | ||
1179 | * @param cls closure, NULL | ||
1180 | * @param event information on what is happening | ||
1181 | */ | ||
1182 | static void | ||
1183 | master_controller_cb (void *cls, | ||
1184 | const struct GNUNET_TESTBED_EventInformation *event) | ||
1185 | { | ||
1186 | switch (event->type) | ||
1187 | { | ||
1188 | case GNUNET_TESTBED_ET_CONNECT: | ||
1189 | printf ("."); | ||
1190 | break; | ||
1191 | |||
1192 | case GNUNET_TESTBED_ET_PEER_START: | ||
1193 | printf ("#"); | ||
1194 | break; | ||
1195 | |||
1196 | default: | ||
1197 | break; | ||
1198 | } | ||
1199 | fflush (stdout); | ||
1200 | } | ||
1201 | |||
1202 | |||
1203 | /******************************************************************************/ | ||
1204 | /*************************** TESTBED PEER SETUP *****************************/ | ||
1205 | /******************************************************************************/ | ||
1206 | |||
/**
 * Process the text buffer counting the non-empty lines and separating them
 * with NUL characters, for later ease of copy using (as)printf.
 *
 * Every newline that is visited is overwritten with a NUL byte,
 * including the newline of a blank line, so that a later pass can
 * recognise blank lines as empty strings and skip them. Scanning stops
 * once @a str_max lines have been counted; the rest of the buffer is
 * left untouched. A final line without a trailing newline is not
 * counted.
 *
 * @param data Memory buffer with strings.
 * @param data_size Size of the @a data buffer in bytes (may be 0).
 * @param str_max Maximum number of strings to return.
 * @return Number of non-empty lines found in the buffer (0 if none).
 */
static int
count_and_separate_strings (char *data,
                            uint64_t data_size,
                            unsigned int str_max)
{
  uint64_t line_start; /* offset at which the current line begins */
  uint64_t offset;
  unsigned int str_cnt;

  line_start = 0;
  str_cnt = 0;
  /* Start at offset 0 so a leading blank line is handled too, and
   * compare against data_size directly so an empty buffer cannot
   * underflow the bound. */
  for (offset = 0; (offset < data_size) && (str_cnt < str_max); offset++)
  {
    if ('\n' == data[offset])
    {
      if (offset != line_start)
        str_cnt++; /* the line had content */
      data[offset] = '\0';
      line_start = offset + 1;
    }
    else if ('\0' == data[offset])
    {
      /* An embedded NUL ends the current line without counting it. */
      line_start = offset + 1;
    }
  }
  return str_cnt;
}
1245 | |||
1246 | |||
1247 | /** | ||
1248 | * Allocate a string array and fill it with the prefixed strings | ||
1249 | * from a pre-processed, NULL-separated memory region. | ||
1250 | * | ||
1251 | * @param data Preprocessed memory with strings | ||
1252 | * @param data_size Size of the @a data buffer in bytes. | ||
1253 | * @param strings Address of the string array to be created. | ||
1254 | * Must be freed by caller if function end in success. | ||
1255 | * @param str_cnt String count. The @a data buffer should contain | ||
1256 | * at least this many NULL-separated strings. | ||
1257 | * @return #GNUNET_OK in ase of success, #GNUNET_SYSERR otherwise. | ||
1258 | * In case of error @a strings must not be freed. | ||
1259 | */ | ||
1260 | static int | ||
1261 | create_string_array (char *data, uint64_t data_size, | ||
1262 | char ***strings, unsigned int str_cnt) | ||
1263 | { | ||
1264 | uint64_t offset; | ||
1265 | uint64_t len; | ||
1266 | unsigned int i; | ||
1267 | |||
1268 | *strings = GNUNET_malloc (sizeof(char *) * str_cnt); | ||
1269 | offset = 0; | ||
1270 | for (i = 0; i < str_cnt; i++) | ||
1271 | { | ||
1272 | len = strlen (&data[offset]); | ||
1273 | if (offset + len >= data_size) | ||
1274 | { | ||
1275 | GNUNET_free (*strings); | ||
1276 | *strings = NULL; | ||
1277 | return GNUNET_SYSERR; | ||
1278 | } | ||
1279 | if (0 == len) // empty line | ||
1280 | { | ||
1281 | offset++; | ||
1282 | i--; | ||
1283 | continue; | ||
1284 | } | ||
1285 | |||
1286 | GNUNET_asprintf (&(*strings)[i], | ||
1287 | "%s%s", | ||
1288 | regex_prefix, | ||
1289 | &data[offset]); | ||
1290 | offset += len + 1; | ||
1291 | } | ||
1292 | return GNUNET_OK; | ||
1293 | } | ||
1294 | |||
1295 | |||
1296 | /** | ||
1297 | * Load search strings from given filename. One search string per line. | ||
1298 | * | ||
1299 | * @param filename filename of the file containing the search strings. | ||
1300 | * @param strings set of strings loaded from file. Caller needs to free this | ||
1301 | * if number returned is greater than zero. | ||
1302 | * @param limit upper limit on the number of strings read from the file | ||
1303 | * @return number of strings found in the file. #GNUNET_SYSERR on error. | ||
1304 | */ | ||
1305 | static int | ||
1306 | load_search_strings (const char *filename, | ||
1307 | char ***strings, | ||
1308 | unsigned int limit) | ||
1309 | { | ||
1310 | char *data; | ||
1311 | uint64_t filesize; | ||
1312 | int str_cnt; | ||
1313 | |||
1314 | /* Sanity checks */ | ||
1315 | if (NULL == filename) | ||
1316 | { | ||
1317 | return GNUNET_SYSERR; | ||
1318 | } | ||
1319 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) | ||
1320 | { | ||
1321 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1322 | "Could not find search strings file %s\n", filename); | ||
1323 | return GNUNET_SYSERR; | ||
1324 | } | ||
1325 | if (GNUNET_OK != | ||
1326 | GNUNET_DISK_file_size (filename, | ||
1327 | &filesize, | ||
1328 | GNUNET_YES, | ||
1329 | GNUNET_YES)) | ||
1330 | { | ||
1331 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1332 | "Search strings file %s cannot be read.\n", | ||
1333 | filename); | ||
1334 | return GNUNET_SYSERR; | ||
1335 | } | ||
1336 | if (0 == filesize) | ||
1337 | { | ||
1338 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1339 | "Search strings file %s is empty.\n", | ||
1340 | filename); | ||
1341 | return GNUNET_SYSERR; | ||
1342 | } | ||
1343 | |||
1344 | /* Read data into memory */ | ||
1345 | data = GNUNET_malloc (filesize + 1); | ||
1346 | if (filesize != GNUNET_DISK_fn_read (filename, | ||
1347 | data, | ||
1348 | filesize)) | ||
1349 | { | ||
1350 | GNUNET_free (data); | ||
1351 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1352 | "Could not read search strings file %s.\n", | ||
1353 | filename); | ||
1354 | return GNUNET_SYSERR; | ||
1355 | } | ||
1356 | |||
1357 | /* Process buffer and build array */ | ||
1358 | str_cnt = count_and_separate_strings (data, filesize, limit); | ||
1359 | if (GNUNET_OK != create_string_array (data, filesize, strings, str_cnt)) | ||
1360 | { | ||
1361 | str_cnt = GNUNET_SYSERR; | ||
1362 | } | ||
1363 | GNUNET_free (data); | ||
1364 | return str_cnt; | ||
1365 | } | ||
1366 | |||
1367 | |||
1368 | /** | ||
1369 | * Main function that will be run by the scheduler. | ||
1370 | * | ||
1371 | * @param cls closure | ||
1372 | * @param args remaining command-line arguments | ||
1373 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
1374 | * @param config configuration | ||
1375 | */ | ||
1376 | static void | ||
1377 | run (void *cls, | ||
1378 | char *const *args, | ||
1379 | const char *cfgfile, | ||
1380 | const struct GNUNET_CONFIGURATION_Handle *config) | ||
1381 | { | ||
1382 | unsigned int nsearchstrs; | ||
1383 | unsigned int i; | ||
1384 | struct GNUNET_TIME_Relative abort_time; | ||
1385 | |||
1386 | in_shutdown = GNUNET_NO; | ||
1387 | |||
1388 | /* Check config */ | ||
1389 | if (NULL == config) | ||
1390 | { | ||
1391 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1392 | _ ("No configuration file given. Exiting\n")); | ||
1393 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1394 | return; | ||
1395 | } | ||
1396 | cfg = GNUNET_CONFIGURATION_dup (config); | ||
1397 | if (GNUNET_OK != | ||
1398 | GNUNET_CONFIGURATION_get_value_string (cfg, "REGEXPROFILER", | ||
1399 | "REGEX_PREFIX", | ||
1400 | ®ex_prefix)) | ||
1401 | { | ||
1402 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, | ||
1403 | "regexprofiler", | ||
1404 | "regex_prefix"); | ||
1405 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1406 | return; | ||
1407 | } | ||
1408 | if (GNUNET_OK != | ||
1409 | GNUNET_CONFIGURATION_get_value_number (cfg, "REGEXPROFILER", | ||
1410 | "PARALLEL_SEARCHES", | ||
1411 | &init_parallel_searches)) | ||
1412 | { | ||
1413 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1414 | "Configuration option \"PARALLEL_SEARCHES\" missing." | ||
1415 | " Using default (%d)\n", 10); | ||
1416 | init_parallel_searches = 10; | ||
1417 | } | ||
1418 | if (GNUNET_OK != | ||
1419 | GNUNET_CONFIGURATION_get_value_time (cfg, "REGEXPROFILER", | ||
1420 | "REANNOUNCE_PERIOD_MAX", | ||
1421 | &reannounce_period_max)) | ||
1422 | { | ||
1423 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1424 | "reannounce_period_max not given. Using 10 minutes.\n"); | ||
1425 | reannounce_period_max = | ||
1426 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 10); | ||
1427 | } | ||
1428 | |||
1429 | /* Check arguments */ | ||
1430 | if (NULL == policy_dir) | ||
1431 | { | ||
1432 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1433 | _ ( | ||
1434 | "No policy directory specified on command line. Exiting.\n")); | ||
1435 | return; | ||
1436 | } | ||
1437 | if (GNUNET_YES != GNUNET_DISK_directory_test (policy_dir, GNUNET_YES)) | ||
1438 | { | ||
1439 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1440 | _ ("Specified policies directory does not exist. Exiting.\n")); | ||
1441 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1442 | return; | ||
1443 | } | ||
1444 | if (0 >= (int) (num_peers = GNUNET_DISK_directory_scan (policy_dir, NULL, | ||
1445 | NULL))) | ||
1446 | { | ||
1447 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1448 | _ ("No files found in `%s'\n"), | ||
1449 | policy_dir); | ||
1450 | return; | ||
1451 | } | ||
1452 | GNUNET_CONFIGURATION_set_value_string (cfg, "REGEXPROFILER", | ||
1453 | "POLICY_DIR", policy_dir); | ||
1454 | if (GNUNET_YES != GNUNET_DISK_file_test (strings_file)) | ||
1455 | { | ||
1456 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1457 | _ ("No search strings file given. Exiting.\n")); | ||
1458 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1459 | return; | ||
1460 | } | ||
1461 | nsearchstrs = load_search_strings (strings_file, | ||
1462 | &search_strings, | ||
1463 | num_peers); | ||
1464 | if (num_peers != nsearchstrs) | ||
1465 | { | ||
1466 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1467 | "Error loading search strings.\n"); | ||
1468 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1469 | "File (%s) does not contain enough strings (%u/%u).\n", | ||
1470 | strings_file, nsearchstrs, num_peers); | ||
1471 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1472 | return; | ||
1473 | } | ||
1474 | if ((0 == num_peers) || (NULL == search_strings)) | ||
1475 | { | ||
1476 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1477 | _ ("Error loading search strings. Exiting.\n")); | ||
1478 | GNUNET_SCHEDULER_add_now (&do_shutdown, NULL); | ||
1479 | return; | ||
1480 | } | ||
1481 | for (i = 0; i < num_peers; i++) | ||
1482 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1483 | "search string: %s\n", | ||
1484 | search_strings[i]); | ||
1485 | |||
1486 | /* Check logfile */ | ||
1487 | if ((NULL != data_filename) && | ||
1488 | (NULL == (data_file = | ||
1489 | GNUNET_DISK_file_open (data_filename, | ||
1490 | GNUNET_DISK_OPEN_READWRITE | ||
1491 | | GNUNET_DISK_OPEN_TRUNCATE | ||
1492 | | GNUNET_DISK_OPEN_CREATE, | ||
1493 | GNUNET_DISK_PERM_USER_READ | ||
1494 | | GNUNET_DISK_PERM_USER_WRITE)))) | ||
1495 | { | ||
1496 | GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_ERROR, | ||
1497 | "open", | ||
1498 | data_filename); | ||
1499 | return; | ||
1500 | } | ||
1501 | |||
1502 | /* Initialize peers */ | ||
1503 | peers = GNUNET_malloc (sizeof(struct RegexPeer) * num_peers); | ||
1504 | for (i = 0; i < num_peers; i++) | ||
1505 | peers[i].id = i; | ||
1506 | |||
1507 | GNUNET_CONFIGURATION_set_value_number (cfg, | ||
1508 | "TESTBED", "OVERLAY_RANDOM_LINKS", | ||
1509 | num_peers * 20); | ||
1510 | GNUNET_CONFIGURATION_set_value_number (cfg, | ||
1511 | "DHT", "FORCE_NSE", | ||
1512 | (long long unsigned) | ||
1513 | (log (num_peers) / log (2.0))); | ||
1514 | event_mask = 0LL; | ||
1515 | /* For feedback about the start process activate these and pass master_cb */ | ||
1516 | event_mask |= (1LL << GNUNET_TESTBED_ET_PEER_START); | ||
1517 | // event_mask |= (1LL << GNUNET_TESTBED_ET_PEER_STOP); | ||
1518 | event_mask |= (1LL << GNUNET_TESTBED_ET_CONNECT); | ||
1519 | // event_mask |= (1LL << GNUNET_TESTBED_ET_DISCONNECT); | ||
1520 | prof_start_time = GNUNET_TIME_absolute_get (); | ||
1521 | GNUNET_TESTBED_run (hosts_file, | ||
1522 | cfg, | ||
1523 | num_peers, | ||
1524 | event_mask, | ||
1525 | &master_controller_cb, | ||
1526 | NULL, /* master_controller_cb cls */ | ||
1527 | &test_master, | ||
1528 | NULL); /* test_master cls */ | ||
1529 | if (GNUNET_OK != | ||
1530 | GNUNET_CONFIGURATION_get_value_time (cfg, "TESTBED", | ||
1531 | "SETUP_TIMEOUT", | ||
1532 | &abort_time)) | ||
1533 | { | ||
1534 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1535 | "SETUP_TIMEOUT not given. Using 15 minutes.\n"); | ||
1536 | abort_time = | ||
1537 | GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MINUTES, 15); | ||
1538 | } | ||
1539 | abort_time = GNUNET_TIME_relative_add (abort_time, GNUNET_TIME_UNIT_MINUTES); | ||
1540 | abort_task = | ||
1541 | GNUNET_SCHEDULER_add_delayed (abort_time, | ||
1542 | &do_abort, | ||
1543 | (void *) __LINE__); | ||
1544 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
1545 | "setup_timeout: %s\n", | ||
1546 | GNUNET_STRINGS_relative_time_to_string (abort_time, GNUNET_YES)); | ||
1547 | } | ||
1548 | |||
1549 | |||
1550 | /** | ||
1551 | * Main function. | ||
1552 | * | ||
1553 | * @param argc argument count | ||
1554 | * @param argv argument values | ||
1555 | * @return 0 on success | ||
1556 | */ | ||
1557 | int | ||
1558 | main (int argc, char *const *argv) | ||
1559 | { | ||
1560 | struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
1561 | GNUNET_GETOPT_option_filename ('o', | ||
1562 | "output-file", | ||
1563 | "FILENAME", | ||
1564 | gettext_noop ( | ||
1565 | "name of the file for writing statistics"), | ||
1566 | &data_filename), | ||
1567 | |||
1568 | GNUNET_GETOPT_option_relative_time ('t', | ||
1569 | "matching-timeout", | ||
1570 | "TIMEOUT", | ||
1571 | gettext_noop ( | ||
1572 | "wait TIMEOUT before ending the experiment"), | ||
1573 | &search_timeout_time), | ||
1574 | |||
1575 | GNUNET_GETOPT_option_filename ('p', | ||
1576 | "policy-dir", | ||
1577 | "DIRECTORY", | ||
1578 | gettext_noop ("directory with policy files"), | ||
1579 | &policy_dir), | ||
1580 | |||
1581 | |||
1582 | GNUNET_GETOPT_option_filename ('s', | ||
1583 | "strings-file", | ||
1584 | "FILENAME", | ||
1585 | gettext_noop ( | ||
1586 | "name of file with input strings"), | ||
1587 | &strings_file), | ||
1588 | |||
1589 | GNUNET_GETOPT_option_filename ('H', | ||
1590 | "hosts-file", | ||
1591 | "FILENAME", | ||
1592 | gettext_noop ( | ||
1593 | "name of file with hosts' names"), | ||
1594 | &hosts_file), | ||
1595 | |||
1596 | GNUNET_GETOPT_OPTION_END | ||
1597 | }; | ||
1598 | int ret; | ||
1599 | |||
1600 | if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv)) | ||
1601 | return 2; | ||
1602 | result = GNUNET_SYSERR; | ||
1603 | ret = | ||
1604 | GNUNET_PROGRAM_run (argc, argv, | ||
1605 | "gnunet-regex-profiler", | ||
1606 | _ ("Profiler for regex"), | ||
1607 | options, &run, NULL); | ||
1608 | if (GNUNET_OK != ret) | ||
1609 | return ret; | ||
1610 | if (GNUNET_OK != result) | ||
1611 | return 1; | ||
1612 | return 0; | ||
1613 | } | ||
diff --git a/src/regex/gnunet-regex-simulation-profiler.c b/src/regex/gnunet-regex-simulation-profiler.c deleted file mode 100644 index ab6d65d58..000000000 --- a/src/regex/gnunet-regex-simulation-profiler.c +++ /dev/null | |||
@@ -1,726 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2011, 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | |||
22 | /** | ||
23 | * @file regex/gnunet-regex-simulation-profiler.c | ||
24 | * @brief Regex profiler that dumps all DFAs into a database instead of | ||
25 | * using the DHT (with cadet). | ||
26 | * @author Maximilian Szengel | ||
27 | * @author Christophe Genevey | ||
28 | * | ||
29 | */ | ||
30 | |||
31 | #include "platform.h" | ||
32 | #include "gnunet_util_lib.h" | ||
33 | #include "regex_internal_lib.h" | ||
34 | #include "gnunet_mysql_lib.h" | ||
35 | #include "gnunet_my_lib.h" | ||
36 | #include <mysql/mysql.h> | ||
37 | |||
/**
 * MySQL statement template to insert an edge.  The `%s` placeholder is
 * filled with the table name before the statement is prepared; the four
 * `?` parameters are bound in regex_iterator().
 */
#define INSERT_EDGE_STMT "INSERT IGNORE INTO `%s` " \
  "(`key`, `label`, `to_key`, `accepting`) " \
  "VALUES (?, ?, ?, ?);"

/**
 * MySQL statement template to count the rows that already exist for a
 * given `key` and `label`.  The `%s` placeholder is filled with the
 * table name before the statement is prepared.
 */
#define SELECT_KEY_STMT "SELECT COUNT(*) FROM `%s` " \
  "WHERE `key` = ? AND `label` = ?;"
50 | |||
/**
 * Simple struct to keep track of progress, and print a
 * nice little percentage meter for long running tasks.
 * Instances are created by create_meter() and released by free_meter().
 */
struct ProgressMeter
{
  /**
   * Total number of elements expected to be completed.
   */
  unsigned int total;

  /**
   * Interval (in completed elements) at which the percentage is
   * printed; a quarter of @e total, but at least 1.
   */
  unsigned int modnum;

  /**
   * Interval (in completed elements) at which a dot is printed
   * between two percentages.
   */
  unsigned int dotnum;

  /**
   * Number of elements completed so far.
   */
  unsigned int completed;

  /**
   * Should the meter be printed?  #GNUNET_YES to print.
   */
  int print;

  /**
   * String to print on startup, before the first percentage.
   * Owned by the meter.
   */
  char *startup_string;
};
87 | |||
88 | |||
/**
 * Handle for the progress meter; only non-NULL while the directory
 * scan is running.
 */
static struct ProgressMeter *meter;

/**
 * Scan task identifier; reset to NULL once do_directory_scan() starts.
 */
static struct GNUNET_SCHEDULER_Task *scan_task;

/**
 * Global testing status: #GNUNET_OK on success, #GNUNET_SYSERR
 * otherwise.  Decides the exit code in main().
 */
static int result;

/**
 * MySQL context; destroyed in do_shutdown().
 */
static struct GNUNET_MYSQL_Context *mysql_ctx;

/**
 * MySQL prepared statement handle for the edge insert
 * (#INSERT_EDGE_STMT).
 */
static struct GNUNET_MYSQL_StatementHandle *stmt_handle;

/**
 * MySQL prepared statement handle for `key` select
 * (#SELECT_KEY_STMT).
 */
static struct GNUNET_MYSQL_StatementHandle *select_stmt_handle;

/**
 * MySQL table name; taken from the `-t` option, "NFA" if not given.
 */
static char *table_name;

/**
 * Policy dir containing files that contain policies
 * (first non-option command-line argument).
 */
static char *policy_dir;

/**
 * Number of policy files, as reported by the initial directory scan.
 */
static unsigned int num_policy_files;

/**
 * Number of policies counted while reading the policy files.
 */
static unsigned int num_policies;

/**
 * Maximal path compression length; passed to the DFA construction.
 */
static unsigned int max_path_compression;

/**
 * Number of merged transitions: edge inserts that affected no row.
 */
static unsigned long long num_merged_transitions;

/**
 * Number of merged states from different policies.
 */
static unsigned long long num_merged_states;

/**
 * Prefix to add before every regex we're announcing; read from the
 * `REGEX_PREFIX` option of the `regex-mysql` configuration section.
 */
static char *regex_prefix;
158 | |||
159 | |||
160 | /** | ||
161 | * Create a meter to keep track of the progress of some task. | ||
162 | * | ||
163 | * @param total the total number of items to complete | ||
164 | * @param start_string a string to prefix the meter with (if printing) | ||
165 | * @param print GNUNET_YES to print the meter, GNUNET_NO to count | ||
166 | * internally only | ||
167 | * | ||
168 | * @return the progress meter | ||
169 | */ | ||
170 | static struct ProgressMeter * | ||
171 | create_meter (unsigned int total, char *start_string, int print) | ||
172 | { | ||
173 | struct ProgressMeter *ret; | ||
174 | |||
175 | ret = GNUNET_new (struct ProgressMeter); | ||
176 | ret->print = print; | ||
177 | ret->total = total; | ||
178 | ret->modnum = total / 4; | ||
179 | if (ret->modnum == 0) /* Divide by zero check */ | ||
180 | ret->modnum = 1; | ||
181 | ret->dotnum = (total / 50) + 1; | ||
182 | if (start_string != NULL) | ||
183 | ret->startup_string = GNUNET_strdup (start_string); | ||
184 | else | ||
185 | ret->startup_string = GNUNET_strdup (""); | ||
186 | |||
187 | return ret; | ||
188 | } | ||
189 | |||
190 | |||
191 | /** | ||
192 | * Update progress meter (increment by one). | ||
193 | * | ||
194 | * @param meter the meter to update and print info for | ||
195 | * | ||
196 | * @return GNUNET_YES if called the total requested, | ||
197 | * GNUNET_NO if more items expected | ||
198 | */ | ||
199 | static int | ||
200 | update_meter (struct ProgressMeter *meter) | ||
201 | { | ||
202 | if (meter->print == GNUNET_YES) | ||
203 | { | ||
204 | if (meter->completed % meter->modnum == 0) | ||
205 | { | ||
206 | if (meter->completed == 0) | ||
207 | { | ||
208 | fprintf (stdout, "%sProgress: [0%%", meter->startup_string); | ||
209 | } | ||
210 | else | ||
211 | fprintf (stdout, "%d%%", | ||
212 | (int) (((float) meter->completed / meter->total) * 100)); | ||
213 | } | ||
214 | else if (meter->completed % meter->dotnum == 0) | ||
215 | fprintf (stdout, "%s", "."); | ||
216 | |||
217 | if (meter->completed + 1 == meter->total) | ||
218 | fprintf (stdout, "%d%%]\n", 100); | ||
219 | fflush (stdout); | ||
220 | } | ||
221 | meter->completed++; | ||
222 | |||
223 | if (meter->completed == meter->total) | ||
224 | return GNUNET_YES; | ||
225 | if (meter->completed > meter->total) | ||
226 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Progress meter overflow!!\n"); | ||
227 | return GNUNET_NO; | ||
228 | } | ||
229 | |||
230 | |||
231 | /** | ||
232 | * Reset progress meter. | ||
233 | * | ||
234 | * @param meter the meter to reset | ||
235 | * | ||
236 | * @return #GNUNET_YES if meter reset, | ||
237 | * #GNUNET_SYSERR on error | ||
238 | */ | ||
239 | static int | ||
240 | reset_meter (struct ProgressMeter *meter) | ||
241 | { | ||
242 | if (meter == NULL) | ||
243 | return GNUNET_SYSERR; | ||
244 | |||
245 | meter->completed = 0; | ||
246 | return GNUNET_YES; | ||
247 | } | ||
248 | |||
249 | |||
250 | /** | ||
251 | * Release resources for meter | ||
252 | * | ||
253 | * @param meter the meter to free | ||
254 | */ | ||
255 | static void | ||
256 | free_meter (struct ProgressMeter *meter) | ||
257 | { | ||
258 | GNUNET_free (meter->startup_string); | ||
259 | GNUNET_free (meter); | ||
260 | } | ||
261 | |||
262 | |||
263 | /** | ||
264 | * Shutdown task. | ||
265 | * | ||
266 | * @param cls NULL | ||
267 | */ | ||
268 | static void | ||
269 | do_shutdown (void *cls) | ||
270 | { | ||
271 | if (NULL != mysql_ctx) | ||
272 | { | ||
273 | GNUNET_MYSQL_context_destroy (mysql_ctx); | ||
274 | mysql_ctx = NULL; | ||
275 | } | ||
276 | if (NULL != meter) | ||
277 | { | ||
278 | free_meter (meter); | ||
279 | meter = NULL; | ||
280 | } | ||
281 | } | ||
282 | |||
283 | |||
284 | /** | ||
285 | * Abort task to run on test timed out. | ||
286 | * | ||
287 | * FIXME: this doesn't actually work, it used to cancel | ||
288 | * the already running 'scan_task', but now that should | ||
289 | * always be NULL and do nothing. We instead need to set | ||
290 | * a global variable and abort scan_task internally, not | ||
291 | * via scheduler. | ||
292 | * | ||
293 | * @param cls NULL | ||
294 | */ | ||
295 | static void | ||
296 | do_abort (void *cls) | ||
297 | { | ||
298 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Aborting\n"); | ||
299 | if (NULL != scan_task) | ||
300 | { | ||
301 | GNUNET_SCHEDULER_cancel (scan_task); | ||
302 | scan_task = NULL; | ||
303 | } | ||
304 | result = GNUNET_SYSERR; | ||
305 | GNUNET_SCHEDULER_shutdown (); | ||
306 | } | ||
307 | |||
308 | |||
309 | /** | ||
310 | * Iterator over all states that inserts each state into the MySQL db. | ||
311 | * | ||
312 | * @param cls closure. | ||
313 | * @param key hash for current state. | ||
314 | * @param proof proof for current state. | ||
315 | * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not. | ||
316 | * @param num_edges number of edges leaving current state. | ||
317 | * @param edges edges leaving current state. | ||
318 | */ | ||
319 | static void | ||
320 | regex_iterator (void *cls, | ||
321 | const struct GNUNET_HashCode *key, | ||
322 | const char *proof, | ||
323 | int accepting, | ||
324 | unsigned int num_edges, | ||
325 | const struct REGEX_BLOCK_Edge *edges) | ||
326 | { | ||
327 | unsigned int i; | ||
328 | int result; | ||
329 | |||
330 | uint32_t iaccepting = (uint32_t) accepting; | ||
331 | uint64_t total; | ||
332 | |||
333 | GNUNET_assert (NULL != mysql_ctx); | ||
334 | |||
335 | for (i = 0; i < num_edges; i++) | ||
336 | { | ||
337 | struct GNUNET_MY_QueryParam params_select[] = { | ||
338 | GNUNET_MY_query_param_auto_from_type (key), | ||
339 | GNUNET_MY_query_param_string (edges[i].label), | ||
340 | GNUNET_MY_query_param_end | ||
341 | }; | ||
342 | |||
343 | struct GNUNET_MY_ResultSpec results_select[] = { | ||
344 | GNUNET_MY_result_spec_uint64 (&total), | ||
345 | GNUNET_MY_result_spec_end | ||
346 | }; | ||
347 | |||
348 | result = | ||
349 | GNUNET_MY_exec_prepared (mysql_ctx, | ||
350 | select_stmt_handle, | ||
351 | params_select); | ||
352 | |||
353 | if (GNUNET_SYSERR == result) | ||
354 | { | ||
355 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
356 | "Error executing prepared mysql select statement\n"); | ||
357 | GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
358 | return; | ||
359 | } | ||
360 | |||
361 | result = | ||
362 | GNUNET_MY_extract_result (select_stmt_handle, | ||
363 | results_select); | ||
364 | |||
365 | if (GNUNET_SYSERR == result) | ||
366 | { | ||
367 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
368 | "Error extracting result mysql select statement\n"); | ||
369 | GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
370 | return; | ||
371 | } | ||
372 | |||
373 | if ((-1 != total) && (total > 0) ) | ||
374 | { | ||
375 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Total: %llu (%s, %s)\n", | ||
376 | (unsigned long long) total, | ||
377 | GNUNET_h2s (key), edges[i].label); | ||
378 | } | ||
379 | |||
380 | struct GNUNET_MY_QueryParam params_stmt[] = { | ||
381 | GNUNET_MY_query_param_auto_from_type (&key), | ||
382 | GNUNET_MY_query_param_string (edges[i].label), | ||
383 | GNUNET_MY_query_param_auto_from_type (&edges[i].destination), | ||
384 | GNUNET_MY_query_param_uint32 (&iaccepting), | ||
385 | GNUNET_MY_query_param_end | ||
386 | }; | ||
387 | |||
388 | result = | ||
389 | GNUNET_MY_exec_prepared (mysql_ctx, | ||
390 | stmt_handle, | ||
391 | params_stmt); | ||
392 | |||
393 | if (0 == result) | ||
394 | { | ||
395 | char *key_str = GNUNET_strdup (GNUNET_h2s (key)); | ||
396 | char *to_key_str = GNUNET_strdup (GNUNET_h2s (&edges[i].destination)); | ||
397 | |||
398 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Merged (%s, %s, %s, %i)\n", | ||
399 | key_str, | ||
400 | edges[i].label, | ||
401 | to_key_str, | ||
402 | accepting); | ||
403 | |||
404 | GNUNET_free (key_str); | ||
405 | GNUNET_free (to_key_str); | ||
406 | num_merged_transitions++; | ||
407 | } | ||
408 | else if (-1 != total) | ||
409 | { | ||
410 | num_merged_states++; | ||
411 | } | ||
412 | |||
413 | if ((GNUNET_SYSERR == result) || ((1 != result) && (0 != result) )) | ||
414 | { | ||
415 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
416 | "Error executing prepared mysql statement for edge: Affected rows: %i, expected 0 or 1!\n", | ||
417 | result); | ||
418 | GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
419 | } | ||
420 | } | ||
421 | |||
422 | if (0 == num_edges) | ||
423 | { | ||
424 | struct GNUNET_MY_QueryParam params_stmt[] = { | ||
425 | GNUNET_MY_query_param_auto_from_type (key), | ||
426 | GNUNET_MY_query_param_string (""), | ||
427 | GNUNET_MY_query_param_fixed_size (NULL, 0), | ||
428 | GNUNET_MY_query_param_uint32 (&iaccepting), | ||
429 | GNUNET_MY_query_param_end | ||
430 | }; | ||
431 | |||
432 | result = | ||
433 | GNUNET_MY_exec_prepared (mysql_ctx, | ||
434 | stmt_handle, | ||
435 | params_stmt); | ||
436 | |||
437 | if ((1 != result) && (0 != result) ) | ||
438 | { | ||
439 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
440 | "Error executing prepared mysql statement for edge: Affected rows: %i, expected 0 or 1!\n", | ||
441 | result); | ||
442 | GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
443 | } | ||
444 | } | ||
445 | } | ||
446 | |||
447 | |||
448 | /** | ||
449 | * Announce a regex by creating the DFA and iterating over each state, inserting | ||
450 | * each state into a MySQL database. | ||
451 | * | ||
452 | * @param regex regular expression. | ||
453 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on failure. | ||
454 | */ | ||
455 | static int | ||
456 | announce_regex (const char *regex) | ||
457 | { | ||
458 | struct REGEX_INTERNAL_Automaton *dfa; | ||
459 | |||
460 | dfa = | ||
461 | REGEX_INTERNAL_construct_dfa (regex, | ||
462 | strlen (regex), | ||
463 | max_path_compression); | ||
464 | |||
465 | if (NULL == dfa) | ||
466 | { | ||
467 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
468 | "Failed to create DFA for regex %s\n", | ||
469 | regex); | ||
470 | GNUNET_SCHEDULER_add_now (&do_abort, NULL); | ||
471 | return GNUNET_SYSERR; | ||
472 | } | ||
473 | REGEX_INTERNAL_iterate_all_edges (dfa, | ||
474 | ®ex_iterator, NULL); | ||
475 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
476 | |||
477 | return GNUNET_OK; | ||
478 | } | ||
479 | |||
480 | |||
481 | /** | ||
482 | * Function called with a filename. | ||
483 | * | ||
484 | * @param cls closure | ||
485 | * @param filename complete filename (absolute path) | ||
486 | * @return #GNUNET_OK to continue to iterate, | ||
487 | * #GNUNET_SYSERR to abort iteration with error! | ||
488 | */ | ||
489 | static int | ||
490 | policy_filename_cb (void *cls, const char *filename) | ||
491 | { | ||
492 | char *regex; | ||
493 | char *data; | ||
494 | char *buf; | ||
495 | uint64_t filesize; | ||
496 | unsigned int offset; | ||
497 | |||
498 | GNUNET_assert (NULL != filename); | ||
499 | |||
500 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
501 | "Announcing regexes from file %s\n", | ||
502 | filename); | ||
503 | |||
504 | if (GNUNET_YES != GNUNET_DISK_file_test (filename)) | ||
505 | { | ||
506 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
507 | "Could not find policy file %s\n", | ||
508 | filename); | ||
509 | return GNUNET_OK; | ||
510 | } | ||
511 | if (GNUNET_OK != | ||
512 | GNUNET_DISK_file_size (filename, &filesize, | ||
513 | GNUNET_YES, GNUNET_YES)) | ||
514 | filesize = 0; | ||
515 | if (0 == filesize) | ||
516 | { | ||
517 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, "Policy file %s is empty.\n", | ||
518 | filename); | ||
519 | return GNUNET_OK; | ||
520 | } | ||
521 | data = GNUNET_malloc (filesize); | ||
522 | if (filesize != GNUNET_DISK_fn_read (filename, data, filesize)) | ||
523 | { | ||
524 | GNUNET_free (data); | ||
525 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
526 | "Could not read policy file %s.\n", | ||
527 | filename); | ||
528 | return GNUNET_OK; | ||
529 | } | ||
530 | |||
531 | update_meter (meter); | ||
532 | |||
533 | buf = data; | ||
534 | offset = 0; | ||
535 | regex = NULL; | ||
536 | while (offset < (filesize - 1)) | ||
537 | { | ||
538 | offset++; | ||
539 | if (((data[offset] == '\n')) && (buf != &data[offset])) | ||
540 | { | ||
541 | data[offset] = '|'; | ||
542 | num_policies++; | ||
543 | buf = &data[offset + 1]; | ||
544 | } | ||
545 | else if ((data[offset] == '\n') || (data[offset] == '\0')) | ||
546 | buf = &data[offset + 1]; | ||
547 | } | ||
548 | data[offset] = '\0'; | ||
549 | GNUNET_asprintf (®ex, "%s(%s)", regex_prefix, data); | ||
550 | GNUNET_assert (NULL != regex); | ||
551 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
552 | "Announcing regex: %s\n", regex); | ||
553 | |||
554 | if (GNUNET_OK != announce_regex (regex)) | ||
555 | { | ||
556 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
557 | "Could not announce regex %s\n", | ||
558 | regex); | ||
559 | } | ||
560 | GNUNET_free (regex); | ||
561 | GNUNET_free (data); | ||
562 | return GNUNET_OK; | ||
563 | } | ||
564 | |||
565 | |||
566 | /** | ||
567 | * Iterate over files contained in policy_dir. | ||
568 | * | ||
569 | * @param cls NULL | ||
570 | */ | ||
571 | static void | ||
572 | do_directory_scan (void *cls) | ||
573 | { | ||
574 | struct GNUNET_TIME_Absolute start_time; | ||
575 | struct GNUNET_TIME_Relative duration; | ||
576 | char *stmt; | ||
577 | |||
578 | /* Create an MySQL prepared statement for the inserts */ | ||
579 | scan_task = NULL; | ||
580 | GNUNET_asprintf (&stmt, INSERT_EDGE_STMT, table_name); | ||
581 | stmt_handle = GNUNET_MYSQL_statement_prepare (mysql_ctx, stmt); | ||
582 | GNUNET_free (stmt); | ||
583 | |||
584 | GNUNET_asprintf (&stmt, SELECT_KEY_STMT, table_name); | ||
585 | select_stmt_handle = GNUNET_MYSQL_statement_prepare (mysql_ctx, stmt); | ||
586 | GNUNET_free (stmt); | ||
587 | |||
588 | GNUNET_assert (NULL != stmt_handle); | ||
589 | |||
590 | meter = create_meter (num_policy_files, | ||
591 | "Announcing policy files\n", | ||
592 | GNUNET_YES); | ||
593 | start_time = GNUNET_TIME_absolute_get (); | ||
594 | GNUNET_DISK_directory_scan (policy_dir, | ||
595 | &policy_filename_cb, | ||
596 | stmt_handle); | ||
597 | duration = GNUNET_TIME_absolute_get_duration (start_time); | ||
598 | reset_meter (meter); | ||
599 | free_meter (meter); | ||
600 | meter = NULL; | ||
601 | |||
602 | printf ("Announced %u files containing %u policies in %s\n" | ||
603 | "Duplicate transitions: %llu\nMerged states: %llu\n", | ||
604 | num_policy_files, | ||
605 | num_policies, | ||
606 | GNUNET_STRINGS_relative_time_to_string (duration, GNUNET_NO), | ||
607 | num_merged_transitions, | ||
608 | num_merged_states); | ||
609 | result = GNUNET_OK; | ||
610 | GNUNET_SCHEDULER_shutdown (); | ||
611 | } | ||
612 | |||
613 | |||
614 | /** | ||
615 | * Main function that will be run by the scheduler. | ||
616 | * | ||
617 | * @param cls closure | ||
618 | * @param args remaining command-line arguments | ||
619 | * @param cfgfile name of the configuration file used (for saving, can be NULL!) | ||
620 | * @param config configuration | ||
621 | */ | ||
622 | static void | ||
623 | run (void *cls, | ||
624 | char *const *args, | ||
625 | const char *cfgfile, | ||
626 | const struct GNUNET_CONFIGURATION_Handle *config) | ||
627 | { | ||
628 | if (NULL == args[0]) | ||
629 | { | ||
630 | fprintf (stderr, | ||
631 | _ ("No policy directory specified on command line. Exiting.\n")); | ||
632 | result = GNUNET_SYSERR; | ||
633 | return; | ||
634 | } | ||
635 | if (GNUNET_YES != | ||
636 | GNUNET_DISK_directory_test (args[0], GNUNET_YES)) | ||
637 | { | ||
638 | fprintf (stderr, | ||
639 | _ ("Specified policies directory does not exist. Exiting.\n")); | ||
640 | result = GNUNET_SYSERR; | ||
641 | return; | ||
642 | } | ||
643 | policy_dir = args[0]; | ||
644 | |||
645 | num_policy_files = GNUNET_DISK_directory_scan (policy_dir, | ||
646 | NULL, NULL); | ||
647 | meter = NULL; | ||
648 | |||
649 | if (NULL == table_name) | ||
650 | { | ||
651 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
652 | "No table name specified, using default \"NFA\".\n"); | ||
653 | table_name = "NFA"; | ||
654 | } | ||
655 | |||
656 | mysql_ctx = GNUNET_MYSQL_context_create (config, "regex-mysql"); | ||
657 | if (NULL == mysql_ctx) | ||
658 | { | ||
659 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
660 | "Failed to create mysql context\n"); | ||
661 | result = GNUNET_SYSERR; | ||
662 | return; | ||
663 | } | ||
664 | |||
665 | if (GNUNET_OK != | ||
666 | GNUNET_CONFIGURATION_get_value_string (config, | ||
667 | "regex-mysql", | ||
668 | "REGEX_PREFIX", | ||
669 | ®ex_prefix)) | ||
670 | { | ||
671 | GNUNET_log_config_missing (GNUNET_ERROR_TYPE_ERROR, | ||
672 | "regex-mysql", | ||
673 | "REGEX_PREFIX"); | ||
674 | result = GNUNET_SYSERR; | ||
675 | return; | ||
676 | } | ||
677 | |||
678 | result = GNUNET_OK; | ||
679 | GNUNET_SCHEDULER_add_shutdown (&do_shutdown, | ||
680 | NULL); | ||
681 | scan_task = GNUNET_SCHEDULER_add_now (&do_directory_scan, NULL); | ||
682 | } | ||
683 | |||
684 | |||
685 | /** | ||
686 | * Main function. | ||
687 | * | ||
688 | * @param argc argument count | ||
689 | * @param argv argument values | ||
690 | * @return 0 on success | ||
691 | */ | ||
692 | int | ||
693 | main (int argc, char *const *argv) | ||
694 | { | ||
695 | struct GNUNET_GETOPT_CommandLineOption options[] = { | ||
696 | GNUNET_GETOPT_option_string ('t', | ||
697 | "table", | ||
698 | "TABLENAME", | ||
699 | gettext_noop ( | ||
700 | "name of the table to write DFAs"), | ||
701 | &table_name), | ||
702 | |||
703 | GNUNET_GETOPT_option_uint ('p', | ||
704 | "max-path-compression", | ||
705 | "MAX_PATH_COMPRESSION", | ||
706 | gettext_noop ("maximum path compression length"), | ||
707 | &max_path_compression), | ||
708 | |||
709 | GNUNET_GETOPT_OPTION_END | ||
710 | }; | ||
711 | int ret; | ||
712 | |||
713 | if (GNUNET_OK != GNUNET_STRINGS_get_utf8_args (argc, argv, &argc, &argv)) | ||
714 | return 2; | ||
715 | |||
716 | result = GNUNET_SYSERR; | ||
717 | ret = | ||
718 | GNUNET_PROGRAM_run (argc, argv, | ||
719 | "gnunet-regex-simulationprofiler [OPTIONS] policy-dir", | ||
720 | _ ("Profiler for regex library"), options, &run, NULL); | ||
721 | if (GNUNET_OK != ret) | ||
722 | return ret; | ||
723 | if (GNUNET_OK != result) | ||
724 | return 1; | ||
725 | return 0; | ||
726 | } | ||
diff --git a/src/regex/gnunet-service-regex.c b/src/regex/gnunet-service-regex.c deleted file mode 100644 index 7a5cc1f05..000000000 --- a/src/regex/gnunet-service-regex.c +++ /dev/null | |||
@@ -1,410 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/gnunet-service-regex.c | ||
23 | * @brief service to advertise capabilities described as regex and to | ||
24 | * lookup capabilities by regex | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_util_lib.h" | ||
29 | #include "regex_internal_lib.h" | ||
30 | #include "regex_ipc.h" | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Information about one of our clients; one entry is allocated per connected client and freed on disconnect. | ||
35 | */ | ||
36 | struct ClientEntry | ||
37 | { | ||
38 | /** | ||
39 | * Queue for transmissions to @e client; search results are sent through it. | ||
40 | */ | ||
41 | struct GNUNET_MQ_Handle *mq; | ||
42 | |||
43 | /** | ||
44 | * Handle identifying the client; used to continue or drop the client after a request. | ||
45 | */ | ||
46 | struct GNUNET_SERVICE_Client *client; | ||
47 | |||
48 | /** | ||
49 | * Search handle (if this client is searching), NULL otherwise; at most one per client. | ||
50 | */ | ||
51 | struct REGEX_INTERNAL_Search *sh; | ||
52 | |||
53 | /** | ||
54 | * Announcement handle (if this client is announcing), NULL otherwise; at most one per client. | ||
55 | */ | ||
56 | struct REGEX_INTERNAL_Announcement *ah; | ||
57 | |||
58 | /** | ||
59 | * Refresh frequency for announcements, taken from the client's ANNOUNCE message. | ||
60 | */ | ||
61 | struct GNUNET_TIME_Relative frequency; | ||
62 | |||
63 | /** | ||
64 | * Task for re-announcing, NULL if none is scheduled. | ||
65 | */ | ||
66 | struct GNUNET_SCHEDULER_Task *refresh_task; | ||
67 | }; | ||
68 | |||
69 | |||
70 | /** | ||
71 | * Connection to the DHT. | ||
72 | */ | ||
73 | static struct GNUNET_DHT_Handle *dht; | ||
74 | |||
75 | /** | ||
76 | * Handle for doing statistics. | ||
77 | */ | ||
78 | static struct GNUNET_STATISTICS_Handle *stats; | ||
79 | |||
80 | /** | ||
81 | * Private key for this peer. | ||
82 | */ | ||
83 | static struct GNUNET_CRYPTO_EddsaPrivateKey *my_private_key; | ||
84 | |||
85 | |||
86 | /** | ||
87 | * Task run during shutdown: disconnects from the DHT, destroys the statistics handle and frees our private key, resetting each global to NULL. | ||
88 | * | ||
89 | * @param cls unused | ||
90 | */ | ||
91 | static void | ||
92 | cleanup_task (void *cls) | ||
93 | { | ||
94 | GNUNET_DHT_disconnect (dht); | ||
95 | dht = NULL; | ||
96 | GNUNET_STATISTICS_destroy (stats, | ||
97 | GNUNET_NO); | ||
98 | stats = NULL; | ||
99 | GNUNET_free (my_private_key); | ||
100 | my_private_key = NULL; | ||
101 | } | ||
102 | |||
103 | |||
104 | /** | ||
105 | * Periodic task to refresh our announcement of the regex; it re-announces and then schedules itself again after the client's refresh frequency. | ||
106 | * | ||
107 | * @param cls the `struct ClientEntry *` of the client that triggered the | ||
108 | * announcement | ||
109 | */ | ||
110 | static void | ||
111 | reannounce (void *cls) | ||
112 | { | ||
113 | struct ClientEntry *ce = cls; | ||
114 | |||
115 | REGEX_INTERNAL_reannounce (ce->ah); | ||
116 | ce->refresh_task = GNUNET_SCHEDULER_add_delayed (ce->frequency, | ||
117 | &reannounce, | ||
118 | ce); | ||
119 | } | ||
120 | |||
121 | |||
122 | /** | ||
123 | * Check ANNOUNCE message: the payload must be 0-terminated and the client must not be announcing already. | ||
124 | * | ||
125 | * @param cls identification of the client (the `struct ClientEntry *`) | ||
126 | * @param am the actual message | ||
127 | * @return #GNUNET_OK if @a am is well-formed, #GNUNET_SYSERR if this client already has an announcement | ||
128 | */ | ||
129 | static int | ||
130 | check_announce (void *cls, | ||
131 | const struct AnnounceMessage *am) | ||
132 | { | ||
133 | struct ClientEntry *ce = cls; | ||
134 | |||
135 | GNUNET_MQ_check_zero_termination (am); /* NOTE(review): presumably a macro that returns from this function on a non-terminated payload -- confirm */ | ||
136 | if (NULL != ce->ah) | ||
137 | { | ||
138 | /* only one announcement per client allowed */ | ||
139 | GNUNET_break (0); | ||
140 | return GNUNET_SYSERR; | ||
141 | } | ||
142 | return GNUNET_OK; | ||
143 | } | ||
144 | |||
145 | |||
146 | /** | ||
147 | * Handle ANNOUNCE message: record the refresh frequency, schedule the periodic re-announcement and start announcing the regex. If the announcement cannot be created, the refresh task is cancelled and the client is dropped. | ||
148 | * | ||
149 | * @param cls identification of the client (the `struct ClientEntry *`) | ||
150 | * @param am the actual message; the regex string follows the fixed-size part | ||
151 | */ | ||
152 | static void | ||
153 | handle_announce (void *cls, | ||
154 | const struct AnnounceMessage *am) | ||
155 | { | ||
156 | struct ClientEntry *ce = cls; | ||
157 | const char *regex; | ||
158 | |||
159 | regex = (const char *) &am[1]; | ||
160 | ce->frequency = GNUNET_TIME_relative_ntoh (am->refresh_delay); | ||
161 | ce->refresh_task = GNUNET_SCHEDULER_add_delayed (ce->frequency, | ||
162 | &reannounce, | ||
163 | ce); | ||
164 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
165 | "Starting to announce regex `%s' every %s\n", | ||
166 | regex, | ||
167 | GNUNET_STRINGS_relative_time_to_string (ce->frequency, | ||
168 | GNUNET_NO)); | ||
169 | ce->ah = REGEX_INTERNAL_announce (dht, | ||
170 | my_private_key, | ||
171 | regex, | ||
172 | ntohs (am->compression), | ||
173 | stats); | ||
174 | if (NULL == ce->ah) | ||
175 | { | ||
176 | GNUNET_break (0); | ||
177 | GNUNET_SCHEDULER_cancel (ce->refresh_task); | ||
178 | ce->refresh_task = NULL; | ||
179 | GNUNET_SERVICE_client_drop (ce->client); | ||
180 | return; | ||
181 | } | ||
182 | GNUNET_SERVICE_client_continue (ce->client); | ||
183 | } | ||
184 | |||
185 | |||
186 | /** | ||
187 | * Handle result, pass it back to the client as a RESULT message holding the peer identity followed by the get path and then the put path. Results with a path length of 65536 or more, or whose total size reaches #GNUNET_MAX_MESSAGE_SIZE, are not sent. | ||
188 | * | ||
189 | * @param cls the struct ClientEntry of the client searching | ||
190 | * @param id Peer providing a regex that matches the string. | ||
191 | * @param get_path Path of the get request. | ||
192 | * @param get_path_length Length of @a get_path. | ||
193 | * @param put_path Path of the put request. | ||
194 | * @param put_path_length Length of the @a put_path. | ||
195 | */ | ||
196 | static void | ||
197 | handle_search_result (void *cls, | ||
198 | const struct GNUNET_PeerIdentity *id, | ||
199 | const struct GNUNET_PeerIdentity *get_path, | ||
200 | unsigned int get_path_length, | ||
201 | const struct GNUNET_PeerIdentity *put_path, | ||
202 | unsigned int put_path_length) | ||
203 | { | ||
204 | struct ClientEntry *ce = cls; | ||
205 | struct GNUNET_MQ_Envelope *env; | ||
206 | struct ResultMessage *result; | ||
207 | struct GNUNET_PeerIdentity *gp; | ||
208 | uint16_t size; | ||
209 | |||
210 | if ((get_path_length >= 65536) || | ||
211 | (put_path_length >= 65536) || | ||
212 | ( ((get_path_length + put_path_length) * sizeof(struct | ||
213 | GNUNET_PeerIdentity)) | ||
214 | + sizeof(struct ResultMessage) >= GNUNET_MAX_MESSAGE_SIZE) ) | ||
215 | { | ||
216 | GNUNET_break (0); | ||
217 | return; | ||
218 | } | ||
219 | size = (get_path_length + put_path_length) * sizeof(struct | ||
220 | GNUNET_PeerIdentity); | ||
221 | env = GNUNET_MQ_msg_extra (result, | ||
222 | size, | ||
223 | GNUNET_MESSAGE_TYPE_REGEX_RESULT); | ||
224 | result->get_path_length = htons ((uint16_t) get_path_length); | ||
225 | result->put_path_length = htons ((uint16_t) put_path_length); | ||
226 | result->id = *id; | ||
227 | gp = &result->id; /* paths are written right behind the id field -- assumes id is the last field of struct ResultMessage, TODO confirm */ | ||
228 | GNUNET_memcpy (&gp[1], | ||
229 | get_path, | ||
230 | get_path_length * sizeof(struct GNUNET_PeerIdentity)); | ||
231 | GNUNET_memcpy (&gp[1 + get_path_length], | ||
232 | put_path, | ||
233 | put_path_length * sizeof(struct GNUNET_PeerIdentity)); | ||
234 | GNUNET_MQ_send (ce->mq, | ||
235 | env); | ||
236 | } | ||
237 | |||
238 | |||
239 | /** | ||
240 | * Check SEARCH message: the payload must hold a 0-terminated string (at least the terminator itself) and the client must not be searching already. | ||
241 | * | ||
242 | * @param cls identification of the client (the `struct ClientEntry *`) | ||
243 | * @param sm the actual message | ||
244 | * @return #GNUNET_OK if @a sm is well-formed and the client has no search yet, #GNUNET_SYSERR otherwise | ||
245 | */ | ||
246 | static int | ||
247 | check_search (void *cls, | ||
248 | const struct RegexSearchMessage *sm) | ||
249 | { | ||
250 | struct ClientEntry *ce = cls; | ||
251 | const char *string = (const char *) &sm[1]; | ||
252 | uint16_t size = ntohs (sm->header.size) - sizeof(*sm); | ||
253 | |||
254 | /* an empty payload has no terminator; test for it first so that string[size - 1] never reads before the payload */ | ||
255 | if ( (0 == size) || ('\0' != string[size - 1]) ) | ||
256 | { | ||
257 | GNUNET_break (0); | ||
258 | return GNUNET_SYSERR; | ||
259 | } | ||
260 | if (NULL != ce->sh) | ||
261 | { | ||
262 | /* only one search allowed per client */ | ||
263 | GNUNET_break (0); | ||
264 | return GNUNET_SYSERR; | ||
265 | } | ||
266 | return GNUNET_OK; | ||
267 | } | ||
268 | |||
269 | |||
270 | /** | ||
271 | * Handle SEARCH message: start a search for the string carried in the message; results are delivered via handle_search_result(). The client is dropped if the search cannot be started. | ||
272 | * | ||
273 | * @param cls identification of the client (the `struct ClientEntry *`) | ||
274 | * @param sm the actual message; the search string follows the fixed-size part | ||
275 | */ | ||
276 | static void | ||
277 | handle_search (void *cls, | ||
278 | const struct RegexSearchMessage *sm) | ||
279 | { | ||
280 | struct ClientEntry *ce = cls; | ||
281 | const char *string; | ||
282 | |||
283 | string = (const char *) &sm[1]; | ||
284 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
285 | "Starting to search for `%s'\n", | ||
286 | string); | ||
287 | ce->sh = REGEX_INTERNAL_search (dht, | ||
288 | string, | ||
289 | &handle_search_result, | ||
290 | ce, | ||
291 | stats); | ||
292 | if (NULL == ce->sh) | ||
293 | { | ||
294 | GNUNET_break (0); | ||
295 | GNUNET_SERVICE_client_drop (ce->client); | ||
296 | return; | ||
297 | } | ||
298 | GNUNET_SERVICE_client_continue (ce->client); | ||
299 | } | ||
300 | |||
301 | |||
302 | /** | ||
303 | * Set up the service: load our private key, connect to the DHT, register the shutdown task and create the statistics handle. A scheduler shutdown is requested if the key or the DHT connection cannot be obtained. | ||
304 | * | ||
305 | * @param cls closure | ||
306 | * @param cfg configuration to use | ||
307 | * @param service the initialized service | ||
308 | */ | ||
309 | static void | ||
310 | run (void *cls, | ||
311 | const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
312 | struct GNUNET_SERVICE_Handle *service) | ||
313 | { | ||
314 | my_private_key = GNUNET_CRYPTO_eddsa_key_create_from_configuration (cfg); | ||
315 | if (NULL == my_private_key) | ||
316 | { | ||
317 | GNUNET_SCHEDULER_shutdown (); | ||
318 | return; | ||
319 | } | ||
320 | dht = GNUNET_DHT_connect (cfg, 1024); | ||
321 | if (NULL == dht) | ||
322 | { | ||
323 | GNUNET_free (my_private_key); /* cleanup_task is not registered yet, so release the key here */ | ||
324 | my_private_key = NULL; | ||
325 | GNUNET_SCHEDULER_shutdown (); | ||
326 | return; | ||
327 | } | ||
328 | GNUNET_SCHEDULER_add_shutdown (&cleanup_task, | ||
329 | NULL); | ||
330 | stats = GNUNET_STATISTICS_create ("regex", cfg); | ||
331 | } | ||
332 | |||
333 | |||
334 | /** | ||
335 | * Callback called when a client connects to the service; allocates the per-client state. | ||
336 | * | ||
337 | * @param cls closure for the service | ||
338 | * @param c the new client that connected to the service | ||
339 | * @param mq the message queue used to send messages to the client | ||
340 | * @return the newly allocated `struct ClientEntry` for @a c | ||
341 | */ | ||
342 | static void * | ||
343 | client_connect_cb (void *cls, | ||
344 | struct GNUNET_SERVICE_Client *c, | ||
345 | struct GNUNET_MQ_Handle *mq) | ||
346 | { | ||
347 | struct ClientEntry *ce; | ||
348 | |||
349 | ce = GNUNET_new (struct ClientEntry); | ||
350 | ce->client = c; | ||
351 | ce->mq = mq; | ||
352 | return ce; | ||
353 | } | ||
354 | |||
355 | |||
356 | /** | ||
357 | * Callback called when a client disconnected from the service; cancels the client's refresh task, announcement and search (whichever exist) and frees its entry. | ||
358 | * | ||
359 | * @param cls closure for the service | ||
360 | * @param c the client that disconnected | ||
361 | * @param internal_cls the `struct ClientEntry *` of @a c (presumably the value returned by client_connect_cb() -- confirm against the service API) | ||
362 | */ | ||
363 | static void | ||
364 | client_disconnect_cb (void *cls, | ||
365 | struct GNUNET_SERVICE_Client *c, | ||
366 | void *internal_cls) | ||
367 | { | ||
368 | struct ClientEntry *ce = internal_cls; | ||
369 | |||
370 | if (NULL != ce->refresh_task) | ||
371 | { | ||
372 | GNUNET_SCHEDULER_cancel (ce->refresh_task); | ||
373 | ce->refresh_task = NULL; | ||
374 | } | ||
375 | if (NULL != ce->ah) | ||
376 | { | ||
377 | REGEX_INTERNAL_announce_cancel (ce->ah); | ||
378 | ce->ah = NULL; | ||
379 | } | ||
380 | if (NULL != ce->sh) | ||
381 | { | ||
382 | REGEX_INTERNAL_search_cancel (ce->sh); | ||
383 | ce->sh = NULL; | ||
384 | } | ||
385 | GNUNET_free (ce); | ||
386 | } | ||
387 | |||
388 | |||
389 | /** | ||
390 | * Define "main" method using service macro: service "regex" with run(), the connect/disconnect callbacks and variable-size handlers for ANNOUNCE and SEARCH (presumably bound to the check_/handle_ functions of the same name -- confirm against the macro). | ||
391 | */ | ||
392 | GNUNET_SERVICE_MAIN | ||
393 | ("regex", | ||
394 | GNUNET_SERVICE_OPTION_NONE, | ||
395 | &run, | ||
396 | &client_connect_cb, | ||
397 | &client_disconnect_cb, | ||
398 | NULL, | ||
399 | GNUNET_MQ_hd_var_size (announce, | ||
400 | GNUNET_MESSAGE_TYPE_REGEX_ANNOUNCE, | ||
401 | struct AnnounceMessage, | ||
402 | NULL), | ||
403 | GNUNET_MQ_hd_var_size (search, | ||
404 | GNUNET_MESSAGE_TYPE_REGEX_SEARCH, | ||
405 | struct RegexSearchMessage, | ||
406 | NULL), | ||
407 | GNUNET_MQ_handler_end ()); | ||
408 | |||
409 | |||
410 | /* end of gnunet-service-regex.c */ | ||
diff --git a/src/regex/perf-data.tar.gz b/src/regex/perf-data.tar.gz deleted file mode 100644 index 9e909e58e..000000000 --- a/src/regex/perf-data.tar.gz +++ /dev/null | |||
Binary files differ | |||
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c deleted file mode 100644 index aea0c054f..000000000 --- a/src/regex/perf-regex.c +++ /dev/null | |||
@@ -1,129 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file src/regex/perf-regex.c | ||
23 | * @brief Test how long it takes to create a automaton from a string regex. | ||
24 | * @author Bartlomiej Polot | ||
25 | */ | ||
26 | #include <regex.h> | ||
27 | #include <time.h> | ||
28 | #include "platform.h" | ||
29 | #include "regex_internal_lib.h" | ||
30 | #include "regex_test_lib.h" | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Print information about the given node and its edges | ||
35 | * to stdout: one line for the state, then one line per outgoing edge. | ||
36 | * | ||
37 | * @param cls closure, unused. | ||
38 | * @param key hash for current state. | ||
39 | * @param proof proof for current state. | ||
40 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
41 | * @param num_edges number of edges leaving current state. | ||
42 | * @param edges edges leaving current state; each is printed with its label and destination hash. | ||
43 | */ | ||
44 | static void | ||
45 | print_edge (void *cls, | ||
46 | const struct GNUNET_HashCode *key, | ||
47 | const char *proof, | ||
48 | int accepting, | ||
49 | unsigned int num_edges, | ||
50 | const struct REGEX_BLOCK_Edge *edges) | ||
51 | { | ||
52 | unsigned int i; | ||
53 | |||
54 | printf ("%s: %s, proof: `%s'\n", | ||
55 | GNUNET_h2s (key), | ||
56 | accepting ? "ACCEPTING" : "", | ||
57 | proof); | ||
58 | for (i = 0; i < num_edges; i++) | ||
59 | printf (" `%s': %s\n", | ||
60 | edges[i].label, | ||
61 | GNUNET_h2s (&edges[i].destination)); | ||
62 | } | ||
63 | |||
64 | |||
65 | /** | ||
66 | * The main function of the regex performance test. | ||
67 | * | ||
68 | * Read a set of regex from a file, combine them and create a DFA from the | ||
69 | * resulting combined regex; then print all edges and all reachable edges of it. | ||
70 | * | ||
71 | * @param argc number of arguments from the command line | ||
72 | * @param argv command line arguments: REGEX_FILE ALPHABET_SIZE COMPRESSION | ||
73 | * @return 0 ok, 1 on bad usage, 2 if the regex file cannot be read, 3 if no DFA could be constructed | ||
74 | */ | ||
75 | int | ||
76 | main (int argc, char *const *argv) | ||
77 | { | ||
78 | struct REGEX_INTERNAL_Automaton *dfa; | ||
79 | char **regexes; | ||
80 | char *buffer; | ||
81 | char *regex; | ||
82 | int compression; | ||
83 | unsigned int alphabet_size; | ||
84 | long size; | ||
85 | int ret = 3; /* only cleared once a DFA has been built and printed */ | ||
86 | |||
87 | GNUNET_log_setup ("perf-regex", "DEBUG", NULL); | ||
88 | if (4 != argc) | ||
89 | { | ||
90 | fprintf (stderr, "Usage: %s REGEX_FILE ALPHABET_SIZE COMPRESSION\n", argv[0]); | ||
91 | return 1; | ||
92 | } | ||
93 | regexes = REGEX_TEST_read_from_file (argv[1]); | ||
94 | if (NULL == regexes) | ||
95 | { | ||
96 | fprintf (stderr, "Failed to read regexes from `%s'\n", argv[1]); | ||
97 | return 2; | ||
98 | } | ||
99 | alphabet_size = atoi (argv[2]); | ||
100 | compression = atoi (argv[3]); | ||
101 | printf ("********* PERF-REGEX *********'\n"); | ||
102 | printf ("Using:\n file '%s'\n Alphabet size %u\n compression %d\n", argv[1], alphabet_size, compression); | ||
103 | fflush (stdout); | ||
104 | buffer = REGEX_TEST_combine (regexes, alphabet_size); | ||
105 | GNUNET_asprintf (&regex, "GNUNET_REGEX_PROFILER_(%s)(0|1)*", buffer); | ||
106 | size = strlen (regex); | ||
107 | fprintf (stderr, "Combined regex (%ld bytes):\n%s\n", size, regex); | ||
108 | dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression); | ||
109 | if (NULL == dfa) /* the edge iterators need an automaton, so skip them and report the failure */ | ||
110 | { | ||
111 | fprintf (stderr, "Failed to construct DFA\n"); | ||
112 | } | ||
113 | else | ||
114 | { | ||
115 | printf ("********* ALL EDGES *********'\n"); | ||
116 | REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL); | ||
117 | printf ("\n\n********* REACHABLE EDGES *********'\n"); | ||
118 | REGEX_INTERNAL_iterate_reachable_edges (dfa, &print_edge, NULL); | ||
119 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
120 | ret = 0; | ||
121 | } | ||
122 | GNUNET_free (buffer); | ||
123 | REGEX_TEST_free_from_file (regexes); | ||
124 | GNUNET_free (regex); | ||
125 | return ret; | ||
126 | } | ||
127 | |||
128 | |||
129 | /* end of perf-regex.c */ | ||
diff --git a/src/regex/plugin_block_regex.c b/src/regex/plugin_block_regex.c deleted file mode 100644 index ad897493f..000000000 --- a/src/regex/plugin_block_regex.c +++ /dev/null | |||
@@ -1,412 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file regex/plugin_block_regex.c | ||
23 | * @brief blocks used for regex storage and search | ||
24 | * @author Bartlomiej Polot | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_block_plugin.h" | ||
28 | #include "gnunet_block_group_lib.h" | ||
29 | #include "block_regex.h" | ||
30 | #include "regex_block_lib.h" | ||
31 | #include "gnunet_signatures.h" | ||
32 | |||
33 | |||
34 | /** | ||
35 | * Number of bits we set per entry in the bloomfilter. | ||
36 | * Do not change! | ||
37 | */ | ||
38 | #define BLOOMFILTER_K 16 | ||
39 | |||
40 | |||
41 | /** | ||
42 | * How big is the BF we use for REGEX blocks? | ||
43 | */ | ||
44 | #define REGEX_BF_SIZE 8 | ||
45 | |||
46 | |||
47 | /** | ||
48 | * Create a new block group (a bloom filter using #BLOOMFILTER_K bits per entry). | ||
49 | * | ||
50 | * @param cls closure, passed on to GNUNET_BLOCK_GROUP_bf_create() | ||
51 | * @param type type of the block for which we are creating the group | ||
52 | * @param nonce random value used to seed the group creation | ||
53 | * @param raw_data optional serialized prior state of the group, NULL if unavailable/fresh | ||
54 | * @param raw_data_size number of bytes in @a raw_data, 0 if unavailable/fresh | ||
55 | * @param va variable arguments specific to @a type: a guard string ("seen-set-size" or "filter-size"), an `unsigned int` value for it, then a NULL terminator | ||
56 | * @return block group handle, NULL if block groups are not supported | ||
57 | * by this @a type of block (this is not an error) | ||
58 | */ | ||
59 | static struct GNUNET_BLOCK_Group * | ||
60 | block_plugin_regex_create_group (void *cls, | ||
61 | enum GNUNET_BLOCK_Type type, | ||
62 | uint32_t nonce, | ||
63 | const void *raw_data, | ||
64 | size_t raw_data_size, | ||
65 | va_list va) | ||
66 | { | ||
67 | unsigned int bf_size; | ||
68 | const char *guard; | ||
69 | |||
70 | guard = va_arg (va, const char *); | ||
71 | if (0 == strcmp (guard, | ||
72 | "seen-set-size")) | ||
73 | bf_size = GNUNET_BLOCK_GROUP_compute_bloomfilter_size (va_arg (va, unsigned | ||
74 | int), | ||
75 | BLOOMFILTER_K); | ||
76 | else if (0 == strcmp (guard, | ||
77 | "filter-size")) | ||
78 | bf_size = va_arg (va, unsigned int); | ||
79 | else | ||
80 | { | ||
81 | GNUNET_break (0); /* unknown guard: fall back to the default filter size */ | ||
82 | bf_size = REGEX_BF_SIZE; | ||
83 | } | ||
84 | GNUNET_break (NULL == va_arg (va, const char *)); | ||
85 | return GNUNET_BLOCK_GROUP_bf_create (cls, | ||
86 | bf_size, | ||
87 | BLOOMFILTER_K, | ||
88 | type, | ||
89 | nonce, | ||
90 | raw_data, | ||
91 | raw_data_size); | ||
92 | } | ||
93 | |||
94 | |||
95 | /** | ||
96 | * Function called to validate a reply or a request of type | ||
97 | * #GNUNET_BLOCK_TYPE_REGEX. | ||
98 | * For request evaluation, pass "NULL" for the reply_block. | ||
99 | * Note that it is assumed that the reply has already been | ||
100 | * matched to the key (and signatures checked) as it would | ||
101 | * be done with the #GNUNET_BLOCK_get_key() function. | ||
102 | * | ||
103 | * @param cls closure | ||
104 | * @param type block type | ||
105 | * @param bg block group to evaluate against; valid replies are tested against and added to it | ||
106 | * @param eo control flags | ||
107 | * @param query original query (hash) | ||
108 | * @param xquery extended query data (can be NULL, depending on type); if non-empty it must be a 0-terminated string | ||
109 | * @param xquery_size number of bytes in @a xquery | ||
110 | * @param reply_block response to validate | ||
111 | * @param reply_block_size number of bytes in @a reply_block | ||
112 | * @return characterization of result: REQUEST_VALID / REQUEST_INVALID for requests and bad xqueries; RESULT_INVALID, RESULT_IRRELEVANT, OK_DUPLICATE or OK_MORE for replies | ||
113 | */ | ||
114 | static enum GNUNET_BLOCK_EvaluationResult | ||
115 | evaluate_block_regex (void *cls, | ||
116 | enum GNUNET_BLOCK_Type type, | ||
117 | struct GNUNET_BLOCK_Group *bg, | ||
118 | enum GNUNET_BLOCK_EvaluationOptions eo, | ||
119 | const struct GNUNET_HashCode *query, | ||
120 | const void *xquery, | ||
121 | size_t xquery_size, | ||
122 | const void *reply_block, | ||
123 | size_t reply_block_size) | ||
124 | { | ||
125 | struct GNUNET_HashCode chash; | ||
126 | |||
127 | if (NULL == reply_block) | ||
128 | { | ||
129 | if (0 != xquery_size) | ||
130 | { | ||
131 | const char *s; | ||
132 | |||
133 | s = (const char *) xquery; | ||
134 | if ('\0' != s[xquery_size - 1]) /* must be valid 0-terminated string */ | ||
135 | { | ||
136 | GNUNET_break_op (0); | ||
137 | return GNUNET_BLOCK_EVALUATION_REQUEST_INVALID; | ||
138 | } | ||
139 | } | ||
140 | return GNUNET_BLOCK_EVALUATION_REQUEST_VALID; | ||
141 | } | ||
142 | if (0 != xquery_size) | ||
143 | { | ||
144 | const char *s; | ||
145 | |||
146 | s = (const char *) xquery; | ||
147 | if ('\0' != s[xquery_size - 1]) /* must be valid 0-terminated string */ | ||
148 | { | ||
149 | GNUNET_break_op (0); | ||
150 | return GNUNET_BLOCK_EVALUATION_REQUEST_INVALID; | ||
151 | } | ||
152 | } | ||
153 | else if (NULL != query) | ||
154 | { | ||
155 | /* xquery is required for regex GETs, at least an empty string */ | ||
156 | GNUNET_break_op (0); | ||
157 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "type %d, query %p, xquery %p\n", | ||
158 | type, query, xquery); | ||
159 | return GNUNET_BLOCK_EVALUATION_REQUEST_INVALID; | ||
160 | } | ||
161 | switch (REGEX_BLOCK_check (reply_block, | ||
162 | reply_block_size, | ||
163 | query, | ||
164 | xquery)) | ||
165 | { | ||
166 | case GNUNET_SYSERR: | ||
167 | GNUNET_break_op (0); | ||
168 | return GNUNET_BLOCK_EVALUATION_RESULT_INVALID; | ||
169 | |||
170 | case GNUNET_NO: | ||
171 | /* xquery mismatch, can happen */ | ||
172 | return GNUNET_BLOCK_EVALUATION_RESULT_IRRELEVANT; | ||
173 | |||
174 | default: | ||
175 | break; | ||
176 | } | ||
177 | GNUNET_CRYPTO_hash (reply_block, | ||
178 | reply_block_size, | ||
179 | &chash); | ||
180 | if (GNUNET_YES == | ||
181 | GNUNET_BLOCK_GROUP_bf_test_and_set (bg, | ||
182 | &chash)) | ||
183 | return GNUNET_BLOCK_EVALUATION_OK_DUPLICATE; | ||
184 | return GNUNET_BLOCK_EVALUATION_OK_MORE; | ||
185 | } | ||
186 | |||
187 | |||
188 | /** | ||
189 | * Function called to validate a reply or a request of type | ||
190 | * #GNUNET_BLOCK_TYPE_REGEX_ACCEPT. | ||
191 | * For request evaluation, pass "NULL" for the reply_block. | ||
192 | * Note that it is assumed that the reply has already been | ||
193 | * matched to the key (and signatures checked) as it would | ||
194 | * be done with the #GNUNET_BLOCK_get_key() function. | ||
195 | * | ||
196 | * @param cls closure | ||
197 | * @param type block type | ||
198 | * @param bg block group to evaluate against; valid replies are tested against and added to it | ||
199 | * @param eo control flags | ||
200 | * @param query original query (hash) | ||
201 | * @param xquery extended query data; not used for this block type | ||
202 | * @param xquery_size number of bytes in @a xquery, must be 0 or the request is invalid | ||
203 | * @param reply_block response to validate; must be exactly one `struct RegexAcceptBlock` | ||
204 | * @param reply_block_size number of bytes in @a reply_block | ||
205 | * @return characterization of result: REQUEST_INVALID / REQUEST_VALID for requests; RESULT_INVALID, OK_DUPLICATE (also used for expired blocks) or OK_MORE for replies | ||
206 | */ | ||
207 | static enum GNUNET_BLOCK_EvaluationResult | ||
208 | evaluate_block_regex_accept (void *cls, | ||
209 | enum GNUNET_BLOCK_Type type, | ||
210 | struct GNUNET_BLOCK_Group *bg, | ||
211 | enum GNUNET_BLOCK_EvaluationOptions eo, | ||
212 | const struct GNUNET_HashCode *query, | ||
213 | const void *xquery, | ||
214 | size_t xquery_size, const void *reply_block, | ||
215 | size_t reply_block_size) | ||
216 | { | ||
217 | const struct RegexAcceptBlock *rba; | ||
218 | struct GNUNET_HashCode chash; | ||
219 | |||
220 | if (0 != xquery_size) | ||
221 | { | ||
222 | GNUNET_break_op (0); | ||
223 | return GNUNET_BLOCK_EVALUATION_REQUEST_INVALID; | ||
224 | } | ||
225 | if (NULL == reply_block) | ||
226 | return GNUNET_BLOCK_EVALUATION_REQUEST_VALID; | ||
227 | if (sizeof(struct RegexAcceptBlock) != reply_block_size) | ||
228 | { | ||
229 | GNUNET_break_op (0); | ||
230 | return GNUNET_BLOCK_EVALUATION_RESULT_INVALID; | ||
231 | } | ||
232 | rba = reply_block; | ||
233 | if (ntohl (rba->purpose.size) != | ||
234 | sizeof(struct GNUNET_CRYPTO_EccSignaturePurpose) | ||
235 | + sizeof(struct GNUNET_TIME_AbsoluteNBO) | ||
236 | + sizeof(struct GNUNET_HashCode)) | ||
237 | { | ||
238 | GNUNET_break_op (0); | ||
239 | return GNUNET_BLOCK_EVALUATION_RESULT_INVALID; | ||
240 | } | ||
241 | if (0 == GNUNET_TIME_absolute_get_remaining (GNUNET_TIME_absolute_ntoh ( | ||
242 | rba->expiration_time)). | ||
243 | rel_value_us) | ||
244 | { | ||
245 | /* technically invalid, but can happen without an error, so | ||
246 | we're nice by reporting it as a 'duplicate' */ | ||
247 | return GNUNET_BLOCK_EVALUATION_OK_DUPLICATE; | ||
248 | } | ||
249 | if (GNUNET_OK != | ||
250 | GNUNET_CRYPTO_eddsa_verify_ (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT, | ||
251 | &rba->purpose, | ||
252 | &rba->signature, | ||
253 | &rba->peer.public_key)) | ||
254 | { | ||
255 | GNUNET_break_op (0); | ||
256 | return GNUNET_BLOCK_EVALUATION_RESULT_INVALID; | ||
257 | } | ||
258 | GNUNET_CRYPTO_hash (reply_block, | ||
259 | reply_block_size, | ||
260 | &chash); | ||
261 | if (GNUNET_YES == | ||
262 | GNUNET_BLOCK_GROUP_bf_test_and_set (bg, | ||
263 | &chash)) | ||
264 | return GNUNET_BLOCK_EVALUATION_OK_DUPLICATE; | ||
265 | return GNUNET_BLOCK_EVALUATION_OK_MORE; | ||
266 | } | ||
267 | |||
268 | |||
269 | /** | ||
270 | * Function called to validate a reply or a request. For | ||
271 | * request evaluation, simply pass "NULL" for the reply_block. | ||
272 | * Note that it is assumed that the reply has already been | ||
273 | * matched to the key (and signatures checked) as it would | ||
274 | * be done with the #GNUNET_BLOCK_get_key() function. | ||
275 | * | ||
276 | * @param cls closure | ||
277 | * @param ctx block context (not used here) | ||
278 | * @param type block type; selects evaluate_block_regex() or evaluate_block_regex_accept() | ||
279 | * @param bg group to evaluate against | ||
280 | * @param eo control flags | ||
281 | * @param query original query (hash) | ||
282 | * @param xquery extended query data (can be NULL, depending on type) | ||
283 | * @param xquery_size number of bytes in xquery | ||
284 | * @param reply_block response to validate | ||
285 | * @param reply_block_size number of bytes in reply block | ||
286 | * @return characterization of result, #GNUNET_BLOCK_EVALUATION_TYPE_NOT_SUPPORTED for any other @a type | ||
287 | */ | ||
288 | static enum GNUNET_BLOCK_EvaluationResult | ||
289 | block_plugin_regex_evaluate (void *cls, | ||
290 | struct GNUNET_BLOCK_Context *ctx, | ||
291 | enum GNUNET_BLOCK_Type type, | ||
292 | struct GNUNET_BLOCK_Group *bg, | ||
293 | enum GNUNET_BLOCK_EvaluationOptions eo, | ||
294 | const struct GNUNET_HashCode *query, | ||
295 | const void *xquery, | ||
296 | size_t xquery_size, | ||
297 | const void *reply_block, | ||
298 | size_t reply_block_size) | ||
299 | { | ||
300 | enum GNUNET_BLOCK_EvaluationResult result; | ||
301 | |||
302 | switch (type) | ||
303 | { | ||
304 | case GNUNET_BLOCK_TYPE_REGEX: | ||
305 | result = evaluate_block_regex (cls, | ||
306 | type, | ||
307 | bg, | ||
308 | eo, | ||
309 | query, | ||
310 | xquery, xquery_size, | ||
311 | reply_block, reply_block_size); | ||
312 | break; | ||
313 | |||
314 | case GNUNET_BLOCK_TYPE_REGEX_ACCEPT: | ||
315 | result = evaluate_block_regex_accept (cls, | ||
316 | type, | ||
317 | bg, | ||
318 | eo, | ||
319 | query, | ||
320 | xquery, xquery_size, | ||
321 | reply_block, reply_block_size); | ||
322 | break; | ||
323 | |||
324 | default: | ||
325 | result = GNUNET_BLOCK_EVALUATION_TYPE_NOT_SUPPORTED; | ||
326 | } | ||
327 | return result; | ||
328 | } | ||
329 | |||
330 | |||
331 | /** | ||
332 | * Function called to obtain the key for a block. | ||
333 | * | ||
334 | * @param cls closure | ||
335 | * @param type block type | ||
336 | * @param block block to get the key for | ||
337 | * @param block_size number of bytes in @a block | ||
338 | * @param key set to the key (query) for the given block | ||
339 | * @return #GNUNET_OK on success, #GNUNET_NO if no key can be extracted from the block | ||
340 | * (REGEX block rejected by REGEX_BLOCK_get_key(), or ACCEPT block of the wrong size), #GNUNET_SYSERR if type not supported | ||
341 | */ | ||
342 | static int | ||
343 | block_plugin_regex_get_key (void *cls, | ||
344 | enum GNUNET_BLOCK_Type type, | ||
345 | const void *block, | ||
346 | size_t block_size, | ||
347 | struct GNUNET_HashCode *key) | ||
348 | { | ||
349 | switch (type) | ||
350 | { | ||
351 | case GNUNET_BLOCK_TYPE_REGEX: | ||
352 | if (GNUNET_OK != | ||
353 | REGEX_BLOCK_get_key (block, block_size, | ||
354 | key)) | ||
355 | { | ||
356 | GNUNET_break_op (0); | ||
357 | return GNUNET_NO; | ||
358 | } | ||
359 | return GNUNET_OK; | ||
360 | |||
361 | case GNUNET_BLOCK_TYPE_REGEX_ACCEPT: | ||
362 | if (sizeof(struct RegexAcceptBlock) != block_size) | ||
363 | { | ||
364 | GNUNET_break_op (0); | ||
365 | return GNUNET_NO; | ||
366 | } | ||
367 | *key = ((struct RegexAcceptBlock *) block)->key; | ||
368 | return GNUNET_OK; | ||
369 | |||
370 | default: | ||
371 | GNUNET_break (0); | ||
372 | return GNUNET_SYSERR; | ||
373 | } | ||
374 | } | ||
375 | |||
376 | |||
377 | /** | ||
378 | * Entry point for the plugin: allocates the function table for the REGEX and REGEX_ACCEPT block types and returns it; @a cls is not used. | ||
379 | */ | ||
380 | void * | ||
381 | libgnunet_plugin_block_regex_init (void *cls) | ||
382 | { | ||
383 | static enum GNUNET_BLOCK_Type types[] = { | ||
384 | GNUNET_BLOCK_TYPE_REGEX, | ||
385 | GNUNET_BLOCK_TYPE_REGEX_ACCEPT, | ||
386 | GNUNET_BLOCK_TYPE_ANY /* end of list */ | ||
387 | }; | ||
388 | struct GNUNET_BLOCK_PluginFunctions *api; | ||
389 | |||
390 | api = GNUNET_new (struct GNUNET_BLOCK_PluginFunctions); | ||
391 | api->evaluate = &block_plugin_regex_evaluate; | ||
392 | api->get_key = &block_plugin_regex_get_key; | ||
393 | api->create_group = &block_plugin_regex_create_group; | ||
394 | api->types = types; | ||
395 | return api; | ||
396 | } | ||
397 | |||
398 | |||
399 | /** | ||
400 | * Exit point from the plugin: frees the function table passed as @a cls (presumably the one returned by the init function -- confirm against the plugin loader) and returns NULL. | ||
401 | */ | ||
402 | void * | ||
403 | libgnunet_plugin_block_regex_done (void *cls) | ||
404 | { | ||
405 | struct GNUNET_BLOCK_PluginFunctions *api = cls; | ||
406 | |||
407 | GNUNET_free (api); | ||
408 | return NULL; | ||
409 | } | ||
410 | |||
411 | |||
412 | /* end of plugin_block_regex.c */ | ||
diff --git a/src/regex/regex.conf.in b/src/regex/regex.conf.in deleted file mode 100644 index 5e68a43da..000000000 --- a/src/regex/regex.conf.in +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | [regex] | ||
2 | START_ON_DEMAND = @START_ON_DEMAND@ | ||
3 | @UNIXONLY@ PORT = 2107 | ||
4 | UNIXPATH = $GNUNET_RUNTIME_DIR/gnunet-service-regex.sock | ||
5 | HOSTNAME = localhost | ||
6 | BINARY = gnunet-service-regex | ||
7 | ACCEPT_FROM = 127.0.0.1; | ||
8 | ACCEPT_FROM6 = ::1; | ||
diff --git a/src/regex/regex_api_announce.c b/src/regex/regex_api_announce.c deleted file mode 100644 index ad8a4fab9..000000000 --- a/src/regex/regex_api_announce.c +++ /dev/null | |||
@@ -1,186 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013, 2016 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_api_announce.c | ||
22 | * @brief access regex service to advertise capabilities via regex | ||
23 | * @author Maximilian Szengel | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_protocols.h" | ||
28 | #include "gnunet_util_lib.h" | ||
29 | #include "gnunet_regex_service.h" | ||
30 | #include "regex_ipc.h" | ||
31 | |||
32 | #define LOG(kind, ...) GNUNET_log_from (kind, "regex-api", __VA_ARGS__) | ||
33 | |||
34 | /** | ||
35 | * Handle to store cached data about a regex announce. | ||
36 | */ | ||
37 | struct GNUNET_REGEX_Announcement | ||
38 | { | ||
39 | /** | ||
40 | * Connection to the regex service. | ||
41 | */ | ||
42 | struct GNUNET_MQ_Handle *mq; | ||
43 | |||
44 | /** | ||
45 | * Our configuration. | ||
46 | */ | ||
47 | const struct GNUNET_CONFIGURATION_Handle *cfg; | ||
48 | |||
49 | /** | ||
50 | * Message we're sending to the service. | ||
51 | */ | ||
52 | char *regex; | ||
53 | |||
54 | /** | ||
55 | * Frequency of announcements. | ||
56 | */ | ||
57 | struct GNUNET_TIME_Relative refresh_delay; | ||
58 | |||
59 | /** | ||
60 | * Number of characters per edge. | ||
61 | */ | ||
62 | uint16_t compression; | ||
63 | }; | ||
64 | |||
65 | |||
66 | /** | ||
67 | * (Re)connect to the REGEX service with the given announcement @a a. | ||
68 | * | ||
69 | * @param a REGEX to announce. | ||
70 | */ | ||
71 | static void | ||
72 | announce_reconnect (struct GNUNET_REGEX_Announcement *a); | ||
73 | |||
74 | |||
75 | /** | ||
76 | * We got a disconnect after asking regex to do the announcement. | ||
77 | * Retry. | ||
78 | * | ||
79 | * @param cls the `struct GNUNET_REGEX_Announcement` to retry | ||
80 | * @param error error code | ||
81 | */ | ||
82 | static void | ||
83 | announce_mq_error_handler (void *cls, | ||
84 | enum GNUNET_MQ_Error error) | ||
85 | { | ||
86 | struct GNUNET_REGEX_Announcement *a = cls; | ||
87 | |||
88 | GNUNET_MQ_destroy (a->mq); | ||
89 | a->mq = NULL; | ||
90 | announce_reconnect (a); | ||
91 | } | ||
92 | |||
93 | |||
94 | /** | ||
95 | * (Re)connect to the REGEX service with the given announcement @a a. | ||
96 | * | ||
97 | * @param a REGEX to announce. | ||
98 | */ | ||
99 | static void | ||
100 | announce_reconnect (struct GNUNET_REGEX_Announcement *a) | ||
101 | { | ||
102 | struct GNUNET_MQ_Envelope *env; | ||
103 | struct AnnounceMessage *am; | ||
104 | size_t slen; | ||
105 | |||
106 | a->mq = GNUNET_CLIENT_connect (a->cfg, | ||
107 | "regex", | ||
108 | NULL, | ||
109 | &announce_mq_error_handler, | ||
110 | a); | ||
111 | if (NULL == a->mq) | ||
112 | return; | ||
113 | slen = strlen (a->regex) + 1; | ||
114 | env = GNUNET_MQ_msg_extra (am, | ||
115 | slen, | ||
116 | GNUNET_MESSAGE_TYPE_REGEX_ANNOUNCE); | ||
117 | am->compression = htons (a->compression); | ||
118 | am->reserved = htons (0); | ||
119 | am->refresh_delay = GNUNET_TIME_relative_hton (a->refresh_delay); | ||
120 | GNUNET_memcpy (&am[1], | ||
121 | a->regex, | ||
122 | slen); | ||
123 | GNUNET_MQ_send (a->mq, | ||
124 | env); | ||
125 | } | ||
126 | |||
127 | |||
128 | /** | ||
129 | * Announce the given peer under the given regular expression. | ||
130 | * | ||
131 | * @param cfg configuration to use | ||
132 | * @param regex Regular expression to announce. | ||
133 | * @param refresh_delay after what delay should the announcement be repeated? | ||
134 | * @param compression How many characters per edge can we squeeze? | ||
135 | * @return Handle to reuse o free cached resources. | ||
136 | * Must be freed by calling #GNUNET_REGEX_announce_cancel(). | ||
137 | */ | ||
138 | struct GNUNET_REGEX_Announcement * | ||
139 | GNUNET_REGEX_announce (const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
140 | const char *regex, | ||
141 | struct GNUNET_TIME_Relative refresh_delay, | ||
142 | uint16_t compression) | ||
143 | { | ||
144 | struct GNUNET_REGEX_Announcement *a; | ||
145 | size_t slen; | ||
146 | |||
147 | slen = strlen (regex) + 1; | ||
148 | if (slen + sizeof(struct AnnounceMessage) >= GNUNET_MAX_MESSAGE_SIZE) | ||
149 | { | ||
150 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
151 | _ ("Regex `%s' is too long!\n"), | ||
152 | regex); | ||
153 | GNUNET_break (0); | ||
154 | return NULL; | ||
155 | } | ||
156 | a = GNUNET_new (struct GNUNET_REGEX_Announcement); | ||
157 | a->cfg = cfg; | ||
158 | a->refresh_delay = refresh_delay; | ||
159 | a->compression = compression; | ||
160 | a->regex = GNUNET_strdup (regex); | ||
161 | announce_reconnect (a); | ||
162 | if (NULL == a->mq) | ||
163 | { | ||
164 | GNUNET_free (a->regex); | ||
165 | GNUNET_free (a); | ||
166 | return NULL; | ||
167 | } | ||
168 | return a; | ||
169 | } | ||
170 | |||
171 | |||
172 | /** | ||
173 | * Stop announcing the regex specified by the given handle. | ||
174 | * | ||
175 | * @param a handle returned by a previous #GNUNET_REGEX_announce() call. | ||
176 | */ | ||
177 | void | ||
178 | GNUNET_REGEX_announce_cancel (struct GNUNET_REGEX_Announcement *a) | ||
179 | { | ||
180 | GNUNET_MQ_destroy (a->mq); | ||
181 | GNUNET_free (a->regex); | ||
182 | GNUNET_free (a); | ||
183 | } | ||
184 | |||
185 | |||
186 | /* end of regex_api_announce.c */ | ||
diff --git a/src/regex/regex_api_search.c b/src/regex/regex_api_search.c deleted file mode 100644 index 960719cbd..000000000 --- a/src/regex/regex_api_search.c +++ /dev/null | |||
@@ -1,250 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013, 2016 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_api_search.c | ||
22 | * @brief access regex service to discover | ||
23 | * peers using matching strings | ||
24 | * @author Maximilian Szengel | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_protocols.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "gnunet_regex_service.h" | ||
31 | #include "regex_ipc.h" | ||
32 | |||
33 | #define LOG(kind, ...) GNUNET_log_from (kind, "regex-api", __VA_ARGS__) | ||
34 | |||
35 | |||
36 | /** | ||
37 | * Handle to store data about a regex search. | ||
38 | */ | ||
39 | struct GNUNET_REGEX_Search | ||
40 | { | ||
41 | /** | ||
42 | * Connection to the regex service. | ||
43 | */ | ||
44 | struct GNUNET_MQ_Handle *mq; | ||
45 | |||
46 | /** | ||
47 | * Our configuration. | ||
48 | */ | ||
49 | const struct GNUNET_CONFIGURATION_Handle *cfg; | ||
50 | |||
51 | /** | ||
52 | * Function to call with results. | ||
53 | */ | ||
54 | GNUNET_REGEX_Found callback; | ||
55 | |||
56 | /** | ||
57 | * Closure for @e callback. | ||
58 | */ | ||
59 | void *callback_cls; | ||
60 | |||
61 | /** | ||
62 | * Search string to transmit to the service. | ||
63 | */ | ||
64 | char *string; | ||
65 | }; | ||
66 | |||
67 | |||
68 | /** | ||
69 | * (Re)connect to the REGEX service for the given search @a s. | ||
70 | * | ||
71 | * @param s context for the search | ||
72 | */ | ||
73 | static void | ||
74 | search_reconnect (struct GNUNET_REGEX_Search *s); | ||
75 | |||
76 | |||
77 | /** | ||
78 | * We got a response or disconnect after asking regex | ||
79 | * to do the search. Check it is well-formed. | ||
80 | * | ||
81 | * @param cls the `struct GNUNET_REGEX_Search` to handle reply for | ||
82 | * @param result the message | ||
83 | * @return #GNUNET_SYSERR if @a rm is not well-formed. | ||
84 | */ | ||
85 | static int | ||
86 | check_search_response (void *cls, | ||
87 | const struct ResultMessage *result) | ||
88 | { | ||
89 | uint16_t size = ntohs (result->header.size) - sizeof(*result); | ||
90 | uint16_t gpl = ntohs (result->get_path_length); | ||
91 | uint16_t ppl = ntohs (result->put_path_length); | ||
92 | |||
93 | if (size != (gpl + ppl) * sizeof(struct GNUNET_PeerIdentity)) | ||
94 | { | ||
95 | GNUNET_break (0); | ||
96 | return GNUNET_SYSERR; | ||
97 | } | ||
98 | return GNUNET_OK; | ||
99 | } | ||
100 | |||
101 | |||
102 | /** | ||
103 | * We got a response or disconnect after asking regex | ||
104 | * to do the search. Handle it. | ||
105 | * | ||
106 | * @param cls the `struct GNUNET_REGEX_Search` to handle reply for | ||
107 | * @param result the message | ||
108 | */ | ||
109 | static void | ||
110 | handle_search_response (void *cls, | ||
111 | const struct ResultMessage *result) | ||
112 | { | ||
113 | struct GNUNET_REGEX_Search *s = cls; | ||
114 | uint16_t gpl = ntohs (result->get_path_length); | ||
115 | uint16_t ppl = ntohs (result->put_path_length); | ||
116 | const struct GNUNET_PeerIdentity *pid; | ||
117 | |||
118 | pid = &result->id; | ||
119 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
120 | "Got regex result %s\n", | ||
121 | GNUNET_i2s (pid)); | ||
122 | s->callback (s->callback_cls, | ||
123 | pid, | ||
124 | &pid[1], | ||
125 | gpl, | ||
126 | &pid[1 + gpl], | ||
127 | ppl); | ||
128 | } | ||
129 | |||
130 | |||
131 | /** | ||
132 | * We got a disconnect after asking regex to do the announcement. | ||
133 | * Retry. | ||
134 | * | ||
135 | * @param cls the `struct GNUNET_REGEX_Search` to retry | ||
136 | * @param error error code | ||
137 | */ | ||
138 | static void | ||
139 | mq_error_handler (void *cls, | ||
140 | enum GNUNET_MQ_Error error) | ||
141 | { | ||
142 | struct GNUNET_REGEX_Search *s = cls; | ||
143 | |||
144 | GNUNET_MQ_destroy (s->mq); | ||
145 | s->mq = NULL; | ||
146 | search_reconnect (s); | ||
147 | } | ||
148 | |||
149 | |||
150 | /** | ||
151 | * (Re)connect to the REGEX service for the given search @a s. | ||
152 | * | ||
153 | * @param s context for the search search for | ||
154 | */ | ||
155 | static void | ||
156 | search_reconnect (struct GNUNET_REGEX_Search *s) | ||
157 | { | ||
158 | struct GNUNET_MQ_MessageHandler handlers[] = { | ||
159 | GNUNET_MQ_hd_var_size (search_response, | ||
160 | GNUNET_MESSAGE_TYPE_REGEX_RESULT, | ||
161 | struct ResultMessage, | ||
162 | s), | ||
163 | GNUNET_MQ_handler_end () | ||
164 | }; | ||
165 | size_t slen = strlen (s->string) + 1; | ||
166 | struct GNUNET_MQ_Envelope *env; | ||
167 | struct RegexSearchMessage *rsm; | ||
168 | |||
169 | GNUNET_assert (NULL == s->mq); | ||
170 | s->mq = GNUNET_CLIENT_connect (s->cfg, | ||
171 | "regex", | ||
172 | handlers, | ||
173 | &mq_error_handler, | ||
174 | s); | ||
175 | if (NULL == s->mq) | ||
176 | return; | ||
177 | env = GNUNET_MQ_msg_extra (rsm, | ||
178 | slen, | ||
179 | GNUNET_MESSAGE_TYPE_REGEX_SEARCH); | ||
180 | GNUNET_memcpy (&rsm[1], | ||
181 | s->string, | ||
182 | slen); | ||
183 | GNUNET_MQ_send (s->mq, | ||
184 | env); | ||
185 | } | ||
186 | |||
187 | |||
188 | /** | ||
189 | * Search for a peer offering a regex matching certain string in the DHT. | ||
190 | * The search runs until #GNUNET_REGEX_search_cancel() is called, even if results | ||
191 | * are returned. | ||
192 | * | ||
193 | * @param cfg configuration to use | ||
194 | * @param string String to match against the regexes in the DHT. | ||
195 | * @param callback Callback for found peers. | ||
196 | * @param callback_cls Closure for @c callback. | ||
197 | * @return Handle to stop search and free resources. | ||
198 | * Must be freed by calling #GNUNET_REGEX_search_cancel(). | ||
199 | */ | ||
200 | struct GNUNET_REGEX_Search * | ||
201 | GNUNET_REGEX_search (const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
202 | const char *string, | ||
203 | GNUNET_REGEX_Found callback, | ||
204 | void *callback_cls) | ||
205 | { | ||
206 | struct GNUNET_REGEX_Search *s; | ||
207 | size_t slen = strlen (string) + 1; | ||
208 | |||
209 | if (slen + sizeof(struct RegexSearchMessage) >= GNUNET_MAX_MESSAGE_SIZE) | ||
210 | { | ||
211 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
212 | _ ("Search string `%s' is too long!\n"), | ||
213 | string); | ||
214 | GNUNET_break (0); | ||
215 | return NULL; | ||
216 | } | ||
217 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
218 | "Starting regex search for %s\n", | ||
219 | string); | ||
220 | s = GNUNET_new (struct GNUNET_REGEX_Search); | ||
221 | s->cfg = cfg; | ||
222 | s->string = GNUNET_strdup (string); | ||
223 | s->callback = callback; | ||
224 | s->callback_cls = callback_cls; | ||
225 | search_reconnect (s); | ||
226 | if (NULL == s->mq) | ||
227 | { | ||
228 | GNUNET_free (s->string); | ||
229 | GNUNET_free (s); | ||
230 | return NULL; | ||
231 | } | ||
232 | return s; | ||
233 | } | ||
234 | |||
235 | |||
236 | /** | ||
237 | * Stop search and free all data used by a #GNUNET_REGEX_search() call. | ||
238 | * | ||
239 | * @param s Handle returned by a previous #GNUNET_REGEX_search() call. | ||
240 | */ | ||
241 | void | ||
242 | GNUNET_REGEX_search_cancel (struct GNUNET_REGEX_Search *s) | ||
243 | { | ||
244 | GNUNET_MQ_destroy (s->mq); | ||
245 | GNUNET_free (s->string); | ||
246 | GNUNET_free (s); | ||
247 | } | ||
248 | |||
249 | |||
250 | /* end of regex_api_search.c */ | ||
diff --git a/src/regex/regex_block_lib.c b/src/regex/regex_block_lib.c deleted file mode 100644 index cbfb553ce..000000000 --- a/src/regex/regex_block_lib.c +++ /dev/null | |||
@@ -1,474 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012,2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @author Bartlomiej Polot | ||
22 | * @file regex/regex_block_lib.c | ||
23 | * @brief functions for manipulating non-accept blocks stored for | ||
24 | * regex in the DHT | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "regex_block_lib.h" | ||
28 | #include "gnunet_constants.h" | ||
29 | |||
30 | #define LOG(kind, ...) GNUNET_log_from (kind, "regex-bck", __VA_ARGS__) | ||
31 | |||
32 | GNUNET_NETWORK_STRUCT_BEGIN | ||
33 | |||
34 | /** | ||
35 | * Information for each edge. | ||
36 | */ | ||
37 | struct EdgeInfo | ||
38 | { | ||
39 | /** | ||
40 | * Index of the destination of this edge in the | ||
41 | * unique destinations array. | ||
42 | */ | ||
43 | uint16_t destination_index GNUNET_PACKED; | ||
44 | |||
45 | /** | ||
46 | * Number of bytes the token for this edge takes in the | ||
47 | * token area. | ||
48 | */ | ||
49 | uint16_t token_length GNUNET_PACKED; | ||
50 | }; | ||
51 | |||
52 | |||
53 | /** | ||
54 | * @brief Block to announce a regex state. | ||
55 | */ | ||
56 | struct RegexBlock | ||
57 | { | ||
58 | /** | ||
59 | * Length of the proof regex string. | ||
60 | */ | ||
61 | uint16_t proof_len GNUNET_PACKED; | ||
62 | |||
63 | /** | ||
64 | * Is this state an accepting state? | ||
65 | */ | ||
66 | int16_t is_accepting GNUNET_PACKED; | ||
67 | |||
68 | /** | ||
69 | * Number of edges parting from this state. | ||
70 | */ | ||
71 | uint16_t num_edges GNUNET_PACKED; | ||
72 | |||
73 | /** | ||
74 | * Number of unique destinations reachable from this state. | ||
75 | */ | ||
76 | uint16_t num_destinations GNUNET_PACKED; | ||
77 | |||
78 | /* followed by 'struct GNUNET_HashCode[num_destinations]' */ | ||
79 | |||
80 | /* followed by 'struct EdgeInfo[edge_destination_indices]' */ | ||
81 | |||
82 | /* followed by 'char proof[n_proof]', NOT 0-terminated */ | ||
83 | |||
84 | /* followed by 'char tokens[num_edges][edge_info[k].token_length]'; | ||
85 | essentially all of the tokens one after the other in the | ||
86 | order of the edges; tokens are NOT 0-terminated */ | ||
87 | }; | ||
88 | |||
89 | |||
90 | GNUNET_NETWORK_STRUCT_END | ||
91 | |||
92 | |||
93 | /** | ||
94 | * Test if this block is marked as being an accept state. | ||
95 | * | ||
96 | * @param block block to test | ||
97 | * @param size number of bytes in block | ||
98 | * @return #GNUNET_YES if the block is accepting, #GNUNET_NO if not | ||
99 | */ | ||
100 | int | ||
101 | GNUNET_BLOCK_is_accepting (const struct RegexBlock *block, | ||
102 | size_t size) | ||
103 | { | ||
104 | if (size < sizeof(struct RegexBlock)) | ||
105 | { | ||
106 | GNUNET_break_op (0); | ||
107 | return GNUNET_SYSERR; | ||
108 | } | ||
109 | return ntohs (block->is_accepting); | ||
110 | } | ||
111 | |||
112 | |||
113 | /** | ||
114 | * Check if the given 'proof' matches the given 'key'. | ||
115 | * | ||
116 | * @param proof partial regex of a state | ||
117 | * @param proof_len number of bytes in 'proof' | ||
118 | * @param key hash of a state. | ||
119 | * @return #GNUNET_OK if the proof is valid for the given key. | ||
120 | */ | ||
121 | int | ||
122 | REGEX_BLOCK_check_proof (const char *proof, | ||
123 | size_t proof_len, | ||
124 | const struct GNUNET_HashCode *key) | ||
125 | { | ||
126 | struct GNUNET_HashCode key_check; | ||
127 | |||
128 | if ((NULL == proof) || (NULL == key)) | ||
129 | { | ||
130 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Proof check failed, was NULL.\n"); | ||
131 | return GNUNET_NO; | ||
132 | } | ||
133 | GNUNET_CRYPTO_hash (proof, proof_len, &key_check); | ||
134 | return (0 == | ||
135 | GNUNET_CRYPTO_hash_cmp (key, &key_check)) ? GNUNET_OK : GNUNET_NO; | ||
136 | } | ||
137 | |||
138 | |||
/**
 * Closure for check_edge(): carries the xquery and the match result
 * across the iteration over a block's edges.
 */
struct CheckEdgeContext
{
  /**
   * Xquery: the string being looked for; may be NULL.
   */
  const char *xquery;

  /**
   * Set to GNUNET_OK once some edge matched the xquery; GNUNET_NO until then.
   */
  int found;
};
154 | |||
155 | |||
156 | /** | ||
157 | * Iterator over all edges in a block, checking for a presence of a given query. | ||
158 | * | ||
159 | * @param cls Closure, (xquery context). | ||
160 | * @param token Token that follows to next state. | ||
161 | * @param len Length of token. | ||
162 | * @param key Hash of next state. | ||
163 | * | ||
164 | * @return #GNUNET_YES, to keep iterating | ||
165 | */ | ||
166 | static int | ||
167 | check_edge (void *cls, | ||
168 | const char *token, | ||
169 | size_t len, | ||
170 | const struct GNUNET_HashCode *key) | ||
171 | { | ||
172 | struct CheckEdgeContext *ctx = cls; | ||
173 | |||
174 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
175 | "edge %.*s [%u]: %s\n", | ||
176 | (int) len, | ||
177 | token, | ||
178 | (unsigned int) len, | ||
179 | GNUNET_h2s (key)); | ||
180 | if (NULL == ctx->xquery) | ||
181 | return GNUNET_YES; | ||
182 | if (strlen (ctx->xquery) < len) | ||
183 | return GNUNET_YES; /* too long */ | ||
184 | if (0 == strncmp (ctx->xquery, token, len)) | ||
185 | ctx->found = GNUNET_OK; | ||
186 | return GNUNET_YES; /* keep checking for malformed data! */ | ||
187 | } | ||
188 | |||
189 | |||
190 | /** | ||
191 | * Check if the regex block is well formed, including all edges. | ||
192 | * | ||
193 | * @param block The start of the block. | ||
194 | * @param size The size of the block. | ||
195 | * @param query the query for the block | ||
196 | * @param xquery String describing the edge we are looking for. | ||
197 | * Can be NULL in case this is a put block. | ||
198 | * @return #GNUNET_OK in case it's fine. | ||
199 | * #GNUNET_NO in case the xquery exists and is not found (IRRELEVANT). | ||
200 | * #GNUNET_SYSERR if the block is invalid. | ||
201 | */ | ||
202 | int | ||
203 | REGEX_BLOCK_check (const struct RegexBlock *block, | ||
204 | size_t size, | ||
205 | const struct GNUNET_HashCode *query, | ||
206 | const char *xquery) | ||
207 | { | ||
208 | struct GNUNET_HashCode key; | ||
209 | struct CheckEdgeContext ctx; | ||
210 | int res; | ||
211 | |||
212 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
213 | "Block check\n"); | ||
214 | if (GNUNET_OK != | ||
215 | REGEX_BLOCK_get_key (block, size, | ||
216 | &key)) | ||
217 | { | ||
218 | GNUNET_break_op (0); | ||
219 | return GNUNET_SYSERR; | ||
220 | } | ||
221 | if ((NULL != query) && | ||
222 | (0 != GNUNET_memcmp (&key, | ||
223 | query)) ) | ||
224 | { | ||
225 | GNUNET_break_op (0); | ||
226 | return GNUNET_SYSERR; | ||
227 | } | ||
228 | if ((GNUNET_YES == ntohs (block->is_accepting)) && | ||
229 | ((NULL == xquery) || ('\0' == xquery[0]))) | ||
230 | { | ||
231 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
232 | " out! Is accepting: %u, xquery %p\n", | ||
233 | ntohs (block->is_accepting), | ||
234 | xquery); | ||
235 | return GNUNET_OK; | ||
236 | } | ||
237 | ctx.xquery = xquery; | ||
238 | ctx.found = GNUNET_NO; | ||
239 | res = REGEX_BLOCK_iterate (block, size, &check_edge, &ctx); | ||
240 | if (GNUNET_SYSERR == res) | ||
241 | return GNUNET_SYSERR; | ||
242 | if (NULL == xquery) | ||
243 | return GNUNET_YES; | ||
244 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Result %d\n", ctx.found); | ||
245 | return ctx.found; | ||
246 | } | ||
247 | |||
248 | |||
249 | /** | ||
250 | * Obtain the key that a particular block is to be stored under. | ||
251 | * | ||
252 | * @param block block to get the key from | ||
253 | * @param block_len number of bytes in block | ||
254 | * @param key where to store the key | ||
255 | * @return #GNUNET_OK on success, #GNUNET_SYSERR if the block is malformed | ||
256 | */ | ||
257 | int | ||
258 | REGEX_BLOCK_get_key (const struct RegexBlock *block, | ||
259 | size_t block_len, | ||
260 | struct GNUNET_HashCode *key) | ||
261 | { | ||
262 | uint16_t len; | ||
263 | const struct GNUNET_HashCode *destinations; | ||
264 | const struct EdgeInfo *edges; | ||
265 | uint16_t num_destinations; | ||
266 | uint16_t num_edges; | ||
267 | size_t total; | ||
268 | |||
269 | if (block_len < sizeof(struct RegexBlock)) | ||
270 | { | ||
271 | GNUNET_break_op (0); | ||
272 | return GNUNET_SYSERR; | ||
273 | } | ||
274 | num_destinations = ntohs (block->num_destinations); | ||
275 | num_edges = ntohs (block->num_edges); | ||
276 | len = ntohs (block->proof_len); | ||
277 | destinations = (const struct GNUNET_HashCode *) &block[1]; | ||
278 | edges = (const struct EdgeInfo *) &destinations[num_destinations]; | ||
279 | total = sizeof(struct RegexBlock) + num_destinations * sizeof(struct | ||
280 | GNUNET_HashCode) | ||
281 | + num_edges * sizeof(struct EdgeInfo) + len; | ||
282 | if (block_len < total) | ||
283 | { | ||
284 | GNUNET_break_op (0); | ||
285 | return GNUNET_SYSERR; | ||
286 | } | ||
287 | GNUNET_CRYPTO_hash (&edges[num_edges], len, key); | ||
288 | return GNUNET_OK; | ||
289 | } | ||
290 | |||
291 | |||
292 | /** | ||
293 | * Iterate over all edges of a block of a regex state. | ||
294 | * | ||
295 | * @param block Block to iterate over. | ||
296 | * @param size Size of @a block. | ||
297 | * @param iterator Function to call on each edge in the block. | ||
298 | * @param iter_cls Closure for the @a iterator. | ||
299 | * @return #GNUNET_SYSERR if an error has been encountered. | ||
300 | * #GNUNET_OK if no error has been encountered. | ||
301 | * Note that if the iterator stops the iteration by returning | ||
302 | * #GNUNET_NO, the block will no longer be checked for further errors. | ||
303 | * The return value will be GNUNET_OK meaning that no errors were | ||
304 | * found until the edge last notified to the iterator, but there might | ||
305 | * be errors in further edges. | ||
306 | */ | ||
307 | int | ||
308 | REGEX_BLOCK_iterate (const struct RegexBlock *block, | ||
309 | size_t size, | ||
310 | REGEX_INTERNAL_EgdeIterator iterator, | ||
311 | void *iter_cls) | ||
312 | { | ||
313 | uint16_t len; | ||
314 | const struct GNUNET_HashCode *destinations; | ||
315 | const struct EdgeInfo *edges; | ||
316 | const char *aux; | ||
317 | uint16_t num_destinations; | ||
318 | uint16_t num_edges; | ||
319 | size_t total; | ||
320 | unsigned int n; | ||
321 | size_t off; | ||
322 | |||
323 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Block iterate\n"); | ||
324 | if (size < sizeof(struct RegexBlock)) | ||
325 | { | ||
326 | GNUNET_break_op (0); | ||
327 | return GNUNET_SYSERR; | ||
328 | } | ||
329 | num_destinations = ntohs (block->num_destinations); | ||
330 | num_edges = ntohs (block->num_edges); | ||
331 | len = ntohs (block->proof_len); | ||
332 | destinations = (const struct GNUNET_HashCode *) &block[1]; | ||
333 | edges = (const struct EdgeInfo *) &destinations[num_destinations]; | ||
334 | aux = (const char *) &edges[num_edges]; | ||
335 | total = sizeof(struct RegexBlock) + num_destinations * sizeof(struct | ||
336 | GNUNET_HashCode) | ||
337 | + num_edges * sizeof(struct EdgeInfo) + len; | ||
338 | if (size < total) | ||
339 | { | ||
340 | GNUNET_break_op (0); | ||
341 | return GNUNET_SYSERR; | ||
342 | } | ||
343 | for (n = 0; n < num_edges; n++) | ||
344 | total += ntohs (edges[n].token_length); | ||
345 | if (size != total) | ||
346 | { | ||
347 | fprintf (stderr, "Expected %u, got %u\n", | ||
348 | (unsigned int) size, | ||
349 | (unsigned int) total); | ||
350 | GNUNET_break_op (0); | ||
351 | return GNUNET_SYSERR; | ||
352 | } | ||
353 | off = len; | ||
354 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
355 | "Start iterating block of size %lu, proof %u, off %lu edges %u\n", | ||
356 | (unsigned long) size, len, (unsigned long) off, n); | ||
357 | /* &aux[off] always points to our token */ | ||
358 | for (n = 0; n < num_edges; n++) | ||
359 | { | ||
360 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
361 | "Edge %u/%u, off %lu tokenlen %u (%.*s)\n", | ||
362 | n + 1, num_edges, (unsigned long) off, | ||
363 | ntohs (edges[n].token_length), ntohs (edges[n].token_length), | ||
364 | &aux[off]); | ||
365 | if (NULL != iterator) | ||
366 | if (GNUNET_NO == iterator (iter_cls, | ||
367 | &aux[off], | ||
368 | ntohs (edges[n].token_length), | ||
369 | &destinations[ntohs ( | ||
370 | edges[n].destination_index)])) | ||
371 | return GNUNET_OK; | ||
372 | off += ntohs (edges[n].token_length); | ||
373 | } | ||
374 | return GNUNET_OK; | ||
375 | } | ||
376 | |||
377 | |||
378 | /** | ||
379 | * Construct a regex block to be stored in the DHT. | ||
380 | * | ||
381 | * @param proof proof string for the block | ||
382 | * @param num_edges number of edges in the block | ||
383 | * @param edges the edges of the block | ||
384 | * @param accepting is this an accepting state | ||
385 | * @param rsize set to the size of the returned block (OUT-only) | ||
386 | * @return the regex block, NULL on error | ||
387 | */ | ||
388 | struct RegexBlock * | ||
389 | REGEX_BLOCK_create (const char *proof, | ||
390 | unsigned int num_edges, | ||
391 | const struct REGEX_BLOCK_Edge *edges, | ||
392 | int accepting, | ||
393 | size_t *rsize) | ||
394 | { | ||
395 | struct RegexBlock *block; | ||
396 | struct GNUNET_HashCode destinations[1024]; /* 1024 = 64k/64 bytes/key == absolute MAX */ | ||
397 | uint16_t destination_indices[num_edges]; | ||
398 | struct GNUNET_HashCode *dests; | ||
399 | struct EdgeInfo *edgeinfos; | ||
400 | size_t off; | ||
401 | size_t len; | ||
402 | size_t total; | ||
403 | size_t slen; | ||
404 | unsigned int unique_destinations; | ||
405 | unsigned int j; | ||
406 | unsigned int i; | ||
407 | char *aux; | ||
408 | |||
409 | len = strlen (proof); | ||
410 | if (len > UINT16_MAX) | ||
411 | { | ||
412 | GNUNET_break (0); | ||
413 | return NULL; | ||
414 | } | ||
415 | unique_destinations = 0; | ||
416 | total = sizeof(struct RegexBlock) + len; | ||
417 | for (i = 0; i < num_edges; i++) | ||
418 | { | ||
419 | slen = strlen (edges[i].label); | ||
420 | if (slen > UINT16_MAX) | ||
421 | { | ||
422 | GNUNET_break (0); | ||
423 | return NULL; | ||
424 | } | ||
425 | total += slen; | ||
426 | for (j = 0; j < unique_destinations; j++) | ||
427 | if (0 == memcmp (&destinations[j], | ||
428 | &edges[i].destination, | ||
429 | sizeof(struct GNUNET_HashCode))) | ||
430 | break; | ||
431 | if (j >= 1024) | ||
432 | { | ||
433 | GNUNET_break (0); | ||
434 | return NULL; | ||
435 | } | ||
436 | destination_indices[i] = j; | ||
437 | if (j == unique_destinations) | ||
438 | destinations[unique_destinations++] = edges[i].destination; | ||
439 | } | ||
440 | total += num_edges * sizeof(struct EdgeInfo) + unique_destinations | ||
441 | * sizeof(struct GNUNET_HashCode); | ||
442 | if (total >= GNUNET_CONSTANTS_MAX_BLOCK_SIZE) | ||
443 | { | ||
444 | GNUNET_break (0); | ||
445 | return NULL; | ||
446 | } | ||
447 | block = GNUNET_malloc (total); | ||
448 | block->proof_len = htons (len); | ||
449 | block->is_accepting = htons (accepting); | ||
450 | block->num_edges = htons (num_edges); | ||
451 | block->num_destinations = htons (unique_destinations); | ||
452 | dests = (struct GNUNET_HashCode *) &block[1]; | ||
453 | GNUNET_memcpy (dests, destinations, sizeof(struct GNUNET_HashCode) | ||
454 | * unique_destinations); | ||
455 | edgeinfos = (struct EdgeInfo *) &dests[unique_destinations]; | ||
456 | aux = (char *) &edgeinfos[num_edges]; | ||
457 | off = len; | ||
458 | GNUNET_memcpy (aux, proof, len); | ||
459 | for (i = 0; i < num_edges; i++) | ||
460 | { | ||
461 | slen = strlen (edges[i].label); | ||
462 | edgeinfos[i].token_length = htons ((uint16_t) slen); | ||
463 | edgeinfos[i].destination_index = htons (destination_indices[i]); | ||
464 | GNUNET_memcpy (&aux[off], | ||
465 | edges[i].label, | ||
466 | slen); | ||
467 | off += slen; | ||
468 | } | ||
469 | *rsize = total; | ||
470 | return block; | ||
471 | } | ||
472 | |||
473 | |||
474 | /* end of regex_block_lib.c */ | ||
diff --git a/src/regex/regex_block_lib.h b/src/regex/regex_block_lib.h deleted file mode 100644 index 11029b9af..000000000 --- a/src/regex/regex_block_lib.h +++ /dev/null | |||
@@ -1,193 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012,2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @author Bartlomiej Polot | ||
23 | * @file regex/regex_block_lib.h | ||
24 | * @brief common function to manipulate blocks stored by regex in the DHT | ||
25 | */ | ||
26 | |||
27 | #ifndef REGEX_BLOCK_LIB_H_ | ||
28 | #define REGEX_BLOCK_LIB_H_ | ||
29 | |||
30 | #ifdef __cplusplus | ||
31 | extern "C" | ||
32 | { | ||
33 | #if 0 | ||
34 | /* keep Emacsens' auto-indent happy */ | ||
35 | } | ||
36 | #endif | ||
37 | #endif | ||
38 | |||
39 | #include "platform.h" | ||
40 | #include "block_regex.h" | ||
41 | |||
42 | |||
43 | /** | ||
44 | * Representation of a Regex node (and edges) in the DHT. | ||
45 | */ | ||
46 | struct RegexBlock; | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Edge representation. | ||
51 | */ | ||
52 | struct REGEX_BLOCK_Edge | ||
53 | { | ||
54 | /** | ||
55 | * Label of the edge. FIXME: might want to not consume exactly | ||
56 | * multiples of 8 bits, need length! | ||
57 | */ | ||
58 | const char *label; | ||
59 | |||
60 | /** | ||
61 | * Destination of the edge. | ||
62 | */ | ||
63 | struct GNUNET_HashCode destination; | ||
64 | }; | ||
65 | |||
66 | |||
67 | /** | ||
68 | * Check if the given 'proof' matches the given 'key'. | ||
69 | * | ||
70 | * @param proof partial regex of a state | ||
71 | * @param proof_len number of bytes in @a proof | ||
72 | * @param key hash of a state. | ||
73 | * @return #GNUNET_OK if the proof is valid for the given key. | ||
74 | */ | ||
75 | int | ||
76 | REGEX_BLOCK_check_proof (const char *proof, | ||
77 | size_t proof_len, | ||
78 | const struct GNUNET_HashCode *key); | ||
79 | |||
80 | |||
81 | /** | ||
82 | * Check if the regex block is well formed, including all edges. | ||
83 | * | ||
84 | * @param block The start of the block. | ||
85 | * @param size The size of the @a block. | ||
86 | * @param query the query for the @a block | ||
87 | * @param xquery String describing the edge we are looking for. | ||
88 | * Can be NULL in case this is a put block. | ||
89 | * @return #GNUNET_OK in case it's fine. | ||
90 | * #GNUNET_NO in case the xquery exists and is not found (IRRELEVANT). | ||
91 | * #GNUNET_SYSERR if the block is invalid. | ||
92 | */ | ||
93 | int | ||
94 | REGEX_BLOCK_check (const struct RegexBlock *block, | ||
95 | size_t size, | ||
96 | const struct GNUNET_HashCode *query, | ||
97 | const char *xquery); | ||
98 | |||
99 | |||
100 | /* FIXME: might want to use 'struct REGEX_BLOCK_Edge' here instead of 3 arguments! */ | ||
101 | |||
102 | /** | ||
103 | * Iterator over edges in a block. | ||
104 | * | ||
105 | * @param cls Closure. | ||
106 | * @param token Token that follows to next state. | ||
107 | * @param len Length of token. | ||
108 | * @param key Hash of next state. | ||
109 | * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise. | ||
110 | */ | ||
111 | typedef int | ||
112 | (*REGEX_INTERNAL_EgdeIterator)(void *cls, | ||
113 | const char *token, | ||
114 | size_t len, | ||
115 | const struct GNUNET_HashCode *key); | ||
116 | |||
117 | |||
118 | /** | ||
119 | * Iterate over all edges of a block of a regex state. | ||
120 | * | ||
121 | * @param block Block to iterate over. | ||
122 | * @param size Size of block. | ||
123 | * @param iterator Function to call on each edge in the block. | ||
124 | * @param iter_cls Closure for the @a iterator. | ||
125 | * @return #GNUNET_SYSERR if an error has been encountered. | ||
126 | * #GNUNET_OK if no error has been encountered. | ||
127 | * Note that if the iterator stops the iteration by returning | ||
128 | * #GNUNET_NO, the block will no longer be checked for further errors. | ||
129 | * The return value will be #GNUNET_OK meaning that no errors were | ||
130 | * found until the edge last notified to the iterator, but there might | ||
131 | * be errors in further edges. | ||
132 | */ | ||
133 | int | ||
134 | REGEX_BLOCK_iterate (const struct RegexBlock *block, | ||
135 | size_t size, | ||
136 | REGEX_INTERNAL_EgdeIterator iterator, | ||
137 | void *iter_cls); | ||
138 | |||
139 | |||
140 | /** | ||
141 | * Obtain the key that a particular block is to be stored under. | ||
142 | * | ||
143 | * @param block block to get the key from | ||
144 | * @param block_len number of bytes in @a block | ||
145 | * @param key where to store the key | ||
146 | * @return #GNUNET_OK on success, #GNUNET_SYSERR if the block is malformed | ||
147 | */ | ||
148 | int | ||
149 | REGEX_BLOCK_get_key (const struct RegexBlock *block, | ||
150 | size_t block_len, | ||
151 | struct GNUNET_HashCode *key); | ||
152 | |||
153 | |||
154 | /** | ||
155 | * Test if this block is marked as being an accept state. | ||
156 | * | ||
157 | * @param block block to test | ||
158 | * @param block_len number of bytes in @a block | ||
159 | * @return #GNUNET_YES if the block is accepting, #GNUNET_NO if not | ||
160 | */ | ||
161 | int | ||
162 | GNUNET_BLOCK_is_accepting (const struct RegexBlock *block, | ||
163 | size_t block_len); | ||
164 | |||
165 | |||
166 | /** | ||
167 | * Construct a regex block to be stored in the DHT. | ||
168 | * | ||
169 | * @param proof proof string for the block | ||
170 | * @param num_edges number of edges in the block | ||
171 | * @param edges the edges of the block | ||
172 | * @param accepting is this an accepting state | ||
173 | * @param rsize set to the size of the returned block (OUT-only) | ||
174 | * @return the regex block, NULL on error | ||
175 | */ | ||
176 | struct RegexBlock * | ||
177 | REGEX_BLOCK_create (const char *proof, | ||
178 | unsigned int num_edges, | ||
179 | const struct REGEX_BLOCK_Edge *edges, | ||
180 | int accepting, | ||
181 | size_t *rsize); | ||
182 | |||
183 | |||
184 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
185 | { | ||
186 | #endif | ||
187 | #ifdef __cplusplus | ||
188 | } | ||
189 | #endif | ||
190 | |||
191 | /* ifndef REGEX_BLOCK_LIB_H */ | ||
192 | #endif | ||
193 | /* end of regex_block_lib.h */ | ||
diff --git a/src/regex/regex_internal.c b/src/regex/regex_internal.c deleted file mode 100644 index aa40851a9..000000000 --- a/src/regex/regex_internal.c +++ /dev/null | |||
@@ -1,3718 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_internal.c | ||
22 | * @brief library to create Deterministic Finite Automatons (DFAs) from regular | ||
23 | * expressions (regexes). | ||
24 | * @author Maximilian Szengel | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "gnunet_util_lib.h" | ||
28 | #include "gnunet_regex_service.h" | ||
29 | #include "regex_internal_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
32 | |||
33 | /** | ||
34 | * Set this to #GNUNET_YES to enable state naming. Used to debug NFA->DFA | ||
35 | * creation. Disabled by default for better performance. | ||
36 | */ | ||
37 | #define REGEX_DEBUG_DFA GNUNET_NO | ||
38 | |||
/**
 * Set of states that is managed through the MDLL API.
 */
struct REGEX_INTERNAL_StateSet_MDLL
{
  /**
   * Head of the MDLL of states.
   */
  struct REGEX_INTERNAL_State *head;

  /**
   * Tail of the MDLL of states.
   */
  struct REGEX_INTERNAL_State *tail;

  /**
   * Number of entries in the MDLL.
   */
  unsigned int len;
};
59 | |||
60 | |||
61 | /** | ||
62 | * Append state to the given StateSet. | ||
63 | * | ||
64 | * @param set set to be modified | ||
65 | * @param state state to be appended | ||
66 | */ | ||
67 | static void | ||
68 | state_set_append (struct REGEX_INTERNAL_StateSet *set, | ||
69 | struct REGEX_INTERNAL_State *state) | ||
70 | { | ||
71 | if (set->off == set->size) | ||
72 | GNUNET_array_grow (set->states, set->size, set->size * 2 + 4); | ||
73 | set->states[set->off++] = state; | ||
74 | } | ||
75 | |||
76 | |||
/**
 * NULL-tolerant string comparison.  Two NULL pointers are considered
 * equal; a NULL pointer never equals a non-NULL string.
 *
 * @param str1 first string for comparison, may be NULL.
 * @param str2 second string for comparison, may be NULL.
 *
 * @return 0 if both are NULL or both hold the same text, -1 if exactly
 *         one of them is NULL, otherwise the result of strcmp().
 */
static int
nullstrcmp (const char *str1, const char *str2)
{
  const int missing1 = (NULL == str1);
  const int missing2 = (NULL == str2);

  if (missing1 && missing2)
    return 0;
  if (missing1 || missing2)
    return -1;
  return strcmp (str1, str2);
}
95 | |||
96 | |||
97 | /** | ||
98 | * Adds a transition from one state to another on @a label. Does not add | ||
99 | * duplicate states. | ||
100 | * | ||
101 | * @param ctx context | ||
102 | * @param from_state starting state for the transition | ||
103 | * @param label transition label | ||
104 | * @param to_state state to where the transition should point to | ||
105 | */ | ||
106 | static void | ||
107 | state_add_transition (struct REGEX_INTERNAL_Context *ctx, | ||
108 | struct REGEX_INTERNAL_State *from_state, | ||
109 | const char *label, | ||
110 | struct REGEX_INTERNAL_State *to_state) | ||
111 | { | ||
112 | struct REGEX_INTERNAL_Transition *t; | ||
113 | struct REGEX_INTERNAL_Transition *oth; | ||
114 | |||
115 | if (NULL == from_state) | ||
116 | { | ||
117 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not create Transition.\n"); | ||
118 | return; | ||
119 | } | ||
120 | |||
121 | /* Do not add duplicate state transitions */ | ||
122 | for (t = from_state->transitions_head; NULL != t; t = t->next) | ||
123 | { | ||
124 | if ((t->to_state == to_state) && (0 == nullstrcmp (t->label, label)) && | ||
125 | (t->from_state == from_state) ) | ||
126 | return; | ||
127 | } | ||
128 | |||
129 | /* sort transitions by label */ | ||
130 | for (oth = from_state->transitions_head; NULL != oth; oth = oth->next) | ||
131 | { | ||
132 | if (0 < nullstrcmp (oth->label, label)) | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | t = GNUNET_new (struct REGEX_INTERNAL_Transition); | ||
137 | if (NULL != ctx) | ||
138 | t->id = ctx->transition_id++; | ||
139 | if (NULL != label) | ||
140 | t->label = GNUNET_strdup (label); | ||
141 | else | ||
142 | t->label = NULL; | ||
143 | t->to_state = to_state; | ||
144 | t->from_state = from_state; | ||
145 | |||
146 | /* Add outgoing transition to 'from_state' */ | ||
147 | from_state->transition_count++; | ||
148 | GNUNET_CONTAINER_DLL_insert_before (from_state->transitions_head, | ||
149 | from_state->transitions_tail, | ||
150 | oth, | ||
151 | t); | ||
152 | } | ||
153 | |||
154 | |||
155 | /** | ||
156 | * Remove a 'transition' from 'state'. | ||
157 | * | ||
158 | * @param state state from which the to-be-removed transition originates. | ||
159 | * @param transition transition that should be removed from state 'state'. | ||
160 | */ | ||
161 | static void | ||
162 | state_remove_transition (struct REGEX_INTERNAL_State *state, | ||
163 | struct REGEX_INTERNAL_Transition *transition) | ||
164 | { | ||
165 | if ((NULL == state) || (NULL == transition)) | ||
166 | return; | ||
167 | |||
168 | if (transition->from_state != state) | ||
169 | return; | ||
170 | |||
171 | GNUNET_free (transition->label); | ||
172 | |||
173 | state->transition_count--; | ||
174 | GNUNET_CONTAINER_DLL_remove (state->transitions_head, | ||
175 | state->transitions_tail, | ||
176 | transition); | ||
177 | |||
178 | GNUNET_free (transition); | ||
179 | } | ||
180 | |||
181 | |||
182 | /** | ||
183 | * Compare two states. Used for sorting. | ||
184 | * | ||
185 | * @param a first state | ||
186 | * @param b second state | ||
187 | * | ||
188 | * @return an integer less than, equal to, or greater than zero | ||
189 | * if the first argument is considered to be respectively | ||
190 | * less than, equal to, or greater than the second. | ||
191 | */ | ||
192 | static int | ||
193 | state_compare (const void *a, const void *b) | ||
194 | { | ||
195 | struct REGEX_INTERNAL_State **s1 = (struct REGEX_INTERNAL_State **) a; | ||
196 | struct REGEX_INTERNAL_State **s2 = (struct REGEX_INTERNAL_State **) b; | ||
197 | |||
198 | return (*s1)->id - (*s2)->id; | ||
199 | } | ||
200 | |||
201 | |||
202 | /** | ||
203 | * Get all edges leaving state @a s. | ||
204 | * | ||
205 | * @param s state. | ||
206 | * @param edges all edges leaving @a s, expected to be allocated and have enough | ||
207 | * space for `s->transitions_count` elements. | ||
208 | * | ||
209 | * @return number of edges. | ||
210 | */ | ||
211 | static unsigned int | ||
212 | state_get_edges (struct REGEX_INTERNAL_State *s, struct REGEX_BLOCK_Edge *edges) | ||
213 | { | ||
214 | struct REGEX_INTERNAL_Transition *t; | ||
215 | unsigned int count; | ||
216 | |||
217 | if (NULL == s) | ||
218 | return 0; | ||
219 | |||
220 | count = 0; | ||
221 | |||
222 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
223 | { | ||
224 | if (NULL != t->to_state) | ||
225 | { | ||
226 | edges[count].label = t->label; | ||
227 | edges[count].destination = t->to_state->hash; | ||
228 | count++; | ||
229 | } | ||
230 | } | ||
231 | return count; | ||
232 | } | ||
233 | |||
234 | |||
235 | /** | ||
236 | * Compare to state sets by comparing the id's of the states that are contained | ||
237 | * in each set. Both sets are expected to be sorted by id! | ||
238 | * | ||
239 | * @param sset1 first state set | ||
240 | * @param sset2 second state set | ||
241 | * @return 0 if the sets are equal, otherwise non-zero | ||
242 | */ | ||
243 | static int | ||
244 | state_set_compare (struct REGEX_INTERNAL_StateSet *sset1, | ||
245 | struct REGEX_INTERNAL_StateSet *sset2) | ||
246 | { | ||
247 | int result; | ||
248 | unsigned int i; | ||
249 | |||
250 | if ((NULL == sset1) || (NULL == sset2)) | ||
251 | return 1; | ||
252 | |||
253 | result = sset1->off - sset2->off; | ||
254 | if (result < 0) | ||
255 | return -1; | ||
256 | if (result > 0) | ||
257 | return 1; | ||
258 | for (i = 0; i < sset1->off; i++) | ||
259 | if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i]))) | ||
260 | break; | ||
261 | return result; | ||
262 | } | ||
263 | |||
264 | |||
265 | /** | ||
266 | * Clears the given StateSet 'set' | ||
267 | * | ||
268 | * @param set set to be cleared | ||
269 | */ | ||
270 | static void | ||
271 | state_set_clear (struct REGEX_INTERNAL_StateSet *set) | ||
272 | { | ||
273 | GNUNET_array_grow (set->states, set->size, 0); | ||
274 | set->off = 0; | ||
275 | } | ||
276 | |||
277 | |||
278 | /** | ||
279 | * Clears an automaton fragment. Does not destroy the states inside the | ||
280 | * automaton. | ||
281 | * | ||
282 | * @param a automaton to be cleared | ||
283 | */ | ||
284 | static void | ||
285 | automaton_fragment_clear (struct REGEX_INTERNAL_Automaton *a) | ||
286 | { | ||
287 | if (NULL == a) | ||
288 | return; | ||
289 | |||
290 | a->start = NULL; | ||
291 | a->end = NULL; | ||
292 | a->states_head = NULL; | ||
293 | a->states_tail = NULL; | ||
294 | a->state_count = 0; | ||
295 | GNUNET_free (a); | ||
296 | } | ||
297 | |||
298 | |||
299 | /** | ||
300 | * Frees the memory used by State @a s | ||
301 | * | ||
302 | * @param s state that should be destroyed | ||
303 | */ | ||
304 | static void | ||
305 | automaton_destroy_state (struct REGEX_INTERNAL_State *s) | ||
306 | { | ||
307 | struct REGEX_INTERNAL_Transition *t; | ||
308 | struct REGEX_INTERNAL_Transition *next_t; | ||
309 | |||
310 | if (NULL == s) | ||
311 | return; | ||
312 | |||
313 | GNUNET_free (s->name); | ||
314 | GNUNET_free (s->proof); | ||
315 | state_set_clear (&s->nfa_set); | ||
316 | for (t = s->transitions_head; NULL != t; t = next_t) | ||
317 | { | ||
318 | next_t = t->next; | ||
319 | state_remove_transition (s, t); | ||
320 | } | ||
321 | |||
322 | GNUNET_free (s); | ||
323 | } | ||
324 | |||
325 | |||
326 | /** | ||
327 | * Remove a state from the given automaton 'a'. Always use this function when | ||
328 | * altering the states of an automaton. Will also remove all transitions leading | ||
329 | * to this state, before destroying it. | ||
330 | * | ||
331 | * @param a automaton | ||
332 | * @param s state to remove | ||
333 | */ | ||
334 | static void | ||
335 | automaton_remove_state (struct REGEX_INTERNAL_Automaton *a, | ||
336 | struct REGEX_INTERNAL_State *s) | ||
337 | { | ||
338 | struct REGEX_INTERNAL_State *s_check; | ||
339 | struct REGEX_INTERNAL_Transition *t_check; | ||
340 | struct REGEX_INTERNAL_Transition *t_check_next; | ||
341 | |||
342 | if ((NULL == a) || (NULL == s)) | ||
343 | return; | ||
344 | |||
345 | /* remove all transitions leading to this state */ | ||
346 | for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) | ||
347 | { | ||
348 | for (t_check = s_check->transitions_head; NULL != t_check; | ||
349 | t_check = t_check_next) | ||
350 | { | ||
351 | t_check_next = t_check->next; | ||
352 | if (t_check->to_state == s) | ||
353 | state_remove_transition (s_check, t_check); | ||
354 | } | ||
355 | } | ||
356 | |||
357 | /* remove state */ | ||
358 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); | ||
359 | a->state_count--; | ||
360 | |||
361 | automaton_destroy_state (s); | ||
362 | } | ||
363 | |||
364 | |||
365 | /** | ||
366 | * Merge two states into one. Will merge 's1' and 's2' into 's1' and destroy | ||
367 | * 's2'. 's1' will contain all (non-duplicate) outgoing transitions of 's2'. | ||
368 | * | ||
369 | * @param ctx context | ||
370 | * @param a automaton | ||
371 | * @param s1 first state | ||
372 | * @param s2 second state, will be destroyed | ||
373 | */ | ||
374 | static void | ||
375 | automaton_merge_states (struct REGEX_INTERNAL_Context *ctx, | ||
376 | struct REGEX_INTERNAL_Automaton *a, | ||
377 | struct REGEX_INTERNAL_State *s1, | ||
378 | struct REGEX_INTERNAL_State *s2) | ||
379 | { | ||
380 | struct REGEX_INTERNAL_State *s_check; | ||
381 | struct REGEX_INTERNAL_Transition *t_check; | ||
382 | struct REGEX_INTERNAL_Transition *t; | ||
383 | struct REGEX_INTERNAL_Transition *t_next; | ||
384 | int is_dup; | ||
385 | |||
386 | if (s1 == s2) | ||
387 | return; | ||
388 | |||
389 | /* 1. Make all transitions pointing to s2 point to s1, unless this transition | ||
390 | * does not already exists, if it already exists remove transition. */ | ||
391 | for (s_check = a->states_head; NULL != s_check; s_check = s_check->next) | ||
392 | { | ||
393 | for (t_check = s_check->transitions_head; NULL != t_check; t_check = t_next) | ||
394 | { | ||
395 | t_next = t_check->next; | ||
396 | |||
397 | if (s2 == t_check->to_state) | ||
398 | { | ||
399 | is_dup = GNUNET_NO; | ||
400 | for (t = t_check->from_state->transitions_head; NULL != t; t = t->next) | ||
401 | { | ||
402 | if ((t->to_state == s1) && (0 == strcmp (t_check->label, t->label)) ) | ||
403 | is_dup = GNUNET_YES; | ||
404 | } | ||
405 | if (GNUNET_NO == is_dup) | ||
406 | t_check->to_state = s1; | ||
407 | else | ||
408 | state_remove_transition (t_check->from_state, t_check); | ||
409 | } | ||
410 | } | ||
411 | } | ||
412 | |||
413 | /* 2. Add all transitions from s2 to sX to s1 */ | ||
414 | for (t_check = s2->transitions_head; NULL != t_check; t_check = t_check->next) | ||
415 | { | ||
416 | if (t_check->to_state != s1) | ||
417 | state_add_transition (ctx, s1, t_check->label, t_check->to_state); | ||
418 | } | ||
419 | |||
420 | /* 3. Rename s1 to {s1,s2} */ | ||
421 | #if REGEX_DEBUG_DFA | ||
422 | char *new_name; | ||
423 | |||
424 | new_name = s1->name; | ||
425 | GNUNET_asprintf (&s1->name, "{%s,%s}", new_name, s2->name); | ||
426 | GNUNET_free (new_name); | ||
427 | #endif | ||
428 | |||
429 | /* remove state */ | ||
430 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s2); | ||
431 | a->state_count--; | ||
432 | automaton_destroy_state (s2); | ||
433 | } | ||
434 | |||
435 | |||
436 | /** | ||
437 | * Add a state to the automaton 'a', always use this function to alter the | ||
438 | * states DLL of the automaton. | ||
439 | * | ||
440 | * @param a automaton to add the state to | ||
441 | * @param s state that should be added | ||
442 | */ | ||
443 | static void | ||
444 | automaton_add_state (struct REGEX_INTERNAL_Automaton *a, | ||
445 | struct REGEX_INTERNAL_State *s) | ||
446 | { | ||
447 | GNUNET_CONTAINER_DLL_insert (a->states_head, a->states_tail, s); | ||
448 | a->state_count++; | ||
449 | } | ||
450 | |||
451 | |||
452 | /** | ||
453 | * Depth-first traversal (DFS) of all states that are reachable from state | ||
454 | * 's'. Performs 'action' on each visited state. | ||
455 | * | ||
456 | * @param s start state. | ||
457 | * @param marks an array of size a->state_count to remember which state was | ||
458 | * already visited. | ||
459 | * @param count current count of the state. | ||
460 | * @param check function that is checked before advancing on each transition | ||
461 | * in the DFS. | ||
462 | * @param check_cls closure for check. | ||
463 | * @param action action to be performed on each state. | ||
464 | * @param action_cls closure for action. | ||
465 | */ | ||
466 | static void | ||
467 | automaton_state_traverse (struct REGEX_INTERNAL_State *s, | ||
468 | int *marks, | ||
469 | unsigned int *count, | ||
470 | REGEX_INTERNAL_traverse_check check, | ||
471 | void *check_cls, | ||
472 | REGEX_INTERNAL_traverse_action action, | ||
473 | void *action_cls) | ||
474 | { | ||
475 | struct REGEX_INTERNAL_Transition *t; | ||
476 | |||
477 | if (GNUNET_YES == marks[s->traversal_id]) | ||
478 | return; | ||
479 | |||
480 | marks[s->traversal_id] = GNUNET_YES; | ||
481 | |||
482 | if (NULL != action) | ||
483 | action (action_cls, *count, s); | ||
484 | |||
485 | (*count)++; | ||
486 | |||
487 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
488 | { | ||
489 | if ((NULL == check) || | ||
490 | ((NULL != check) && (GNUNET_YES == check (check_cls, s, t)) )) | ||
491 | { | ||
492 | automaton_state_traverse (t->to_state, | ||
493 | marks, | ||
494 | count, | ||
495 | check, | ||
496 | check_cls, | ||
497 | action, | ||
498 | action_cls); | ||
499 | } | ||
500 | } | ||
501 | } | ||
502 | |||
503 | |||
504 | /** | ||
505 | * Traverses the given automaton using depth-first-search (DFS) from it's start | ||
506 | * state, visiting all reachable states and calling 'action' on each one of | ||
507 | * them. | ||
508 | * | ||
509 | * @param a automaton to be traversed. | ||
510 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
511 | * @param check function that is checked before advancing on each transition | ||
512 | * in the DFS. | ||
513 | * @param check_cls closure for @a check. | ||
514 | * @param action action to be performed on each state. | ||
515 | * @param action_cls closure for @a action | ||
516 | */ | ||
517 | void | ||
518 | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, | ||
519 | struct REGEX_INTERNAL_State *start, | ||
520 | REGEX_INTERNAL_traverse_check check, | ||
521 | void *check_cls, | ||
522 | REGEX_INTERNAL_traverse_action action, | ||
523 | void *action_cls) | ||
524 | { | ||
525 | unsigned int count; | ||
526 | struct REGEX_INTERNAL_State *s; | ||
527 | |||
528 | if ((NULL == a) || (0 == a->state_count)) | ||
529 | return; | ||
530 | |||
531 | int marks[a->state_count]; | ||
532 | |||
533 | for (count = 0, s = a->states_head; NULL != s && count < a->state_count; | ||
534 | s = s->next, count++) | ||
535 | { | ||
536 | s->traversal_id = count; | ||
537 | marks[s->traversal_id] = GNUNET_NO; | ||
538 | } | ||
539 | |||
540 | count = 0; | ||
541 | |||
542 | if (NULL == start) | ||
543 | s = a->start; | ||
544 | else | ||
545 | s = start; | ||
546 | |||
547 | automaton_state_traverse (s, | ||
548 | marks, | ||
549 | &count, | ||
550 | check, | ||
551 | check_cls, | ||
552 | action, | ||
553 | action_cls); | ||
554 | } | ||
555 | |||
556 | |||
/**
 * Length-counted string container, used for faster string operations.
 */
struct StringBuffer
{
  /**
   * Start of the string data.  This may point into the middle of the
   * allocation; the data is NOT 0-terminated!
   */
  char *sbuf;

  /**
   * The allocated buffer.
   */
  char *abuf;

  /**
   * Number of bytes of string data at @e sbuf.
   */
  size_t slen;

  /**
   * Number of bytes allocated (size of the allocation at @e abuf).
   */
  unsigned int blen;

  /**
   * Set if the buffer currently stands for "NULL", which is distinct
   * from the empty string.
   */
  int16_t null_flag;

  /**
   * Only meaningful for entries of the last/current generation array:
   * #GNUNET_YES if the last and the current generation are identical,
   * so that copying can be skipped when the value did not change.
   * According to the original author this optimization gains about 1%
   * --- but only with int16_t here; with plain "int" for both flags
   * performance dropped significantly on the author's system, most
   * likely due to increased cache misses.
   */
  int16_t synced;
};
599 | |||
600 | |||
601 | /** | ||
602 | * Compare two strings for equality. If either is NULL they are not equal. | ||
603 | * | ||
604 | * @param s1 first string for comparison. | ||
605 | * @param s2 second string for comparison. | ||
606 | * | ||
607 | * @return 0 if the strings are the same or both NULL, 1 or -1 if not. | ||
608 | */ | ||
609 | static int | ||
610 | sb_nullstrcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) | ||
611 | { | ||
612 | if ((GNUNET_YES == s1->null_flag) && (GNUNET_YES == s2->null_flag)) | ||
613 | return 0; | ||
614 | if ((GNUNET_YES == s1->null_flag) || (GNUNET_YES == s2->null_flag)) | ||
615 | return -1; | ||
616 | if (s1->slen != s2->slen) | ||
617 | return -1; | ||
618 | if (0 == s1->slen) | ||
619 | return 0; | ||
620 | return memcmp (s1->sbuf, s2->sbuf, s1->slen); | ||
621 | } | ||
622 | |||
623 | |||
624 | /** | ||
625 | * Compare two strings for equality. | ||
626 | * | ||
627 | * @param s1 first string for comparison. | ||
628 | * @param s2 second string for comparison. | ||
629 | * | ||
630 | * @return 0 if the strings are the same, 1 or -1 if not. | ||
631 | */ | ||
632 | static int | ||
633 | sb_strcmp (const struct StringBuffer *s1, const struct StringBuffer *s2) | ||
634 | { | ||
635 | if (s1->slen != s2->slen) | ||
636 | return -1; | ||
637 | if (0 == s1->slen) | ||
638 | return 0; | ||
639 | return memcmp (s1->sbuf, s2->sbuf, s1->slen); | ||
640 | } | ||
641 | |||
642 | |||
643 | /** | ||
644 | * Reallocate the buffer of 'ret' to fit 'nlen' characters; | ||
645 | * move the existing string to the beginning of the new buffer. | ||
646 | * | ||
647 | * @param ret current buffer, to be updated | ||
648 | * @param nlen target length for the buffer, must be at least ret->slen | ||
649 | */ | ||
650 | static void | ||
651 | sb_realloc (struct StringBuffer *ret, size_t nlen) | ||
652 | { | ||
653 | char *old; | ||
654 | |||
655 | GNUNET_assert (nlen >= ret->slen); | ||
656 | old = ret->abuf; | ||
657 | ret->abuf = GNUNET_malloc (nlen); | ||
658 | ret->blen = nlen; | ||
659 | GNUNET_memcpy (ret->abuf, ret->sbuf, ret->slen); | ||
660 | ret->sbuf = ret->abuf; | ||
661 | GNUNET_free (old); | ||
662 | } | ||
663 | |||
664 | |||
665 | /** | ||
666 | * Append a string. | ||
667 | * | ||
668 | * @param ret where to write the result | ||
669 | * @param sarg string to append | ||
670 | */ | ||
671 | static void | ||
672 | sb_append (struct StringBuffer *ret, const struct StringBuffer *sarg) | ||
673 | { | ||
674 | if (GNUNET_YES == ret->null_flag) | ||
675 | ret->slen = 0; | ||
676 | ret->null_flag = GNUNET_NO; | ||
677 | if (ret->blen < sarg->slen + ret->slen) | ||
678 | sb_realloc (ret, ret->blen + sarg->slen + 128); | ||
679 | GNUNET_memcpy (&ret->sbuf[ret->slen], sarg->sbuf, sarg->slen); | ||
680 | ret->slen += sarg->slen; | ||
681 | } | ||
682 | |||
683 | |||
684 | /** | ||
685 | * Append a C string. | ||
686 | * | ||
687 | * @param ret where to write the result | ||
688 | * @param cstr string to append | ||
689 | */ | ||
690 | static void | ||
691 | sb_append_cstr (struct StringBuffer *ret, const char *cstr) | ||
692 | { | ||
693 | size_t cstr_len = strlen (cstr); | ||
694 | |||
695 | if (GNUNET_YES == ret->null_flag) | ||
696 | ret->slen = 0; | ||
697 | ret->null_flag = GNUNET_NO; | ||
698 | if (ret->blen < cstr_len + ret->slen) | ||
699 | sb_realloc (ret, ret->blen + cstr_len + 128); | ||
700 | GNUNET_memcpy (&ret->sbuf[ret->slen], cstr, cstr_len); | ||
701 | ret->slen += cstr_len; | ||
702 | } | ||
703 | |||
704 | |||
705 | /** | ||
706 | * Wrap a string buffer, that is, set ret to the format string | ||
707 | * which contains an "%s" which is to be replaced with the original | ||
708 | * content of 'ret'. Note that optimizing this function is not | ||
709 | * really worth it, it is rarely called. | ||
710 | * | ||
711 | * @param ret where to write the result and take the input for %.*s from | ||
712 | * @param format format string, fprintf-style, with exactly one "%.*s" | ||
713 | * @param extra_chars how long will the result be, in addition to 'sarg' length | ||
714 | */ | ||
715 | static void | ||
716 | sb_wrap (struct StringBuffer *ret, const char *format, size_t extra_chars) | ||
717 | { | ||
718 | char *temp; | ||
719 | |||
720 | if (GNUNET_YES == ret->null_flag) | ||
721 | ret->slen = 0; | ||
722 | ret->null_flag = GNUNET_NO; | ||
723 | temp = GNUNET_malloc (ret->slen + extra_chars + 1); | ||
724 | GNUNET_snprintf (temp, | ||
725 | ret->slen + extra_chars + 1, | ||
726 | format, | ||
727 | (int) ret->slen, | ||
728 | ret->sbuf); | ||
729 | GNUNET_free (ret->abuf); | ||
730 | ret->abuf = temp; | ||
731 | ret->sbuf = temp; | ||
732 | ret->blen = ret->slen + extra_chars + 1; | ||
733 | ret->slen = ret->slen + extra_chars; | ||
734 | } | ||
735 | |||
736 | |||
737 | /** | ||
738 | * Format a string buffer. Note that optimizing this function is not | ||
739 | * really worth it, it is rarely called. | ||
740 | * | ||
741 | * @param ret where to write the result | ||
742 | * @param format format string, fprintf-style, with exactly one "%.*s" | ||
743 | * @param extra_chars how long will the result be, in addition to 'sarg' length | ||
744 | * @param sarg string to print into the format | ||
745 | */ | ||
746 | static void | ||
747 | sb_printf1 (struct StringBuffer *ret, | ||
748 | const char *format, | ||
749 | size_t extra_chars, | ||
750 | const struct StringBuffer *sarg) | ||
751 | { | ||
752 | if (ret->blen < sarg->slen + extra_chars + 1) | ||
753 | sb_realloc (ret, sarg->slen + extra_chars + 1); | ||
754 | ret->null_flag = GNUNET_NO; | ||
755 | ret->sbuf = ret->abuf; | ||
756 | ret->slen = sarg->slen + extra_chars; | ||
757 | GNUNET_snprintf (ret->sbuf, ret->blen, format, (int) sarg->slen, sarg->sbuf); | ||
758 | } | ||
759 | |||
760 | |||
761 | /** | ||
762 | * Format a string buffer. | ||
763 | * | ||
764 | * @param ret where to write the result | ||
765 | * @param format format string, fprintf-style, with exactly two "%.*s" | ||
766 | * @param extra_chars how long will the result be, in addition to 'sarg1/2' length | ||
767 | * @param sarg1 first string to print into the format | ||
768 | * @param sarg2 second string to print into the format | ||
769 | */ | ||
770 | static void | ||
771 | sb_printf2 (struct StringBuffer *ret, | ||
772 | const char *format, | ||
773 | size_t extra_chars, | ||
774 | const struct StringBuffer *sarg1, | ||
775 | const struct StringBuffer *sarg2) | ||
776 | { | ||
777 | if (ret->blen < sarg1->slen + sarg2->slen + extra_chars + 1) | ||
778 | sb_realloc (ret, sarg1->slen + sarg2->slen + extra_chars + 1); | ||
779 | ret->null_flag = GNUNET_NO; | ||
780 | ret->slen = sarg1->slen + sarg2->slen + extra_chars; | ||
781 | ret->sbuf = ret->abuf; | ||
782 | GNUNET_snprintf (ret->sbuf, | ||
783 | ret->blen, | ||
784 | format, | ||
785 | (int) sarg1->slen, | ||
786 | sarg1->sbuf, | ||
787 | (int) sarg2->slen, | ||
788 | sarg2->sbuf); | ||
789 | } | ||
790 | |||
791 | |||
792 | /** | ||
793 | * Format a string buffer. Note that optimizing this function is not | ||
794 | * really worth it, it is rarely called. | ||
795 | * | ||
796 | * @param ret where to write the result | ||
797 | * @param format format string, fprintf-style, with exactly three "%.*s" | ||
798 | * @param extra_chars how long will the result be, in addition to 'sarg1/2/3' length | ||
799 | * @param sarg1 first string to print into the format | ||
800 | * @param sarg2 second string to print into the format | ||
801 | * @param sarg3 third string to print into the format | ||
802 | */ | ||
803 | static void | ||
804 | sb_printf3 (struct StringBuffer *ret, | ||
805 | const char *format, | ||
806 | size_t extra_chars, | ||
807 | const struct StringBuffer *sarg1, | ||
808 | const struct StringBuffer *sarg2, | ||
809 | const struct StringBuffer *sarg3) | ||
810 | { | ||
811 | if (ret->blen < sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1) | ||
812 | sb_realloc (ret, sarg1->slen + sarg2->slen + sarg3->slen + extra_chars + 1); | ||
813 | ret->null_flag = GNUNET_NO; | ||
814 | ret->slen = sarg1->slen + sarg2->slen + sarg3->slen + extra_chars; | ||
815 | ret->sbuf = ret->abuf; | ||
816 | GNUNET_snprintf (ret->sbuf, | ||
817 | ret->blen, | ||
818 | format, | ||
819 | (int) sarg1->slen, | ||
820 | sarg1->sbuf, | ||
821 | (int) sarg2->slen, | ||
822 | sarg2->sbuf, | ||
823 | (int) sarg3->slen, | ||
824 | sarg3->sbuf); | ||
825 | } | ||
826 | |||
827 | |||
828 | /** | ||
829 | * Free resources of the given string buffer. | ||
830 | * | ||
831 | * @param sb buffer to free (actual pointer is not freed, as they | ||
832 | * should not be individually allocated) | ||
833 | */ | ||
834 | static void | ||
835 | sb_free (struct StringBuffer *sb) | ||
836 | { | ||
837 | GNUNET_array_grow (sb->abuf, sb->blen, 0); | ||
838 | sb->slen = 0; | ||
839 | sb->sbuf = NULL; | ||
840 | sb->null_flag = GNUNET_YES; | ||
841 | } | ||
842 | |||
843 | |||
844 | /** | ||
845 | * Copy the given string buffer from 'in' to 'out'. | ||
846 | * | ||
847 | * @param in input string | ||
848 | * @param out output string | ||
849 | */ | ||
850 | static void | ||
851 | sb_strdup (struct StringBuffer *out, const struct StringBuffer *in) | ||
852 | |||
853 | { | ||
854 | out->null_flag = in->null_flag; | ||
855 | if (GNUNET_YES == out->null_flag) | ||
856 | return; | ||
857 | if (out->blen < in->slen) | ||
858 | { | ||
859 | GNUNET_array_grow (out->abuf, out->blen, in->slen); | ||
860 | } | ||
861 | out->sbuf = out->abuf; | ||
862 | out->slen = in->slen; | ||
863 | GNUNET_memcpy (out->sbuf, in->sbuf, out->slen); | ||
864 | } | ||
865 | |||
866 | |||
867 | /** | ||
868 | * Copy the given string buffer from 'in' to 'out'. | ||
869 | * | ||
870 | * @param cstr input string | ||
871 | * @param out output string | ||
872 | */ | ||
873 | static void | ||
874 | sb_strdup_cstr (struct StringBuffer *out, const char *cstr) | ||
875 | { | ||
876 | if (NULL == cstr) | ||
877 | { | ||
878 | out->null_flag = GNUNET_YES; | ||
879 | return; | ||
880 | } | ||
881 | out->null_flag = GNUNET_NO; | ||
882 | out->slen = strlen (cstr); | ||
883 | if (out->blen < out->slen) | ||
884 | { | ||
885 | GNUNET_array_grow (out->abuf, out->blen, out->slen); | ||
886 | } | ||
887 | out->sbuf = out->abuf; | ||
888 | GNUNET_memcpy (out->sbuf, cstr, out->slen); | ||
889 | } | ||
890 | |||
891 | |||
892 | /** | ||
893 | * Check if the given string @a str needs parentheses around it when | ||
894 | * using it to generate a regex. | ||
895 | * | ||
896 | * @param str string | ||
897 | * | ||
898 | * @return #GNUNET_YES if parentheses are needed, #GNUNET_NO otherwise | ||
899 | */ | ||
900 | static int | ||
901 | needs_parentheses (const struct StringBuffer *str) | ||
902 | { | ||
903 | size_t slen; | ||
904 | const char *op; | ||
905 | const char *cl; | ||
906 | const char *pos; | ||
907 | const char *end; | ||
908 | unsigned int cnt; | ||
909 | |||
910 | if ((GNUNET_YES == str->null_flag) || ((slen = str->slen) < 2)) | ||
911 | return GNUNET_NO; | ||
912 | pos = str->sbuf; | ||
913 | if ('(' != pos[0]) | ||
914 | return GNUNET_YES; | ||
915 | end = str->sbuf + slen; | ||
916 | cnt = 1; | ||
917 | pos++; | ||
918 | while (cnt > 0) | ||
919 | { | ||
920 | cl = memchr (pos, ')', end - pos); | ||
921 | if (NULL == cl) | ||
922 | { | ||
923 | GNUNET_break (0); | ||
924 | return GNUNET_YES; | ||
925 | } | ||
926 | /* while '(' before ')', count opening parens */ | ||
927 | while ((NULL != (op = memchr (pos, '(', end - pos))) && (op < cl)) | ||
928 | { | ||
929 | cnt++; | ||
930 | pos = op + 1; | ||
931 | } | ||
932 | /* got ')' first */ | ||
933 | cnt--; | ||
934 | pos = cl + 1; | ||
935 | } | ||
936 | return (*pos == '\0') ? GNUNET_NO : GNUNET_YES; | ||
937 | } | ||
938 | |||
939 | |||
940 | /** | ||
941 | * Remove parentheses surrounding string @a str. | ||
942 | * Example: "(a)" becomes "a", "(a|b)|(a|c)" stays the same. | ||
943 | * You need to #GNUNET_free() the returned string. | ||
944 | * | ||
945 | * @param str string, modified to contain a | ||
946 | * @return string without surrounding parentheses, string 'str' if no preceding | ||
947 | * epsilon could be found, NULL if 'str' was NULL | ||
948 | */ | ||
949 | static void | ||
950 | remove_parentheses (struct StringBuffer *str) | ||
951 | { | ||
952 | size_t slen; | ||
953 | const char *pos; | ||
954 | const char *end; | ||
955 | const char *sbuf; | ||
956 | const char *op; | ||
957 | const char *cp; | ||
958 | unsigned int cnt; | ||
959 | |||
960 | if (0) | ||
961 | return; | ||
962 | sbuf = str->sbuf; | ||
963 | if ((GNUNET_YES == str->null_flag) || (1 >= (slen = str->slen)) || | ||
964 | ('(' != str->sbuf[0]) || (')' != str->sbuf[slen - 1])) | ||
965 | return; | ||
966 | cnt = 0; | ||
967 | pos = &sbuf[1]; | ||
968 | end = &sbuf[slen - 1]; | ||
969 | op = memchr (pos, '(', end - pos); | ||
970 | cp = memchr (pos, ')', end - pos); | ||
971 | while (NULL != cp) | ||
972 | { | ||
973 | while ((NULL != op) && (op < cp)) | ||
974 | { | ||
975 | cnt++; | ||
976 | pos = op + 1; | ||
977 | op = memchr (pos, '(', end - pos); | ||
978 | } | ||
979 | while ((NULL != cp) && ((NULL == op) || (cp < op))) | ||
980 | { | ||
981 | if (0 == cnt) | ||
982 | return; /* can't strip parens */ | ||
983 | cnt--; | ||
984 | pos = cp + 1; | ||
985 | cp = memchr (pos, ')', end - pos); | ||
986 | } | ||
987 | } | ||
988 | if (0 != cnt) | ||
989 | { | ||
990 | GNUNET_break (0); | ||
991 | return; | ||
992 | } | ||
993 | str->sbuf++; | ||
994 | str->slen -= 2; | ||
995 | } | ||
996 | |||
997 | |||
998 | /** | ||
999 | * Check if the string 'str' starts with an epsilon (empty string). | ||
1000 | * Example: "(|a)" is starting with an epsilon. | ||
1001 | * | ||
1002 | * @param str string to test | ||
1003 | * | ||
1004 | * @return 0 if str has no epsilon, 1 if str starts with '(|' and ends with ')' | ||
1005 | */ | ||
1006 | static int | ||
1007 | has_epsilon (const struct StringBuffer *str) | ||
1008 | { | ||
1009 | return (GNUNET_YES != str->null_flag) && (0 < str->slen) && | ||
1010 | ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) && | ||
1011 | (')' == str->sbuf[str->slen - 1]); | ||
1012 | } | ||
1013 | |||
1014 | |||
1015 | /** | ||
1016 | * Remove an epsilon from the string str. Where epsilon is an empty string | ||
1017 | * Example: str = "(|a|b|c)", result: "a|b|c" | ||
1018 | * The returned string needs to be freed. | ||
1019 | * | ||
1020 | * @param str original string | ||
1021 | * @param ret where to return string without preceding epsilon, string 'str' if no preceding | ||
1022 | * epsilon could be found, NULL if 'str' was NULL | ||
1023 | */ | ||
1024 | static void | ||
1025 | remove_epsilon (const struct StringBuffer *str, struct StringBuffer *ret) | ||
1026 | { | ||
1027 | if (GNUNET_YES == str->null_flag) | ||
1028 | { | ||
1029 | ret->null_flag = GNUNET_YES; | ||
1030 | return; | ||
1031 | } | ||
1032 | if ((str->slen > 1) && ('(' == str->sbuf[0]) && ('|' == str->sbuf[1]) && | ||
1033 | (')' == str->sbuf[str->slen - 1])) | ||
1034 | { | ||
1035 | /* remove epsilon */ | ||
1036 | if (ret->blen < str->slen - 3) | ||
1037 | { | ||
1038 | GNUNET_array_grow (ret->abuf, ret->blen, str->slen - 3); | ||
1039 | } | ||
1040 | ret->sbuf = ret->abuf; | ||
1041 | ret->slen = str->slen - 3; | ||
1042 | GNUNET_memcpy (ret->sbuf, &str->sbuf[2], ret->slen); | ||
1043 | return; | ||
1044 | } | ||
1045 | sb_strdup (ret, str); | ||
1046 | } | ||
1047 | |||
1048 | |||
1049 | /** | ||
1050 | * Compare n bytes of 'str1' and 'str2' | ||
1051 | * | ||
1052 | * @param str1 first string to compare | ||
1053 | * @param str2 second string for comparison | ||
1054 | * @param n number of bytes to compare | ||
1055 | * | ||
1056 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | ||
1057 | */ | ||
1058 | static int | ||
1059 | sb_strncmp (const struct StringBuffer *str1, | ||
1060 | const struct StringBuffer *str2, | ||
1061 | size_t n) | ||
1062 | { | ||
1063 | size_t max; | ||
1064 | |||
1065 | if ((str1->slen != str2->slen) && ((str1->slen < n) || (str2->slen < n))) | ||
1066 | return -1; | ||
1067 | max = GNUNET_MAX (str1->slen, str2->slen); | ||
1068 | if (max > n) | ||
1069 | max = n; | ||
1070 | return memcmp (str1->sbuf, str2->sbuf, max); | ||
1071 | } | ||
1072 | |||
1073 | |||
1074 | /** | ||
1075 | * Compare n bytes of 'str1' and 'str2' | ||
1076 | * | ||
1077 | * @param str1 first string to compare | ||
1078 | * @param str2 second C string for comparison | ||
1079 | * @param n number of bytes to compare (and length of str2) | ||
1080 | * | ||
1081 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | ||
1082 | */ | ||
1083 | static int | ||
1084 | sb_strncmp_cstr (const struct StringBuffer *str1, const char *str2, size_t n) | ||
1085 | { | ||
1086 | if (str1->slen < n) | ||
1087 | return -1; | ||
1088 | return memcmp (str1->sbuf, str2, n); | ||
1089 | } | ||
1090 | |||
1091 | |||
1092 | /** | ||
1093 | * Initialize string buffer for storing strings of up to n | ||
1094 | * characters. | ||
1095 | * | ||
1096 | * @param sb buffer to initialize | ||
1097 | * @param n desired target length | ||
1098 | */ | ||
1099 | static void | ||
1100 | sb_init (struct StringBuffer *sb, size_t n) | ||
1101 | { | ||
1102 | sb->null_flag = GNUNET_NO; | ||
1103 | sb->abuf = sb->sbuf = (0 == n) ? NULL : GNUNET_malloc (n); | ||
1104 | sb->blen = n; | ||
1105 | sb->slen = 0; | ||
1106 | } | ||
1107 | |||
1108 | |||
1109 | /** | ||
1110 | * Compare 'str1', starting from position 'k', with whole 'str2' | ||
1111 | * | ||
1112 | * @param str1 first string to compare, starting from position 'k' | ||
1113 | * @param str2 second string for comparison | ||
1114 | * @param k starting position in 'str1' | ||
1115 | * | ||
1116 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | ||
1117 | */ | ||
1118 | static int | ||
1119 | sb_strkcmp (const struct StringBuffer *str1, | ||
1120 | const struct StringBuffer *str2, | ||
1121 | size_t k) | ||
1122 | { | ||
1123 | if ((GNUNET_YES == str1->null_flag) || (GNUNET_YES == str2->null_flag) || | ||
1124 | (k > str1->slen) || (str1->slen - k != str2->slen)) | ||
1125 | return -1; | ||
1126 | return memcmp (&str1->sbuf[k], str2->sbuf, str2->slen); | ||
1127 | } | ||
1128 | |||
1129 | |||
1130 | /** | ||
1131 | * Helper function used as 'action' in 'REGEX_INTERNAL_automaton_traverse' | ||
1132 | * function to create the depth-first numbering of the states. | ||
1133 | * | ||
1134 | * @param cls states array. | ||
1135 | * @param count current state counter. | ||
1136 | * @param s current state. | ||
1137 | */ | ||
1138 | static void | ||
1139 | number_states (void *cls, | ||
1140 | const unsigned int count, | ||
1141 | struct REGEX_INTERNAL_State *s) | ||
1142 | { | ||
1143 | struct REGEX_INTERNAL_State **states = cls; | ||
1144 | |||
1145 | s->dfs_id = count; | ||
1146 | if (NULL != states) | ||
1147 | states[count] = s; | ||
1148 | } | ||
1149 | |||
1150 | |||
/**
 * Expand to the two arguments (precision and pointer) that a "%.*s"
 * conversion needs to print string buffer @a a; a null buffer is
 * printed as "(null)".  The expansion is deliberately a bare
 * comma-separated pair, so it can only be used inside an argument
 * list.  @a a is evaluated more than once.
 */
#define PRIS(a)                                         \
  ((GNUNET_YES == (a).null_flag) ? 6 : (int) (a).slen), \
  ((GNUNET_YES == (a).null_flag) ? "(null)" : (a).sbuf)
1154 | |||
1155 | |||
1156 | /** | ||
1157 | * Construct the regular expression given the inductive step, | ||
1158 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* | ||
1159 | * R^{(k-1)}_{kj}, and simplify the resulting expression saved in R_cur_ij. | ||
1160 | * | ||
1161 | * @param R_last_ij value of $R^{(k-1)_{ij}. | ||
1162 | * @param R_last_ik value of $R^{(k-1)_{ik}. | ||
1163 | * @param R_last_kk value of $R^{(k-1)_{kk}. | ||
1164 | * @param R_last_kj value of $R^{(k-1)_{kj}. | ||
1165 | * @param R_cur_ij result for this inductive step is saved in R_cur_ij, R_cur_ij | ||
1166 | * is expected to be NULL when called! | ||
1167 | * @param R_cur_l optimization -- kept between iterations to avoid realloc | ||
1168 | * @param R_cur_r optimization -- kept between iterations to avoid realloc | ||
1169 | */ | ||
1170 | static void | ||
1171 | automaton_create_proofs_simplify (const struct StringBuffer *R_last_ij, | ||
1172 | const struct StringBuffer *R_last_ik, | ||
1173 | const struct StringBuffer *R_last_kk, | ||
1174 | const struct StringBuffer *R_last_kj, | ||
1175 | struct StringBuffer *R_cur_ij, | ||
1176 | struct StringBuffer *R_cur_l, | ||
1177 | struct StringBuffer *R_cur_r) | ||
1178 | { | ||
1179 | struct StringBuffer R_temp_ij; | ||
1180 | struct StringBuffer R_temp_ik; | ||
1181 | struct StringBuffer R_temp_kj; | ||
1182 | struct StringBuffer R_temp_kk; | ||
1183 | int eps_check; | ||
1184 | int ij_ik_cmp; | ||
1185 | int ij_kj_cmp; | ||
1186 | int ik_kk_cmp; | ||
1187 | int kk_kj_cmp; | ||
1188 | int clean_ik_kk_cmp; | ||
1189 | int clean_kk_kj_cmp; | ||
1190 | size_t length; | ||
1191 | size_t length_l; | ||
1192 | size_t length_r; | ||
1193 | |||
1194 | /* | ||
1195 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1196 | * R_last == R^{(k-1)}, R_cur == R^{(k)} | ||
1197 | * R_cur_ij = R_cur_l | R_cur_r | ||
1198 | * R_cur_l == R^{(k-1)}_{ij} | ||
1199 | * R_cur_r == R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1200 | */if ((GNUNET_YES == R_last_ij->null_flag) && | ||
1201 | ((GNUNET_YES == R_last_ik->null_flag) || | ||
1202 | (GNUNET_YES == R_last_kj->null_flag))) | ||
1203 | { | ||
1204 | /* R^{(k)}_{ij} = N | N */ | ||
1205 | R_cur_ij->null_flag = GNUNET_YES; | ||
1206 | R_cur_ij->synced = GNUNET_NO; | ||
1207 | return; | ||
1208 | } | ||
1209 | |||
1210 | if ((GNUNET_YES == R_last_ik->null_flag) || | ||
1211 | (GNUNET_YES == R_last_kj->null_flag)) | ||
1212 | { | ||
1213 | /* R^{(k)}_{ij} = R^{(k-1)}_{ij} | N */ | ||
1214 | if (GNUNET_YES == R_last_ij->synced) | ||
1215 | { | ||
1216 | R_cur_ij->synced = GNUNET_YES; | ||
1217 | R_cur_ij->null_flag = GNUNET_NO; | ||
1218 | return; | ||
1219 | } | ||
1220 | R_cur_ij->synced = GNUNET_YES; | ||
1221 | sb_strdup (R_cur_ij, R_last_ij); | ||
1222 | return; | ||
1223 | } | ||
1224 | R_cur_ij->synced = GNUNET_NO; | ||
1225 | |||
1226 | /* $R^{(k)}_{ij} = N | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} OR | ||
1227 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} */ | ||
1228 | |||
1229 | R_cur_r->null_flag = GNUNET_YES; | ||
1230 | R_cur_r->slen = 0; | ||
1231 | R_cur_l->null_flag = GNUNET_YES; | ||
1232 | R_cur_l->slen = 0; | ||
1233 | |||
1234 | /* cache results from strcmp, we might need these many times */ | ||
1235 | ij_kj_cmp = sb_nullstrcmp (R_last_ij, R_last_kj); | ||
1236 | ij_ik_cmp = sb_nullstrcmp (R_last_ij, R_last_ik); | ||
1237 | ik_kk_cmp = sb_nullstrcmp (R_last_ik, R_last_kk); | ||
1238 | kk_kj_cmp = sb_nullstrcmp (R_last_kk, R_last_kj); | ||
1239 | |||
1240 | /* Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well | ||
1241 | * as parentheses, so we can better compare the contents */ | ||
1242 | |||
1243 | memset (&R_temp_ij, 0, sizeof(struct StringBuffer)); | ||
1244 | memset (&R_temp_ik, 0, sizeof(struct StringBuffer)); | ||
1245 | memset (&R_temp_kk, 0, sizeof(struct StringBuffer)); | ||
1246 | memset (&R_temp_kj, 0, sizeof(struct StringBuffer)); | ||
1247 | remove_epsilon (R_last_ik, &R_temp_ik); | ||
1248 | remove_epsilon (R_last_kk, &R_temp_kk); | ||
1249 | remove_epsilon (R_last_kj, &R_temp_kj); | ||
1250 | remove_parentheses (&R_temp_ik); | ||
1251 | remove_parentheses (&R_temp_kk); | ||
1252 | remove_parentheses (&R_temp_kj); | ||
1253 | clean_ik_kk_cmp = sb_nullstrcmp (R_last_ik, &R_temp_kk); | ||
1254 | clean_kk_kj_cmp = sb_nullstrcmp (&R_temp_kk, R_last_kj); | ||
1255 | |||
1256 | /* construct R_cur_l (and, if necessary R_cur_r) */ | ||
1257 | if (GNUNET_YES != R_last_ij->null_flag) | ||
1258 | { | ||
1259 | /* Assign R_temp_ij to R_last_ij and remove epsilon as well | ||
1260 | * as parentheses, so we can better compare the contents */ | ||
1261 | remove_epsilon (R_last_ij, &R_temp_ij); | ||
1262 | remove_parentheses (&R_temp_ij); | ||
1263 | |||
1264 | if ((0 == sb_strcmp (&R_temp_ij, &R_temp_ik)) && | ||
1265 | (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) && | ||
1266 | (0 == sb_strcmp (&R_temp_kk, &R_temp_kj))) | ||
1267 | { | ||
1268 | if (0 == R_temp_ij.slen) | ||
1269 | { | ||
1270 | R_cur_r->null_flag = GNUNET_NO; | ||
1271 | } | ||
1272 | else if ((0 == sb_strncmp_cstr (R_last_ij, "(|", 2)) || | ||
1273 | ((0 == sb_strncmp_cstr (R_last_ik, "(|", 2)) && | ||
1274 | (0 == sb_strncmp_cstr (R_last_kj, "(|", 2)) )) | ||
1275 | { | ||
1276 | /* | ||
1277 | * a|(e|a)a*(e|a) = a* | ||
1278 | * a|(e|a)(e|a)*(e|a) = a* | ||
1279 | * (e|a)|aa*a = a* | ||
1280 | * (e|a)|aa*(e|a) = a* | ||
1281 | * (e|a)|(e|a)a*a = a* | ||
1282 | * (e|a)|(e|a)a*(e|a) = a* | ||
1283 | * (e|a)|(e|a)(e|a)*(e|a) = a* | ||
1284 | */if (GNUNET_YES == needs_parentheses (&R_temp_ij)) | ||
1285 | sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_ij); | ||
1286 | else | ||
1287 | sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_ij); | ||
1288 | } | ||
1289 | else | ||
1290 | { | ||
1291 | /* | ||
1292 | * a|aa*a = a+ | ||
1293 | * a|(e|a)a*a = a+ | ||
1294 | * a|aa*(e|a) = a+ | ||
1295 | * a|(e|a)(e|a)*a = a+ | ||
1296 | * a|a(e|a)*(e|a) = a+ | ||
1297 | */if (GNUNET_YES == needs_parentheses (&R_temp_ij)) | ||
1298 | sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_ij); | ||
1299 | else | ||
1300 | sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_ij); | ||
1301 | } | ||
1302 | } | ||
1303 | else if ((0 == ij_ik_cmp) && (0 == clean_kk_kj_cmp) && | ||
1304 | (0 != clean_ik_kk_cmp)) | ||
1305 | { | ||
1306 | /* a|ab*b = ab* */ | ||
1307 | if (0 == R_last_kk->slen) | ||
1308 | sb_strdup (R_cur_r, R_last_ij); | ||
1309 | else if (GNUNET_YES == needs_parentheses (&R_temp_kk)) | ||
1310 | sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk); | ||
1311 | else | ||
1312 | sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, R_last_kk); | ||
1313 | R_cur_l->null_flag = GNUNET_YES; | ||
1314 | } | ||
1315 | else if ((0 == ij_kj_cmp) && (0 == clean_ik_kk_cmp) && | ||
1316 | (0 != clean_kk_kj_cmp)) | ||
1317 | { | ||
1318 | /* a|bb*a = b*a */ | ||
1319 | if (R_last_kk->slen < 1) | ||
1320 | { | ||
1321 | sb_strdup (R_cur_r, R_last_kj); | ||
1322 | } | ||
1323 | else if (GNUNET_YES == needs_parentheses (&R_temp_kk)) | ||
1324 | sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj); | ||
1325 | else | ||
1326 | sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj); | ||
1327 | |||
1328 | R_cur_l->null_flag = GNUNET_YES; | ||
1329 | } | ||
1330 | else if ((0 == ij_ik_cmp) && (0 == kk_kj_cmp) && | ||
1331 | (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk)) | ||
1332 | { | ||
1333 | /* a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* */ | ||
1334 | if (needs_parentheses (&R_temp_kk)) | ||
1335 | sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ij, &R_temp_kk); | ||
1336 | else | ||
1337 | sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ij, &R_temp_kk); | ||
1338 | R_cur_l->null_flag = GNUNET_YES; | ||
1339 | } | ||
1340 | else if ((0 == ij_kj_cmp) && (0 == ik_kk_cmp) && | ||
1341 | (! has_epsilon (R_last_ij)) && has_epsilon (R_last_kk)) | ||
1342 | { | ||
1343 | /* a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a */ | ||
1344 | if (needs_parentheses (&R_temp_kk)) | ||
1345 | sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_ij); | ||
1346 | else | ||
1347 | sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_ij); | ||
1348 | R_cur_l->null_flag = GNUNET_YES; | ||
1349 | } | ||
1350 | else | ||
1351 | { | ||
1352 | sb_strdup (R_cur_l, R_last_ij); | ||
1353 | remove_parentheses (R_cur_l); | ||
1354 | } | ||
1355 | } | ||
1356 | else | ||
1357 | { | ||
1358 | /* we have no left side */ | ||
1359 | R_cur_l->null_flag = GNUNET_YES; | ||
1360 | } | ||
1361 | |||
1362 | /* construct R_cur_r, if not already constructed */ | ||
1363 | if (GNUNET_YES == R_cur_r->null_flag) | ||
1364 | { | ||
1365 | length = R_temp_kk.slen - R_last_ik->slen; | ||
1366 | |||
1367 | /* a(ba)*bx = (ab)+x */ | ||
1368 | if ((length > 0) && (GNUNET_YES != R_last_kk->null_flag) && | ||
1369 | (0 < R_last_kk->slen) && (GNUNET_YES != R_last_kj->null_flag) && | ||
1370 | (0 < R_last_kj->slen) && (GNUNET_YES != R_last_ik->null_flag) && | ||
1371 | (0 < R_last_ik->slen) && | ||
1372 | (0 == sb_strkcmp (&R_temp_kk, R_last_ik, length)) && | ||
1373 | (0 == sb_strncmp (&R_temp_kk, R_last_kj, length))) | ||
1374 | { | ||
1375 | struct StringBuffer temp_a; | ||
1376 | struct StringBuffer temp_b; | ||
1377 | |||
1378 | sb_init (&temp_a, length); | ||
1379 | sb_init (&temp_b, R_last_kj->slen - length); | ||
1380 | |||
1381 | length_l = length; | ||
1382 | temp_a.sbuf = temp_a.abuf; | ||
1383 | GNUNET_memcpy (temp_a.sbuf, R_last_kj->sbuf, length_l); | ||
1384 | temp_a.slen = length_l; | ||
1385 | |||
1386 | length_r = R_last_kj->slen - length; | ||
1387 | temp_b.sbuf = temp_b.abuf; | ||
1388 | GNUNET_memcpy (temp_b.sbuf, &R_last_kj->sbuf[length], length_r); | ||
1389 | temp_b.slen = length_r; | ||
1390 | |||
1391 | /* e|(ab)+ = (ab)* */ | ||
1392 | if ((GNUNET_YES != R_cur_l->null_flag) && (0 == R_cur_l->slen) && | ||
1393 | (0 == temp_b.slen)) | ||
1394 | { | ||
1395 | sb_printf2 (R_cur_r, "(%.*s%.*s)*", 3, R_last_ik, &temp_a); | ||
1396 | sb_free (R_cur_l); | ||
1397 | R_cur_l->null_flag = GNUNET_YES; | ||
1398 | } | ||
1399 | else | ||
1400 | { | ||
1401 | sb_printf3 (R_cur_r, "(%.*s%.*s)+%.*s", 3, R_last_ik, &temp_a, &temp_b); | ||
1402 | } | ||
1403 | sb_free (&temp_a); | ||
1404 | sb_free (&temp_b); | ||
1405 | } | ||
1406 | else if ((0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) && | ||
1407 | (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) ) | ||
1408 | { | ||
1409 | /* | ||
1410 | * (e|a)a*(e|a) = a* | ||
1411 | * (e|a)(e|a)*(e|a) = a* | ||
1412 | */ | ||
1413 | if (has_epsilon (R_last_ik) && has_epsilon (R_last_kj)) | ||
1414 | { | ||
1415 | if (needs_parentheses (&R_temp_kk)) | ||
1416 | sb_printf1 (R_cur_r, "(%.*s)*", 3, &R_temp_kk); | ||
1417 | else | ||
1418 | sb_printf1 (R_cur_r, "%.*s*", 1, &R_temp_kk); | ||
1419 | } | ||
1420 | /* aa*a = a+a */ | ||
1421 | else if ((0 == clean_ik_kk_cmp) && (0 == clean_kk_kj_cmp) && | ||
1422 | (! has_epsilon (R_last_ik))) | ||
1423 | { | ||
1424 | if (needs_parentheses (&R_temp_kk)) | ||
1425 | sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, &R_temp_kk); | ||
1426 | else | ||
1427 | sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, &R_temp_kk); | ||
1428 | } | ||
1429 | /* | ||
1430 | * (e|a)a*a = a+ | ||
1431 | * aa*(e|a) = a+ | ||
1432 | * a(e|a)*(e|a) = a+ | ||
1433 | * (e|a)a*a = a+ | ||
1434 | */else | ||
1435 | { | ||
1436 | eps_check = (has_epsilon (R_last_ik) + has_epsilon (R_last_kk) | ||
1437 | + has_epsilon (R_last_kj)); | ||
1438 | |||
1439 | if (1 == eps_check) | ||
1440 | { | ||
1441 | if (needs_parentheses (&R_temp_kk)) | ||
1442 | sb_printf1 (R_cur_r, "(%.*s)+", 3, &R_temp_kk); | ||
1443 | else | ||
1444 | sb_printf1 (R_cur_r, "%.*s+", 1, &R_temp_kk); | ||
1445 | } | ||
1446 | } | ||
1447 | } | ||
1448 | /* | ||
1449 | * aa*b = a+b | ||
1450 | * (e|a)(e|a)*b = a*b | ||
1451 | */ | ||
1452 | else if (0 == sb_strcmp (&R_temp_ik, &R_temp_kk)) | ||
1453 | { | ||
1454 | if (has_epsilon (R_last_ik)) | ||
1455 | { | ||
1456 | if (needs_parentheses (&R_temp_kk)) | ||
1457 | sb_printf2 (R_cur_r, "(%.*s)*%.*s", 3, &R_temp_kk, R_last_kj); | ||
1458 | else | ||
1459 | sb_printf2 (R_cur_r, "%.*s*%.*s", 1, &R_temp_kk, R_last_kj); | ||
1460 | } | ||
1461 | else | ||
1462 | { | ||
1463 | if (needs_parentheses (&R_temp_kk)) | ||
1464 | sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, &R_temp_kk, R_last_kj); | ||
1465 | else | ||
1466 | sb_printf2 (R_cur_r, "%.*s+%.*s", 1, &R_temp_kk, R_last_kj); | ||
1467 | } | ||
1468 | } | ||
1469 | /* | ||
1470 | * ba*a = ba+ | ||
1471 | * b(e|a)*(e|a) = ba* | ||
1472 | */ | ||
1473 | else if (0 == sb_strcmp (&R_temp_kk, &R_temp_kj)) | ||
1474 | { | ||
1475 | if (has_epsilon (R_last_kj)) | ||
1476 | { | ||
1477 | if (needs_parentheses (&R_temp_kk)) | ||
1478 | sb_printf2 (R_cur_r, "%.*s(%.*s)*", 3, R_last_ik, &R_temp_kk); | ||
1479 | else | ||
1480 | sb_printf2 (R_cur_r, "%.*s%.*s*", 1, R_last_ik, &R_temp_kk); | ||
1481 | } | ||
1482 | else | ||
1483 | { | ||
1484 | if (needs_parentheses (&R_temp_kk)) | ||
1485 | sb_printf2 (R_cur_r, "(%.*s)+%.*s", 3, R_last_ik, &R_temp_kk); | ||
1486 | else | ||
1487 | sb_printf2 (R_cur_r, "%.*s+%.*s", 1, R_last_ik, &R_temp_kk); | ||
1488 | } | ||
1489 | } | ||
1490 | else | ||
1491 | { | ||
1492 | if (0 < R_temp_kk.slen) | ||
1493 | { | ||
1494 | if (needs_parentheses (&R_temp_kk)) | ||
1495 | { | ||
1496 | sb_printf3 (R_cur_r, | ||
1497 | "%.*s(%.*s)*%.*s", | ||
1498 | 3, | ||
1499 | R_last_ik, | ||
1500 | &R_temp_kk, | ||
1501 | R_last_kj); | ||
1502 | } | ||
1503 | else | ||
1504 | { | ||
1505 | sb_printf3 (R_cur_r, | ||
1506 | "%.*s%.*s*%.*s", | ||
1507 | 1, | ||
1508 | R_last_ik, | ||
1509 | &R_temp_kk, | ||
1510 | R_last_kj); | ||
1511 | } | ||
1512 | } | ||
1513 | else | ||
1514 | { | ||
1515 | sb_printf2 (R_cur_r, "%.*s%.*s", 0, R_last_ik, R_last_kj); | ||
1516 | } | ||
1517 | } | ||
1518 | } | ||
1519 | sb_free (&R_temp_ij); | ||
1520 | sb_free (&R_temp_ik); | ||
1521 | sb_free (&R_temp_kk); | ||
1522 | sb_free (&R_temp_kj); | ||
1523 | |||
1524 | if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag)) | ||
1525 | { | ||
1526 | R_cur_ij->null_flag = GNUNET_YES; | ||
1527 | return; | ||
1528 | } | ||
1529 | |||
1530 | if ((GNUNET_YES != R_cur_l->null_flag) && (GNUNET_YES == R_cur_r->null_flag)) | ||
1531 | { | ||
1532 | struct StringBuffer tmp; | ||
1533 | |||
1534 | tmp = *R_cur_ij; | ||
1535 | *R_cur_ij = *R_cur_l; | ||
1536 | *R_cur_l = tmp; | ||
1537 | return; | ||
1538 | } | ||
1539 | |||
1540 | if ((GNUNET_YES == R_cur_l->null_flag) && (GNUNET_YES != R_cur_r->null_flag)) | ||
1541 | { | ||
1542 | struct StringBuffer tmp; | ||
1543 | |||
1544 | tmp = *R_cur_ij; | ||
1545 | *R_cur_ij = *R_cur_r; | ||
1546 | *R_cur_r = tmp; | ||
1547 | return; | ||
1548 | } | ||
1549 | |||
1550 | if (0 == sb_nullstrcmp (R_cur_l, R_cur_r)) | ||
1551 | { | ||
1552 | struct StringBuffer tmp; | ||
1553 | |||
1554 | tmp = *R_cur_ij; | ||
1555 | *R_cur_ij = *R_cur_l; | ||
1556 | *R_cur_l = tmp; | ||
1557 | return; | ||
1558 | } | ||
1559 | sb_printf2 (R_cur_ij, "(%.*s|%.*s)", 3, R_cur_l, R_cur_r); | ||
1560 | } | ||
1561 | |||
1562 | |||
1563 | /** | ||
1564 | * Create proofs for all states in the given automaton. Implementation of the | ||
1565 | * algorithm described in chapter 3.2.1 of "Automata Theory, Languages, and | ||
1566 | * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. | ||
1567 | * | ||
1568 | * Each state in the automaton gets assigned 'proof' and 'hash' (hash of the | ||
1569 | * proof) fields. The starting state will only have a valid proof/hash if it has | ||
1570 | * any incoming transitions. | ||
1571 | * | ||
1572 | * @param a automaton for which to assign proofs and hashes, must not be NULL | ||
1573 | */ | ||
1574 | static int | ||
1575 | automaton_create_proofs (struct REGEX_INTERNAL_Automaton *a) | ||
1576 | { | ||
1577 | unsigned int n = a->state_count; | ||
1578 | struct REGEX_INTERNAL_State *states[n]; | ||
1579 | struct StringBuffer *R_last; | ||
1580 | struct StringBuffer *R_cur; | ||
1581 | struct StringBuffer R_cur_r; | ||
1582 | struct StringBuffer R_cur_l; | ||
1583 | struct StringBuffer *R_swap; | ||
1584 | struct REGEX_INTERNAL_Transition *t; | ||
1585 | struct StringBuffer complete_regex; | ||
1586 | unsigned int i; | ||
1587 | unsigned int j; | ||
1588 | unsigned int k; | ||
1589 | |||
1590 | R_last = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n); | ||
1591 | R_cur = GNUNET_malloc_large (sizeof(struct StringBuffer) * n * n); | ||
1592 | if ((NULL == R_last) || (NULL == R_cur)) | ||
1593 | { | ||
1594 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc"); | ||
1595 | GNUNET_free (R_cur); | ||
1596 | GNUNET_free (R_last); | ||
1597 | return GNUNET_SYSERR; | ||
1598 | } | ||
1599 | |||
1600 | /* create depth-first numbering of the states, initializes 'state' */ | ||
1601 | REGEX_INTERNAL_automaton_traverse (a, | ||
1602 | a->start, | ||
1603 | NULL, | ||
1604 | NULL, | ||
1605 | &number_states, | ||
1606 | states); | ||
1607 | |||
1608 | for (i = 0; i < n; i++) | ||
1609 | GNUNET_assert (NULL != states[i]); | ||
1610 | for (i = 0; i < n; i++) | ||
1611 | for (j = 0; j < n; j++) | ||
1612 | R_last[i * n + j].null_flag = GNUNET_YES; | ||
1613 | |||
1614 | /* Compute regular expressions of length "1" between each pair of states */ | ||
1615 | for (i = 0; i < n; i++) | ||
1616 | { | ||
1617 | for (t = states[i]->transitions_head; NULL != t; t = t->next) | ||
1618 | { | ||
1619 | j = t->to_state->dfs_id; | ||
1620 | if (GNUNET_YES == R_last[i * n + j].null_flag) | ||
1621 | { | ||
1622 | sb_strdup_cstr (&R_last[i * n + j], t->label); | ||
1623 | } | ||
1624 | else | ||
1625 | { | ||
1626 | sb_append_cstr (&R_last[i * n + j], "|"); | ||
1627 | sb_append_cstr (&R_last[i * n + j], t->label); | ||
1628 | } | ||
1629 | } | ||
1630 | /* add self-loop: i is reachable from i via epsilon-transition */ | ||
1631 | if (GNUNET_YES == R_last[i * n + i].null_flag) | ||
1632 | { | ||
1633 | R_last[i * n + i].slen = 0; | ||
1634 | R_last[i * n + i].null_flag = GNUNET_NO; | ||
1635 | } | ||
1636 | else | ||
1637 | { | ||
1638 | sb_wrap (&R_last[i * n + i], "(|%.*s)", 3); | ||
1639 | } | ||
1640 | } | ||
1641 | for (i = 0; i < n; i++) | ||
1642 | for (j = 0; j < n; j++) | ||
1643 | if (needs_parentheses (&R_last[i * n + j])) | ||
1644 | sb_wrap (&R_last[i * n + j], "(%.*s)", 2); | ||
1645 | /* Compute regular expressions of length "k" between each pair of states per | ||
1646 | * induction */ | ||
1647 | memset (&R_cur_l, 0, sizeof(struct StringBuffer)); | ||
1648 | memset (&R_cur_r, 0, sizeof(struct StringBuffer)); | ||
1649 | for (k = 0; k < n; k++) | ||
1650 | { | ||
1651 | for (i = 0; i < n; i++) | ||
1652 | { | ||
1653 | for (j = 0; j < n; j++) | ||
1654 | { | ||
1655 | /* Basis for the recursion: | ||
1656 | * $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk} )^* R^{(k-1)}_{kj} | ||
1657 | * R_last == R^{(k-1)}, R_cur == R^{(k)} | ||
1658 | */ | ||
1659 | |||
1660 | /* Create R_cur[i][j] and simplify the expression */ | ||
1661 | automaton_create_proofs_simplify (&R_last[i * n + j], | ||
1662 | &R_last[i * n + k], | ||
1663 | &R_last[k * n + k], | ||
1664 | &R_last[k * n + j], | ||
1665 | &R_cur[i * n + j], | ||
1666 | &R_cur_l, | ||
1667 | &R_cur_r); | ||
1668 | } | ||
1669 | } | ||
1670 | /* set R_last = R_cur */ | ||
1671 | R_swap = R_last; | ||
1672 | R_last = R_cur; | ||
1673 | R_cur = R_swap; | ||
1674 | /* clear 'R_cur' for next iteration */ | ||
1675 | for (i = 0; i < n; i++) | ||
1676 | for (j = 0; j < n; j++) | ||
1677 | R_cur[i * n + j].null_flag = GNUNET_YES; | ||
1678 | } | ||
1679 | sb_free (&R_cur_l); | ||
1680 | sb_free (&R_cur_r); | ||
1681 | /* assign proofs and hashes */ | ||
1682 | for (i = 0; i < n; i++) | ||
1683 | { | ||
1684 | if (GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) | ||
1685 | { | ||
1686 | states[i]->proof = GNUNET_strndup (R_last[a->start->dfs_id * n + i].sbuf, | ||
1687 | R_last[a->start->dfs_id * n + i].slen); | ||
1688 | GNUNET_CRYPTO_hash (states[i]->proof, | ||
1689 | strlen (states[i]->proof), | ||
1690 | &states[i]->hash); | ||
1691 | } | ||
1692 | } | ||
1693 | |||
1694 | /* complete regex for whole DFA: union of all pairs (start state/accepting | ||
1695 | * state(s)). */ | ||
1696 | sb_init (&complete_regex, 16 * n); | ||
1697 | for (i = 0; i < n; i++) | ||
1698 | { | ||
1699 | if (states[i]->accepting) | ||
1700 | { | ||
1701 | if ((0 == complete_regex.slen) && | ||
1702 | (0 < R_last[a->start->dfs_id * n + i].slen)) | ||
1703 | { | ||
1704 | sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]); | ||
1705 | } | ||
1706 | else if ((GNUNET_YES != R_last[a->start->dfs_id * n + i].null_flag) && | ||
1707 | (0 < R_last[a->start->dfs_id * n + i].slen)) | ||
1708 | { | ||
1709 | sb_append_cstr (&complete_regex, "|"); | ||
1710 | sb_append (&complete_regex, &R_last[a->start->dfs_id * n + i]); | ||
1711 | } | ||
1712 | } | ||
1713 | } | ||
1714 | a->canonical_regex = | ||
1715 | GNUNET_strndup (complete_regex.sbuf, complete_regex.slen); | ||
1716 | |||
1717 | /* cleanup */ | ||
1718 | sb_free (&complete_regex); | ||
1719 | for (i = 0; i < n; i++) | ||
1720 | for (j = 0; j < n; j++) | ||
1721 | { | ||
1722 | sb_free (&R_cur[i * n + j]); | ||
1723 | sb_free (&R_last[i * n + j]); | ||
1724 | } | ||
1725 | GNUNET_free (R_cur); | ||
1726 | GNUNET_free (R_last); | ||
1727 | return GNUNET_OK; | ||
1728 | } | ||
1729 | |||
1730 | |||
1731 | /** | ||
1732 | * Creates a new DFA state based on a set of NFA states. Needs to be freed using | ||
1733 | * automaton_destroy_state. | ||
1734 | * | ||
1735 | * @param ctx context | ||
1736 | * @param nfa_states set of NFA states on which the DFA should be based on | ||
1737 | * | ||
1738 | * @return new DFA state | ||
1739 | */ | ||
1740 | static struct REGEX_INTERNAL_State * | ||
1741 | dfa_state_create (struct REGEX_INTERNAL_Context *ctx, | ||
1742 | struct REGEX_INTERNAL_StateSet *nfa_states) | ||
1743 | { | ||
1744 | struct REGEX_INTERNAL_State *s; | ||
1745 | char *pos; | ||
1746 | size_t len; | ||
1747 | struct REGEX_INTERNAL_State *cstate; | ||
1748 | struct REGEX_INTERNAL_Transition *ctran; | ||
1749 | unsigned int i; | ||
1750 | |||
1751 | s = GNUNET_new (struct REGEX_INTERNAL_State); | ||
1752 | s->id = ctx->state_id++; | ||
1753 | s->index = -1; | ||
1754 | s->lowlink = -1; | ||
1755 | |||
1756 | if (NULL == nfa_states) | ||
1757 | { | ||
1758 | GNUNET_asprintf (&s->name, "s%i", s->id); | ||
1759 | return s; | ||
1760 | } | ||
1761 | |||
1762 | s->nfa_set = *nfa_states; | ||
1763 | |||
1764 | if (nfa_states->off < 1) | ||
1765 | return s; | ||
1766 | |||
1767 | /* Create a name based on 'nfa_states' */ | ||
1768 | len = nfa_states->off * 14 + 4; | ||
1769 | s->name = GNUNET_malloc (len); | ||
1770 | strcat (s->name, "{"); | ||
1771 | pos = s->name + 1; | ||
1772 | |||
1773 | for (i = 0; i < nfa_states->off; i++) | ||
1774 | { | ||
1775 | cstate = nfa_states->states[i]; | ||
1776 | GNUNET_snprintf (pos, pos - s->name + len, "%i,", cstate->id); | ||
1777 | pos += strlen (pos); | ||
1778 | |||
1779 | /* Add a transition for each distinct label to NULL state */ | ||
1780 | for (ctran = cstate->transitions_head; NULL != ctran; ctran = ctran->next) | ||
1781 | if (NULL != ctran->label) | ||
1782 | state_add_transition (ctx, s, ctran->label, NULL); | ||
1783 | |||
1784 | /* If the nfa_states contain an accepting state, the new dfa state is also | ||
1785 | * accepting. */ | ||
1786 | if (cstate->accepting) | ||
1787 | s->accepting = 1; | ||
1788 | } | ||
1789 | pos[-1] = '}'; | ||
1790 | s->name = GNUNET_realloc (s->name, strlen (s->name) + 1); | ||
1791 | |||
1792 | memset (nfa_states, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
1793 | return s; | ||
1794 | } | ||
1795 | |||
1796 | |||
1797 | /** | ||
1798 | * Move from the given state 's' to the next state on transition 'str'. Consumes | ||
1799 | * as much of the given 'str' as possible (useful for strided DFAs). On return | ||
1800 | * 's' will point to the next state, and the length of the substring used for | ||
1801 | * this transition will be returned. If no transition possible 0 is returned and | ||
1802 | * 's' points to NULL. | ||
1803 | * | ||
1804 | * @param s starting state, will point to the next state or NULL (if no | ||
1805 | * transition possible) | ||
1806 | * @param str edge label to follow (will match longest common prefix) | ||
1807 | * | ||
1808 | * @return length of the substring consumed from 'str' | ||
1809 | */ | ||
1810 | static unsigned int | ||
1811 | dfa_move (struct REGEX_INTERNAL_State **s, const char *str) | ||
1812 | { | ||
1813 | struct REGEX_INTERNAL_Transition *t; | ||
1814 | struct REGEX_INTERNAL_State *new_s; | ||
1815 | unsigned int len; | ||
1816 | unsigned int max_len; | ||
1817 | |||
1818 | if (NULL == s) | ||
1819 | return 0; | ||
1820 | |||
1821 | new_s = NULL; | ||
1822 | max_len = 0; | ||
1823 | for (t = (*s)->transitions_head; NULL != t; t = t->next) | ||
1824 | { | ||
1825 | len = strlen (t->label); | ||
1826 | |||
1827 | if (0 == strncmp (t->label, str, len)) | ||
1828 | { | ||
1829 | if (len >= max_len) | ||
1830 | { | ||
1831 | max_len = len; | ||
1832 | new_s = t->to_state; | ||
1833 | } | ||
1834 | } | ||
1835 | } | ||
1836 | |||
1837 | *s = new_s; | ||
1838 | return max_len; | ||
1839 | } | ||
1840 | |||
1841 | |||
1842 | /** | ||
1843 | * Set the given state 'marked' to #GNUNET_YES. Used by the | ||
1844 | * #dfa_remove_unreachable_states() function to detect unreachable states in the | ||
1845 | * automaton. | ||
1846 | * | ||
1847 | * @param cls closure, not used. | ||
1848 | * @param count count, not used. | ||
1849 | * @param s state where the marked attribute will be set to #GNUNET_YES. | ||
1850 | */ | ||
1851 | static void | ||
1852 | mark_states (void *cls, | ||
1853 | const unsigned int count, | ||
1854 | struct REGEX_INTERNAL_State *s) | ||
1855 | { | ||
1856 | s->marked = GNUNET_YES; | ||
1857 | } | ||
1858 | |||
1859 | |||
1860 | /** | ||
1861 | * Remove all unreachable states from DFA 'a'. Unreachable states are those | ||
1862 | * states that are not reachable from the starting state. | ||
1863 | * | ||
1864 | * @param a DFA automaton | ||
1865 | */ | ||
1866 | static void | ||
1867 | dfa_remove_unreachable_states (struct REGEX_INTERNAL_Automaton *a) | ||
1868 | { | ||
1869 | struct REGEX_INTERNAL_State *s; | ||
1870 | struct REGEX_INTERNAL_State *s_next; | ||
1871 | |||
1872 | /* 1. unmark all states */ | ||
1873 | for (s = a->states_head; NULL != s; s = s->next) | ||
1874 | s->marked = GNUNET_NO; | ||
1875 | |||
1876 | /* 2. traverse dfa from start state and mark all visited states */ | ||
1877 | REGEX_INTERNAL_automaton_traverse (a, | ||
1878 | a->start, | ||
1879 | NULL, | ||
1880 | NULL, | ||
1881 | &mark_states, | ||
1882 | NULL); | ||
1883 | |||
1884 | /* 3. delete all states that were not visited */ | ||
1885 | for (s = a->states_head; NULL != s; s = s_next) | ||
1886 | { | ||
1887 | s_next = s->next; | ||
1888 | if (GNUNET_NO == s->marked) | ||
1889 | automaton_remove_state (a, s); | ||
1890 | } | ||
1891 | } | ||
1892 | |||
1893 | |||
1894 | /** | ||
1895 | * Remove all dead states from the DFA 'a'. Dead states are those states that do | ||
1896 | * not transition to any other state but themselves. | ||
1897 | * | ||
1898 | * @param a DFA automaton | ||
1899 | */ | ||
1900 | static void | ||
1901 | dfa_remove_dead_states (struct REGEX_INTERNAL_Automaton *a) | ||
1902 | { | ||
1903 | struct REGEX_INTERNAL_State *s; | ||
1904 | struct REGEX_INTERNAL_State *s_next; | ||
1905 | struct REGEX_INTERNAL_Transition *t; | ||
1906 | int dead; | ||
1907 | |||
1908 | GNUNET_assert (DFA == a->type); | ||
1909 | |||
1910 | for (s = a->states_head; NULL != s; s = s_next) | ||
1911 | { | ||
1912 | s_next = s->next; | ||
1913 | |||
1914 | if (s->accepting) | ||
1915 | continue; | ||
1916 | |||
1917 | dead = 1; | ||
1918 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
1919 | { | ||
1920 | if ((NULL != t->to_state) && (t->to_state != s) ) | ||
1921 | { | ||
1922 | dead = 0; | ||
1923 | break; | ||
1924 | } | ||
1925 | } | ||
1926 | |||
1927 | if (0 == dead) | ||
1928 | continue; | ||
1929 | |||
1930 | /* state s is dead, remove it */ | ||
1931 | automaton_remove_state (a, s); | ||
1932 | } | ||
1933 | } | ||
1934 | |||
1935 | |||
1936 | /** | ||
1937 | * Merge all non distinguishable states in the DFA 'a' | ||
1938 | * | ||
1939 | * @param ctx context | ||
1940 | * @param a DFA automaton | ||
1941 | * @return #GNUNET_OK on success | ||
1942 | */ | ||
1943 | static int | ||
1944 | dfa_merge_nondistinguishable_states (struct REGEX_INTERNAL_Context *ctx, | ||
1945 | struct REGEX_INTERNAL_Automaton *a) | ||
1946 | { | ||
1947 | uint32_t *table; | ||
1948 | struct REGEX_INTERNAL_State *s1; | ||
1949 | struct REGEX_INTERNAL_State *s2; | ||
1950 | struct REGEX_INTERNAL_Transition *t1; | ||
1951 | struct REGEX_INTERNAL_Transition *t2; | ||
1952 | struct REGEX_INTERNAL_State *s1_next; | ||
1953 | struct REGEX_INTERNAL_State *s2_next; | ||
1954 | int change; | ||
1955 | unsigned int num_equal_edges; | ||
1956 | unsigned int i; | ||
1957 | unsigned int state_cnt; | ||
1958 | unsigned long long idx; | ||
1959 | unsigned long long idx1; | ||
1960 | |||
1961 | if ((NULL == a) || (0 == a->state_count)) | ||
1962 | { | ||
1963 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
1964 | "Could not merge nondistinguishable states, automaton was NULL.\n"); | ||
1965 | return GNUNET_SYSERR; | ||
1966 | } | ||
1967 | |||
1968 | state_cnt = a->state_count; | ||
1969 | table = GNUNET_malloc_large ( | ||
1970 | (sizeof(uint32_t) * state_cnt * state_cnt / 32) + sizeof(uint32_t)); | ||
1971 | if (NULL == table) | ||
1972 | { | ||
1973 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc"); | ||
1974 | return GNUNET_SYSERR; | ||
1975 | } | ||
1976 | |||
1977 | for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next) | ||
1978 | s1->marked = i++; | ||
1979 | |||
1980 | /* Mark all pairs of accepting/!accepting states */ | ||
1981 | for (s1 = a->states_head; NULL != s1; s1 = s1->next) | ||
1982 | for (s2 = a->states_head; NULL != s2; s2 = s2->next) | ||
1983 | if ((s1->accepting && ! s2->accepting) || | ||
1984 | (! s1->accepting && s2->accepting)) | ||
1985 | { | ||
1986 | idx = (unsigned long long) s1->marked * state_cnt + s2->marked; | ||
1987 | table[idx / 32] |= (1U << (idx % 32)); | ||
1988 | } | ||
1989 | |||
1990 | /* Find all equal states */ | ||
1991 | change = 1; | ||
1992 | while (0 != change) | ||
1993 | { | ||
1994 | change = 0; | ||
1995 | for (s1 = a->states_head; NULL != s1; s1 = s1->next) | ||
1996 | { | ||
1997 | for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2->next) | ||
1998 | { | ||
1999 | idx = (unsigned long long) s1->marked * state_cnt + s2->marked; | ||
2000 | if (0 != (table[idx / 32] & (1U << (idx % 32)))) | ||
2001 | continue; | ||
2002 | num_equal_edges = 0; | ||
2003 | for (t1 = s1->transitions_head; NULL != t1; t1 = t1->next) | ||
2004 | { | ||
2005 | for (t2 = s2->transitions_head; NULL != t2; t2 = t2->next) | ||
2006 | { | ||
2007 | if (0 == strcmp (t1->label, t2->label)) | ||
2008 | { | ||
2009 | num_equal_edges++; | ||
2010 | /* same edge, but targets definitively different, so we're different | ||
2011 | as well */ | ||
2012 | if (t1->to_state->marked > t2->to_state->marked) | ||
2013 | idx1 = (unsigned long long) t1->to_state->marked * state_cnt | ||
2014 | + t2->to_state->marked; | ||
2015 | else | ||
2016 | idx1 = (unsigned long long) t2->to_state->marked * state_cnt | ||
2017 | + t1->to_state->marked; | ||
2018 | if (0 != (table[idx1 / 32] & (1U << (idx1 % 32)))) | ||
2019 | { | ||
2020 | table[idx / 32] |= (1U << (idx % 32)); | ||
2021 | change = 1; /* changed a marker, need to run again */ | ||
2022 | } | ||
2023 | } | ||
2024 | } | ||
2025 | } | ||
2026 | if ((num_equal_edges != s1->transition_count) || | ||
2027 | (num_equal_edges != s2->transition_count)) | ||
2028 | { | ||
2029 | /* Make sure ALL edges of possible equal states are the same */ | ||
2030 | table[idx / 32] |= (1U << (idx % 32)); | ||
2031 | change = 1; /* changed a marker, need to run again */ | ||
2032 | } | ||
2033 | } | ||
2034 | } | ||
2035 | } | ||
2036 | |||
2037 | /* Merge states that are equal */ | ||
2038 | for (s1 = a->states_head; NULL != s1; s1 = s1_next) | ||
2039 | { | ||
2040 | s1_next = s1->next; | ||
2041 | for (s2 = a->states_head; NULL != s2 && s1 != s2; s2 = s2_next) | ||
2042 | { | ||
2043 | s2_next = s2->next; | ||
2044 | idx = (unsigned long long) s1->marked * state_cnt + s2->marked; | ||
2045 | if (0 == (table[idx / 32] & (1U << (idx % 32)))) | ||
2046 | automaton_merge_states (ctx, a, s1, s2); | ||
2047 | } | ||
2048 | } | ||
2049 | |||
2050 | GNUNET_free (table); | ||
2051 | return GNUNET_OK; | ||
2052 | } | ||
2053 | |||
2054 | |||
2055 | /** | ||
2056 | * Minimize the given DFA 'a' by removing all unreachable states, removing all | ||
2057 | * dead states and merging all non distinguishable states | ||
2058 | * | ||
2059 | * @param ctx context | ||
2060 | * @param a DFA automaton | ||
2061 | * @return GNUNET_OK on success | ||
2062 | */ | ||
2063 | static int | ||
2064 | dfa_minimize (struct REGEX_INTERNAL_Context *ctx, | ||
2065 | struct REGEX_INTERNAL_Automaton *a) | ||
2066 | { | ||
2067 | if (NULL == a) | ||
2068 | return GNUNET_SYSERR; | ||
2069 | |||
2070 | GNUNET_assert (DFA == a->type); | ||
2071 | |||
2072 | /* 1. remove unreachable states */ | ||
2073 | dfa_remove_unreachable_states (a); | ||
2074 | |||
2075 | /* 2. remove dead states */ | ||
2076 | dfa_remove_dead_states (a); | ||
2077 | |||
2078 | /* 3. Merge nondistinguishable states */ | ||
2079 | if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a)) | ||
2080 | return GNUNET_SYSERR; | ||
2081 | return GNUNET_OK; | ||
2082 | } | ||
2083 | |||
2084 | |||
/**
 * Context for adding strided transitions to a DFA. Handed as closure to the
 * stride helpers, which compare the traversal depth against 'stride' and
 * collect the transitions they create in the DLL below.
 */
struct REGEX_INTERNAL_Strided_Context
{
  /**
   * Length of the strides, i.e. the traversal depth at which a new
   * transition is recorded.
   */
  const unsigned int stride;

  /**
   * Head of the strided transitions DLL. New strided transitions will be
   * stored in this DLL and afterwards added to the DFA.
   */
  struct REGEX_INTERNAL_Transition *transitions_head;

  /**
   * Tail of the strided transitions DLL.
   */
  struct REGEX_INTERNAL_Transition *transitions_tail;
};
2106 | |||
2107 | |||
2108 | /** | ||
2109 | * Recursive helper function to add strides to a DFA. | ||
2110 | * | ||
2111 | * @param cls context, contains stride length and strided transitions DLL. | ||
2112 | * @param depth current depth of the depth-first traversal of the graph. | ||
2113 | * @param label current label, string that contains all labels on the path from | ||
2114 | * 'start' to 's'. | ||
2115 | * @param start start state for the depth-first traversal of the graph. | ||
2116 | * @param s current state in the depth-first traversal | ||
2117 | */ | ||
2118 | static void | ||
2119 | dfa_add_multi_strides_helper (void *cls, | ||
2120 | const unsigned int depth, | ||
2121 | char *label, | ||
2122 | struct REGEX_INTERNAL_State *start, | ||
2123 | struct REGEX_INTERNAL_State *s) | ||
2124 | { | ||
2125 | struct REGEX_INTERNAL_Strided_Context *ctx = cls; | ||
2126 | struct REGEX_INTERNAL_Transition *t; | ||
2127 | char *new_label; | ||
2128 | |||
2129 | if (depth == ctx->stride) | ||
2130 | { | ||
2131 | t = GNUNET_new (struct REGEX_INTERNAL_Transition); | ||
2132 | t->label = GNUNET_strdup (label); | ||
2133 | t->to_state = s; | ||
2134 | t->from_state = start; | ||
2135 | GNUNET_CONTAINER_DLL_insert (ctx->transitions_head, | ||
2136 | ctx->transitions_tail, | ||
2137 | t); | ||
2138 | } | ||
2139 | else | ||
2140 | { | ||
2141 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
2142 | { | ||
2143 | /* Do not consider self-loops, because it end's up in too many | ||
2144 | * transitions */ | ||
2145 | if (t->to_state == t->from_state) | ||
2146 | continue; | ||
2147 | |||
2148 | if (NULL != label) | ||
2149 | { | ||
2150 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
2151 | } | ||
2152 | else | ||
2153 | new_label = GNUNET_strdup (t->label); | ||
2154 | |||
2155 | dfa_add_multi_strides_helper (cls, | ||
2156 | (depth + 1), | ||
2157 | new_label, | ||
2158 | start, | ||
2159 | t->to_state); | ||
2160 | } | ||
2161 | } | ||
2162 | GNUNET_free (label); | ||
2163 | } | ||
2164 | |||
2165 | |||
/**
 * Per-state callback: kicks off the stride search at state 's', i.e. calls
 * the recursive helper with depth 0, no label yet, and 's' as both the
 * start and the current state.
 *
 * @param cls context.
 * @param count not used.
 * @param s current state.
 */
static void
dfa_add_multi_strides (void *cls,
                       const unsigned int count,
                       struct REGEX_INTERNAL_State *s)
{
  (void) count;
  dfa_add_multi_strides_helper (cls, 0, NULL, s, s);
}
2181 | |||
2182 | |||
2183 | /** | ||
2184 | * Adds multi-strided transitions to the given 'dfa'. | ||
2185 | * | ||
2186 | * @param regex_ctx regex context needed to add transitions to the automaton. | ||
2187 | * @param dfa DFA to which the multi strided transitions should be added. | ||
2188 | * @param stride_len length of the strides. | ||
2189 | */ | ||
2190 | void | ||
2191 | REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx, | ||
2192 | struct REGEX_INTERNAL_Automaton *dfa, | ||
2193 | const unsigned int stride_len) | ||
2194 | { | ||
2195 | struct REGEX_INTERNAL_Strided_Context ctx = { stride_len, NULL, NULL }; | ||
2196 | struct REGEX_INTERNAL_Transition *t; | ||
2197 | struct REGEX_INTERNAL_Transition *t_next; | ||
2198 | |||
2199 | if ((1 > stride_len) || (GNUNET_YES == dfa->is_multistrided)) | ||
2200 | return; | ||
2201 | |||
2202 | /* Compute the new transitions of given stride_len */ | ||
2203 | REGEX_INTERNAL_automaton_traverse (dfa, | ||
2204 | dfa->start, | ||
2205 | NULL, | ||
2206 | NULL, | ||
2207 | &dfa_add_multi_strides, | ||
2208 | &ctx); | ||
2209 | |||
2210 | /* Add all the new transitions to the automaton. */ | ||
2211 | for (t = ctx.transitions_head; NULL != t; t = t_next) | ||
2212 | { | ||
2213 | t_next = t->next; | ||
2214 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
2215 | GNUNET_CONTAINER_DLL_remove (ctx.transitions_head, ctx.transitions_tail, t); | ||
2216 | GNUNET_free (t->label); | ||
2217 | GNUNET_free (t); | ||
2218 | } | ||
2219 | |||
2220 | /* Mark this automaton as multistrided */ | ||
2221 | dfa->is_multistrided = GNUNET_YES; | ||
2222 | } | ||
2223 | |||
2224 | |||
2225 | /** | ||
2226 | * Recursive Helper function for DFA path compression. Does DFS on the DFA graph | ||
2227 | * and adds new transitions to the given transitions DLL and marks states that | ||
2228 | * should be removed by setting state->contained to GNUNET_YES. | ||
2229 | * | ||
2230 | * @param dfa DFA for which the paths should be compressed. | ||
2231 | * @param start starting state for linear path search. | ||
2232 | * @param cur current state in the recursive DFS. | ||
2233 | * @param label current label (string of traversed labels). | ||
2234 | * @param max_len maximal path compression length. | ||
2235 | * @param transitions_head transitions DLL. | ||
2236 | * @param transitions_tail transitions DLL. | ||
2237 | */ | ||
2238 | void | ||
2239 | dfa_compress_paths_helper (struct REGEX_INTERNAL_Automaton *dfa, | ||
2240 | struct REGEX_INTERNAL_State *start, | ||
2241 | struct REGEX_INTERNAL_State *cur, | ||
2242 | char *label, | ||
2243 | unsigned int max_len, | ||
2244 | struct REGEX_INTERNAL_Transition **transitions_head, | ||
2245 | struct REGEX_INTERNAL_Transition **transitions_tail) | ||
2246 | { | ||
2247 | struct REGEX_INTERNAL_Transition *t; | ||
2248 | char *new_label; | ||
2249 | |||
2250 | |||
2251 | if ((NULL != label) && | ||
2252 | (((cur->incoming_transition_count > 1) || (GNUNET_YES == | ||
2253 | cur->accepting) || | ||
2254 | (GNUNET_YES == cur->marked) ) || | ||
2255 | ((start != dfa->start) && (max_len > 0) && (max_len == strlen ( | ||
2256 | label))) || | ||
2257 | ((start == dfa->start) && (GNUNET_REGEX_INITIAL_BYTES == strlen ( | ||
2258 | label))))) | ||
2259 | { | ||
2260 | t = GNUNET_new (struct REGEX_INTERNAL_Transition); | ||
2261 | t->label = GNUNET_strdup (label); | ||
2262 | t->to_state = cur; | ||
2263 | t->from_state = start; | ||
2264 | GNUNET_CONTAINER_DLL_insert (*transitions_head, *transitions_tail, t); | ||
2265 | |||
2266 | if (GNUNET_NO == cur->marked) | ||
2267 | { | ||
2268 | dfa_compress_paths_helper (dfa, | ||
2269 | cur, | ||
2270 | cur, | ||
2271 | NULL, | ||
2272 | max_len, | ||
2273 | transitions_head, | ||
2274 | transitions_tail); | ||
2275 | } | ||
2276 | return; | ||
2277 | } | ||
2278 | else if (cur != start) | ||
2279 | cur->contained = GNUNET_YES; | ||
2280 | |||
2281 | if ((GNUNET_YES == cur->marked) && (cur != start)) | ||
2282 | return; | ||
2283 | |||
2284 | cur->marked = GNUNET_YES; | ||
2285 | |||
2286 | |||
2287 | for (t = cur->transitions_head; NULL != t; t = t->next) | ||
2288 | { | ||
2289 | if (NULL != label) | ||
2290 | GNUNET_asprintf (&new_label, "%s%s", label, t->label); | ||
2291 | else | ||
2292 | new_label = GNUNET_strdup (t->label); | ||
2293 | |||
2294 | if (t->to_state != cur) | ||
2295 | { | ||
2296 | dfa_compress_paths_helper (dfa, | ||
2297 | start, | ||
2298 | t->to_state, | ||
2299 | new_label, | ||
2300 | max_len, | ||
2301 | transitions_head, | ||
2302 | transitions_tail); | ||
2303 | } | ||
2304 | GNUNET_free (new_label); | ||
2305 | } | ||
2306 | } | ||
2307 | |||
2308 | |||
2309 | /** | ||
2310 | * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be | ||
2311 | * compressed to 0->3 by combining transitions. | ||
2312 | * | ||
2313 | * @param regex_ctx context for adding new transitions. | ||
2314 | * @param dfa DFA representation, will directly modify the given DFA. | ||
2315 | * @param max_len maximal length of the compressed paths. | ||
2316 | */ | ||
2317 | static void | ||
2318 | dfa_compress_paths (struct REGEX_INTERNAL_Context *regex_ctx, | ||
2319 | struct REGEX_INTERNAL_Automaton *dfa, | ||
2320 | unsigned int max_len) | ||
2321 | { | ||
2322 | struct REGEX_INTERNAL_State *s; | ||
2323 | struct REGEX_INTERNAL_State *s_next; | ||
2324 | struct REGEX_INTERNAL_Transition *t; | ||
2325 | struct REGEX_INTERNAL_Transition *t_next; | ||
2326 | struct REGEX_INTERNAL_Transition *transitions_head = NULL; | ||
2327 | struct REGEX_INTERNAL_Transition *transitions_tail = NULL; | ||
2328 | |||
2329 | if (NULL == dfa) | ||
2330 | return; | ||
2331 | |||
2332 | /* Count the incoming transitions on each state. */ | ||
2333 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
2334 | { | ||
2335 | for (t = s->transitions_head; NULL != t; t = t->next) | ||
2336 | { | ||
2337 | if (NULL != t->to_state) | ||
2338 | t->to_state->incoming_transition_count++; | ||
2339 | } | ||
2340 | } | ||
2341 | |||
2342 | /* Unmark all states. */ | ||
2343 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
2344 | { | ||
2345 | s->marked = GNUNET_NO; | ||
2346 | s->contained = GNUNET_NO; | ||
2347 | } | ||
2348 | |||
2349 | /* Add strides and mark states that can be deleted. */ | ||
2350 | dfa_compress_paths_helper (dfa, | ||
2351 | dfa->start, | ||
2352 | dfa->start, | ||
2353 | NULL, | ||
2354 | max_len, | ||
2355 | &transitions_head, | ||
2356 | &transitions_tail); | ||
2357 | |||
2358 | /* Add all the new transitions to the automaton. */ | ||
2359 | for (t = transitions_head; NULL != t; t = t_next) | ||
2360 | { | ||
2361 | t_next = t->next; | ||
2362 | state_add_transition (regex_ctx, t->from_state, t->label, t->to_state); | ||
2363 | GNUNET_CONTAINER_DLL_remove (transitions_head, transitions_tail, t); | ||
2364 | GNUNET_free (t->label); | ||
2365 | GNUNET_free (t); | ||
2366 | } | ||
2367 | |||
2368 | /* Remove marked states (including their incoming and outgoing transitions). */ | ||
2369 | for (s = dfa->states_head; NULL != s; s = s_next) | ||
2370 | { | ||
2371 | s_next = s->next; | ||
2372 | if (GNUNET_YES == s->contained) | ||
2373 | automaton_remove_state (dfa, s); | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | |||
2378 | /** | ||
2379 | * Creates a new NFA fragment. Needs to be cleared using | ||
2380 | * automaton_fragment_clear. | ||
2381 | * | ||
2382 | * @param start starting state | ||
2383 | * @param end end state | ||
2384 | * | ||
2385 | * @return new NFA fragment | ||
2386 | */ | ||
2387 | static struct REGEX_INTERNAL_Automaton * | ||
2388 | nfa_fragment_create (struct REGEX_INTERNAL_State *start, | ||
2389 | struct REGEX_INTERNAL_State *end) | ||
2390 | { | ||
2391 | struct REGEX_INTERNAL_Automaton *n; | ||
2392 | |||
2393 | n = GNUNET_new (struct REGEX_INTERNAL_Automaton); | ||
2394 | |||
2395 | n->type = NFA; | ||
2396 | n->start = NULL; | ||
2397 | n->end = NULL; | ||
2398 | n->state_count = 0; | ||
2399 | |||
2400 | if ((NULL == start) || (NULL == end)) | ||
2401 | return n; | ||
2402 | |||
2403 | automaton_add_state (n, end); | ||
2404 | automaton_add_state (n, start); | ||
2405 | |||
2406 | n->state_count = 2; | ||
2407 | |||
2408 | n->start = start; | ||
2409 | n->end = end; | ||
2410 | |||
2411 | return n; | ||
2412 | } | ||
2413 | |||
2414 | |||
2415 | /** | ||
2416 | * Adds a list of states to the given automaton 'n'. | ||
2417 | * | ||
2418 | * @param n automaton to which the states should be added | ||
2419 | * @param states_head head of the DLL of states | ||
2420 | * @param states_tail tail of the DLL of states | ||
2421 | */ | ||
2422 | static void | ||
2423 | nfa_add_states (struct REGEX_INTERNAL_Automaton *n, | ||
2424 | struct REGEX_INTERNAL_State *states_head, | ||
2425 | struct REGEX_INTERNAL_State *states_tail) | ||
2426 | { | ||
2427 | struct REGEX_INTERNAL_State *s; | ||
2428 | |||
2429 | if ((NULL == n) || (NULL == states_head)) | ||
2430 | { | ||
2431 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not add states\n"); | ||
2432 | return; | ||
2433 | } | ||
2434 | |||
2435 | if (NULL == n->states_head) | ||
2436 | { | ||
2437 | n->states_head = states_head; | ||
2438 | n->states_tail = states_tail; | ||
2439 | return; | ||
2440 | } | ||
2441 | |||
2442 | if (NULL != states_head) | ||
2443 | { | ||
2444 | n->states_tail->next = states_head; | ||
2445 | n->states_tail = states_tail; | ||
2446 | } | ||
2447 | |||
2448 | for (s = states_head; NULL != s; s = s->next) | ||
2449 | n->state_count++; | ||
2450 | } | ||
2451 | |||
2452 | |||
2453 | /** | ||
2454 | * Creates a new NFA state. Needs to be freed using automaton_destroy_state. | ||
2455 | * | ||
2456 | * @param ctx context | ||
2457 | * @param accepting is it an accepting state or not | ||
2458 | * | ||
2459 | * @return new NFA state | ||
2460 | */ | ||
2461 | static struct REGEX_INTERNAL_State * | ||
2462 | nfa_state_create (struct REGEX_INTERNAL_Context *ctx, int accepting) | ||
2463 | { | ||
2464 | struct REGEX_INTERNAL_State *s; | ||
2465 | |||
2466 | s = GNUNET_new (struct REGEX_INTERNAL_State); | ||
2467 | s->id = ctx->state_id++; | ||
2468 | s->accepting = accepting; | ||
2469 | s->marked = GNUNET_NO; | ||
2470 | s->contained = 0; | ||
2471 | s->index = -1; | ||
2472 | s->lowlink = -1; | ||
2473 | s->scc_id = 0; | ||
2474 | s->name = NULL; | ||
2475 | GNUNET_asprintf (&s->name, "s%i", s->id); | ||
2476 | |||
2477 | return s; | ||
2478 | } | ||
2479 | |||
2480 | |||
2481 | /** | ||
2482 | * Calculates the closure set for the given set of states. | ||
2483 | * | ||
2484 | * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | ||
2485 | * @param nfa the NFA containing 's' | ||
2486 | * @param states list of states on which to base the closure on | ||
2487 | * @param label transitioning label for which to base the closure on, | ||
2488 | * pass NULL for epsilon transition | ||
2489 | */ | ||
2490 | static void | ||
2491 | nfa_closure_set_create (struct REGEX_INTERNAL_StateSet *ret, | ||
2492 | struct REGEX_INTERNAL_Automaton *nfa, | ||
2493 | struct REGEX_INTERNAL_StateSet *states, | ||
2494 | const char *label) | ||
2495 | { | ||
2496 | struct REGEX_INTERNAL_State *s; | ||
2497 | unsigned int i; | ||
2498 | struct REGEX_INTERNAL_StateSet_MDLL cls_stack; | ||
2499 | struct REGEX_INTERNAL_State *clsstate; | ||
2500 | struct REGEX_INTERNAL_State *currentstate; | ||
2501 | struct REGEX_INTERNAL_Transition *ctran; | ||
2502 | |||
2503 | memset (ret, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
2504 | if (NULL == states) | ||
2505 | return; | ||
2506 | |||
2507 | for (i = 0; i < states->off; i++) | ||
2508 | { | ||
2509 | s = states->states[i]; | ||
2510 | |||
2511 | /* Add start state to closure only for epsilon closure */ | ||
2512 | if (NULL == label) | ||
2513 | state_set_append (ret, s); | ||
2514 | |||
2515 | /* initialize work stack */ | ||
2516 | cls_stack.head = NULL; | ||
2517 | cls_stack.tail = NULL; | ||
2518 | GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s); | ||
2519 | cls_stack.len = 1; | ||
2520 | |||
2521 | while (NULL != (currentstate = cls_stack.tail)) | ||
2522 | { | ||
2523 | GNUNET_CONTAINER_MDLL_remove (ST, | ||
2524 | cls_stack.head, | ||
2525 | cls_stack.tail, | ||
2526 | currentstate); | ||
2527 | cls_stack.len--; | ||
2528 | for (ctran = currentstate->transitions_head; NULL != ctran; | ||
2529 | ctran = ctran->next) | ||
2530 | { | ||
2531 | if (NULL == (clsstate = ctran->to_state)) | ||
2532 | continue; | ||
2533 | if (0 != clsstate->contained) | ||
2534 | continue; | ||
2535 | if (0 != nullstrcmp (label, ctran->label)) | ||
2536 | continue; | ||
2537 | state_set_append (ret, clsstate); | ||
2538 | GNUNET_CONTAINER_MDLL_insert_tail (ST, | ||
2539 | cls_stack.head, | ||
2540 | cls_stack.tail, | ||
2541 | clsstate); | ||
2542 | cls_stack.len++; | ||
2543 | clsstate->contained = 1; | ||
2544 | } | ||
2545 | } | ||
2546 | } | ||
2547 | for (i = 0; i < ret->off; i++) | ||
2548 | ret->states[i]->contained = 0; | ||
2549 | |||
2550 | if (ret->off > 1) | ||
2551 | qsort (ret->states, | ||
2552 | ret->off, | ||
2553 | sizeof(struct REGEX_INTERNAL_State *), | ||
2554 | &state_compare); | ||
2555 | } | ||
2556 | |||
2557 | |||
2558 | /** | ||
2559 | * Pops two NFA fragments (a, b) from the stack and concatenates them (ab) | ||
2560 | * | ||
2561 | * @param ctx context | ||
2562 | */ | ||
2563 | static void | ||
2564 | nfa_add_concatenation (struct REGEX_INTERNAL_Context *ctx) | ||
2565 | { | ||
2566 | struct REGEX_INTERNAL_Automaton *a; | ||
2567 | struct REGEX_INTERNAL_Automaton *b; | ||
2568 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2569 | |||
2570 | b = ctx->stack_tail; | ||
2571 | GNUNET_assert (NULL != b); | ||
2572 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b); | ||
2573 | a = ctx->stack_tail; | ||
2574 | GNUNET_assert (NULL != a); | ||
2575 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2576 | |||
2577 | state_add_transition (ctx, a->end, NULL, b->start); | ||
2578 | a->end->accepting = 0; | ||
2579 | b->end->accepting = 1; | ||
2580 | |||
2581 | new_nfa = nfa_fragment_create (NULL, NULL); | ||
2582 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2583 | nfa_add_states (new_nfa, b->states_head, b->states_tail); | ||
2584 | new_nfa->start = a->start; | ||
2585 | new_nfa->end = b->end; | ||
2586 | new_nfa->state_count += a->state_count + b->state_count; | ||
2587 | automaton_fragment_clear (a); | ||
2588 | automaton_fragment_clear (b); | ||
2589 | |||
2590 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2591 | } | ||
2592 | |||
2593 | |||
2594 | /** | ||
2595 | * Pops a NFA fragment from the stack (a) and adds a new fragment (a*) | ||
2596 | * | ||
2597 | * @param ctx context | ||
2598 | */ | ||
2599 | static void | ||
2600 | nfa_add_star_op (struct REGEX_INTERNAL_Context *ctx) | ||
2601 | { | ||
2602 | struct REGEX_INTERNAL_Automaton *a; | ||
2603 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2604 | struct REGEX_INTERNAL_State *start; | ||
2605 | struct REGEX_INTERNAL_State *end; | ||
2606 | |||
2607 | a = ctx->stack_tail; | ||
2608 | |||
2609 | if (NULL == a) | ||
2610 | { | ||
2611 | GNUNET_log ( | ||
2612 | GNUNET_ERROR_TYPE_ERROR, | ||
2613 | "nfa_add_star_op failed, because there was no element on the stack"); | ||
2614 | return; | ||
2615 | } | ||
2616 | |||
2617 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2618 | |||
2619 | start = nfa_state_create (ctx, 0); | ||
2620 | end = nfa_state_create (ctx, 1); | ||
2621 | |||
2622 | state_add_transition (ctx, start, NULL, a->start); | ||
2623 | state_add_transition (ctx, start, NULL, end); | ||
2624 | state_add_transition (ctx, a->end, NULL, a->start); | ||
2625 | state_add_transition (ctx, a->end, NULL, end); | ||
2626 | |||
2627 | a->end->accepting = 0; | ||
2628 | end->accepting = 1; | ||
2629 | |||
2630 | new_nfa = nfa_fragment_create (start, end); | ||
2631 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2632 | automaton_fragment_clear (a); | ||
2633 | |||
2634 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2635 | } | ||
2636 | |||
2637 | |||
2638 | /** | ||
2639 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a+) | ||
2640 | * | ||
2641 | * @param ctx context | ||
2642 | */ | ||
2643 | static void | ||
2644 | nfa_add_plus_op (struct REGEX_INTERNAL_Context *ctx) | ||
2645 | { | ||
2646 | struct REGEX_INTERNAL_Automaton *a; | ||
2647 | |||
2648 | a = ctx->stack_tail; | ||
2649 | |||
2650 | if (NULL == a) | ||
2651 | { | ||
2652 | GNUNET_log ( | ||
2653 | GNUNET_ERROR_TYPE_ERROR, | ||
2654 | "nfa_add_plus_op failed, because there was no element on the stack"); | ||
2655 | return; | ||
2656 | } | ||
2657 | |||
2658 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2659 | |||
2660 | state_add_transition (ctx, a->end, NULL, a->start); | ||
2661 | |||
2662 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, a); | ||
2663 | } | ||
2664 | |||
2665 | |||
2666 | /** | ||
2667 | * Pops an NFA fragment (a) from the stack and adds a new fragment (a?) | ||
2668 | * | ||
2669 | * @param ctx context | ||
2670 | */ | ||
2671 | static void | ||
2672 | nfa_add_question_op (struct REGEX_INTERNAL_Context *ctx) | ||
2673 | { | ||
2674 | struct REGEX_INTERNAL_Automaton *a; | ||
2675 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2676 | struct REGEX_INTERNAL_State *start; | ||
2677 | struct REGEX_INTERNAL_State *end; | ||
2678 | |||
2679 | a = ctx->stack_tail; | ||
2680 | if (NULL == a) | ||
2681 | { | ||
2682 | GNUNET_log ( | ||
2683 | GNUNET_ERROR_TYPE_ERROR, | ||
2684 | "nfa_add_question_op failed, because there was no element on the stack"); | ||
2685 | return; | ||
2686 | } | ||
2687 | |||
2688 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2689 | |||
2690 | start = nfa_state_create (ctx, 0); | ||
2691 | end = nfa_state_create (ctx, 1); | ||
2692 | |||
2693 | state_add_transition (ctx, start, NULL, a->start); | ||
2694 | state_add_transition (ctx, start, NULL, end); | ||
2695 | state_add_transition (ctx, a->end, NULL, end); | ||
2696 | |||
2697 | a->end->accepting = 0; | ||
2698 | |||
2699 | new_nfa = nfa_fragment_create (start, end); | ||
2700 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2701 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2702 | automaton_fragment_clear (a); | ||
2703 | } | ||
2704 | |||
2705 | |||
2706 | /** | ||
2707 | * Pops two NFA fragments (a, b) from the stack and adds a new NFA fragment that | ||
2708 | * alternates between a and b (a|b) | ||
2709 | * | ||
2710 | * @param ctx context | ||
2711 | */ | ||
2712 | static void | ||
2713 | nfa_add_alternation (struct REGEX_INTERNAL_Context *ctx) | ||
2714 | { | ||
2715 | struct REGEX_INTERNAL_Automaton *a; | ||
2716 | struct REGEX_INTERNAL_Automaton *b; | ||
2717 | struct REGEX_INTERNAL_Automaton *new_nfa; | ||
2718 | struct REGEX_INTERNAL_State *start; | ||
2719 | struct REGEX_INTERNAL_State *end; | ||
2720 | |||
2721 | b = ctx->stack_tail; | ||
2722 | GNUNET_assert (NULL != b); | ||
2723 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, b); | ||
2724 | a = ctx->stack_tail; | ||
2725 | GNUNET_assert (NULL != a); | ||
2726 | GNUNET_CONTAINER_DLL_remove (ctx->stack_head, ctx->stack_tail, a); | ||
2727 | |||
2728 | start = nfa_state_create (ctx, 0); | ||
2729 | end = nfa_state_create (ctx, 1); | ||
2730 | state_add_transition (ctx, start, NULL, a->start); | ||
2731 | state_add_transition (ctx, start, NULL, b->start); | ||
2732 | |||
2733 | state_add_transition (ctx, a->end, NULL, end); | ||
2734 | state_add_transition (ctx, b->end, NULL, end); | ||
2735 | |||
2736 | a->end->accepting = 0; | ||
2737 | b->end->accepting = 0; | ||
2738 | end->accepting = 1; | ||
2739 | |||
2740 | new_nfa = nfa_fragment_create (start, end); | ||
2741 | nfa_add_states (new_nfa, a->states_head, a->states_tail); | ||
2742 | nfa_add_states (new_nfa, b->states_head, b->states_tail); | ||
2743 | automaton_fragment_clear (a); | ||
2744 | automaton_fragment_clear (b); | ||
2745 | |||
2746 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, new_nfa); | ||
2747 | } | ||
2748 | |||
2749 | |||
2750 | /** | ||
2751 | * Adds a new nfa fragment to the stack | ||
2752 | * | ||
2753 | * @param ctx context | ||
2754 | * @param label label for nfa transition | ||
2755 | */ | ||
2756 | static void | ||
2757 | nfa_add_label (struct REGEX_INTERNAL_Context *ctx, const char *label) | ||
2758 | { | ||
2759 | struct REGEX_INTERNAL_Automaton *n; | ||
2760 | struct REGEX_INTERNAL_State *start; | ||
2761 | struct REGEX_INTERNAL_State *end; | ||
2762 | |||
2763 | GNUNET_assert (NULL != ctx); | ||
2764 | |||
2765 | start = nfa_state_create (ctx, 0); | ||
2766 | end = nfa_state_create (ctx, 1); | ||
2767 | state_add_transition (ctx, start, label, end); | ||
2768 | n = nfa_fragment_create (start, end); | ||
2769 | GNUNET_assert (NULL != n); | ||
2770 | GNUNET_CONTAINER_DLL_insert_tail (ctx->stack_head, ctx->stack_tail, n); | ||
2771 | } | ||
2772 | |||
2773 | |||
2774 | /** | ||
2775 | * Initialize a new context | ||
2776 | * | ||
2777 | * @param ctx context | ||
2778 | */ | ||
2779 | static void | ||
2780 | REGEX_INTERNAL_context_init (struct REGEX_INTERNAL_Context *ctx) | ||
2781 | { | ||
2782 | if (NULL == ctx) | ||
2783 | { | ||
2784 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Context was NULL!"); | ||
2785 | return; | ||
2786 | } | ||
2787 | ctx->state_id = 0; | ||
2788 | ctx->transition_id = 0; | ||
2789 | ctx->stack_head = NULL; | ||
2790 | ctx->stack_tail = NULL; | ||
2791 | } | ||
2792 | |||
2793 | |||
2794 | /** | ||
2795 | * Construct an NFA by parsing the regex string of length 'len'. | ||
2796 | * | ||
2797 | * @param regex regular expression string | ||
2798 | * @param len length of the string | ||
2799 | * | ||
2800 | * @return NFA, needs to be freed using REGEX_INTERNAL_destroy_automaton | ||
2801 | */ | ||
2802 | struct REGEX_INTERNAL_Automaton * | ||
2803 | REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len) | ||
2804 | { | ||
2805 | struct REGEX_INTERNAL_Context ctx; | ||
2806 | struct REGEX_INTERNAL_Automaton *nfa; | ||
2807 | const char *regexp; | ||
2808 | char curlabel[2]; | ||
2809 | char *error_msg; | ||
2810 | unsigned int count; | ||
2811 | unsigned int altcount; | ||
2812 | unsigned int atomcount; | ||
2813 | unsigned int poff; | ||
2814 | unsigned int psize; | ||
2815 | |||
2816 | struct | ||
2817 | { | ||
2818 | int altcount; | ||
2819 | int atomcount; | ||
2820 | } *p; | ||
2821 | |||
2822 | if ((NULL == regex) || (0 == strlen (regex)) || (0 == len)) | ||
2823 | { | ||
2824 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
2825 | "Could not parse regex. Empty regex string provided.\n"); | ||
2826 | |||
2827 | return NULL; | ||
2828 | } | ||
2829 | REGEX_INTERNAL_context_init (&ctx); | ||
2830 | |||
2831 | regexp = regex; | ||
2832 | curlabel[1] = '\0'; | ||
2833 | p = NULL; | ||
2834 | error_msg = NULL; | ||
2835 | altcount = 0; | ||
2836 | atomcount = 0; | ||
2837 | poff = 0; | ||
2838 | psize = 0; | ||
2839 | |||
2840 | for (count = 0; count < len && *regexp; count++, regexp++) | ||
2841 | { | ||
2842 | switch (*regexp) | ||
2843 | { | ||
2844 | case '(': | ||
2845 | if (atomcount > 1) | ||
2846 | { | ||
2847 | --atomcount; | ||
2848 | nfa_add_concatenation (&ctx); | ||
2849 | } | ||
2850 | if (poff == psize) | ||
2851 | GNUNET_array_grow (p, psize, psize * 2 + 4); /* FIXME why *2 +4? */ | ||
2852 | p[poff].altcount = altcount; | ||
2853 | p[poff].atomcount = atomcount; | ||
2854 | poff++; | ||
2855 | altcount = 0; | ||
2856 | atomcount = 0; | ||
2857 | break; | ||
2858 | |||
2859 | case '|': | ||
2860 | if (0 == atomcount) | ||
2861 | { | ||
2862 | error_msg = "Cannot append '|' to nothing"; | ||
2863 | goto error; | ||
2864 | } | ||
2865 | while (--atomcount > 0) | ||
2866 | nfa_add_concatenation (&ctx); | ||
2867 | altcount++; | ||
2868 | break; | ||
2869 | |||
2870 | case ')': | ||
2871 | if (0 == poff) | ||
2872 | { | ||
2873 | error_msg = "Missing opening '('"; | ||
2874 | goto error; | ||
2875 | } | ||
2876 | if (0 == atomcount) | ||
2877 | { | ||
2878 | /* Ignore this: "()" */ | ||
2879 | poff--; | ||
2880 | altcount = p[poff].altcount; | ||
2881 | atomcount = p[poff].atomcount; | ||
2882 | break; | ||
2883 | } | ||
2884 | while (--atomcount > 0) | ||
2885 | nfa_add_concatenation (&ctx); | ||
2886 | for (; altcount > 0; altcount--) | ||
2887 | nfa_add_alternation (&ctx); | ||
2888 | poff--; | ||
2889 | altcount = p[poff].altcount; | ||
2890 | atomcount = p[poff].atomcount; | ||
2891 | atomcount++; | ||
2892 | break; | ||
2893 | |||
2894 | case '*': | ||
2895 | if (atomcount == 0) | ||
2896 | { | ||
2897 | error_msg = "Cannot append '*' to nothing"; | ||
2898 | goto error; | ||
2899 | } | ||
2900 | nfa_add_star_op (&ctx); | ||
2901 | break; | ||
2902 | |||
2903 | case '+': | ||
2904 | if (atomcount == 0) | ||
2905 | { | ||
2906 | error_msg = "Cannot append '+' to nothing"; | ||
2907 | goto error; | ||
2908 | } | ||
2909 | nfa_add_plus_op (&ctx); | ||
2910 | break; | ||
2911 | |||
2912 | case '?': | ||
2913 | if (atomcount == 0) | ||
2914 | { | ||
2915 | error_msg = "Cannot append '?' to nothing"; | ||
2916 | goto error; | ||
2917 | } | ||
2918 | nfa_add_question_op (&ctx); | ||
2919 | break; | ||
2920 | |||
2921 | default: | ||
2922 | if (atomcount > 1) | ||
2923 | { | ||
2924 | --atomcount; | ||
2925 | nfa_add_concatenation (&ctx); | ||
2926 | } | ||
2927 | curlabel[0] = *regexp; | ||
2928 | nfa_add_label (&ctx, curlabel); | ||
2929 | atomcount++; | ||
2930 | break; | ||
2931 | } | ||
2932 | } | ||
2933 | if (0 != poff) | ||
2934 | { | ||
2935 | error_msg = "Unbalanced parenthesis"; | ||
2936 | goto error; | ||
2937 | } | ||
2938 | while (--atomcount > 0) | ||
2939 | nfa_add_concatenation (&ctx); | ||
2940 | for (; altcount > 0; altcount--) | ||
2941 | nfa_add_alternation (&ctx); | ||
2942 | |||
2943 | GNUNET_array_grow (p, psize, 0); | ||
2944 | |||
2945 | nfa = ctx.stack_tail; | ||
2946 | GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); | ||
2947 | |||
2948 | if (NULL != ctx.stack_head) | ||
2949 | { | ||
2950 | error_msg = "Creating the NFA failed. NFA stack was not empty!"; | ||
2951 | goto error; | ||
2952 | } | ||
2953 | |||
2954 | /* Remember the regex that was used to generate this NFA */ | ||
2955 | nfa->regex = GNUNET_strdup (regex); | ||
2956 | |||
2957 | /* create depth-first numbering of the states for pretty printing */ | ||
2958 | REGEX_INTERNAL_automaton_traverse (nfa, | ||
2959 | NULL, | ||
2960 | NULL, | ||
2961 | NULL, | ||
2962 | &number_states, | ||
2963 | NULL); | ||
2964 | |||
2965 | /* No multistriding added so far */ | ||
2966 | nfa->is_multistrided = GNUNET_NO; | ||
2967 | |||
2968 | return nfa; | ||
2969 | |||
2970 | error: | ||
2971 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not parse regex: `%s'\n", regex); | ||
2972 | if (NULL != error_msg) | ||
2973 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "%s\n", error_msg); | ||
2974 | |||
2975 | GNUNET_free (p); | ||
2976 | |||
2977 | while (NULL != (nfa = ctx.stack_head)) | ||
2978 | { | ||
2979 | GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); | ||
2980 | REGEX_INTERNAL_automaton_destroy (nfa); | ||
2981 | } | ||
2982 | |||
2983 | return NULL; | ||
2984 | } | ||
2985 | |||
2986 | |||
2987 | /** | ||
2988 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. | ||
2989 | * | ||
2990 | * @param ctx context. | ||
2991 | * @param nfa NFA automaton. | ||
2992 | * @param dfa DFA automaton. | ||
2993 | * @param dfa_state current dfa state, pass epsilon closure of first nfa state | ||
2994 | * for starting. | ||
2995 | */ | ||
2996 | static void | ||
2997 | construct_dfa_states (struct REGEX_INTERNAL_Context *ctx, | ||
2998 | struct REGEX_INTERNAL_Automaton *nfa, | ||
2999 | struct REGEX_INTERNAL_Automaton *dfa, | ||
3000 | struct REGEX_INTERNAL_State *dfa_state) | ||
3001 | { | ||
3002 | struct REGEX_INTERNAL_Transition *ctran; | ||
3003 | struct REGEX_INTERNAL_State *new_dfa_state; | ||
3004 | struct REGEX_INTERNAL_State *state_contains; | ||
3005 | struct REGEX_INTERNAL_State *state_iter; | ||
3006 | struct REGEX_INTERNAL_StateSet tmp; | ||
3007 | struct REGEX_INTERNAL_StateSet nfa_set; | ||
3008 | |||
3009 | for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next) | ||
3010 | { | ||
3011 | if ((NULL == ctran->label) || (NULL != ctran->to_state) ) | ||
3012 | continue; | ||
3013 | |||
3014 | nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label); | ||
3015 | nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL); | ||
3016 | state_set_clear (&tmp); | ||
3017 | |||
3018 | state_contains = NULL; | ||
3019 | for (state_iter = dfa->states_head; NULL != state_iter; | ||
3020 | state_iter = state_iter->next) | ||
3021 | { | ||
3022 | if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set)) | ||
3023 | { | ||
3024 | state_contains = state_iter; | ||
3025 | break; | ||
3026 | } | ||
3027 | } | ||
3028 | if (NULL == state_contains) | ||
3029 | { | ||
3030 | new_dfa_state = dfa_state_create (ctx, &nfa_set); | ||
3031 | automaton_add_state (dfa, new_dfa_state); | ||
3032 | ctran->to_state = new_dfa_state; | ||
3033 | construct_dfa_states (ctx, nfa, dfa, new_dfa_state); | ||
3034 | } | ||
3035 | else | ||
3036 | { | ||
3037 | ctran->to_state = state_contains; | ||
3038 | state_set_clear (&nfa_set); | ||
3039 | } | ||
3040 | } | ||
3041 | } | ||
3042 | |||
3043 | |||
3044 | /** | ||
3045 | * Construct DFA for the given 'regex' of length 'len'. | ||
3046 | * | ||
3047 | * Path compression means, that for example a DFA o -> a -> b -> c -> o will be | ||
3048 | * compressed to o -> abc -> o. Note that this parameter influences the | ||
3049 | * non-determinism of states of the resulting NFA in the DHT (number of outgoing | ||
3050 | * edges with the same label). For example for an application that stores IPv4 | ||
3051 | * addresses as bitstrings it could make sense to limit the path compression to | ||
3052 | * 4 or 8. | ||
3053 | * | ||
3054 | * @param regex regular expression string. | ||
3055 | * @param len length of the regular expression. | ||
3056 | * @param max_path_len limit the path compression length to the | ||
3057 | * given value. If set to 1, no path compression is applied. Set to 0 for | ||
3058 | * maximal possible path compression (generally not desirable). | ||
3059 | * @return DFA, needs to be freed using REGEX_INTERNAL_automaton_destroy. | ||
3060 | */ | ||
3061 | struct REGEX_INTERNAL_Automaton * | ||
3062 | REGEX_INTERNAL_construct_dfa (const char *regex, | ||
3063 | const size_t len, | ||
3064 | unsigned int max_path_len) | ||
3065 | { | ||
3066 | struct REGEX_INTERNAL_Context ctx; | ||
3067 | struct REGEX_INTERNAL_Automaton *dfa; | ||
3068 | struct REGEX_INTERNAL_Automaton *nfa; | ||
3069 | struct REGEX_INTERNAL_StateSet nfa_start_eps_cls; | ||
3070 | struct REGEX_INTERNAL_StateSet singleton_set; | ||
3071 | |||
3072 | REGEX_INTERNAL_context_init (&ctx); | ||
3073 | |||
3074 | /* Create NFA */ | ||
3075 | nfa = REGEX_INTERNAL_construct_nfa (regex, len); | ||
3076 | |||
3077 | if (NULL == nfa) | ||
3078 | { | ||
3079 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3080 | "Could not create DFA, because NFA creation failed\n"); | ||
3081 | return NULL; | ||
3082 | } | ||
3083 | |||
3084 | dfa = GNUNET_new (struct REGEX_INTERNAL_Automaton); | ||
3085 | dfa->type = DFA; | ||
3086 | dfa->regex = GNUNET_strdup (regex); | ||
3087 | |||
3088 | /* Create DFA start state from epsilon closure */ | ||
3089 | memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
3090 | state_set_append (&singleton_set, nfa->start); | ||
3091 | nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL); | ||
3092 | state_set_clear (&singleton_set); | ||
3093 | dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls); | ||
3094 | automaton_add_state (dfa, dfa->start); | ||
3095 | |||
3096 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); | ||
3097 | REGEX_INTERNAL_automaton_destroy (nfa); | ||
3098 | |||
3099 | /* Minimize DFA */ | ||
3100 | if (GNUNET_OK != dfa_minimize (&ctx, dfa)) | ||
3101 | { | ||
3102 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
3103 | return NULL; | ||
3104 | } | ||
3105 | |||
3106 | /* Create proofs and hashes for all states */ | ||
3107 | if (GNUNET_OK != automaton_create_proofs (dfa)) | ||
3108 | { | ||
3109 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
3110 | return NULL; | ||
3111 | } | ||
3112 | |||
3113 | /* Compress linear DFA paths */ | ||
3114 | if (1 != max_path_len) | ||
3115 | dfa_compress_paths (&ctx, dfa, max_path_len); | ||
3116 | |||
3117 | return dfa; | ||
3118 | } | ||
3119 | |||
3120 | |||
3121 | /** | ||
3122 | * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton data | ||
3123 | * structure. | ||
3124 | * | ||
3125 | * @param a automaton to be destroyed | ||
3126 | */ | ||
3127 | void | ||
3128 | REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a) | ||
3129 | { | ||
3130 | struct REGEX_INTERNAL_State *s; | ||
3131 | struct REGEX_INTERNAL_State *next_state; | ||
3132 | |||
3133 | if (NULL == a) | ||
3134 | return; | ||
3135 | |||
3136 | GNUNET_free (a->regex); | ||
3137 | GNUNET_free (a->canonical_regex); | ||
3138 | |||
3139 | for (s = a->states_head; NULL != s; s = next_state) | ||
3140 | { | ||
3141 | next_state = s->next; | ||
3142 | GNUNET_CONTAINER_DLL_remove (a->states_head, a->states_tail, s); | ||
3143 | automaton_destroy_state (s); | ||
3144 | } | ||
3145 | |||
3146 | GNUNET_free (a); | ||
3147 | } | ||
3148 | |||
3149 | |||
3150 | /** | ||
3151 | * Evaluates the given string using the given DFA automaton | ||
3152 | * | ||
3153 | * @param a automaton, type must be DFA | ||
3154 | * @param string string that should be evaluated | ||
3155 | * | ||
3156 | * @return 0 if string matches, non-0 otherwise | ||
3157 | */ | ||
3158 | static int | ||
3159 | evaluate_dfa (struct REGEX_INTERNAL_Automaton *a, const char *string) | ||
3160 | { | ||
3161 | const char *strp; | ||
3162 | struct REGEX_INTERNAL_State *s; | ||
3163 | unsigned int step_len; | ||
3164 | |||
3165 | if (DFA != a->type) | ||
3166 | { | ||
3167 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3168 | "Tried to evaluate DFA, but NFA automaton given"); | ||
3169 | return -1; | ||
3170 | } | ||
3171 | |||
3172 | s = a->start; | ||
3173 | |||
3174 | /* If the string is empty but the starting state is accepting, we accept. */ | ||
3175 | if (((NULL == string) || (0 == strlen (string))) && s->accepting) | ||
3176 | return 0; | ||
3177 | |||
3178 | for (strp = string; NULL != strp && *strp; strp += step_len) | ||
3179 | { | ||
3180 | step_len = dfa_move (&s, strp); | ||
3181 | |||
3182 | if (NULL == s) | ||
3183 | break; | ||
3184 | } | ||
3185 | |||
3186 | if ((NULL != s) && s->accepting) | ||
3187 | return 0; | ||
3188 | |||
3189 | return 1; | ||
3190 | } | ||
3191 | |||
3192 | |||
3193 | /** | ||
3194 | * Evaluates the given string using the given NFA automaton | ||
3195 | * | ||
3196 | * @param a automaton, type must be NFA | ||
3197 | * @param string string that should be evaluated | ||
3198 | * @return 0 if string matches, non-0 otherwise | ||
3199 | */ | ||
3200 | static int | ||
3201 | evaluate_nfa (struct REGEX_INTERNAL_Automaton *a, const char *string) | ||
3202 | { | ||
3203 | const char *strp; | ||
3204 | char str[2]; | ||
3205 | struct REGEX_INTERNAL_State *s; | ||
3206 | struct REGEX_INTERNAL_StateSet sset; | ||
3207 | struct REGEX_INTERNAL_StateSet new_sset; | ||
3208 | struct REGEX_INTERNAL_StateSet singleton_set; | ||
3209 | unsigned int i; | ||
3210 | int result; | ||
3211 | |||
3212 | if (NFA != a->type) | ||
3213 | { | ||
3214 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3215 | "Tried to evaluate NFA, but DFA automaton given"); | ||
3216 | return -1; | ||
3217 | } | ||
3218 | |||
3219 | /* If the string is empty but the starting state is accepting, we accept. */ | ||
3220 | if (((NULL == string) || (0 == strlen (string))) && a->start->accepting) | ||
3221 | return 0; | ||
3222 | |||
3223 | result = 1; | ||
3224 | memset (&singleton_set, 0, sizeof(struct REGEX_INTERNAL_StateSet)); | ||
3225 | state_set_append (&singleton_set, a->start); | ||
3226 | nfa_closure_set_create (&sset, a, &singleton_set, NULL); | ||
3227 | state_set_clear (&singleton_set); | ||
3228 | |||
3229 | str[1] = '\0'; | ||
3230 | for (strp = string; NULL != strp && *strp; strp++) | ||
3231 | { | ||
3232 | str[0] = *strp; | ||
3233 | nfa_closure_set_create (&new_sset, a, &sset, str); | ||
3234 | state_set_clear (&sset); | ||
3235 | nfa_closure_set_create (&sset, a, &new_sset, 0); | ||
3236 | state_set_clear (&new_sset); | ||
3237 | } | ||
3238 | |||
3239 | for (i = 0; i < sset.off; i++) | ||
3240 | { | ||
3241 | s = sset.states[i]; | ||
3242 | if ((NULL != s) && (s->accepting)) | ||
3243 | { | ||
3244 | result = 0; | ||
3245 | break; | ||
3246 | } | ||
3247 | } | ||
3248 | |||
3249 | state_set_clear (&sset); | ||
3250 | return result; | ||
3251 | } | ||
3252 | |||
3253 | |||
3254 | /** | ||
3255 | * Evaluates the given @a string against the given compiled regex @a a | ||
3256 | * | ||
3257 | * @param a automaton | ||
3258 | * @param string string to check | ||
3259 | * @return 0 if string matches, non-0 otherwise | ||
3260 | */ | ||
3261 | int | ||
3262 | REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, const char *string) | ||
3263 | { | ||
3264 | int result; | ||
3265 | |||
3266 | switch (a->type) | ||
3267 | { | ||
3268 | case DFA: | ||
3269 | result = evaluate_dfa (a, string); | ||
3270 | break; | ||
3271 | |||
3272 | case NFA: | ||
3273 | result = evaluate_nfa (a, string); | ||
3274 | break; | ||
3275 | |||
3276 | default: | ||
3277 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
3278 | "Evaluating regex failed, automaton has no type!\n"); | ||
3279 | result = GNUNET_SYSERR; | ||
3280 | break; | ||
3281 | } | ||
3282 | |||
3283 | return result; | ||
3284 | } | ||
3285 | |||
3286 | |||
3287 | /** | ||
3288 | * Get the canonical regex of the given automaton. | ||
3289 | * When constructing the automaton a proof is computed for each state, | ||
3290 | * consisting of the regular expression leading to this state. A complete | ||
3291 | * regex for the automaton can be computed by combining these proofs. | ||
3292 | * As of now this function is only useful for testing. | ||
3293 | * | ||
3294 | * @param a automaton for which the canonical regex should be returned. | ||
3295 | * | ||
3296 | * @return | ||
3297 | */ | ||
3298 | const char * | ||
3299 | REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a) | ||
3300 | { | ||
3301 | if (NULL == a) | ||
3302 | return NULL; | ||
3303 | |||
3304 | return a->canonical_regex; | ||
3305 | } | ||
3306 | |||
3307 | |||
3308 | /** | ||
3309 | * Get the number of transitions that are contained in the given automaton. | ||
3310 | * | ||
3311 | * @param a automaton for which the number of transitions should be returned. | ||
3312 | * | ||
3313 | * @return number of transitions in the given automaton. | ||
3314 | */ | ||
3315 | unsigned int | ||
3316 | REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a) | ||
3317 | { | ||
3318 | unsigned int t_count; | ||
3319 | struct REGEX_INTERNAL_State *s; | ||
3320 | |||
3321 | if (NULL == a) | ||
3322 | return 0; | ||
3323 | |||
3324 | t_count = 0; | ||
3325 | for (s = a->states_head; NULL != s; s = s->next) | ||
3326 | t_count += s->transition_count; | ||
3327 | |||
3328 | return t_count; | ||
3329 | } | ||
3330 | |||
3331 | |||
3332 | /** | ||
3333 | * Get the first key for the given @a input_string. This hashes the first x bits | ||
3334 | * of the @a input_string. | ||
3335 | * | ||
3336 | * @param input_string string. | ||
3337 | * @param string_len length of the @a input_string. | ||
3338 | * @param key pointer to where to write the hash code. | ||
3339 | * @return number of bits of @a input_string that have been consumed | ||
3340 | * to construct the key | ||
3341 | */ | ||
3342 | size_t | ||
3343 | REGEX_INTERNAL_get_first_key (const char *input_string, | ||
3344 | size_t string_len, | ||
3345 | struct GNUNET_HashCode *key) | ||
3346 | { | ||
3347 | size_t size; | ||
3348 | |||
3349 | size = string_len < GNUNET_REGEX_INITIAL_BYTES ? string_len | ||
3350 | : GNUNET_REGEX_INITIAL_BYTES; | ||
3351 | if (NULL == input_string) | ||
3352 | { | ||
3353 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Given input string was NULL!\n"); | ||
3354 | return 0; | ||
3355 | } | ||
3356 | GNUNET_CRYPTO_hash (input_string, size, key); | ||
3357 | |||
3358 | return size; | ||
3359 | } | ||
3360 | |||
3361 | |||
3362 | /** | ||
3363 | * Recursive function that calls the iterator for each synthetic start state. | ||
3364 | * | ||
3365 | * @param min_len minimum length of the path in the graph. | ||
3366 | * @param max_len maximum length of the path in the graph. | ||
3367 | * @param consumed_string string consumed by traversing the graph till this state. | ||
3368 | * @param state current state of the automaton. | ||
3369 | * @param iterator iterator function called for each edge. | ||
3370 | * @param iterator_cls closure for the @a iterator function. | ||
3371 | */ | ||
3372 | static void | ||
3373 | iterate_initial_edge (unsigned int min_len, | ||
3374 | unsigned int max_len, | ||
3375 | char *consumed_string, | ||
3376 | struct REGEX_INTERNAL_State *state, | ||
3377 | REGEX_INTERNAL_KeyIterator iterator, | ||
3378 | void *iterator_cls) | ||
3379 | { | ||
3380 | char *temp; | ||
3381 | struct REGEX_INTERNAL_Transition *t; | ||
3382 | unsigned int num_edges = state->transition_count; | ||
3383 | struct REGEX_BLOCK_Edge edges[num_edges]; | ||
3384 | struct REGEX_BLOCK_Edge edge[1]; | ||
3385 | struct GNUNET_HashCode hash; | ||
3386 | struct GNUNET_HashCode hash_new; | ||
3387 | unsigned int cur_len; | ||
3388 | |||
3389 | if (NULL != consumed_string) | ||
3390 | cur_len = strlen (consumed_string); | ||
3391 | else | ||
3392 | cur_len = 0; | ||
3393 | |||
3394 | if (((cur_len >= min_len) || (GNUNET_YES == state->accepting)) && | ||
3395 | (cur_len > 0) && (NULL != consumed_string)) | ||
3396 | { | ||
3397 | if (cur_len <= max_len) | ||
3398 | { | ||
3399 | if ((NULL != state->proof) && | ||
3400 | (0 != strcmp (consumed_string, state->proof))) | ||
3401 | { | ||
3402 | (void) state_get_edges (state, edges); | ||
3403 | GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); | ||
3404 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3405 | "Start state for string `%s' is %s\n", | ||
3406 | consumed_string, | ||
3407 | GNUNET_h2s (&hash)); | ||
3408 | iterator (iterator_cls, | ||
3409 | &hash, | ||
3410 | consumed_string, | ||
3411 | state->accepting, | ||
3412 | num_edges, | ||
3413 | edges); | ||
3414 | } | ||
3415 | |||
3416 | if ((GNUNET_YES == state->accepting) && (cur_len > 1) && | ||
3417 | (state->transition_count < 1) && (cur_len < max_len)) | ||
3418 | { | ||
3419 | /* Special case for regex consisting of just a string that is shorter than | ||
3420 | * max_len */ | ||
3421 | edge[0].label = &consumed_string[cur_len - 1]; | ||
3422 | edge[0].destination = state->hash; | ||
3423 | temp = GNUNET_strdup (consumed_string); | ||
3424 | temp[cur_len - 1] = '\0'; | ||
3425 | GNUNET_CRYPTO_hash (temp, cur_len - 1, &hash_new); | ||
3426 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3427 | "Start state for short string `%s' is %s\n", | ||
3428 | temp, | ||
3429 | GNUNET_h2s (&hash_new)); | ||
3430 | iterator (iterator_cls, &hash_new, temp, GNUNET_NO, 1, edge); | ||
3431 | GNUNET_free (temp); | ||
3432 | } | ||
3433 | } | ||
3434 | else /* cur_len > max_len */ | ||
3435 | { | ||
3436 | /* Case where the concatenated labels are longer than max_len, then split. */ | ||
3437 | edge[0].label = &consumed_string[max_len]; | ||
3438 | edge[0].destination = state->hash; | ||
3439 | temp = GNUNET_strdup (consumed_string); | ||
3440 | temp[max_len] = '\0'; | ||
3441 | GNUNET_CRYPTO_hash (temp, max_len, &hash); | ||
3442 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3443 | "Start state at split edge `%s'-`%s` is %s\n", | ||
3444 | temp, | ||
3445 | edge[0].label, | ||
3446 | GNUNET_h2s (&hash_new)); | ||
3447 | iterator (iterator_cls, &hash, temp, GNUNET_NO, 1, edge); | ||
3448 | GNUNET_free (temp); | ||
3449 | } | ||
3450 | } | ||
3451 | |||
3452 | if (cur_len < max_len) | ||
3453 | { | ||
3454 | for (t = state->transitions_head; NULL != t; t = t->next) | ||
3455 | { | ||
3456 | if (NULL != strchr (t->label, (int) '.')) | ||
3457 | { | ||
3458 | /* Wildcards not allowed during starting states */ | ||
3459 | GNUNET_break (0); | ||
3460 | continue; | ||
3461 | } | ||
3462 | if (NULL != consumed_string) | ||
3463 | GNUNET_asprintf (&temp, "%s%s", consumed_string, t->label); | ||
3464 | else | ||
3465 | GNUNET_asprintf (&temp, "%s", t->label); | ||
3466 | iterate_initial_edge (min_len, | ||
3467 | max_len, | ||
3468 | temp, | ||
3469 | t->to_state, | ||
3470 | iterator, | ||
3471 | iterator_cls); | ||
3472 | GNUNET_free (temp); | ||
3473 | } | ||
3474 | } | ||
3475 | } | ||
3476 | |||
3477 | |||
3478 | /** | ||
3479 | * Iterate over all edges starting from start state of automaton 'a'. Calling | ||
3480 | * iterator for each edge. | ||
3481 | * | ||
3482 | * @param a automaton. | ||
3483 | * @param iterator iterator called for each edge. | ||
3484 | * @param iterator_cls closure. | ||
3485 | */ | ||
3486 | void | ||
3487 | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | ||
3488 | REGEX_INTERNAL_KeyIterator iterator, | ||
3489 | void *iterator_cls) | ||
3490 | { | ||
3491 | struct REGEX_INTERNAL_State *s; | ||
3492 | |||
3493 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over starting edges\n"); | ||
3494 | iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, | ||
3495 | GNUNET_REGEX_INITIAL_BYTES, | ||
3496 | NULL, | ||
3497 | a->start, | ||
3498 | iterator, | ||
3499 | iterator_cls); | ||
3500 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating over DFA edges\n"); | ||
3501 | for (s = a->states_head; NULL != s; s = s->next) | ||
3502 | { | ||
3503 | struct REGEX_BLOCK_Edge edges[s->transition_count]; | ||
3504 | unsigned int num_edges; | ||
3505 | |||
3506 | num_edges = state_get_edges (s, edges); | ||
3507 | if (((NULL != s->proof) && (0 < strlen (s->proof))) || s->accepting) | ||
3508 | { | ||
3509 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
3510 | "Creating DFA edges at `%s' under key %s\n", | ||
3511 | s->proof, | ||
3512 | GNUNET_h2s (&s->hash)); | ||
3513 | iterator (iterator_cls, | ||
3514 | &s->hash, | ||
3515 | s->proof, | ||
3516 | s->accepting, | ||
3517 | num_edges, | ||
3518 | edges); | ||
3519 | } | ||
3520 | s->marked = GNUNET_NO; | ||
3521 | } | ||
3522 | } | ||
3523 | |||
3524 | |||
/**
 * Struct to hold all the relevant state information in the HashMap.
 *
 * Contains the same info as the Regex Iterator parameters except the key,
 * which comes directly from the HashMap iterator.
 */
struct temporal_state_store
{
  /**
   * GNUNET_YES once the state has been marked as reachable,
   * GNUNET_NO otherwise.
   */
  int reachable;

  /**
   * Copy of the proof of the state.
   */
  char *proof;

  /**
   * GNUNET_YES if this is an accepting state, GNUNET_NO if not.
   */
  int accepting;

  /**
   * Number of entries in 'edges'.  Unsigned, matching the 'num_edges'
   * argument of the regex iterator this value is copied from and handed
   * back to.
   */
  unsigned int num_edges;

  /**
   * Copy of the edges leaving the state.
   */
  struct REGEX_BLOCK_Edge *edges;
};
3539 | |||
3540 | |||
3541 | /** | ||
3542 | * Store regex iterator and cls in one place to pass to the hashmap iterator. | ||
3543 | */ | ||
3544 | struct client_iterator | ||
3545 | { | ||
3546 | REGEX_INTERNAL_KeyIterator iterator; | ||
3547 | void *iterator_cls; | ||
3548 | }; | ||
3549 | |||
3550 | |||
3551 | /** | ||
3552 | * Iterator over all edges of a dfa. Stores all of them in a HashMap | ||
3553 | * for later reachability marking. | ||
3554 | * | ||
3555 | * @param cls Closure (HashMap) | ||
3556 | * @param key hash for current state. | ||
3557 | * @param proof proof for current state | ||
3558 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
3559 | * @param num_edges number of edges leaving current state. | ||
3560 | * @param edges edges leaving current state. | ||
3561 | */ | ||
3562 | static void | ||
3563 | store_all_states (void *cls, | ||
3564 | const struct GNUNET_HashCode *key, | ||
3565 | const char *proof, | ||
3566 | int accepting, | ||
3567 | unsigned int num_edges, | ||
3568 | const struct REGEX_BLOCK_Edge *edges) | ||
3569 | { | ||
3570 | struct GNUNET_CONTAINER_MultiHashMap *hm = cls; | ||
3571 | struct temporal_state_store *tmp; | ||
3572 | size_t edges_size; | ||
3573 | |||
3574 | tmp = GNUNET_new (struct temporal_state_store); | ||
3575 | tmp->reachable = GNUNET_NO; | ||
3576 | tmp->proof = GNUNET_strdup (proof); | ||
3577 | tmp->accepting = accepting; | ||
3578 | tmp->num_edges = num_edges; | ||
3579 | edges_size = sizeof(struct REGEX_BLOCK_Edge) * num_edges; | ||
3580 | tmp->edges = GNUNET_malloc (edges_size); | ||
3581 | GNUNET_memcpy (tmp->edges, edges, edges_size); | ||
3582 | GNUNET_assert (GNUNET_YES == | ||
3583 | GNUNET_CONTAINER_multihashmap_put ( | ||
3584 | hm, | ||
3585 | key, | ||
3586 | tmp, | ||
3587 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)); | ||
3588 | } | ||
3589 | |||
3590 | |||
3591 | /** | ||
3592 | * Mark state as reachable and call recursively on all its edges. | ||
3593 | * | ||
3594 | * If already marked as reachable, do nothing. | ||
3595 | * | ||
3596 | * @param state State to mark as reachable. | ||
3597 | * @param hm HashMap which stores all the states indexed by key. | ||
3598 | */ | ||
3599 | static void | ||
3600 | mark_as_reachable (struct temporal_state_store *state, | ||
3601 | struct GNUNET_CONTAINER_MultiHashMap *hm) | ||
3602 | { | ||
3603 | struct temporal_state_store *child; | ||
3604 | unsigned int i; | ||
3605 | |||
3606 | if (GNUNET_YES == state->reachable) | ||
3607 | /* visited */ | ||
3608 | return; | ||
3609 | |||
3610 | state->reachable = GNUNET_YES; | ||
3611 | for (i = 0; i < state->num_edges; i++) | ||
3612 | { | ||
3613 | child = | ||
3614 | GNUNET_CONTAINER_multihashmap_get (hm, &state->edges[i].destination); | ||
3615 | if (NULL == child) | ||
3616 | { | ||
3617 | GNUNET_break (0); | ||
3618 | continue; | ||
3619 | } | ||
3620 | mark_as_reachable (child, hm); | ||
3621 | } | ||
3622 | } | ||
3623 | |||
3624 | |||
3625 | /** | ||
3626 | * Iterator over hash map entries to mark the ones that are reachable. | ||
3627 | * | ||
3628 | * @param cls closure | ||
3629 | * @param key current key code | ||
3630 | * @param value value in the hash map | ||
3631 | * @return #GNUNET_YES if we should continue to iterate, | ||
3632 | * #GNUNET_NO if not. | ||
3633 | */ | ||
3634 | static int | ||
3635 | reachability_iterator (void *cls, | ||
3636 | const struct GNUNET_HashCode *key, | ||
3637 | void *value) | ||
3638 | { | ||
3639 | struct GNUNET_CONTAINER_MultiHashMap *hm = cls; | ||
3640 | struct temporal_state_store *state = value; | ||
3641 | |||
3642 | if (GNUNET_YES == state->reachable) | ||
3643 | /* already visited and marked */ | ||
3644 | return GNUNET_YES; | ||
3645 | |||
3646 | if ((GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof)) && | ||
3647 | (GNUNET_NO == state->accepting) ) | ||
3648 | /* not directly reachable */ | ||
3649 | return GNUNET_YES; | ||
3650 | |||
3651 | mark_as_reachable (state, hm); | ||
3652 | return GNUNET_YES; | ||
3653 | } | ||
3654 | |||
3655 | |||
3656 | /** | ||
3657 | * Iterator over hash map entries. | ||
3658 | * Calling the callback on the ones marked as reachables. | ||
3659 | * | ||
3660 | * @param cls closure | ||
3661 | * @param key current key code | ||
3662 | * @param value value in the hash map | ||
3663 | * @return #GNUNET_YES if we should continue to iterate, | ||
3664 | * #GNUNET_NO if not. | ||
3665 | */ | ||
3666 | static int | ||
3667 | iterate_reachables (void *cls, const struct GNUNET_HashCode *key, void *value) | ||
3668 | { | ||
3669 | struct client_iterator *ci = cls; | ||
3670 | struct temporal_state_store *state = value; | ||
3671 | |||
3672 | if (GNUNET_YES == state->reachable) | ||
3673 | { | ||
3674 | ci->iterator (ci->iterator_cls, | ||
3675 | key, | ||
3676 | state->proof, | ||
3677 | state->accepting, | ||
3678 | state->num_edges, | ||
3679 | state->edges); | ||
3680 | } | ||
3681 | GNUNET_free (state->edges); | ||
3682 | GNUNET_free (state->proof); | ||
3683 | GNUNET_free (state); | ||
3684 | return GNUNET_YES; | ||
3685 | } | ||
3686 | |||
3687 | |||
3688 | /** | ||
3689 | * Iterate over all edges of automaton 'a' that are reachable from a state with | ||
3690 | * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. | ||
3691 | * | ||
3692 | * Call the iterator for each such edge. | ||
3693 | * | ||
3694 | * @param a automaton. | ||
3695 | * @param iterator iterator called for each reachable edge. | ||
3696 | * @param iterator_cls closure. | ||
3697 | */ | ||
3698 | void | ||
3699 | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, | ||
3700 | REGEX_INTERNAL_KeyIterator iterator, | ||
3701 | void *iterator_cls) | ||
3702 | { | ||
3703 | struct GNUNET_CONTAINER_MultiHashMap *hm; | ||
3704 | struct client_iterator ci; | ||
3705 | |||
3706 | hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_NO); | ||
3707 | ci.iterator = iterator; | ||
3708 | ci.iterator_cls = iterator_cls; | ||
3709 | |||
3710 | REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm); | ||
3711 | GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm); | ||
3712 | GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci); | ||
3713 | |||
3714 | GNUNET_CONTAINER_multihashmap_destroy (hm); | ||
3715 | } | ||
3716 | |||
3717 | |||
3718 | /* end of regex_internal.c */ | ||
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h deleted file mode 100644 index 8f29cff33..000000000 --- a/src/regex/regex_internal.h +++ /dev/null | |||
@@ -1,456 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_internal.h | ||
22 | * @brief common internal definitions for regex library. | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #ifndef REGEX_INTERNAL_H | ||
26 | #define REGEX_INTERNAL_H | ||
27 | |||
28 | #include "regex_internal_lib.h" | ||
29 | |||
30 | #ifdef __cplusplus | ||
31 | extern "C" | ||
32 | { | ||
33 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
34 | } | ||
35 | #endif | ||
36 | #endif | ||
37 | |||
/**
 * String of all literal characters that may appear inside a regex (the
 * operators are not part of it): digits, upper case and lower case letters.
 */
#define ALLOWED_LITERALS \
  "0123456789" \
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
  "abcdefghijklmnopqrstuvwxyz"
44 | |||
45 | |||
/**
 * Transition between two states.  A transition is stored at the state it
 * originates from ('from_state'); a state can have any number of
 * transitions, including none.  A NULL label denotes an epsilon transition.
 */
struct REGEX_INTERNAL_Transition
{
  /**
   * Previous element in the linked list of transitions.
   */
  struct REGEX_INTERNAL_Transition *prev;

  /**
   * Next element in the linked list of transitions.
   */
  struct REGEX_INTERNAL_Transition *next;

  /**
   * Unique id of this transition.
   */
  unsigned int id;

  /**
   * Label of this transition, i.e. the edge label in the graph.
   */
  char *label;

  /**
   * State this transition leads to.
   */
  struct REGEX_INTERNAL_State *to_state;

  /**
   * State this transition originates from.
   */
  struct REGEX_INTERNAL_State *from_state;
};
83 | |||
84 | |||
/**
 * A state.  Can be used in DFA and NFA automatons.
 * (Forward declaration, the definition follows further down.)
 */
struct REGEX_INTERNAL_State;


/**
 * Set of states, kept as an array of state pointers.
 */
struct REGEX_INTERNAL_StateSet
{
  /**
   * Array of states.
   */
  struct REGEX_INTERNAL_State **states;

  /**
   * Number of entries of the 'states' array that are in *use*.
   */
  unsigned int off;

  /**
   * Length of the 'states' array.
   */
  unsigned int size;
};
111 | |||
112 | |||
113 | /** | ||
114 | * A state. Can be used in DFA and NFA automatons. | ||
115 | */ | ||
116 | struct REGEX_INTERNAL_State | ||
117 | { | ||
118 | /** | ||
119 | * This is a linked list to keep states in an automaton. | ||
120 | */ | ||
121 | struct REGEX_INTERNAL_State *prev; | ||
122 | |||
123 | /** | ||
124 | * This is a linked list to keep states in an automaton. | ||
125 | */ | ||
126 | struct REGEX_INTERNAL_State *next; | ||
127 | |||
128 | /** | ||
129 | * This is a multi DLL for StateSet_MDLL. | ||
130 | */ | ||
131 | struct REGEX_INTERNAL_State *prev_SS; | ||
132 | |||
133 | /** | ||
134 | * This is a multi DLL for StateSet_MDLL. | ||
135 | */ | ||
136 | struct REGEX_INTERNAL_State *next_SS; | ||
137 | |||
138 | /** | ||
139 | * This is a multi DLL for StateSet_MDLL Stack. | ||
140 | */ | ||
141 | struct REGEX_INTERNAL_State *prev_ST; | ||
142 | |||
143 | /** | ||
144 | * This is a multi DLL for StateSet_MDLL Stack. | ||
145 | */ | ||
146 | struct REGEX_INTERNAL_State *next_ST; | ||
147 | |||
148 | /** | ||
149 | * Unique state id. | ||
150 | */ | ||
151 | unsigned int id; | ||
152 | |||
153 | /** | ||
154 | * Unique state id that is used for traversing the automaton. It is guaranteed | ||
155 | * to be > 0 and < state_count. | ||
156 | */ | ||
157 | unsigned int traversal_id; | ||
158 | |||
159 | /** | ||
160 | * If this is an accepting state or not. | ||
161 | */ | ||
162 | int accepting; | ||
163 | |||
164 | /** | ||
165 | * Marking of the state. This is used for marking all visited states when | ||
166 | * traversing all states of an automaton and for cases where the state id | ||
167 | * cannot be used (dfa minimization). | ||
168 | */ | ||
169 | int marked; | ||
170 | |||
171 | /** | ||
172 | * Marking the state as contained. This is used for checking, if the state is | ||
173 | * contained in a set in constant time. | ||
174 | */ | ||
175 | int contained; | ||
176 | |||
177 | /** | ||
178 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
179 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state | ||
180 | * is not a part of any SCC. | ||
181 | */ | ||
182 | unsigned int scc_id; | ||
183 | |||
184 | /** | ||
185 | * Used for SCC detection. | ||
186 | */ | ||
187 | int index; | ||
188 | |||
189 | /** | ||
190 | * Used for SCC detection. | ||
191 | */ | ||
192 | int lowlink; | ||
193 | |||
194 | /** | ||
195 | * Human readable name of the state. Used for debugging and graph | ||
196 | * creation. | ||
197 | */ | ||
198 | char *name; | ||
199 | |||
200 | /** | ||
201 | * Hash of the state. | ||
202 | */ | ||
203 | struct GNUNET_HashCode hash; | ||
204 | |||
205 | /** | ||
206 | * Linear state ID acquired by depth-first-search. This ID should be used for | ||
207 | * storing information about the state in an array, because the 'id' of the | ||
208 | * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0 | ||
209 | * and < 'state_count'. | ||
210 | */ | ||
211 | unsigned int dfs_id; | ||
212 | |||
213 | /** | ||
214 | * Proof for this state. | ||
215 | */ | ||
216 | char *proof; | ||
217 | |||
218 | /** | ||
219 | * Number of transitions from this state to other states. | ||
220 | */ | ||
221 | unsigned int transition_count; | ||
222 | |||
223 | /** | ||
224 | * DLL of transitions. | ||
225 | */ | ||
226 | struct REGEX_INTERNAL_Transition *transitions_head; | ||
227 | |||
228 | /** | ||
229 | * DLL of transitions. | ||
230 | */ | ||
231 | struct REGEX_INTERNAL_Transition *transitions_tail; | ||
232 | |||
233 | /** | ||
234 | * Number of incoming transitions. Used for compressing DFA paths. | ||
235 | */ | ||
236 | unsigned int incoming_transition_count; | ||
237 | |||
238 | /** | ||
239 | * Set of states on which this state is based on. Used when creating a DFA out | ||
240 | * of several NFA states. | ||
241 | */ | ||
242 | struct REGEX_INTERNAL_StateSet nfa_set; | ||
243 | }; | ||
244 | |||
245 | |||
/**
 * Type of an automaton.
 */
enum REGEX_INTERNAL_AutomatonType
{
  /**
   * The automaton is an NFA.
   */
  NFA,

  /**
   * The automaton is a DFA.
   */
  DFA
};
254 | |||
255 | |||
256 | /** | ||
257 | * Automaton representation. | ||
258 | */ | ||
259 | struct REGEX_INTERNAL_Automaton | ||
260 | { | ||
261 | /** | ||
262 | * Linked list of NFAs used for partial NFA creation. | ||
263 | */ | ||
264 | struct REGEX_INTERNAL_Automaton *prev; | ||
265 | |||
266 | /** | ||
267 | * Linked list of NFAs used for partial NFA creation. | ||
268 | */ | ||
269 | struct REGEX_INTERNAL_Automaton *next; | ||
270 | |||
271 | /** | ||
272 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
273 | * where each NFA itself consists of one or more NFAs linked together. | ||
274 | */ | ||
275 | struct REGEX_INTERNAL_State *start; | ||
276 | |||
277 | /** | ||
278 | * End state of the partial NFA. This is undefined for DFAs | ||
279 | */ | ||
280 | struct REGEX_INTERNAL_State *end; | ||
281 | |||
282 | /** | ||
283 | * Number of states in the automaton. | ||
284 | */ | ||
285 | unsigned int state_count; | ||
286 | |||
287 | /** | ||
288 | * DLL of states. | ||
289 | */ | ||
290 | struct REGEX_INTERNAL_State *states_head; | ||
291 | |||
292 | /** | ||
293 | * DLL of states | ||
294 | */ | ||
295 | struct REGEX_INTERNAL_State *states_tail; | ||
296 | |||
297 | /** | ||
298 | * Type of the automaton. | ||
299 | */ | ||
300 | enum REGEX_INTERNAL_AutomatonType type; | ||
301 | |||
302 | /** | ||
303 | * Regex | ||
304 | */ | ||
305 | char *regex; | ||
306 | |||
307 | /** | ||
308 | * Canonical regex (result of RX->NFA->DFA->RX) | ||
309 | */ | ||
310 | char *canonical_regex; | ||
311 | |||
312 | /** | ||
313 | * GNUNET_YES, if multi strides have been added to the Automaton. | ||
314 | */ | ||
315 | int is_multistrided; | ||
316 | }; | ||
317 | |||
318 | |||
/**
 * Parse the regex string of length 'len' and build an NFA from it.
 *
 * @param regex regular expression string.
 * @param len length of the string.
 *
 * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
 */
struct REGEX_INTERNAL_Automaton *
REGEX_INTERNAL_construct_nfa (const char *regex,
                              const size_t len);
329 | |||
330 | |||
/**
 * Callback handed to the automaton traversal.  It is invoked before each
 * step from state 's' along transition 't' and decides whether the
 * traversal should take that step.
 *
 * @param cls closure for the check.
 * @param s current state in the traversal.
 * @param t current transition from state 's' that will be used for the next
 *          step.
 *
 * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop.
 */
typedef int
(*REGEX_INTERNAL_traverse_check) (void *cls,
                                  struct REGEX_INTERNAL_State *s,
                                  struct REGEX_INTERNAL_Transition *t);
347 | |||
348 | |||
/**
 * Callback invoked for each state while traversing an automaton.
 *
 * @param cls closure.
 * @param count current count of the state, from 0 to a->state_count -1.
 * @param s state.
 */
typedef void
(*REGEX_INTERNAL_traverse_action) (void *cls,
                                   const unsigned int count,
                                   struct REGEX_INTERNAL_State *s);
359 | |||
360 | |||
361 | /** | ||
362 | * Traverses the given automaton using depth-first-search (DFS) from it's start | ||
363 | * state, visiting all reachable states and calling 'action' on each one of | ||
364 | * them. | ||
365 | * | ||
366 | * @param a automaton to be traversed. | ||
367 | * @param start start state, pass a->start or NULL to traverse the whole automaton. | ||
368 | * @param check function that is checked before advancing on each transition | ||
369 | * in the DFS. | ||
370 | * @param check_cls closure for check. | ||
371 | * @param action action to be performed on each state. | ||
372 | * @param action_cls closure for action | ||
373 | */ | ||
374 | void | ||
375 | REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a, | ||
376 | struct REGEX_INTERNAL_State *start, | ||
377 | REGEX_INTERNAL_traverse_check check, | ||
378 | void *check_cls, | ||
379 | REGEX_INTERNAL_traverse_action action, | ||
380 | void *action_cls); | ||
381 | |||
/**
 * Get the canonical regex of the given automaton.
 *
 * While the automaton is constructed, a proof is computed for each state;
 * the proof consists of the regular expression leading to that state.
 * Combining these proofs yields a complete regex for the automaton.
 * As of now this function is only useful for testing.
 *
 * @param a automaton for which the canonical regex should be returned.
 *
 * @return canonical regex string.
 */
const char *
REGEX_INTERNAL_get_canonical_regex (
  struct REGEX_INTERNAL_Automaton *a);
395 | |||
396 | |||
/**
 * Count the transitions contained in the given automaton.
 *
 * @param a automaton for which the number of transitions should be returned.
 *
 * @return number of transitions in the given automaton.
 */
unsigned int
REGEX_INTERNAL_get_transition_count (
  struct REGEX_INTERNAL_Automaton *a);
406 | |||
407 | |||
/**
 * Context holding the id counters for states and transitions together with
 * a DLL of automatons that serves as a stack during NFA construction.
 */
struct REGEX_INTERNAL_Context
{
  /**
   * Unique state id.
   */
  unsigned int state_id;

  /**
   * Unique transition id.
   */
  unsigned int transition_id;

  /**
   * Head of the DLL of REGEX_INTERNAL_Automaton's used as a stack.
   */
  struct REGEX_INTERNAL_Automaton *stack_head;

  /**
   * Tail of the DLL of REGEX_INTERNAL_Automaton's used as a stack.
   */
  struct REGEX_INTERNAL_Automaton *stack_tail;
};
434 | |||
435 | |||
/**
 * Add multi-strided transitions to the given 'dfa'.
 *
 * @param regex_ctx regex context needed to add transitions to the automaton.
 * @param dfa DFA to which the multi strided transitions should be added.
 * @param stride_len length of the strides.
 */
void
REGEX_INTERNAL_dfa_add_multi_strides (
  struct REGEX_INTERNAL_Context *regex_ctx,
  struct REGEX_INTERNAL_Automaton *dfa,
  const unsigned int stride_len);
447 | |||
448 | |||
449 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
450 | { | ||
451 | #endif | ||
452 | #ifdef __cplusplus | ||
453 | } | ||
454 | #endif | ||
455 | |||
456 | #endif | ||
diff --git a/src/regex/regex_internal_dht.c b/src/regex/regex_internal_dht.c deleted file mode 100644 index 3fb74c319..000000000 --- a/src/regex/regex_internal_dht.c +++ /dev/null | |||
@@ -1,831 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2015 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_internal_dht.c | ||
22 | * @brief library to announce regexes in the network and match strings | ||
23 | * against published regexes. | ||
24 | * @author Bartlomiej Polot | ||
25 | */ | ||
26 | #include "platform.h" | ||
27 | #include "regex_internal_lib.h" | ||
28 | #include "regex_block_lib.h" | ||
29 | #include "gnunet_dht_service.h" | ||
30 | #include "gnunet_statistics_service.h" | ||
31 | #include "gnunet_constants.h" | ||
32 | #include "gnunet_signatures.h" | ||
33 | |||
34 | |||
/**
 * Log via GNUNET_log_from() under the "regex-dht" component.
 */
#define LOG(kind, ...) GNUNET_log_from (kind, "regex-dht", __VA_ARGS__)

/**
 * Replication level used for DHT operations.
 */
#define DHT_REPLICATION 5

/**
 * Lifetime used for DHT records.
 */
#define DHT_TTL GNUNET_TIME_UNIT_HOURS

/**
 * Options used for DHT operations.
 */
#define DHT_OPT GNUNET_DHT_RO_DEMULTIPLEX_EVERYWHERE
51 | |||
52 | |||
/**
 * Handle that caches the data belonging to one regex announcement.
 */
struct REGEX_INTERNAL_Announcement
{
  /**
   * DHT handle to use; has to be initialized externally.
   */
  struct GNUNET_DHT_Handle *dht;

  /**
   * The regular expression that is announced.
   */
  const char *regex;

  /**
   * Automaton built from the regex (expensive to build, hence cached).
   */
  struct REGEX_INTERNAL_Automaton *dfa;

  /**
   * Our private key.
   */
  const struct GNUNET_CRYPTO_EddsaPrivateKey *priv;

  /**
   * Statistics handle used to report usage.  Optional, can be NULL.
   */
  struct GNUNET_STATISTICS_Handle *stats;
};
83 | |||
84 | |||
85 | /** | ||
86 | * Regex callback iterator to store own service description in the DHT. | ||
87 | * | ||
88 | * @param cls closure. | ||
89 | * @param key hash for current state. | ||
90 | * @param proof proof for current state. | ||
91 | * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not. | ||
92 | * @param num_edges number of edges leaving current state. | ||
93 | * @param edges edges leaving current state. | ||
94 | */ | ||
95 | static void | ||
96 | regex_iterator (void *cls, | ||
97 | const struct GNUNET_HashCode *key, | ||
98 | const char *proof, | ||
99 | int accepting, | ||
100 | unsigned int num_edges, | ||
101 | const struct REGEX_BLOCK_Edge *edges) | ||
102 | { | ||
103 | struct REGEX_INTERNAL_Announcement *h = cls; | ||
104 | struct RegexBlock *block; | ||
105 | size_t size; | ||
106 | unsigned int i; | ||
107 | |||
108 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
109 | "DHT PUT for state %s with proof `%s' and %u edges:\n", | ||
110 | GNUNET_h2s (key), | ||
111 | proof, | ||
112 | num_edges); | ||
113 | for (i = 0; i < num_edges; i++) | ||
114 | { | ||
115 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
116 | "Edge %u `%s' towards %s\n", | ||
117 | i, | ||
118 | edges[i].label, | ||
119 | GNUNET_h2s (&edges[i].destination)); | ||
120 | } | ||
121 | if (GNUNET_YES == accepting) | ||
122 | { | ||
123 | struct RegexAcceptBlock ab; | ||
124 | |||
125 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
126 | "State %s is accepting, putting own id\n", | ||
127 | GNUNET_h2s (key)); | ||
128 | size = sizeof(struct RegexAcceptBlock); | ||
129 | ab.purpose.size = ntohl (sizeof(struct GNUNET_CRYPTO_EccSignaturePurpose) | ||
130 | + sizeof(struct GNUNET_TIME_AbsoluteNBO) | ||
131 | + sizeof(struct GNUNET_HashCode)); | ||
132 | ab.purpose.purpose = ntohl (GNUNET_SIGNATURE_PURPOSE_REGEX_ACCEPT); | ||
133 | ab.expiration_time = GNUNET_TIME_absolute_hton ( | ||
134 | GNUNET_TIME_relative_to_absolute (GNUNET_CONSTANTS_DHT_MAX_EXPIRATION)); | ||
135 | ab.key = *key; | ||
136 | GNUNET_CRYPTO_eddsa_key_get_public (h->priv, | ||
137 | &ab.peer.public_key); | ||
138 | GNUNET_assert (GNUNET_OK == | ||
139 | GNUNET_CRYPTO_eddsa_sign_ (h->priv, | ||
140 | &ab.purpose, | ||
141 | &ab.signature)); | ||
142 | |||
143 | GNUNET_STATISTICS_update (h->stats, "# regex accepting blocks stored", | ||
144 | 1, GNUNET_NO); | ||
145 | GNUNET_STATISTICS_update (h->stats, "# regex accepting block bytes stored", | ||
146 | sizeof(struct RegexAcceptBlock), GNUNET_NO); | ||
147 | (void) | ||
148 | GNUNET_DHT_put (h->dht, key, | ||
149 | DHT_REPLICATION, | ||
150 | DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE, | ||
151 | GNUNET_BLOCK_TYPE_REGEX_ACCEPT, | ||
152 | size, | ||
153 | &ab, | ||
154 | GNUNET_TIME_relative_to_absolute (DHT_TTL), | ||
155 | NULL, NULL); | ||
156 | } | ||
157 | block = REGEX_BLOCK_create (proof, | ||
158 | num_edges, | ||
159 | edges, | ||
160 | accepting, | ||
161 | &size); | ||
162 | if (NULL == block) | ||
163 | return; | ||
164 | (void) GNUNET_DHT_put (h->dht, | ||
165 | key, | ||
166 | DHT_REPLICATION, | ||
167 | DHT_OPT, | ||
168 | GNUNET_BLOCK_TYPE_REGEX, | ||
169 | size, | ||
170 | block, | ||
171 | GNUNET_TIME_relative_to_absolute (DHT_TTL), | ||
172 | NULL, | ||
173 | NULL); | ||
174 | GNUNET_STATISTICS_update (h->stats, | ||
175 | "# regex blocks stored", | ||
176 | 1, | ||
177 | GNUNET_NO); | ||
178 | GNUNET_STATISTICS_update (h->stats, | ||
179 | "# regex block bytes stored", | ||
180 | size, | ||
181 | GNUNET_NO); | ||
182 | GNUNET_free (block); | ||
183 | } | ||
184 | |||
185 | |||
186 | /** | ||
187 | * Announce a regular expression: put all states of the automaton in the DHT. | ||
188 | * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that. | ||
189 | * | ||
190 | * @param dht An existing and valid DHT service handle. CANNOT be NULL. | ||
191 | * @param priv our private key, must remain valid until the announcement is cancelled | ||
192 | * @param regex Regular expression to announce. | ||
193 | * @param compression How many characters per edge can we squeeze? | ||
194 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
195 | * @return Handle to reuse o free cached resources. | ||
196 | * Must be freed by calling #REGEX_INTERNAL_announce_cancel(). | ||
197 | */ | ||
198 | struct REGEX_INTERNAL_Announcement * | ||
199 | REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht, | ||
200 | const struct GNUNET_CRYPTO_EddsaPrivateKey *priv, | ||
201 | const char *regex, | ||
202 | uint16_t compression, | ||
203 | struct GNUNET_STATISTICS_Handle *stats) | ||
204 | { | ||
205 | struct REGEX_INTERNAL_Announcement *h; | ||
206 | |||
207 | GNUNET_assert (NULL != dht); | ||
208 | h = GNUNET_new (struct REGEX_INTERNAL_Announcement); | ||
209 | h->regex = regex; | ||
210 | h->dht = dht; | ||
211 | h->stats = stats; | ||
212 | h->priv = priv; | ||
213 | h->dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), compression); | ||
214 | REGEX_INTERNAL_reannounce (h); | ||
215 | return h; | ||
216 | } | ||
217 | |||
218 | |||
219 | /** | ||
220 | * Announce again a regular expression previously announced. | ||
221 | * Uses caching to speed up the process. | ||
222 | * | ||
223 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
224 | */ | ||
225 | void | ||
226 | REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h) | ||
227 | { | ||
228 | GNUNET_assert (NULL != h->dfa); /* make sure to call announce first */ | ||
229 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
230 | "REGEX_INTERNAL_reannounce: %s\n", | ||
231 | h->regex); | ||
232 | REGEX_INTERNAL_iterate_reachable_edges (h->dfa, | ||
233 | &regex_iterator, | ||
234 | h); | ||
235 | } | ||
236 | |||
237 | |||
238 | /** | ||
239 | * Clear all cached data used by a regex announce. | ||
240 | * Does not close DHT connection. | ||
241 | * | ||
242 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
243 | */ | ||
244 | void | ||
245 | REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h) | ||
246 | { | ||
247 | REGEX_INTERNAL_automaton_destroy (h->dfa); | ||
248 | GNUNET_free (h); | ||
249 | } | ||
250 | |||
251 | |||
252 | /******************************************************************************/ | ||
253 | |||
254 | |||
255 | /** | ||
256 | * Struct to keep state of running searches that have consumed a part of | ||
257 | * the initial string. | ||
258 | */ | ||
259 | struct RegexSearchContext | ||
260 | { | ||
261 | /** | ||
262 | * Part of the description already consumed by | ||
263 | * this particular search branch. | ||
264 | */ | ||
265 | size_t position; | ||
266 | |||
267 | /** | ||
268 | * Information about the search. | ||
269 | */ | ||
270 | struct REGEX_INTERNAL_Search *info; | ||
271 | |||
272 | /** | ||
273 | * We just want to look for one edge, the longer the better. | ||
274 | * Keep its length. | ||
275 | */ | ||
276 | unsigned int longest_match; | ||
277 | |||
278 | /** | ||
279 | * Destination hash of the longest match. | ||
280 | */ | ||
281 | struct GNUNET_HashCode hash; | ||
282 | }; | ||
283 | |||
284 | |||
285 | /** | ||
286 | * Type of values in `dht_get_results`. | ||
287 | */ | ||
288 | struct Result | ||
289 | { | ||
290 | /** | ||
291 | * Number of bytes in data. | ||
292 | */ | ||
293 | size_t size; | ||
294 | |||
295 | /** | ||
296 | * The raw result data. | ||
297 | */ | ||
298 | const void *data; | ||
299 | }; | ||
300 | |||
301 | |||
302 | /** | ||
303 | * Struct to keep information of searches of services described by a regex | ||
304 | * using a user-provided string service description. | ||
305 | */ | ||
306 | struct REGEX_INTERNAL_Search | ||
307 | { | ||
308 | /** | ||
309 | * DHT handle to use, must be initialized externally. | ||
310 | */ | ||
311 | struct GNUNET_DHT_Handle *dht; | ||
312 | |||
313 | /** | ||
314 | * Optional statistics handle to report usage. Can be NULL. | ||
315 | */ | ||
316 | struct GNUNET_STATISTICS_Handle *stats; | ||
317 | |||
318 | /** | ||
319 | * User provided description of the searched service. | ||
320 | */ | ||
321 | char *description; | ||
322 | |||
323 | /** | ||
324 | * Running DHT GETs. | ||
325 | */ | ||
326 | struct GNUNET_CONTAINER_MultiHashMap *dht_get_handles; | ||
327 | |||
328 | /** | ||
329 | * Results from running DHT GETs, values are of type | ||
330 | * 'struct Result'. | ||
331 | */ | ||
332 | struct GNUNET_CONTAINER_MultiHashMap *dht_get_results; | ||
333 | |||
334 | /** | ||
335 | * Contexts, for each running DHT GET. Free all on end of search. | ||
336 | */ | ||
337 | struct RegexSearchContext **contexts; | ||
338 | |||
339 | /** | ||
340 | * Number of contexts (branches/steps in search). | ||
341 | */ | ||
342 | unsigned int n_contexts; | ||
343 | |||
344 | /** | ||
345 | * Callback for found peers. | ||
346 | */ | ||
347 | REGEX_INTERNAL_Found callback; | ||
348 | |||
349 | /** | ||
350 | * Closure for @c callback. | ||
351 | */ | ||
352 | void *callback_cls; | ||
353 | }; | ||
354 | |||
355 | |||
356 | /** | ||
357 | * Jump to the next edge, with the longest matching token. | ||
358 | * | ||
359 | * @param block Block found in the DHT. | ||
360 | * @param size Size of the block. | ||
361 | * @param ctx Context of the search. | ||
362 | */ | ||
363 | static void | ||
364 | regex_next_edge (const struct RegexBlock *block, | ||
365 | size_t size, | ||
366 | struct RegexSearchContext *ctx); | ||
367 | |||
368 | |||
369 | /** | ||
370 | * Function to process DHT string to regex matching. | ||
371 | * Called on each result obtained for the DHT search. | ||
372 | * | ||
373 | * @param cls Closure (search context). | ||
374 | * @param exp When will this value expire. | ||
375 | * @param key Key of the result. | ||
376 | * @param get_path Path of the get request. | ||
377 | * @param get_path_length Length of get_path. | ||
378 | * @param put_path Path of the put request. | ||
379 | * @param put_path_length Length of the put_path. | ||
380 | * @param type Type of the result. | ||
381 | * @param size Number of bytes in data. | ||
382 | * @param data Pointer to the result data. | ||
383 | */ | ||
384 | static void | ||
385 | dht_get_string_accept_handler (void *cls, struct GNUNET_TIME_Absolute exp, | ||
386 | const struct GNUNET_HashCode *key, | ||
387 | const struct GNUNET_PeerIdentity *get_path, | ||
388 | unsigned int get_path_length, | ||
389 | const struct GNUNET_PeerIdentity *put_path, | ||
390 | unsigned int put_path_length, | ||
391 | enum GNUNET_BLOCK_Type type, | ||
392 | size_t size, const void *data) | ||
393 | { | ||
394 | const struct RegexAcceptBlock *block = data; | ||
395 | struct RegexSearchContext *ctx = cls; | ||
396 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
397 | |||
398 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
399 | "Regex result accept for %s (key %s)\n", | ||
400 | info->description, GNUNET_h2s (key)); | ||
401 | |||
402 | GNUNET_STATISTICS_update (info->stats, | ||
403 | "# regex accepting blocks found", | ||
404 | 1, GNUNET_NO); | ||
405 | GNUNET_STATISTICS_update (info->stats, | ||
406 | "# regex accepting block bytes found", | ||
407 | size, GNUNET_NO); | ||
408 | info->callback (info->callback_cls, | ||
409 | &block->peer, | ||
410 | get_path, get_path_length, | ||
411 | put_path, put_path_length); | ||
412 | } | ||
413 | |||
414 | |||
415 | /** | ||
416 | * Find a path to a peer that offers a regex service compatible | ||
417 | * with a given string. | ||
418 | * | ||
419 | * @param key The key of the accepting state. | ||
420 | * @param ctx Context containing info about the string, tunnel, etc. | ||
421 | */ | ||
422 | static void | ||
423 | regex_find_path (const struct GNUNET_HashCode *key, | ||
424 | struct RegexSearchContext *ctx) | ||
425 | { | ||
426 | struct GNUNET_DHT_GetHandle *get_h; | ||
427 | |||
428 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
429 | "Accept state found, now searching for paths to %s\n", | ||
430 | GNUNET_h2s (key)); | ||
431 | get_h = GNUNET_DHT_get_start (ctx->info->dht, /* handle */ | ||
432 | GNUNET_BLOCK_TYPE_REGEX_ACCEPT, /* type */ | ||
433 | key, /* key to search */ | ||
434 | DHT_REPLICATION, /* replication level */ | ||
435 | DHT_OPT | GNUNET_DHT_RO_RECORD_ROUTE, | ||
436 | NULL, /* xquery */ // FIXME BLOOMFILTER | ||
437 | 0, /* xquery bits */ // FIXME BLOOMFILTER SIZE | ||
438 | &dht_get_string_accept_handler, ctx); | ||
439 | GNUNET_break (GNUNET_OK == | ||
440 | GNUNET_CONTAINER_multihashmap_put (ctx->info->dht_get_handles, | ||
441 | key, | ||
442 | get_h, | ||
443 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)); | ||
444 | } | ||
445 | |||
446 | |||
447 | /** | ||
448 | * Function to process DHT string to regex matching. | ||
449 | * Called on each result obtained for the DHT search. | ||
450 | * | ||
451 | * @param cls closure (search context) | ||
452 | * @param exp when will this value expire | ||
453 | * @param key key of the result | ||
454 | * @param get_path path of the get request (not used) | ||
455 | * @param get_path_length length of @a get_path (not used) | ||
456 | * @param put_path path of the put request (not used) | ||
457 | * @param put_path_length length of the @a put_path (not used) | ||
458 | * @param type type of the result | ||
459 | * @param size number of bytes in data | ||
460 | * @param data pointer to the result data | ||
461 | * | ||
462 | * TODO: re-issue the request after certain time? cancel after X results? | ||
463 | */ | ||
464 | static void | ||
465 | dht_get_string_handler (void *cls, struct GNUNET_TIME_Absolute exp, | ||
466 | const struct GNUNET_HashCode *key, | ||
467 | const struct GNUNET_PeerIdentity *get_path, | ||
468 | unsigned int get_path_length, | ||
469 | const struct GNUNET_PeerIdentity *put_path, | ||
470 | unsigned int put_path_length, | ||
471 | enum GNUNET_BLOCK_Type type, | ||
472 | size_t size, const void *data) | ||
473 | { | ||
474 | const struct RegexBlock *block = data; | ||
475 | struct RegexSearchContext *ctx = cls; | ||
476 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
477 | size_t len; | ||
478 | struct Result *copy; | ||
479 | |||
480 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
481 | "DHT GET result for %s (%s)\n", | ||
482 | GNUNET_h2s (key), ctx->info->description); | ||
483 | copy = GNUNET_malloc (sizeof(struct Result) + size); | ||
484 | copy->size = size; | ||
485 | copy->data = &copy[1]; | ||
486 | GNUNET_memcpy (&copy[1], block, size); | ||
487 | GNUNET_break (GNUNET_OK == | ||
488 | GNUNET_CONTAINER_multihashmap_put (info->dht_get_results, | ||
489 | key, copy, | ||
490 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_MULTIPLE)); | ||
491 | len = strlen (info->description); | ||
492 | if (len == ctx->position) // String processed | ||
493 | { | ||
494 | if (GNUNET_YES == GNUNET_BLOCK_is_accepting (block, size)) | ||
495 | { | ||
496 | regex_find_path (key, ctx); | ||
497 | } | ||
498 | else | ||
499 | { | ||
500 | LOG (GNUNET_ERROR_TYPE_INFO, "block not accepting!\n"); | ||
501 | /* FIXME REGEX this block not successful, wait for more? start timeout? */ | ||
502 | } | ||
503 | return; | ||
504 | } | ||
505 | regex_next_edge (block, size, ctx); | ||
506 | } | ||
507 | |||
508 | |||
509 | /** | ||
510 | * Iterator over found existing cadet regex blocks that match an ongoing search. | ||
511 | * | ||
512 | * @param cls Closure (current context). | ||
513 | * @param key Current key code (key for cached block). | ||
514 | * @param value Value in the hash map (cached 'struct Result' holding the RegexBlock). | ||
515 | * @return #GNUNET_YES: we should always continue to iterate. | ||
516 | */ | ||
517 | static int | ||
518 | regex_result_iterator (void *cls, | ||
519 | const struct GNUNET_HashCode *key, | ||
520 | void *value) | ||
521 | { | ||
522 | struct Result *result = value; | ||
523 | const struct RegexBlock *block = result->data; | ||
524 | struct RegexSearchContext *ctx = cls; | ||
525 | |||
526 | if ((GNUNET_YES == | ||
527 | GNUNET_BLOCK_is_accepting (block, result->size)) && | ||
528 | (ctx->position == strlen (ctx->info->description))) | ||
529 | { | ||
530 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
531 | "Found accepting known block\n"); | ||
532 | regex_find_path (key, ctx); | ||
533 | return GNUNET_YES; // We found an accept state! | ||
534 | } | ||
535 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
536 | "* %lu, %lu, [%u]\n", | ||
537 | (unsigned long) ctx->position, | ||
538 | strlen (ctx->info->description), | ||
539 | GNUNET_BLOCK_is_accepting (block, result->size)); | ||
540 | regex_next_edge (block, result->size, ctx); | ||
541 | |||
542 | GNUNET_STATISTICS_update (ctx->info->stats, "# regex cadet blocks iterated", | ||
543 | 1, GNUNET_NO); | ||
544 | |||
545 | return GNUNET_YES; | ||
546 | } | ||
547 | |||
548 | |||
549 | /** | ||
550 | * Iterator over edges in a regex block retrieved from the DHT. | ||
551 | * | ||
552 | * @param cls Closure (context of the search). | ||
553 | * @param token Token that follows to next state. | ||
554 | * @param len Length of token. | ||
555 | * @param key Hash of next state. | ||
556 | * @return #GNUNET_YES if should keep iterating, #GNUNET_NO otherwise. | ||
557 | */ | ||
558 | static int | ||
559 | regex_edge_iterator (void *cls, | ||
560 | const char *token, | ||
561 | size_t len, | ||
562 | const struct GNUNET_HashCode *key) | ||
563 | { | ||
564 | struct RegexSearchContext *ctx = cls; | ||
565 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
566 | const char *current; | ||
567 | size_t current_len; | ||
568 | |||
569 | GNUNET_STATISTICS_update (info->stats, "# regex edges iterated", | ||
570 | 1, GNUNET_NO); | ||
571 | current = &info->description[ctx->position]; | ||
572 | current_len = strlen (info->description) - ctx->position; | ||
573 | if (len > current_len) | ||
574 | { | ||
575 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token too long, END\n"); | ||
576 | return GNUNET_YES; | ||
577 | } | ||
578 | if (0 != strncmp (current, token, len)) | ||
579 | { | ||
580 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token doesn't match, END\n"); | ||
581 | return GNUNET_YES; | ||
582 | } | ||
583 | |||
584 | if (len > ctx->longest_match) | ||
585 | { | ||
586 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is longer, KEEP\n"); | ||
587 | ctx->longest_match = len; | ||
588 | ctx->hash = *key; | ||
589 | } | ||
590 | else | ||
591 | { | ||
592 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Token is not longer, IGNORE\n"); | ||
593 | } | ||
594 | |||
595 | LOG (GNUNET_ERROR_TYPE_DEBUG, "* End of regex edge iterator\n"); | ||
596 | return GNUNET_YES; | ||
597 | } | ||
598 | |||
599 | |||
600 | /** | ||
601 | * Jump to the next edge, with the longest matching token. | ||
602 | * | ||
603 | * @param block Block found in the DHT. | ||
604 | * @param size Size of the block. | ||
605 | * @param ctx Context of the search. | ||
606 | */ | ||
607 | static void | ||
608 | regex_next_edge (const struct RegexBlock *block, | ||
609 | size_t size, | ||
610 | struct RegexSearchContext *ctx) | ||
611 | { | ||
612 | struct RegexSearchContext *new_ctx; | ||
613 | struct REGEX_INTERNAL_Search *info = ctx->info; | ||
614 | struct GNUNET_DHT_GetHandle *get_h; | ||
615 | struct GNUNET_HashCode *hash; | ||
616 | const char *rest; | ||
617 | int result; | ||
618 | |||
619 | LOG (GNUNET_ERROR_TYPE_DEBUG, "Next edge\n"); | ||
620 | /* Find the longest match for the current string position, | ||
621 | * among tokens in the given block */ | ||
622 | ctx->longest_match = 0; | ||
623 | result = REGEX_BLOCK_iterate (block, size, | ||
624 | &regex_edge_iterator, ctx); | ||
625 | GNUNET_break (GNUNET_OK == result); | ||
626 | |||
627 | /* Did anything match? */ | ||
628 | if (0 == ctx->longest_match) | ||
629 | { | ||
630 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
631 | "no match in block\n"); | ||
632 | return; | ||
633 | } | ||
634 | |||
635 | hash = &ctx->hash; | ||
636 | new_ctx = GNUNET_new (struct RegexSearchContext); | ||
637 | new_ctx->info = info; | ||
638 | new_ctx->position = ctx->position + ctx->longest_match; | ||
639 | GNUNET_array_append (info->contexts, info->n_contexts, new_ctx); | ||
640 | |||
641 | /* Check whether we already have a DHT GET running for it */ | ||
642 | if (GNUNET_YES == | ||
643 | GNUNET_CONTAINER_multihashmap_contains (info->dht_get_handles, hash)) | ||
644 | { | ||
645 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
646 | "GET for %s running, END\n", | ||
647 | GNUNET_h2s (hash)); | ||
648 | GNUNET_CONTAINER_multihashmap_get_multiple (info->dht_get_results, | ||
649 | hash, | ||
650 | &regex_result_iterator, | ||
651 | new_ctx); | ||
652 | return; /* We are already looking for it */ | ||
653 | } | ||
654 | |||
655 | GNUNET_STATISTICS_update (info->stats, "# regex nodes traversed", | ||
656 | 1, GNUNET_NO); | ||
657 | |||
658 | LOG (GNUNET_ERROR_TYPE_DEBUG, | ||
659 | "Following edges at %s for offset %u in `%s'\n", | ||
660 | GNUNET_h2s (hash), | ||
661 | (unsigned int) ctx->position, | ||
662 | info->description); | ||
663 | rest = &new_ctx->info->description[new_ctx->position]; | ||
664 | get_h = | ||
665 | GNUNET_DHT_get_start (info->dht, /* handle */ | ||
666 | GNUNET_BLOCK_TYPE_REGEX, /* type */ | ||
667 | hash, /* key to search */ | ||
668 | DHT_REPLICATION, /* replication level */ | ||
669 | DHT_OPT, | ||
670 | rest, /* xquery */ | ||
671 | strlen (rest) + 1, /* xquery bits */ | ||
672 | &dht_get_string_handler, new_ctx); | ||
673 | if (GNUNET_OK != | ||
674 | GNUNET_CONTAINER_multihashmap_put (info->dht_get_handles, | ||
675 | hash, | ||
676 | get_h, | ||
677 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST)) | ||
678 | { | ||
679 | GNUNET_break (0); | ||
680 | return; | ||
681 | } | ||
682 | } | ||
683 | |||
684 | |||
685 | /** | ||
686 | * Search for a peer offering a regex matching certain string in the DHT. | ||
687 | * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results | ||
688 | * are returned. | ||
689 | * | ||
690 | * @param dht An existing and valid DHT service handle. | ||
691 | * @param string String to match against the regexes in the DHT. | ||
692 | * @param callback Callback for found peers. | ||
693 | * @param callback_cls Closure for @c callback. | ||
694 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
695 | * @return Handle to stop search and free resources. | ||
696 | * Must be freed by calling #REGEX_INTERNAL_search_cancel(). | ||
697 | */ | ||
698 | struct REGEX_INTERNAL_Search * | ||
699 | REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht, | ||
700 | const char *string, | ||
701 | REGEX_INTERNAL_Found callback, | ||
702 | void *callback_cls, | ||
703 | struct GNUNET_STATISTICS_Handle *stats) | ||
704 | { | ||
705 | struct REGEX_INTERNAL_Search *h; | ||
706 | struct GNUNET_DHT_GetHandle *get_h; | ||
707 | struct RegexSearchContext *ctx; | ||
708 | struct GNUNET_HashCode key; | ||
709 | size_t size; | ||
710 | size_t len; | ||
711 | |||
712 | /* Initialize handle */ | ||
713 | GNUNET_assert (NULL != dht); | ||
714 | GNUNET_assert (NULL != callback); | ||
715 | h = GNUNET_new (struct REGEX_INTERNAL_Search); | ||
716 | h->dht = dht; | ||
717 | h->description = GNUNET_strdup (string); | ||
718 | h->callback = callback; | ||
719 | h->callback_cls = callback_cls; | ||
720 | h->stats = stats; | ||
721 | h->dht_get_handles = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO); | ||
722 | h->dht_get_results = GNUNET_CONTAINER_multihashmap_create (32, GNUNET_NO); | ||
723 | |||
724 | /* Initialize context */ | ||
725 | len = strlen (string); | ||
726 | size = REGEX_INTERNAL_get_first_key (string, len, &key); | ||
727 | LOG (GNUNET_ERROR_TYPE_INFO, | ||
728 | "Initial key for `%s' is %s (based on `%.*s')\n", | ||
729 | string, | ||
730 | GNUNET_h2s (&key), | ||
731 | (int) size, | ||
732 | string); | ||
733 | ctx = GNUNET_new (struct RegexSearchContext); | ||
734 | ctx->position = size; | ||
735 | ctx->info = h; | ||
736 | GNUNET_array_append (h->contexts, | ||
737 | h->n_contexts, | ||
738 | ctx); | ||
739 | /* Start search in DHT */ | ||
740 | get_h = GNUNET_DHT_get_start (h->dht, /* handle */ | ||
741 | GNUNET_BLOCK_TYPE_REGEX, /* type */ | ||
742 | &key, /* key to search */ | ||
743 | DHT_REPLICATION, /* replication level */ | ||
744 | DHT_OPT, | ||
745 | &h->description[size], /* xquery */ | ||
746 | // FIXME add BLOOMFILTER to exclude filtered peers | ||
747 | len + 1 - size, /* xquery bits */ | ||
748 | // FIXME add BLOOMFILTER SIZE | ||
749 | &dht_get_string_handler, ctx); | ||
750 | GNUNET_break ( | ||
751 | GNUNET_OK == | ||
752 | GNUNET_CONTAINER_multihashmap_put (h->dht_get_handles, | ||
753 | &key, | ||
754 | get_h, | ||
755 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST) | ||
756 | ); | ||
757 | |||
758 | return h; | ||
759 | } | ||
760 | |||
761 | |||
762 | /** | ||
763 | * Iterator over hash map entries to cancel DHT GET requests after a | ||
764 | * successful connect_by_string. | ||
765 | * | ||
766 | * @param cls Closure (unused). | ||
767 | * @param key Current key code (unused). | ||
768 | * @param value Value in the hash map (get handle). | ||
769 | * @return #GNUNET_YES if we should continue to iterate, | ||
770 | * #GNUNET_NO if not. | ||
771 | */ | ||
772 | static int | ||
773 | regex_cancel_dht_get (void *cls, | ||
774 | const struct GNUNET_HashCode *key, | ||
775 | void *value) | ||
776 | { | ||
777 | struct GNUNET_DHT_GetHandle *h = value; | ||
778 | |||
779 | GNUNET_DHT_get_stop (h); | ||
780 | return GNUNET_YES; | ||
781 | } | ||
782 | |||
783 | |||
784 | /** | ||
785 | * Iterator over hash map entries to free the cached results (of type | ||
786 | * 'struct Result') stored during the search for connect_by_string. | ||
787 | * | ||
788 | * @param cls Closure (unused). | ||
789 | * @param key Current key code (unused). | ||
790 | * @param value Cached 'struct Result' in the hash map. | ||
791 | * @return #GNUNET_YES if we should continue to iterate, | ||
792 | * #GNUNET_NO if not. | ||
793 | */ | ||
794 | static int | ||
795 | regex_free_result (void *cls, | ||
796 | const struct GNUNET_HashCode *key, | ||
797 | void *value) | ||
798 | { | ||
799 | GNUNET_free (value); | ||
800 | return GNUNET_YES; | ||
801 | } | ||
802 | |||
803 | |||
804 | /** | ||
805 | * Cancel an ongoing regex search in the DHT and free all resources. | ||
806 | * | ||
807 | * @param h the search context. | ||
808 | */ | ||
809 | void | ||
810 | REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h) | ||
811 | { | ||
812 | unsigned int i; | ||
813 | |||
814 | GNUNET_free (h->description); | ||
815 | GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_handles, | ||
816 | &regex_cancel_dht_get, NULL); | ||
817 | GNUNET_CONTAINER_multihashmap_iterate (h->dht_get_results, | ||
818 | &regex_free_result, NULL); | ||
819 | GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_results); | ||
820 | GNUNET_CONTAINER_multihashmap_destroy (h->dht_get_handles); | ||
821 | if (0 < h->n_contexts) | ||
822 | { | ||
823 | for (i = 0; i < h->n_contexts; i++) | ||
824 | GNUNET_free (h->contexts[i]); | ||
825 | GNUNET_free (h->contexts); | ||
826 | } | ||
827 | GNUNET_free (h); | ||
828 | } | ||
829 | |||
830 | |||
831 | /* end of regex_internal_dht.c */ | ||
diff --git a/src/regex/regex_internal_lib.h b/src/regex/regex_internal_lib.h deleted file mode 100644 index 94fac19f9..000000000 --- a/src/regex/regex_internal_lib.h +++ /dev/null | |||
@@ -1,268 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_internal_lib.h | ||
22 | * @brief library to parse regular expressions into dfa | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | |||
26 | #ifndef REGEX_INTERNAL_LIB_H | ||
27 | #define REGEX_INTERNAL_LIB_H | ||
28 | |||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "gnunet_dht_service.h" | ||
31 | #include "gnunet_statistics_service.h" | ||
32 | #include "regex_block_lib.h" | ||
33 | |||
34 | #ifdef __cplusplus | ||
35 | extern "C" | ||
36 | { | ||
37 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
38 | } | ||
39 | #endif | ||
40 | #endif | ||
41 | |||
42 | |||
43 | /** | ||
44 | * Automaton (NFA/DFA) representation. | ||
45 | */ | ||
46 | struct REGEX_INTERNAL_Automaton; | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Construct DFA for the given 'regex' of length 'len'. | ||
51 | * | ||
52 | * Path compression means that, for example, a DFA o -> a -> b -> c -> o will be | ||
53 | * compressed to o -> abc -> o. Note that this parameter influences the | ||
54 | * non-determinism of states of the resulting NFA in the DHT (number of outgoing | ||
55 | * edges with the same label). For example for an application that stores IPv4 | ||
56 | * addresses as bitstrings it could make sense to limit the path compression to | ||
57 | * 4 or 8. | ||
58 | * | ||
59 | * @param regex regular expression string. | ||
60 | * @param len length of the regular expression. | ||
61 | * @param max_path_len limit the path compression length to the | ||
62 | * given value. If set to 1, no path compression is applied. Set to 0 for | ||
63 | * maximal possible path compression (generally not desirable). | ||
64 | * @return DFA, needs to be freed using #REGEX_INTERNAL_automaton_destroy(). | ||
65 | */ | ||
66 | struct REGEX_INTERNAL_Automaton * | ||
67 | REGEX_INTERNAL_construct_dfa (const char *regex, | ||
68 | const size_t len, | ||
69 | unsigned int max_path_len); | ||
70 | |||
71 | |||
72 | /** | ||
73 | * Free the memory allocated by constructing the REGEX_INTERNAL_Automaton | ||
74 | * data structure. | ||
75 | * | ||
76 | * @param a automaton to be destroyed. | ||
77 | */ | ||
78 | void | ||
79 | REGEX_INTERNAL_automaton_destroy (struct REGEX_INTERNAL_Automaton *a); | ||
80 | |||
81 | |||
82 | /** | ||
83 | * Evaluates the given 'string' against the given compiled regex. | ||
84 | * | ||
85 | * @param a automaton. | ||
86 | * @param string string to check. | ||
87 | * | ||
88 | * @return 0 if string matches, non-zero otherwise. | ||
89 | */ | ||
90 | int | ||
91 | REGEX_INTERNAL_eval (struct REGEX_INTERNAL_Automaton *a, | ||
92 | const char *string); | ||
93 | |||
94 | |||
95 | /** | ||
96 | * Get the first key for the given @a input_string. This hashes | ||
97 | * the first x bytes (characters) of the @a input_string. | ||
98 | * | ||
99 | * @param input_string string. | ||
100 | * @param string_len length of the @a input_string. | ||
101 | * @param key pointer to where to write the hash code. | ||
102 | * @return number of bytes of @a input_string that have been consumed | ||
103 | * to construct the key | ||
104 | */ | ||
105 | size_t | ||
106 | REGEX_INTERNAL_get_first_key (const char *input_string, | ||
107 | size_t string_len, | ||
108 | struct GNUNET_HashCode *key); | ||
109 | |||
110 | |||
111 | /** | ||
112 | * Iterator callback function. | ||
113 | * | ||
114 | * @param cls closure. | ||
115 | * @param key hash for current state. | ||
116 | * @param proof proof for current state | ||
117 | * @param accepting #GNUNET_YES if this is an accepting state, #GNUNET_NO if not. | ||
118 | * @param num_edges number of edges leaving current state. | ||
119 | * @param edges edges leaving current state. | ||
120 | */ | ||
121 | typedef void | ||
122 | (*REGEX_INTERNAL_KeyIterator)(void *cls, | ||
123 | const struct GNUNET_HashCode *key, | ||
124 | const char *proof, | ||
125 | int accepting, | ||
126 | unsigned int num_edges, | ||
127 | const struct REGEX_BLOCK_Edge *edges); | ||
128 | |||
129 | |||
130 | /** | ||
131 | * Iterate over all edges starting from the start state of automaton 'a', | ||
132 | * calling the iterator for each edge. | ||
133 | * | ||
134 | * @param a automaton. | ||
135 | * @param iterator iterator called for each edge. | ||
136 | * @param iterator_cls closure. | ||
137 | */ | ||
138 | void | ||
139 | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | ||
140 | REGEX_INTERNAL_KeyIterator iterator, | ||
141 | void *iterator_cls); | ||
142 | |||
143 | |||
144 | /** | ||
145 | * Iterate over all edges of automaton 'a' that are reachable from a state with | ||
146 | * a proof of at least #GNUNET_REGEX_INITIAL_BYTES characters. | ||
147 | * | ||
148 | * Call the iterator for each such edge. | ||
149 | * | ||
150 | * @param a automaton. | ||
151 | * @param iterator iterator called for each reachable edge. | ||
152 | * @param iterator_cls closure. | ||
153 | */ | ||
154 | void | ||
155 | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, | ||
156 | REGEX_INTERNAL_KeyIterator iterator, | ||
157 | void *iterator_cls); | ||
158 | |||
159 | |||
160 | /** | ||
161 | * Handle to store cached data about a regex announce. | ||
162 | */ | ||
163 | struct REGEX_INTERNAL_Announcement; | ||
164 | |||
165 | /** | ||
166 | * Handle to store data about a regex search. | ||
167 | */ | ||
168 | struct REGEX_INTERNAL_Search; | ||
169 | |||
170 | |||
171 | /** | ||
172 | * Announce a regular expression: put all states of the automaton in the DHT. | ||
173 | * Does not free resources, must call #REGEX_INTERNAL_announce_cancel() for that. | ||
174 | * | ||
175 | * @param dht An existing and valid DHT service handle. CANNOT be NULL. | ||
176 | * @param priv our private key, must remain valid until the announcement is cancelled | ||
177 | * @param regex Regular expression to announce. | ||
178 | * @param compression How many characters per edge can we squeeze? | ||
179 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
180 | * @return Handle to reuse or free cached resources. | ||
181 | * Must be freed by calling #REGEX_INTERNAL_announce_cancel(). | ||
182 | */ | ||
183 | struct REGEX_INTERNAL_Announcement * | ||
184 | REGEX_INTERNAL_announce (struct GNUNET_DHT_Handle *dht, | ||
185 | const struct GNUNET_CRYPTO_EddsaPrivateKey *priv, | ||
186 | const char *regex, | ||
187 | uint16_t compression, | ||
188 | struct GNUNET_STATISTICS_Handle *stats); | ||
189 | |||
190 | |||
191 | /** | ||
192 | * Announce again a regular expression previously announced. | ||
193 | * Uses caching to speed up the process. | ||
194 | * | ||
195 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
196 | */ | ||
197 | void | ||
198 | REGEX_INTERNAL_reannounce (struct REGEX_INTERNAL_Announcement *h); | ||
199 | |||
200 | |||
201 | /** | ||
202 | * Clear all cached data used by a regex announce. | ||
203 | * Does not close DHT connection. | ||
204 | * | ||
205 | * @param h Handle returned by a previous #REGEX_INTERNAL_announce() call. | ||
206 | */ | ||
207 | void | ||
208 | REGEX_INTERNAL_announce_cancel (struct REGEX_INTERNAL_Announcement *h); | ||
209 | |||
210 | |||
211 | /** | ||
212 | * Search callback function. | ||
213 | * | ||
214 | * @param cls Closure provided in #REGEX_INTERNAL_search(). | ||
215 | * @param id Peer providing a regex that matches the string. | ||
216 | * @param get_path Path of the get request. | ||
217 | * @param get_path_length Length of @a get_path. | ||
218 | * @param put_path Path of the put request. | ||
219 | * @param put_path_length Length of the @a put_path. | ||
220 | */ | ||
221 | typedef void | ||
222 | (*REGEX_INTERNAL_Found)(void *cls, | ||
223 | const struct GNUNET_PeerIdentity *id, | ||
224 | const struct GNUNET_PeerIdentity *get_path, | ||
225 | unsigned int get_path_length, | ||
226 | const struct GNUNET_PeerIdentity *put_path, | ||
227 | unsigned int put_path_length); | ||
228 | |||
229 | |||
230 | /** | ||
231 | * Search for a peer offering a regex matching certain string in the DHT. | ||
232 | * The search runs until #REGEX_INTERNAL_search_cancel() is called, even if results | ||
233 | * are returned. | ||
234 | * | ||
235 | * @param dht An existing and valid DHT service handle. | ||
236 | * @param string String to match against the regexes in the DHT. | ||
237 | * @param callback Callback for found peers. | ||
238 | * @param callback_cls Closure for @c callback. | ||
239 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
240 | * @return Handle to stop search and free resources. | ||
241 | * Must be freed by calling #REGEX_INTERNAL_search_cancel(). | ||
242 | */ | ||
243 | struct REGEX_INTERNAL_Search * | ||
244 | REGEX_INTERNAL_search (struct GNUNET_DHT_Handle *dht, | ||
245 | const char *string, | ||
246 | REGEX_INTERNAL_Found callback, | ||
247 | void *callback_cls, | ||
248 | struct GNUNET_STATISTICS_Handle *stats); | ||
249 | |||
250 | /** | ||
251 | * Stop search and free all data used by a #REGEX_INTERNAL_search() call. | ||
252 | * Does not close DHT connection. | ||
253 | * | ||
254 | * @param h Handle returned by a previous #REGEX_INTERNAL_search() call. | ||
255 | */ | ||
256 | void | ||
257 | REGEX_INTERNAL_search_cancel (struct REGEX_INTERNAL_Search *h); | ||
258 | |||
259 | |||
260 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
261 | { | ||
262 | #endif | ||
263 | #ifdef __cplusplus | ||
264 | } | ||
265 | #endif | ||
266 | |||
267 | /* end of regex_internal_lib.h */ | ||
268 | #endif | ||
diff --git a/src/regex/regex_ipc.h b/src/regex/regex_ipc.h deleted file mode 100644 index b5a474d56..000000000 --- a/src/regex/regex_ipc.h +++ /dev/null | |||
@@ -1,104 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012, 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL-3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_ipc.h | ||
22 | * @brief regex IPC messages (not called 'regex.h' due to conflict with | ||
23 | * system headers) | ||
24 | * @author Christian Grothoff | ||
25 | */ | ||
26 | #ifndef REGEX_IPC_H | ||
27 | #define REGEX_IPC_H | ||
28 | |||
29 | #include "gnunet_util_lib.h" | ||
30 | |||
31 | /** | ||
32 | * Request for regex service to announce capability. This struct is only the fixed-size head of the message; the 0-terminated regex string follows directly after it. | ||
33 | */ | ||
34 | struct AnnounceMessage | ||
35 | { | ||
36 | /** | ||
37 | * Message header; type is GNUNET_MESSAGE_TYPE_REGEX_ANNOUNCE | ||
38 | */ | ||
39 | struct GNUNET_MessageHeader header; | ||
40 | |||
41 | /** | ||
42 | * How many characters can we squeeze per edge? NOTE(review): byte order of this field is not visible here; presumably network byte order, confirm against sender and receiver. | ||
43 | */ | ||
44 | uint16_t compression; | ||
45 | |||
46 | /** | ||
47 | * Always zero. | ||
48 | */ | ||
49 | uint16_t reserved; | ||
50 | |||
51 | /** | ||
52 | * Delay between repeated announcements (stored as a GNUNET_TIME_RelativeNBO value). | ||
53 | */ | ||
54 | struct GNUNET_TIME_RelativeNBO refresh_delay; | ||
55 | |||
56 | /* followed by 0-terminated regex as string */ | ||
57 | }; | ||
58 | |||
59 | |||
60 | /** | ||
61 | * Message to initiate regex search. Only the header is fixed-size; the 0-terminated search string follows directly after it. | ||
62 | */ | ||
63 | struct RegexSearchMessage | ||
64 | { | ||
65 | /** | ||
66 | * Message header; type is GNUNET_MESSAGE_TYPE_REGEX_SEARCH | ||
67 | */ | ||
68 | struct GNUNET_MessageHeader header; | ||
69 | |||
70 | /* followed by 0-terminated search string */ | ||
71 | }; | ||
72 | |||
73 | |||
74 | /** | ||
75 | * Result from regex search. Fixed-size head of the message; the GET path array and then the PUT path array follow after it. | ||
76 | */ | ||
77 | struct ResultMessage | ||
78 | { | ||
79 | /** | ||
80 | * Message header; type is GNUNET_MESSAGE_TYPE_REGEX_RESULT | ||
81 | */ | ||
82 | struct GNUNET_MessageHeader header; | ||
83 | |||
84 | /** | ||
85 | * Number of entries in the GET path. NOTE(review): byte order is not visible here; confirm against sender and receiver. | ||
86 | */ | ||
87 | uint16_t get_path_length; | ||
88 | |||
89 | /** | ||
90 | * Number of entries in the PUT path. NOTE(review): byte order is not visible here; confirm against sender and receiver. | ||
91 | */ | ||
92 | uint16_t put_path_length; | ||
93 | |||
94 | /** | ||
95 | * Identity of the peer that was found. | ||
96 | */ | ||
97 | struct GNUNET_PeerIdentity id; | ||
98 | |||
99 | /* followed by GET path and PUT path arrays (presumably of struct GNUNET_PeerIdentity entries; TODO confirm) */ | ||
100 | }; | ||
101 | |||
102 | |||
103 | /* end of regex_ipc.h */ | ||
104 | #endif | ||
diff --git a/src/regex/regex_simulation_profiler_test.conf b/src/regex/regex_simulation_profiler_test.conf deleted file mode 100644 index 9384aa249..000000000 --- a/src/regex/regex_simulation_profiler_test.conf +++ /dev/null | |||
@@ -1,7 +0,0 @@ | |||
1 | [regex-mysql] | ||
2 | DATABASE = regex | ||
3 | USER = gnunet | ||
4 | PASSWORD = | ||
5 | HOST = localhost | ||
6 | PORT = 3306 | ||
7 | REGEX_PREFIX = GNVPN-0001-PAD | ||
diff --git a/src/regex/regex_test_graph.c b/src/regex/regex_test_graph.c deleted file mode 100644 index c8efae772..000000000 --- a/src/regex/regex_test_graph.c +++ /dev/null | |||
@@ -1,317 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL-3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_graph.c | ||
22 | * @brief functions for creating .dot graphs from regexes | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "regex_internal_lib.h" | ||
27 | #include "regex_test_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
30 | /** | ||
31 | * Context for graph creation. Passed as the cls to | ||
32 | * REGEX_TEST_automaton_save_graph_step, which writes one node line and its outgoing edge lines per state. | ||
33 | */ | ||
34 | struct REGEX_TEST_Graph_Context | ||
35 | { | ||
36 | /** | ||
37 | * File pointer to the dot file used for output; must already be open for writing. | ||
38 | */ | ||
39 | FILE *filep; | ||
40 | |||
41 | /** | ||
42 | * Verbose flag, if it's set to GNUNET_YES node names additionally carry the | ||
43 | * state's name, proof and hash; otherwise only the dfs_id is printed. | ||
44 | */ | ||
45 | int verbose; | ||
46 | |||
47 | /** | ||
48 | * Coloring flag, if set to GNUNET_YES nodes and edges get a color derived from the state's scc_id. | ||
49 | */ | ||
50 | int coloring; | ||
51 | }; | ||
52 | |||
53 | |||
54 | /** | ||
55 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside | ||
56 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all | ||
57 | * SCCs inside an automaton. | ||
58 | * | ||
59 | * @param scc_counter counter for numbering the sccs | ||
60 | * @param v start vertex | ||
61 | * @param index current index | ||
62 | * @param stack stack for saving all SCCs | ||
63 | * @param stack_size current size of the stack | ||
64 | */ | ||
65 | static void | ||
66 | scc_tarjan_strongconnect (unsigned int *scc_counter, | ||
67 | struct REGEX_INTERNAL_State *v, unsigned int *index, | ||
68 | struct REGEX_INTERNAL_State **stack, | ||
69 | unsigned int *stack_size) | ||
70 | { | ||
71 | struct REGEX_INTERNAL_State *w; | ||
72 | struct REGEX_INTERNAL_Transition *t; | ||
73 | |||
74 | v->index = *index; | ||
75 | v->lowlink = *index; | ||
76 | (*index)++; | ||
77 | stack[(*stack_size)++] = v; | ||
78 | v->contained = 1; | ||
79 | |||
80 | for (t = v->transitions_head; NULL != t; t = t->next) | ||
81 | { | ||
82 | w = t->to_state; | ||
83 | |||
84 | if (NULL == w) | ||
85 | continue; | ||
86 | |||
87 | if (w->index < 0) | ||
88 | { | ||
89 | scc_tarjan_strongconnect (scc_counter, w, index, stack, stack_size); | ||
90 | v->lowlink = (v->lowlink > w->lowlink) ? w->lowlink : v->lowlink; | ||
91 | } | ||
92 | else if (1 == w->contained) | ||
93 | v->lowlink = (v->lowlink > w->index) ? w->index : v->lowlink; | ||
94 | } | ||
95 | |||
96 | if (v->lowlink == v->index) | ||
97 | { | ||
98 | (*scc_counter)++; | ||
99 | do | ||
100 | { | ||
101 | w = stack[--(*stack_size)]; | ||
102 | w->contained = 0; | ||
103 | w->scc_id = *scc_counter; | ||
104 | } | ||
105 | while (w != v); | ||
106 | } | ||
107 | } | ||
108 | |||
109 | |||
110 | /** | ||
111 | * Detect all SCCs (Strongly Connected Components) inside the given automaton. | ||
112 | * SCCs will be marked using the scc_id on each state. | ||
113 | * | ||
114 | * @param a the automaton for which SCCs should be computed and assigned. | ||
115 | */ | ||
116 | static void | ||
117 | scc_tarjan (struct REGEX_INTERNAL_Automaton *a) | ||
118 | { | ||
119 | unsigned int index; | ||
120 | unsigned int scc_counter; | ||
121 | struct REGEX_INTERNAL_State *v; | ||
122 | struct REGEX_INTERNAL_State *stack[a->state_count]; | ||
123 | unsigned int stack_size; | ||
124 | |||
125 | for (v = a->states_head; NULL != v; v = v->next) | ||
126 | { | ||
127 | v->contained = 0; | ||
128 | v->index = -1; | ||
129 | v->lowlink = -1; | ||
130 | } | ||
131 | |||
132 | stack_size = 0; | ||
133 | index = 0; | ||
134 | scc_counter = 0; | ||
135 | |||
136 | for (v = a->states_head; NULL != v; v = v->next) | ||
137 | { | ||
138 | if (v->index < 0) | ||
139 | scc_tarjan_strongconnect (&scc_counter, v, &index, stack, &stack_size); | ||
140 | } | ||
141 | } | ||
142 | |||
143 | |||
144 | /** | ||
145 | * Save a state to an open file pointer. cls is expected to be a file pointer to | ||
146 | * an open file. Used only in conjunction with | ||
147 | * REGEX_TEST_automaton_save_graph. | ||
148 | * | ||
149 | * @param cls file pointer. | ||
150 | * @param count current count of the state, not used. | ||
151 | * @param s state. | ||
152 | */ | ||
153 | void | ||
154 | REGEX_TEST_automaton_save_graph_step (void *cls, unsigned int count, | ||
155 | struct REGEX_INTERNAL_State *s) | ||
156 | { | ||
157 | struct REGEX_TEST_Graph_Context *ctx = cls; | ||
158 | struct REGEX_INTERNAL_Transition *ctran; | ||
159 | char *s_acc = NULL; | ||
160 | char *s_tran = NULL; | ||
161 | char *name; | ||
162 | char *to_name; | ||
163 | |||
164 | if (GNUNET_YES == ctx->verbose) | ||
165 | GNUNET_asprintf (&name, "%i (%s) (%s) (%s)", s->dfs_id, s->name, s->proof, | ||
166 | GNUNET_h2s (&s->hash)); | ||
167 | else | ||
168 | GNUNET_asprintf (&name, "%i", s->dfs_id); | ||
169 | |||
170 | if (s->accepting) | ||
171 | { | ||
172 | if (GNUNET_YES == ctx->coloring) | ||
173 | { | ||
174 | GNUNET_asprintf (&s_acc, | ||
175 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | ||
176 | name, s->scc_id * s->scc_id); | ||
177 | } | ||
178 | else | ||
179 | { | ||
180 | GNUNET_asprintf (&s_acc, "\"%s\" [shape=doublecircle];\n", name); | ||
181 | } | ||
182 | } | ||
183 | else if (GNUNET_YES == ctx->coloring) | ||
184 | { | ||
185 | GNUNET_asprintf (&s_acc, | ||
186 | "\"%s\" [shape=circle, color=\"0.%i 0.8 0.95\"];\n", name, | ||
187 | s->scc_id * s->scc_id); | ||
188 | } | ||
189 | else | ||
190 | { | ||
191 | GNUNET_asprintf (&s_acc, "\"%s\" [shape=circle];\n", name); | ||
192 | } | ||
193 | |||
194 | GNUNET_assert (NULL != s_acc); | ||
195 | |||
196 | fwrite (s_acc, strlen (s_acc), 1, ctx->filep); | ||
197 | GNUNET_free (s_acc); | ||
198 | s_acc = NULL; | ||
199 | |||
200 | for (ctran = s->transitions_head; NULL != ctran; ctran = ctran->next) | ||
201 | { | ||
202 | if (NULL == ctran->to_state) | ||
203 | { | ||
204 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
205 | "Transition from State %i has no state for transitioning\n", | ||
206 | s->id); | ||
207 | continue; | ||
208 | } | ||
209 | |||
210 | if (GNUNET_YES == ctx->verbose) | ||
211 | { | ||
212 | GNUNET_asprintf (&to_name, "%i (%s) (%s) (%s)", ctran->to_state->dfs_id, | ||
213 | ctran->to_state->name, ctran->to_state->proof, | ||
214 | GNUNET_h2s (&ctran->to_state->hash)); | ||
215 | } | ||
216 | else | ||
217 | GNUNET_asprintf (&to_name, "%i", ctran->to_state->dfs_id); | ||
218 | |||
219 | if (NULL == ctran->label) | ||
220 | { | ||
221 | if (GNUNET_YES == ctx->coloring) | ||
222 | { | ||
223 | GNUNET_asprintf (&s_tran, | ||
224 | "\"%s\" -> \"%s\" [label = \"ε\", color=\"0.%i 0.8 0.95\"];\n", | ||
225 | name, to_name, s->scc_id * s->scc_id); | ||
226 | } | ||
227 | else | ||
228 | { | ||
229 | GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"ε\"];\n", name, | ||
230 | to_name); | ||
231 | } | ||
232 | } | ||
233 | else | ||
234 | { | ||
235 | if (GNUNET_YES == ctx->coloring) | ||
236 | { | ||
237 | GNUNET_asprintf (&s_tran, | ||
238 | "\"%s\" -> \"%s\" [label = \"%s\", color=\"0.%i 0.8 0.95\"];\n", | ||
239 | name, to_name, ctran->label, s->scc_id * s->scc_id); | ||
240 | } | ||
241 | else | ||
242 | { | ||
243 | GNUNET_asprintf (&s_tran, "\"%s\" -> \"%s\" [label = \"%s\"];\n", name, | ||
244 | to_name, ctran->label); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | GNUNET_free (to_name); | ||
249 | |||
250 | GNUNET_assert (NULL != s_tran); | ||
251 | |||
252 | fwrite (s_tran, strlen (s_tran), 1, ctx->filep); | ||
253 | GNUNET_free (s_tran); | ||
254 | s_tran = NULL; | ||
255 | } | ||
256 | |||
257 | GNUNET_free (name); | ||
258 | } | ||
259 | |||
260 | |||
261 | /** | ||
262 | * Save the given automaton as a GraphViz dot file. | ||
263 | * | ||
264 | * @param a the automaton to be saved. | ||
265 | * @param filename where to save the file. | ||
266 | * @param options options for graph generation that include coloring or verbose | ||
267 | * mode | ||
268 | */ | ||
269 | void | ||
270 | REGEX_TEST_automaton_save_graph (struct REGEX_INTERNAL_Automaton *a, | ||
271 | const char *filename, | ||
272 | enum REGEX_TEST_GraphSavingOptions options) | ||
273 | { | ||
274 | char *start; | ||
275 | char *end; | ||
276 | struct REGEX_TEST_Graph_Context ctx; | ||
277 | |||
278 | if (NULL == a) | ||
279 | { | ||
280 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not print NFA, was NULL!"); | ||
281 | return; | ||
282 | } | ||
283 | |||
284 | if ((NULL == filename) || (strlen (filename) < 1)) | ||
285 | { | ||
286 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "No Filename given!"); | ||
287 | return; | ||
288 | } | ||
289 | |||
290 | ctx.filep = fopen (filename, "w"); | ||
291 | ctx.verbose = | ||
292 | (0 == (options & REGEX_TEST_GRAPH_VERBOSE)) ? GNUNET_NO : GNUNET_YES; | ||
293 | ctx.coloring = | ||
294 | (0 == (options & REGEX_TEST_GRAPH_COLORING)) ? GNUNET_NO : GNUNET_YES; | ||
295 | |||
296 | if (NULL == ctx.filep) | ||
297 | { | ||
298 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not open file for writing: %s", | ||
299 | filename); | ||
300 | return; | ||
301 | } | ||
302 | |||
303 | /* First add the SCCs to the automaton, so we can color them nicely */ | ||
304 | if (GNUNET_YES == ctx.coloring) | ||
305 | scc_tarjan (a); | ||
306 | |||
307 | start = "digraph G {\nrankdir=LR\n"; | ||
308 | fwrite (start, strlen (start), 1, ctx.filep); | ||
309 | |||
310 | REGEX_INTERNAL_automaton_traverse (a, a->start, NULL, NULL, | ||
311 | ®EX_TEST_automaton_save_graph_step, | ||
312 | &ctx); | ||
313 | |||
314 | end = "\n}\n"; | ||
315 | fwrite (end, strlen (end), 1, ctx.filep); | ||
316 | fclose (ctx.filep); | ||
317 | } | ||
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c deleted file mode 100644 index 2fece6bff..000000000 --- a/src/regex/regex_test_lib.c +++ /dev/null | |||
@@ -1,662 +0,0 @@ | |||
1 | /* | ||
2 | * This file is part of GNUnet | ||
3 | * Copyright (C) 2012-2017 GNUnet e.V. | ||
4 | * | ||
5 | * GNUnet is free software: you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU Affero General Public License as published | ||
7 | * by the Free Software Foundation, either version 3 of the License, | ||
8 | * or (at your option) any later version. | ||
9 | * | ||
10 | * GNUnet is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Affero General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Affero General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL-3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_lib.c | ||
22 | * @brief library to read regexes representing IP networks from a file. | ||
23 | * and simplifying them into one big regex, in order to run | ||
24 | * tests (regex performance, cadet profiler). | ||
25 | * @author Bartlomiej Polot | ||
26 | */ | ||
27 | |||
28 | #include "platform.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | |||
31 | |||
32 | /** | ||
33 | * Struct to hold the tree formed by prefix-combining the regexes. Each node carries one token; the root node is the one whose token is NULL. | ||
34 | */ | ||
35 | struct RegexCombineCtx | ||
36 | { | ||
37 | /** | ||
38 | * Child nodes with same prefix and token. Array with @e size slots, indexed by c2i() of the first character of the child's token; unused slots are NULL. | ||
39 | */ | ||
40 | struct RegexCombineCtx **children; | ||
41 | |||
42 | /** | ||
43 | * Alphabet size (how many @a children there are); also the base handed to c2i(). | ||
44 | */ | ||
45 | unsigned int size; | ||
46 | |||
47 | /** | ||
48 | * Token: heap-allocated string owned by this node and freed in regex_ctx_destroy(); NULL on the root node. | ||
49 | */ | ||
50 | char *s; | ||
51 | }; | ||
52 | |||
53 | |||
54 | /** | ||
55 | * Char 2 int | ||
56 | * | ||
57 | * Convert a character into its int value depending on the base used | ||
58 | * | ||
59 | * @param c Char | ||
60 | * @param size base (2, 8 or 16(hex)) | ||
61 | * | ||
62 | * @return Int in range [0, (base-1)] | ||
63 | */ | ||
64 | static int | ||
65 | c2i (char c, int size) | ||
66 | { | ||
67 | switch (size) | ||
68 | { | ||
69 | case 2: | ||
70 | case 8: | ||
71 | return c - '0'; | ||
72 | break; | ||
73 | |||
74 | case 16: | ||
75 | if ((c >= '0') && (c <= '9') ) | ||
76 | return c - '0'; | ||
77 | else if ((c >= 'A') && (c <= 'F') ) | ||
78 | return c - 'A' + 10; | ||
79 | else if ((c >= 'a') && (c <= 'f') ) | ||
80 | return c - 'a' + 10; | ||
81 | else | ||
82 | { | ||
83 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
84 | "Cannot convert char %c in base %u\n", | ||
85 | c, size); | ||
86 | GNUNET_assert (0); | ||
87 | } | ||
88 | break; | ||
89 | |||
90 | default: | ||
91 | GNUNET_assert (0); | ||
92 | } | ||
93 | } | ||
94 | |||
95 | |||
#if DEBUG_REGEX
/**
 * Emit the indentation prefix used when dumping the regex tree to stderr.
 *
 * @param n Indentation level; nothing is printed for values below 1
 */
static void
space (int n)
{
  for (int left = n; left > 0; left--)
    fprintf (stderr, "| ");
}


#endif
111 | |||
112 | |||
/**
 * Dump the combined regex tree to stderr. Compiles to an empty function
 * unless DEBUG_REGEX is set.
 *
 * @param ctx The ctx to printf
 * @param level Indentation level to start with
 */
static void
debugctx (struct RegexCombineCtx *ctx, int level)
{
#if DEBUG_REGEX
  if (NULL == ctx->s)
  {
    /* Only the root node has no token. */
    fprintf (stderr, "ROOT (base %u)\n", ctx->size);
  }
  else
  {
    space (level - 1);
    fprintf (stderr, "%u:'%s'\n", c2i (ctx->s[0], ctx->size), ctx->s);
  }

  for (unsigned int slot = 0; slot < ctx->size; slot++)
  {
    struct RegexCombineCtx *child = ctx->children[slot];

    if (NULL == child)
      continue;
    space (level);
    debugctx (child, level + 1);
  }
  fflush (stderr);
#endif
}
141 | |||
142 | |||
143 | /** | ||
144 | * Add a single regex to a context, combining with existing regex by-prefix. | ||
145 | * | ||
146 | * @param ctx Context with 0 or more regexes. | ||
147 | * @param regex Regex to add. | ||
148 | */ | ||
149 | static void | ||
150 | regex_add (struct RegexCombineCtx *ctx, | ||
151 | const char *regex); | ||
152 | |||
153 | |||
154 | /** | ||
155 | * Create and initialize a new RegexCombineCtx. | ||
156 | * | ||
157 | * @param alphabet_size Size of the alphabet (and the Trie array) | ||
158 | */ | ||
159 | static struct RegexCombineCtx * | ||
160 | new_regex_ctx (unsigned int alphabet_size) | ||
161 | { | ||
162 | struct RegexCombineCtx *ctx; | ||
163 | size_t array_size; | ||
164 | |||
165 | array_size = sizeof(struct RegexCombineCtx *) * alphabet_size; | ||
166 | ctx = GNUNET_new (struct RegexCombineCtx); | ||
167 | ctx->children = GNUNET_malloc (array_size); | ||
168 | ctx->size = alphabet_size; | ||
169 | |||
170 | return ctx; | ||
171 | } | ||
172 | |||
173 | |||
174 | static void | ||
175 | move_children (struct RegexCombineCtx *dst, | ||
176 | const struct RegexCombineCtx *src) | ||
177 | { | ||
178 | size_t array_size; | ||
179 | |||
180 | array_size = sizeof(struct RegexCombineCtx *) * src->size; | ||
181 | GNUNET_memcpy (dst->children, | ||
182 | src->children, | ||
183 | array_size); | ||
184 | for (unsigned int i = 0; i < src->size; i++) | ||
185 | { | ||
186 | src->children[i] = NULL; | ||
187 | } | ||
188 | } | ||
189 | |||
190 | |||
191 | /** | ||
192 | * Extract a string from all prefix-combined regexes. | ||
193 | * | ||
194 | * @param ctx Context with 0 or more regexes. | ||
195 | * | ||
196 | * @return Regex that matches any of the added regexes. | ||
197 | */ | ||
198 | static char * | ||
199 | regex_combine (struct RegexCombineCtx *ctx) | ||
200 | { | ||
201 | struct RegexCombineCtx *p; | ||
202 | unsigned int i; | ||
203 | size_t len; | ||
204 | char *regex; | ||
205 | char *tmp; | ||
206 | char *s; | ||
207 | int opt; | ||
208 | |||
209 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "new combine %s\n", ctx->s); | ||
210 | regex = GNUNET_strdup (""); | ||
211 | opt = GNUNET_NO; | ||
212 | for (i = 0; i < ctx->size; i++) | ||
213 | { | ||
214 | p = ctx->children[i]; | ||
215 | if (NULL == p) | ||
216 | continue; | ||
217 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
218 | "adding '%s' to innner %s\n", | ||
219 | p->s, ctx->s); | ||
220 | s = regex_combine (p); | ||
221 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); | ||
222 | if (strlen (s) == 0) | ||
223 | { | ||
224 | opt = GNUNET_YES; | ||
225 | } | ||
226 | else | ||
227 | { | ||
228 | GNUNET_asprintf (&tmp, "%s%s|", regex, s); | ||
229 | GNUNET_free (regex); | ||
230 | regex = tmp; | ||
231 | } | ||
232 | GNUNET_free (s); | ||
233 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " so far '%s' for inner %s\n", regex, | ||
234 | ctx->s); | ||
235 | } | ||
236 | |||
237 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "opt: %d, innner: '%s'\n", opt, regex); | ||
238 | len = strlen (regex); | ||
239 | if (0 == len) | ||
240 | { | ||
241 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "empty, returning ''\n"); | ||
242 | GNUNET_free (regex); | ||
243 | return NULL == ctx->s ? NULL : GNUNET_strdup (ctx->s); | ||
244 | } | ||
245 | |||
246 | if ('|' == regex[len - 1]) | ||
247 | regex[len - 1] = '\0'; | ||
248 | |||
249 | if (NULL != ctx->s) | ||
250 | { | ||
251 | if (opt) | ||
252 | GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex); | ||
253 | else | ||
254 | GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex); | ||
255 | GNUNET_free (regex); | ||
256 | regex = s; | ||
257 | } | ||
258 | |||
259 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex); | ||
260 | return regex; | ||
261 | } | ||
262 | |||
263 | |||
/**
 * Get the number of matching characters on the prefix of both strings.
 *
 * Walks both strings once and stops at the first difference or at the end
 * of the shorter string, so neither string is scanned beyond the common
 * prefix and no length is squeezed from size_t into unsigned int.
 *
 * @param s1 String 1 (0-terminated, must not be NULL).
 * @param s2 String 2 (0-terminated, must not be NULL).
 *
 * @return Number of characters of matching prefix.
 */
static unsigned int
get_prefix_length (const char *s1, const char *s2)
{
  const unsigned int cap = ~0U; /* largest value the return type can hold */
  unsigned int n = 0;

  /* If s1[n] is not the terminator and equals s2[n], s2 has not ended
   * either, so a single terminator test is sufficient. */
  while ((n < cap) && ('\0' != s1[n]) && (s1[n] == s2[n]))
    n++;
  return n;
}
291 | |||
292 | |||
293 | /** | ||
294 | * Return the child context with the longest prefix match with the regex. | ||
295 | * Usually only one child will match, search all just in case. | ||
296 | * | ||
297 | * @param ctx Context whose children to search. | ||
298 | * @param regex String to match. | ||
299 | * | ||
300 | * @return Child with the longest prefix, NULL if no child matches. | ||
301 | */ | ||
302 | static struct RegexCombineCtx * | ||
303 | get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex) | ||
304 | { | ||
305 | struct RegexCombineCtx *p; | ||
306 | struct RegexCombineCtx *best; | ||
307 | unsigned int i; | ||
308 | unsigned int l; | ||
309 | unsigned int best_l; | ||
310 | |||
311 | best_l = 0; | ||
312 | best = NULL; | ||
313 | |||
314 | for (i = 0; i < ctx->size; i++) | ||
315 | { | ||
316 | p = ctx->children[i]; | ||
317 | if (NULL == p) | ||
318 | continue; | ||
319 | |||
320 | l = get_prefix_length (p->s, regex); | ||
321 | if (l > best_l) | ||
322 | { | ||
323 | GNUNET_break (0 == best_l); | ||
324 | best = p; | ||
325 | best_l = l; | ||
326 | } | ||
327 | } | ||
328 | return best; | ||
329 | } | ||
330 | |||
331 | |||
332 | static void | ||
333 | regex_add_multiple (struct RegexCombineCtx *ctx, | ||
334 | const char *regex, | ||
335 | struct RegexCombineCtx **children) | ||
336 | { | ||
337 | char tmp[2]; | ||
338 | long unsigned int i; | ||
339 | size_t l; | ||
340 | struct RegexCombineCtx *newctx; | ||
341 | unsigned int count; | ||
342 | |||
343 | if ('(' != regex[0]) | ||
344 | { | ||
345 | GNUNET_assert (0); | ||
346 | } | ||
347 | |||
348 | /* Does the regex cover *all* possible children? Then don't add any, | ||
349 | * as it will be covered by the post-regex "(a-z)*" | ||
350 | */ | ||
351 | l = strlen (regex); | ||
352 | count = 0; | ||
353 | for (i = 1UL; i < l; i++) | ||
354 | { | ||
355 | if ((regex[i] != '|') && (regex[i] != ')') ) | ||
356 | { | ||
357 | count++; | ||
358 | } | ||
359 | } | ||
360 | if (count == ctx->size) | ||
361 | { | ||
362 | return; | ||
363 | } | ||
364 | |||
365 | /* Add every component as a child node */ | ||
366 | tmp[1] = '\0'; | ||
367 | for (i = 1UL; i < l; i++) | ||
368 | { | ||
369 | if ((regex[i] != '|') && (regex[i] != ')') ) | ||
370 | { | ||
371 | tmp[0] = regex[i]; | ||
372 | newctx = new_regex_ctx (ctx->size); | ||
373 | newctx->s = GNUNET_strdup (tmp); | ||
374 | if (children != NULL) | ||
375 | GNUNET_memcpy (newctx->children, | ||
376 | children, | ||
377 | sizeof(*children) * ctx->size); | ||
378 | ctx->children[c2i (tmp[0], ctx->size)] = newctx; | ||
379 | } | ||
380 | } | ||
381 | } | ||
382 | |||
383 | |||
384 | /** | ||
385 | * Add a single regex to a context, splitting the existing state. | ||
386 | * | ||
387 | * We only had a partial match, split existing state, truncate the current node | ||
388 | * so it only contains the prefix, add suffix(es) as children. | ||
389 | * | ||
390 | * @param ctx Context to split. | ||
391 | * @param len Length of ctx->s | ||
392 | * @param prefix_l Length of common prefix of the new regex and @a ctx->s | ||
393 | */ | ||
394 | static void | ||
395 | regex_split (struct RegexCombineCtx *ctx, | ||
396 | unsigned int len, | ||
397 | unsigned int prefix_l) | ||
398 | { | ||
399 | struct RegexCombineCtx *newctx; | ||
400 | unsigned int idx; | ||
401 | char *suffix; | ||
402 | |||
403 | suffix = GNUNET_malloc (len - prefix_l + 1); | ||
404 | /* | ||
405 | * We can use GNUNET_strlcpy because ctx->s is null-terminated | ||
406 | */ | ||
407 | GNUNET_strlcpy (suffix, &ctx->s[prefix_l], len - prefix_l + 1); | ||
408 | |||
409 | /* Suffix saved, truncate current node so it only contains the prefix, | ||
410 | * copy any children nodes to put as grandchildren and initialize new empty | ||
411 | * children array. | ||
412 | */ | ||
413 | ctx->s[prefix_l] = '\0'; | ||
414 | |||
415 | /* If the suffix is an OR expression, add multiple children */ | ||
416 | if ('(' == suffix[0]) | ||
417 | { | ||
418 | struct RegexCombineCtx **tmp; | ||
419 | |||
420 | tmp = ctx->children; | ||
421 | ctx->children = GNUNET_malloc (sizeof(*tmp) * ctx->size); | ||
422 | regex_add_multiple (ctx, suffix, tmp); | ||
423 | GNUNET_free (suffix); | ||
424 | GNUNET_free (tmp); | ||
425 | return; | ||
426 | } | ||
427 | |||
428 | /* The suffix is a normal string, add as one node */ | ||
429 | newctx = new_regex_ctx (ctx->size); | ||
430 | newctx->s = suffix; | ||
431 | move_children (newctx, ctx); | ||
432 | idx = c2i (suffix[0], ctx->size); | ||
433 | ctx->children[idx] = newctx; | ||
434 | } | ||
435 | |||
436 | |||
437 | /** | ||
438 | * Add a single regex to a context, combining with existing regex by-prefix. | ||
439 | * | ||
440 | * @param ctx Context with 0 or more regexes. | ||
441 | * @param regex Regex to add. | ||
442 | */ | ||
443 | static void | ||
444 | regex_add (struct RegexCombineCtx *ctx, const char *regex) | ||
445 | { | ||
446 | struct RegexCombineCtx *p; | ||
447 | struct RegexCombineCtx *newctx; | ||
448 | long unsigned int l; | ||
449 | unsigned int prefix_l; | ||
450 | const char *rest_r; | ||
451 | const char *rest_s; | ||
452 | size_t len; | ||
453 | int idx; | ||
454 | |||
455 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
456 | "regex_add '%s' into '%s'\n", | ||
457 | regex, ctx->s); | ||
458 | l = strlen (regex); | ||
459 | if (0UL == l) | ||
460 | return; | ||
461 | |||
462 | /* If the regex is in the form of (a|b|c), add every character separately */ | ||
463 | if ('(' == regex[0]) | ||
464 | { | ||
465 | regex_add_multiple (ctx, regex, NULL); | ||
466 | return; | ||
467 | } | ||
468 | |||
469 | p = get_longest_prefix (ctx, regex); | ||
470 | if (NULL != p) | ||
471 | { | ||
472 | /* There is some prefix match, reduce regex and try again */ | ||
473 | prefix_l = get_prefix_length (p->s, regex); | ||
474 | rest_s = &p->s[prefix_l]; | ||
475 | rest_r = ®ex[prefix_l]; | ||
476 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l); | ||
477 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s); | ||
478 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); | ||
479 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); | ||
480 | len = strlen (p->s); | ||
481 | if (prefix_l < len) | ||
482 | { | ||
483 | regex_split (p, len, prefix_l); | ||
484 | } | ||
485 | regex_add (p, rest_r); | ||
486 | return; | ||
487 | } | ||
488 | |||
489 | /* There is no prefix match, add new */ | ||
490 | idx = c2i (regex[0], ctx->size); | ||
491 | if ((NULL == ctx->children[idx]) && (NULL != ctx->s)) | ||
492 | { | ||
493 | /* this was the end before, add empty string */ | ||
494 | newctx = new_regex_ctx (ctx->size); | ||
495 | newctx->s = GNUNET_strdup (""); | ||
496 | ctx->children[idx] = newctx; | ||
497 | } | ||
498 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); | ||
499 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); | ||
500 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); | ||
501 | newctx = new_regex_ctx (ctx->size); | ||
502 | newctx->s = GNUNET_strdup (regex); | ||
503 | ctx->children[idx] = newctx; | ||
504 | } | ||
505 | |||
506 | |||
507 | /** | ||
508 | * Free all resources used by the context node and all its children. | ||
509 | * | ||
510 | * @param ctx Context to free. | ||
511 | */ | ||
512 | static void | ||
513 | regex_ctx_destroy (struct RegexCombineCtx *ctx) | ||
514 | { | ||
515 | unsigned int i; | ||
516 | |||
517 | if (NULL == ctx) | ||
518 | return; | ||
519 | |||
520 | for (i = 0; i < ctx->size; i++) | ||
521 | { | ||
522 | regex_ctx_destroy (ctx->children[i]); | ||
523 | } | ||
524 | GNUNET_free (ctx->s); /* 's' on root node is null */ | ||
525 | GNUNET_free (ctx->children); | ||
526 | GNUNET_free (ctx); | ||
527 | } | ||
528 | |||
529 | |||
530 | /** | ||
531 | * Combine an array of regexes into a single prefix-shared regex. | ||
532 | * Returns a prefix-combine regex that matches the same strings as | ||
533 | * any of the original regexes. | ||
534 | * | ||
535 | * WARNING: only useful for reading specific regexes for specific applications, | ||
536 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. | ||
537 | * This function DOES NOT support arbitrary regex combining. | ||
538 | * | ||
539 | * @param regexes A NULL-terminated array of regexes. | ||
540 | * @param alphabet_size Size of the alphabet the regex uses. | ||
541 | * | ||
542 | * @return A string with a single regex that matches any of the original regexes | ||
543 | */ | ||
544 | char * | ||
545 | REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size) | ||
546 | { | ||
547 | unsigned int i; | ||
548 | char *combined; | ||
549 | const char *current; | ||
550 | struct RegexCombineCtx *ctx; | ||
551 | |||
552 | ctx = new_regex_ctx (alphabet_size); | ||
553 | for (i = 0; regexes[i]; i++) | ||
554 | { | ||
555 | current = regexes[i]; | ||
556 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); | ||
557 | regex_add (ctx, current); | ||
558 | debugctx (ctx, 0); | ||
559 | } | ||
560 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); | ||
561 | debugctx (ctx, 0); | ||
562 | |||
563 | combined = regex_combine (ctx); | ||
564 | |||
565 | regex_ctx_destroy (ctx); | ||
566 | |||
567 | return combined; | ||
568 | } | ||
569 | |||
570 | |||
571 | /** | ||
572 | * Read a set of regexes from a file, one per line and return them in an array | ||
573 | * suitable for REGEX_TEST_combine. | ||
574 | * The array must be free'd using REGEX_TEST_free_from_file. | ||
575 | * | ||
576 | * @param filename Name of the file containing the regexes. | ||
577 | * | ||
578 | * @return A newly allocated, NULL terminated array of regexes. | ||
579 | */ | ||
580 | char ** | ||
581 | REGEX_TEST_read_from_file (const char *filename) | ||
582 | { | ||
583 | struct GNUNET_DISK_FileHandle *f; | ||
584 | unsigned int nr; | ||
585 | unsigned int offset; | ||
586 | off_t size; | ||
587 | size_t len; | ||
588 | char *buffer; | ||
589 | char *regex; | ||
590 | char **regexes; | ||
591 | |||
592 | f = GNUNET_DISK_file_open (filename, | ||
593 | GNUNET_DISK_OPEN_READ, | ||
594 | GNUNET_DISK_PERM_NONE); | ||
595 | if (NULL == f) | ||
596 | { | ||
597 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
598 | "Can't open file %s for reading\n", filename); | ||
599 | return NULL; | ||
600 | } | ||
601 | if (GNUNET_OK != GNUNET_DISK_file_handle_size (f, &size)) | ||
602 | { | ||
603 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
604 | "Can't get size of file %s\n", filename); | ||
605 | GNUNET_DISK_file_close (f); | ||
606 | return NULL; | ||
607 | } | ||
608 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
609 | "using file %s, size %llu\n", | ||
610 | filename, (unsigned long long) size); | ||
611 | |||
612 | buffer = GNUNET_malloc (size + 1); | ||
613 | GNUNET_DISK_file_read (f, buffer, size); | ||
614 | GNUNET_DISK_file_close (f); | ||
615 | regexes = GNUNET_malloc (sizeof(char *)); | ||
616 | nr = 1; | ||
617 | offset = 0; | ||
618 | regex = NULL; | ||
619 | do | ||
620 | { | ||
621 | if (NULL == regex) | ||
622 | regex = GNUNET_malloc (size + 1); | ||
623 | len = (size_t) sscanf (&buffer[offset], "%s", regex); | ||
624 | if (0 == len) | ||
625 | break; | ||
626 | len = strlen (regex); | ||
627 | offset += len + 1; | ||
628 | if (len < 1) | ||
629 | continue; | ||
630 | regex[len] = '\0'; | ||
631 | regex = GNUNET_realloc (regex, len + 1); | ||
632 | GNUNET_array_grow (regexes, nr, nr + 1); | ||
633 | GNUNET_assert (NULL == regexes[nr - 2]); | ||
634 | regexes[nr - 2] = regex; | ||
635 | regexes[nr - 1] = NULL; | ||
636 | regex = NULL; | ||
637 | } | ||
638 | while (offset < size); | ||
639 | GNUNET_free (regex); | ||
640 | GNUNET_free (buffer); | ||
641 | |||
642 | return regexes; | ||
643 | } | ||
644 | |||
645 | |||
646 | /** | ||
647 | * Free all memory reserved for a set of regexes created by read_from_file. | ||
648 | * | ||
649 | * @param regexes NULL-terminated array of regexes. | ||
650 | */ | ||
651 | void | ||
652 | REGEX_TEST_free_from_file (char **regexes) | ||
653 | { | ||
654 | unsigned int i; | ||
655 | |||
656 | for (i = 0; regexes[i]; i++) | ||
657 | GNUNET_free (regexes[i]); | ||
658 | GNUNET_free (regexes); | ||
659 | } | ||
660 | |||
661 | |||
662 | /* end of regex_test_lib.c */ | ||
diff --git a/src/regex/regex_test_lib.h b/src/regex/regex_test_lib.h deleted file mode 100644 index 98f60d317..000000000 --- a/src/regex/regex_test_lib.h +++ /dev/null | |||
@@ -1,158 +0,0 @@ | |||
1 | /* | ||
2 | * This file is part of GNUnet | ||
3 | * Copyright (C) 2012 GNUnet e.V. | ||
4 | * | ||
5 | * GNUnet is free software: you can redistribute it and/or modify it | ||
6 | * under the terms of the GNU Affero General Public License as published | ||
7 | * by the Free Software Foundation, either version 3 of the License, | ||
8 | * or (at your option) any later version. | ||
9 | * | ||
10 | * GNUnet is distributed in the hope that it will be useful, but | ||
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | * Affero General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU Affero General Public License | ||
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_lib.h | ||
22 | * @brief library to read regexes representing IP networks from a file | ||
23 | * and simplify them into one big regex, in order to run | ||
24 | * tests (regex performance, regex profiler). | ||
25 | * @author Bartlomiej Polot | ||
26 | */ | ||
27 | |||
28 | #ifndef REGEX_INTERNAL_TEST_LIB_H | ||
29 | #define REGEX_INTERNAL_TEST_LIB_H | ||
30 | |||
31 | #include "regex_internal_lib.h" | ||
32 | |||
33 | #ifdef __cplusplus | ||
34 | extern "C" | ||
35 | { | ||
36 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
37 | } | ||
38 | #endif | ||
39 | #endif | ||
40 | |||
41 | |||
42 | /** | ||
43 | * Combine an array of regexes into a single prefix-shared regex. | ||
44 | * Returns a prefix-combine regex that matches the same strings as | ||
45 | * any of the original regexes. | ||
46 | * | ||
47 | * WARNING: only useful for reading specific regexes for specific applications, | ||
48 | * namely the gnunet-regex-profiler / gnunet-regex-daemon. | ||
49 | * This function DOES NOT support arbitrary regex combining. | ||
50 | * | ||
51 | * @param regexes A NULL-terminated array of regexes. | ||
52 | * @param alphabet_size Size of the alphabet the regex uses. | ||
53 | * | ||
54 | * @return A string with a single regex that matches any of the original regexes | ||
55 | */ | ||
56 | char * | ||
57 | REGEX_TEST_combine (char *const regexes[], unsigned int alphabet_size); | ||
58 | |||
59 | |||
60 | /** | ||
61 | * Read a set of regexes from a file, one per line and return them in an array | ||
62 | * suitable for REGEX_TEST_combine. | ||
63 | * The array must be free'd using REGEX_TEST_free_from_file. | ||
64 | * | ||
65 | * @param filename Name of the file containing the regexes. | ||
66 | * | ||
67 | * @return A newly allocated, NULL terminated array of regexes. | ||
68 | */ | ||
69 | char ** | ||
70 | REGEX_TEST_read_from_file (const char *filename); | ||
71 | |||
72 | |||
73 | /** | ||
74 | * Free all memory reserved for a set of regexes created by read_from_file. | ||
75 | * | ||
76 | * @param regexes NULL-terminated array of regexes. | ||
77 | */ | ||
78 | void | ||
79 | REGEX_TEST_free_from_file (char **regexes); | ||
80 | |||
81 | |||
82 | /** | ||
83 | * Generate a (pseudo) random regular expression of length 'rx_length', as well | ||
84 | * as a (optional) string that will be matched by the generated regex. The | ||
85 | * returned regex needs to be freed. | ||
86 | * | ||
87 | * @param rx_length length of the random regex. | ||
88 | * @param matching_str (optional) pointer to a string that will contain a string | ||
89 | * that will be matched by the generated regex, if | ||
90 | * 'matching_str' pointer was not NULL. Make sure you allocated at least rx_length+1 bytes for this string. | ||
91 | * | ||
92 | * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which | ||
93 | * needs to be freed, otherwise. | ||
94 | */ | ||
95 | char * | ||
96 | REGEX_TEST_generate_random_regex (size_t rx_length, char *matching_str); | ||
97 | |||
98 | |||
99 | /** | ||
100 | * Generate a random string of maximum length 'max_len' that only contains literals allowed | ||
101 | * in a regular expression. The string might be 0 chars long but is guaranteed | ||
102 | * to be shorter or equal to 'max_len'. | ||
103 | * | ||
104 | * @param max_len maximum length of the string that should be generated. | ||
105 | * | ||
106 | * @return random string that needs to be freed. | ||
107 | */ | ||
108 | char * | ||
109 | REGEX_TEST_generate_random_string (size_t max_len); | ||
110 | |||
111 | |||
112 | /** | ||
113 | * Options for the graph creation function | ||
114 | * REGEX_TEST_automaton_save_graph (passed as its 'options' argument). | ||
115 | */ | ||
116 | enum REGEX_TEST_GraphSavingOptions | ||
117 | { | ||
118 | /** | ||
119 | * Default. Do nothing special (none of the options below). | ||
120 | */ | ||
121 | REGEX_TEST_GRAPH_DEFAULT = 0, | ||
122 | |||
123 | /** | ||
124 | * Verbose mode: the generated graph will include extra information such as | ||
125 | * the NFA states that were used to generate the DFA state. | ||
126 | */ | ||
127 | REGEX_TEST_GRAPH_VERBOSE = 1, | ||
128 | |||
129 | /** | ||
130 | * Enable graph coloring. Will color each SCC in a different color. | ||
131 | */ | ||
132 | REGEX_TEST_GRAPH_COLORING = 2 | ||
133 | }; | ||
134 | |||
135 | |||
136 | /** | ||
137 | * Save the given automaton as a GraphViz dot file. | ||
138 | * | ||
139 | * @param a the automaton to be saved. | ||
140 | * @param filename where to save the file. | ||
141 | * @param options options for graph generation that include coloring or verbose | ||
142 | * mode | ||
143 | */ | ||
144 | void | ||
145 | REGEX_TEST_automaton_save_graph (struct REGEX_INTERNAL_Automaton *a, | ||
146 | const char *filename, | ||
147 | enum REGEX_TEST_GraphSavingOptions options); | ||
148 | |||
149 | |||
150 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
151 | { | ||
152 | #endif | ||
153 | #ifdef __cplusplus | ||
154 | } | ||
155 | #endif | ||
156 | |||
157 | /* end of regex_test_lib.h */ | ||
158 | #endif | ||
diff --git a/src/regex/regex_test_random.c b/src/regex/regex_test_random.c deleted file mode 100644 index 823563d4d..000000000 --- a/src/regex/regex_test_random.c +++ /dev/null | |||
@@ -1,175 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_test_random.c | ||
22 | * @brief functions for creating random regular expressions and strings | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "regex_test_lib.h" | ||
27 | #include "gnunet_crypto_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
30 | |||
31 | /** | ||
32 | * Get a (pseudo) random valid literal for building a regular expression. | ||
33 | * | ||
34 | * @return random valid literal | ||
35 | */ | ||
36 | static char | ||
37 | get_random_literal () | ||
38 | { | ||
39 | uint32_t ridx; | ||
40 | |||
41 | ridx = | ||
42 | GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | ||
43 | (uint32_t) strlen (ALLOWED_LITERALS)); | ||
44 | |||
45 | return ALLOWED_LITERALS[ridx]; | ||
46 | } | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Generate a (pseudo) random regular expression of length 'rx_length', as well | ||
51 | * as a (optional) string that will be matched by the generated regex. The | ||
52 | * returned regex needs to be freed. | ||
53 | * | ||
54 | * @param rx_length length of the random regex. | ||
55 | * @param matching_str (optional) pointer to a string that will contain a string | ||
56 | * that will be matched by the generated regex, if | ||
57 | * 'matching_str' pointer was not NULL. Make sure you | ||
58 | * allocated at least rx_length+1 bytes for this string. | ||
59 | * | ||
60 | * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which | ||
61 | * needs to be freed, otherwise. | ||
62 | */ | ||
63 | char * | ||
64 | REGEX_TEST_generate_random_regex (size_t rx_length, char *matching_str) | ||
65 | { | ||
66 | char *rx; | ||
67 | char *rx_p; | ||
68 | char *matching_strp; | ||
69 | unsigned int i; | ||
70 | unsigned int char_op_switch; | ||
71 | unsigned int last_was_op; | ||
72 | int rx_op; | ||
73 | char current_char; | ||
74 | |||
75 | if (0 == rx_length) | ||
76 | return NULL; | ||
77 | |||
78 | if (NULL != matching_str) | ||
79 | matching_strp = matching_str; | ||
80 | else | ||
81 | matching_strp = NULL; | ||
82 | |||
83 | rx = GNUNET_malloc (rx_length + 1); | ||
84 | rx_p = rx; | ||
85 | current_char = 0; | ||
86 | last_was_op = 1; | ||
87 | |||
88 | for (i = 0; i < rx_length; i++) | ||
89 | { | ||
90 | char_op_switch = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2); | ||
91 | |||
92 | if ((0 == char_op_switch) && ! last_was_op) | ||
93 | { | ||
94 | last_was_op = 1; | ||
95 | rx_op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4); | ||
96 | |||
97 | switch (rx_op) | ||
98 | { | ||
99 | case 0: | ||
100 | current_char = '+'; | ||
101 | break; | ||
102 | |||
103 | case 1: | ||
104 | current_char = '*'; | ||
105 | break; | ||
106 | |||
107 | case 2: | ||
108 | current_char = '?'; | ||
109 | break; | ||
110 | |||
111 | case 3: | ||
112 | if (i < rx_length - 1) /* '|' cannot be at the end */ | ||
113 | current_char = '|'; | ||
114 | else | ||
115 | current_char = get_random_literal (); | ||
116 | break; | ||
117 | } | ||
118 | } | ||
119 | else | ||
120 | { | ||
121 | current_char = get_random_literal (); | ||
122 | last_was_op = 0; | ||
123 | } | ||
124 | |||
125 | if ((NULL != matching_strp) && | ||
126 | ((current_char != '+') && (current_char != '*') && (current_char != | ||
127 | '?') && | ||
128 | (current_char != '|') )) | ||
129 | { | ||
130 | *matching_strp = current_char; | ||
131 | matching_strp++; | ||
132 | } | ||
133 | |||
134 | *rx_p = current_char; | ||
135 | rx_p++; | ||
136 | } | ||
137 | *rx_p = '\0'; | ||
138 | if (NULL != matching_strp) | ||
139 | *matching_strp = '\0'; | ||
140 | |||
141 | return rx; | ||
142 | } | ||
143 | |||
144 | |||
145 | /** | ||
146 | * Generate a random string of maximum length 'max_len' that only contains literals allowed | ||
147 | * in a regular expression. The string might be 0 chars long but is garantueed | ||
148 | * to be shorter or equal to 'max_len'. | ||
149 | * | ||
150 | * @param max_len maximum length of the string that should be generated. | ||
151 | * | ||
152 | * @return random string that needs to be freed. | ||
153 | */ | ||
154 | char * | ||
155 | REGEX_TEST_generate_random_string (size_t max_len) | ||
156 | { | ||
157 | unsigned int i; | ||
158 | char *str; | ||
159 | size_t len; | ||
160 | |||
161 | if (1 > max_len) | ||
162 | return GNUNET_strdup (""); | ||
163 | |||
164 | len = (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len); | ||
165 | str = GNUNET_malloc (len + 1); | ||
166 | |||
167 | for (i = 0; i < len; i++) | ||
168 | { | ||
169 | str[i] = get_random_literal (); | ||
170 | } | ||
171 | |||
172 | str[i] = '\0'; | ||
173 | |||
174 | return str; | ||
175 | } | ||
diff --git a/src/regex/test_regex_api.c b/src/regex/test_regex_api.c deleted file mode 100644 index 9761e80f9..000000000 --- a/src/regex/test_regex_api.c +++ /dev/null | |||
@@ -1,132 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2013 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_api.c | ||
22 | * @brief base test case for regex api (and DHT functions) | ||
23 | * @author Christian Grothoff | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "gnunet_util_lib.h" | ||
27 | #include "gnunet_testing_lib.h" | ||
28 | #include "gnunet_regex_service.h" | ||
29 | |||
30 | |||
/**
 * How long until we really give up on a particular testcase portion?
 * Used in run() as the delay after which end_badly() fires.
 */
#define TOTAL_TIMEOUT \
  GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 600)

/**
 * How long until we give up on any particular operation (and retry)?
 */
#define BASE_TIMEOUT \
  GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 3)


/**
 * Announcement handle created in run(); cancelled when the test ends.
 */
static struct GNUNET_REGEX_Announcement *a;

/**
 * Search handle created in run(); cancelled when the test ends.
 */
static struct GNUNET_REGEX_Search *s;

/**
 * Exit code of the test: set to 0 by end(), stays/gets 1 on failure.
 */
static int ok = 1;

/**
 * Pending shutdown task: the timeout (end_badly) until a result was
 * found, the regular shutdown (end) afterwards.
 */
static struct GNUNET_SCHEDULER_Task *die_task;
50 | |||
51 | |||
52 | static void | ||
53 | end (void *cls) | ||
54 | { | ||
55 | die_task = NULL; | ||
56 | GNUNET_REGEX_announce_cancel (a); | ||
57 | a = NULL; | ||
58 | GNUNET_REGEX_search_cancel (s); | ||
59 | s = NULL; | ||
60 | ok = 0; | ||
61 | } | ||
62 | |||
63 | |||
64 | static void | ||
65 | end_badly () | ||
66 | { | ||
67 | die_task = NULL; | ||
68 | fprintf (stderr, "%s", "Testcase failed (timeout).\n"); | ||
69 | GNUNET_REGEX_announce_cancel (a); | ||
70 | a = NULL; | ||
71 | GNUNET_REGEX_search_cancel (s); | ||
72 | s = NULL; | ||
73 | ok = 1; | ||
74 | } | ||
75 | |||
76 | |||
77 | /** | ||
78 | * Search callback function, invoked for every result that was found. | ||
79 | * | ||
80 | * @param cls Closure provided in GNUNET_REGEX_search. | ||
81 | * @param id Peer providing a regex that matches the string. | ||
82 | * @param get_path Path of the get request. | ||
83 | * @param get_path_length Length of get_path. | ||
84 | * @param put_path Path of the put request. | ||
85 | * @param put_path_length Length of the put_path. | ||
86 | */ | ||
87 | static void | ||
88 | found_cb (void *cls, | ||
89 | const struct GNUNET_PeerIdentity *id, | ||
90 | const struct GNUNET_PeerIdentity *get_path, | ||
91 | unsigned int get_path_length, | ||
92 | const struct GNUNET_PeerIdentity *put_path, | ||
93 | unsigned int put_path_length) | ||
94 | { | ||
95 | GNUNET_SCHEDULER_cancel (die_task); | ||
96 | die_task = | ||
97 | GNUNET_SCHEDULER_add_now (&end, NULL); | ||
98 | } | ||
99 | |||
100 | |||
101 | static void | ||
102 | run (void *cls, | ||
103 | const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
104 | struct GNUNET_TESTING_Peer *peer) | ||
105 | { | ||
106 | die_task = | ||
107 | GNUNET_SCHEDULER_add_delayed (TOTAL_TIMEOUT, | ||
108 | &end_badly, NULL); | ||
109 | a = GNUNET_REGEX_announce (cfg, | ||
110 | "my long prefix - hello world(0|1)*", | ||
111 | GNUNET_TIME_relative_multiply ( | ||
112 | GNUNET_TIME_UNIT_SECONDS, | ||
113 | 5), | ||
114 | 1); | ||
115 | s = GNUNET_REGEX_search (cfg, | ||
116 | "my long prefix - hello world0101", | ||
117 | &found_cb, NULL); | ||
118 | } | ||
119 | |||
120 | |||
121 | int | ||
122 | main (int argc, char *argv[]) | ||
123 | { | ||
124 | if (0 != GNUNET_TESTING_peer_run ("test-regex-api", | ||
125 | "test_regex_api_data.conf", | ||
126 | &run, NULL)) | ||
127 | return 1; | ||
128 | return ok; | ||
129 | } | ||
130 | |||
131 | |||
132 | /* end of test_regex_api.c */ | ||
diff --git a/src/regex/test_regex_api_data.conf b/src/regex/test_regex_api_data.conf deleted file mode 100644 index 40fee1e54..000000000 --- a/src/regex/test_regex_api_data.conf +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | @INLINE@ ../../contrib/conf/gnunet/no_forcestart.conf | ||
2 | @INLINE@ ../../contrib/conf/gnunet/no_autostart_above_core.conf | ||
3 | |||
4 | [PATHS] | ||
5 | GNUNET_TEST_HOME = $GNUNET_TMP/test-regex-api/ | ||
6 | |||
7 | [dhtcache] | ||
8 | QUOTA = 1 MB | ||
9 | DATABASE = heap | ||
10 | |||
11 | [topology] | ||
12 | TARGET-CONNECTION-COUNT = 16 | ||
13 | AUTOCONNECT = YES | ||
14 | FRIENDS-ONLY = NO | ||
15 | MINIMUM-FRIENDS = 0 | ||
16 | |||
17 | [ats] | ||
18 | WAN_QUOTA_IN = 1 GB | ||
19 | WAN_QUOTA_OUT = 1 GB | ||
20 | |||
21 | [dht] | ||
22 | START_ON_DEMAND = YES | ||
23 | PORT = 12370 | ||
24 | |||
25 | [regex] | ||
26 | START_ON_DEMAND = YES | ||
27 | |||
28 | [transport] | ||
29 | plugins = tcp | ||
30 | NEIGHBOUR_LIMIT = 50 | ||
31 | |||
32 | [nat] | ||
33 | DISABLEV6 = YES | ||
34 | BINDTO = 127.0.0.1 | ||
35 | ENABLE_UPNP = NO | ||
36 | BEHIND_NAT = NO | ||
37 | ALLOW_NAT = NO | ||
38 | INTERNAL_ADDRESS = 127.0.0.1 | ||
39 | EXTERNAL_ADDRESS = 127.0.0.1 | ||
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c deleted file mode 100644 index be3649dbd..000000000 --- a/src/regex/test_regex_eval_api.c +++ /dev/null | |||
@@ -1,385 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_eval_api.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include <regex.h> | ||
26 | #include <time.h> | ||
27 | #include "platform.h" | ||
28 | #include "regex_internal_lib.h" | ||
29 | #include "regex_test_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
/**
 * Expected outcome of matching a string against a regex. The values
 * mirror the matchers' results, where 0 stands for a match.
 */
enum Match_Result
{
  /** The string is expected to match. */
  match = 0,

  /** The string is expected not to match. */
  nomatch = 1
};

/**
 * A regex together with test strings and the expected result for each.
 */
struct Regex_String_Pair
{
  /** Regular expression under test. */
  char *regex;

  /** Number of entries of 'strings' / 'expected_results' in use. */
  int string_count;

  /** Strings to match against 'regex'. */
  char *strings[20];

  /** Expected result for the string at the same index. */
  enum Match_Result expected_results[20];
};
45 | |||
46 | |||
47 | /** | ||
48 | * Random regex test. Generate a random regex as well as 'str_count' strings to | ||
49 | * match it against. Will match using GNUNET_REGEX implementation and compare | ||
50 | * the result to glibc regex result. 'rx_length' has to be smaller then | ||
51 | * 'max_str_len'. | ||
52 | * | ||
53 | * @param rx_length length of the regular expression. | ||
54 | * @param max_str_len maximum length of the random strings. | ||
55 | * @param str_count number of generated random strings. | ||
56 | * | ||
57 | * @return 0 on success, non 0 otherwise. | ||
58 | */ | ||
59 | int | ||
60 | test_random (unsigned int rx_length, unsigned int max_str_len, | ||
61 | unsigned int str_count) | ||
62 | { | ||
63 | unsigned int i; | ||
64 | char *rand_rx; | ||
65 | char *matching_str; | ||
66 | int eval; | ||
67 | int eval_check; | ||
68 | int eval_canonical; | ||
69 | int eval_canonical_check; | ||
70 | struct REGEX_INTERNAL_Automaton *dfa; | ||
71 | regex_t rx; | ||
72 | regmatch_t matchptr[1]; | ||
73 | char error[200]; | ||
74 | int result; | ||
75 | char *canonical_regex = NULL; | ||
76 | |||
77 | /* At least one string is needed for matching */ | ||
78 | GNUNET_assert (str_count > 0); | ||
79 | /* The string should be at least as long as the regex itself */ | ||
80 | GNUNET_assert (max_str_len >= rx_length); | ||
81 | |||
82 | /* Generate random regex and a string that matches the regex */ | ||
83 | matching_str = GNUNET_malloc (rx_length + 1); | ||
84 | rand_rx = REGEX_TEST_generate_random_regex (rx_length, matching_str); | ||
85 | |||
86 | /* Now match */ | ||
87 | result = 0; | ||
88 | for (i = 0; i < str_count; i++) | ||
89 | { | ||
90 | if (0 < i) | ||
91 | { | ||
92 | matching_str = REGEX_TEST_generate_random_string (max_str_len); | ||
93 | } | ||
94 | |||
95 | /* Match string using DFA */ | ||
96 | dfa = REGEX_INTERNAL_construct_dfa (rand_rx, strlen (rand_rx), 0); | ||
97 | if (NULL == dfa) | ||
98 | { | ||
99 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | ||
100 | goto error; | ||
101 | } | ||
102 | |||
103 | eval = REGEX_INTERNAL_eval (dfa, matching_str); | ||
104 | /* save the canonical regex for later comparison */ | ||
105 | canonical_regex = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (dfa)); | ||
106 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
107 | |||
108 | /* Match string using glibc regex */ | ||
109 | if (0 != regcomp (&rx, rand_rx, REG_EXTENDED)) | ||
110 | { | ||
111 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
112 | "Could not compile regex using regcomp: %s\n", rand_rx); | ||
113 | goto error; | ||
114 | } | ||
115 | |||
116 | eval_check = regexec (&rx, matching_str, 1, matchptr, 0); | ||
117 | regfree (&rx); | ||
118 | |||
119 | /* We only want to match the whole string, because that's what our DFA does, | ||
120 | * too. */ | ||
121 | if ((eval_check == 0) && | ||
122 | ((matchptr[0].rm_so != 0) || (matchptr[0].rm_eo != strlen ( | ||
123 | matching_str)) )) | ||
124 | eval_check = 1; | ||
125 | |||
126 | /* Match canonical regex */ | ||
127 | dfa = | ||
128 | REGEX_INTERNAL_construct_dfa (canonical_regex, strlen (canonical_regex), | ||
129 | 0); | ||
130 | if (NULL == dfa) | ||
131 | { | ||
132 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | ||
133 | goto error; | ||
134 | } | ||
135 | |||
136 | eval_canonical = REGEX_INTERNAL_eval (dfa, matching_str); | ||
137 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
138 | |||
139 | if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED)) | ||
140 | { | ||
141 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
142 | "Could not compile regex using regcomp: %s\n", | ||
143 | canonical_regex); | ||
144 | goto error; | ||
145 | } | ||
146 | |||
147 | eval_canonical_check = regexec (&rx, matching_str, 1, matchptr, 0); | ||
148 | regfree (&rx); | ||
149 | |||
150 | /* We only want to match the whole string, because that's what our DFA does, | ||
151 | * too. */ | ||
152 | if ((eval_canonical_check == 0) && | ||
153 | ((matchptr[0].rm_so != 0) || (matchptr[0].rm_eo != strlen ( | ||
154 | matching_str)) )) | ||
155 | eval_canonical_check = 1; | ||
156 | |||
157 | /* compare results */ | ||
158 | if ((eval_check != eval) || (eval_canonical != eval_canonical_check) ) | ||
159 | { | ||
160 | regerror (eval_check, &rx, error, sizeof error); | ||
161 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
162 | "Unexpected result:\nregex: %s\ncanonical_regex: %s\n\ | ||
163 | string: %s\ngnunet regex: %i\nglibc regex: %i\n\ | ||
164 | canonical regex: %i\ncanonical regex glibc: %i\n\ | ||
165 | glibc error: %s\n\n", rand_rx, canonical_regex, matching_str, | ||
166 | eval, eval_check, eval_canonical, eval_canonical_check, | ||
167 | error); | ||
168 | result += 1; | ||
169 | } | ||
170 | GNUNET_free (canonical_regex); | ||
171 | GNUNET_free (matching_str); | ||
172 | canonical_regex = NULL; | ||
173 | matching_str = NULL; | ||
174 | } | ||
175 | |||
176 | GNUNET_free (rand_rx); | ||
177 | |||
178 | return result; | ||
179 | |||
180 | error: | ||
181 | GNUNET_free (matching_str); | ||
182 | GNUNET_free (rand_rx); | ||
183 | GNUNET_free (canonical_regex); | ||
184 | return -1; | ||
185 | } | ||
186 | |||
187 | |||
188 | /** | ||
189 | * Automaton test that compares the result of matching regular expression 'rx' | ||
190 | * with the strings and expected results in 'rxstr' with the result of matching | ||
191 | * the same strings with glibc regex. | ||
192 | * | ||
193 | * @param a automaton. | ||
194 | * @param rx compiled glibc regex. | ||
195 | * @param rxstr regular expression and strings with expected results to | ||
196 | * match against. | ||
197 | * | ||
198 | * @return 0 on successful, non 0 otherwise | ||
199 | */ | ||
200 | int | ||
201 | test_automaton (struct REGEX_INTERNAL_Automaton *a, regex_t *rx, | ||
202 | struct Regex_String_Pair *rxstr) | ||
203 | { | ||
204 | int result; | ||
205 | int eval; | ||
206 | int eval_check; | ||
207 | char error[200]; | ||
208 | regmatch_t matchptr[1]; | ||
209 | int i; | ||
210 | |||
211 | if (NULL == a) | ||
212 | { | ||
213 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n"); | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | result = 0; | ||
218 | |||
219 | for (i = 0; i < rxstr->string_count; i++) | ||
220 | { | ||
221 | eval = REGEX_INTERNAL_eval (a, rxstr->strings[i]); | ||
222 | eval_check = regexec (rx, rxstr->strings[i], 1, matchptr, 0); | ||
223 | |||
224 | /* We only want to match the whole string, because that's what our DFA does, | ||
225 | * too. */ | ||
226 | if ((eval_check == 0) && | ||
227 | ((matchptr[0].rm_so != 0) || | ||
228 | (matchptr[0].rm_eo != strlen (rxstr->strings[i])) )) | ||
229 | eval_check = 1; | ||
230 | |||
231 | if (((rxstr->expected_results[i] == match) && ((0 != eval) || (0 != | ||
232 | eval_check) )) | ||
233 | || ((rxstr->expected_results[i] == nomatch) && | ||
234 | ((0 == eval) || (0 == eval_check) ))) | ||
235 | { | ||
236 | result = 1; | ||
237 | regerror (eval_check, rx, error, sizeof error); | ||
238 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
239 | "Unexpected result:\nregex: %s\ncanonical_regex: %s\n" | ||
240 | "string: %s\nexpected result: %i\n" | ||
241 | "gnunet regex: %i\nglibc regex: %i\nglibc error: %s\n" | ||
242 | "rm_so: %i\nrm_eo: %i\n\n", rxstr->regex, | ||
243 | REGEX_INTERNAL_get_canonical_regex (a), rxstr->strings[i], | ||
244 | rxstr->expected_results[i], eval, eval_check, error, | ||
245 | matchptr[0].rm_so, matchptr[0].rm_eo); | ||
246 | } | ||
247 | } | ||
248 | return result; | ||
249 | } | ||
250 | |||
251 | |||
252 | int | ||
253 | main (int argc, char *argv[]) | ||
254 | { | ||
255 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
256 | |||
257 | struct REGEX_INTERNAL_Automaton *a; | ||
258 | regex_t rx; | ||
259 | int i; | ||
260 | int check_nfa; | ||
261 | int check_dfa; | ||
262 | int check_rand; | ||
263 | char *check_proof; | ||
264 | |||
265 | struct Regex_String_Pair rxstr[19] = { | ||
266 | { "ab?(abcd)?", 5, | ||
267 | { "ababcd", "abab", "aabcd", "a", "abb" }, | ||
268 | { match, nomatch, match, match, nomatch } }, | ||
269 | { "ab(c|d)+c*(a(b|c)d)+", 5, | ||
270 | { "abcdcdcdcdddddabd", "abcd", | ||
271 | "abcddddddccccccccccccccccccccccccabdacdabd", | ||
272 | "abccccca", "abcdcdcdccdabdabd" }, | ||
273 | { match, nomatch, match, nomatch, match } }, | ||
274 | { "ab+c*(a(bx|c)d)+", 5, | ||
275 | { "abcdcdcdcdddddabd", "abcd", | ||
276 | "abcddddddccccccccccccccccccccccccabdacdabd", | ||
277 | "abccccca", "abcdcdcdccdabdabd" }, | ||
278 | { nomatch, nomatch, nomatch, nomatch, nomatch } }, | ||
279 | { "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 1, | ||
280 | { "kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg" }, | ||
281 | { nomatch } }, | ||
282 | { | ||
283 | "k|a+X*y+c|Q*e|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*g|N+V|t+L|P*j*3*9+X*h*J|J*6|b|E*i*f*R+S|Z|R|Y*Z|g*", | ||
284 | 1, | ||
285 | { "kaXycQepRZKyRwY6nhkwVFWBegNVtLPj39XhJJ6bEifRSZRYZg" }, | ||
286 | { nomatch } | ||
287 | }, | ||
288 | { | ||
289 | "F?W+m+2*6*c*s|P?U?a|B|y*i+t+A|V|6*C*7*e?Z*n*i|J?5+g?W*V?7*j?p?1|r?B?C+E+3+6*i+W*P?K?0|D+7?y*m+3?g?K?", | ||
290 | 1, | ||
291 | { "osfjsodfonONONOnosndfsdnfsd" }, | ||
292 | { nomatch } | ||
293 | }, | ||
294 | { | ||
295 | "V|M*o?x*p*d+h+b|E*m?h?Y*E*O?W*W*P+o?Z+H*M|I*q+C*a+5?5*9|b?z|G*y*k?R|p+u|8*h?B+l*H|e|L*O|1|F?v*0?5|C+", | ||
296 | 1, | ||
297 | { "VMoxpdhbEmhYEOWWPoZHMIqCa559bzGykRpu8hBlHeLO1Fv05C" }, | ||
298 | { nomatch } | ||
299 | }, | ||
300 | { "(bla)*", 8, | ||
301 | { "", "bla", "blabla", "bl", "la", "b", "l", "a" }, | ||
302 | { match, match, match, nomatch, nomatch, nomatch, nomatch, nomatch } }, | ||
303 | { "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 8, | ||
304 | { "ab", "abcabdbla", "abdcccccccccccabcbccdblablabla", "bl", "la", "b", | ||
305 | "l", | ||
306 | "a" }, | ||
307 | { nomatch, match, match, nomatch, nomatch, nomatch, nomatch, nomatch } }, | ||
308 | { "a|aa*a", 6, | ||
309 | { "", "a", "aa", "aaa", "aaaa", "aaaaa" }, | ||
310 | { nomatch, match, match, match, match, match } }, | ||
311 | { "ab(c|d)+c*(a(b|c)+d)+(bla)+", 1, | ||
312 | { "abcabdblaacdbla" }, | ||
313 | { nomatch } }, | ||
314 | { "(ac|b)+", 8, | ||
315 | { "b", "bb", "ac", "", "acb", "bacbacac", "acacac", "abc" }, | ||
316 | { match, match, match, nomatch, match, match, match, nomatch } }, | ||
317 | { "(ab|c)+", 7, | ||
318 | { "", "ab", "c", "abc", "ababcc", "acc", "abac" }, | ||
319 | { nomatch, match, match, match, match, nomatch, nomatch } }, | ||
320 | { "((j|2j)K|(j|2j)AK|(j|2j)(D|e|(j|2j)A(D|e))D*K)", 1, | ||
321 | { "", "2j2jADK", "j2jADK" }, | ||
322 | { nomatch, match, match } }, | ||
323 | { "((j|2j)K|(j|2j)(D|e|((j|2j)j|(j|2j)2j)A(D|e))D*K|(j|2j)AK)", 2, | ||
324 | { "", "2j2jjADK", "j2jADK" }, | ||
325 | { nomatch, match, match } }, | ||
326 | { "ab(c|d)+c*(a(b|c)d)+", 1, | ||
327 | { "abacd" }, | ||
328 | { nomatch } }, | ||
329 | { "d|5kl", 1, | ||
330 | { "d5kl" }, | ||
331 | { nomatch } }, | ||
332 | { "a()b", 1, | ||
333 | { "ab" }, | ||
334 | { match } }, | ||
335 | { | ||
336 | "GNVPN-0001-PAD(001110101001001010(0|1)*|001110101001001010000(0|1)*|001110101001001010001(0|1)*|001110101001001010010(0|1)*|001110101001001010011(0|1)*|001110101001001010100(0|1)*|001110101001001010101(0|1)*|001110101001001010110(0|1)*|001110101001001010111(0|1)*|0011101010110110(0|1)*|001110101011011000000(0|1)*|001110101011011000001(0|1)*|001110101011011000010(0|1)*|001110101011011000011(0|1)*|001110101011011000100(0|1)*|001110101011011000101(0|1)*|001110101011011000110(0|1)*|001110101011011000111(0|1)*|001110101011011001000(0|1)*|001110101011011001001(0|1)*|001110101011011001010(0|1)*|001110101011011001011(0|1)*|001110101011011001100(0|1)*|001110101011011001101(0|1)*|001110101011011001110(0|1)*|001110101011011001111(0|1)*|001110101011011010000(0|1)*|001110101011011010001(0|1)*|001110101011011010010(0|1)*|001110101011011010011(0|1)*|001110101011011010100(0|1)*|001110101011011010101(0|1)*|001110101011011010110(0|1)*|001110101011011010111(0|1)*|001110101011011011000(0|1)*|001110101011011011001(0|1)*|001110101011011011010(0|1)*|001110101011011011011(0|1)*|001110101011011011100(0|1)*|001110101011011011101(0|1)*|001110101011011011110(0|1)*|001110101011011011111(0|1)*|0011101110111101(0|1)*|001110111011110100000(0|1)*|001110111011110100001(0|1)*|001110111011110100010(0|1)*|001110111011110100011(0|1)*|001110111011110100100(0|1)*|001110111011110100101(0|1)*|001110111011110100110(0|1)*|001110111011110100111(0|1)*|001110111011110101000(0|1)*|001110111011110101001(0|1)*|001110111011110101010(0|1)*|001110111011110101011(0|1)*|001110111011110101100(0|1)*|001110111011110101101(0|1)*|001110111011110101110(0|1)*|001110111011110101111(0|1)*|001110111011110110000(0|1)*|001110111011110110001(0|1)*|001110111011110110010(0|1)*|001110111011110110011(0|1)*|001110111011110110100(0|1)*|001110111011110110101(0|1)*|001110111011110110110(0|1)*|001110111011110110111(0|1)*|001110111011110111000(0|1)*|001110111011110111001(0|1)*|001110111011110111010(0|1)*|001110111011110111011(0|1)*|001
110111011110111100(0|1)*|001110111011110111101(0|1)*|001110111011110111110(0|1)*|0111010001010110(0|1)*|011101000101011000000(0|1)*|011101000101011000001(0|1)*|011101000101011000010(0|1)*|011101000101011000011(0|1)*|011101000101011000100(0|1)*|011101000101011000101(0|1)*|011101000101011000110(0|1)*|011101000101011000111(0|1)*|011101000101011001000(0|1)*|011101000101011001001(0|1)*|011101000101011001010(0|1)*|011101000101011001011(0|1)*|011101000101011001100(0|1)*|011101000101011001101(0|1)*|011101000101011001110(0|1)*|011101000101011001111(0|1)*|011101000101011010000(0|1)*|011101000101011010001(0|1)*|011101000101011010010(0|1)*|011101000101011010011(0|1)*|011101000101011010100(0|1)*|011101000101011010101(0|1)*|011101000101011010110(0|1)*|011101000101011010111(0|1)*|011101000101011011000(0|1)*|011101000101011011001(0|1)*|011101000101011011010(0|1)*|011101000101011011011(0|1)*|011101000101011011100(0|1)*|011101000101011011101(0|1)*|011101000101011011110(0|1)*|011101000101011011111(0|1)*|0111010001010111(0|1)*|011101000101011100000(0|1)*|011101000101011100001(0|1)*|011101000101011100010(0|1)*|011101000101011100011(0|1)*|011101000101011100100(0|1)*|011101000101011100101(0|1)*|011101000101011100110(0|1)*|011101000101011100111(0|1)*|011101000101011101000(0|1)*|011101000101011101001(0|1)*|011101000101011101010(0|1)*|011101000101011101011(0|1)*|011101000101011101100(0|1)*|011101000101011101101(0|1)*|011101000101011101110(0|1)*|011101000101011101111(0|1)*|011101000101011110000(0|1)*|011101000101011110001(0|1)*|011101000101011110010(0|1)*|011101000101011110011(0|1)*|011101000101011110100(0|1)*|011101000101011110101(0|1)*|011101000101011110110(0|1)*|011101000101011110111(0|1)*|011101000101011111000(0|1)*|011101000101011111001(0|1)*|011101000101011111010(0|1)*|011101000101011111011(0|1)*|011101000101011111100(0|1)*|011101000101011111101(0|1)*|011101000101011111110(0|1)*|011101000101011111111(0|1)*|0111010001011000(0|1)*|011101000101100000000(0|1)*|011101000101100000001(0|1)*|01
1101000101100000010(0|1)*|011101000101100000011(0|1)*|011101000101100000100(0|1)*|011101000101100000101(0|1)*|011101000101100000110(0|1)*|011101000101100000111(0|1)*|011101000101100001000(0|1)*|011101000101100001001(0|1)*|011101000101100001010(0|1)*|011101000101100001011(0|1)*|011101000101100001100(0|1)*|011101000101100001101(0|1)*|011101000101100001110(0|1)*|011101000101100001111(0|1)*|011101000101100010000(0|1)*|011101000101100010001(0|1)*|011101000101100010010(0|1)*|011101000101100010011(0|1)*|011101000101100010100(0|1)*|011101000101100010101(0|1)*|011101000101100010110(0|1)*|011101000101100010111(0|1)*|011101000101100011000(0|1)*|011101000101100011001(0|1)*|011101000101100011010(0|1)*|011101000101100011011(0|1)*|011101000101100011100(0|1)*|011101000101100011101(0|1)*|011101000101100011110(0|1)*|011101000101100011111(0|1)*|01110100010110010(0|1)*|011101000101100100000(0|1)*|011101000101100100001(0|1)*|011101000101100100010(0|1)*|011101000101100100011(0|1)*|011101000101100100100(0|1)*|011101000101100100101(0|1)*|011101000101100100110(0|1)*|011101000101100100111(0|1)*|011101000101100101000(0|1)*|011101000101100101001(0|1)*|011101000101100101010(0|1)*|011101000101100101011(0|1)*|011101000101100101100(0|1)*|011101000101100101101(0|1)*|011101000101100101110(0|1)*|011101000101100101111(0|1)*|011101000101100101111000(0|1)*|1100101010011100(0|1)*|110010101001110000000(0|1)*|110010101001110000000001(0|1)*|110010101001110000000010(0|1)*|110010101001110000000110(0|1)*|110010101001110000001(0|1)*|110010101001110000001000(0|1)*|110010101001110000001001(0|1)*|110010101001110000001010(0|1)*|110010101001110000001011(0|1)*|110010101001110000001101(0|1)*|110010101001110000001110(0|1)*|110010101001110000010(0|1)*|110010101001110000011(0|1)*|110010101001110000100(0|1)*|110010101001110000101(0|1)*|110010101001110000110(0|1)*|110010101001110000111(0|1)*|110010101001110001000(0|1)*|110010101001110001001(0|1)*|110010101001110001010(0|1)*|110010101001110001011(0|1)*|110010101001110001100
(0|1)*|110010101001110001101(0|1)*|110010101001110001110(0|1)*|110010101001110001111(0|1)*|110010101001110010000(0|1)*|110010101001110010001(0|1)*|110010101001110010010(0|1)*|110010101001110010011(0|1)*|110010101001110010100(0|1)*|110010101001110010101(0|1)*|110010101001110010110(0|1)*|110010101001110010111(0|1)*|110010101001110011000(0|1)*|110010101001110011001(0|1)*|110010101001110011010(0|1)*|110010101001110011011(0|1)*|110010101001110011100(0|1)*|110010101001110011101(0|1)*|110010101001110011110(0|1)*|110010101001110011111(0|1)*|1101101010111010(0|1)*|110110101011101000000(0|1)*|110110101011101000000001(0|1)*|110110101011101000001000(0|1)*|110110101011101000001001(0|1)*|110110101011101000001010(0|1)*|110110101011101000001011(0|1)*|110110101011101000001100(0|1)*|110110101011101000001110(0|1)*|110110101011101000001111(0|1)*|110110101011101000010(0|1)*|110110101011101000010000(0|1)*|110110101011101000010001(0|1)*|110110101011101000010010(0|1)*|110110101011101000010011(0|1)*|110110101011101000011(0|1)*|110110101011101000100(0|1)*|110110101011101000101(0|1)*|110110101011101000110(0|1)*|110110101011101000111(0|1)*|110110101011101001000(0|1)*|110110101011101001001(0|1)*|110110101011101001010(0|1)*|110110101011101001011(0|1)*|110110101011101001100(0|1)*|110110101011101001101(0|1)*|110110101011101001110(0|1)*|110110101011101001111(0|1)*|110110101011101010000(0|1)*|110110101011101010001(0|1)*|110110101011101010010(0|1)*|110110101011101010011(0|1)*|110110101011101010100(0|1)*|110110101011101010101(0|1)*|110110101011101010110(0|1)*|110110101011101010111(0|1)*|110110101011101011000(0|1)*|110110101011101011001(0|1)*|110110101011101011010(0|1)*|110110101011101011011(0|1)*|110110101011101011100(0|1)*|110110101011101011101(0|1)*|110110101011101011110(0|1)*|110110101011101011111(0|1)*|1101101011010100(0|1)*|110110101101010000000(0|1)*|110110101101010000001(0|1)*|110110101101010000010(0|1)*|110110101101010000011(0|1)*|110110101101010000100(0|1)*|110110101101010000101(0|1)*|1101101
01101010000110(0|1)*|110110101101010000111(0|1)*|110110101101010001000(0|1)*|110110101101010001001(0|1)*|110110101101010001010(0|1)*|110110101101010001011(0|1)*|110110101101010001100(0|1)*|110110101101010001101(0|1)*|110110101101010001110(0|1)*|110110101101010001111(0|1)*|110110101101010010000(0|1)*|110110101101010010001(0|1)*|110110101101010010010(0|1)*|110110101101010010011(0|1)*|110110101101010010100(0|1)*|1101101011010100101000(0|1)*|110110101101010010101(0|1)*|110110101101010010110(0|1)*|110110101101010010111(0|1)*|110110101101010011000(0|1)*|110110101101010011010(0|1)*|110110101101010011011(0|1)*|110110101101010011100(0|1)*|110110101101010011101(0|1)*|110110101101010011110(0|1)*|110110101101010011111(0|1)*|1101111010100100(0|1)*|110111101010010000000(0|1)*|110111101010010000001(0|1)*|110111101010010000010(0|1)*|110111101010010000011(0|1)*|110111101010010000100(0|1)*|110111101010010000101(0|1)*|110111101010010000110(0|1)*|110111101010010000111(0|1)*|110111101010010001000(0|1)*|110111101010010001001(0|1)*|110111101010010001010(0|1)*|110111101010010001011(0|1)*|110111101010010001100(0|1)*|110111101010010001101(0|1)*|110111101010010001110(0|1)*|110111101010010001111(0|1)*|110111101010010010000(0|1)*|110111101010010010001(0|1)*|110111101010010010010(0|1)*|110111101010010010011(0|1)*|110111101010010010100(0|1)*|110111101010010010101(0|1)*|110111101010010010110(0|1)*|110111101010010010111(0|1)*|110111101010010011000(0|1)*|110111101010010011001(0|1)*|110111101010010011010(0|1)*|110111101010010011011(0|1)*|110111101010010011100(0|1)*|110111101010010011101(0|1)*|110111101010010011110(0|1)*|110111101010010011111(0|1)*|11011110101001010(0|1)*|110111101010010100000(0|1)*|110111101010010100001(0|1)*|110111101010010100010(0|1)*|110111101010010100011(0|1)*|110111101010010100100(0|1)*|110111101010010100101(0|1)*|110111101010010100110(0|1)*|110111101010010100111(0|1)*|110111101010010101000(0|1)*|110111101010010101001(0|1)*|110111101010010101010(0|1)*|110111101010010101011(0|1)*
|110111101010010101100(0|1)*|110111101010010101101(0|1)*|110111101010010101110(0|1)*|110111101010010101111(0|1)*)", | ||
337 | 2, | ||
338 | { "GNVPN-0001-PAD1101111010100101011101010101010101", | ||
339 | "GNVPN-0001-PAD11001010100111000101101010101" }, | ||
340 | { match, match } | ||
341 | } | ||
342 | }; | ||
343 | |||
344 | check_nfa = 0; | ||
345 | check_dfa = 0; | ||
346 | check_rand = 0; | ||
347 | |||
348 | for (i = 0; i < 19; i++) | ||
349 | { | ||
350 | if (0 != regcomp (&rx, rxstr[i].regex, REG_EXTENDED)) | ||
351 | { | ||
352 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
353 | "Could not compile regex using regcomp()\n"); | ||
354 | return 1; | ||
355 | } | ||
356 | |||
357 | /* NFA test */ | ||
358 | a = REGEX_INTERNAL_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex)); | ||
359 | check_nfa += test_automaton (a, &rx, &rxstr[i]); | ||
360 | REGEX_INTERNAL_automaton_destroy (a); | ||
361 | |||
362 | /* DFA test */ | ||
363 | a = REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), | ||
364 | 0); | ||
365 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | ||
366 | check_proof = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (a)); | ||
367 | REGEX_INTERNAL_automaton_destroy (a); | ||
368 | |||
369 | a = REGEX_INTERNAL_construct_dfa (check_proof, strlen (check_proof), 0); | ||
370 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | ||
371 | REGEX_INTERNAL_automaton_destroy (a); | ||
372 | if (0 != check_dfa) | ||
373 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "check_proof: %s\n", check_proof); | ||
374 | GNUNET_free (check_proof); | ||
375 | |||
376 | regfree (&rx); | ||
377 | } | ||
378 | |||
379 | /* Random tests */ | ||
380 | srand (time (NULL)); | ||
381 | for (i = 0; i < 20; i++) | ||
382 | check_rand += test_random (50, 60, 10); | ||
383 | |||
384 | return check_nfa + check_dfa + check_rand; | ||
385 | } | ||
diff --git a/src/regex/test_regex_graph_api.c b/src/regex/test_regex_graph_api.c deleted file mode 100644 index 923bb9f42..000000000 --- a/src/regex/test_regex_graph_api.c +++ /dev/null | |||
@@ -1,157 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_graph_api.c | ||
22 | * @brief test for regex_graph.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include <regex.h> | ||
26 | #include <time.h> | ||
27 | #include "platform.h" | ||
28 | #include "regex_internal_lib.h" | ||
29 | #include "regex_test_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
32 | #define KEEP_FILES 1 | ||
33 | |||
34 | /** | ||
35 | * Check if 'filename' exists and is not empty. | ||
36 | * | ||
37 | * @param filename name of the file that should be checked | ||
38 | * | ||
39 | * @return 0 if ok, non 0 on error. | ||
40 | */ | ||
41 | static int | ||
42 | filecheck (const char *filename) | ||
43 | { | ||
44 | int error = 0; | ||
45 | FILE *fp; | ||
46 | |||
47 | /* Check if file was created and delete it again */ | ||
48 | if (NULL == (fp = fopen (filename, "r"))) | ||
49 | { | ||
50 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not find graph %s\n", filename); | ||
51 | return 1; | ||
52 | } | ||
53 | |||
54 | GNUNET_break (0 == fseek (fp, 0L, SEEK_END)); | ||
55 | if (1 > ftell (fp)) | ||
56 | { | ||
57 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
58 | "Graph writing failed, got empty file (%s)!\n", filename); | ||
59 | error = 2; | ||
60 | } | ||
61 | |||
62 | GNUNET_assert (0 == fclose (fp)); | ||
63 | |||
64 | if (! KEEP_FILES) | ||
65 | { | ||
66 | if (0 != unlink (filename)) | ||
67 | GNUNET_log_strerror_file (GNUNET_ERROR_TYPE_ERROR, "unlink", filename); | ||
68 | } | ||
69 | return error; | ||
70 | } | ||
71 | |||
72 | |||
73 | int | ||
74 | main (int argc, char *argv[]) | ||
75 | { | ||
76 | int error; | ||
77 | struct REGEX_INTERNAL_Automaton *a; | ||
78 | unsigned int i; | ||
79 | const char *filename = "test_graph.dot"; | ||
80 | |||
81 | const char *regex[12] = { | ||
82 | "ab(c|d)+c*(a(b|c)+d)+(bla)+", | ||
83 | "(bla)*", | ||
84 | "b(lab)*la", | ||
85 | "(ab)*", | ||
86 | "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", | ||
87 | "z(abc|def)?xyz", | ||
88 | "1*0(0|1)*", | ||
89 | "a*b*", | ||
90 | "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", | ||
91 | "a", | ||
92 | "a|b", | ||
93 | "PADPADPADPADPADPabcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd" | ||
94 | }; | ||
95 | |||
96 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
97 | error = 0; | ||
98 | for (i = 0; i < 12; i++) | ||
99 | { | ||
100 | /* Check NFA graph creation */ | ||
101 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
102 | REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); | ||
103 | REGEX_INTERNAL_automaton_destroy (a); | ||
104 | error += filecheck (filename); | ||
105 | |||
106 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
107 | REGEX_TEST_automaton_save_graph (a, filename, | ||
108 | REGEX_TEST_GRAPH_DEFAULT | ||
109 | | REGEX_TEST_GRAPH_VERBOSE); | ||
110 | REGEX_INTERNAL_automaton_destroy (a); | ||
111 | error += filecheck (filename); | ||
112 | |||
113 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
114 | REGEX_TEST_automaton_save_graph (a, filename, | ||
115 | REGEX_TEST_GRAPH_DEFAULT | ||
116 | | REGEX_TEST_GRAPH_COLORING); | ||
117 | REGEX_INTERNAL_automaton_destroy (a); | ||
118 | error += filecheck (filename); | ||
119 | |||
120 | a = REGEX_INTERNAL_construct_nfa (regex[i], strlen (regex[i])); | ||
121 | REGEX_TEST_automaton_save_graph (a, filename, | ||
122 | REGEX_TEST_GRAPH_DEFAULT | ||
123 | | REGEX_TEST_GRAPH_VERBOSE | ||
124 | | REGEX_TEST_GRAPH_COLORING); | ||
125 | REGEX_INTERNAL_automaton_destroy (a); | ||
126 | error += filecheck (filename); | ||
127 | |||
128 | |||
129 | /* Check DFA graph creation */ | ||
130 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); | ||
131 | REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); | ||
132 | REGEX_INTERNAL_automaton_destroy (a); | ||
133 | error += filecheck (filename); | ||
134 | |||
135 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); | ||
136 | REGEX_TEST_automaton_save_graph (a, filename, | ||
137 | REGEX_TEST_GRAPH_DEFAULT | ||
138 | | REGEX_TEST_GRAPH_VERBOSE); | ||
139 | REGEX_INTERNAL_automaton_destroy (a); | ||
140 | error += filecheck (filename); | ||
141 | |||
142 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 0); | ||
143 | REGEX_TEST_automaton_save_graph (a, filename, | ||
144 | REGEX_TEST_GRAPH_DEFAULT | ||
145 | | REGEX_TEST_GRAPH_COLORING); | ||
146 | REGEX_INTERNAL_automaton_destroy (a); | ||
147 | error += filecheck (filename); | ||
148 | |||
149 | |||
150 | a = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 4); | ||
151 | REGEX_TEST_automaton_save_graph (a, filename, REGEX_TEST_GRAPH_DEFAULT); | ||
152 | REGEX_INTERNAL_automaton_destroy (a); | ||
153 | error += filecheck (filename); | ||
154 | } | ||
155 | |||
156 | return error; | ||
157 | } | ||
diff --git a/src/regex/test_regex_integration.c b/src/regex/test_regex_integration.c deleted file mode 100644 index 2e6874e4d..000000000 --- a/src/regex/test_regex_integration.c +++ /dev/null | |||
@@ -1,212 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2013, 2015 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_integration.c | ||
22 | * @brief base test case for regex integration with VPN; | ||
23 | * tests that the regexes generated by the TUN API | ||
24 | * for IP addresses work (for some simple cases) | ||
25 | * @author Christian Grothoff | ||
26 | */ | ||
27 | #include "platform.h" | ||
28 | #include "gnunet_applications.h" | ||
29 | #include "gnunet_util_lib.h" | ||
30 | #include "gnunet_tun_lib.h" | ||
31 | #include "gnunet_testing_lib.h" | ||
32 | #include "gnunet_regex_service.h" | ||
33 | |||
34 | |||
35 | /** | ||
36 | * How long until we really give up on a particular testcase portion? | ||
37 | */ | ||
38 | #define TOTAL_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, \ | ||
39 | 600) | ||
40 | |||
41 | /** | ||
42 | * How long until we give up on any particular operation (and retry)? | ||
43 | */ | ||
44 | #define BASE_TIMEOUT GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 3) | ||
45 | |||
46 | |||
/**
 * Announcement handle for the IPv4 exit policy regex; created in run(),
 * cancelled in end().
 */
static struct GNUNET_REGEX_Announcement *a4;

/**
 * Search handle for the IPv4 search string; created in run(),
 * cancelled in end().
 */
static struct GNUNET_REGEX_Search *s4;

/**
 * Announcement handle for the IPv6 exit policy regex; created in run(),
 * cancelled in end().
 */
static struct GNUNET_REGEX_Announcement *a6;

/**
 * Search handle for the IPv6 search string; created in run(),
 * cancelled in end().
 */
static struct GNUNET_REGEX_Search *s6;

/**
 * Value returned by main(): starts at 1, set to 0 by end() and reset
 * to 1 by end_badly().
 */
static int ok = 1;

/**
 * Scheduler task handle: set in run() to the delayed end_badly() task and
 * replaced in found_cb() by the task that runs end(); cleared in end().
 */
static struct GNUNET_SCHEDULER_Task *die_task;
58 | |||
59 | |||
60 | static void | ||
61 | end (void *cls) | ||
62 | { | ||
63 | die_task = NULL; | ||
64 | GNUNET_REGEX_announce_cancel (a4); | ||
65 | a4 = NULL; | ||
66 | GNUNET_REGEX_search_cancel (s4); | ||
67 | s4 = NULL; | ||
68 | GNUNET_REGEX_announce_cancel (a6); | ||
69 | a6 = NULL; | ||
70 | GNUNET_REGEX_search_cancel (s6); | ||
71 | s6 = NULL; | ||
72 | ok = 0; | ||
73 | } | ||
74 | |||
75 | |||
76 | static void | ||
77 | end_badly () | ||
78 | { | ||
79 | fprintf (stderr, "%s", "Testcase failed (timeout).\n"); | ||
80 | end (NULL); | ||
81 | ok = 1; | ||
82 | } | ||
83 | |||
84 | |||
85 | /** | ||
86 | * Search callback function, invoked for every result that was found. | ||
87 | * | ||
88 | * @param cls Closure provided in #GNUNET_REGEX_search(). | ||
89 | * @param id Peer providing a regex that matches the string. | ||
90 | * @param get_path Path of the get request. | ||
91 | * @param get_path_length Length of @a get_path. | ||
92 | * @param put_path Path of the put request. | ||
93 | * @param put_path_length Length of the @a put_path. | ||
94 | */ | ||
95 | static void | ||
96 | found_cb (void *cls, | ||
97 | const struct GNUNET_PeerIdentity *id, | ||
98 | const struct GNUNET_PeerIdentity *get_path, | ||
99 | unsigned int get_path_length, | ||
100 | const struct GNUNET_PeerIdentity *put_path, | ||
101 | unsigned int put_path_length) | ||
102 | { | ||
103 | const char *str = cls; | ||
104 | static int found; | ||
105 | |||
106 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
107 | "IPv%s-exit found\n", | ||
108 | str); | ||
109 | if (0 == strcmp (str, "4")) | ||
110 | found |= 4; | ||
111 | if (0 == strcmp (str, "6")) | ||
112 | found |= 2; | ||
113 | if ((4 | 2) == found) | ||
114 | { | ||
115 | GNUNET_SCHEDULER_cancel (die_task); | ||
116 | die_task = | ||
117 | GNUNET_SCHEDULER_add_now (&end, NULL); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | |||
122 | static void | ||
123 | run (void *cls, | ||
124 | const struct GNUNET_CONFIGURATION_Handle *cfg, | ||
125 | struct GNUNET_TESTING_Peer *peer) | ||
126 | { | ||
127 | char rxstr4[GNUNET_TUN_IPV4_REGEXLEN]; | ||
128 | char rxstr6[GNUNET_TUN_IPV6_REGEXLEN]; | ||
129 | char *p4r; | ||
130 | char *p6r; | ||
131 | char *p4; | ||
132 | char *p6; | ||
133 | char *ss4; | ||
134 | char *ss6; | ||
135 | struct in_addr i4; | ||
136 | struct in6_addr i6; | ||
137 | |||
138 | die_task = | ||
139 | GNUNET_SCHEDULER_add_delayed (TOTAL_TIMEOUT, | ||
140 | &end_badly, NULL); | ||
141 | GNUNET_assert (1 == | ||
142 | inet_pton (AF_INET, | ||
143 | "127.0.0.1", | ||
144 | &i4)); | ||
145 | GNUNET_assert (1 == | ||
146 | inet_pton (AF_INET6, | ||
147 | "::1:5", | ||
148 | &i6)); | ||
149 | GNUNET_TUN_ipv4toregexsearch (&i4, | ||
150 | 8080, | ||
151 | rxstr4); | ||
152 | GNUNET_TUN_ipv6toregexsearch (&i6, | ||
153 | 8686, | ||
154 | rxstr6); | ||
155 | GNUNET_asprintf (&ss4, | ||
156 | "%s%s", | ||
157 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
158 | rxstr4); | ||
159 | GNUNET_asprintf (&ss6, | ||
160 | "%s%s", | ||
161 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
162 | rxstr6); | ||
163 | p4r = GNUNET_TUN_ipv4policy2regex ("0.0.0.0/0:!25;"); | ||
164 | p6r = GNUNET_TUN_ipv6policy2regex ("::/0:!25;"); | ||
165 | GNUNET_asprintf (&p4, | ||
166 | "%s%s", | ||
167 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
168 | p4r); | ||
169 | GNUNET_asprintf (&p6, | ||
170 | "%s%s", | ||
171 | GNUNET_APPLICATION_TYPE_EXIT_REGEX_PREFIX, | ||
172 | p6r); | ||
173 | GNUNET_free (p4r); | ||
174 | GNUNET_free (p6r); | ||
175 | a4 = GNUNET_REGEX_announce (cfg, | ||
176 | p4, | ||
177 | GNUNET_TIME_relative_multiply ( | ||
178 | GNUNET_TIME_UNIT_SECONDS, | ||
179 | 5), | ||
180 | 1); | ||
181 | a6 = GNUNET_REGEX_announce (cfg, | ||
182 | p6, | ||
183 | GNUNET_TIME_relative_multiply ( | ||
184 | GNUNET_TIME_UNIT_SECONDS, | ||
185 | 5), | ||
186 | 1); | ||
187 | GNUNET_free (p4); | ||
188 | GNUNET_free (p6); | ||
189 | |||
190 | s4 = GNUNET_REGEX_search (cfg, | ||
191 | ss4, | ||
192 | &found_cb, "4"); | ||
193 | s6 = GNUNET_REGEX_search (cfg, | ||
194 | ss6, | ||
195 | &found_cb, "6"); | ||
196 | GNUNET_free (ss4); | ||
197 | GNUNET_free (ss6); | ||
198 | } | ||
199 | |||
200 | |||
201 | int | ||
202 | main (int argc, char *argv[]) | ||
203 | { | ||
204 | if (0 != GNUNET_TESTING_peer_run ("test-regex-integration", | ||
205 | "test_regex_api_data.conf", | ||
206 | &run, NULL)) | ||
207 | return 1; | ||
208 | return ok; | ||
209 | } | ||
210 | |||
211 | |||
212 | /* end of test_regex_integration.c */ | ||
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c deleted file mode 100644 index e7ef72b58..000000000 --- a/src/regex/test_regex_iterate_api.c +++ /dev/null | |||
@@ -1,262 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_iterate_api.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include <regex.h> | ||
26 | #include <time.h> | ||
27 | #include "platform.h" | ||
28 | #include "regex_internal_lib.h" | ||
29 | #include "regex_block_lib.h" | ||
30 | #include "regex_internal.h" | ||
31 | |||
32 | /** | ||
33 | * Regex initial padding. | ||
34 | */ | ||
35 | #define INITIAL_PADDING "PADPADPADPADPADP" | ||
36 | |||
37 | /** | ||
38 | * Set to GNUNET_YES to save a debug graph. | ||
39 | */ | ||
40 | #define REGEX_INTERNAL_ITERATE_SAVE_DEBUG_GRAPH GNUNET_NO | ||
41 | |||
/**
 * Number of edges seen by key_iterator(); main() resets it before the
 * first iteration pass over each regex and compares it with the DFA's
 * transition count afterwards.
 */
static unsigned int transition_counter;
43 | |||
/**
 * Closure handed to key_iterator().
 */
struct IteratorContext
{
  /* Incremented by key_iterator() for every failed proof check. */
  int error;
  /* GNUNET_YES if key_iterator() should write states/edges to graph_filep. */
  int should_save_graph;
  /* Output stream for the debug graph; NULL when no graph is saved. */
  FILE *graph_filep;
  /* Number of entries of 'strings' that are compared against proofs. */
  unsigned int string_count;
  /* Strings each reported proof is compared with. */
  char *const *strings;
  /* Number of times a reported proof was equal to one of 'strings'. */
  unsigned int match_count;
};
53 | |||
/**
 * A regex together with the strings that are expected to show up as
 * proofs while iterating over the edges of its DFA.
 */
struct RegexStringPair
{
  /* Regular expression the DFA is built from. */
  char *regex;
  /* Number of valid entries in 'strings'. */
  unsigned int string_count;
  /* Expected proof strings (at most 20). */
  char *strings[20];
};
60 | |||
61 | |||
62 | static void | ||
63 | key_iterator (void *cls, const struct GNUNET_HashCode *key, | ||
64 | const char *proof, | ||
65 | int accepting, unsigned int num_edges, | ||
66 | const struct REGEX_BLOCK_Edge *edges) | ||
67 | { | ||
68 | unsigned int i; | ||
69 | struct IteratorContext *ctx = cls; | ||
70 | char *out_str; | ||
71 | char *state_id = GNUNET_strdup (GNUNET_h2s (key)); | ||
72 | |||
73 | GNUNET_assert (NULL != proof); | ||
74 | if (GNUNET_YES == ctx->should_save_graph) | ||
75 | { | ||
76 | if (GNUNET_YES == accepting) | ||
77 | GNUNET_asprintf (&out_str, "\"%s\" [shape=doublecircle]\n", state_id); | ||
78 | else | ||
79 | GNUNET_asprintf (&out_str, "\"%s\" [shape=circle]\n", state_id); | ||
80 | fwrite (out_str, strlen (out_str), 1, ctx->graph_filep); | ||
81 | GNUNET_free (out_str); | ||
82 | |||
83 | for (i = 0; i < num_edges; i++) | ||
84 | { | ||
85 | transition_counter++; | ||
86 | GNUNET_asprintf (&out_str, "\"%s\" -> \"%s\" [label = \"%s (%s)\"]\n", | ||
87 | state_id, GNUNET_h2s (&edges[i].destination), | ||
88 | edges[i].label, proof); | ||
89 | fwrite (out_str, strlen (out_str), 1, ctx->graph_filep); | ||
90 | |||
91 | GNUNET_free (out_str); | ||
92 | } | ||
93 | } | ||
94 | else | ||
95 | { | ||
96 | for (i = 0; i < num_edges; i++) | ||
97 | transition_counter++; | ||
98 | } | ||
99 | |||
100 | for (i = 0; i < ctx->string_count; i++) | ||
101 | { | ||
102 | if (0 == strcmp (proof, ctx->strings[i])) | ||
103 | ctx->match_count++; | ||
104 | } | ||
105 | |||
106 | if (GNUNET_OK != REGEX_BLOCK_check_proof (proof, strlen (proof), key)) | ||
107 | { | ||
108 | ctx->error++; | ||
109 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
110 | "Proof check failed: proof: %s key: %s\n", proof, state_id); | ||
111 | } | ||
112 | GNUNET_free (state_id); | ||
113 | } | ||
114 | |||
115 | |||
116 | int | ||
117 | main (int argc, char *argv[]) | ||
118 | { | ||
119 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
120 | |||
121 | int error; | ||
122 | struct REGEX_INTERNAL_Automaton *dfa; | ||
123 | unsigned int i; | ||
124 | unsigned int num_transitions; | ||
125 | char *filename = NULL; | ||
126 | struct IteratorContext ctx = { 0, 0, NULL, 0, NULL, 0 }; | ||
127 | |||
128 | error = 0; | ||
129 | |||
130 | const struct RegexStringPair rxstr[13] = { | ||
131 | { INITIAL_PADDING "ab(c|d)+c*(a(b|c)+d)+(bla)+", 2, | ||
132 | { INITIAL_PADDING "abcdcdca", INITIAL_PADDING "abcabdbl" } }, | ||
133 | { INITIAL_PADDING | ||
134 | "abcdefghixxxxxxxxxxxxxjklmnop*qstoisdjfguisdfguihsdfgbdsuivggsd", 1, | ||
135 | { INITIAL_PADDING "abcdefgh" } }, | ||
136 | { INITIAL_PADDING "VPN-4-1(0|1)*", 2, | ||
137 | { INITIAL_PADDING "VPN-4-10", INITIAL_PADDING "VPN-4-11" } }, | ||
138 | { INITIAL_PADDING "(a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*)", 2, | ||
139 | { INITIAL_PADDING "aaaaaaaa", INITIAL_PADDING "aaXXyyyc" } }, | ||
140 | { INITIAL_PADDING "a*", 1, { INITIAL_PADDING "aaaaaaaa" } }, | ||
141 | { INITIAL_PADDING "xzxzxzxzxz", 1, { INITIAL_PADDING "xzxzxzxz" } }, | ||
142 | { INITIAL_PADDING "xyz*", 1, { INITIAL_PADDING "xyzzzzzz" } }, | ||
143 | { INITIAL_PADDING | ||
144 | "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", | ||
145 | 2, { INITIAL_PADDING "abcd:000", INITIAL_PADDING "abcd:101" } }, | ||
146 | { INITIAL_PADDING "(x*|(0|1|2)(a|b|c|d)+)", 2, | ||
147 | { INITIAL_PADDING "xxxxxxxx", INITIAL_PADDING "0abcdbad" } }, | ||
148 | { INITIAL_PADDING "(0|1)(0|1)23456789ABC", 1, | ||
149 | { INITIAL_PADDING "11234567" } }, | ||
150 | { INITIAL_PADDING "0*123456789ABC*", 3, | ||
151 | { INITIAL_PADDING "00123456", INITIAL_PADDING "00000000", | ||
152 | INITIAL_PADDING "12345678" } }, | ||
153 | { INITIAL_PADDING "0123456789A*BC", 1, { INITIAL_PADDING "01234567" } }, | ||
154 | { "GNUNETVPN000100000IPEX6-fc5a:4e1:c2ba::1", 1, | ||
155 | { "GNUNETVPN000100000IPEX6-" } } | ||
156 | }; | ||
157 | |||
158 | const char *graph_start_str = "digraph G {\nrankdir=LR\n"; | ||
159 | const char *graph_end_str = "\n}\n"; | ||
160 | |||
161 | for (i = 0; i < 13; i++) | ||
162 | { | ||
163 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating DFA for regex %s\n", | ||
164 | rxstr[i].regex); | ||
165 | |||
166 | |||
167 | /* Create graph */ | ||
168 | if (GNUNET_YES == REGEX_INTERNAL_ITERATE_SAVE_DEBUG_GRAPH) | ||
169 | { | ||
170 | GNUNET_asprintf (&filename, "iteration_graph_%u.dot", i); | ||
171 | ctx.graph_filep = fopen (filename, "w"); | ||
172 | if (NULL == ctx.graph_filep) | ||
173 | { | ||
174 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
175 | "Could not open file %s for saving iteration graph.\n", | ||
176 | filename); | ||
177 | ctx.should_save_graph = GNUNET_NO; | ||
178 | } | ||
179 | else | ||
180 | { | ||
181 | ctx.should_save_graph = GNUNET_YES; | ||
182 | fwrite (graph_start_str, strlen (graph_start_str), 1, ctx.graph_filep); | ||
183 | } | ||
184 | GNUNET_free (filename); | ||
185 | } | ||
186 | else | ||
187 | { | ||
188 | ctx.should_save_graph = GNUNET_NO; | ||
189 | ctx.graph_filep = NULL; | ||
190 | } | ||
191 | |||
192 | /* Iterate over DFA edges */ | ||
193 | transition_counter = 0; | ||
194 | ctx.string_count = rxstr[i].string_count; | ||
195 | ctx.strings = rxstr[i].strings; | ||
196 | ctx.match_count = 0; | ||
197 | dfa = | ||
198 | REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); | ||
199 | REGEX_INTERNAL_iterate_all_edges (dfa, key_iterator, &ctx); | ||
200 | num_transitions = | ||
201 | REGEX_INTERNAL_get_transition_count (dfa) - dfa->start->transition_count; | ||
202 | |||
203 | if (transition_counter < num_transitions) | ||
204 | { | ||
205 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
206 | "Automaton has %d transitions, iterated over %d transitions\n", | ||
207 | num_transitions, transition_counter); | ||
208 | error += 1; | ||
209 | } | ||
210 | |||
211 | if (ctx.match_count < ctx.string_count) | ||
212 | { | ||
213 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
214 | "Missing initial states for regex %s\n", rxstr[i].regex); | ||
215 | error += (ctx.string_count - ctx.match_count); | ||
216 | } | ||
217 | else if (ctx.match_count > ctx.string_count) | ||
218 | { | ||
219 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
220 | "Duplicate initial transitions for regex %s\n", | ||
221 | rxstr[i].regex); | ||
222 | error += (ctx.string_count - ctx.match_count); | ||
223 | } | ||
224 | |||
225 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
226 | |||
227 | /* Finish graph */ | ||
228 | if (GNUNET_YES == ctx.should_save_graph) | ||
229 | { | ||
230 | fwrite (graph_end_str, strlen (graph_end_str), 1, ctx.graph_filep); | ||
231 | fclose (ctx.graph_filep); | ||
232 | ctx.graph_filep = NULL; | ||
233 | ctx.should_save_graph = GNUNET_NO; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | |||
238 | for (i = 0; i < 13; i++) | ||
239 | { | ||
240 | ctx.string_count = rxstr[i].string_count; | ||
241 | ctx.strings = rxstr[i].strings; | ||
242 | ctx.match_count = 0; | ||
243 | |||
244 | dfa = | ||
245 | REGEX_INTERNAL_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex), 0); | ||
246 | REGEX_INTERNAL_dfa_add_multi_strides (NULL, dfa, 2); | ||
247 | REGEX_INTERNAL_iterate_all_edges (dfa, key_iterator, &ctx); | ||
248 | |||
249 | if (ctx.match_count < ctx.string_count) | ||
250 | { | ||
251 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
252 | "Missing initial states for regex %s\n", rxstr[i].regex); | ||
253 | error += (ctx.string_count - ctx.match_count); | ||
254 | } | ||
255 | |||
256 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
257 | } | ||
258 | |||
259 | error += ctx.error; | ||
260 | |||
261 | return error; | ||
262 | } | ||
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c deleted file mode 100644 index 289b1183b..000000000 --- a/src/regex/test_regex_proofs.c +++ /dev/null | |||
@@ -1,173 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/test_regex_proofs.c | ||
22 | * @brief test for regex.c | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "regex_internal_lib.h" | ||
27 | #include "regex_test_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
30 | |||
31 | /** | ||
32 | * Test if the given regex's canonical regex is the same as this canonical | ||
33 | * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the | ||
34 | * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from | ||
35 | * this canonical regex 3. compare the canonical regex of dfa A with the | ||
36 | * canonical regex of dfa B. | ||
37 | * | ||
38 | * @param regex regular expression used for this test (see above). | ||
39 | * | ||
40 | * @return 0 on success, 1 on failure | ||
41 | */ | ||
42 | static unsigned int | ||
43 | test_proof (const char *regex) | ||
44 | { | ||
45 | unsigned int error; | ||
46 | struct REGEX_INTERNAL_Automaton *dfa; | ||
47 | char *c_rx1; | ||
48 | const char *c_rx2; | ||
49 | |||
50 | dfa = REGEX_INTERNAL_construct_dfa (regex, strlen (regex), 1); | ||
51 | GNUNET_assert (NULL != dfa); | ||
52 | c_rx1 = GNUNET_strdup (REGEX_INTERNAL_get_canonical_regex (dfa)); | ||
53 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
54 | dfa = REGEX_INTERNAL_construct_dfa (c_rx1, strlen (c_rx1), 1); | ||
55 | GNUNET_assert (NULL != dfa); | ||
56 | c_rx2 = REGEX_INTERNAL_get_canonical_regex (dfa); | ||
57 | |||
58 | error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; | ||
59 | |||
60 | if (error > 0) | ||
61 | { | ||
62 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
63 | "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n", | ||
64 | regex, c_rx1, c_rx2); | ||
65 | } | ||
66 | |||
67 | GNUNET_free (c_rx1); | ||
68 | REGEX_INTERNAL_automaton_destroy (dfa); | ||
69 | |||
70 | return error; | ||
71 | } | ||
72 | |||
73 | |||
/**
 * Run 'test_proof' on 'count' randomly generated regular expressions, each
 * generated with the requested length 'rx_length', and add up the results.
 *
 * @param count number of random regular expressions to test.
 * @param rx_length length passed to the random regex generator.
 *
 * @return 0 on success, number of failures otherwise.
 */
static unsigned int
test_proofs_random (unsigned int count, size_t rx_length)
{
  unsigned int failed = 0;
  unsigned int round;

  for (round = 0; round < count; round++)
  {
    /* The generated expression is released again right after it was checked. */
    char *candidate = REGEX_TEST_generate_random_regex (rx_length, NULL);

    failed += test_proof (candidate);
    GNUNET_free (candidate);
  }

  return failed;
}
101 | |||
102 | |||
103 | /** | ||
104 | * Test a number of known examples of regexes for proper canonicalization. | ||
105 | * | ||
106 | * @return 0 on success, number of failures otherwise. | ||
107 | */ | ||
108 | static unsigned int | ||
109 | test_proofs_static () | ||
110 | { | ||
111 | unsigned int i; | ||
112 | unsigned int error; | ||
113 | |||
114 | const char *regex[8] = { | ||
115 | "a|aa*a", | ||
116 | "a+", | ||
117 | "a*", | ||
118 | "a*a*", | ||
119 | "(F*C|WfPf|y+F*C)", | ||
120 | "y*F*C|WfPf", | ||
121 | "((a|b)c|(a|b)(d|(a|b)e))", | ||
122 | "((a|b)(c|d)|(a|b)(a|b)e)" | ||
123 | }; | ||
124 | |||
125 | const char *canon_rx1; | ||
126 | const char *canon_rx2; | ||
127 | struct REGEX_INTERNAL_Automaton *dfa1; | ||
128 | struct REGEX_INTERNAL_Automaton *dfa2; | ||
129 | |||
130 | error = 0; | ||
131 | |||
132 | for (i = 0; i < 8; i += 2) | ||
133 | { | ||
134 | dfa1 = REGEX_INTERNAL_construct_dfa (regex[i], strlen (regex[i]), 1); | ||
135 | dfa2 = REGEX_INTERNAL_construct_dfa (regex[i + 1], strlen (regex[i + 1]), | ||
136 | 1); | ||
137 | GNUNET_assert (NULL != dfa1); | ||
138 | GNUNET_assert (NULL != dfa2); | ||
139 | |||
140 | canon_rx1 = REGEX_INTERNAL_get_canonical_regex (dfa1); | ||
141 | canon_rx2 = REGEX_INTERNAL_get_canonical_regex (dfa2); | ||
142 | |||
143 | error += (0 == strcmp (canon_rx1, canon_rx2)) ? 0 : 1; | ||
144 | |||
145 | if (error > 0) | ||
146 | { | ||
147 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
148 | "Comparing canonical regex failed:\nrx1:\t%s\ncrx1:\t%s\nrx2:\t%s\ncrx2:\t%s\n", | ||
149 | regex[i], canon_rx1, regex[i + 1], canon_rx2); | ||
150 | } | ||
151 | |||
152 | REGEX_INTERNAL_automaton_destroy (dfa1); | ||
153 | REGEX_INTERNAL_automaton_destroy (dfa2); | ||
154 | } | ||
155 | |||
156 | return error; | ||
157 | } | ||
158 | |||
159 | |||
160 | int | ||
161 | main (int argc, char *argv[]) | ||
162 | { | ||
163 | GNUNET_log_setup ("test-regex", "WARNING", NULL); | ||
164 | |||
165 | int error; | ||
166 | |||
167 | error = 0; | ||
168 | |||
169 | error += test_proofs_static (); | ||
170 | error += test_proofs_random (100, 30); | ||
171 | |||
172 | return error; | ||
173 | } | ||