diff options
Diffstat (limited to 'src/regex/regex_internal_lib.h')
-rw-r--r-- | src/regex/regex_internal_lib.h | 320 |
1 files changed, 320 insertions, 0 deletions
diff --git a/src/regex/regex_internal_lib.h b/src/regex/regex_internal_lib.h new file mode 100644 index 000000000..f14665538 --- /dev/null +++ b/src/regex/regex_internal_lib.h | |||
@@ -0,0 +1,320 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012, 2013 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file regex/regex_internal_lib.h | ||
22 | * @brief library to parse regular expressions into a DFA | ||
23 | * @author Maximilian Szengel | ||
24 | * | ||
25 | */ | ||
26 | |||
27 | #ifndef REGEX_INTERNAL_LIB_H | ||
28 | #define REGEX_INTERNAL_LIB_H | ||
29 | |||
30 | #include "gnunet_util_lib.h" | ||
31 | #include "gnunet_dht_service.h" | ||
32 | #include "gnunet_statistics_service.h" | ||
33 | |||
34 | #ifdef __cplusplus | ||
35 | extern "C" | ||
36 | { | ||
37 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
38 | } | ||
39 | #endif | ||
40 | #endif | ||
41 | |||
42 | |||
43 | |||
44 | |||
45 | /** | ||
46 | * Automaton (NFA/DFA) representation; only forward-declared in this header. Returned by REGEX_ITERNAL_construct_dfa and freed with REGEX_ITERNAL_automaton_destroy. | ||
47 | */ | ||
48 | struct REGEX_ITERNAL_Automaton; | ||
49 | |||
50 | |||
51 | /** | ||
52 | * Edge representation, as passed to REGEX_ITERNAL_KeyIterator callbacks. | ||
53 | */ | ||
54 | struct REGEX_ITERNAL_Edge | ||
55 | { | ||
56 | /** | ||
57 | * Label of the edge. FIXME: might want to not consume exactly multiples of 8 bits, need length? | ||
58 | */ | ||
59 | const char *label; | ||
60 | |||
61 | /** | ||
62 | * Destination of the edge, given as a hash code. | ||
63 | */ | ||
64 | struct GNUNET_HashCode destination; | ||
65 | }; | ||
66 | |||
67 | |||
68 | /** | ||
69 | * Construct DFA for the given 'regex' of length 'len'. | ||
70 | * | ||
71 | * Path compression means that, for example, a DFA o -> a -> b -> c -> o will be | ||
72 | * compressed to o -> abc -> o. Note that 'max_path_len' influences the | ||
73 | * non-determinism of states of the resulting NFA in the DHT (number of outgoing | ||
74 | * edges with the same label). For example for an application that stores IPv4 | ||
75 | * addresses as bitstrings it could make sense to limit the path compression to | ||
76 | * 4 or 8. | ||
77 | * | ||
78 | * @param regex regular expression string. | ||
79 | * @param len length of the regular expression. | ||
80 | * @param max_path_len limit the path compression length to the | ||
81 | * given value. If set to 1, no path compression is applied. Set to 0 for | ||
82 | * maximal possible path compression (generally not desirable). | ||
83 | * @return DFA, needs to be freed using REGEX_ITERNAL_automaton_destroy. | ||
84 | */ | ||
85 | struct REGEX_ITERNAL_Automaton * | ||
86 | REGEX_ITERNAL_construct_dfa (const char *regex, const size_t len, | ||
87 | unsigned int max_path_len); | ||
88 | |||
89 | |||
90 | /** | ||
91 | * Free the memory allocated by constructing the REGEX_ITERNAL_Automaton | ||
92 | * data structure. | ||
93 | * | ||
94 | * @param a automaton to be destroyed. | ||
95 | */ | ||
96 | void | ||
97 | REGEX_ITERNAL_automaton_destroy (struct REGEX_ITERNAL_Automaton *a); | ||
98 | |||
99 | |||
100 | /** | ||
101 | * Options for the graph creation function | ||
102 | * REGEX_ITERNAL_automaton_save_graph. | ||
103 | */ | ||
104 | enum REGEX_ITERNAL_GraphSavingOptions | ||
105 | { | ||
106 | /** | ||
107 | * Default. Do nothing special. | ||
108 | */ | ||
109 | REGEX_ITERNAL_GRAPH_DEFAULT = 0, | ||
110 | |||
111 | /** | ||
112 | * The generated graph will include extra information such as the NFA states | ||
113 | * that were used to generate the DFA state. | ||
114 | */ | ||
115 | REGEX_ITERNAL_GRAPH_VERBOSE = 1, | ||
116 | |||
117 | /** | ||
118 | * Enable graph coloring. Each SCC (strongly connected component) is drawn in a different color. | ||
119 | */ | ||
120 | REGEX_ITERNAL_GRAPH_COLORING = 2 | ||
121 | }; | ||
122 | |||
123 | |||
124 | /** | ||
125 | * Save the given automaton as a GraphViz dot file. | ||
126 | * | ||
127 | * @param a the automaton to be saved. | ||
128 | * @param filename where to save the file. | ||
129 | * @param options options for graph generation that include coloring or verbose | ||
130 | * mode (see enum REGEX_ITERNAL_GraphSavingOptions). | ||
131 | */ | ||
132 | void | ||
133 | REGEX_ITERNAL_automaton_save_graph (struct REGEX_ITERNAL_Automaton *a, | ||
134 | const char *filename, | ||
135 | enum REGEX_ITERNAL_GraphSavingOptions options); | ||
136 | |||
137 | |||
138 | /** | ||
139 | * Evaluate the given 'string' against the given compiled regex. | ||
140 | * | ||
141 | * @param a automaton. | ||
142 | * @param string string to check. | ||
143 | * | ||
144 | * @return 0 if string matches, non-zero otherwise. | ||
145 | */ | ||
146 | int | ||
147 | REGEX_ITERNAL_eval (struct REGEX_ITERNAL_Automaton *a, | ||
148 | const char *string); | ||
149 | |||
150 | |||
151 | /** | ||
152 | * Get the first key for the given 'input_string'. This hashes | ||
153 | * the first x bits of the 'input_string'. | ||
154 | * | ||
155 | * @param input_string string. | ||
156 | * @param string_len length of the 'input_string'. | ||
157 | * @param key pointer to the location where the hash code is written. | ||
158 | * | ||
159 | * @return number of bits of 'input_string' that have been consumed | ||
160 | * to construct the key. | ||
161 | */ | ||
162 | size_t | ||
163 | REGEX_ITERNAL_get_first_key (const char *input_string, size_t string_len, | ||
164 | struct GNUNET_HashCode * key); | ||
165 | |||
166 | |||
167 | /** | ||
168 | * Check whether the given 'proof' matches the given 'key'. | ||
169 | * | ||
170 | * @param proof partial regex of a state. | ||
171 | * @param key hash of a state. | ||
172 | * | ||
173 | * @return GNUNET_OK if the proof is valid for the given key. | ||
174 | */ | ||
175 | int | ||
176 | REGEX_ITERNAL_check_proof (const char *proof, | ||
177 | const struct GNUNET_HashCode *key); | ||
178 | |||
179 | |||
180 | /** | ||
181 | * Iterator callback function, as taken by REGEX_ITERNAL_iterate_all_edges. | ||
182 | * | ||
183 | * @param cls closure. | ||
184 | * @param key hash for current state. | ||
185 | * @param proof proof for current state. | ||
186 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
187 | * @param num_edges number of edges leaving current state. | ||
188 | * @param edges edges leaving current state. | ||
189 | */ | ||
190 | typedef void (*REGEX_ITERNAL_KeyIterator)(void *cls, | ||
191 | const struct GNUNET_HashCode *key, | ||
192 | const char *proof, | ||
193 | int accepting, | ||
194 | unsigned int num_edges, | ||
195 | const struct REGEX_ITERNAL_Edge *edges); | ||
196 | |||
197 | |||
198 | /** | ||
199 | * Iterate over all edges starting from the start state of automaton 'a', calling | ||
200 | * the iterator for each edge. | ||
201 | * | ||
202 | * @param a automaton. | ||
203 | * @param iterator iterator called for each edge. | ||
204 | * @param iterator_cls closure for 'iterator'. | ||
205 | */ | ||
206 | void | ||
207 | REGEX_ITERNAL_iterate_all_edges (struct REGEX_ITERNAL_Automaton *a, | ||
208 | REGEX_ITERNAL_KeyIterator iterator, | ||
209 | void *iterator_cls); | ||
210 | |||
211 | |||
212 | |||
213 | /** | ||
214 | * Handle to store cached data about a regex announcement (see REGEX_ITERNAL_announce). | ||
215 | */ | ||
216 | struct REGEX_ITERNAL_Announcement; | ||
217 | |||
218 | /** | ||
219 | * Handle to store data about a regex search (see REGEX_ITERNAL_search). | ||
220 | */ | ||
221 | struct REGEX_ITERNAL_Search; | ||
222 | |||
223 | /** | ||
224 | * Announce a regular expression: put all states of the automaton in the DHT. | ||
225 | * Does not free resources; REGEX_ITERNAL_announce_cancel must be called for that. | ||
226 | * | ||
227 | * @param dht An existing and valid DHT service handle. CANNOT be NULL. | ||
228 | * @param id ID to announce as provider of regex. Own ID in most cases. | ||
229 | * @param regex Regular expression to announce. | ||
230 | * @param compression How many characters per edge can we squeeze? | ||
231 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
232 | * | ||
233 | * @return Handle to reuse or free cached resources. | ||
234 | * Must be freed by calling REGEX_ITERNAL_announce_cancel. | ||
235 | */ | ||
236 | struct REGEX_ITERNAL_Announcement * | ||
237 | REGEX_ITERNAL_announce (struct GNUNET_DHT_Handle *dht, | ||
238 | const struct GNUNET_PeerIdentity *id, | ||
239 | const char *regex, | ||
240 | uint16_t compression, | ||
241 | struct GNUNET_STATISTICS_Handle *stats); | ||
242 | |||
243 | /** | ||
244 | * Re-announce a previously announced regular expression. | ||
245 | * Uses caching to speed up the process. | ||
246 | * | ||
247 | * @param h Handle returned by a previous REGEX_ITERNAL_announce call. | ||
248 | */ | ||
249 | void | ||
250 | REGEX_ITERNAL_reannounce (struct REGEX_ITERNAL_Announcement *h); | ||
251 | |||
252 | |||
253 | /** | ||
254 | * Clear all cached data used by a regex announcement. | ||
255 | * Does not close the DHT connection. | ||
256 | * | ||
257 | * @param h Handle returned by a previous REGEX_ITERNAL_announce call. | ||
258 | */ | ||
259 | void | ||
260 | REGEX_ITERNAL_announce_cancel (struct REGEX_ITERNAL_Announcement *h); | ||
261 | |||
262 | |||
263 | /** | ||
264 | * Search callback function. | ||
265 | * | ||
266 | * @param cls Closure provided in REGEX_ITERNAL_search. | ||
267 | * @param id Peer providing a regex that matches the string. | ||
268 | * @param get_path Path of the get request. | ||
269 | * @param get_path_length Length of get_path. | ||
270 | * @param put_path Path of the put request. | ||
271 | * @param put_path_length Length of the put_path. | ||
272 | */ | ||
273 | typedef void (*REGEX_ITERNAL_Found)(void *cls, | ||
274 | const struct GNUNET_PeerIdentity *id, | ||
275 | const struct GNUNET_PeerIdentity *get_path, | ||
276 | unsigned int get_path_length, | ||
277 | const struct GNUNET_PeerIdentity *put_path, | ||
278 | unsigned int put_path_length); | ||
279 | |||
280 | |||
281 | /** | ||
282 | * Search for a peer offering a regex matching a certain string in the DHT. | ||
283 | * The search runs until REGEX_ITERNAL_search_cancel is called, even if results | ||
284 | * are returned. | ||
285 | * | ||
286 | * @param dht An existing and valid DHT service handle. | ||
287 | * @param string String to match against the regexes in the DHT. | ||
288 | * @param callback Callback for found peers. | ||
289 | * @param callback_cls Closure for @c callback. | ||
290 | * @param stats Optional statistics handle to report usage. Can be NULL. | ||
291 | * | ||
292 | * @return Handle to stop search and free resources. | ||
293 | * Must be freed by calling REGEX_ITERNAL_search_cancel. | ||
294 | */ | ||
295 | struct REGEX_ITERNAL_Search * | ||
296 | REGEX_ITERNAL_search (struct GNUNET_DHT_Handle *dht, | ||
297 | const char *string, | ||
298 | REGEX_ITERNAL_Found callback, | ||
299 | void *callback_cls, | ||
300 | struct GNUNET_STATISTICS_Handle *stats); | ||
301 | |||
302 | /** | ||
303 | * Stop the search and free all data used by a REGEX_ITERNAL_search call. | ||
304 | * Does not close the DHT connection. | ||
305 | * | ||
306 | * @param h Handle returned by a previous REGEX_ITERNAL_search call. | ||
307 | */ | ||
308 | void | ||
309 | REGEX_ITERNAL_search_cancel (struct REGEX_ITERNAL_Search *h); | ||
310 | |||
311 | |||
312 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
313 | { | ||
314 | #endif | ||
315 | #ifdef __cplusplus | ||
316 | } | ||
317 | #endif | ||
318 | |||
319 | /* end of regex_internal_lib.h */ | ||
320 | #endif | ||