aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/regex_internal.h')
-rw-r--r--src/regex/regex_internal.h453
1 files changed, 0 insertions, 453 deletions
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
deleted file mode 100644
index d52479ffe..000000000
--- a/src/regex/regex_internal.h
+++ /dev/null
@@ -1,453 +0,0 @@
1/*
2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17*/
18/**
19 * @file src/regex/regex_internal.h
20 * @brief common internal definitions for regex library.
21 * @author Maximilian Szengel
22 */
23#ifndef REGEX_INTERNAL_H
24#define REGEX_INTERNAL_H
25
26#include "regex_internal_lib.h"
27
28#ifdef __cplusplus
29extern "C"
30{
31#if 0 /* keep Emacsens' auto-indent happy */
32}
33#endif
34#endif
35
36/**
37 * String of the literal characters (0-9, A-Z, a-z) that are allowed inside a
38 * regex (apart from the operators).
39 */
40#define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
41
42
43/**
44 * Transition between two states. Transitions are stored at the states from
45 * which they originate ('from_state'). Each state can have 0-n transitions.
46 * If label is NULL, this is considered to be an epsilon transition.
47 */
48struct REGEX_INTERNAL_Transition
49{
50 /**
51 * Previous element; transitions are kept in a linked list.
52 */
53 struct REGEX_INTERNAL_Transition *prev;
54
55 /**
56 * Next element; transitions are kept in a linked list.
57 */
58 struct REGEX_INTERNAL_Transition *next;
59
60 /**
61 * Unique id of this transition.
62 */
63 unsigned int id;
64
65 /**
66 * Label for this transition, i.e. the edge label in the automaton graph.
67 */
68 char *label;
69
70 /**
71 * State to which this transition leads.
72 */
73 struct REGEX_INTERNAL_State *to_state;
74
75 /**
76 * State from which this transition originates.
77 */
78 struct REGEX_INTERNAL_State *from_state;
79};
80
81
82/**
83 * Forward declaration of a state. Can be used in DFA and NFA automatons.
84 */
85struct REGEX_INTERNAL_State;
86
87
88/**
89 * Set of states, kept as an array of state pointers plus used/total counts.
90 */
91struct REGEX_INTERNAL_StateSet
92{
93 /**
94 * Array of states.
95 */
96 struct REGEX_INTERNAL_State **states;
97
98 /**
99 * Number of entries currently in use in the 'states' array.
100 */
101 unsigned int off;
102
103 /**
104 * Length of the 'states' array.
105 */
106 unsigned int size;
107};
108
109
110/**
111 * A state. Can be used in DFA and NFA automatons.
112 */
113struct REGEX_INTERNAL_State
114{
115 /**
116 * Previous element of the linked list that keeps the states of an automaton.
117 */
118 struct REGEX_INTERNAL_State *prev;
119
120 /**
121 * Next element of the linked list that keeps the states of an automaton.
122 */
123 struct REGEX_INTERNAL_State *next;
124
125 /**
126 * Previous element in the multi DLL for StateSet_MDLL.
127 */
128 struct REGEX_INTERNAL_State *prev_SS;
129
130 /**
131 * Next element in the multi DLL for StateSet_MDLL.
132 */
133 struct REGEX_INTERNAL_State *next_SS;
134
135 /**
136 * Previous element in the multi DLL for the StateSet_MDLL stack.
137 */
138 struct REGEX_INTERNAL_State *prev_ST;
139
140 /**
141 * Next element in the multi DLL for the StateSet_MDLL stack.
142 */
143 struct REGEX_INTERNAL_State *next_ST;
144
145 /**
146 * Unique state id.
147 */
148 unsigned int id;
149
150 /**
151 * Unique state id that is used for traversing the automaton. It is guaranteed
152 * to be > 0 and < state_count. NOTE(review): lower bound may be >= 0; confirm.
153 */
154 unsigned int traversal_id;
155
156 /**
157 * Whether this is an accepting state or not.
158 */
159 int accepting;
160
161 /**
162 * Marking of the state. This is used for marking all visited states when
163 * traversing all states of an automaton and for cases where the state id
164 * cannot be used (dfa minimization).
165 */
166 int marked;
167
168 /**
169 * Marking the state as contained. This is used for checking in constant time
170 * whether the state is contained in a set.
171 */
172 int contained;
173
174 /**
175 * Marking the state as part of an SCC (Strongly Connected Component). All
176 * states with the same scc_id are part of the same SCC. scc_id is 0 if the
177 * state is not part of any SCC.
178 */
179 unsigned int scc_id;
180
181 /**
182 * Used for SCC detection.
183 */
184 int index;
185
186 /**
187 * Used for SCC detection.
188 */
189 int lowlink;
190
191 /**
192 * Human-readable name of the state. Used for debugging and graph
193 * creation.
194 */
195 char *name;
196
197 /**
198 * Hash of the state.
199 */
200 struct GNUNET_HashCode hash;
201
202 /**
203 * Linear state ID acquired by depth-first-search. This ID should be used for
204 * storing information about the state in an array, because the 'id' of the
205 * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0
206 * and < 'state_count'. NOTE(review): lower bound may be >= 0; confirm.
207 */
208 unsigned int dfs_id;
209
210 /**
211 * Proof for this state.
212 */
213 char *proof;
214
215 /**
216 * Number of transitions from this state to other states.
217 */
218 unsigned int transition_count;
219
220 /**
221 * Head of the DLL of transitions.
222 */
223 struct REGEX_INTERNAL_Transition *transitions_head;
224
225 /**
226 * Tail of the DLL of transitions.
227 */
228 struct REGEX_INTERNAL_Transition *transitions_tail;
229
230 /**
231 * Number of incoming transitions. Used for compressing DFA paths.
232 */
233 unsigned int incoming_transition_count;
234
235 /**
236 * Set of states on which this state is based. Used when creating a DFA out
237 * of several NFA states.
238 */
239 struct REGEX_INTERNAL_StateSet nfa_set;
240};
241
242
243/**
244 * Type of an automaton: nondeterministic (NFA) or deterministic (DFA).
245 */
246enum REGEX_INTERNAL_AutomatonType
247{
248 NFA,
249 DFA
250};
251
252
253/**
254 * Automaton representation.
255 */
256struct REGEX_INTERNAL_Automaton
257{
258 /**
259 * Previous element in the linked list of NFAs used for partial NFA creation.
260 */
261 struct REGEX_INTERNAL_Automaton *prev;
262
263 /**
264 * Next element in the linked list of NFAs used for partial NFA creation.
265 */
266 struct REGEX_INTERNAL_Automaton *next;
267
268 /**
269 * First state of the automaton. This is mainly used for constructing an NFA,
270 * where each NFA itself consists of one or more NFAs linked together.
271 */
272 struct REGEX_INTERNAL_State *start;
273
274 /**
275 * End state of the partial NFA. This is undefined for DFAs.
276 */
277 struct REGEX_INTERNAL_State *end;
278
279 /**
280 * Number of states in the automaton.
281 */
282 unsigned int state_count;
283
284 /**
285 * Head of the DLL of states.
286 */
287 struct REGEX_INTERNAL_State *states_head;
288
289 /**
290 * Tail of the DLL of states.
291 */
292 struct REGEX_INTERNAL_State *states_tail;
293
294 /**
295 * Type of the automaton.
296 */
297 enum REGEX_INTERNAL_AutomatonType type;
298
299 /**
300 * Regex.
301 */
302 char *regex;
303
304 /**
305 * Canonical regex (result of RX->NFA->DFA->RX).
306 */
307 char *canonical_regex;
308
309 /**
310 * GNUNET_YES if multi-strides have been added to the automaton.
311 */
312 int is_multistrided;
313};
314
315
316/**
317 * Construct an NFA by parsing the regex string of length 'len'.
318 *
319 * @param regex regular expression string.
320 * @param len length of the string.
321 *
322 * @return NFA; it must be freed using REGEX_INTERNAL_automaton_destroy.
323 */
324struct REGEX_INTERNAL_Automaton *
325REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len);
326
327
328/**
329 * Function that gets passed to automaton traversal and is called before each
330 * next traversal from state 's' using transition 't' to check if traversal
331 * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue.
332 *
333 * @param cls closure for the check.
334 * @param s current state in the traversal.
335 * @param t current transition from state 's' that will be used for the next
336 * step.
337 *
338 * @return GNUNET_YES to proceed with the traversal, GNUNET_NO to stop.
339 */
340typedef int (*REGEX_INTERNAL_traverse_check) (void *cls,
341 struct REGEX_INTERNAL_State * s,
342 struct REGEX_INTERNAL_Transition * t);
343
344
345/**
346 * Function that is called with each state when traversing an automaton.
347 *
348 * @param cls closure.
349 * @param count current count of the state, from 0 to a->state_count - 1.
350 * @param s state.
351 */
352typedef void (*REGEX_INTERNAL_traverse_action) (void *cls,
353 const unsigned int count,
354 struct REGEX_INTERNAL_State * s);
355
356
357/**
358 * Traverses the given automaton using depth-first-search (DFS) from its start
359 * state, visiting all reachable states and calling 'action' on each one of
360 * them.
361 *
362 * @param a automaton to be traversed.
363 * @param start start state, pass a->start or NULL to traverse the whole automaton.
364 * @param check function that is checked before advancing on each transition
365 * in the DFS.
366 * @param check_cls closure for check.
367 * @param action action to be performed on each state.
368 * @param action_cls closure for action.
369 */
370void
371REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
372 struct REGEX_INTERNAL_State *start,
373 REGEX_INTERNAL_traverse_check check,
374 void *check_cls,
375 REGEX_INTERNAL_traverse_action action,
376 void *action_cls);
377
378/**
379 * Get the canonical regex of the given automaton.
380 * When constructing the automaton, a proof is computed for each state,
381 * consisting of the regular expression leading to this state. A complete
382 * regex for the automaton can be computed by combining these proofs.
383 * As of now, this function is only useful for testing.
384 *
385 * @param a automaton for which the canonical regex should be returned.
386 *
387 * @return canonical regex string.
388 */
389const char *
390REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a);
391
392
393/**
394 * Get the number of transitions contained in the given automaton.
395 *
396 * @param a automaton for which the number of transitions should be returned.
397 *
398 * @return number of transitions in the given automaton.
399 */
400unsigned int
401REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a);
402
403
404/**
405 * Context that contains an id counter for states and transitions as well as a
406 * DLL of automatons used as a stack for NFA construction.
407 */
408struct REGEX_INTERNAL_Context
409{
410 /**
411 * Id counter for unique state ids.
412 */
413 unsigned int state_id;
414
415 /**
416 * Id counter for unique transition ids.
417 */
418 unsigned int transition_id;
419
420 /**
421 * Head of the DLL of REGEX_INTERNAL_Automaton entries used as a stack.
422 */
423 struct REGEX_INTERNAL_Automaton *stack_head;
424
425 /**
426 * Tail of the DLL of REGEX_INTERNAL_Automaton entries used as a stack.
427 */
428 struct REGEX_INTERNAL_Automaton *stack_tail;
429};
430
431
432/**
433 * Adds multi-strided transitions to the given 'dfa'.
434 *
435 * @param regex_ctx regex context needed to add transitions to the automaton.
436 * @param dfa DFA to which the multi-strided transitions should be added.
437 * @param stride_len length of the strides.
438 */
439void
440REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
441 struct REGEX_INTERNAL_Automaton *dfa,
442 const unsigned int stride_len);
443
444
445
446#if 0 /* keep Emacsens' auto-indent happy */
447{
448#endif
449#ifdef __cplusplus
450}
451#endif
452
453#endif