aboutsummaryrefslogtreecommitdiff
path: root/src/regex/regex_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regex/regex_internal.h')
-rw-r--r--src/regex/regex_internal.h456
1 files changed, 0 insertions, 456 deletions
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
deleted file mode 100644
index 8f29cff33..000000000
--- a/src/regex/regex_internal.h
+++ /dev/null
@@ -1,456 +0,0 @@
1/*
2 This file is part of GNUnet
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
20/**
21 * @file src/regex/regex_internal.h
22 * @brief common internal definitions for regex library.
23 * @author Maximilian Szengel
24 */
25#ifndef REGEX_INTERNAL_H
26#define REGEX_INTERNAL_H
27
28#include "regex_internal_lib.h"
29
30#ifdef __cplusplus
31extern "C"
32{
33#if 0 /* keep Emacsens' auto-indent happy */
34}
35#endif
36#endif
37
38/**
39 * char array of literals that are allowed inside a regex (apart from the
40 * operators)
41 */
42#define ALLOWED_LITERALS \
43 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
44
45
46/**
47 * Transition between two states. Transitions are stored at the states from
48 * which they origin ('from_state'). Each state can have 0-n transitions.
49 * If label is NULL, this is considered to be an epsilon transition.
50 */
51struct REGEX_INTERNAL_Transition
52{
53 /**
54 * This is a linked list.
55 */
56 struct REGEX_INTERNAL_Transition *prev;
57
58 /**
59 * This is a linked list.
60 */
61 struct REGEX_INTERNAL_Transition *next;
62
63 /**
64 * Unique id of this transition.
65 */
66 unsigned int id;
67
68 /**
69 * Label for this transition. This is basically the edge label for the graph.
70 */
71 char *label;
72
73 /**
74 * State to which this transition leads.
75 */
76 struct REGEX_INTERNAL_State *to_state;
77
78 /**
79 * State from which this transition origins.
80 */
81 struct REGEX_INTERNAL_State *from_state;
82};
83
84
85/**
86 * A state. Can be used in DFA and NFA automatons.
87 */
88struct REGEX_INTERNAL_State;
89
90
91/**
92 * Set of states.
93 */
94struct REGEX_INTERNAL_StateSet
95{
96 /**
97 * Array of states.
98 */
99 struct REGEX_INTERNAL_State **states;
100
101 /**
102 * Number of entries in *use* in the 'states' array.
103 */
104 unsigned int off;
105
106 /**
107 * Length of the 'states' array.
108 */
109 unsigned int size;
110};
111
112
113/**
114 * A state. Can be used in DFA and NFA automatons.
115 */
116struct REGEX_INTERNAL_State
117{
118 /**
119 * This is a linked list to keep states in an automaton.
120 */
121 struct REGEX_INTERNAL_State *prev;
122
123 /**
124 * This is a linked list to keep states in an automaton.
125 */
126 struct REGEX_INTERNAL_State *next;
127
128 /**
129 * This is a multi DLL for StateSet_MDLL.
130 */
131 struct REGEX_INTERNAL_State *prev_SS;
132
133 /**
134 * This is a multi DLL for StateSet_MDLL.
135 */
136 struct REGEX_INTERNAL_State *next_SS;
137
138 /**
139 * This is a multi DLL for StateSet_MDLL Stack.
140 */
141 struct REGEX_INTERNAL_State *prev_ST;
142
143 /**
144 * This is a multi DLL for StateSet_MDLL Stack.
145 */
146 struct REGEX_INTERNAL_State *next_ST;
147
148 /**
149 * Unique state id.
150 */
151 unsigned int id;
152
153 /**
154 * Unique state id that is used for traversing the automaton. It is guaranteed
155 * to be > 0 and < state_count.
156 */
157 unsigned int traversal_id;
158
159 /**
160 * If this is an accepting state or not.
161 */
162 int accepting;
163
164 /**
165 * Marking of the state. This is used for marking all visited states when
166 * traversing all states of an automaton and for cases where the state id
167 * cannot be used (dfa minimization).
168 */
169 int marked;
170
171 /**
172 * Marking the state as contained. This is used for checking, if the state is
173 * contained in a set in constant time.
174 */
175 int contained;
176
177 /**
178 * Marking the state as part of an SCC (Strongly Connected Component). All
179 * states with the same scc_id are part of the same SCC. scc_id is 0, if state
180 * is not a part of any SCC.
181 */
182 unsigned int scc_id;
183
184 /**
185 * Used for SCC detection.
186 */
187 int index;
188
189 /**
190 * Used for SCC detection.
191 */
192 int lowlink;
193
194 /**
195 * Human readable name of the state. Used for debugging and graph
196 * creation.
197 */
198 char *name;
199
200 /**
201 * Hash of the state.
202 */
203 struct GNUNET_HashCode hash;
204
205 /**
206 * Linear state ID acquired by depth-first-search. This ID should be used for
207 * storing information about the state in an array, because the 'id' of the
208 * state is not guaranteed to be linear. The 'dfs_id' is guaranteed to be > 0
209 * and < 'state_count'.
210 */
211 unsigned int dfs_id;
212
213 /**
214 * Proof for this state.
215 */
216 char *proof;
217
218 /**
219 * Number of transitions from this state to other states.
220 */
221 unsigned int transition_count;
222
223 /**
224 * DLL of transitions.
225 */
226 struct REGEX_INTERNAL_Transition *transitions_head;
227
228 /**
229 * DLL of transitions.
230 */
231 struct REGEX_INTERNAL_Transition *transitions_tail;
232
233 /**
234 * Number of incoming transitions. Used for compressing DFA paths.
235 */
236 unsigned int incoming_transition_count;
237
238 /**
239 * Set of states on which this state is based on. Used when creating a DFA out
240 * of several NFA states.
241 */
242 struct REGEX_INTERNAL_StateSet nfa_set;
243};
244
245
246/**
247 * Type of an automaton.
248 */
249enum REGEX_INTERNAL_AutomatonType
250{
251 NFA,
252 DFA
253};
254
255
256/**
257 * Automaton representation.
258 */
259struct REGEX_INTERNAL_Automaton
260{
261 /**
262 * Linked list of NFAs used for partial NFA creation.
263 */
264 struct REGEX_INTERNAL_Automaton *prev;
265
266 /**
267 * Linked list of NFAs used for partial NFA creation.
268 */
269 struct REGEX_INTERNAL_Automaton *next;
270
271 /**
272 * First state of the automaton. This is mainly used for constructing an NFA,
273 * where each NFA itself consists of one or more NFAs linked together.
274 */
275 struct REGEX_INTERNAL_State *start;
276
277 /**
278 * End state of the partial NFA. This is undefined for DFAs
279 */
280 struct REGEX_INTERNAL_State *end;
281
282 /**
283 * Number of states in the automaton.
284 */
285 unsigned int state_count;
286
287 /**
288 * DLL of states.
289 */
290 struct REGEX_INTERNAL_State *states_head;
291
292 /**
293 * DLL of states
294 */
295 struct REGEX_INTERNAL_State *states_tail;
296
297 /**
298 * Type of the automaton.
299 */
300 enum REGEX_INTERNAL_AutomatonType type;
301
302 /**
303 * Regex
304 */
305 char *regex;
306
307 /**
308 * Canonical regex (result of RX->NFA->DFA->RX)
309 */
310 char *canonical_regex;
311
312 /**
313 * GNUNET_YES, if multi strides have been added to the Automaton.
314 */
315 int is_multistrided;
316};
317
318
319/**
320 * Construct an NFA by parsing the regex string of length 'len'.
321 *
322 * @param regex regular expression string.
323 * @param len length of the string.
324 *
325 * @return NFA, needs to be freed using REGEX_INTERNAL_automaton_destroy.
326 */
327struct REGEX_INTERNAL_Automaton *
328REGEX_INTERNAL_construct_nfa (const char *regex, const size_t len);
329
330
331/**
332 * Function that gets passed to automaton traversal and is called before each
333 * next traversal from state 's' using transition 't' to check if traversal
334 * should proceed. Return GNUNET_NO to stop traversal or GNUNET_YES to continue.
335 *
336 * @param cls closure for the check.
337 * @param s current state in the traversal.
338 * @param t current transition from state 's' that will be used for the next
339 * step.
340 *
341 * @return GNUNET_YES to proceed traversal, GNUNET_NO to stop.
342 */
343typedef int (*REGEX_INTERNAL_traverse_check) (void *cls,
344 struct REGEX_INTERNAL_State *s,
345 struct REGEX_INTERNAL_Transition *
346 t);
347
348
349/**
350 * Function that is called with each state, when traversing an automaton.
351 *
352 * @param cls closure.
353 * @param count current count of the state, from 0 to a->state_count -1.
354 * @param s state.
355 */
356typedef void (*REGEX_INTERNAL_traverse_action) (void *cls,
357 const unsigned int count,
358 struct REGEX_INTERNAL_State *s);
359
360
361/**
362 * Traverses the given automaton using depth-first-search (DFS) from it's start
363 * state, visiting all reachable states and calling 'action' on each one of
364 * them.
365 *
366 * @param a automaton to be traversed.
367 * @param start start state, pass a->start or NULL to traverse the whole automaton.
368 * @param check function that is checked before advancing on each transition
369 * in the DFS.
370 * @param check_cls closure for check.
371 * @param action action to be performed on each state.
372 * @param action_cls closure for action
373 */
374void
375REGEX_INTERNAL_automaton_traverse (const struct REGEX_INTERNAL_Automaton *a,
376 struct REGEX_INTERNAL_State *start,
377 REGEX_INTERNAL_traverse_check check,
378 void *check_cls,
379 REGEX_INTERNAL_traverse_action action,
380 void *action_cls);
381
382/**
383 * Get the canonical regex of the given automaton.
384 * When constructing the automaton a proof is computed for each state,
385 * consisting of the regular expression leading to this state. A complete
386 * regex for the automaton can be computed by combining these proofs.
387 * As of now this function is only useful for testing.
388 *
389 * @param a automaton for which the canonical regex should be returned.
390 *
391 * @return canonical regex string.
392 */
393const char *
394REGEX_INTERNAL_get_canonical_regex (struct REGEX_INTERNAL_Automaton *a);
395
396
397/**
398 * Get the number of transitions that are contained in the given automaton.
399 *
400 * @param a automaton for which the number of transitions should be returned.
401 *
402 * @return number of transitions in the given automaton.
403 */
404unsigned int
405REGEX_INTERNAL_get_transition_count (struct REGEX_INTERNAL_Automaton *a);
406
407
408/**
409 * Context that contains an id counter for states and transitions as well as a
410 * DLL of automatons used as a stack for NFA construction.
411 */
412struct REGEX_INTERNAL_Context
413{
414 /**
415 * Unique state id.
416 */
417 unsigned int state_id;
418
419 /**
420 * Unique transition id.
421 */
422 unsigned int transition_id;
423
424 /**
425 * DLL of REGEX_INTERNAL_Automaton's used as a stack.
426 */
427 struct REGEX_INTERNAL_Automaton *stack_head;
428
429 /**
430 * DLL of REGEX_INTERNAL_Automaton's used as a stack.
431 */
432 struct REGEX_INTERNAL_Automaton *stack_tail;
433};
434
435
436/**
437 * Adds multi-strided transitions to the given 'dfa'.
438 *
439 * @param regex_ctx regex context needed to add transitions to the automaton.
440 * @param dfa DFA to which the multi strided transitions should be added.
441 * @param stride_len length of the strides.
442 */
443void
444REGEX_INTERNAL_dfa_add_multi_strides (struct REGEX_INTERNAL_Context *regex_ctx,
445 struct REGEX_INTERNAL_Automaton *dfa,
446 const unsigned int stride_len);
447
448
449#if 0 /* keep Emacsens' auto-indent happy */
450{
451#endif
452#ifdef __cplusplus
453}
454#endif
455
456#endif