diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-07-04 13:54:43 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-07-04 13:54:43 +0000 |
commit | bd0e2cd49ae9ae916f6a4288ac8893128b8168d6 (patch) | |
tree | c680e35f3d2bb3cffb114808bd19f2299885af50 /src/regex/regex_internal.h | |
parent | a93954693fef730d7a41c168b4961d19e5dff90c (diff) | |
download | gnunet-bd0e2cd49ae9ae916f6a4288ac8893128b8168d6.tar.gz gnunet-bd0e2cd49ae9ae916f6a4288ac8893128b8168d6.zip |
Summary: regex cleanup and bugfixes
Author: szengel
Diffstat (limited to 'src/regex/regex_internal.h')
-rw-r--r-- | src/regex/regex_internal.h | 235 |
1 files changed, 235 insertions, 0 deletions
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index 8ea597d40..d1e829633 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -43,6 +43,241 @@ extern "C" | |||
43 | 43 | ||
44 | 44 | ||
45 | /** | 45 | /** |
46 | * Transition between two states. Each state can have 0-n transitions. If label | ||
47 | * is 0, this is considered to be an epsilon transition. | ||
48 | */ | ||
49 | struct GNUNET_REGEX_Transition | ||
50 | { | ||
51 | /** | ||
52 | * This is a linked list. | ||
53 | */ | ||
54 | struct GNUNET_REGEX_Transition *prev; | ||
55 | |||
56 | /** | ||
57 | * This is a linked list. | ||
58 | */ | ||
59 | struct GNUNET_REGEX_Transition *next; | ||
60 | |||
61 | /** | ||
62 | * Unique id of this transition. | ||
63 | */ | ||
64 | unsigned int id; | ||
65 | |||
66 | /** | ||
67 | * Label for this transition. This is basically the edge label for the graph. | ||
68 | */ | ||
69 | char label; | ||
70 | |||
71 | /** | ||
72 | * State to which this transition leads. | ||
73 | */ | ||
74 | struct GNUNET_REGEX_State *to_state; | ||
75 | |||
76 | /** | ||
77 | * State from which this transition origins. | ||
78 | */ | ||
79 | struct GNUNET_REGEX_State *from_state; | ||
80 | |||
81 | /** | ||
82 | * Mark this transition. For example when reversing the automaton. | ||
83 | */ | ||
84 | int mark; | ||
85 | }; | ||
86 | |||
87 | |||
88 | /** | ||
89 | * A state. Can be used in DFA and NFA automatons. | ||
90 | */ | ||
91 | struct GNUNET_REGEX_State | ||
92 | { | ||
93 | /** | ||
94 | * This is a linked list. | ||
95 | */ | ||
96 | struct GNUNET_REGEX_State *prev; | ||
97 | |||
98 | /** | ||
99 | * This is a linked list. | ||
100 | */ | ||
101 | struct GNUNET_REGEX_State *next; | ||
102 | |||
103 | /** | ||
104 | * Unique state id. | ||
105 | */ | ||
106 | unsigned int id; | ||
107 | |||
108 | /** | ||
109 | * If this is an accepting state or not. | ||
110 | */ | ||
111 | int accepting; | ||
112 | |||
113 | /** | ||
114 | * Marking of the state. This is used for marking all visited states when | ||
115 | * traversing all states of an automaton and for cases where the state id | ||
116 | * cannot be used (dfa minimization). | ||
117 | */ | ||
118 | int marked; | ||
119 | |||
120 | /** | ||
121 | * Marking the state as contained. This is used for checking, if the state is | ||
122 | * contained in a set in constant time | ||
123 | */ | ||
124 | int contained; | ||
125 | |||
126 | /** | ||
127 | * Marking the state as part of an SCC (Strongly Connected Component). All | ||
128 | * states with the same scc_id are part of the same SCC. scc_id is 0, if state | ||
129 | * is not a part of any SCC. | ||
130 | */ | ||
131 | unsigned int scc_id; | ||
132 | |||
133 | /** | ||
134 | * Used for SCC detection. | ||
135 | */ | ||
136 | int index; | ||
137 | |||
138 | /** | ||
139 | * Used for SCC detection. | ||
140 | */ | ||
141 | int lowlink; | ||
142 | |||
143 | /** | ||
144 | * Human readable name of the automaton. Used for debugging and graph | ||
145 | * creation. | ||
146 | */ | ||
147 | char *name; | ||
148 | |||
149 | /** | ||
150 | * Hash of the state. | ||
151 | */ | ||
152 | struct GNUNET_HashCode hash; | ||
153 | |||
154 | /** | ||
155 | * State ID for proof creation. | ||
156 | */ | ||
157 | unsigned int proof_id; | ||
158 | |||
159 | /** | ||
160 | * Proof for this state. | ||
161 | */ | ||
162 | char *proof; | ||
163 | |||
164 | /** | ||
165 | * Number of transitions from this state to other states. | ||
166 | */ | ||
167 | unsigned int transition_count; | ||
168 | |||
169 | /** | ||
170 | * DLL of transitions. | ||
171 | */ | ||
172 | struct GNUNET_REGEX_Transition *transitions_head; | ||
173 | |||
174 | /** | ||
175 | * DLL of transitions. | ||
176 | */ | ||
177 | struct GNUNET_REGEX_Transition *transitions_tail; | ||
178 | |||
179 | /** | ||
180 | * Set of states on which this state is based on. Used when creating a DFA out | ||
181 | * of several NFA states. | ||
182 | */ | ||
183 | struct GNUNET_REGEX_StateSet *nfa_set; | ||
184 | }; | ||
185 | |||
186 | |||
187 | /** | ||
188 | * Type of an automaton. | ||
189 | */ | ||
190 | enum GNUNET_REGEX_AutomatonType | ||
191 | { | ||
192 | NFA, | ||
193 | DFA | ||
194 | }; | ||
195 | |||
196 | |||
197 | /** | ||
198 | * Automaton representation. | ||
199 | */ | ||
200 | struct GNUNET_REGEX_Automaton | ||
201 | { | ||
202 | /** | ||
203 | * Linked list of NFAs used for partial NFA creation. | ||
204 | */ | ||
205 | struct GNUNET_REGEX_Automaton *prev; | ||
206 | |||
207 | /** | ||
208 | * Linked list of NFAs used for partial NFA creation. | ||
209 | */ | ||
210 | struct GNUNET_REGEX_Automaton *next; | ||
211 | |||
212 | /** | ||
213 | * First state of the automaton. This is mainly used for constructing an NFA, | ||
214 | * where each NFA itself consists of one or more NFAs linked together. | ||
215 | */ | ||
216 | struct GNUNET_REGEX_State *start; | ||
217 | |||
218 | /** | ||
219 | * End state of the partial NFA. This is undefined for DFAs | ||
220 | */ | ||
221 | struct GNUNET_REGEX_State *end; | ||
222 | |||
223 | /** | ||
224 | * Number of states in the automaton. | ||
225 | */ | ||
226 | unsigned int state_count; | ||
227 | |||
228 | /** | ||
229 | * DLL of states. | ||
230 | */ | ||
231 | struct GNUNET_REGEX_State *states_head; | ||
232 | |||
233 | /** | ||
234 | * DLL of states | ||
235 | */ | ||
236 | struct GNUNET_REGEX_State *states_tail; | ||
237 | |||
238 | /** | ||
239 | * Type of the automaton. | ||
240 | */ | ||
241 | enum GNUNET_REGEX_AutomatonType type; | ||
242 | |||
243 | /** | ||
244 | * Regex | ||
245 | */ | ||
246 | char *regex; | ||
247 | |||
248 | /** | ||
249 | * Canonical regex (result of RX->NFA->DFA->RX) | ||
250 | */ | ||
251 | char *canonical_regex; | ||
252 | }; | ||
253 | |||
254 | |||
255 | /** | ||
256 | * Function that is called with each state, when traversing an automaton. | ||
257 | * | ||
258 | * @param cls closure. | ||
259 | * @param count current count of the state, from 0 to a->state_count -1. | ||
260 | * @param s state. | ||
261 | */ | ||
262 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, | ||
263 | struct GNUNET_REGEX_State * s); | ||
264 | |||
265 | |||
266 | /** | ||
267 | * Traverses the given automaton from it's start state, visiting all reachable | ||
268 | * states and calling 'action' on each one of them. | ||
269 | * | ||
270 | * @param a automaton. | ||
271 | * @param action action to be performed on each state. | ||
272 | * @param action_cls closure for action | ||
273 | */ | ||
274 | void | ||
275 | GNUNET_REGEX_automaton_traverse (struct GNUNET_REGEX_Automaton *a, | ||
276 | GNUNET_REGEX_traverse_action action, | ||
277 | void *action_cls); | ||
278 | |||
279 | |||
280 | /** | ||
46 | * Get the canonical regex of the given automaton. | 281 | * Get the canonical regex of the given automaton. |
47 | * When constructing the automaton a proof is computed for each state, | 282 | * When constructing the automaton a proof is computed for each state, |
48 | * consisting of the regular expression leading to this state. A complete | 283 | * consisting of the regular expression leading to this state. A complete |