diff options
author | Bart Polot <bart@net.in.tum.de> | 2013-05-27 23:35:17 +0000 |
---|---|---|
committer | Bart Polot <bart@net.in.tum.de> | 2013-05-27 23:35:17 +0000 |
commit | 08561388cf181dc3eed416a0aeb85c28c9f4cc0b (patch) | |
tree | b9badb121085713e1e23165681ef496cdfbec52a /src/regex/regex_test_lib.c | |
parent | 4ef9b0c5376fd8bd257fd2e1e5baabfc9e05a521 (diff) | |
download | gnunet-08561388cf181dc3eed416a0aeb85c28c9f4cc0b.tar.gz gnunet-08561388cf181dc3eed416a0aeb85c28c9f4cc0b.zip |
Fix regexes with accepting common prefix, optimize regex prefix lengths
Diffstat (limited to 'src/regex/regex_test_lib.c')
-rw-r--r-- | src/regex/regex_test_lib.c | 185 |
1 files changed, 132 insertions, 53 deletions
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c index b4c7278c0..df873c112 100644 --- a/src/regex/regex_test_lib.c +++ b/src/regex/regex_test_lib.c | |||
@@ -60,29 +60,30 @@ struct RegexCombineCtx { | |||
60 | char *s; | 60 | char *s; |
61 | }; | 61 | }; |
62 | 62 | ||
63 | // static void | 63 | /* |
64 | // space (int n) | 64 | static void |
65 | // { | 65 | space (int n) |
66 | // int i; | 66 | { |
67 | // for (i = 0; i < n; i++) | 67 | int i; |
68 | // printf (" "); | 68 | for (i = 0; i < n; i++) |
69 | // } | 69 | printf (" "); |
70 | // | 70 | } |
71 | // static void | ||
72 | // debugctx (struct RegexCombineCtx *ctx, int level) | ||
73 | // { | ||
74 | // struct RegexCombineCtx *p; | ||
75 | // space (level); | ||
76 | // if (NULL != ctx->s) | ||
77 | // printf ("'%s'\n", ctx->s); | ||
78 | // else | ||
79 | // printf ("NULL\n"); | ||
80 | // for (p = ctx->head; NULL != p; p = p->next) | ||
81 | // { | ||
82 | // debugctx (p, level + 1); | ||
83 | // } | ||
84 | // } | ||
85 | 71 | ||
72 | static void | ||
73 | debugctx (struct RegexCombineCtx *ctx, int level) | ||
74 | { | ||
75 | struct RegexCombineCtx *p; | ||
76 | space (level); | ||
77 | if (NULL != ctx->s) | ||
78 | printf ("'%s'\n", ctx->s); | ||
79 | else | ||
80 | printf ("NULL\n"); | ||
81 | for (p = ctx->head; NULL != p; p = p->next) | ||
82 | { | ||
83 | debugctx (p, level + 1); | ||
84 | } | ||
85 | } | ||
86 | */ | ||
86 | 87 | ||
87 | /** | 88 | /** |
88 | * Extract a string from all prefix-combined regexes. | 89 | * Extract a string from all prefix-combined regexes. |
@@ -110,7 +111,9 @@ regex_combine (struct RegexCombineCtx *ctx) | |||
110 | s = regex_combine (p); | 111 | s = regex_combine (p); |
111 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); | 112 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); |
112 | if (strlen(s) == 0) | 113 | if (strlen(s) == 0) |
114 | { | ||
113 | opt = GNUNET_YES; | 115 | opt = GNUNET_YES; |
116 | } | ||
114 | else | 117 | else |
115 | { | 118 | { |
116 | GNUNET_asprintf (&tmp, "%s%s|", regex, s); | 119 | GNUNET_asprintf (&tmp, "%s%s|", regex, s); |
@@ -136,7 +139,7 @@ regex_combine (struct RegexCombineCtx *ctx) | |||
136 | if (NULL != ctx->s) | 139 | if (NULL != ctx->s) |
137 | { | 140 | { |
138 | if (opt) | 141 | if (opt) |
139 | GNUNET_asprintf (&s, "%s[%s]", ctx->s, regex); | 142 | GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex); |
140 | else | 143 | else |
141 | GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex); | 144 | GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex); |
142 | GNUNET_free (regex); | 145 | GNUNET_free (regex); |
@@ -149,6 +152,68 @@ regex_combine (struct RegexCombineCtx *ctx) | |||
149 | 152 | ||
150 | 153 | ||
151 | /** | 154 | /** |
155 | * Get the number of matching characters on the prefix of both strings. | ||
156 | * | ||
157 | * @param s1 String 1. | ||
158 | * @param s2 String 2. | ||
159 | * | ||
160 | * @return Number of characters of matching prefix. | ||
161 | */ | ||
162 | static unsigned int | ||
163 | get_prefix_length (const char *s1, const char *s2) | ||
164 | { | ||
165 | unsigned int l1; | ||
166 | unsigned int l2; | ||
167 | unsigned int limit; | ||
168 | unsigned int i; | ||
169 | |||
170 | l1 = strlen (s1); | ||
171 | l2 = strlen (s2); | ||
172 | limit = l1 > l2 ? l2 : l1; | ||
173 | |||
174 | for (i = 1; i <= limit; i++) | ||
175 | { | ||
176 | if (0 != strncmp (s1, s2, i)) | ||
177 | return i - 1; | ||
178 | } | ||
179 | return limit; | ||
180 | } | ||
181 | |||
182 | |||
183 | /** | ||
184 | * Return the child context with the longest prefix match with the regex. | ||
185 | * Usually only one child will match, search all just in case. | ||
186 | * | ||
187 | * @param ctx Context whose children to search. | ||
188 | * @param regex String to match. | ||
189 | * | ||
190 | * @return Child with the longest prefix, NULL if no child matches. | ||
191 | */ | ||
192 | static struct RegexCombineCtx * | ||
193 | get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex) | ||
194 | { | ||
195 | struct RegexCombineCtx *p; | ||
196 | struct RegexCombineCtx *best; | ||
197 | unsigned int l; | ||
198 | unsigned int best_l; | ||
199 | |||
200 | best_l = 0; | ||
201 | best = NULL; | ||
202 | for (p = ctx->head; NULL != p; p = p->next) | ||
203 | { | ||
204 | l = get_prefix_length (p->s, regex); | ||
205 | if (l > best_l) | ||
206 | { | ||
207 | GNUNET_break (0 == best_l); | ||
208 | best = p; | ||
209 | best_l = l; | ||
210 | } | ||
211 | } | ||
212 | return best; | ||
213 | } | ||
214 | |||
215 | |||
216 | /** | ||
152 | * Add a single regex to a context, combining with exisiting regex by-prefix. | 217 | * Add a single regex to a context, combining with exisiting regex by-prefix. |
153 | * | 218 | * |
154 | * @param ctx Context with 0 or more regexes. | 219 | * @param ctx Context with 0 or more regexes. |
@@ -158,42 +223,55 @@ static void | |||
158 | regex_add (struct RegexCombineCtx *ctx, const char *regex) | 223 | regex_add (struct RegexCombineCtx *ctx, const char *regex) |
159 | { | 224 | { |
160 | struct RegexCombineCtx *p; | 225 | struct RegexCombineCtx *p; |
161 | const char *rest; | 226 | struct RegexCombineCtx *newctx; |
227 | unsigned int prefix_l; | ||
228 | const char *rest_r; | ||
229 | const char *rest_s; | ||
230 | size_t len; | ||
162 | 231 | ||
163 | rest = ®ex[1]; | 232 | if (0 == strlen (regex)) |
164 | for (p = ctx->head; NULL != p; p = p->next) | 233 | return; |
234 | |||
235 | p = get_longest_prefix (ctx, regex); | ||
236 | if (NULL != p) /* There is some prefix match, reduce regex and try again */ | ||
165 | { | 237 | { |
166 | if (p->s[0] == regex[0]) | 238 | prefix_l = get_prefix_length (p->s, regex); |
239 | rest_s = &p->s[prefix_l]; | ||
240 | rest_r = ®ex[prefix_l]; | ||
241 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l); | ||
242 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s); | ||
243 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r); | ||
244 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s); | ||
245 | len = strlen (p->s); | ||
246 | if (prefix_l < len) /* only partial match, split existing state */ | ||
167 | { | 247 | { |
168 | if (1 == strlen(p->s)) | 248 | newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); |
169 | { | 249 | newctx->head = p->head; |
170 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s); | 250 | newctx->tail = p->tail; |
171 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest); | 251 | newctx->s = GNUNET_malloc(len - prefix_l + 1); |
172 | regex_add (p, rest); | 252 | strncpy (newctx->s, rest_s, len - prefix_l + 1); |
173 | } | 253 | |
174 | else | 254 | p->head = newctx; |
175 | { | 255 | p->tail = newctx; |
176 | struct RegexCombineCtx *newctx; | 256 | p->s[prefix_l] = '\0'; |
177 | newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); | ||
178 | newctx->s = GNUNET_strdup (&p->s[1]); | ||
179 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s); | ||
180 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s); | ||
181 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex); | ||
182 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", newctx->s); | ||
183 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest); | ||
184 | p->s[1] = '\0'; /* dont realloc */ | ||
185 | GNUNET_CONTAINER_DLL_insert (p->head, p->tail, newctx); | ||
186 | regex_add (p, rest); | ||
187 | } | ||
188 | return; | ||
189 | } | 257 | } |
258 | regex_add (p, rest_r); | ||
259 | return; | ||
260 | } | ||
261 | /* There is no prefix match, add new */ | ||
262 | if (NULL == ctx->head && NULL != ctx->s) | ||
263 | { | ||
264 | /* this was the end before, add empty string */ | ||
265 | newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); | ||
266 | newctx->s = GNUNET_strdup (""); | ||
267 | GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); | ||
190 | } | 268 | } |
191 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); | 269 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); |
192 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); | 270 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); |
193 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); | 271 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); |
194 | p = GNUNET_malloc (sizeof (struct RegexCombineCtx)); | 272 | newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx)); |
195 | p->s = GNUNET_strdup (regex); | 273 | newctx->s = GNUNET_strdup (regex); |
196 | GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p); | 274 | GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx); |
197 | } | 275 | } |
198 | 276 | ||
199 | 277 | ||
@@ -240,9 +318,10 @@ GNUNET_REGEX_combine (char * const regexes[]) | |||
240 | current = regexes[i]; | 318 | current = regexes[i]; |
241 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); | 319 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); |
242 | regex_add (ctx, current); | 320 | regex_add (ctx, current); |
321 | /* debugctx (ctx, 0); */ | ||
243 | } | 322 | } |
244 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); | 323 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); |
245 | //debugctx (ctx, 0); | 324 | /* debugctx (ctx, 0); */ |
246 | 325 | ||
247 | combined = regex_combine (ctx); | 326 | combined = regex_combine (ctx); |
248 | 327 | ||