aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBart Polot <bart@net.in.tum.de>2013-05-27 23:35:17 +0000
committerBart Polot <bart@net.in.tum.de>2013-05-27 23:35:17 +0000
commit08561388cf181dc3eed416a0aeb85c28c9f4cc0b (patch)
treeb9badb121085713e1e23165681ef496cdfbec52a
parent4ef9b0c5376fd8bd257fd2e1e5baabfc9e05a521 (diff)
downloadgnunet-08561388cf181dc3eed416a0aeb85c28c9f4cc0b.tar.gz
gnunet-08561388cf181dc3eed416a0aeb85c28c9f4cc0b.zip
Fix regexes with accepting common prefix, optimize regex prefix lengths
-rw-r--r--src/regex/regex_test_lib.c185
1 files changed, 132 insertions, 53 deletions
diff --git a/src/regex/regex_test_lib.c b/src/regex/regex_test_lib.c
index b4c7278c0..df873c112 100644
--- a/src/regex/regex_test_lib.c
+++ b/src/regex/regex_test_lib.c
@@ -60,29 +60,30 @@ struct RegexCombineCtx {
60 char *s; 60 char *s;
61}; 61};
62 62
63// static void 63/*
64// space (int n) 64static void
65// { 65space (int n)
66// int i; 66{
67// for (i = 0; i < n; i++) 67 int i;
68// printf (" "); 68 for (i = 0; i < n; i++)
69// } 69 printf (" ");
70// 70}
71// static void
72// debugctx (struct RegexCombineCtx *ctx, int level)
73// {
74// struct RegexCombineCtx *p;
75// space (level);
76// if (NULL != ctx->s)
77// printf ("'%s'\n", ctx->s);
78// else
79// printf ("NULL\n");
80// for (p = ctx->head; NULL != p; p = p->next)
81// {
82// debugctx (p, level + 1);
83// }
84// }
85 71
72static void
73debugctx (struct RegexCombineCtx *ctx, int level)
74{
75 struct RegexCombineCtx *p;
76 space (level);
77 if (NULL != ctx->s)
78 printf ("'%s'\n", ctx->s);
79 else
80 printf ("NULL\n");
81 for (p = ctx->head; NULL != p; p = p->next)
82 {
83 debugctx (p, level + 1);
84 }
85}
86*/
86 87
87/** 88/**
88 * Extract a string from all prefix-combined regexes. 89 * Extract a string from all prefix-combined regexes.
@@ -110,7 +111,9 @@ regex_combine (struct RegexCombineCtx *ctx)
110 s = regex_combine (p); 111 s = regex_combine (p);
111 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s); 112 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " total '%s'\n", s);
112 if (strlen(s) == 0) 113 if (strlen(s) == 0)
114 {
113 opt = GNUNET_YES; 115 opt = GNUNET_YES;
116 }
114 else 117 else
115 { 118 {
116 GNUNET_asprintf (&tmp, "%s%s|", regex, s); 119 GNUNET_asprintf (&tmp, "%s%s|", regex, s);
@@ -136,7 +139,7 @@ regex_combine (struct RegexCombineCtx *ctx)
136 if (NULL != ctx->s) 139 if (NULL != ctx->s)
137 { 140 {
138 if (opt) 141 if (opt)
139 GNUNET_asprintf (&s, "%s[%s]", ctx->s, regex); 142 GNUNET_asprintf (&s, "%s(%s)?", ctx->s, regex);
140 else 143 else
141 GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex); 144 GNUNET_asprintf (&s, "%s(%s)", ctx->s, regex);
142 GNUNET_free (regex); 145 GNUNET_free (regex);
@@ -149,6 +152,68 @@ regex_combine (struct RegexCombineCtx *ctx)
149 152
150 153
151/** 154/**
155 * Get the number of matching characters on the prefix of both strings.
156 *
157 * @param s1 String 1.
158 * @param s2 String 2.
159 *
160 * @return Number of characters of matching prefix.
161 */
162static unsigned int
163get_prefix_length (const char *s1, const char *s2)
164{
165 unsigned int l1;
166 unsigned int l2;
167 unsigned int limit;
168 unsigned int i;
169
170 l1 = strlen (s1);
171 l2 = strlen (s2);
172 limit = l1 > l2 ? l2 : l1;
173
174 for (i = 1; i <= limit; i++)
175 {
176 if (0 != strncmp (s1, s2, i))
177 return i - 1;
178 }
179 return limit;
180}
181
182
183/**
184 * Return the child context with the longest prefix match with the regex.
185 * Usually only one child will match, search all just in case.
186 *
187 * @param ctx Context whose children to search.
188 * @param regex String to match.
189 *
190 * @return Child with the longest prefix, NULL if no child matches.
191 */
192static struct RegexCombineCtx *
193get_longest_prefix (struct RegexCombineCtx *ctx, const char *regex)
194{
195 struct RegexCombineCtx *p;
196 struct RegexCombineCtx *best;
197 unsigned int l;
198 unsigned int best_l;
199
200 best_l = 0;
201 best = NULL;
202 for (p = ctx->head; NULL != p; p = p->next)
203 {
204 l = get_prefix_length (p->s, regex);
205 if (l > best_l)
206 {
207 GNUNET_break (0 == best_l);
208 best = p;
209 best_l = l;
210 }
211 }
212 return best;
213}
214
215
216/**
152 * Add a single regex to a context, combining with exisiting regex by-prefix. 217 * Add a single regex to a context, combining with exisiting regex by-prefix.
153 * 218 *
154 * @param ctx Context with 0 or more regexes. 219 * @param ctx Context with 0 or more regexes.
@@ -158,42 +223,55 @@ static void
158regex_add (struct RegexCombineCtx *ctx, const char *regex) 223regex_add (struct RegexCombineCtx *ctx, const char *regex)
159{ 224{
160 struct RegexCombineCtx *p; 225 struct RegexCombineCtx *p;
161 const char *rest; 226 struct RegexCombineCtx *newctx;
227 unsigned int prefix_l;
228 const char *rest_r;
229 const char *rest_s;
230 size_t len;
162 231
163 rest = &regex[1]; 232 if (0 == strlen (regex))
164 for (p = ctx->head; NULL != p; p = p->next) 233 return;
234
235 p = get_longest_prefix (ctx, regex);
236 if (NULL != p) /* There is some prefix match, reduce regex and try again */
165 { 237 {
166 if (p->s[0] == regex[0]) 238 prefix_l = get_prefix_length (p->s, regex);
239 rest_s = &p->s[prefix_l];
240 rest_r = &regex[prefix_l];
241 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "chosen '%s' [%u]\n", p->s, prefix_l);
242 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix r '%.*s'\n", prefix_l, p->s);
243 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest r '%s'\n", rest_r);
244 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "rest s '%s'\n", rest_s);
245 len = strlen (p->s);
246 if (prefix_l < len) /* only partial match, split existing state */
167 { 247 {
168 if (1 == strlen(p->s)) 248 newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
169 { 249 newctx->head = p->head;
170 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s); 250 newctx->tail = p->tail;
171 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest); 251 newctx->s = GNUNET_malloc(len - prefix_l + 1);
172 regex_add (p, rest); 252 strncpy (newctx->s, rest_s, len - prefix_l + 1);
173 } 253
174 else 254 p->head = newctx;
175 { 255 p->tail = newctx;
176 struct RegexCombineCtx *newctx; 256 p->s[prefix_l] = '\0';
177 newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
178 newctx->s = GNUNET_strdup (&p->s[1]);
179 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s);
180 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s);
181 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex);
182 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", newctx->s);
183 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest);
184 p->s[1] = '\0'; /* dont realloc */
185 GNUNET_CONTAINER_DLL_insert (p->head, p->tail, newctx);
186 regex_add (p, rest);
187 }
188 return;
189 } 257 }
258 regex_add (p, rest_r);
259 return;
260 }
261 /* There is no prefix match, add new */
262 if (NULL == ctx->head && NULL != ctx->s)
263 {
264 /* this was the end before, add empty string */
265 newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
266 newctx->s = GNUNET_strdup ("");
267 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
190 } 268 }
191 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n"); 269 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
192 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex); 270 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
193 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s); 271 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
194 p = GNUNET_malloc (sizeof (struct RegexCombineCtx)); 272 newctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
195 p->s = GNUNET_strdup (regex); 273 newctx->s = GNUNET_strdup (regex);
196 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p); 274 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, newctx);
197} 275}
198 276
199 277
@@ -240,9 +318,10 @@ GNUNET_REGEX_combine (char * const regexes[])
240 current = regexes[i]; 318 current = regexes[i];
241 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current); 319 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
242 regex_add (ctx, current); 320 regex_add (ctx, current);
321 /* debugctx (ctx, 0); */
243 } 322 }
244 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n"); 323 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
245 //debugctx (ctx, 0); 324 /* debugctx (ctx, 0); */
246 325
247 combined = regex_combine (ctx); 326 combined = regex_combine (ctx);
248 327