aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/regex/regex.c93
-rw-r--r--src/regex/regex_internal.h7
2 files changed, 64 insertions, 36 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 73ec5b14a..677894875 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -60,6 +60,22 @@ struct GNUNET_REGEX_StateSet_MDLL
60 60
61 61
62/** 62/**
63 * Append state to the given StateSet '
64 *
65 * @param set set to be modified
66 * @param state state to be appended
67 */
68static void
69state_set_append (struct GNUNET_REGEX_StateSet *set,
70 struct GNUNET_REGEX_State *state)
71{
72 if (set->off == set->size)
73 GNUNET_array_grow (set->states, set->size, set->size * 2 + 4);
74 set->states[set->off++] = state;
75}
76
77
78/**
63 * Compare two strings for equality. If either is NULL they are not equal. 79 * Compare two strings for equality. If either is NULL they are not equal.
64 * 80 *
65 * @param str1 first string for comparison. 81 * @param str1 first string for comparison.
@@ -231,12 +247,12 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
231 if (NULL == sset1 || NULL == sset2) 247 if (NULL == sset1 || NULL == sset2)
232 return 1; 248 return 1;
233 249
234 result = sset1->len - sset2->len; 250 result = sset1->off - sset2->off;
235 if (result < 0) 251 if (result < 0)
236 return -1; 252 return -1;
237 if (result > 0) 253 if (result > 0)
238 return 1; 254 return 1;
239 for (i = 0; i < sset1->len; i++) 255 for (i = 0; i < sset1->off; i++)
240 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i]))) 256 if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i])))
241 break; 257 break;
242 return result; 258 return result;
@@ -251,7 +267,8 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
251static void 267static void
252state_set_clear (struct GNUNET_REGEX_StateSet *set) 268state_set_clear (struct GNUNET_REGEX_StateSet *set)
253{ 269{
254 GNUNET_array_grow (set->states, set->len, 0); 270 GNUNET_array_grow (set->states, set->size, 0);
271 set->off = 0;
255} 272}
256 273
257 274
@@ -1250,16 +1267,16 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1250 1267
1251 s->nfa_set = *nfa_states; 1268 s->nfa_set = *nfa_states;
1252 1269
1253 if (nfa_states->len < 1) 1270 if (nfa_states->off < 1)
1254 return s; 1271 return s;
1255 1272
1256 /* Create a name based on 'nfa_states' */ 1273 /* Create a name based on 'nfa_states' */
1257 len = nfa_states->len * 14 + 4; 1274 len = nfa_states->off * 14 + 4;
1258 s->name = GNUNET_malloc (len); 1275 s->name = GNUNET_malloc (len);
1259 strcat (s->name, "{"); 1276 strcat (s->name, "{");
1260 pos = s->name + 1; 1277 pos = s->name + 1;
1261 1278
1262 for (i = 0; i < nfa_states->len; i++) 1279 for (i = 0; i < nfa_states->off; i++)
1263 { 1280 {
1264 cstate = nfa_states->states[i]; 1281 cstate = nfa_states->states[i];
1265 GNUNET_snprintf (pos, pos - s->name + len, 1282 GNUNET_snprintf (pos, pos - s->name + len,
@@ -1749,6 +1766,7 @@ dfa_compress_paths_helper (struct GNUNET_REGEX_Automaton *dfa,
1749 } 1766 }
1750} 1767}
1751 1768
1769
1752/** 1770/**
1753 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be 1771 * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be
1754 * compressed to 0->3 by combining transitions. 1772 * compressed to 0->3 by combining transitions.
@@ -1944,7 +1962,7 @@ nfa_closure_create (struct GNUNET_REGEX_StateSet *cls,
1944 1962
1945 /* Add start state to closure only for epsilon closure */ 1963 /* Add start state to closure only for epsilon closure */
1946 if (NULL == label) 1964 if (NULL == label)
1947 GNUNET_array_append (cls->states, cls->len, s); 1965 state_set_append (cls, s);
1948 1966
1949 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s); 1967 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
1950 cls_stack.len = 1; 1968 cls_stack.len = 1;
@@ -1965,7 +1983,7 @@ nfa_closure_create (struct GNUNET_REGEX_StateSet *cls,
1965 continue; 1983 continue;
1966 if (0 == clsstate->contained) 1984 if (0 == clsstate->contained)
1967 { 1985 {
1968 GNUNET_array_append (cls->states, cls->len, clsstate); 1986 state_set_append (cls, clsstate);
1969 GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail, 1987 GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail,
1970 clsstate); 1988 clsstate);
1971 cls_stack.len++; 1989 cls_stack.len++;
@@ -1974,12 +1992,12 @@ nfa_closure_create (struct GNUNET_REGEX_StateSet *cls,
1974 } 1992 }
1975 } 1993 }
1976 1994
1977 for (i = 0; i < cls->len; i++) 1995 for (i = 0; i < cls->off; i++)
1978 cls->states[i]->contained = 0; 1996 cls->states[i]->contained = 0;
1979 1997
1980 /* sort the states */ 1998 /* sort the states */
1981 if (cls->len > 1) 1999 if (cls->off > 1)
1982 qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), 2000 qsort (cls->states, cls->off, sizeof (struct GNUNET_REGEX_State *),
1983 &state_compare); 2001 &state_compare);
1984} 2002}
1985 2003
@@ -2009,14 +2027,14 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret,
2009 if (NULL == states) 2027 if (NULL == states)
2010 return; 2028 return;
2011 2029
2012 for (i = 0; i < states->len; i++) 2030 for (i = 0; i < states->off; i++)
2013 { 2031 {
2014 s = states->states[i]; 2032 s = states->states[i];
2015 nfa_closure_create (&sset, nfa, s, label); 2033 nfa_closure_create (&sset, nfa, s, label);
2016 for (j = 0; j < sset.len; j++) 2034 for (j = 0; j < sset.off; j++)
2017 { 2035 {
2018 contains = 0; 2036 contains = 0;
2019 for (k = 0; k < ret->len; k++) 2037 for (k = 0; k < ret->off; k++)
2020 { 2038 {
2021 if (sset.states[j]->id == ret->states[k]->id) 2039 if (sset.states[j]->id == ret->states[k]->id)
2022 { 2040 {
@@ -2025,14 +2043,14 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret,
2025 } 2043 }
2026 } 2044 }
2027 if (!contains) 2045 if (!contains)
2028 GNUNET_array_append (ret->states, ret->len, sset.states[j]); 2046 state_set_append (ret, sset.states[j]);
2029 } 2047 }
2030 state_set_clear (&sset); 2048 state_set_clear (&sset);
2031 } 2049 }
2032 2050
2033 if (ret->len > 1) 2051 if (ret->off > 1)
2034 qsort (ret->states, ret->len, sizeof (struct GNUNET_REGEX_State *), 2052 qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *),
2035 state_compare); 2053 &state_compare);
2036} 2054}
2037 2055
2038 2056
@@ -2289,7 +2307,8 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2289 unsigned int count; 2307 unsigned int count;
2290 unsigned int altcount; 2308 unsigned int altcount;
2291 unsigned int atomcount; 2309 unsigned int atomcount;
2292 unsigned int pcount; 2310 unsigned int poff;
2311 unsigned int psize;
2293 struct 2312 struct
2294 { 2313 {
2295 int altcount; 2314 int altcount;
@@ -2312,7 +2331,8 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2312 error_msg = NULL; 2331 error_msg = NULL;
2313 altcount = 0; 2332 altcount = 0;
2314 atomcount = 0; 2333 atomcount = 0;
2315 pcount = 0; 2334 poff = 0;
2335 psize = 0;
2316 2336
2317 for (count = 0; count < len && *regexp; count++, regexp++) 2337 for (count = 0; count < len && *regexp; count++, regexp++)
2318 { 2338 {
@@ -2324,9 +2344,11 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2324 --atomcount; 2344 --atomcount;
2325 nfa_add_concatenation (&ctx); 2345 nfa_add_concatenation (&ctx);
2326 } 2346 }
2327 GNUNET_array_grow (p, pcount, pcount + 1); 2347 if (poff == psize)
2328 p[pcount - 1].altcount = altcount; 2348 GNUNET_array_grow (p, psize, psize * 2 + 4);
2329 p[pcount - 1].atomcount = atomcount; 2349 p[poff].altcount = altcount;
2350 p[poff].atomcount = atomcount;
2351 poff++;
2330 altcount = 0; 2352 altcount = 0;
2331 atomcount = 0; 2353 atomcount = 0;
2332 break; 2354 break;
@@ -2341,7 +2363,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2341 altcount++; 2363 altcount++;
2342 break; 2364 break;
2343 case ')': 2365 case ')':
2344 if (0 == pcount) 2366 if (0 == poff)
2345 { 2367 {
2346 error_msg = "Missing opening '('"; 2368 error_msg = "Missing opening '('";
2347 goto error; 2369 goto error;
@@ -2349,18 +2371,18 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2349 if (0 == atomcount) 2371 if (0 == atomcount)
2350 { 2372 {
2351 /* Ignore this: "()" */ 2373 /* Ignore this: "()" */
2352 pcount--; 2374 poff--;
2353 altcount = p[pcount].altcount; 2375 altcount = p[poff].altcount;
2354 atomcount = p[pcount].atomcount; 2376 atomcount = p[poff].atomcount;
2355 break; 2377 break;
2356 } 2378 }
2357 while (--atomcount > 0) 2379 while (--atomcount > 0)
2358 nfa_add_concatenation (&ctx); 2380 nfa_add_concatenation (&ctx);
2359 for (; altcount > 0; altcount--) 2381 for (; altcount > 0; altcount--)
2360 nfa_add_alternation (&ctx); 2382 nfa_add_alternation (&ctx);
2361 pcount--; 2383 poff--;
2362 altcount = p[pcount].altcount; 2384 altcount = p[poff].altcount;
2363 atomcount = p[pcount].atomcount; 2385 atomcount = p[poff].atomcount;
2364 atomcount++; 2386 atomcount++;
2365 break; 2387 break;
2366 case '*': 2388 case '*':
@@ -2399,7 +2421,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2399 break; 2421 break;
2400 } 2422 }
2401 } 2423 }
2402 if (0 != pcount) 2424 if (0 != poff)
2403 { 2425 {
2404 error_msg = "Unbalanced parenthesis"; 2426 error_msg = "Unbalanced parenthesis";
2405 goto error; 2427 goto error;
@@ -2409,7 +2431,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2409 for (; altcount > 0; altcount--) 2431 for (; altcount > 0; altcount--)
2410 nfa_add_alternation (&ctx); 2432 nfa_add_alternation (&ctx);
2411 2433
2412 GNUNET_free_non_null (p); 2434 GNUNET_array_grow (p, psize, 0);
2413 2435
2414 nfa = ctx.stack_tail; 2436 nfa = ctx.stack_tail;
2415 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); 2437 GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa);
@@ -2449,6 +2471,7 @@ error:
2449 2471
2450 2472
2451static unsigned long long loopy; 2473static unsigned long long loopy;
2474static unsigned long long doopy;
2452 2475
2453/** 2476/**
2454 * Create DFA states based on given 'nfa' and starting with 'dfa_state'. 2477 * Create DFA states based on given 'nfa' and starting with 'dfa_state'.
@@ -2483,6 +2506,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
2483 2506
2484 /* FIXME: this O(n) loop can be done in O(1) with a hash map */ 2507 /* FIXME: this O(n) loop can be done in O(1) with a hash map */
2485 state_contains = NULL; 2508 state_contains = NULL;
2509 doopy++;
2486 for (state_iter = dfa->states_head; NULL != state_iter; 2510 for (state_iter = dfa->states_head; NULL != state_iter;
2487 state_iter = state_iter->next) 2511 state_iter = state_iter->next)
2488 { 2512 {
@@ -2559,7 +2583,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2559 2583
2560 fprintf (stderr, "D"); 2584 fprintf (stderr, "D");
2561 construct_dfa_states (&ctx, nfa, dfa, dfa->start); 2585 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
2562 // fprintf (stderr, "D-X: %llu\n", loopy); 2586 fprintf (stderr, "D-X: %llu/%llu\n", loopy, doopy);
2563 GNUNET_REGEX_automaton_destroy (nfa); 2587 GNUNET_REGEX_automaton_destroy (nfa);
2564 2588
2565 /* Minimize DFA */ 2589 /* Minimize DFA */
@@ -2567,7 +2591,6 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2567 dfa_minimize (&ctx, dfa); 2591 dfa_minimize (&ctx, dfa);
2568 2592
2569 /* Create proofs and hashes for all states */ 2593 /* Create proofs and hashes for all states */
2570 // fprintf (stderr, "P");
2571 automaton_create_proofs (dfa); 2594 automaton_create_proofs (dfa);
2572 2595
2573 /* Compress linear DFA paths */ 2596 /* Compress linear DFA paths */
@@ -2693,7 +2716,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2693 state_set_clear (&new_sset); 2716 state_set_clear (&new_sset);
2694 } 2717 }
2695 2718
2696 for (i = 0; i < sset.len; i++) 2719 for (i = 0; i < sset.off; i++)
2697 { 2720 {
2698 s = sset.states[i]; 2721 s = sset.states[i];
2699 if ( (NULL != s) && (s->accepting) ) 2722 if ( (NULL != s) && (s->accepting) )
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
index fe76d1537..00badc54d 100644
--- a/src/regex/regex_internal.h
+++ b/src/regex/regex_internal.h
@@ -98,9 +98,14 @@ struct GNUNET_REGEX_StateSet
98 struct GNUNET_REGEX_State **states; 98 struct GNUNET_REGEX_State **states;
99 99
100 /** 100 /**
101 * Number of entries in *use* in the 'states' array.
102 */
103 unsigned int off;
104
105 /**
101 * Length of the 'states' array. 106 * Length of the 'states' array.
102 */ 107 */
103 unsigned int len; 108 unsigned int size;
104}; 109};
105 110
106 111