diff options
-rw-r--r-- | src/regex/regex.c | 93 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 7 |
2 files changed, 64 insertions, 36 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 73ec5b14a..677894875 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -60,6 +60,22 @@ struct GNUNET_REGEX_StateSet_MDLL | |||
60 | 60 | ||
61 | 61 | ||
62 | /** | 62 | /** |
63 | * Append state to the given StateSet ' | ||
64 | * | ||
65 | * @param set set to be modified | ||
66 | * @param state state to be appended | ||
67 | */ | ||
68 | static void | ||
69 | state_set_append (struct GNUNET_REGEX_StateSet *set, | ||
70 | struct GNUNET_REGEX_State *state) | ||
71 | { | ||
72 | if (set->off == set->size) | ||
73 | GNUNET_array_grow (set->states, set->size, set->size * 2 + 4); | ||
74 | set->states[set->off++] = state; | ||
75 | } | ||
76 | |||
77 | |||
78 | /** | ||
63 | * Compare two strings for equality. If either is NULL they are not equal. | 79 | * Compare two strings for equality. If either is NULL they are not equal. |
64 | * | 80 | * |
65 | * @param str1 first string for comparison. | 81 | * @param str1 first string for comparison. |
@@ -231,12 +247,12 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, | |||
231 | if (NULL == sset1 || NULL == sset2) | 247 | if (NULL == sset1 || NULL == sset2) |
232 | return 1; | 248 | return 1; |
233 | 249 | ||
234 | result = sset1->len - sset2->len; | 250 | result = sset1->off - sset2->off; |
235 | if (result < 0) | 251 | if (result < 0) |
236 | return -1; | 252 | return -1; |
237 | if (result > 0) | 253 | if (result > 0) |
238 | return 1; | 254 | return 1; |
239 | for (i = 0; i < sset1->len; i++) | 255 | for (i = 0; i < sset1->off; i++) |
240 | if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i]))) | 256 | if (0 != (result = state_compare (&sset1->states[i], &sset2->states[i]))) |
241 | break; | 257 | break; |
242 | return result; | 258 | return result; |
@@ -251,7 +267,8 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, | |||
251 | static void | 267 | static void |
252 | state_set_clear (struct GNUNET_REGEX_StateSet *set) | 268 | state_set_clear (struct GNUNET_REGEX_StateSet *set) |
253 | { | 269 | { |
254 | GNUNET_array_grow (set->states, set->len, 0); | 270 | GNUNET_array_grow (set->states, set->size, 0); |
271 | set->off = 0; | ||
255 | } | 272 | } |
256 | 273 | ||
257 | 274 | ||
@@ -1250,16 +1267,16 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1250 | 1267 | ||
1251 | s->nfa_set = *nfa_states; | 1268 | s->nfa_set = *nfa_states; |
1252 | 1269 | ||
1253 | if (nfa_states->len < 1) | 1270 | if (nfa_states->off < 1) |
1254 | return s; | 1271 | return s; |
1255 | 1272 | ||
1256 | /* Create a name based on 'nfa_states' */ | 1273 | /* Create a name based on 'nfa_states' */ |
1257 | len = nfa_states->len * 14 + 4; | 1274 | len = nfa_states->off * 14 + 4; |
1258 | s->name = GNUNET_malloc (len); | 1275 | s->name = GNUNET_malloc (len); |
1259 | strcat (s->name, "{"); | 1276 | strcat (s->name, "{"); |
1260 | pos = s->name + 1; | 1277 | pos = s->name + 1; |
1261 | 1278 | ||
1262 | for (i = 0; i < nfa_states->len; i++) | 1279 | for (i = 0; i < nfa_states->off; i++) |
1263 | { | 1280 | { |
1264 | cstate = nfa_states->states[i]; | 1281 | cstate = nfa_states->states[i]; |
1265 | GNUNET_snprintf (pos, pos - s->name + len, | 1282 | GNUNET_snprintf (pos, pos - s->name + len, |
@@ -1749,6 +1766,7 @@ dfa_compress_paths_helper (struct GNUNET_REGEX_Automaton *dfa, | |||
1749 | } | 1766 | } |
1750 | } | 1767 | } |
1751 | 1768 | ||
1769 | |||
1752 | /** | 1770 | /** |
1753 | * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be | 1771 | * Compress paths in the given 'dfa'. Linear paths like 0->1->2->3 will be |
1754 | * compressed to 0->3 by combining transitions. | 1772 | * compressed to 0->3 by combining transitions. |
@@ -1944,7 +1962,7 @@ nfa_closure_create (struct GNUNET_REGEX_StateSet *cls, | |||
1944 | 1962 | ||
1945 | /* Add start state to closure only for epsilon closure */ | 1963 | /* Add start state to closure only for epsilon closure */ |
1946 | if (NULL == label) | 1964 | if (NULL == label) |
1947 | GNUNET_array_append (cls->states, cls->len, s); | 1965 | state_set_append (cls, s); |
1948 | 1966 | ||
1949 | GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s); | 1967 | GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s); |
1950 | cls_stack.len = 1; | 1968 | cls_stack.len = 1; |
@@ -1965,7 +1983,7 @@ nfa_closure_create (struct GNUNET_REGEX_StateSet *cls, | |||
1965 | continue; | 1983 | continue; |
1966 | if (0 == clsstate->contained) | 1984 | if (0 == clsstate->contained) |
1967 | { | 1985 | { |
1968 | GNUNET_array_append (cls->states, cls->len, clsstate); | 1986 | state_set_append (cls, clsstate); |
1969 | GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail, | 1987 | GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail, |
1970 | clsstate); | 1988 | clsstate); |
1971 | cls_stack.len++; | 1989 | cls_stack.len++; |
@@ -1974,12 +1992,12 @@ nfa_closure_create (struct GNUNET_REGEX_StateSet *cls, | |||
1974 | } | 1992 | } |
1975 | } | 1993 | } |
1976 | 1994 | ||
1977 | for (i = 0; i < cls->len; i++) | 1995 | for (i = 0; i < cls->off; i++) |
1978 | cls->states[i]->contained = 0; | 1996 | cls->states[i]->contained = 0; |
1979 | 1997 | ||
1980 | /* sort the states */ | 1998 | /* sort the states */ |
1981 | if (cls->len > 1) | 1999 | if (cls->off > 1) |
1982 | qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), | 2000 | qsort (cls->states, cls->off, sizeof (struct GNUNET_REGEX_State *), |
1983 | &state_compare); | 2001 | &state_compare); |
1984 | } | 2002 | } |
1985 | 2003 | ||
@@ -2009,14 +2027,14 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret, | |||
2009 | if (NULL == states) | 2027 | if (NULL == states) |
2010 | return; | 2028 | return; |
2011 | 2029 | ||
2012 | for (i = 0; i < states->len; i++) | 2030 | for (i = 0; i < states->off; i++) |
2013 | { | 2031 | { |
2014 | s = states->states[i]; | 2032 | s = states->states[i]; |
2015 | nfa_closure_create (&sset, nfa, s, label); | 2033 | nfa_closure_create (&sset, nfa, s, label); |
2016 | for (j = 0; j < sset.len; j++) | 2034 | for (j = 0; j < sset.off; j++) |
2017 | { | 2035 | { |
2018 | contains = 0; | 2036 | contains = 0; |
2019 | for (k = 0; k < ret->len; k++) | 2037 | for (k = 0; k < ret->off; k++) |
2020 | { | 2038 | { |
2021 | if (sset.states[j]->id == ret->states[k]->id) | 2039 | if (sset.states[j]->id == ret->states[k]->id) |
2022 | { | 2040 | { |
@@ -2025,14 +2043,14 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret, | |||
2025 | } | 2043 | } |
2026 | } | 2044 | } |
2027 | if (!contains) | 2045 | if (!contains) |
2028 | GNUNET_array_append (ret->states, ret->len, sset.states[j]); | 2046 | state_set_append (ret, sset.states[j]); |
2029 | } | 2047 | } |
2030 | state_set_clear (&sset); | 2048 | state_set_clear (&sset); |
2031 | } | 2049 | } |
2032 | 2050 | ||
2033 | if (ret->len > 1) | 2051 | if (ret->off > 1) |
2034 | qsort (ret->states, ret->len, sizeof (struct GNUNET_REGEX_State *), | 2052 | qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *), |
2035 | state_compare); | 2053 | &state_compare); |
2036 | } | 2054 | } |
2037 | 2055 | ||
2038 | 2056 | ||
@@ -2289,7 +2307,8 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2289 | unsigned int count; | 2307 | unsigned int count; |
2290 | unsigned int altcount; | 2308 | unsigned int altcount; |
2291 | unsigned int atomcount; | 2309 | unsigned int atomcount; |
2292 | unsigned int pcount; | 2310 | unsigned int poff; |
2311 | unsigned int psize; | ||
2293 | struct | 2312 | struct |
2294 | { | 2313 | { |
2295 | int altcount; | 2314 | int altcount; |
@@ -2312,7 +2331,8 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2312 | error_msg = NULL; | 2331 | error_msg = NULL; |
2313 | altcount = 0; | 2332 | altcount = 0; |
2314 | atomcount = 0; | 2333 | atomcount = 0; |
2315 | pcount = 0; | 2334 | poff = 0; |
2335 | psize = 0; | ||
2316 | 2336 | ||
2317 | for (count = 0; count < len && *regexp; count++, regexp++) | 2337 | for (count = 0; count < len && *regexp; count++, regexp++) |
2318 | { | 2338 | { |
@@ -2324,9 +2344,11 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2324 | --atomcount; | 2344 | --atomcount; |
2325 | nfa_add_concatenation (&ctx); | 2345 | nfa_add_concatenation (&ctx); |
2326 | } | 2346 | } |
2327 | GNUNET_array_grow (p, pcount, pcount + 1); | 2347 | if (poff == psize) |
2328 | p[pcount - 1].altcount = altcount; | 2348 | GNUNET_array_grow (p, psize, psize * 2 + 4); |
2329 | p[pcount - 1].atomcount = atomcount; | 2349 | p[poff].altcount = altcount; |
2350 | p[poff].atomcount = atomcount; | ||
2351 | poff++; | ||
2330 | altcount = 0; | 2352 | altcount = 0; |
2331 | atomcount = 0; | 2353 | atomcount = 0; |
2332 | break; | 2354 | break; |
@@ -2341,7 +2363,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2341 | altcount++; | 2363 | altcount++; |
2342 | break; | 2364 | break; |
2343 | case ')': | 2365 | case ')': |
2344 | if (0 == pcount) | 2366 | if (0 == poff) |
2345 | { | 2367 | { |
2346 | error_msg = "Missing opening '('"; | 2368 | error_msg = "Missing opening '('"; |
2347 | goto error; | 2369 | goto error; |
@@ -2349,18 +2371,18 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2349 | if (0 == atomcount) | 2371 | if (0 == atomcount) |
2350 | { | 2372 | { |
2351 | /* Ignore this: "()" */ | 2373 | /* Ignore this: "()" */ |
2352 | pcount--; | 2374 | poff--; |
2353 | altcount = p[pcount].altcount; | 2375 | altcount = p[poff].altcount; |
2354 | atomcount = p[pcount].atomcount; | 2376 | atomcount = p[poff].atomcount; |
2355 | break; | 2377 | break; |
2356 | } | 2378 | } |
2357 | while (--atomcount > 0) | 2379 | while (--atomcount > 0) |
2358 | nfa_add_concatenation (&ctx); | 2380 | nfa_add_concatenation (&ctx); |
2359 | for (; altcount > 0; altcount--) | 2381 | for (; altcount > 0; altcount--) |
2360 | nfa_add_alternation (&ctx); | 2382 | nfa_add_alternation (&ctx); |
2361 | pcount--; | 2383 | poff--; |
2362 | altcount = p[pcount].altcount; | 2384 | altcount = p[poff].altcount; |
2363 | atomcount = p[pcount].atomcount; | 2385 | atomcount = p[poff].atomcount; |
2364 | atomcount++; | 2386 | atomcount++; |
2365 | break; | 2387 | break; |
2366 | case '*': | 2388 | case '*': |
@@ -2399,7 +2421,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2399 | break; | 2421 | break; |
2400 | } | 2422 | } |
2401 | } | 2423 | } |
2402 | if (0 != pcount) | 2424 | if (0 != poff) |
2403 | { | 2425 | { |
2404 | error_msg = "Unbalanced parenthesis"; | 2426 | error_msg = "Unbalanced parenthesis"; |
2405 | goto error; | 2427 | goto error; |
@@ -2409,7 +2431,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2409 | for (; altcount > 0; altcount--) | 2431 | for (; altcount > 0; altcount--) |
2410 | nfa_add_alternation (&ctx); | 2432 | nfa_add_alternation (&ctx); |
2411 | 2433 | ||
2412 | GNUNET_free_non_null (p); | 2434 | GNUNET_array_grow (p, psize, 0); |
2413 | 2435 | ||
2414 | nfa = ctx.stack_tail; | 2436 | nfa = ctx.stack_tail; |
2415 | GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); | 2437 | GNUNET_CONTAINER_DLL_remove (ctx.stack_head, ctx.stack_tail, nfa); |
@@ -2449,6 +2471,7 @@ error: | |||
2449 | 2471 | ||
2450 | 2472 | ||
2451 | static unsigned long long loopy; | 2473 | static unsigned long long loopy; |
2474 | static unsigned long long doopy; | ||
2452 | 2475 | ||
2453 | /** | 2476 | /** |
2454 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. | 2477 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. |
@@ -2483,6 +2506,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2483 | 2506 | ||
2484 | /* FIXME: this O(n) loop can be done in O(1) with a hash map */ | 2507 | /* FIXME: this O(n) loop can be done in O(1) with a hash map */ |
2485 | state_contains = NULL; | 2508 | state_contains = NULL; |
2509 | doopy++; | ||
2486 | for (state_iter = dfa->states_head; NULL != state_iter; | 2510 | for (state_iter = dfa->states_head; NULL != state_iter; |
2487 | state_iter = state_iter->next) | 2511 | state_iter = state_iter->next) |
2488 | { | 2512 | { |
@@ -2559,7 +2583,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2559 | 2583 | ||
2560 | fprintf (stderr, "D"); | 2584 | fprintf (stderr, "D"); |
2561 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); | 2585 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); |
2562 | // fprintf (stderr, "D-X: %llu\n", loopy); | 2586 | fprintf (stderr, "D-X: %llu/%llu\n", loopy, doopy); |
2563 | GNUNET_REGEX_automaton_destroy (nfa); | 2587 | GNUNET_REGEX_automaton_destroy (nfa); |
2564 | 2588 | ||
2565 | /* Minimize DFA */ | 2589 | /* Minimize DFA */ |
@@ -2567,7 +2591,6 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2567 | dfa_minimize (&ctx, dfa); | 2591 | dfa_minimize (&ctx, dfa); |
2568 | 2592 | ||
2569 | /* Create proofs and hashes for all states */ | 2593 | /* Create proofs and hashes for all states */ |
2570 | // fprintf (stderr, "P"); | ||
2571 | automaton_create_proofs (dfa); | 2594 | automaton_create_proofs (dfa); |
2572 | 2595 | ||
2573 | /* Compress linear DFA paths */ | 2596 | /* Compress linear DFA paths */ |
@@ -2693,7 +2716,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2693 | state_set_clear (&new_sset); | 2716 | state_set_clear (&new_sset); |
2694 | } | 2717 | } |
2695 | 2718 | ||
2696 | for (i = 0; i < sset.len; i++) | 2719 | for (i = 0; i < sset.off; i++) |
2697 | { | 2720 | { |
2698 | s = sset.states[i]; | 2721 | s = sset.states[i]; |
2699 | if ( (NULL != s) && (s->accepting) ) | 2722 | if ( (NULL != s) && (s->accepting) ) |
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index fe76d1537..00badc54d 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -98,9 +98,14 @@ struct GNUNET_REGEX_StateSet | |||
98 | struct GNUNET_REGEX_State **states; | 98 | struct GNUNET_REGEX_State **states; |
99 | 99 | ||
100 | /** | 100 | /** |
101 | * Number of entries in *use* in the 'states' array. | ||
102 | */ | ||
103 | unsigned int off; | ||
104 | |||
105 | /** | ||
101 | * Length of the 'states' array. | 106 | * Length of the 'states' array. |
102 | */ | 107 | */ |
103 | unsigned int len; | 108 | unsigned int size; |
104 | }; | 109 | }; |
105 | 110 | ||
106 | 111 | ||