diff options
-rw-r--r-- | src/regex/regex.c | 126 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 25 |
2 files changed, 73 insertions, 78 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 08bd7e608..73ec5b14a 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -36,21 +36,6 @@ | |||
36 | */ | 36 | */ |
37 | #define REGEX_DEBUG_DFA GNUNET_NO | 37 | #define REGEX_DEBUG_DFA GNUNET_NO |
38 | 38 | ||
39 | /** | ||
40 | * Set of states. | ||
41 | */ | ||
42 | struct GNUNET_REGEX_StateSet | ||
43 | { | ||
44 | /** | ||
45 | * Array of states. | ||
46 | */ | ||
47 | struct GNUNET_REGEX_State **states; | ||
48 | |||
49 | /** | ||
50 | * Length of the 'states' array. | ||
51 | */ | ||
52 | unsigned int len; | ||
53 | }; | ||
54 | 39 | ||
55 | /** | 40 | /** |
56 | * Set of states using MDLL API. | 41 | * Set of states using MDLL API. |
@@ -266,12 +251,7 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1, | |||
266 | static void | 251 | static void |
267 | state_set_clear (struct GNUNET_REGEX_StateSet *set) | 252 | state_set_clear (struct GNUNET_REGEX_StateSet *set) |
268 | { | 253 | { |
269 | if (NULL == set) | 254 | GNUNET_array_grow (set->states, set->len, 0); |
270 | return; | ||
271 | |||
272 | if (set->len > 0) | ||
273 | GNUNET_array_grow (set->states, set->len, 0); | ||
274 | GNUNET_free (set); | ||
275 | } | 255 | } |
276 | 256 | ||
277 | 257 | ||
@@ -312,8 +292,7 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s) | |||
312 | 292 | ||
313 | GNUNET_free_non_null (s->name); | 293 | GNUNET_free_non_null (s->name); |
314 | GNUNET_free_non_null (s->proof); | 294 | GNUNET_free_non_null (s->proof); |
315 | state_set_clear (s->nfa_set); | 295 | state_set_clear (&s->nfa_set); |
316 | |||
317 | for (t = s->transitions_head; NULL != t; t = next_t) | 296 | for (t = s->transitions_head; NULL != t; t = next_t) |
318 | { | 297 | { |
319 | next_t = t->next; | 298 | next_t = t->next; |
@@ -1269,7 +1248,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1269 | return s; | 1248 | return s; |
1270 | } | 1249 | } |
1271 | 1250 | ||
1272 | s->nfa_set = nfa_states; | 1251 | s->nfa_set = *nfa_states; |
1273 | 1252 | ||
1274 | if (nfa_states->len < 1) | 1253 | if (nfa_states->len < 1) |
1275 | return s; | 1254 | return s; |
@@ -1300,6 +1279,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1300 | pos[-1] = '}'; | 1279 | pos[-1] = '}'; |
1301 | s->name = GNUNET_realloc (s->name, strlen (s->name) + 1); | 1280 | s->name = GNUNET_realloc (s->name, strlen (s->name) + 1); |
1302 | 1281 | ||
1282 | memset (nfa_states, 0, sizeof (struct GNUNET_REGEX_StateSet)); | ||
1303 | return s; | 1283 | return s; |
1304 | } | 1284 | } |
1305 | 1285 | ||
@@ -1938,28 +1918,27 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting) | |||
1938 | /** | 1918 | /** |
1939 | * Calculates the NFA closure set for the given state. | 1919 | * Calculates the NFA closure set for the given state. |
1940 | * | 1920 | * |
1921 | * @param cls set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | ||
1941 | * @param nfa the NFA containing 's' | 1922 | * @param nfa the NFA containing 's' |
1942 | * @param s starting point state | 1923 | * @param s starting point state |
1943 | * @param label transitioning label on which to base the closure on, | 1924 | * @param label transitioning label on which to base the closure on, |
1944 | * pass NULL for epsilon transition | 1925 | * pass NULL for epsilon transition |
1945 | * | ||
1946 | * @return sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | ||
1947 | */ | 1926 | */ |
1948 | static struct GNUNET_REGEX_StateSet * | 1927 | static void |
1949 | nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa, | 1928 | nfa_closure_create (struct GNUNET_REGEX_StateSet *cls, |
1929 | struct GNUNET_REGEX_Automaton *nfa, | ||
1950 | struct GNUNET_REGEX_State *s, const char *label) | 1930 | struct GNUNET_REGEX_State *s, const char *label) |
1951 | { | 1931 | { |
1952 | unsigned int i; | 1932 | unsigned int i; |
1953 | struct GNUNET_REGEX_StateSet *cls; | ||
1954 | struct GNUNET_REGEX_StateSet_MDLL cls_stack; | 1933 | struct GNUNET_REGEX_StateSet_MDLL cls_stack; |
1955 | struct GNUNET_REGEX_State *clsstate; | 1934 | struct GNUNET_REGEX_State *clsstate; |
1956 | struct GNUNET_REGEX_State *currentstate; | 1935 | struct GNUNET_REGEX_State *currentstate; |
1957 | struct GNUNET_REGEX_Transition *ctran; | 1936 | struct GNUNET_REGEX_Transition *ctran; |
1958 | 1937 | ||
1938 | memset (cls, 0, sizeof (struct GNUNET_REGEX_StateSet)); | ||
1959 | if (NULL == s) | 1939 | if (NULL == s) |
1960 | return NULL; | 1940 | return; |
1961 | 1941 | ||
1962 | cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet)); | ||
1963 | cls_stack.head = NULL; | 1942 | cls_stack.head = NULL; |
1964 | cls_stack.tail = NULL; | 1943 | cls_stack.tail = NULL; |
1965 | 1944 | ||
@@ -2002,65 +1981,58 @@ nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa, | |||
2002 | if (cls->len > 1) | 1981 | if (cls->len > 1) |
2003 | qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), | 1982 | qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), |
2004 | &state_compare); | 1983 | &state_compare); |
2005 | |||
2006 | return cls; | ||
2007 | } | 1984 | } |
2008 | 1985 | ||
2009 | 1986 | ||
2010 | /** | 1987 | /** |
2011 | * Calculates the closure set for the given set of states. | 1988 | * Calculates the closure set for the given set of states. |
2012 | * | 1989 | * |
1990 | * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | ||
2013 | * @param nfa the NFA containing 's' | 1991 | * @param nfa the NFA containing 's' |
2014 | * @param states list of states on which to base the closure on | 1992 | * @param states list of states on which to base the closure on |
2015 | * @param label transitioning label for which to base the closure on, | 1993 | * @param label transitioning label for which to base the closure on, |
2016 | * pass NULL for epsilon transition | 1994 | * pass NULL for epsilon transition |
2017 | * | ||
2018 | * @return sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | ||
2019 | */ | 1995 | */ |
2020 | static struct GNUNET_REGEX_StateSet * | 1996 | static void |
2021 | nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa, | 1997 | nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret, |
1998 | struct GNUNET_REGEX_Automaton *nfa, | ||
2022 | struct GNUNET_REGEX_StateSet *states, const char *label) | 1999 | struct GNUNET_REGEX_StateSet *states, const char *label) |
2023 | { | 2000 | { |
2024 | struct GNUNET_REGEX_State *s; | 2001 | struct GNUNET_REGEX_State *s; |
2025 | struct GNUNET_REGEX_StateSet *sset; | 2002 | struct GNUNET_REGEX_StateSet sset; |
2026 | struct GNUNET_REGEX_StateSet *cls; | ||
2027 | unsigned int i; | 2003 | unsigned int i; |
2028 | unsigned int j; | 2004 | unsigned int j; |
2029 | unsigned int k; | 2005 | unsigned int k; |
2030 | unsigned int contains; | 2006 | unsigned int contains; |
2031 | 2007 | ||
2008 | memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet)); | ||
2032 | if (NULL == states) | 2009 | if (NULL == states) |
2033 | return NULL; | 2010 | return; |
2034 | |||
2035 | cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet)); | ||
2036 | 2011 | ||
2037 | for (i = 0; i < states->len; i++) | 2012 | for (i = 0; i < states->len; i++) |
2038 | { | 2013 | { |
2039 | s = states->states[i]; | 2014 | s = states->states[i]; |
2040 | sset = nfa_closure_create (nfa, s, label); | 2015 | nfa_closure_create (&sset, nfa, s, label); |
2041 | 2016 | for (j = 0; j < sset.len; j++) | |
2042 | for (j = 0; j < sset->len; j++) | ||
2043 | { | 2017 | { |
2044 | contains = 0; | 2018 | contains = 0; |
2045 | for (k = 0; k < cls->len; k++) | 2019 | for (k = 0; k < ret->len; k++) |
2046 | { | 2020 | { |
2047 | if (sset->states[j]->id == cls->states[k]->id) | 2021 | if (sset.states[j]->id == ret->states[k]->id) |
2048 | { | 2022 | { |
2049 | contains = 1; | 2023 | contains = 1; |
2050 | break; | 2024 | break; |
2051 | } | 2025 | } |
2052 | } | 2026 | } |
2053 | if (!contains) | 2027 | if (!contains) |
2054 | GNUNET_array_append (cls->states, cls->len, sset->states[j]); | 2028 | GNUNET_array_append (ret->states, ret->len, sset.states[j]); |
2055 | } | 2029 | } |
2056 | state_set_clear (sset); | 2030 | state_set_clear (&sset); |
2057 | } | 2031 | } |
2058 | 2032 | ||
2059 | if (cls->len > 1) | 2033 | if (ret->len > 1) |
2060 | qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), | 2034 | qsort (ret->states, ret->len, sizeof (struct GNUNET_REGEX_State *), |
2061 | state_compare); | 2035 | state_compare); |
2062 | |||
2063 | return cls; | ||
2064 | } | 2036 | } |
2065 | 2037 | ||
2066 | 2038 | ||
@@ -2497,17 +2469,17 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2497 | struct GNUNET_REGEX_State *state_iter; | 2469 | struct GNUNET_REGEX_State *state_iter; |
2498 | struct GNUNET_REGEX_State *new_dfa_state; | 2470 | struct GNUNET_REGEX_State *new_dfa_state; |
2499 | struct GNUNET_REGEX_State *state_contains; | 2471 | struct GNUNET_REGEX_State *state_contains; |
2500 | struct GNUNET_REGEX_StateSet *tmp; | 2472 | struct GNUNET_REGEX_StateSet tmp; |
2501 | struct GNUNET_REGEX_StateSet *nfa_set; | 2473 | struct GNUNET_REGEX_StateSet nfa_set; |
2502 | 2474 | ||
2503 | for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next) | 2475 | for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next) |
2504 | { | 2476 | { |
2505 | if (NULL == ctran->label || NULL != ctran->to_state) | 2477 | if (NULL == ctran->label || NULL != ctran->to_state) |
2506 | continue; | 2478 | continue; |
2507 | 2479 | ||
2508 | tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->label); | 2480 | nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label); |
2509 | nfa_set = nfa_closure_set_create (nfa, tmp, NULL); | 2481 | nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL); |
2510 | state_set_clear (tmp); | 2482 | state_set_clear (&tmp); |
2511 | 2483 | ||
2512 | /* FIXME: this O(n) loop can be done in O(1) with a hash map */ | 2484 | /* FIXME: this O(n) loop can be done in O(1) with a hash map */ |
2513 | state_contains = NULL; | 2485 | state_contains = NULL; |
@@ -2515,7 +2487,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2515 | state_iter = state_iter->next) | 2487 | state_iter = state_iter->next) |
2516 | { | 2488 | { |
2517 | loopy++; | 2489 | loopy++; |
2518 | if (0 == state_set_compare (state_iter->nfa_set, nfa_set)) | 2490 | if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set)) |
2519 | { | 2491 | { |
2520 | state_contains = state_iter; | 2492 | state_contains = state_iter; |
2521 | break; | 2493 | break; |
@@ -2524,7 +2496,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx, | |||
2524 | loopy--; | 2496 | loopy--; |
2525 | if (NULL == state_contains) | 2497 | if (NULL == state_contains) |
2526 | { | 2498 | { |
2527 | new_dfa_state = dfa_state_create (ctx, nfa_set); | 2499 | new_dfa_state = dfa_state_create (ctx, &nfa_set); |
2528 | automaton_add_state (dfa, new_dfa_state); | 2500 | automaton_add_state (dfa, new_dfa_state); |
2529 | ctran->to_state = new_dfa_state; | 2501 | ctran->to_state = new_dfa_state; |
2530 | construct_dfa_states (ctx, nfa, dfa, new_dfa_state); | 2502 | construct_dfa_states (ctx, nfa, dfa, new_dfa_state); |
@@ -2561,7 +2533,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2561 | struct GNUNET_REGEX_Context ctx; | 2533 | struct GNUNET_REGEX_Context ctx; |
2562 | struct GNUNET_REGEX_Automaton *dfa; | 2534 | struct GNUNET_REGEX_Automaton *dfa; |
2563 | struct GNUNET_REGEX_Automaton *nfa; | 2535 | struct GNUNET_REGEX_Automaton *nfa; |
2564 | struct GNUNET_REGEX_StateSet *nfa_start_eps_cls; | 2536 | struct GNUNET_REGEX_StateSet nfa_start_eps_cls; |
2565 | 2537 | ||
2566 | GNUNET_REGEX_context_init (&ctx); | 2538 | GNUNET_REGEX_context_init (&ctx); |
2567 | 2539 | ||
@@ -2581,13 +2553,13 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2581 | dfa->regex = GNUNET_strdup (regex); | 2553 | dfa->regex = GNUNET_strdup (regex); |
2582 | 2554 | ||
2583 | /* Create DFA start state from epsilon closure */ | 2555 | /* Create DFA start state from epsilon closure */ |
2584 | nfa_start_eps_cls = nfa_closure_create (nfa, nfa->start, NULL); | 2556 | nfa_closure_create (&nfa_start_eps_cls, nfa, nfa->start, NULL); |
2585 | dfa->start = dfa_state_create (&ctx, nfa_start_eps_cls); | 2557 | dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls); |
2586 | automaton_add_state (dfa, dfa->start); | 2558 | automaton_add_state (dfa, dfa->start); |
2587 | 2559 | ||
2588 | fprintf (stderr, "D"); | 2560 | fprintf (stderr, "D"); |
2589 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); | 2561 | construct_dfa_states (&ctx, nfa, dfa, dfa->start); |
2590 | fprintf (stderr, "D-X: %llu\n", loopy); | 2562 | // fprintf (stderr, "D-X: %llu\n", loopy); |
2591 | GNUNET_REGEX_automaton_destroy (nfa); | 2563 | GNUNET_REGEX_automaton_destroy (nfa); |
2592 | 2564 | ||
2593 | /* Minimize DFA */ | 2565 | /* Minimize DFA */ |
@@ -2595,8 +2567,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2595 | dfa_minimize (&ctx, dfa); | 2567 | dfa_minimize (&ctx, dfa); |
2596 | 2568 | ||
2597 | /* Create proofs and hashes for all states */ | 2569 | /* Create proofs and hashes for all states */ |
2598 | fprintf (stderr, "P"); | 2570 | // fprintf (stderr, "P"); |
2599 | // automaton_create_proofs (dfa); | 2571 | automaton_create_proofs (dfa); |
2600 | 2572 | ||
2601 | /* Compress linear DFA paths */ | 2573 | /* Compress linear DFA paths */ |
2602 | if (1 != max_path_len) | 2574 | if (1 != max_path_len) |
@@ -2692,8 +2664,8 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2692 | const char *strp; | 2664 | const char *strp; |
2693 | char str[2]; | 2665 | char str[2]; |
2694 | struct GNUNET_REGEX_State *s; | 2666 | struct GNUNET_REGEX_State *s; |
2695 | struct GNUNET_REGEX_StateSet *sset; | 2667 | struct GNUNET_REGEX_StateSet sset; |
2696 | struct GNUNET_REGEX_StateSet *new_sset; | 2668 | struct GNUNET_REGEX_StateSet new_sset; |
2697 | unsigned int i; | 2669 | unsigned int i; |
2698 | int result; | 2670 | int result; |
2699 | 2671 | ||
@@ -2709,29 +2681,29 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2709 | return 0; | 2681 | return 0; |
2710 | 2682 | ||
2711 | result = 1; | 2683 | result = 1; |
2712 | sset = nfa_closure_create (a, a->start, 0); | 2684 | nfa_closure_create (&sset, a, a->start, NULL); |
2713 | 2685 | ||
2714 | str[1] = '\0'; | 2686 | str[1] = '\0'; |
2715 | for (strp = string; NULL != strp && *strp; strp++) | 2687 | for (strp = string; NULL != strp && *strp; strp++) |
2716 | { | 2688 | { |
2717 | str[0] = *strp; | 2689 | str[0] = *strp; |
2718 | new_sset = nfa_closure_set_create (a, sset, str); | 2690 | nfa_closure_set_create (&new_sset, a, &sset, str); |
2719 | state_set_clear (sset); | 2691 | state_set_clear (&sset); |
2720 | sset = nfa_closure_set_create (a, new_sset, 0); | 2692 | nfa_closure_set_create (&sset, a, &new_sset, 0); |
2721 | state_set_clear (new_sset); | 2693 | state_set_clear (&new_sset); |
2722 | } | 2694 | } |
2723 | 2695 | ||
2724 | for (i = 0; i < sset->len; i++) | 2696 | for (i = 0; i < sset.len; i++) |
2725 | { | 2697 | { |
2726 | s = sset->states[i]; | 2698 | s = sset.states[i]; |
2727 | if (NULL != s && s->accepting) | 2699 | if ( (NULL != s) && (s->accepting) ) |
2728 | { | 2700 | { |
2729 | result = 0; | 2701 | result = 0; |
2730 | break; | 2702 | break; |
2731 | } | 2703 | } |
2732 | } | 2704 | } |
2733 | 2705 | ||
2734 | state_set_clear (sset); | 2706 | state_set_clear (&sset); |
2735 | return result; | 2707 | return result; |
2736 | } | 2708 | } |
2737 | 2709 | ||
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h index c25b938c3..fe76d1537 100644 --- a/src/regex/regex_internal.h +++ b/src/regex/regex_internal.h | |||
@@ -84,6 +84,29 @@ struct GNUNET_REGEX_Transition | |||
84 | /** | 84 | /** |
85 | * A state. Can be used in DFA and NFA automatons. | 85 | * A state. Can be used in DFA and NFA automatons. |
86 | */ | 86 | */ |
87 | struct GNUNET_REGEX_State; | ||
88 | |||
89 | |||
90 | /** | ||
91 | * Set of states. | ||
92 | */ | ||
93 | struct GNUNET_REGEX_StateSet | ||
94 | { | ||
95 | /** | ||
96 | * Array of states. | ||
97 | */ | ||
98 | struct GNUNET_REGEX_State **states; | ||
99 | |||
100 | /** | ||
101 | * Length of the 'states' array. | ||
102 | */ | ||
103 | unsigned int len; | ||
104 | }; | ||
105 | |||
106 | |||
107 | /** | ||
108 | * A state. Can be used in DFA and NFA automatons. | ||
109 | */ | ||
87 | struct GNUNET_REGEX_State | 110 | struct GNUNET_REGEX_State |
88 | { | 111 | { |
89 | /** | 112 | /** |
@@ -210,7 +233,7 @@ struct GNUNET_REGEX_State | |||
210 | * Set of states on which this state is based on. Used when creating a DFA out | 233 | * Set of states on which this state is based on. Used when creating a DFA out |
211 | * of several NFA states. | 234 | * of several NFA states. |
212 | */ | 235 | */ |
213 | struct GNUNET_REGEX_StateSet *nfa_set; | 236 | struct GNUNET_REGEX_StateSet nfa_set; |
214 | }; | 237 | }; |
215 | 238 | ||
216 | 239 | ||