aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/regex/regex.c126
-rw-r--r--src/regex/regex_internal.h25
2 files changed, 73 insertions, 78 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 08bd7e608..73ec5b14a 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -36,21 +36,6 @@
36 */ 36 */
37#define REGEX_DEBUG_DFA GNUNET_NO 37#define REGEX_DEBUG_DFA GNUNET_NO
38 38
39/**
40 * Set of states.
41 */
42struct GNUNET_REGEX_StateSet
43{
44 /**
45 * Array of states.
46 */
47 struct GNUNET_REGEX_State **states;
48
49 /**
50 * Length of the 'states' array.
51 */
52 unsigned int len;
53};
54 39
55/** 40/**
56 * Set of states using MDLL API. 41 * Set of states using MDLL API.
@@ -266,12 +251,7 @@ state_set_compare (struct GNUNET_REGEX_StateSet *sset1,
266static void 251static void
267state_set_clear (struct GNUNET_REGEX_StateSet *set) 252state_set_clear (struct GNUNET_REGEX_StateSet *set)
268{ 253{
269 if (NULL == set) 254 GNUNET_array_grow (set->states, set->len, 0);
270 return;
271
272 if (set->len > 0)
273 GNUNET_array_grow (set->states, set->len, 0);
274 GNUNET_free (set);
275} 255}
276 256
277 257
@@ -312,8 +292,7 @@ automaton_destroy_state (struct GNUNET_REGEX_State *s)
312 292
313 GNUNET_free_non_null (s->name); 293 GNUNET_free_non_null (s->name);
314 GNUNET_free_non_null (s->proof); 294 GNUNET_free_non_null (s->proof);
315 state_set_clear (s->nfa_set); 295 state_set_clear (&s->nfa_set);
316
317 for (t = s->transitions_head; NULL != t; t = next_t) 296 for (t = s->transitions_head; NULL != t; t = next_t)
318 { 297 {
319 next_t = t->next; 298 next_t = t->next;
@@ -1269,7 +1248,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1269 return s; 1248 return s;
1270 } 1249 }
1271 1250
1272 s->nfa_set = nfa_states; 1251 s->nfa_set = *nfa_states;
1273 1252
1274 if (nfa_states->len < 1) 1253 if (nfa_states->len < 1)
1275 return s; 1254 return s;
@@ -1300,6 +1279,7 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1300 pos[-1] = '}'; 1279 pos[-1] = '}';
1301 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1); 1280 s->name = GNUNET_realloc (s->name, strlen (s->name) + 1);
1302 1281
1282 memset (nfa_states, 0, sizeof (struct GNUNET_REGEX_StateSet));
1303 return s; 1283 return s;
1304} 1284}
1305 1285
@@ -1938,28 +1918,27 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
1938/** 1918/**
1939 * Calculates the NFA closure set for the given state. 1919 * Calculates the NFA closure set for the given state.
1940 * 1920 *
1921 * @param cls set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
1941 * @param nfa the NFA containing 's' 1922 * @param nfa the NFA containing 's'
1942 * @param s starting point state 1923 * @param s starting point state
1943 * @param label transitioning label on which to base the closure on, 1924 * @param label transitioning label on which to base the closure on,
1944 * pass NULL for epsilon transition 1925 * pass NULL for epsilon transition
1945 *
1946 * @return sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
1947 */ 1926 */
1948static struct GNUNET_REGEX_StateSet * 1927static void
1949nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa, 1928nfa_closure_create (struct GNUNET_REGEX_StateSet *cls,
1929 struct GNUNET_REGEX_Automaton *nfa,
1950 struct GNUNET_REGEX_State *s, const char *label) 1930 struct GNUNET_REGEX_State *s, const char *label)
1951{ 1931{
1952 unsigned int i; 1932 unsigned int i;
1953 struct GNUNET_REGEX_StateSet *cls;
1954 struct GNUNET_REGEX_StateSet_MDLL cls_stack; 1933 struct GNUNET_REGEX_StateSet_MDLL cls_stack;
1955 struct GNUNET_REGEX_State *clsstate; 1934 struct GNUNET_REGEX_State *clsstate;
1956 struct GNUNET_REGEX_State *currentstate; 1935 struct GNUNET_REGEX_State *currentstate;
1957 struct GNUNET_REGEX_Transition *ctran; 1936 struct GNUNET_REGEX_Transition *ctran;
1958 1937
1938 memset (cls, 0, sizeof (struct GNUNET_REGEX_StateSet));
1959 if (NULL == s) 1939 if (NULL == s)
1960 return NULL; 1940 return;
1961 1941
1962 cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
1963 cls_stack.head = NULL; 1942 cls_stack.head = NULL;
1964 cls_stack.tail = NULL; 1943 cls_stack.tail = NULL;
1965 1944
@@ -2002,65 +1981,58 @@ nfa_closure_create (struct GNUNET_REGEX_Automaton *nfa,
2002 if (cls->len > 1) 1981 if (cls->len > 1)
2003 qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), 1982 qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *),
2004 &state_compare); 1983 &state_compare);
2005
2006 return cls;
2007} 1984}
2008 1985
2009 1986
2010/** 1987/**
2011 * Calculates the closure set for the given set of states. 1988 * Calculates the closure set for the given set of states.
2012 * 1989 *
1990 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
2013 * @param nfa the NFA containing 's' 1991 * @param nfa the NFA containing 's'
2014 * @param states list of states on which to base the closure on 1992 * @param states list of states on which to base the closure on
2015 * @param label transitioning label for which to base the closure on, 1993 * @param label transitioning label for which to base the closure on,
2016 * pass NULL for epsilon transition 1994 * pass NULL for epsilon transition
2017 *
2018 * @return sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
2019 */ 1995 */
2020static struct GNUNET_REGEX_StateSet * 1996static void
2021nfa_closure_set_create (struct GNUNET_REGEX_Automaton *nfa, 1997nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret,
1998 struct GNUNET_REGEX_Automaton *nfa,
2022 struct GNUNET_REGEX_StateSet *states, const char *label) 1999 struct GNUNET_REGEX_StateSet *states, const char *label)
2023{ 2000{
2024 struct GNUNET_REGEX_State *s; 2001 struct GNUNET_REGEX_State *s;
2025 struct GNUNET_REGEX_StateSet *sset; 2002 struct GNUNET_REGEX_StateSet sset;
2026 struct GNUNET_REGEX_StateSet *cls;
2027 unsigned int i; 2003 unsigned int i;
2028 unsigned int j; 2004 unsigned int j;
2029 unsigned int k; 2005 unsigned int k;
2030 unsigned int contains; 2006 unsigned int contains;
2031 2007
2008 memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet));
2032 if (NULL == states) 2009 if (NULL == states)
2033 return NULL; 2010 return;
2034
2035 cls = GNUNET_malloc (sizeof (struct GNUNET_REGEX_StateSet));
2036 2011
2037 for (i = 0; i < states->len; i++) 2012 for (i = 0; i < states->len; i++)
2038 { 2013 {
2039 s = states->states[i]; 2014 s = states->states[i];
2040 sset = nfa_closure_create (nfa, s, label); 2015 nfa_closure_create (&sset, nfa, s, label);
2041 2016 for (j = 0; j < sset.len; j++)
2042 for (j = 0; j < sset->len; j++)
2043 { 2017 {
2044 contains = 0; 2018 contains = 0;
2045 for (k = 0; k < cls->len; k++) 2019 for (k = 0; k < ret->len; k++)
2046 { 2020 {
2047 if (sset->states[j]->id == cls->states[k]->id) 2021 if (sset.states[j]->id == ret->states[k]->id)
2048 { 2022 {
2049 contains = 1; 2023 contains = 1;
2050 break; 2024 break;
2051 } 2025 }
2052 } 2026 }
2053 if (!contains) 2027 if (!contains)
2054 GNUNET_array_append (cls->states, cls->len, sset->states[j]); 2028 GNUNET_array_append (ret->states, ret->len, sset.states[j]);
2055 } 2029 }
2056 state_set_clear (sset); 2030 state_set_clear (&sset);
2057 } 2031 }
2058 2032
2059 if (cls->len > 1) 2033 if (ret->len > 1)
2060 qsort (cls->states, cls->len, sizeof (struct GNUNET_REGEX_State *), 2034 qsort (ret->states, ret->len, sizeof (struct GNUNET_REGEX_State *),
2061 state_compare); 2035 state_compare);
2062
2063 return cls;
2064} 2036}
2065 2037
2066 2038
@@ -2497,17 +2469,17 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
2497 struct GNUNET_REGEX_State *state_iter; 2469 struct GNUNET_REGEX_State *state_iter;
2498 struct GNUNET_REGEX_State *new_dfa_state; 2470 struct GNUNET_REGEX_State *new_dfa_state;
2499 struct GNUNET_REGEX_State *state_contains; 2471 struct GNUNET_REGEX_State *state_contains;
2500 struct GNUNET_REGEX_StateSet *tmp; 2472 struct GNUNET_REGEX_StateSet tmp;
2501 struct GNUNET_REGEX_StateSet *nfa_set; 2473 struct GNUNET_REGEX_StateSet nfa_set;
2502 2474
2503 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next) 2475 for (ctran = dfa_state->transitions_head; NULL != ctran; ctran = ctran->next)
2504 { 2476 {
2505 if (NULL == ctran->label || NULL != ctran->to_state) 2477 if (NULL == ctran->label || NULL != ctran->to_state)
2506 continue; 2478 continue;
2507 2479
2508 tmp = nfa_closure_set_create (nfa, dfa_state->nfa_set, ctran->label); 2480 nfa_closure_set_create (&tmp, nfa, &dfa_state->nfa_set, ctran->label);
2509 nfa_set = nfa_closure_set_create (nfa, tmp, NULL); 2481 nfa_closure_set_create (&nfa_set, nfa, &tmp, NULL);
2510 state_set_clear (tmp); 2482 state_set_clear (&tmp);
2511 2483
2512 /* FIXME: this O(n) loop can be done in O(1) with a hash map */ 2484 /* FIXME: this O(n) loop can be done in O(1) with a hash map */
2513 state_contains = NULL; 2485 state_contains = NULL;
@@ -2515,7 +2487,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
2515 state_iter = state_iter->next) 2487 state_iter = state_iter->next)
2516 { 2488 {
2517 loopy++; 2489 loopy++;
2518 if (0 == state_set_compare (state_iter->nfa_set, nfa_set)) 2490 if (0 == state_set_compare (&state_iter->nfa_set, &nfa_set))
2519 { 2491 {
2520 state_contains = state_iter; 2492 state_contains = state_iter;
2521 break; 2493 break;
@@ -2524,7 +2496,7 @@ construct_dfa_states (struct GNUNET_REGEX_Context *ctx,
2524 loopy--; 2496 loopy--;
2525 if (NULL == state_contains) 2497 if (NULL == state_contains)
2526 { 2498 {
2527 new_dfa_state = dfa_state_create (ctx, nfa_set); 2499 new_dfa_state = dfa_state_create (ctx, &nfa_set);
2528 automaton_add_state (dfa, new_dfa_state); 2500 automaton_add_state (dfa, new_dfa_state);
2529 ctran->to_state = new_dfa_state; 2501 ctran->to_state = new_dfa_state;
2530 construct_dfa_states (ctx, nfa, dfa, new_dfa_state); 2502 construct_dfa_states (ctx, nfa, dfa, new_dfa_state);
@@ -2561,7 +2533,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2561 struct GNUNET_REGEX_Context ctx; 2533 struct GNUNET_REGEX_Context ctx;
2562 struct GNUNET_REGEX_Automaton *dfa; 2534 struct GNUNET_REGEX_Automaton *dfa;
2563 struct GNUNET_REGEX_Automaton *nfa; 2535 struct GNUNET_REGEX_Automaton *nfa;
2564 struct GNUNET_REGEX_StateSet *nfa_start_eps_cls; 2536 struct GNUNET_REGEX_StateSet nfa_start_eps_cls;
2565 2537
2566 GNUNET_REGEX_context_init (&ctx); 2538 GNUNET_REGEX_context_init (&ctx);
2567 2539
@@ -2581,13 +2553,13 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2581 dfa->regex = GNUNET_strdup (regex); 2553 dfa->regex = GNUNET_strdup (regex);
2582 2554
2583 /* Create DFA start state from epsilon closure */ 2555 /* Create DFA start state from epsilon closure */
2584 nfa_start_eps_cls = nfa_closure_create (nfa, nfa->start, NULL); 2556 nfa_closure_create (&nfa_start_eps_cls, nfa, nfa->start, NULL);
2585 dfa->start = dfa_state_create (&ctx, nfa_start_eps_cls); 2557 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
2586 automaton_add_state (dfa, dfa->start); 2558 automaton_add_state (dfa, dfa->start);
2587 2559
2588 fprintf (stderr, "D"); 2560 fprintf (stderr, "D");
2589 construct_dfa_states (&ctx, nfa, dfa, dfa->start); 2561 construct_dfa_states (&ctx, nfa, dfa, dfa->start);
2590 fprintf (stderr, "D-X: %llu\n", loopy); 2562 // fprintf (stderr, "D-X: %llu\n", loopy);
2591 GNUNET_REGEX_automaton_destroy (nfa); 2563 GNUNET_REGEX_automaton_destroy (nfa);
2592 2564
2593 /* Minimize DFA */ 2565 /* Minimize DFA */
@@ -2595,8 +2567,8 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2595 dfa_minimize (&ctx, dfa); 2567 dfa_minimize (&ctx, dfa);
2596 2568
2597 /* Create proofs and hashes for all states */ 2569 /* Create proofs and hashes for all states */
2598 fprintf (stderr, "P"); 2570 // fprintf (stderr, "P");
2599 // automaton_create_proofs (dfa); 2571 automaton_create_proofs (dfa);
2600 2572
2601 /* Compress linear DFA paths */ 2573 /* Compress linear DFA paths */
2602 if (1 != max_path_len) 2574 if (1 != max_path_len)
@@ -2692,8 +2664,8 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2692 const char *strp; 2664 const char *strp;
2693 char str[2]; 2665 char str[2];
2694 struct GNUNET_REGEX_State *s; 2666 struct GNUNET_REGEX_State *s;
2695 struct GNUNET_REGEX_StateSet *sset; 2667 struct GNUNET_REGEX_StateSet sset;
2696 struct GNUNET_REGEX_StateSet *new_sset; 2668 struct GNUNET_REGEX_StateSet new_sset;
2697 unsigned int i; 2669 unsigned int i;
2698 int result; 2670 int result;
2699 2671
@@ -2709,29 +2681,29 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2709 return 0; 2681 return 0;
2710 2682
2711 result = 1; 2683 result = 1;
2712 sset = nfa_closure_create (a, a->start, 0); 2684 nfa_closure_create (&sset, a, a->start, NULL);
2713 2685
2714 str[1] = '\0'; 2686 str[1] = '\0';
2715 for (strp = string; NULL != strp && *strp; strp++) 2687 for (strp = string; NULL != strp && *strp; strp++)
2716 { 2688 {
2717 str[0] = *strp; 2689 str[0] = *strp;
2718 new_sset = nfa_closure_set_create (a, sset, str); 2690 nfa_closure_set_create (&new_sset, a, &sset, str);
2719 state_set_clear (sset); 2691 state_set_clear (&sset);
2720 sset = nfa_closure_set_create (a, new_sset, 0); 2692 nfa_closure_set_create (&sset, a, &new_sset, 0);
2721 state_set_clear (new_sset); 2693 state_set_clear (&new_sset);
2722 } 2694 }
2723 2695
2724 for (i = 0; i < sset->len; i++) 2696 for (i = 0; i < sset.len; i++)
2725 { 2697 {
2726 s = sset->states[i]; 2698 s = sset.states[i];
2727 if (NULL != s && s->accepting) 2699 if ( (NULL != s) && (s->accepting) )
2728 { 2700 {
2729 result = 0; 2701 result = 0;
2730 break; 2702 break;
2731 } 2703 }
2732 } 2704 }
2733 2705
2734 state_set_clear (sset); 2706 state_set_clear (&sset);
2735 return result; 2707 return result;
2736} 2708}
2737 2709
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
index c25b938c3..fe76d1537 100644
--- a/src/regex/regex_internal.h
+++ b/src/regex/regex_internal.h
@@ -84,6 +84,29 @@ struct GNUNET_REGEX_Transition
84/** 84/**
85 * A state. Can be used in DFA and NFA automatons. 85 * A state. Can be used in DFA and NFA automatons.
86 */ 86 */
87struct GNUNET_REGEX_State;
88
89
90/**
91 * Set of states.
92 */
93struct GNUNET_REGEX_StateSet
94{
95 /**
96 * Array of states.
97 */
98 struct GNUNET_REGEX_State **states;
99
100 /**
101 * Length of the 'states' array.
102 */
103 unsigned int len;
104};
105
106
107/**
108 * A state. Can be used in DFA and NFA automatons.
109 */
87struct GNUNET_REGEX_State 110struct GNUNET_REGEX_State
88{ 111{
89 /** 112 /**
@@ -210,7 +233,7 @@ struct GNUNET_REGEX_State
210 * Set of states on which this state is based on. Used when creating a DFA out 233 * Set of states on which this state is based on. Used when creating a DFA out
211 * of several NFA states. 234 * of several NFA states.
212 */ 235 */
213 struct GNUNET_REGEX_StateSet *nfa_set; 236 struct GNUNET_REGEX_StateSet nfa_set;
214}; 237};
215 238
216 239