aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-12-13 20:01:08 +0000
committerChristian Grothoff <christian@grothoff.org>2012-12-13 20:01:08 +0000
commita53650cbce6fe26a2b53bb5840ada3e020d9b89b (patch)
treebe2ba4f53caca8ed026a34227731b8a5dd9e70ae /src
parenta24191dfa264e888ce9a19441bcf317e84345a93 (diff)
downloadgnunet-a53650cbce6fe26a2b53bb5840ada3e020d9b89b.tar.gz
gnunet-a53650cbce6fe26a2b53bb5840ada3e020d9b89b.zip
-reducing CPU usage from nfa_closure_set_create by avoiding double-sorting and quadratic scan
Diffstat (limited to 'src')
-rw-r--r--src/regex/regex.c139
1 files changed, 47 insertions, 92 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 677894875..bc7a9e742 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -1934,75 +1934,6 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting)
1934 1934
1935 1935
1936/** 1936/**
1937 * Calculates the NFA closure set for the given state.
1938 *
1939 * @param cls set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
1940 * @param nfa the NFA containing 's'
1941 * @param s starting point state
1942 * @param label transitioning label on which to base the closure on,
1943 * pass NULL for epsilon transition
1944 */
1945static void
1946nfa_closure_create (struct GNUNET_REGEX_StateSet *cls,
1947 struct GNUNET_REGEX_Automaton *nfa,
1948 struct GNUNET_REGEX_State *s, const char *label)
1949{
1950 unsigned int i;
1951 struct GNUNET_REGEX_StateSet_MDLL cls_stack;
1952 struct GNUNET_REGEX_State *clsstate;
1953 struct GNUNET_REGEX_State *currentstate;
1954 struct GNUNET_REGEX_Transition *ctran;
1955
1956 memset (cls, 0, sizeof (struct GNUNET_REGEX_StateSet));
1957 if (NULL == s)
1958 return;
1959
1960 cls_stack.head = NULL;
1961 cls_stack.tail = NULL;
1962
1963 /* Add start state to closure only for epsilon closure */
1964 if (NULL == label)
1965 state_set_append (cls, s);
1966
1967 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
1968 cls_stack.len = 1;
1969
1970 while (cls_stack.len > 0)
1971 {
1972 currentstate = cls_stack.tail;
1973 GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
1974 currentstate);
1975 cls_stack.len--;
1976
1977 for (ctran = currentstate->transitions_head; NULL != ctran;
1978 ctran = ctran->next)
1979 {
1980 if (NULL == (clsstate = ctran->to_state))
1981 continue;
1982 if (0 != nullstrcmp (label, ctran->label))
1983 continue;
1984 if (0 == clsstate->contained)
1985 {
1986 state_set_append (cls, clsstate);
1987 GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail,
1988 clsstate);
1989 cls_stack.len++;
1990 clsstate->contained = 1;
1991 }
1992 }
1993 }
1994
1995 for (i = 0; i < cls->off; i++)
1996 cls->states[i]->contained = 0;
1997
1998 /* sort the states */
1999 if (cls->off > 1)
2000 qsort (cls->states, cls->off, sizeof (struct GNUNET_REGEX_State *),
2001 &state_compare);
2002}
2003
2004
2005/**
2006 * Calculates the closure set for the given set of states. 1937 * Calculates the closure set for the given set of states.
2007 * 1938 *
2008 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) 1939 * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL)
@@ -2017,11 +1948,11 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret,
2017 struct GNUNET_REGEX_StateSet *states, const char *label) 1948 struct GNUNET_REGEX_StateSet *states, const char *label)
2018{ 1949{
2019 struct GNUNET_REGEX_State *s; 1950 struct GNUNET_REGEX_State *s;
2020 struct GNUNET_REGEX_StateSet sset;
2021 unsigned int i; 1951 unsigned int i;
2022 unsigned int j; 1952 struct GNUNET_REGEX_StateSet_MDLL cls_stack;
2023 unsigned int k; 1953 struct GNUNET_REGEX_State *clsstate;
2024 unsigned int contains; 1954 struct GNUNET_REGEX_State *currentstate;
1955 struct GNUNET_REGEX_Transition *ctran;
2025 1956
2026 memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet)); 1957 memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet));
2027 if (NULL == states) 1958 if (NULL == states)
@@ -2030,23 +1961,41 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret,
2030 for (i = 0; i < states->off; i++) 1961 for (i = 0; i < states->off; i++)
2031 { 1962 {
2032 s = states->states[i]; 1963 s = states->states[i];
2033 nfa_closure_create (&sset, nfa, s, label); 1964
2034 for (j = 0; j < sset.off; j++) 1965 /* Add start state to closure only for epsilon closure */
1966 if (NULL == label)
1967 state_set_append (ret, s);
1968
1969 /* initialize work stack */
1970 cls_stack.head = NULL;
1971 cls_stack.tail = NULL;
1972 GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s);
1973 cls_stack.len = 1;
1974
1975 while (NULL != (currentstate = cls_stack.tail))
2035 { 1976 {
2036 contains = 0; 1977 GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail,
2037 for (k = 0; k < ret->off; k++) 1978 currentstate);
1979 cls_stack.len--;
1980 for (ctran = currentstate->transitions_head; NULL != ctran;
1981 ctran = ctran->next)
2038 { 1982 {
2039 if (sset.states[j]->id == ret->states[k]->id) 1983 if (NULL == (clsstate = ctran->to_state))
2040 { 1984 continue;
2041 contains = 1; 1985 if (0 != clsstate->contained)
2042 break; 1986 continue;
2043 } 1987 if (0 != nullstrcmp (label, ctran->label))
2044 } 1988 continue;
2045 if (!contains) 1989 state_set_append (ret, clsstate);
2046 state_set_append (ret, sset.states[j]); 1990 GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail,
1991 clsstate);
1992 cls_stack.len++;
1993 clsstate->contained = 1;
1994 }
2047 } 1995 }
2048 state_set_clear (&sset);
2049 } 1996 }
1997 for (i = 0; i < ret->off; i++)
1998 ret->states[i]->contained = 0;
2050 1999
2051 if (ret->off > 1) 2000 if (ret->off > 1)
2052 qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *), 2001 qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *),
@@ -2469,9 +2418,7 @@ error:
2469 return NULL; 2418 return NULL;
2470} 2419}
2471 2420
2472 2421static unsigned long long doopy,loopy;
2473static unsigned long long loopy;
2474static unsigned long long doopy;
2475 2422
2476/** 2423/**
2477 * Create DFA states based on given 'nfa' and starting with 'dfa_state'. 2424 * Create DFA states based on given 'nfa' and starting with 'dfa_state'.
@@ -2558,6 +2505,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2558 struct GNUNET_REGEX_Automaton *dfa; 2505 struct GNUNET_REGEX_Automaton *dfa;
2559 struct GNUNET_REGEX_Automaton *nfa; 2506 struct GNUNET_REGEX_Automaton *nfa;
2560 struct GNUNET_REGEX_StateSet nfa_start_eps_cls; 2507 struct GNUNET_REGEX_StateSet nfa_start_eps_cls;
2508 struct GNUNET_REGEX_StateSet singleton_set;
2561 2509
2562 GNUNET_REGEX_context_init (&ctx); 2510 GNUNET_REGEX_context_init (&ctx);
2563 2511
@@ -2577,7 +2525,10 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2577 dfa->regex = GNUNET_strdup (regex); 2525 dfa->regex = GNUNET_strdup (regex);
2578 2526
2579 /* Create DFA start state from epsilon closure */ 2527 /* Create DFA start state from epsilon closure */
2580 nfa_closure_create (&nfa_start_eps_cls, nfa, nfa->start, NULL); 2528 memset (&singleton_set, 0, sizeof (struct GNUNET_REGEX_StateSet));
2529 state_set_append (&singleton_set, nfa->start);
2530 nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL);
2531 state_set_clear (&singleton_set);
2581 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls); 2532 dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls);
2582 automaton_add_state (dfa, dfa->start); 2533 automaton_add_state (dfa, dfa->start);
2583 2534
@@ -2591,7 +2542,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len,
2591 dfa_minimize (&ctx, dfa); 2542 dfa_minimize (&ctx, dfa);
2592 2543
2593 /* Create proofs and hashes for all states */ 2544 /* Create proofs and hashes for all states */
2594 automaton_create_proofs (dfa); 2545 // automaton_create_proofs (dfa);
2595 2546
2596 /* Compress linear DFA paths */ 2547 /* Compress linear DFA paths */
2597 if (1 != max_path_len) 2548 if (1 != max_path_len)
@@ -2689,6 +2640,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2689 struct GNUNET_REGEX_State *s; 2640 struct GNUNET_REGEX_State *s;
2690 struct GNUNET_REGEX_StateSet sset; 2641 struct GNUNET_REGEX_StateSet sset;
2691 struct GNUNET_REGEX_StateSet new_sset; 2642 struct GNUNET_REGEX_StateSet new_sset;
2643 struct GNUNET_REGEX_StateSet singleton_set;
2692 unsigned int i; 2644 unsigned int i;
2693 int result; 2645 int result;
2694 2646
@@ -2704,7 +2656,10 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2704 return 0; 2656 return 0;
2705 2657
2706 result = 1; 2658 result = 1;
2707 nfa_closure_create (&sset, a, a->start, NULL); 2659 memset (&singleton_set, 0, sizeof (struct GNUNET_REGEX_StateSet));
2660 state_set_append (&singleton_set, a->start);
2661 nfa_closure_set_create (&sset, a, &singleton_set, NULL);
2662 state_set_clear (&singleton_set);
2708 2663
2709 str[1] = '\0'; 2664 str[1] = '\0';
2710 for (strp = string; NULL != strp && *strp; strp++) 2665 for (strp = string; NULL != strp && *strp; strp++)