diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-12-13 20:01:08 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-12-13 20:01:08 +0000 |
commit | a53650cbce6fe26a2b53bb5840ada3e020d9b89b (patch) | |
tree | be2ba4f53caca8ed026a34227731b8a5dd9e70ae /src | |
parent | a24191dfa264e888ce9a19441bcf317e84345a93 (diff) | |
download | gnunet-a53650cbce6fe26a2b53bb5840ada3e020d9b89b.tar.gz gnunet-a53650cbce6fe26a2b53bb5840ada3e020d9b89b.zip |
-reducing CPU usage from nfa_closure_set_create by avoiding double-sorting and quadratic scan
Diffstat (limited to 'src')
-rw-r--r-- | src/regex/regex.c | 139 |
1 files changed, 47 insertions, 92 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 677894875..bc7a9e742 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -1934,75 +1934,6 @@ nfa_state_create (struct GNUNET_REGEX_Context *ctx, int accepting) | |||
1934 | 1934 | ||
1935 | 1935 | ||
1936 | /** | 1936 | /** |
1937 | * Calculates the NFA closure set for the given state. | ||
1938 | * | ||
1939 | * @param cls set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | ||
1940 | * @param nfa the NFA containing 's' | ||
1941 | * @param s starting point state | ||
1942 | * @param label transitioning label on which to base the closure on, | ||
1943 | * pass NULL for epsilon transition | ||
1944 | */ | ||
1945 | static void | ||
1946 | nfa_closure_create (struct GNUNET_REGEX_StateSet *cls, | ||
1947 | struct GNUNET_REGEX_Automaton *nfa, | ||
1948 | struct GNUNET_REGEX_State *s, const char *label) | ||
1949 | { | ||
1950 | unsigned int i; | ||
1951 | struct GNUNET_REGEX_StateSet_MDLL cls_stack; | ||
1952 | struct GNUNET_REGEX_State *clsstate; | ||
1953 | struct GNUNET_REGEX_State *currentstate; | ||
1954 | struct GNUNET_REGEX_Transition *ctran; | ||
1955 | |||
1956 | memset (cls, 0, sizeof (struct GNUNET_REGEX_StateSet)); | ||
1957 | if (NULL == s) | ||
1958 | return; | ||
1959 | |||
1960 | cls_stack.head = NULL; | ||
1961 | cls_stack.tail = NULL; | ||
1962 | |||
1963 | /* Add start state to closure only for epsilon closure */ | ||
1964 | if (NULL == label) | ||
1965 | state_set_append (cls, s); | ||
1966 | |||
1967 | GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s); | ||
1968 | cls_stack.len = 1; | ||
1969 | |||
1970 | while (cls_stack.len > 0) | ||
1971 | { | ||
1972 | currentstate = cls_stack.tail; | ||
1973 | GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail, | ||
1974 | currentstate); | ||
1975 | cls_stack.len--; | ||
1976 | |||
1977 | for (ctran = currentstate->transitions_head; NULL != ctran; | ||
1978 | ctran = ctran->next) | ||
1979 | { | ||
1980 | if (NULL == (clsstate = ctran->to_state)) | ||
1981 | continue; | ||
1982 | if (0 != nullstrcmp (label, ctran->label)) | ||
1983 | continue; | ||
1984 | if (0 == clsstate->contained) | ||
1985 | { | ||
1986 | state_set_append (cls, clsstate); | ||
1987 | GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail, | ||
1988 | clsstate); | ||
1989 | cls_stack.len++; | ||
1990 | clsstate->contained = 1; | ||
1991 | } | ||
1992 | } | ||
1993 | } | ||
1994 | |||
1995 | for (i = 0; i < cls->off; i++) | ||
1996 | cls->states[i]->contained = 0; | ||
1997 | |||
1998 | /* sort the states */ | ||
1999 | if (cls->off > 1) | ||
2000 | qsort (cls->states, cls->off, sizeof (struct GNUNET_REGEX_State *), | ||
2001 | &state_compare); | ||
2002 | } | ||
2003 | |||
2004 | |||
2005 | /** | ||
2006 | * Calculates the closure set for the given set of states. | 1937 | * Calculates the closure set for the given set of states. |
2007 | * | 1938 | * |
2008 | * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) | 1939 | * @param ret set to sorted nfa closure on 'label' (epsilon closure if 'label' is NULL) |
@@ -2017,11 +1948,11 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret, | |||
2017 | struct GNUNET_REGEX_StateSet *states, const char *label) | 1948 | struct GNUNET_REGEX_StateSet *states, const char *label) |
2018 | { | 1949 | { |
2019 | struct GNUNET_REGEX_State *s; | 1950 | struct GNUNET_REGEX_State *s; |
2020 | struct GNUNET_REGEX_StateSet sset; | ||
2021 | unsigned int i; | 1951 | unsigned int i; |
2022 | unsigned int j; | 1952 | struct GNUNET_REGEX_StateSet_MDLL cls_stack; |
2023 | unsigned int k; | 1953 | struct GNUNET_REGEX_State *clsstate; |
2024 | unsigned int contains; | 1954 | struct GNUNET_REGEX_State *currentstate; |
1955 | struct GNUNET_REGEX_Transition *ctran; | ||
2025 | 1956 | ||
2026 | memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet)); | 1957 | memset (ret, 0, sizeof (struct GNUNET_REGEX_StateSet)); |
2027 | if (NULL == states) | 1958 | if (NULL == states) |
@@ -2030,23 +1961,41 @@ nfa_closure_set_create (struct GNUNET_REGEX_StateSet *ret, | |||
2030 | for (i = 0; i < states->off; i++) | 1961 | for (i = 0; i < states->off; i++) |
2031 | { | 1962 | { |
2032 | s = states->states[i]; | 1963 | s = states->states[i]; |
2033 | nfa_closure_create (&sset, nfa, s, label); | 1964 | |
2034 | for (j = 0; j < sset.off; j++) | 1965 | /* Add start state to closure only for epsilon closure */ |
1966 | if (NULL == label) | ||
1967 | state_set_append (ret, s); | ||
1968 | |||
1969 | /* initialize work stack */ | ||
1970 | cls_stack.head = NULL; | ||
1971 | cls_stack.tail = NULL; | ||
1972 | GNUNET_CONTAINER_MDLL_insert (ST, cls_stack.head, cls_stack.tail, s); | ||
1973 | cls_stack.len = 1; | ||
1974 | |||
1975 | while (NULL != (currentstate = cls_stack.tail)) | ||
2035 | { | 1976 | { |
2036 | contains = 0; | 1977 | GNUNET_CONTAINER_MDLL_remove (ST, cls_stack.head, cls_stack.tail, |
2037 | for (k = 0; k < ret->off; k++) | 1978 | currentstate); |
1979 | cls_stack.len--; | ||
1980 | for (ctran = currentstate->transitions_head; NULL != ctran; | ||
1981 | ctran = ctran->next) | ||
2038 | { | 1982 | { |
2039 | if (sset.states[j]->id == ret->states[k]->id) | 1983 | if (NULL == (clsstate = ctran->to_state)) |
2040 | { | 1984 | continue; |
2041 | contains = 1; | 1985 | if (0 != clsstate->contained) |
2042 | break; | 1986 | continue; |
2043 | } | 1987 | if (0 != nullstrcmp (label, ctran->label)) |
2044 | } | 1988 | continue; |
2045 | if (!contains) | 1989 | state_set_append (ret, clsstate); |
2046 | state_set_append (ret, sset.states[j]); | 1990 | GNUNET_CONTAINER_MDLL_insert_tail (ST, cls_stack.head, cls_stack.tail, |
1991 | clsstate); | ||
1992 | cls_stack.len++; | ||
1993 | clsstate->contained = 1; | ||
1994 | } | ||
2047 | } | 1995 | } |
2048 | state_set_clear (&sset); | ||
2049 | } | 1996 | } |
1997 | for (i = 0; i < ret->off; i++) | ||
1998 | ret->states[i]->contained = 0; | ||
2050 | 1999 | ||
2051 | if (ret->off > 1) | 2000 | if (ret->off > 1) |
2052 | qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *), | 2001 | qsort (ret->states, ret->off, sizeof (struct GNUNET_REGEX_State *), |
@@ -2469,9 +2418,7 @@ error: | |||
2469 | return NULL; | 2418 | return NULL; |
2470 | } | 2419 | } |
2471 | 2420 | ||
2472 | 2421 | static unsigned long long doopy,loopy; | |
2473 | static unsigned long long loopy; | ||
2474 | static unsigned long long doopy; | ||
2475 | 2422 | ||
2476 | /** | 2423 | /** |
2477 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. | 2424 | * Create DFA states based on given 'nfa' and starting with 'dfa_state'. |
@@ -2558,6 +2505,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2558 | struct GNUNET_REGEX_Automaton *dfa; | 2505 | struct GNUNET_REGEX_Automaton *dfa; |
2559 | struct GNUNET_REGEX_Automaton *nfa; | 2506 | struct GNUNET_REGEX_Automaton *nfa; |
2560 | struct GNUNET_REGEX_StateSet nfa_start_eps_cls; | 2507 | struct GNUNET_REGEX_StateSet nfa_start_eps_cls; |
2508 | struct GNUNET_REGEX_StateSet singleton_set; | ||
2561 | 2509 | ||
2562 | GNUNET_REGEX_context_init (&ctx); | 2510 | GNUNET_REGEX_context_init (&ctx); |
2563 | 2511 | ||
@@ -2577,7 +2525,10 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2577 | dfa->regex = GNUNET_strdup (regex); | 2525 | dfa->regex = GNUNET_strdup (regex); |
2578 | 2526 | ||
2579 | /* Create DFA start state from epsilon closure */ | 2527 | /* Create DFA start state from epsilon closure */ |
2580 | nfa_closure_create (&nfa_start_eps_cls, nfa, nfa->start, NULL); | 2528 | memset (&singleton_set, 0, sizeof (struct GNUNET_REGEX_StateSet)); |
2529 | state_set_append (&singleton_set, nfa->start); | ||
2530 | nfa_closure_set_create (&nfa_start_eps_cls, nfa, &singleton_set, NULL); | ||
2531 | state_set_clear (&singleton_set); | ||
2581 | dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls); | 2532 | dfa->start = dfa_state_create (&ctx, &nfa_start_eps_cls); |
2582 | automaton_add_state (dfa, dfa->start); | 2533 | automaton_add_state (dfa, dfa->start); |
2583 | 2534 | ||
@@ -2591,7 +2542,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2591 | dfa_minimize (&ctx, dfa); | 2542 | dfa_minimize (&ctx, dfa); |
2592 | 2543 | ||
2593 | /* Create proofs and hashes for all states */ | 2544 | /* Create proofs and hashes for all states */ |
2594 | automaton_create_proofs (dfa); | 2545 | // automaton_create_proofs (dfa); |
2595 | 2546 | ||
2596 | /* Compress linear DFA paths */ | 2547 | /* Compress linear DFA paths */ |
2597 | if (1 != max_path_len) | 2548 | if (1 != max_path_len) |
@@ -2689,6 +2640,7 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2689 | struct GNUNET_REGEX_State *s; | 2640 | struct GNUNET_REGEX_State *s; |
2690 | struct GNUNET_REGEX_StateSet sset; | 2641 | struct GNUNET_REGEX_StateSet sset; |
2691 | struct GNUNET_REGEX_StateSet new_sset; | 2642 | struct GNUNET_REGEX_StateSet new_sset; |
2643 | struct GNUNET_REGEX_StateSet singleton_set; | ||
2692 | unsigned int i; | 2644 | unsigned int i; |
2693 | int result; | 2645 | int result; |
2694 | 2646 | ||
@@ -2704,7 +2656,10 @@ evaluate_nfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2704 | return 0; | 2656 | return 0; |
2705 | 2657 | ||
2706 | result = 1; | 2658 | result = 1; |
2707 | nfa_closure_create (&sset, a, a->start, NULL); | 2659 | memset (&singleton_set, 0, sizeof (struct GNUNET_REGEX_StateSet)); |
2660 | state_set_append (&singleton_set, a->start); | ||
2661 | nfa_closure_set_create (&sset, a, &singleton_set, NULL); | ||
2662 | state_set_clear (&singleton_set); | ||
2708 | 2663 | ||
2709 | str[1] = '\0'; | 2664 | str[1] = '\0'; |
2710 | for (strp = string; NULL != strp && *strp; strp++) | 2665 | for (strp = string; NULL != strp && *strp; strp++) |