diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-09-20 21:46:06 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-09-20 21:46:06 +0000 |
commit | 4eb238e5115333e3a5724e4ba787e8e9c60cdf2e (patch) | |
tree | b5e67562a94cd3a7b3564908f222f0d6b420efbe /src/regex | |
parent | d9e210bffb15ca0b3b69faa5b56c01fc31a64b71 (diff) | |
download | gnunet-4eb238e5115333e3a5724e4ba787e8e9c60cdf2e.tar.gz gnunet-4eb238e5115333e3a5724e4ba787e8e9c60cdf2e.zip |
optimizations
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/regex.c | 75 |
1 files changed, 54 insertions, 21 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index d659cb949..a5d84ce95 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -642,13 +642,19 @@ GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx, | |||
642 | const unsigned int stride_len) | 642 | const unsigned int stride_len) |
643 | { | 643 | { |
644 | struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL }; | 644 | struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL }; |
645 | struct GNUNET_REGEX_State *s; | ||
646 | struct GNUNET_REGEX_State *s_next; | ||
645 | struct GNUNET_REGEX_Transition *t; | 647 | struct GNUNET_REGEX_Transition *t; |
646 | struct GNUNET_REGEX_Transition *t_next; | 648 | struct GNUNET_REGEX_Transition *t_next; |
647 | 649 | ||
648 | if (1 > stride_len || GNUNET_YES == dfa->is_multistrided) | 650 | if (1 > stride_len || GNUNET_YES == dfa->is_multistrided) |
649 | return; | 651 | return; |
650 | 652 | ||
651 | // Compute the new transitions. | 653 | // Unmark all states |
654 | for (s = dfa->states_head; NULL != s; s = s->next) | ||
655 | s->marked = GNUNET_NO; | ||
656 | |||
657 | // Compute the new transitions of given stride_len | ||
652 | GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL, | 658 | GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL, |
653 | &add_multi_strides_to_dfa, &ctx); | 659 | &add_multi_strides_to_dfa, &ctx); |
654 | 660 | ||
@@ -662,6 +668,15 @@ GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx, | |||
662 | GNUNET_free (t); | 668 | GNUNET_free (t); |
663 | } | 669 | } |
664 | 670 | ||
671 | // Remove marked states (including their incoming and outgoing transitions) | ||
672 | for (s = dfa->states_head; NULL != s; s = s_next) | ||
673 | { | ||
674 | s_next = s->next; | ||
675 | if (GNUNET_YES == s->marked) | ||
676 | automaton_remove_state (dfa, s); | ||
677 | } | ||
678 | |||
679 | // Mark this automaton as multistrided | ||
665 | dfa->is_multistrided = GNUNET_YES; | 680 | dfa->is_multistrided = GNUNET_YES; |
666 | } | 681 | } |
667 | 682 | ||
@@ -1425,36 +1440,47 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, | |||
1425 | 1440 | ||
1426 | 1441 | ||
1427 | /** | 1442 | /** |
1428 | * Move from the given state 's' to the next state on transition 'label' | 1443 | * Move from the given state 's' to the next state on transition 'str'. Consumes |
1444 | * as much of the given 'str' as possible (usefull for strided DFAs). On return | ||
1445 | * 's' will point to the next state, and the length of the substring used for | ||
1446 | * this transition will be returned. If no transition possible 0 is returned and | ||
1447 | * 's' points to NULL. | ||
1429 | * | 1448 | * |
1430 | * @param s starting state | 1449 | * @param s starting state, will point to the next state or NULL (if no |
1431 | * @param label edge label to follow | 1450 | * transition possible) |
1451 | * @param str edge label to follow (will match longest common prefix) | ||
1432 | * | 1452 | * |
1433 | * @return new state or NULL, if transition on label not possible | 1453 | * @return length of the substring comsumed from 'str' |
1434 | */ | 1454 | */ |
1435 | static struct GNUNET_REGEX_State * | 1455 | static unsigned int |
1436 | dfa_move (struct GNUNET_REGEX_State *s, const char *label) | 1456 | dfa_move (struct GNUNET_REGEX_State **s, const char *str) |
1437 | { | 1457 | { |
1438 | struct GNUNET_REGEX_Transition *t; | 1458 | struct GNUNET_REGEX_Transition *t; |
1439 | struct GNUNET_REGEX_State *new_s; | 1459 | struct GNUNET_REGEX_State *new_s; |
1460 | unsigned int len; | ||
1461 | unsigned int max_len; | ||
1440 | 1462 | ||
1441 | if (NULL == s) | 1463 | if (NULL == s) |
1442 | return NULL; | 1464 | return 0; |
1443 | 1465 | ||
1444 | new_s = NULL; | 1466 | new_s = NULL; |
1445 | 1467 | max_len = 0; | |
1446 | for (t = s->transitions_head; NULL != t; t = t->next) | 1468 | for (t = (*s)->transitions_head; NULL != t; t = t->next) |
1447 | { | 1469 | { |
1448 | // TODO: Use strstr to match substring and return number of char's that have | 1470 | len = strlen (t->label); |
1449 | // been consumed' | 1471 | |
1450 | if (0 == strcmp (label, t->label)) | 1472 | if (0 == strncmp (t->label, str, len)) |
1451 | { | 1473 | { |
1452 | new_s = t->to_state; | 1474 | if (len >= max_len) |
1453 | break; | 1475 | { |
1476 | max_len = len; | ||
1477 | new_s = t->to_state; | ||
1478 | } | ||
1454 | } | 1479 | } |
1455 | } | 1480 | } |
1456 | 1481 | ||
1457 | return new_s; | 1482 | *s = new_s; |
1483 | return max_len; | ||
1458 | } | 1484 | } |
1459 | 1485 | ||
1460 | /** | 1486 | /** |
@@ -2142,6 +2168,14 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
2142 | int atomcount; | 2168 | int atomcount; |
2143 | } *p; | 2169 | } *p; |
2144 | 2170 | ||
2171 | if (NULL == regex || 0 == strlen (regex) || 0 == len) | ||
2172 | { | ||
2173 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
2174 | "Could not parse regex. Empty regex string provided.\n"); | ||
2175 | |||
2176 | return NULL; | ||
2177 | } | ||
2178 | |||
2145 | GNUNET_REGEX_context_init (&ctx); | 2179 | GNUNET_REGEX_context_init (&ctx); |
2146 | 2180 | ||
2147 | regexp = regex; | 2181 | regexp = regex; |
@@ -2439,8 +2473,8 @@ static int | |||
2439 | evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) | 2473 | evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) |
2440 | { | 2474 | { |
2441 | const char *strp; | 2475 | const char *strp; |
2442 | char str[2]; | ||
2443 | struct GNUNET_REGEX_State *s; | 2476 | struct GNUNET_REGEX_State *s; |
2477 | unsigned int step_len; | ||
2444 | 2478 | ||
2445 | if (DFA != a->type) | 2479 | if (DFA != a->type) |
2446 | { | 2480 | { |
@@ -2455,11 +2489,10 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2455 | if ((NULL == string || 0 == strlen (string)) && s->accepting) | 2489 | if ((NULL == string || 0 == strlen (string)) && s->accepting) |
2456 | return 0; | 2490 | return 0; |
2457 | 2491 | ||
2458 | str[1] = '\0'; | 2492 | for (strp = string; NULL != strp && *strp; strp += step_len) |
2459 | for (strp = string; NULL != strp && *strp; strp++) | ||
2460 | { | 2493 | { |
2461 | str[0] = *strp; | 2494 | step_len = dfa_move (&s, strp); |
2462 | s = dfa_move (s, str); | 2495 | |
2463 | if (NULL == s) | 2496 | if (NULL == s) |
2464 | break; | 2497 | break; |
2465 | } | 2498 | } |