aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-09-20 21:46:06 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-09-20 21:46:06 +0000
commit4eb238e5115333e3a5724e4ba787e8e9c60cdf2e (patch)
treeb5e67562a94cd3a7b3564908f222f0d6b420efbe /src/regex
parentd9e210bffb15ca0b3b69faa5b56c01fc31a64b71 (diff)
downloadgnunet-4eb238e5115333e3a5724e4ba787e8e9c60cdf2e.tar.gz
gnunet-4eb238e5115333e3a5724e4ba787e8e9c60cdf2e.zip
optimizations
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/regex.c75
1 files changed, 54 insertions, 21 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index d659cb949..a5d84ce95 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -642,13 +642,19 @@ GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
642 const unsigned int stride_len) 642 const unsigned int stride_len)
643{ 643{
644 struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL }; 644 struct GNUNET_REGEX_Strided_Context ctx = { stride_len, NULL, NULL };
645 struct GNUNET_REGEX_State *s;
646 struct GNUNET_REGEX_State *s_next;
645 struct GNUNET_REGEX_Transition *t; 647 struct GNUNET_REGEX_Transition *t;
646 struct GNUNET_REGEX_Transition *t_next; 648 struct GNUNET_REGEX_Transition *t_next;
647 649
648 if (1 > stride_len || GNUNET_YES == dfa->is_multistrided) 650 if (1 > stride_len || GNUNET_YES == dfa->is_multistrided)
649 return; 651 return;
650 652
651 // Compute the new transitions. 653 // Unmark all states
654 for (s = dfa->states_head; NULL != s; s = s->next)
655 s->marked = GNUNET_NO;
656
657 // Compute the new transitions of given stride_len
652 GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL, 658 GNUNET_REGEX_automaton_traverse (dfa, dfa->start, NULL, NULL,
653 &add_multi_strides_to_dfa, &ctx); 659 &add_multi_strides_to_dfa, &ctx);
654 660
@@ -662,6 +668,15 @@ GNUNET_REGEX_add_multi_strides_to_dfa (struct GNUNET_REGEX_Context *regex_ctx,
662 GNUNET_free (t); 668 GNUNET_free (t);
663 } 669 }
664 670
671 // Remove marked states (including their incoming and outgoing transitions)
672 for (s = dfa->states_head; NULL != s; s = s_next)
673 {
674 s_next = s->next;
675 if (GNUNET_YES == s->marked)
676 automaton_remove_state (dfa, s);
677 }
678
679 // Mark this automaton as multistrided
665 dfa->is_multistrided = GNUNET_YES; 680 dfa->is_multistrided = GNUNET_YES;
666} 681}
667 682
@@ -1425,36 +1440,47 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx,
1425 1440
1426 1441
1427/** 1442/**
1428 * Move from the given state 's' to the next state on transition 'label' 1443 * Move from the given state 's' to the next state on transition 'str'. Consumes
1444 * as much of the given 'str' as possible (usefull for strided DFAs). On return
1445 * 's' will point to the next state, and the length of the substring used for
1446 * this transition will be returned. If no transition possible 0 is returned and
1447 * 's' points to NULL.
1429 * 1448 *
1430 * @param s starting state 1449 * @param s starting state, will point to the next state or NULL (if no
1431 * @param label edge label to follow 1450 * transition possible)
1451 * @param str edge label to follow (will match longest common prefix)
1432 * 1452 *
1433 * @return new state or NULL, if transition on label not possible 1453 * @return length of the substring comsumed from 'str'
1434 */ 1454 */
1435static struct GNUNET_REGEX_State * 1455static unsigned int
1436dfa_move (struct GNUNET_REGEX_State *s, const char *label) 1456dfa_move (struct GNUNET_REGEX_State **s, const char *str)
1437{ 1457{
1438 struct GNUNET_REGEX_Transition *t; 1458 struct GNUNET_REGEX_Transition *t;
1439 struct GNUNET_REGEX_State *new_s; 1459 struct GNUNET_REGEX_State *new_s;
1460 unsigned int len;
1461 unsigned int max_len;
1440 1462
1441 if (NULL == s) 1463 if (NULL == s)
1442 return NULL; 1464 return 0;
1443 1465
1444 new_s = NULL; 1466 new_s = NULL;
1445 1467 max_len = 0;
1446 for (t = s->transitions_head; NULL != t; t = t->next) 1468 for (t = (*s)->transitions_head; NULL != t; t = t->next)
1447 { 1469 {
1448 // TODO: Use strstr to match substring and return number of char's that have 1470 len = strlen (t->label);
1449 // been consumed' 1471
1450 if (0 == strcmp (label, t->label)) 1472 if (0 == strncmp (t->label, str, len))
1451 { 1473 {
1452 new_s = t->to_state; 1474 if (len >= max_len)
1453 break; 1475 {
1476 max_len = len;
1477 new_s = t->to_state;
1478 }
1454 } 1479 }
1455 } 1480 }
1456 1481
1457 return new_s; 1482 *s = new_s;
1483 return max_len;
1458} 1484}
1459 1485
1460/** 1486/**
@@ -2142,6 +2168,14 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
2142 int atomcount; 2168 int atomcount;
2143 } *p; 2169 } *p;
2144 2170
2171 if (NULL == regex || 0 == strlen (regex) || 0 == len)
2172 {
2173 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2174 "Could not parse regex. Empty regex string provided.\n");
2175
2176 return NULL;
2177 }
2178
2145 GNUNET_REGEX_context_init (&ctx); 2179 GNUNET_REGEX_context_init (&ctx);
2146 2180
2147 regexp = regex; 2181 regexp = regex;
@@ -2439,8 +2473,8 @@ static int
2439evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string) 2473evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2440{ 2474{
2441 const char *strp; 2475 const char *strp;
2442 char str[2];
2443 struct GNUNET_REGEX_State *s; 2476 struct GNUNET_REGEX_State *s;
2477 unsigned int step_len;
2444 2478
2445 if (DFA != a->type) 2479 if (DFA != a->type)
2446 { 2480 {
@@ -2455,11 +2489,10 @@ evaluate_dfa (struct GNUNET_REGEX_Automaton *a, const char *string)
2455 if ((NULL == string || 0 == strlen (string)) && s->accepting) 2489 if ((NULL == string || 0 == strlen (string)) && s->accepting)
2456 return 0; 2490 return 0;
2457 2491
2458 str[1] = '\0'; 2492 for (strp = string; NULL != strp && *strp; strp += step_len)
2459 for (strp = string; NULL != strp && *strp; strp++)
2460 { 2493 {
2461 str[0] = *strp; 2494 step_len = dfa_move (&s, strp);
2462 s = dfa_move (s, str); 2495
2463 if (NULL == s) 2496 if (NULL == s)
2464 break; 2497 break;
2465 } 2498 }