diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-12-14 14:45:01 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-12-14 14:45:01 +0000 |
commit | 047a1fc2bf4fac55aed53128b57800fe6b25688b (patch) | |
tree | 4fd446d0c16b6e88eb7b821d08f9da9c70c3eb41 /src/regex/regex.c | |
parent | b9eae2650d7130f57428927a49912a18246d5201 (diff) | |
download | gnunet-047a1fc2bf4fac55aed53128b57800fe6b25688b.tar.gz gnunet-047a1fc2bf4fac55aed53128b57800fe6b25688b.zip |
-improve swapping behavior
Diffstat (limited to 'src/regex/regex.c')
-rw-r--r-- | src/regex/regex.c | 105 |
1 files changed, 81 insertions, 24 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 19eea14d5..2d1769c1b 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -539,6 +539,31 @@ GNUNET_REGEX_automaton_traverse (const struct GNUNET_REGEX_Automaton *a, | |||
539 | 539 | ||
540 | 540 | ||
541 | /** | 541 | /** |
542 | * String container for faster string operations. | ||
543 | */ | ||
544 | struct StringBuffer | ||
545 | { | ||
546 | /** | ||
547 | * Buffer holding the string. | ||
548 | */ | ||
549 | char *sbuf; | ||
550 | |||
551 | /** | ||
552 | * Length of the string in the buffer. | ||
553 | */ | ||
554 | size_t slen; | ||
555 | |||
556 | /** | ||
557 | * Number of bytes allocated for 'sbuf' | ||
558 | */ | ||
559 | size_t blen; | ||
560 | }; | ||
561 | |||
562 | |||
563 | |||
564 | |||
565 | |||
566 | /** | ||
542 | * Check if the given string 'str' needs parentheses around it when | 567 | * Check if the given string 'str' needs parentheses around it when |
543 | * using it to generate a regex. | 568 | * using it to generate a regex. |
544 | * | 569 | * |
@@ -642,7 +667,7 @@ has_epsilon (const char *str) | |||
642 | * epsilon could be found, NULL if 'str' was NULL | 667 | * epsilon could be found, NULL if 'str' was NULL |
643 | */ | 668 | */ |
644 | static char * | 669 | static char * |
645 | remove_epsilon (char *str) | 670 | remove_epsilon (const char *str) |
646 | { | 671 | { |
647 | size_t len; | 672 | size_t len; |
648 | 673 | ||
@@ -709,8 +734,10 @@ number_states (void *cls, const unsigned int count, | |||
709 | * is expected to be NULL when called! | 734 | * is expected to be NULL when called! |
710 | */ | 735 | */ |
711 | static void | 736 | static void |
712 | automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik, | 737 | automaton_create_proofs_simplify (const char *R_last_ij, |
713 | char *R_last_kk, char *R_last_kj, | 738 | const char *R_last_ik, |
739 | const char *R_last_kk, | ||
740 | const char *R_last_kj, | ||
714 | char **R_cur_ij) | 741 | char **R_cur_ij) |
715 | { | 742 | { |
716 | char *R_cur_l; | 743 | char *R_cur_l; |
@@ -789,8 +816,9 @@ automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik, | |||
789 | * as parentheses, so we can better compare the contents */ | 816 | * as parentheses, so we can better compare the contents */ |
790 | R_temp_ij = remove_parentheses (remove_epsilon (R_last_ij)); | 817 | R_temp_ij = remove_parentheses (remove_epsilon (R_last_ij)); |
791 | 818 | ||
792 | if (0 == strcmp (R_temp_ij, R_temp_ik) && 0 == strcmp (R_temp_ik, R_temp_kk) | 819 | if ( (0 == strcmp (R_temp_ij, R_temp_ik)) && |
793 | && 0 == strcmp (R_temp_kk, R_temp_kj)) | 820 | (0 == strcmp (R_temp_ik, R_temp_kk)) && |
821 | (0 == strcmp (R_temp_kk, R_temp_kj)) ) | ||
794 | { | 822 | { |
795 | if (0 == strlen (R_temp_ij)) | 823 | if (0 == strlen (R_temp_ij)) |
796 | { | 824 | { |
@@ -1092,13 +1120,14 @@ automaton_create_proofs_simplify (char *R_last_ij, char *R_last_ik, | |||
1092 | * | 1120 | * |
1093 | * @param a automaton for which to assign proofs and hashes, must not be NULL | 1121 | * @param a automaton for which to assign proofs and hashes, must not be NULL |
1094 | */ | 1122 | */ |
1095 | static void | 1123 | static int |
1096 | automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | 1124 | automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) |
1097 | { | 1125 | { |
1098 | unsigned int n = a->state_count; | 1126 | unsigned int n = a->state_count; |
1099 | struct GNUNET_REGEX_State *states[n]; | 1127 | struct GNUNET_REGEX_State *states[n]; |
1100 | char **R_last; | 1128 | char **R_last; |
1101 | char **R_cur; | 1129 | char **R_cur; |
1130 | char **R_swap; | ||
1102 | char *temp; | 1131 | char *temp; |
1103 | struct GNUNET_REGEX_Transition *t; | 1132 | struct GNUNET_REGEX_Transition *t; |
1104 | char *complete_regex; | 1133 | char *complete_regex; |
@@ -1108,6 +1137,14 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1108 | 1137 | ||
1109 | R_last = GNUNET_malloc_large (sizeof (char *) * n * n); | 1138 | R_last = GNUNET_malloc_large (sizeof (char *) * n * n); |
1110 | R_cur = GNUNET_malloc_large (sizeof (char *) * n * n); | 1139 | R_cur = GNUNET_malloc_large (sizeof (char *) * n * n); |
1140 | if ( (NULL == R_last) || | ||
1141 | (NULL == R_cur) ) | ||
1142 | { | ||
1143 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc"); | ||
1144 | GNUNET_free_non_null (R_cur); | ||
1145 | GNUNET_free_non_null (R_last); | ||
1146 | return GNUNET_SYSERR; | ||
1147 | } | ||
1111 | 1148 | ||
1112 | /* create depth-first numbering of the states, initializes 'state' */ | 1149 | /* create depth-first numbering of the states, initializes 'state' */ |
1113 | GNUNET_REGEX_automaton_traverse (a, a->start, NULL, NULL, &number_states, | 1150 | GNUNET_REGEX_automaton_traverse (a, a->start, NULL, NULL, &number_states, |
@@ -1119,16 +1156,13 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1119 | /* Compute regular expressions of length "1" between each pair of states */ | 1156 | /* Compute regular expressions of length "1" between each pair of states */ |
1120 | for (i = 0; i < n; i++) | 1157 | for (i = 0; i < n; i++) |
1121 | { | 1158 | { |
1122 | for (j = 0; j < n; j++) | ||
1123 | { | ||
1124 | R_cur[i * n + j] = NULL; | ||
1125 | R_last[i * n + j] = NULL; | ||
1126 | } | ||
1127 | for (t = states[i]->transitions_head; NULL != t; t = t->next) | 1159 | for (t = states[i]->transitions_head; NULL != t; t = t->next) |
1128 | { | 1160 | { |
1129 | j = t->to_state->dfs_id; | 1161 | j = t->to_state->dfs_id; |
1130 | if (NULL == R_last[i * n + j]) | 1162 | if (NULL == R_last[i * n + j]) |
1163 | { | ||
1131 | GNUNET_asprintf (&R_last[i * n + j], "%s", t->label); | 1164 | GNUNET_asprintf (&R_last[i * n + j], "%s", t->label); |
1165 | } | ||
1132 | else | 1166 | else |
1133 | { | 1167 | { |
1134 | temp = R_last[i * n + j]; | 1168 | temp = R_last[i * n + j]; |
@@ -1137,8 +1171,11 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1137 | GNUNET_free (temp); | 1171 | GNUNET_free (temp); |
1138 | } | 1172 | } |
1139 | } | 1173 | } |
1174 | /* add self-loop: i is reachable from i via epsilon-transition */ | ||
1140 | if (NULL == R_last[i * n + i]) | 1175 | if (NULL == R_last[i * n + i]) |
1176 | { | ||
1141 | GNUNET_asprintf (&R_last[i * n + i], ""); | 1177 | GNUNET_asprintf (&R_last[i * n + i], ""); |
1178 | } | ||
1142 | else | 1179 | else |
1143 | { | 1180 | { |
1144 | temp = R_last[i * n + i]; | 1181 | temp = R_last[i * n + i]; |
@@ -1176,12 +1213,15 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1176 | } | 1213 | } |
1177 | 1214 | ||
1178 | /* set R_last = R_cur */ | 1215 | /* set R_last = R_cur */ |
1216 | R_swap = R_last; | ||
1217 | R_last = R_cur; | ||
1218 | R_cur = R_swap; | ||
1219 | /* clear 'R_cur' for next iteration */ | ||
1179 | for (i = 0; i < n; i++) | 1220 | for (i = 0; i < n; i++) |
1180 | { | 1221 | { |
1181 | for (j = 0; j < n; j++) | 1222 | for (j = 0; j < n; j++) |
1182 | { | 1223 | { |
1183 | GNUNET_free_non_null (R_last[i * n + j]); | 1224 | GNUNET_free_non_null (R_cur[i * n + j]); |
1184 | R_last[i * n + j] = R_cur[i * n + j]; | ||
1185 | R_cur[i * n + j] = NULL; | 1225 | R_cur[i * n + j] = NULL; |
1186 | } | 1226 | } |
1187 | } | 1227 | } |
@@ -1224,13 +1264,12 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1224 | a->canonical_regex = complete_regex; | 1264 | a->canonical_regex = complete_regex; |
1225 | 1265 | ||
1226 | /* cleanup */ | 1266 | /* cleanup */ |
1227 | for (i = 0; i < n; i++) | 1267 | for (i = 0; i < n; i++) |
1228 | { | ||
1229 | for (j = 0; j < n; j++) | 1268 | for (j = 0; j < n; j++) |
1230 | GNUNET_free_non_null (R_last[i * n + j]); | 1269 | GNUNET_free_non_null (R_last[i * n + j]); |
1231 | } | ||
1232 | GNUNET_free (R_cur); | 1270 | GNUNET_free (R_cur); |
1233 | GNUNET_free (R_last); | 1271 | GNUNET_free (R_last); |
1272 | return GNUNET_OK; | ||
1234 | } | 1273 | } |
1235 | 1274 | ||
1236 | 1275 | ||
@@ -1438,8 +1477,9 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) | |||
1438 | * | 1477 | * |
1439 | * @param ctx context | 1478 | * @param ctx context |
1440 | * @param a DFA automaton | 1479 | * @param a DFA automaton |
1480 | * @return GNUNET_OK on success | ||
1441 | */ | 1481 | */ |
1442 | static void | 1482 | static int |
1443 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | 1483 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, |
1444 | struct GNUNET_REGEX_Automaton *a) | 1484 | struct GNUNET_REGEX_Automaton *a) |
1445 | { | 1485 | { |
@@ -1461,11 +1501,16 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1461 | { | 1501 | { |
1462 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 1502 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
1463 | "Could not merge nondistinguishable states, automaton was NULL.\n"); | 1503 | "Could not merge nondistinguishable states, automaton was NULL.\n"); |
1464 | return; | 1504 | return GNUNET_SYSERR; |
1465 | } | 1505 | } |
1466 | 1506 | ||
1467 | state_cnt = a->state_count; | 1507 | state_cnt = a->state_count; |
1468 | table = GNUNET_malloc_large ((sizeof (uint32_t) * state_cnt * a->state_count / 32) + 1); | 1508 | table = GNUNET_malloc_large ((sizeof (uint32_t) * state_cnt * a->state_count / 32) + 1); |
1509 | if (NULL == table) | ||
1510 | { | ||
1511 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "malloc"); | ||
1512 | return GNUNET_SYSERR; | ||
1513 | } | ||
1469 | 1514 | ||
1470 | for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next) | 1515 | for (i = 0, s1 = a->states_head; NULL != s1; s1 = s1->next) |
1471 | s1->marked = i++; | 1516 | s1->marked = i++; |
@@ -1539,6 +1584,7 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1539 | } | 1584 | } |
1540 | 1585 | ||
1541 | GNUNET_free (table); | 1586 | GNUNET_free (table); |
1587 | return GNUNET_OK; | ||
1542 | } | 1588 | } |
1543 | 1589 | ||
1544 | 1590 | ||
@@ -1548,13 +1594,14 @@ dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Context *ctx, | |||
1548 | * | 1594 | * |
1549 | * @param ctx context | 1595 | * @param ctx context |
1550 | * @param a DFA automaton | 1596 | * @param a DFA automaton |
1597 | * @return GNUNET_OK on success | ||
1551 | */ | 1598 | */ |
1552 | static void | 1599 | static int |
1553 | dfa_minimize (struct GNUNET_REGEX_Context *ctx, | 1600 | dfa_minimize (struct GNUNET_REGEX_Context *ctx, |
1554 | struct GNUNET_REGEX_Automaton *a) | 1601 | struct GNUNET_REGEX_Automaton *a) |
1555 | { | 1602 | { |
1556 | if (NULL == a) | 1603 | if (NULL == a) |
1557 | return; | 1604 | return GNUNET_SYSERR; |
1558 | 1605 | ||
1559 | GNUNET_assert (DFA == a->type); | 1606 | GNUNET_assert (DFA == a->type); |
1560 | 1607 | ||
@@ -1565,7 +1612,9 @@ dfa_minimize (struct GNUNET_REGEX_Context *ctx, | |||
1565 | dfa_remove_dead_states (a); | 1612 | dfa_remove_dead_states (a); |
1566 | 1613 | ||
1567 | /* 3. Merge nondistinguishable states */ | 1614 | /* 3. Merge nondistinguishable states */ |
1568 | dfa_merge_nondistinguishable_states (ctx, a); | 1615 | if (GNUNET_OK != dfa_merge_nondistinguishable_states (ctx, a)) |
1616 | return GNUNET_SYSERR; | ||
1617 | return GNUNET_OK; | ||
1569 | } | 1618 | } |
1570 | 1619 | ||
1571 | 1620 | ||
@@ -2533,10 +2582,18 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len, | |||
2533 | 2582 | ||
2534 | /* Minimize DFA */ | 2583 | /* Minimize DFA */ |
2535 | // fprintf (stderr, "M"); | 2584 | // fprintf (stderr, "M"); |
2536 | dfa_minimize (&ctx, dfa); | 2585 | if (GNUNET_OK != dfa_minimize (&ctx, dfa)) |
2586 | { | ||
2587 | GNUNET_REGEX_automaton_destroy (dfa); | ||
2588 | return NULL; | ||
2589 | } | ||
2537 | 2590 | ||
2538 | /* Create proofs and hashes for all states */ | 2591 | /* Create proofs and hashes for all states */ |
2539 | automaton_create_proofs (dfa); | 2592 | if (GNUNET_OK != automaton_create_proofs (dfa)) |
2593 | { | ||
2594 | GNUNET_REGEX_automaton_destroy (dfa); | ||
2595 | return NULL; | ||
2596 | } | ||
2540 | 2597 | ||
2541 | /* Compress linear DFA paths */ | 2598 | /* Compress linear DFA paths */ |
2542 | if (1 != max_path_len) | 2599 | if (1 != max_path_len) |