diff options
-rw-r--r-- | src/regex/regex.c | 39 | ||||
-rw-r--r-- | src/regex/test_regex.c | 215 |
2 files changed, 210 insertions, 44 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index 6432075a5..fc9de529c 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -416,6 +416,15 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states) | |||
416 | return s; | 416 | return s; |
417 | } | 417 | } |
418 | 418 | ||
419 | /** | ||
420 | * Move from the given state 's' to the next state on | ||
421 | * transition 'literal' | ||
422 | * | ||
423 | * @param s starting state | ||
424 | * @param literal edge label to follow | ||
425 | * | ||
426 | * @return new state or NULL, if transition on literal not possible | ||
427 | */ | ||
419 | static struct State * | 428 | static struct State * |
420 | dfa_move (struct State *s, const char literal) | 429 | dfa_move (struct State *s, const char literal) |
421 | { | 430 | { |
@@ -439,6 +448,12 @@ dfa_move (struct State *s, const char literal) | |||
439 | return new_s; | 448 | return new_s; |
440 | } | 449 | } |
441 | 450 | ||
451 | /** | ||
452 | * Remove all unreachable states from DFA 'a'. Unreachable states | ||
453 | * are those states that are not reachable from the starting state. | ||
454 | * | ||
455 | * @param a DFA automaton | ||
456 | */ | ||
442 | static void | 457 | static void |
443 | dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | 458 | dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) |
444 | { | 459 | { |
@@ -472,6 +487,12 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) | |||
472 | */ | 487 | */ |
473 | } | 488 | } |
474 | 489 | ||
490 | /** | ||
491 | * Remove all dead states from the DFA 'a'. Dead states are those | ||
492 | * states that do not transition to any other state but themselfes. | ||
493 | * | ||
494 | * @param a DFA automaton | ||
495 | */ | ||
475 | static void | 496 | static void |
476 | dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) | 497 | dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) |
477 | { | 498 | { |
@@ -520,12 +541,23 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) | |||
520 | } | 541 | } |
521 | } | 542 | } |
522 | 543 | ||
544 | /** | ||
545 | * Merge all non distinguishable states in the DFA 'a' | ||
546 | * | ||
547 | * @param a DFA automaton | ||
548 | */ | ||
523 | static void | 549 | static void |
524 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a) | 550 | dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a) |
525 | { | 551 | { |
526 | 552 | ||
527 | } | 553 | } |
528 | 554 | ||
555 | /** | ||
556 | * Minimize the given DFA 'a' by removing all unreachable states, | ||
557 | * removing all dead states and merging all non distinguishable states | ||
558 | * | ||
559 | * @param a DFA automaton | ||
560 | */ | ||
529 | static void | 561 | static void |
530 | dfa_minimize (struct GNUNET_REGEX_Automaton *a) | 562 | dfa_minimize (struct GNUNET_REGEX_Automaton *a) |
531 | { | 563 | { |
@@ -1029,10 +1061,6 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len) | |||
1029 | goto error; | 1061 | goto error; |
1030 | } | 1062 | } |
1031 | 1063 | ||
1032 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1033 | "Created NFA with %i States and a total of %i Transitions\n", | ||
1034 | ctx.state_id, ctx.transition_id); | ||
1035 | |||
1036 | return nfa; | 1064 | return nfa; |
1037 | 1065 | ||
1038 | error: | 1066 | error: |
@@ -1156,9 +1184,6 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len) | |||
1156 | 1184 | ||
1157 | dfa_minimize (dfa); | 1185 | dfa_minimize (dfa); |
1158 | 1186 | ||
1159 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Created DFA with %i States\n", | ||
1160 | ctx.state_id); | ||
1161 | |||
1162 | return dfa; | 1187 | return dfa; |
1163 | } | 1188 | } |
1164 | 1189 | ||
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c index 7428c001e..d32bc9ed8 100644 --- a/src/regex/test_regex.c +++ b/src/regex/test_regex.c | |||
@@ -23,7 +23,7 @@ | |||
23 | * @author Maximilian Szengel | 23 | * @author Maximilian Szengel |
24 | */ | 24 | */ |
25 | #include <regex.h> | 25 | #include <regex.h> |
26 | 26 | #include <time.h> | |
27 | #include "platform.h" | 27 | #include "platform.h" |
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | 29 | ||
@@ -36,44 +36,164 @@ enum Match_Result | |||
36 | struct Regex_String_Pair | 36 | struct Regex_String_Pair |
37 | { | 37 | { |
38 | char *regex; | 38 | char *regex; |
39 | char *string; | 39 | int string_count; |
40 | enum Match_Result expected_result; | 40 | char **strings; |
41 | enum Match_Result *expected_results; | ||
41 | }; | 42 | }; |
42 | 43 | ||
43 | |||
44 | int | 44 | int |
45 | test_automaton (struct GNUNET_REGEX_Automaton *a, struct Regex_String_Pair *rxstr) | 45 | test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_count) |
46 | { | 46 | { |
47 | int i; | ||
48 | int rx_exp; | ||
49 | char rand_rx[rx_length+1]; | ||
50 | char matching_str[str_count][max_str_len+1]; | ||
51 | char *rand_rxp; | ||
52 | char *matching_strp; | ||
53 | int char_op_switch; | ||
54 | int last_was_op; | ||
55 | char current_char; | ||
56 | int eval; | ||
57 | int eval_check; | ||
58 | struct GNUNET_REGEX_Automaton *dfa; | ||
47 | regex_t rx; | 59 | regex_t rx; |
60 | regmatch_t matchptr[1]; | ||
61 | int char_offset; | ||
62 | char error[200]; | ||
63 | int result; | ||
64 | |||
65 | // At least one string is needed for matching | ||
66 | GNUNET_assert (str_count > 0); | ||
67 | // The string should be at least as long as the regex itself | ||
68 | GNUNET_assert (max_str_len >= rx_length); | ||
69 | |||
70 | rand_rxp = rand_rx; | ||
71 | matching_strp = matching_str[0]; | ||
72 | |||
73 | // Generate random regex and a string that matches the regex | ||
74 | for (i=0; i<rx_length; i++) | ||
75 | { | ||
76 | char_op_switch = 0 + (int)(1.0 * rand() / (RAND_MAX + 1.0)); | ||
77 | char_offset = (rand()%2) ? 65 : 97; | ||
78 | |||
79 | if (0 == char_op_switch | ||
80 | && !last_was_op) | ||
81 | { | ||
82 | last_was_op = 1; | ||
83 | rx_exp = rand () % 3; | ||
84 | |||
85 | switch (rx_exp) | ||
86 | { | ||
87 | case 0: | ||
88 | current_char = '+'; | ||
89 | break; | ||
90 | case 1: | ||
91 | current_char = '*'; | ||
92 | break; | ||
93 | case 2: | ||
94 | if (i < rx_length -1) | ||
95 | current_char = '|'; | ||
96 | else | ||
97 | current_char = (char)(char_offset + (int)( 25.0 * rand() / (RAND_MAX + 1.0))); | ||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | else | ||
102 | { | ||
103 | current_char = (char)(char_offset + (int)( 25.0 * rand() / (RAND_MAX + 1.0))); | ||
104 | last_was_op = 0; | ||
105 | } | ||
106 | |||
107 | if (current_char != '+' | ||
108 | && current_char != '*' | ||
109 | && current_char != '|') | ||
110 | { | ||
111 | *matching_strp = current_char; | ||
112 | matching_strp++; | ||
113 | } | ||
114 | |||
115 | *rand_rxp = current_char; | ||
116 | rand_rxp++; | ||
117 | } | ||
118 | *rand_rxp = '\0'; | ||
119 | *matching_strp = '\0'; | ||
120 | |||
121 | result = 0; | ||
122 | |||
123 | for (i=0; i<str_count; i++) | ||
124 | { | ||
125 | // Match string using DFA | ||
126 | dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); | ||
127 | if (NULL == dfa) | ||
128 | { | ||
129 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n"); | ||
130 | return -1; | ||
131 | } | ||
132 | |||
133 | eval = GNUNET_REGEX_eval (dfa, matching_str[i]); | ||
134 | GNUNET_REGEX_automaton_destroy (dfa); | ||
135 | |||
136 | // Match string using glibc regex | ||
137 | if (0 != regcomp (&rx, rand_rx, REG_EXTENDED)) | ||
138 | { | ||
139 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp\n"); | ||
140 | return -1; | ||
141 | } | ||
142 | |||
143 | eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); | ||
144 | |||
145 | // We only want to match the whole string, because that's what our DFA does, too. | ||
146 | if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i]))) | ||
147 | eval_check = 1; | ||
148 | |||
149 | // compare result | ||
150 | if (eval_check != eval) | ||
151 | { | ||
152 | regerror (eval_check, &rx, error, sizeof error); | ||
153 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
154 | "Unexpected result:\nregex: %s\nstring: %s\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n", | ||
155 | rand_rx, matching_str, eval, eval_check, error); | ||
156 | result += 1; | ||
157 | } | ||
158 | } | ||
159 | return result; | ||
160 | } | ||
161 | |||
162 | int | ||
163 | test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t *rx, struct Regex_String_Pair *rxstr) | ||
164 | { | ||
48 | int result; | 165 | int result; |
49 | int eval; | 166 | int eval; |
50 | int eval_check; | 167 | int eval_check; |
168 | char error[200]; | ||
169 | int i; | ||
51 | 170 | ||
52 | if (NULL == a) | 171 | if (NULL == a) |
172 | { | ||
173 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n"); | ||
53 | return 1; | 174 | return 1; |
175 | } | ||
54 | 176 | ||
55 | result = 0; | 177 | result = 0; |
56 | 178 | ||
57 | eval = GNUNET_REGEX_eval (a, rxstr->string); | 179 | for (i=0; i<rxstr->string_count; i++) |
58 | regcomp (&rx, rxstr->regex, REG_EXTENDED); | ||
59 | eval_check = regexec (&rx, rxstr->string, 0, NULL, 0); | ||
60 | |||
61 | if ((rxstr->expected_result == match | ||
62 | && (0 != eval || 0 != eval_check)) | ||
63 | || | ||
64 | (rxstr->expected_result == nomatch | ||
65 | && (0 == eval || 0 == eval_check))) | ||
66 | { | 180 | { |
67 | result = 1; | 181 | eval = GNUNET_REGEX_eval (a, rxstr->strings[i]); |
68 | char error[200]; | 182 | eval_check = regexec (rx, rxstr->strings[i], 0, NULL, 0); |
69 | regerror (eval_check, &rx, error, sizeof error); | ||
70 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
71 | "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n", | ||
72 | rxstr->regex, rxstr->string, rxstr->expected_result, eval, eval_check, error); | ||
73 | } | ||
74 | |||
75 | regfree (&rx); | ||
76 | 183 | ||
184 | if ((rxstr->expected_results[i] == match | ||
185 | && (0 != eval || 0 != eval_check)) | ||
186 | || | ||
187 | (rxstr->expected_results[i] == nomatch | ||
188 | && (0 == eval || 0 == eval_check))) | ||
189 | { | ||
190 | result = 1; | ||
191 | regerror (eval_check, rx, error, sizeof error); | ||
192 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
193 | "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n", | ||
194 | rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], eval, eval_check, error); | ||
195 | } | ||
196 | } | ||
77 | return result; | 197 | return result; |
78 | } | 198 | } |
79 | 199 | ||
@@ -90,34 +210,55 @@ main (int argc, char *argv[]) | |||
90 | 210 | ||
91 | int check_nfa; | 211 | int check_nfa; |
92 | int check_dfa; | 212 | int check_dfa; |
213 | int check_rand; | ||
93 | struct Regex_String_Pair rxstr[3]; | 214 | struct Regex_String_Pair rxstr[3]; |
94 | struct GNUNET_REGEX_Automaton *a; | 215 | struct GNUNET_REGEX_Automaton *a; |
216 | regex_t rx; | ||
95 | int i; | 217 | int i; |
96 | 218 | ||
97 | rxstr[0].regex = "ab(c|d)+c*(a(b|c)d)+"; | 219 | check_nfa = 0; |
98 | rxstr[0].string = "abcdcdcdcdddddabd"; | 220 | check_dfa = 0; |
99 | rxstr[0].expected_result = match; | 221 | check_rand = 0; |
100 | 222 | ||
101 | rxstr[1].regex = "a*"; | 223 | rxstr[0].regex = "ab(c|d)+c*(a(b|c)d)+"; |
102 | rxstr[1].string = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; | 224 | rxstr[0].string_count = 5; |
103 | rxstr[1].expected_result = match; | 225 | rxstr[0].strings = GNUNET_malloc (sizeof (char *) * rxstr[0].string_count); |
104 | 226 | rxstr[0].strings[0] = "abcdcdcdcdddddabd"; | |
105 | rxstr[2].regex = "a*b*c*d+"; | 227 | rxstr[0].strings[1] = "abcd"; |
106 | rxstr[2].string = "a"; | 228 | rxstr[0].strings[2] = "abcddddddccccccccccccccccccccccccabdacdabd"; |
107 | rxstr[2].expected_result = nomatch; | 229 | rxstr[0].strings[3] = "abccccca"; |
230 | rxstr[0].strings[4] = "abcdcdcdccdabdabd"; | ||
231 | rxstr[0].expected_results = GNUNET_malloc (sizeof (enum Match_Result) * rxstr[0].string_count); | ||
232 | rxstr[0].expected_results[0] = match; | ||
233 | rxstr[0].expected_results[1] = nomatch; | ||
234 | rxstr[0].expected_results[2] = match; | ||
235 | rxstr[0].expected_results[3] = nomatch; | ||
236 | rxstr[0].expected_results[4] = match; | ||
108 | 237 | ||
109 | for (i=0; i<3; i++) | 238 | for (i=0; i<1; i++) |
110 | { | 239 | { |
240 | if (0 != regcomp (&rx, rxstr->regex, REG_EXTENDED | REG_NOSUB)) | ||
241 | { | ||
242 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp()\n"); | ||
243 | return 1; | ||
244 | } | ||
245 | |||
111 | // NFA test | 246 | // NFA test |
112 | a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 247 | a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex)); |
113 | check_nfa += test_automaton (a, &rxstr[i]); | 248 | check_nfa += test_automaton (a, &rx, &rxstr[i]); |
114 | GNUNET_REGEX_automaton_destroy (a); | 249 | GNUNET_REGEX_automaton_destroy (a); |
115 | 250 | ||
116 | // DFA test | 251 | // DFA test |
117 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 252 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); |
118 | check_dfa += test_automaton (a, &rxstr[i]); | 253 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
119 | GNUNET_REGEX_automaton_destroy (a); | 254 | GNUNET_REGEX_automaton_destroy (a); |
255 | |||
256 | regfree (&rx); | ||
120 | } | 257 | } |
121 | 258 | ||
122 | return check_nfa + check_dfa; | 259 | srand (time(NULL)); |
260 | for (i=0; i< 100; i++) | ||
261 | check_rand += test_random (100, 100, 1); | ||
262 | |||
263 | return check_nfa + check_dfa + check_rand; | ||
123 | } | 264 | } |