aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/regex/regex.c39
-rw-r--r--src/regex/test_regex.c215
2 files changed, 210 insertions, 44 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 6432075a5..fc9de529c 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -416,6 +416,15 @@ dfa_state_create (struct GNUNET_REGEX_Context *ctx, struct StateSet *nfa_states)
416 return s; 416 return s;
417} 417}
418 418
419/**
420 * Move from the given state 's' to the next state on
421 * transition 'literal'
422 *
423 * @param s starting state
424 * @param literal edge label to follow
425 *
426 * @return new state or NULL, if transition on literal not possible
427 */
419static struct State * 428static struct State *
420dfa_move (struct State *s, const char literal) 429dfa_move (struct State *s, const char literal)
421{ 430{
@@ -439,6 +448,12 @@ dfa_move (struct State *s, const char literal)
439 return new_s; 448 return new_s;
440} 449}
441 450
451/**
452 * Remove all unreachable states from DFA 'a'. Unreachable states
453 * are those states that are not reachable from the starting state.
454 *
455 * @param a DFA automaton
456 */
442static void 457static void
443dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a) 458dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
444{ 459{
@@ -472,6 +487,12 @@ dfa_remove_unreachable_states (struct GNUNET_REGEX_Automaton *a)
472 */ 487 */
473} 488}
474 489
490/**
491 * Remove all dead states from the DFA 'a'. Dead states are those
492 * states that do not transition to any other state but themselfes.
493 *
494 * @param a DFA automaton
495 */
475static void 496static void
476dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a) 497dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
477{ 498{
@@ -520,12 +541,23 @@ dfa_remove_dead_states (struct GNUNET_REGEX_Automaton *a)
520 } 541 }
521} 542}
522 543
544/**
545 * Merge all non distinguishable states in the DFA 'a'
546 *
547 * @param a DFA automaton
548 */
523static void 549static void
524dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a) 550dfa_merge_nondistinguishable_states (struct GNUNET_REGEX_Automaton *a)
525{ 551{
526 552
527} 553}
528 554
555/**
556 * Minimize the given DFA 'a' by removing all unreachable states,
557 * removing all dead states and merging all non distinguishable states
558 *
559 * @param a DFA automaton
560 */
529static void 561static void
530dfa_minimize (struct GNUNET_REGEX_Automaton *a) 562dfa_minimize (struct GNUNET_REGEX_Automaton *a)
531{ 563{
@@ -1029,10 +1061,6 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len)
1029 goto error; 1061 goto error;
1030 } 1062 }
1031 1063
1032 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1033 "Created NFA with %i States and a total of %i Transitions\n",
1034 ctx.state_id, ctx.transition_id);
1035
1036 return nfa; 1064 return nfa;
1037 1065
1038error: 1066error:
@@ -1156,9 +1184,6 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len)
1156 1184
1157 dfa_minimize (dfa); 1185 dfa_minimize (dfa);
1158 1186
1159 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Created DFA with %i States\n",
1160 ctx.state_id);
1161
1162 return dfa; 1187 return dfa;
1163} 1188}
1164 1189
diff --git a/src/regex/test_regex.c b/src/regex/test_regex.c
index 7428c001e..d32bc9ed8 100644
--- a/src/regex/test_regex.c
+++ b/src/regex/test_regex.c
@@ -23,7 +23,7 @@
23 * @author Maximilian Szengel 23 * @author Maximilian Szengel
24 */ 24 */
25#include <regex.h> 25#include <regex.h>
26 26#include <time.h>
27#include "platform.h" 27#include "platform.h"
28#include "gnunet_regex_lib.h" 28#include "gnunet_regex_lib.h"
29 29
@@ -36,44 +36,164 @@ enum Match_Result
36struct Regex_String_Pair 36struct Regex_String_Pair
37{ 37{
38 char *regex; 38 char *regex;
39 char *string; 39 int string_count;
40 enum Match_Result expected_result; 40 char **strings;
41 enum Match_Result *expected_results;
41}; 42};
42 43
43
44int 44int
45test_automaton (struct GNUNET_REGEX_Automaton *a, struct Regex_String_Pair *rxstr) 45test_random (unsigned int rx_length, unsigned int max_str_len, unsigned int str_count)
46{ 46{
47 int i;
48 int rx_exp;
49 char rand_rx[rx_length+1];
50 char matching_str[str_count][max_str_len+1];
51 char *rand_rxp;
52 char *matching_strp;
53 int char_op_switch;
54 int last_was_op;
55 char current_char;
56 int eval;
57 int eval_check;
58 struct GNUNET_REGEX_Automaton *dfa;
47 regex_t rx; 59 regex_t rx;
60 regmatch_t matchptr[1];
61 int char_offset;
62 char error[200];
63 int result;
64
65 // At least one string is needed for matching
66 GNUNET_assert (str_count > 0);
67 // The string should be at least as long as the regex itself
68 GNUNET_assert (max_str_len >= rx_length);
69
70 rand_rxp = rand_rx;
71 matching_strp = matching_str[0];
72
73 // Generate random regex and a string that matches the regex
74 for (i=0; i<rx_length; i++)
75 {
76 char_op_switch = 0 + (int)(1.0 * rand() / (RAND_MAX + 1.0));
77 char_offset = (rand()%2) ? 65 : 97;
78
79 if (0 == char_op_switch
80 && !last_was_op)
81 {
82 last_was_op = 1;
83 rx_exp = rand () % 3;
84
85 switch (rx_exp)
86 {
87 case 0:
88 current_char = '+';
89 break;
90 case 1:
91 current_char = '*';
92 break;
93 case 2:
94 if (i < rx_length -1)
95 current_char = '|';
96 else
97 current_char = (char)(char_offset + (int)( 25.0 * rand() / (RAND_MAX + 1.0)));
98 break;
99 }
100 }
101 else
102 {
103 current_char = (char)(char_offset + (int)( 25.0 * rand() / (RAND_MAX + 1.0)));
104 last_was_op = 0;
105 }
106
107 if (current_char != '+'
108 && current_char != '*'
109 && current_char != '|')
110 {
111 *matching_strp = current_char;
112 matching_strp++;
113 }
114
115 *rand_rxp = current_char;
116 rand_rxp++;
117 }
118 *rand_rxp = '\0';
119 *matching_strp = '\0';
120
121 result = 0;
122
123 for (i=0; i<str_count; i++)
124 {
125 // Match string using DFA
126 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
127 if (NULL == dfa)
128 {
129 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Constructing DFA failed\n");
130 return -1;
131 }
132
133 eval = GNUNET_REGEX_eval (dfa, matching_str[i]);
134 GNUNET_REGEX_automaton_destroy (dfa);
135
136 // Match string using glibc regex
137 if (0 != regcomp (&rx, rand_rx, REG_EXTENDED))
138 {
139 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp\n");
140 return -1;
141 }
142
143 eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0);
144
145 // We only want to match the whole string, because that's what our DFA does, too.
146 if (eval_check == 0 && (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str[i])))
147 eval_check = 1;
148
149 // compare result
150 if (eval_check != eval)
151 {
152 regerror (eval_check, &rx, error, sizeof error);
153 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
154 "Unexpected result:\nregex: %s\nstring: %s\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
155 rand_rx, matching_str, eval, eval_check, error);
156 result += 1;
157 }
158 }
159 return result;
160}
161
162int
163test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t *rx, struct Regex_String_Pair *rxstr)
164{
48 int result; 165 int result;
49 int eval; 166 int eval;
50 int eval_check; 167 int eval_check;
168 char error[200];
169 int i;
51 170
52 if (NULL == a) 171 if (NULL == a)
172 {
173 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Automaton was NULL\n");
53 return 1; 174 return 1;
175 }
54 176
55 result = 0; 177 result = 0;
56 178
57 eval = GNUNET_REGEX_eval (a, rxstr->string); 179 for (i=0; i<rxstr->string_count; i++)
58 regcomp (&rx, rxstr->regex, REG_EXTENDED);
59 eval_check = regexec (&rx, rxstr->string, 0, NULL, 0);
60
61 if ((rxstr->expected_result == match
62 && (0 != eval || 0 != eval_check))
63 ||
64 (rxstr->expected_result == nomatch
65 && (0 == eval || 0 == eval_check)))
66 { 180 {
67 result = 1; 181 eval = GNUNET_REGEX_eval (a, rxstr->strings[i]);
68 char error[200]; 182 eval_check = regexec (rx, rxstr->strings[i], 0, NULL, 0);
69 regerror (eval_check, &rx, error, sizeof error);
70 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
71 "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
72 rxstr->regex, rxstr->string, rxstr->expected_result, eval, eval_check, error);
73 }
74
75 regfree (&rx);
76 183
184 if ((rxstr->expected_results[i] == match
185 && (0 != eval || 0 != eval_check))
186 ||
187 (rxstr->expected_results[i] == nomatch
188 && (0 == eval || 0 == eval_check)))
189 {
190 result = 1;
191 regerror (eval_check, rx, error, sizeof error);
192 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
193 "Unexpected result:\nregex: %s\nstring: %s\nexpected result: %i\ngnunet regex: %i\nglibc regex: %i\nglibc error: %s\n\n",
194 rxstr->regex, rxstr->strings[i], rxstr->expected_results[i], eval, eval_check, error);
195 }
196 }
77 return result; 197 return result;
78} 198}
79 199
@@ -90,34 +210,55 @@ main (int argc, char *argv[])
90 210
91 int check_nfa; 211 int check_nfa;
92 int check_dfa; 212 int check_dfa;
213 int check_rand;
93 struct Regex_String_Pair rxstr[3]; 214 struct Regex_String_Pair rxstr[3];
94 struct GNUNET_REGEX_Automaton *a; 215 struct GNUNET_REGEX_Automaton *a;
216 regex_t rx;
95 int i; 217 int i;
96 218
97 rxstr[0].regex = "ab(c|d)+c*(a(b|c)d)+"; 219 check_nfa = 0;
98 rxstr[0].string = "abcdcdcdcdddddabd"; 220 check_dfa = 0;
99 rxstr[0].expected_result = match; 221 check_rand = 0;
100 222
101 rxstr[1].regex = "a*"; 223 rxstr[0].regex = "ab(c|d)+c*(a(b|c)d)+";
102 rxstr[1].string = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; 224 rxstr[0].string_count = 5;
103 rxstr[1].expected_result = match; 225 rxstr[0].strings = GNUNET_malloc (sizeof (char *) * rxstr[0].string_count);
104 226 rxstr[0].strings[0] = "abcdcdcdcdddddabd";
105 rxstr[2].regex = "a*b*c*d+"; 227 rxstr[0].strings[1] = "abcd";
106 rxstr[2].string = "a"; 228 rxstr[0].strings[2] = "abcddddddccccccccccccccccccccccccabdacdabd";
107 rxstr[2].expected_result = nomatch; 229 rxstr[0].strings[3] = "abccccca";
230 rxstr[0].strings[4] = "abcdcdcdccdabdabd";
231 rxstr[0].expected_results = GNUNET_malloc (sizeof (enum Match_Result) * rxstr[0].string_count);
232 rxstr[0].expected_results[0] = match;
233 rxstr[0].expected_results[1] = nomatch;
234 rxstr[0].expected_results[2] = match;
235 rxstr[0].expected_results[3] = nomatch;
236 rxstr[0].expected_results[4] = match;
108 237
109 for (i=0; i<3; i++) 238 for (i=0; i<1; i++)
110 { 239 {
240 if (0 != regcomp (&rx, rxstr->regex, REG_EXTENDED | REG_NOSUB))
241 {
242 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, "Could not compile regex using regcomp()\n");
243 return 1;
244 }
245
111 // NFA test 246 // NFA test
112 a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex)); 247 a = GNUNET_REGEX_construct_nfa (rxstr[i].regex, strlen (rxstr[i].regex));
113 check_nfa += test_automaton (a, &rxstr[i]); 248 check_nfa += test_automaton (a, &rx, &rxstr[i]);
114 GNUNET_REGEX_automaton_destroy (a); 249 GNUNET_REGEX_automaton_destroy (a);
115 250
116 // DFA test 251 // DFA test
117 a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); 252 a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex));
118 check_dfa += test_automaton (a, &rxstr[i]); 253 check_dfa += test_automaton (a, &rx, &rxstr[i]);
119 GNUNET_REGEX_automaton_destroy (a); 254 GNUNET_REGEX_automaton_destroy (a);
255
256 regfree (&rx);
120 } 257 }
121 258
122 return check_nfa + check_dfa; 259 srand (time(NULL));
260 for (i=0; i< 100; i++)
261 check_rand += test_random (100, 100, 1);
262
263 return check_nfa + check_dfa + check_rand;
123} 264}