diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-27 16:13:48 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-27 16:13:48 +0000 |
commit | 24f2c9d570bd181c622955506f6ecc000d5b2a98 (patch) | |
tree | 9bfb6f07aae4ce7e6df353becbb371fb63c24651 /src | |
parent | 039c0a8e1c193c692c4f492d75cc2b98643203ef (diff) | |
download | gnunet-24f2c9d570bd181c622955506f6ecc000d5b2a98.tar.gz gnunet-24f2c9d570bd181c622955506f6ecc000d5b2a98.zip |
new and improved tests
Diffstat (limited to 'src')
-rw-r--r-- | src/include/gnunet_regex_lib.h | 22 | ||||
-rw-r--r-- | src/regex/Makefile.am | 3 | ||||
-rw-r--r-- | src/regex/regex.c | 17 | ||||
-rw-r--r-- | src/regex/regex_internal.h | 96 | ||||
-rw-r--r-- | src/regex/regex_random.c | 170 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 102 | ||||
-rw-r--r-- | src/regex/test_regex_proofs.c | 156 |
7 files changed, 425 insertions, 141 deletions
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index 64a370df3..911128647 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h | |||
@@ -42,6 +42,7 @@ extern "C" | |||
42 | */ | 42 | */ |
43 | struct GNUNET_REGEX_Automaton; | 43 | struct GNUNET_REGEX_Automaton; |
44 | 44 | ||
45 | |||
45 | /** | 46 | /** |
46 | * Edge representation. | 47 | * Edge representation. |
47 | */ | 48 | */ |
@@ -58,6 +59,7 @@ struct GNUNET_REGEX_Edge | |||
58 | struct GNUNET_HashCode destination; | 59 | struct GNUNET_HashCode destination; |
59 | }; | 60 | }; |
60 | 61 | ||
62 | |||
61 | /** | 63 | /** |
62 | * Construct an NFA by parsing the regex string of length 'len'. | 64 | * Construct an NFA by parsing the regex string of length 'len'. |
63 | * | 65 | * |
@@ -69,6 +71,7 @@ struct GNUNET_REGEX_Edge | |||
69 | struct GNUNET_REGEX_Automaton * | 71 | struct GNUNET_REGEX_Automaton * |
70 | GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); | 72 | GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); |
71 | 73 | ||
74 | |||
72 | /** | 75 | /** |
73 | * Construct DFA for the given 'regex' of length 'len'. | 76 | * Construct DFA for the given 'regex' of length 'len'. |
74 | * | 77 | * |
@@ -80,6 +83,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); | |||
80 | struct GNUNET_REGEX_Automaton * | 83 | struct GNUNET_REGEX_Automaton * |
81 | GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); | 84 | GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); |
82 | 85 | ||
86 | |||
83 | /** | 87 | /** |
84 | * Free the memory allocated by constructing the GNUNET_REGEX_Automaton. | 88 | * Free the memory allocated by constructing the GNUNET_REGEX_Automaton. |
85 | * data structure. | 89 | * data structure. |
@@ -89,6 +93,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); | |||
89 | void | 93 | void |
90 | GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a); | 94 | GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a); |
91 | 95 | ||
96 | |||
92 | /** | 97 | /** |
93 | * Save the given automaton as a GraphViz dot file. | 98 | * Save the given automaton as a GraphViz dot file. |
94 | * | 99 | * |
@@ -111,19 +116,6 @@ int | |||
111 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, | 116 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, |
112 | const char *string); | 117 | const char *string); |
113 | 118 | ||
114 | /** | ||
115 | * Get the canonical regex of the given automaton. | ||
116 | * When constructing the automaton a proof is computed for each state, | ||
117 | * consisting of the regular expression leading to this state. A complete | ||
118 | * regex for the automaton can be computed by combining these proofs. | ||
119 | * As of now this function is only useful for testing. | ||
120 | * | ||
121 | * @param a automaton for which the canonical regex should be returned. | ||
122 | * | ||
123 | * @return | ||
124 | */ | ||
125 | const char * | ||
126 | GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a); | ||
127 | 119 | ||
128 | /** | 120 | /** |
129 | * Get the first key for the given 'input_string'. This hashes | 121 | * Get the first key for the given 'input_string'. This hashes |
@@ -140,6 +132,7 @@ unsigned int /* FIXME: size_t */ | |||
140 | GNUNET_REGEX_get_first_key (const char *input_string, /* FIXME: size_t */ unsigned int string_len, | 132 | GNUNET_REGEX_get_first_key (const char *input_string, /* FIXME: size_t */ unsigned int string_len, |
141 | struct GNUNET_HashCode * key); | 133 | struct GNUNET_HashCode * key); |
142 | 134 | ||
135 | |||
143 | /** | 136 | /** |
144 | * Check if the given 'proof' matches the given 'key'. | 137 | * Check if the given 'proof' matches the given 'key'. |
145 | * | 138 | * |
@@ -152,6 +145,7 @@ int | |||
152 | GNUNET_REGEX_check_proof (const char *proof, | 145 | GNUNET_REGEX_check_proof (const char *proof, |
153 | const struct GNUNET_HashCode *key); | 146 | const struct GNUNET_HashCode *key); |
154 | 147 | ||
148 | |||
155 | /** | 149 | /** |
156 | * Iterator callback function. | 150 | * Iterator callback function. |
157 | * | 151 | * |
@@ -169,6 +163,7 @@ typedef void (*GNUNET_REGEX_KeyIterator)(void *cls, | |||
169 | unsigned int num_edges, | 163 | unsigned int num_edges, |
170 | const struct GNUNET_REGEX_Edge *edges); | 164 | const struct GNUNET_REGEX_Edge *edges); |
171 | 165 | ||
166 | |||
172 | /** | 167 | /** |
173 | * Iterate over all edges starting from start state of automaton 'a'. Calling | 168 | * Iterate over all edges starting from start state of automaton 'a'. Calling |
174 | * iterator for each edge. | 169 | * iterator for each edge. |
@@ -182,6 +177,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a, | |||
182 | GNUNET_REGEX_KeyIterator iterator, | 177 | GNUNET_REGEX_KeyIterator iterator, |
183 | void *iterator_cls); | 178 | void *iterator_cls); |
184 | 179 | ||
180 | |||
185 | #if 0 /* keep Emacsens' auto-indent happy */ | 181 | #if 0 /* keep Emacsens' auto-indent happy */ |
186 | { | 182 | { |
187 | #endif | 183 | #endif |
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am index cb9bc093a..1284111d8 100644 --- a/src/regex/Makefile.am +++ b/src/regex/Makefile.am | |||
@@ -11,7 +11,8 @@ endif | |||
11 | lib_LTLIBRARIES = libgnunetregex.la | 11 | lib_LTLIBRARIES = libgnunetregex.la |
12 | 12 | ||
13 | libgnunetregex_la_SOURCES = \ | 13 | libgnunetregex_la_SOURCES = \ |
14 | regex.c | 14 | regex_internal.h regex.c \ |
15 | regex_random.c | ||
15 | libgnunetregex_la_LIBADD = -lm \ | 16 | libgnunetregex_la_LIBADD = -lm \ |
16 | $(top_builddir)/src/util/libgnunetutil.la | 17 | $(top_builddir)/src/util/libgnunetutil.la |
17 | libgnunetregex_la_LDFLAGS = \ | 18 | libgnunetregex_la_LDFLAGS = \ |
diff --git a/src/regex/regex.c b/src/regex/regex.c index 411c72c08..f237334d8 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #include "gnunet_container_lib.h" | 26 | #include "gnunet_container_lib.h" |
27 | #include "gnunet_crypto_lib.h" | 27 | #include "gnunet_crypto_lib.h" |
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | #include "regex.h" | 29 | #include "regex_internal.h" |
30 | 30 | ||
31 | /** | 31 | /** |
32 | * Constant for how many bits the initial string regex should have. | 32 | * Constant for how many bits the initial string regex should have. |
@@ -1078,12 +1078,6 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1078 | GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); | 1078 | GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); |
1079 | GNUNET_free (temp_a); | 1079 | GNUNET_free (temp_a); |
1080 | } | 1080 | } |
1081 | if (GNUNET_YES == needs_parentheses (R_last[i][j])) | ||
1082 | { | ||
1083 | temp_a = R_last[i][j]; | ||
1084 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); | ||
1085 | GNUNET_free (temp_a); | ||
1086 | } | ||
1087 | } | 1081 | } |
1088 | if (NULL == R_last[i][i]) | 1082 | if (NULL == R_last[i][i]) |
1089 | GNUNET_asprintf (&R_last[i][i], ""); | 1083 | GNUNET_asprintf (&R_last[i][i], ""); |
@@ -1094,7 +1088,16 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1094 | GNUNET_free (temp_a); | 1088 | GNUNET_free (temp_a); |
1095 | } | 1089 | } |
1096 | } | 1090 | } |
1091 | for (i = 0; i < n; i++) | ||
1092 | for (j = 0; j < n; j++) | ||
1093 | if (needs_parentheses (R_last[i][j])) | ||
1094 | { | ||
1095 | temp_a = R_last[i][j]; | ||
1096 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); | ||
1097 | GNUNET_free (temp_a); | ||
1098 | } | ||
1097 | 1099 | ||
1100 | // TODO: clean up and fix the induction part | ||
1098 | 1101 | ||
1099 | // INDUCTION | 1102 | // INDUCTION |
1100 | for (k = 0; k < n; k++) | 1103 | for (k = 0; k < n; k++) |
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h new file mode 100644 index 000000000..8ea597d40 --- /dev/null +++ b/src/regex/regex_internal.h | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_internal.h | ||
22 | * @brief common internal definitions for regex library | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #ifndef REGEX_INTERNAL_H | ||
26 | #define REGEX_INTERNAL_H | ||
27 | |||
28 | #include "gnunet_regex_lib.h" | ||
29 | |||
30 | #ifdef __cplusplus | ||
31 | extern "C" | ||
32 | { | ||
33 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
34 | } | ||
35 | #endif | ||
36 | #endif | ||
37 | |||
38 | /** | ||
39 | * char array of literals that are allowed inside a regex (apart from the | ||
40 | * operators) | ||
41 | */ | ||
42 | #define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" | ||
43 | |||
44 | |||
45 | /** | ||
46 | * Get the canonical regex of the given automaton. | ||
47 | * When constructing the automaton a proof is computed for each state, | ||
48 | * consisting of the regular expression leading to this state. A complete | ||
49 | * regex for the automaton can be computed by combining these proofs. | ||
50 | * As of now this function is only useful for testing. | ||
51 | * | ||
52 | * @param a automaton for which the canonical regex should be returned. | ||
53 | * | ||
54 | * @return | ||
55 | */ | ||
56 | const char * | ||
57 | GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a); | ||
58 | |||
59 | |||
60 | /** | ||
61 | * Generate a (pseudo) random regular expression of length 'rx_length', as well | ||
62 | * as a (optional) string that will be matched by the generated regex. The | ||
63 | * returned regex needs to be freed. | ||
64 | * | ||
65 | * @param rx_length length of the random regex. | ||
66 | * @param matching_str (optional) pointer to a string that will contain a string | ||
67 | * that will be matched by the generated regex, if | ||
68 | * 'matching_str' pointer was not NULL. | ||
69 | * | ||
70 | * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which | ||
71 | * needs to be freed, otherwise. | ||
72 | */ | ||
73 | char * | ||
74 | GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str); | ||
75 | |||
76 | |||
77 | /** | ||
78 | * Generate a random string of maximum length 'max_len' that only contains literals allowed | ||
79 | * in a regular expression. The string might be 0 chars long but is garantueed | ||
80 | * to be shorter or equal to 'max_len'. | ||
81 | * | ||
82 | * @param max_len maximum length of the string that should be generated. | ||
83 | * | ||
84 | * @return random string that needs to be freed. | ||
85 | */ | ||
86 | char * | ||
87 | GNUNET_REGEX_generate_random_string (size_t max_len); | ||
88 | |||
89 | #if 0 /* keep Emacsens' auto-indent happy */ | ||
90 | { | ||
91 | #endif | ||
92 | #ifdef __cplusplus | ||
93 | } | ||
94 | #endif | ||
95 | |||
96 | #endif | ||
diff --git a/src/regex/regex_random.c b/src/regex/regex_random.c new file mode 100644 index 000000000..3af9b7c5a --- /dev/null +++ b/src/regex/regex_random.c | |||
@@ -0,0 +1,170 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet | ||
3 | (C) 2012 Christian Grothoff (and other contributing authors) | ||
4 | |||
5 | GNUnet is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published | ||
7 | by the Free Software Foundation; either version 3, or (at your | ||
8 | option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with GNUnet; see the file COPYING. If not, write to the | ||
17 | Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
18 | Boston, MA 02111-1307, USA. | ||
19 | */ | ||
20 | /** | ||
21 | * @file src/regex/regex_random.c | ||
22 | * @brief functions for creating random regular expressions and strings | ||
23 | * @author Maximilian Szengel | ||
24 | */ | ||
25 | #include "platform.h" | ||
26 | #include "gnunet_regex_lib.h" | ||
27 | #include "gnunet_crypto_lib.h" | ||
28 | #include "regex_internal.h" | ||
29 | |||
30 | |||
31 | /** | ||
32 | * Get a (pseudo) random valid literal for building a regular expression. | ||
33 | * | ||
34 | * @return random valid literal | ||
35 | */ | ||
36 | char | ||
37 | get_random_literal () | ||
38 | { | ||
39 | uint32_t ridx; | ||
40 | |||
41 | ridx = | ||
42 | GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, | ||
43 | (uint32_t) strlen (ALLOWED_LITERALS)); | ||
44 | |||
45 | return ALLOWED_LITERALS[ridx]; | ||
46 | } | ||
47 | |||
48 | |||
49 | /** | ||
50 | * Generate a (pseudo) random regular expression of length 'rx_length', as well | ||
51 | * as a (optional) string that will be matched by the generated regex. The | ||
52 | * returned regex needs to be freed. | ||
53 | * | ||
54 | * @param rx_length length of the random regex. | ||
55 | * @param matching_str (optional) pointer to a string that will contain a string | ||
56 | * that will be matched by the generated regex, if | ||
57 | * 'matching_str' pointer was not NULL. Make sure you | ||
58 | * allocated at least rx_length+1 bytes for this sting. | ||
59 | * | ||
60 | * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which | ||
61 | * needs to be freed, otherwise. | ||
62 | */ | ||
63 | char * | ||
64 | GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str) | ||
65 | { | ||
66 | char *rx; | ||
67 | char *rx_p; | ||
68 | char *matching_strp; | ||
69 | unsigned int i; | ||
70 | unsigned int char_op_switch; | ||
71 | unsigned int last_was_op; | ||
72 | int rx_op; | ||
73 | char current_char; | ||
74 | |||
75 | if (0 == rx_length) | ||
76 | return NULL; | ||
77 | |||
78 | if (NULL != matching_str) | ||
79 | matching_strp = matching_str; | ||
80 | else | ||
81 | matching_strp = NULL; | ||
82 | |||
83 | rx = GNUNET_malloc (rx_length + 1); | ||
84 | rx_p = rx; | ||
85 | current_char = 0; | ||
86 | last_was_op = 1; | ||
87 | |||
88 | for (i = 0; i < rx_length; i++) | ||
89 | { | ||
90 | char_op_switch = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2); | ||
91 | |||
92 | if (0 == char_op_switch && !last_was_op) | ||
93 | { | ||
94 | last_was_op = 1; | ||
95 | rx_op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4); | ||
96 | |||
97 | switch (rx_op) | ||
98 | { | ||
99 | case 0: | ||
100 | current_char = '+'; | ||
101 | break; | ||
102 | case 1: | ||
103 | current_char = '*'; | ||
104 | break; | ||
105 | case 2: | ||
106 | current_char = '?'; | ||
107 | break; | ||
108 | case 3: | ||
109 | if (i < rx_length - 1) // '|' cannot be at the end | ||
110 | current_char = '|'; | ||
111 | else | ||
112 | current_char = get_random_literal (); | ||
113 | break; | ||
114 | } | ||
115 | } | ||
116 | else | ||
117 | { | ||
118 | current_char = get_random_literal (); | ||
119 | last_was_op = 0; | ||
120 | } | ||
121 | |||
122 | if (NULL != matching_strp && | ||
123 | (current_char != '+' && current_char != '*' && current_char != '?' && | ||
124 | current_char != '|')) | ||
125 | { | ||
126 | *matching_strp = current_char; | ||
127 | matching_strp++; | ||
128 | } | ||
129 | |||
130 | *rx_p = current_char; | ||
131 | rx_p++; | ||
132 | } | ||
133 | *rx_p = '\0'; | ||
134 | if (NULL != matching_strp) | ||
135 | *matching_strp = '\0'; | ||
136 | |||
137 | return rx; | ||
138 | } | ||
139 | |||
140 | /** | ||
141 | * Generate a random string of maximum length 'max_len' that only contains literals allowed | ||
142 | * in a regular expression. The string might be 0 chars long but is garantueed | ||
143 | * to be shorter or equal to 'max_len'. | ||
144 | * | ||
145 | * @param max_len maximum length of the string that should be generated. | ||
146 | * | ||
147 | * @return random string that needs to be freed. | ||
148 | */ | ||
149 | char * | ||
150 | GNUNET_REGEX_generate_random_string (size_t max_len) | ||
151 | { | ||
152 | unsigned int i; | ||
153 | char *str; | ||
154 | size_t len; | ||
155 | |||
156 | if (1 > max_len) | ||
157 | return GNUNET_strdup (""); | ||
158 | |||
159 | len = (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len); | ||
160 | str = GNUNET_malloc (len + 1); | ||
161 | |||
162 | for (i = 0; i < len; i++) | ||
163 | { | ||
164 | str[i] = get_random_literal (); | ||
165 | } | ||
166 | |||
167 | str[i] = '\0'; | ||
168 | |||
169 | return str; | ||
170 | } | ||
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index b6cdbe100..6d575a05c 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <time.h> | 26 | #include <time.h> |
27 | #include "platform.h" | 27 | #include "platform.h" |
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | #include "regex_internal.h" | ||
29 | 30 | ||
30 | enum Match_Result | 31 | enum Match_Result |
31 | { | 32 | { |
@@ -41,8 +42,6 @@ struct Regex_String_Pair | |||
41 | enum Match_Result expected_results[20]; | 42 | enum Match_Result expected_results[20]; |
42 | }; | 43 | }; |
43 | 44 | ||
44 | static const char allowed_literals[] = | ||
45 | "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; | ||
46 | 45 | ||
47 | /** | 46 | /** |
48 | * Random regex test. Generate a random regex as well as 'str_count' strings to | 47 | * Random regex test. Generate a random regex as well as 'str_count' strings to |
@@ -60,15 +59,8 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
60 | unsigned int str_count) | 59 | unsigned int str_count) |
61 | { | 60 | { |
62 | int i; | 61 | int i; |
63 | int j; | 62 | char *rand_rx; |
64 | int rx_exp; | 63 | char *matching_str; |
65 | char rand_rx[rx_length + 1]; | ||
66 | char matching_str[str_count][max_str_len + 1]; | ||
67 | char *rand_rxp; | ||
68 | char *matching_strp; | ||
69 | int char_op_switch; | ||
70 | int last_was_op; | ||
71 | char current_char; | ||
72 | int eval; | 64 | int eval; |
73 | int eval_check; | 65 | int eval_check; |
74 | int eval_canonical; | 66 | int eval_canonical; |
@@ -77,7 +69,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
77 | regmatch_t matchptr[1]; | 69 | regmatch_t matchptr[1]; |
78 | char error[200]; | 70 | char error[200]; |
79 | int result; | 71 | int result; |
80 | unsigned int str_len; | 72 | size_t str_len; |
81 | char *canonical_regex; | 73 | char *canonical_regex; |
82 | 74 | ||
83 | // At least one string is needed for matching | 75 | // At least one string is needed for matching |
@@ -85,76 +77,20 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
85 | // The string should be at least as long as the regex itself | 77 | // The string should be at least as long as the regex itself |
86 | GNUNET_assert (max_str_len >= rx_length); | 78 | GNUNET_assert (max_str_len >= rx_length); |
87 | 79 | ||
88 | rand_rxp = rand_rx; | ||
89 | matching_strp = matching_str[0]; | ||
90 | current_char = 0; | ||
91 | last_was_op = 1; | ||
92 | |||
93 | // Generate random regex and a string that matches the regex | 80 | // Generate random regex and a string that matches the regex |
94 | for (i = 0; i < rx_length; i++) | 81 | matching_str = GNUNET_malloc (rx_length + 1); |
95 | { | 82 | rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str); |
96 | char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0)); | ||
97 | |||
98 | if (0 == char_op_switch && !last_was_op) | ||
99 | { | ||
100 | last_was_op = 1; | ||
101 | rx_exp = rand () % 4; | ||
102 | |||
103 | switch (rx_exp) | ||
104 | { | ||
105 | case 0: | ||
106 | current_char = '+'; | ||
107 | break; | ||
108 | case 1: | ||
109 | current_char = '*'; | ||
110 | break; | ||
111 | case 2: | ||
112 | current_char = '?'; | ||
113 | break; | ||
114 | case 3: | ||
115 | if (i < rx_length - 1) // '|' cannot be at the end | ||
116 | current_char = '|'; | ||
117 | else | ||
118 | current_char = | ||
119 | allowed_literals[rand () % (sizeof (allowed_literals) - 1)]; | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | else | ||
124 | { | ||
125 | current_char = | ||
126 | allowed_literals[rand () % (sizeof (allowed_literals) - 1)]; | ||
127 | last_was_op = 0; | ||
128 | } | ||
129 | |||
130 | if (current_char != '+' && current_char != '*' && current_char != '?' && | ||
131 | current_char != '|') | ||
132 | { | ||
133 | *matching_strp = current_char; | ||
134 | matching_strp++; | ||
135 | } | ||
136 | |||
137 | *rand_rxp = current_char; | ||
138 | rand_rxp++; | ||
139 | } | ||
140 | *rand_rxp = '\0'; | ||
141 | *matching_strp = '\0'; | ||
142 | |||
143 | // Generate some random strings for matching... | ||
144 | // Start at 1, because the first string is generated above during regex generation | ||
145 | for (i = 1; i < str_count; i++) | ||
146 | { | ||
147 | str_len = rand () % max_str_len; | ||
148 | for (j = 0; j < str_len; j++) | ||
149 | matching_str[i][j] = | ||
150 | allowed_literals[rand () % (sizeof (allowed_literals) - 1)]; | ||
151 | matching_str[i][str_len] = '\0'; | ||
152 | } | ||
153 | 83 | ||
154 | // Now match | 84 | // Now match |
155 | result = 0; | 85 | result = 0; |
156 | for (i = 0; i < str_count; i++) | 86 | for (i = 0; i < str_count; i++) |
157 | { | 87 | { |
88 | if (0 < i) | ||
89 | { | ||
90 | matching_str = GNUNET_REGEX_generate_random_string (max_str_len); | ||
91 | str_len = strlen (matching_str); | ||
92 | } | ||
93 | |||
158 | // Match string using DFA | 94 | // Match string using DFA |
159 | dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); | 95 | dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); |
160 | if (NULL == dfa) | 96 | if (NULL == dfa) |
@@ -163,7 +99,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
163 | return -1; | 99 | return -1; |
164 | } | 100 | } |
165 | 101 | ||
166 | eval = GNUNET_REGEX_eval (dfa, matching_str[i]); | 102 | eval = GNUNET_REGEX_eval (dfa, matching_str); |
167 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); | 103 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); |
168 | GNUNET_REGEX_automaton_destroy (dfa); | 104 | GNUNET_REGEX_automaton_destroy (dfa); |
169 | 105 | ||
@@ -175,7 +111,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
175 | return -1; | 111 | return -1; |
176 | } | 112 | } |
177 | 113 | ||
178 | eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); | 114 | eval_check = regexec (&rx, matching_str, 1, matchptr, 0); |
179 | regfree (&rx); | 115 | regfree (&rx); |
180 | 116 | ||
181 | // Match canonical regex | 117 | // Match canonical regex |
@@ -187,14 +123,13 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
187 | return -1; | 123 | return -1; |
188 | } | 124 | } |
189 | 125 | ||
190 | eval_canonical = regexec (&rx, matching_str[i], 1, matchptr, 0); | 126 | eval_canonical = regexec (&rx, matching_str, 1, matchptr, 0); |
191 | regfree (&rx); | 127 | regfree (&rx); |
192 | GNUNET_free (canonical_regex); | 128 | GNUNET_free (canonical_regex); |
193 | 129 | ||
194 | // We only want to match the whole string, because that's what our DFA does, too. | 130 | // We only want to match the whole string, because that's what our DFA does, too. |
195 | if (eval_check == 0 && | 131 | if (eval_check == 0 && |
196 | (matchptr[0].rm_so != 0 || | 132 | (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str))) |
197 | matchptr[0].rm_eo != strlen (matching_str[i]))) | ||
198 | eval_check = 1; | 133 | eval_check = 1; |
199 | 134 | ||
200 | // compare result | 135 | // compare result |
@@ -206,7 +141,12 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
206 | rand_rx, matching_str, eval, eval_check, error); | 141 | rand_rx, matching_str, eval, eval_check, error); |
207 | result += 1; | 142 | result += 1; |
208 | } | 143 | } |
144 | |||
145 | GNUNET_free (matching_str); | ||
209 | } | 146 | } |
147 | |||
148 | GNUNET_free (rand_rx); | ||
149 | |||
210 | return result; | 150 | return result; |
211 | } | 151 | } |
212 | 152 | ||
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c index 5d0aabd00..85fc3079d 100644 --- a/src/regex/test_regex_proofs.c +++ b/src/regex/test_regex_proofs.c | |||
@@ -22,68 +22,146 @@ | |||
22 | * @brief test for regex.c | 22 | * @brief test for regex.c |
23 | * @author Maximilian Szengel | 23 | * @author Maximilian Szengel |
24 | */ | 24 | */ |
25 | #include <regex.h> | ||
26 | #include <time.h> | ||
27 | #include "platform.h" | 25 | #include "platform.h" |
28 | #include "gnunet_regex_lib.h" | 26 | #include "gnunet_regex_lib.h" |
27 | #include "regex_internal.h" | ||
29 | 28 | ||
30 | int | 29 | |
31 | main (int argc, char *argv[]) | 30 | /** |
31 | * Test if the given regex's canonical regex is the same as this canonical | ||
32 | * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the | ||
33 | * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from | ||
34 | * this canonical regex 3. compare the canonical regex of dfa A with the | ||
35 | * canonical regex of dfa B. | ||
36 | * | ||
37 | * @param regex regular expression used for this test (see above). | ||
38 | * | ||
39 | * @return 0 on success, 1 on failure | ||
40 | */ | ||
41 | unsigned int | ||
42 | test_proof (const char *regex) | ||
32 | { | 43 | { |
33 | GNUNET_log_setup ("test-regex", | 44 | unsigned int error; |
34 | #if VERBOSE | 45 | struct GNUNET_REGEX_Automaton *dfa; |
35 | "DEBUG", | 46 | char *c_rx1; |
36 | #else | 47 | const char *c_rx2; |
37 | "WARNING", | ||
38 | #endif | ||
39 | NULL); | ||
40 | 48 | ||
41 | int error; | 49 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); |
42 | int i; | 50 | c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); |
43 | 51 | GNUNET_REGEX_automaton_destroy (dfa); | |
44 | const char *regex[21] = { | 52 | dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1)); |
45 | "ab(c|d)+c*(a(b|c)+d)+(bla)+", | 53 | c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa); |
46 | "(bla)*", | 54 | |
47 | "b(lab)*la", | 55 | error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1; |
48 | "(ab)*", | 56 | |
49 | "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", | 57 | if (error > 0) |
50 | "z(abc|def)?xyz", | 58 | { |
51 | "1*0(0|1)*", | 59 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
52 | "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", | 60 | "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n", |
53 | "(cd|ab)*", | 61 | regex, c_rx1, c_rx2); |
54 | "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", | 62 | } |
55 | "abc(1|0)*def", | 63 | |
56 | "ab|ac", | 64 | GNUNET_free (c_rx1); |
57 | "(ab)(ab)*", | 65 | GNUNET_REGEX_automaton_destroy (dfa); |
58 | "ab|cd|ef|gh", | 66 | |
59 | "a|b|c|d|e|f|g", | 67 | return error; |
60 | "(ab)|(ac)", | 68 | } |
61 | "a(b|c)", | 69 | |
62 | "a*a", | 70 | /** |
63 | "ab?(abcd)?", | 71 | * Use 'test_proof' function to randomly test the canonical regexes of 'count' |
64 | "(ab|cs|df|sdf)*", | 72 | * random expressions of length 'rx_length'. |
65 | "a|aa*a" | 73 | * |
74 | * @param count number of random regular expressions to test. | ||
75 | * @param rx_length length of the random regular expressions. | ||
76 | * | ||
77 | * @return 0 on succes, number of failures otherwise. | ||
78 | */ | ||
79 | unsigned int | ||
80 | test_proofs_random (unsigned int count, size_t rx_length) | ||
81 | { | ||
82 | unsigned int i; | ||
83 | char *rand_rx; | ||
84 | unsigned int failures; | ||
85 | |||
86 | failures = 0; | ||
87 | |||
88 | for (i = 0; i < count; i++) | ||
89 | { | ||
90 | rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, NULL); | ||
91 | failures += test_proof (rand_rx); | ||
92 | GNUNET_free (rand_rx); | ||
93 | } | ||
94 | |||
95 | return failures; | ||
96 | } | ||
97 | |||
98 | /** | ||
99 | * Test a number of known examples of regexes for proper canonicalization. | ||
100 | * | ||
101 | * @return 0 on success, number of failures otherwise. | ||
102 | */ | ||
103 | unsigned int | ||
104 | test_proofs_static (void) | ||
105 | { | ||
106 | unsigned int i; | ||
107 | unsigned int error; | ||
108 | |||
109 | const char *regex[4] = { | ||
110 | "a|aa*a", | ||
111 | "a+", | ||
112 | "a*", | ||
113 | "a*a*" | ||
66 | }; | 114 | }; |
115 | |||
67 | char *canonical_regex; | 116 | char *canonical_regex; |
68 | struct GNUNET_REGEX_Automaton *dfa; | 117 | struct GNUNET_REGEX_Automaton *dfa; |
69 | 118 | ||
70 | error = 0; | 119 | error = 0; |
71 | 120 | ||
72 | for (i = 0; i < 21; i++) | 121 | for (i = 0; i < 4; i += 2) |
73 | { | 122 | { |
74 | dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 123 | dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); |
75 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); | 124 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); |
76 | GNUNET_REGEX_automaton_destroy (dfa); | 125 | GNUNET_REGEX_automaton_destroy (dfa); |
77 | 126 | ||
78 | dfa = | 127 | dfa = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1])); |
79 | GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex)); | ||
80 | error += | 128 | error += |
81 | (0 == | 129 | (0 == |
82 | strcmp (canonical_regex, | 130 | strcmp (canonical_regex, |
83 | GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1; | 131 | GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1; |
132 | |||
133 | if (error > 0) | ||
134 | { | ||
135 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | ||
136 | "Comparing canonical regex of %s with %s failed.\n", regex[i], | ||
137 | regex[i + 1]); | ||
138 | } | ||
139 | |||
84 | GNUNET_free (canonical_regex); | 140 | GNUNET_free (canonical_regex); |
85 | GNUNET_REGEX_automaton_destroy (dfa); | 141 | GNUNET_REGEX_automaton_destroy (dfa); |
86 | } | 142 | } |
87 | 143 | ||
88 | return error; | 144 | return error; |
89 | } | 145 | } |
146 | |||
147 | |||
148 | int | ||
149 | main (int argc, char *argv[]) | ||
150 | { | ||
151 | GNUNET_log_setup ("test-regex", | ||
152 | #if VERBOSE | ||
153 | "DEBUG", | ||
154 | #else | ||
155 | "WARNING", | ||
156 | #endif | ||
157 | NULL); | ||
158 | |||
159 | int error; | ||
160 | |||
161 | error = 0; | ||
162 | |||
163 | error += test_proofs_static (); | ||
164 | // error += test_proofs_random (100, 10); | ||
165 | |||
166 | return error; | ||
167 | } | ||