aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-06-27 16:13:48 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-06-27 16:13:48 +0000
commit24f2c9d570bd181c622955506f6ecc000d5b2a98 (patch)
tree9bfb6f07aae4ce7e6df353becbb371fb63c24651 /src
parent039c0a8e1c193c692c4f492d75cc2b98643203ef (diff)
downloadgnunet-24f2c9d570bd181c622955506f6ecc000d5b2a98.tar.gz
gnunet-24f2c9d570bd181c622955506f6ecc000d5b2a98.zip
new and improved tests
Diffstat (limited to 'src')
-rw-r--r--src/include/gnunet_regex_lib.h22
-rw-r--r--src/regex/Makefile.am3
-rw-r--r--src/regex/regex.c17
-rw-r--r--src/regex/regex_internal.h96
-rw-r--r--src/regex/regex_random.c170
-rw-r--r--src/regex/test_regex_eval_api.c102
-rw-r--r--src/regex/test_regex_proofs.c156
7 files changed, 425 insertions, 141 deletions
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h
index 64a370df3..911128647 100644
--- a/src/include/gnunet_regex_lib.h
+++ b/src/include/gnunet_regex_lib.h
@@ -42,6 +42,7 @@ extern "C"
42 */ 42 */
43struct GNUNET_REGEX_Automaton; 43struct GNUNET_REGEX_Automaton;
44 44
45
45/** 46/**
46 * Edge representation. 47 * Edge representation.
47 */ 48 */
@@ -58,6 +59,7 @@ struct GNUNET_REGEX_Edge
58 struct GNUNET_HashCode destination; 59 struct GNUNET_HashCode destination;
59}; 60};
60 61
62
61/** 63/**
62 * Construct an NFA by parsing the regex string of length 'len'. 64 * Construct an NFA by parsing the regex string of length 'len'.
63 * 65 *
@@ -69,6 +71,7 @@ struct GNUNET_REGEX_Edge
69struct GNUNET_REGEX_Automaton * 71struct GNUNET_REGEX_Automaton *
70GNUNET_REGEX_construct_nfa (const char *regex, const size_t len); 72GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
71 73
74
72/** 75/**
73 * Construct DFA for the given 'regex' of length 'len'. 76 * Construct DFA for the given 'regex' of length 'len'.
74 * 77 *
@@ -80,6 +83,7 @@ GNUNET_REGEX_construct_nfa (const char *regex, const size_t len);
80struct GNUNET_REGEX_Automaton * 83struct GNUNET_REGEX_Automaton *
81GNUNET_REGEX_construct_dfa (const char *regex, const size_t len); 84GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);
82 85
86
83/** 87/**
84 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton. 88 * Free the memory allocated by constructing the GNUNET_REGEX_Automaton.
85 * data structure. 89 * data structure.
@@ -89,6 +93,7 @@ GNUNET_REGEX_construct_dfa (const char *regex, const size_t len);
89void 93void
90GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a); 94GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a);
91 95
96
92/** 97/**
93 * Save the given automaton as a GraphViz dot file. 98 * Save the given automaton as a GraphViz dot file.
94 * 99 *
@@ -111,19 +116,6 @@ int
111GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, 116GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a,
112 const char *string); 117 const char *string);
113 118
114/**
115 * Get the canonical regex of the given automaton.
116 * When constructing the automaton a proof is computed for each state,
117 * consisting of the regular expression leading to this state. A complete
118 * regex for the automaton can be computed by combining these proofs.
119 * As of now this function is only useful for testing.
120 *
121 * @param a automaton for which the canonical regex should be returned.
122 *
123 * @return
124 */
125const char *
126GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
127 119
128/** 120/**
129 * Get the first key for the given 'input_string'. This hashes 121 * Get the first key for the given 'input_string'. This hashes
@@ -140,6 +132,7 @@ unsigned int /* FIXME: size_t */
140GNUNET_REGEX_get_first_key (const char *input_string, /* FIXME: size_t */ unsigned int string_len, 132GNUNET_REGEX_get_first_key (const char *input_string, /* FIXME: size_t */ unsigned int string_len,
141 struct GNUNET_HashCode * key); 133 struct GNUNET_HashCode * key);
142 134
135
143/** 136/**
144 * Check if the given 'proof' matches the given 'key'. 137 * Check if the given 'proof' matches the given 'key'.
145 * 138 *
@@ -152,6 +145,7 @@ int
152GNUNET_REGEX_check_proof (const char *proof, 145GNUNET_REGEX_check_proof (const char *proof,
153 const struct GNUNET_HashCode *key); 146 const struct GNUNET_HashCode *key);
154 147
148
155/** 149/**
156 * Iterator callback function. 150 * Iterator callback function.
157 * 151 *
@@ -169,6 +163,7 @@ typedef void (*GNUNET_REGEX_KeyIterator)(void *cls,
169 unsigned int num_edges, 163 unsigned int num_edges,
170 const struct GNUNET_REGEX_Edge *edges); 164 const struct GNUNET_REGEX_Edge *edges);
171 165
166
172/** 167/**
173 * Iterate over all edges starting from start state of automaton 'a'. Calling 168 * Iterate over all edges starting from start state of automaton 'a'. Calling
174 * iterator for each edge. 169 * iterator for each edge.
@@ -182,6 +177,7 @@ GNUNET_REGEX_iterate_all_edges (struct GNUNET_REGEX_Automaton *a,
182 GNUNET_REGEX_KeyIterator iterator, 177 GNUNET_REGEX_KeyIterator iterator,
183 void *iterator_cls); 178 void *iterator_cls);
184 179
180
185#if 0 /* keep Emacsens' auto-indent happy */ 181#if 0 /* keep Emacsens' auto-indent happy */
186{ 182{
187#endif 183#endif
diff --git a/src/regex/Makefile.am b/src/regex/Makefile.am
index cb9bc093a..1284111d8 100644
--- a/src/regex/Makefile.am
+++ b/src/regex/Makefile.am
@@ -11,7 +11,8 @@ endif
11lib_LTLIBRARIES = libgnunetregex.la 11lib_LTLIBRARIES = libgnunetregex.la
12 12
13libgnunetregex_la_SOURCES = \ 13libgnunetregex_la_SOURCES = \
14 regex.c 14 regex_internal.h regex.c \
15 regex_random.c
15libgnunetregex_la_LIBADD = -lm \ 16libgnunetregex_la_LIBADD = -lm \
16 $(top_builddir)/src/util/libgnunetutil.la 17 $(top_builddir)/src/util/libgnunetutil.la
17libgnunetregex_la_LDFLAGS = \ 18libgnunetregex_la_LDFLAGS = \
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 411c72c08..f237334d8 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -26,7 +26,7 @@
26#include "gnunet_container_lib.h" 26#include "gnunet_container_lib.h"
27#include "gnunet_crypto_lib.h" 27#include "gnunet_crypto_lib.h"
28#include "gnunet_regex_lib.h" 28#include "gnunet_regex_lib.h"
29#include "regex.h" 29#include "regex_internal.h"
30 30
31/** 31/**
32 * Constant for how many bits the initial string regex should have. 32 * Constant for how many bits the initial string regex should have.
@@ -1078,12 +1078,6 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1078 GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); 1078 GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label);
1079 GNUNET_free (temp_a); 1079 GNUNET_free (temp_a);
1080 } 1080 }
1081 if (GNUNET_YES == needs_parentheses (R_last[i][j]))
1082 {
1083 temp_a = R_last[i][j];
1084 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
1085 GNUNET_free (temp_a);
1086 }
1087 } 1081 }
1088 if (NULL == R_last[i][i]) 1082 if (NULL == R_last[i][i])
1089 GNUNET_asprintf (&R_last[i][i], ""); 1083 GNUNET_asprintf (&R_last[i][i], "");
@@ -1094,7 +1088,16 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1094 GNUNET_free (temp_a); 1088 GNUNET_free (temp_a);
1095 } 1089 }
1096 } 1090 }
1091 for (i = 0; i < n; i++)
1092 for (j = 0; j < n; j++)
1093 if (needs_parentheses (R_last[i][j]))
1094 {
1095 temp_a = R_last[i][j];
1096 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
1097 GNUNET_free (temp_a);
1098 }
1097 1099
1100 // TODO: clean up and fix the induction part
1098 1101
1099 // INDUCTION 1102 // INDUCTION
1100 for (k = 0; k < n; k++) 1103 for (k = 0; k < n; k++)
diff --git a/src/regex/regex_internal.h b/src/regex/regex_internal.h
new file mode 100644
index 000000000..8ea597d40
--- /dev/null
+++ b/src/regex/regex_internal.h
@@ -0,0 +1,96 @@
1/*
2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file src/regex/regex_internal.h
22 * @brief common internal definitions for regex library
23 * @author Maximilian Szengel
24 */
25#ifndef REGEX_INTERNAL_H
26#define REGEX_INTERNAL_H
27
28#include "gnunet_regex_lib.h"
29
30#ifdef __cplusplus
31extern "C"
32{
33#if 0 /* keep Emacsens' auto-indent happy */
34}
35#endif
36#endif
37
38/**
39 * char array of literals that are allowed inside a regex (apart from the
40 * operators)
41 */
42#define ALLOWED_LITERALS "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
43
44
45/**
46 * Get the canonical regex of the given automaton.
47 * When constructing the automaton a proof is computed for each state,
48 * consisting of the regular expression leading to this state. A complete
49 * regex for the automaton can be computed by combining these proofs.
50 * As of now this function is only useful for testing.
51 *
52 * @param a automaton for which the canonical regex should be returned.
53 *
54 * @return
55 */
56const char *
57GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a);
58
59
60/**
61 * Generate a (pseudo) random regular expression of length 'rx_length', as well
62 * as a (optional) string that will be matched by the generated regex. The
63 * returned regex needs to be freed.
64 *
65 * @param rx_length length of the random regex.
66 * @param matching_str (optional) pointer to a string that will contain a string
67 * that will be matched by the generated regex, if
68 * 'matching_str' pointer was not NULL.
69 *
70 * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
71 * needs to be freed, otherwise.
72 */
73char *
74GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str);
75
76
77/**
78 * Generate a random string of maximum length 'max_len' that only contains literals allowed
79 * in a regular expression. The string might be 0 chars long but is garantueed
80 * to be shorter or equal to 'max_len'.
81 *
82 * @param max_len maximum length of the string that should be generated.
83 *
84 * @return random string that needs to be freed.
85 */
86char *
87GNUNET_REGEX_generate_random_string (size_t max_len);
88
89#if 0 /* keep Emacsens' auto-indent happy */
90{
91#endif
92#ifdef __cplusplus
93}
94#endif
95
96#endif
diff --git a/src/regex/regex_random.c b/src/regex/regex_random.c
new file mode 100644
index 000000000..3af9b7c5a
--- /dev/null
+++ b/src/regex/regex_random.c
@@ -0,0 +1,170 @@
1/*
2 This file is part of GNUnet
3 (C) 2012 Christian Grothoff (and other contributing authors)
4
5 GNUnet is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; either version 3, or (at your
8 option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GNUnet; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 Boston, MA 02111-1307, USA.
19*/
20/**
21 * @file src/regex/regex_random.c
22 * @brief functions for creating random regular expressions and strings
23 * @author Maximilian Szengel
24 */
25#include "platform.h"
26#include "gnunet_regex_lib.h"
27#include "gnunet_crypto_lib.h"
28#include "regex_internal.h"
29
30
31/**
32 * Get a (pseudo) random valid literal for building a regular expression.
33 *
34 * @return random valid literal
35 */
36char
37get_random_literal ()
38{
39 uint32_t ridx;
40
41 ridx =
42 GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
43 (uint32_t) strlen (ALLOWED_LITERALS));
44
45 return ALLOWED_LITERALS[ridx];
46}
47
48
49/**
50 * Generate a (pseudo) random regular expression of length 'rx_length', as well
51 * as a (optional) string that will be matched by the generated regex. The
52 * returned regex needs to be freed.
53 *
54 * @param rx_length length of the random regex.
55 * @param matching_str (optional) pointer to a string that will contain a string
56 * that will be matched by the generated regex, if
57 * 'matching_str' pointer was not NULL. Make sure you
58 * allocated at least rx_length+1 bytes for this sting.
59 *
60 * @return NULL if 'rx_length' is 0, a random regex of length 'rx_length', which
61 * needs to be freed, otherwise.
62 */
63char *
64GNUNET_REGEX_generate_random_regex (size_t rx_length, char *matching_str)
65{
66 char *rx;
67 char *rx_p;
68 char *matching_strp;
69 unsigned int i;
70 unsigned int char_op_switch;
71 unsigned int last_was_op;
72 int rx_op;
73 char current_char;
74
75 if (0 == rx_length)
76 return NULL;
77
78 if (NULL != matching_str)
79 matching_strp = matching_str;
80 else
81 matching_strp = NULL;
82
83 rx = GNUNET_malloc (rx_length + 1);
84 rx_p = rx;
85 current_char = 0;
86 last_was_op = 1;
87
88 for (i = 0; i < rx_length; i++)
89 {
90 char_op_switch = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 2);
91
92 if (0 == char_op_switch && !last_was_op)
93 {
94 last_was_op = 1;
95 rx_op = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, 4);
96
97 switch (rx_op)
98 {
99 case 0:
100 current_char = '+';
101 break;
102 case 1:
103 current_char = '*';
104 break;
105 case 2:
106 current_char = '?';
107 break;
108 case 3:
109 if (i < rx_length - 1) // '|' cannot be at the end
110 current_char = '|';
111 else
112 current_char = get_random_literal ();
113 break;
114 }
115 }
116 else
117 {
118 current_char = get_random_literal ();
119 last_was_op = 0;
120 }
121
122 if (NULL != matching_strp &&
123 (current_char != '+' && current_char != '*' && current_char != '?' &&
124 current_char != '|'))
125 {
126 *matching_strp = current_char;
127 matching_strp++;
128 }
129
130 *rx_p = current_char;
131 rx_p++;
132 }
133 *rx_p = '\0';
134 if (NULL != matching_strp)
135 *matching_strp = '\0';
136
137 return rx;
138}
139
140/**
141 * Generate a random string of maximum length 'max_len' that only contains literals allowed
142 * in a regular expression. The string might be 0 chars long but is garantueed
143 * to be shorter or equal to 'max_len'.
144 *
145 * @param max_len maximum length of the string that should be generated.
146 *
147 * @return random string that needs to be freed.
148 */
149char *
150GNUNET_REGEX_generate_random_string (size_t max_len)
151{
152 unsigned int i;
153 char *str;
154 size_t len;
155
156 if (1 > max_len)
157 return GNUNET_strdup ("");
158
159 len = (size_t) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, max_len);
160 str = GNUNET_malloc (len + 1);
161
162 for (i = 0; i < len; i++)
163 {
164 str[i] = get_random_literal ();
165 }
166
167 str[i] = '\0';
168
169 return str;
170}
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c
index b6cdbe100..6d575a05c 100644
--- a/src/regex/test_regex_eval_api.c
+++ b/src/regex/test_regex_eval_api.c
@@ -26,6 +26,7 @@
26#include <time.h> 26#include <time.h>
27#include "platform.h" 27#include "platform.h"
28#include "gnunet_regex_lib.h" 28#include "gnunet_regex_lib.h"
29#include "regex_internal.h"
29 30
30enum Match_Result 31enum Match_Result
31{ 32{
@@ -41,8 +42,6 @@ struct Regex_String_Pair
41 enum Match_Result expected_results[20]; 42 enum Match_Result expected_results[20];
42}; 43};
43 44
44static const char allowed_literals[] =
45 "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
46 45
47/** 46/**
48 * Random regex test. Generate a random regex as well as 'str_count' strings to 47 * Random regex test. Generate a random regex as well as 'str_count' strings to
@@ -60,15 +59,8 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
60 unsigned int str_count) 59 unsigned int str_count)
61{ 60{
62 int i; 61 int i;
63 int j; 62 char *rand_rx;
64 int rx_exp; 63 char *matching_str;
65 char rand_rx[rx_length + 1];
66 char matching_str[str_count][max_str_len + 1];
67 char *rand_rxp;
68 char *matching_strp;
69 int char_op_switch;
70 int last_was_op;
71 char current_char;
72 int eval; 64 int eval;
73 int eval_check; 65 int eval_check;
74 int eval_canonical; 66 int eval_canonical;
@@ -77,7 +69,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
77 regmatch_t matchptr[1]; 69 regmatch_t matchptr[1];
78 char error[200]; 70 char error[200];
79 int result; 71 int result;
80 unsigned int str_len; 72 size_t str_len;
81 char *canonical_regex; 73 char *canonical_regex;
82 74
83 // At least one string is needed for matching 75 // At least one string is needed for matching
@@ -85,76 +77,20 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
85 // The string should be at least as long as the regex itself 77 // The string should be at least as long as the regex itself
86 GNUNET_assert (max_str_len >= rx_length); 78 GNUNET_assert (max_str_len >= rx_length);
87 79
88 rand_rxp = rand_rx;
89 matching_strp = matching_str[0];
90 current_char = 0;
91 last_was_op = 1;
92
93 // Generate random regex and a string that matches the regex 80 // Generate random regex and a string that matches the regex
94 for (i = 0; i < rx_length; i++) 81 matching_str = GNUNET_malloc (rx_length + 1);
95 { 82 rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, matching_str);
96 char_op_switch = 0 + (int) (1.0 * rand () / (RAND_MAX + 1.0));
97
98 if (0 == char_op_switch && !last_was_op)
99 {
100 last_was_op = 1;
101 rx_exp = rand () % 4;
102
103 switch (rx_exp)
104 {
105 case 0:
106 current_char = '+';
107 break;
108 case 1:
109 current_char = '*';
110 break;
111 case 2:
112 current_char = '?';
113 break;
114 case 3:
115 if (i < rx_length - 1) // '|' cannot be at the end
116 current_char = '|';
117 else
118 current_char =
119 allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
120 break;
121 }
122 }
123 else
124 {
125 current_char =
126 allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
127 last_was_op = 0;
128 }
129
130 if (current_char != '+' && current_char != '*' && current_char != '?' &&
131 current_char != '|')
132 {
133 *matching_strp = current_char;
134 matching_strp++;
135 }
136
137 *rand_rxp = current_char;
138 rand_rxp++;
139 }
140 *rand_rxp = '\0';
141 *matching_strp = '\0';
142
143 // Generate some random strings for matching...
144 // Start at 1, because the first string is generated above during regex generation
145 for (i = 1; i < str_count; i++)
146 {
147 str_len = rand () % max_str_len;
148 for (j = 0; j < str_len; j++)
149 matching_str[i][j] =
150 allowed_literals[rand () % (sizeof (allowed_literals) - 1)];
151 matching_str[i][str_len] = '\0';
152 }
153 83
154 // Now match 84 // Now match
155 result = 0; 85 result = 0;
156 for (i = 0; i < str_count; i++) 86 for (i = 0; i < str_count; i++)
157 { 87 {
88 if (0 < i)
89 {
90 matching_str = GNUNET_REGEX_generate_random_string (max_str_len);
91 str_len = strlen (matching_str);
92 }
93
158 // Match string using DFA 94 // Match string using DFA
159 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx)); 95 dfa = GNUNET_REGEX_construct_dfa (rand_rx, strlen (rand_rx));
160 if (NULL == dfa) 96 if (NULL == dfa)
@@ -163,7 +99,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
163 return -1; 99 return -1;
164 } 100 }
165 101
166 eval = GNUNET_REGEX_eval (dfa, matching_str[i]); 102 eval = GNUNET_REGEX_eval (dfa, matching_str);
167 canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); 103 canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
168 GNUNET_REGEX_automaton_destroy (dfa); 104 GNUNET_REGEX_automaton_destroy (dfa);
169 105
@@ -175,7 +111,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
175 return -1; 111 return -1;
176 } 112 }
177 113
178 eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); 114 eval_check = regexec (&rx, matching_str, 1, matchptr, 0);
179 regfree (&rx); 115 regfree (&rx);
180 116
181 // Match canonical regex 117 // Match canonical regex
@@ -187,14 +123,13 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
187 return -1; 123 return -1;
188 } 124 }
189 125
190 eval_canonical = regexec (&rx, matching_str[i], 1, matchptr, 0); 126 eval_canonical = regexec (&rx, matching_str, 1, matchptr, 0);
191 regfree (&rx); 127 regfree (&rx);
192 GNUNET_free (canonical_regex); 128 GNUNET_free (canonical_regex);
193 129
194 // We only want to match the whole string, because that's what our DFA does, too. 130 // We only want to match the whole string, because that's what our DFA does, too.
195 if (eval_check == 0 && 131 if (eval_check == 0 &&
196 (matchptr[0].rm_so != 0 || 132 (matchptr[0].rm_so != 0 || matchptr[0].rm_eo != strlen (matching_str)))
197 matchptr[0].rm_eo != strlen (matching_str[i])))
198 eval_check = 1; 133 eval_check = 1;
199 134
200 // compare result 135 // compare result
@@ -206,7 +141,12 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
206 rand_rx, matching_str, eval, eval_check, error); 141 rand_rx, matching_str, eval, eval_check, error);
207 result += 1; 142 result += 1;
208 } 143 }
144
145 GNUNET_free (matching_str);
209 } 146 }
147
148 GNUNET_free (rand_rx);
149
210 return result; 150 return result;
211} 151}
212 152
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c
index 5d0aabd00..85fc3079d 100644
--- a/src/regex/test_regex_proofs.c
+++ b/src/regex/test_regex_proofs.c
@@ -22,68 +22,146 @@
22 * @brief test for regex.c 22 * @brief test for regex.c
23 * @author Maximilian Szengel 23 * @author Maximilian Szengel
24 */ 24 */
25#include <regex.h>
26#include <time.h>
27#include "platform.h" 25#include "platform.h"
28#include "gnunet_regex_lib.h" 26#include "gnunet_regex_lib.h"
27#include "regex_internal.h"
29 28
30int 29
31main (int argc, char *argv[]) 30/**
31 * Test if the given regex's canonical regex is the same as this canonical
32 * regex's canonical regex. Confused? Ok, then: 1. construct a dfa A from the
33 * given 'regex' 2. get the canonical regex of dfa A 3. construct a dfa B from
34 * this canonical regex 3. compare the canonical regex of dfa A with the
35 * canonical regex of dfa B.
36 *
37 * @param regex regular expression used for this test (see above).
38 *
39 * @return 0 on success, 1 on failure
40 */
41unsigned int
42test_proof (const char *regex)
32{ 43{
33 GNUNET_log_setup ("test-regex", 44 unsigned int error;
34#if VERBOSE 45 struct GNUNET_REGEX_Automaton *dfa;
35 "DEBUG", 46 char *c_rx1;
36#else 47 const char *c_rx2;
37 "WARNING",
38#endif
39 NULL);
40 48
41 int error; 49 dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex));
42 int i; 50 c_rx1 = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
43 51 GNUNET_REGEX_automaton_destroy (dfa);
44 const char *regex[21] = { 52 dfa = GNUNET_REGEX_construct_dfa (c_rx1, strlen (c_rx1));
45 "ab(c|d)+c*(a(b|c)+d)+(bla)+", 53 c_rx2 = GNUNET_REGEX_get_canonical_regex (dfa);
46 "(bla)*", 54
47 "b(lab)*la", 55 error = (0 == strcmp (c_rx1, c_rx2)) ? 0 : 1;
48 "(ab)*", 56
49 "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*", 57 if (error > 0)
50 "z(abc|def)?xyz", 58 {
51 "1*0(0|1)*", 59 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
52 "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*", 60 "Comparing canonical regex of\n%s\nfailed:\n%s\nvs.\n%s\n",
53 "(cd|ab)*", 61 regex, c_rx1, c_rx2);
54 "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)", 62 }
55 "abc(1|0)*def", 63
56 "ab|ac", 64 GNUNET_free (c_rx1);
57 "(ab)(ab)*", 65 GNUNET_REGEX_automaton_destroy (dfa);
58 "ab|cd|ef|gh", 66
59 "a|b|c|d|e|f|g", 67 return error;
60 "(ab)|(ac)", 68}
61 "a(b|c)", 69
62 "a*a", 70/**
63 "ab?(abcd)?", 71 * Use 'test_proof' function to randomly test the canonical regexes of 'count'
64 "(ab|cs|df|sdf)*", 72 * random expressions of length 'rx_length'.
65 "a|aa*a" 73 *
74 * @param count number of random regular expressions to test.
75 * @param rx_length length of the random regular expressions.
76 *
77 * @return 0 on succes, number of failures otherwise.
78 */
79unsigned int
80test_proofs_random (unsigned int count, size_t rx_length)
81{
82 unsigned int i;
83 char *rand_rx;
84 unsigned int failures;
85
86 failures = 0;
87
88 for (i = 0; i < count; i++)
89 {
90 rand_rx = GNUNET_REGEX_generate_random_regex (rx_length, NULL);
91 failures += test_proof (rand_rx);
92 GNUNET_free (rand_rx);
93 }
94
95 return failures;
96}
97
98/**
99 * Test a number of known examples of regexes for proper canonicalization.
100 *
101 * @return 0 on success, number of failures otherwise.
102 */
103unsigned int
104test_proofs_static (void)
105{
106 unsigned int i;
107 unsigned int error;
108
109 const char *regex[4] = {
110 "a|aa*a",
111 "a+",
112 "a*",
113 "a*a*"
66 }; 114 };
115
67 char *canonical_regex; 116 char *canonical_regex;
68 struct GNUNET_REGEX_Automaton *dfa; 117 struct GNUNET_REGEX_Automaton *dfa;
69 118
70 error = 0; 119 error = 0;
71 120
72 for (i = 0; i < 21; i++) 121 for (i = 0; i < 4; i += 2)
73 { 122 {
74 dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); 123 dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i]));
75 canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); 124 canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa));
76 GNUNET_REGEX_automaton_destroy (dfa); 125 GNUNET_REGEX_automaton_destroy (dfa);
77 126
78 dfa = 127 dfa = GNUNET_REGEX_construct_dfa (regex[i + 1], strlen (regex[i + 1]));
79 GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex));
80 error += 128 error +=
81 (0 == 129 (0 ==
82 strcmp (canonical_regex, 130 strcmp (canonical_regex,
83 GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1; 131 GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1;
132
133 if (error > 0)
134 {
135 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
136 "Comparing canonical regex of %s with %s failed.\n", regex[i],
137 regex[i + 1]);
138 }
139
84 GNUNET_free (canonical_regex); 140 GNUNET_free (canonical_regex);
85 GNUNET_REGEX_automaton_destroy (dfa); 141 GNUNET_REGEX_automaton_destroy (dfa);
86 } 142 }
87 143
88 return error; 144 return error;
89} 145}
146
147
148int
149main (int argc, char *argv[])
150{
151 GNUNET_log_setup ("test-regex",
152#if VERBOSE
153 "DEBUG",
154#else
155 "WARNING",
156#endif
157 NULL);
158
159 int error;
160
161 error = 0;
162
163 error += test_proofs_static ();
164// error += test_proofs_random (100, 10);
165
166 return error;
167}