diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-26 13:52:08 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-26 13:52:08 +0000 |
commit | d6871d5ea20b9cd746431f4cefcc78740e246a99 (patch) | |
tree | da33ba19c9b8b8cdd46556ccddd2b168305437d4 | |
parent | 3a3e4c0f1b588f687ff98fcee4952fa02e97dfb2 (diff) | |
download | gnunet-d6871d5ea20b9cd746431f4cefcc78740e246a99.tar.gz gnunet-d6871d5ea20b9cd746431f4cefcc78740e246a99.zip |
doxygen fixes
-rw-r--r-- | src/include/gnunet_regex_lib.h | 12 | ||||
-rw-r--r-- | src/regex/regex.c | 166 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 22 | ||||
-rw-r--r-- | src/regex/test_regex_proofs.c | 17 |
4 files changed, 129 insertions, 88 deletions
diff --git a/src/include/gnunet_regex_lib.h b/src/include/gnunet_regex_lib.h index 22d9a36f6..64a370df3 100644 --- a/src/include/gnunet_regex_lib.h +++ b/src/include/gnunet_regex_lib.h | |||
@@ -111,15 +111,19 @@ int | |||
111 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, | 111 | GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, |
112 | const char *string); | 112 | const char *string); |
113 | 113 | ||
114 | /** | 114 | /** |
115 | * Get the computed regex of the given automaton. | 115 | * Get the canonical regex of the given automaton. |
116 | * When constructing the automaton a proof is computed for each state, | 116 | * When constructing the automaton a proof is computed for each state, |
117 | * consisting of the regular expression leading to this state. A complete | 117 | * consisting of the regular expression leading to this state. A complete |
118 | * regex for the automaton can be computed by combining these proofs. | 118 | * regex for the automaton can be computed by combining these proofs. |
119 | * As of now this computed regex is only useful for testing. | 119 | * As of now this function is only useful for testing. |
120 | * | ||
121 | * @param a automaton for which the canonical regex should be returned. | ||
122 | * | ||
123 | * @return | ||
120 | */ | 124 | */ |
121 | const char * | 125 | const char * |
122 | GNUNET_REGEX_get_computed_regex (struct GNUNET_REGEX_Automaton *a); | 126 | GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a); |
123 | 127 | ||
124 | /** | 128 | /** |
125 | * Get the first key for the given 'input_string'. This hashes | 129 | * Get the first key for the given 'input_string'. This hashes |
diff --git a/src/regex/regex.c b/src/regex/regex.c index 6d4a5b22d..411c72c08 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -28,6 +28,9 @@ | |||
28 | #include "gnunet_regex_lib.h" | 28 | #include "gnunet_regex_lib.h" |
29 | #include "regex.h" | 29 | #include "regex.h" |
30 | 30 | ||
31 | /** | ||
32 | * Constant for how many bits the initial string regex should have. | ||
33 | */ | ||
31 | #define INITIAL_BITS 10 | 34 | #define INITIAL_BITS 10 |
32 | 35 | ||
33 | /** | 36 | /** |
@@ -118,9 +121,9 @@ struct GNUNET_REGEX_Automaton | |||
118 | char *regex; | 121 | char *regex; |
119 | 122 | ||
120 | /** | 123 | /** |
121 | * Computed regex (result of RX->NFA->DFA->RX) | 124 | * Canonical regex (result of RX->NFA->DFA->RX) |
122 | */ | 125 | */ |
123 | char *computed_regex; | 126 | char *canonical_regex; |
124 | }; | 127 | }; |
125 | 128 | ||
126 | /** | 129 | /** |
@@ -282,9 +285,20 @@ struct GNUNET_REGEX_StateSet | |||
282 | /* | 285 | /* |
283 | * Debug helper functions | 286 | * Debug helper functions |
284 | */ | 287 | */ |
288 | |||
289 | /** | ||
290 | * Print all the transitions of state 's'. | ||
291 | * | ||
292 | * @param s state for which to print it's transitions. | ||
293 | */ | ||
285 | void | 294 | void |
286 | debug_print_transitions (struct GNUNET_REGEX_State *); | 295 | debug_print_transitions (struct GNUNET_REGEX_State *s); |
287 | 296 | ||
297 | /** | ||
298 | * Print information of the given state 's'. | ||
299 | * | ||
300 | * @param s state for which debug information should be printed. | ||
301 | */ | ||
288 | void | 302 | void |
289 | debug_print_state (struct GNUNET_REGEX_State *s) | 303 | debug_print_state (struct GNUNET_REGEX_State *s) |
290 | { | 304 | { |
@@ -304,6 +318,11 @@ debug_print_state (struct GNUNET_REGEX_State *s) | |||
304 | debug_print_transitions (s); | 318 | debug_print_transitions (s); |
305 | } | 319 | } |
306 | 320 | ||
321 | /** | ||
322 | * Print debug information for all states contained in the automaton 'a'. | ||
323 | * | ||
324 | * @param a automaton for which debug information of it's states should be printed. | ||
325 | */ | ||
307 | void | 326 | void |
308 | debug_print_states (struct GNUNET_REGEX_Automaton *a) | 327 | debug_print_states (struct GNUNET_REGEX_Automaton *a) |
309 | { | 328 | { |
@@ -313,6 +332,11 @@ debug_print_states (struct GNUNET_REGEX_Automaton *a) | |||
313 | debug_print_state (s); | 332 | debug_print_state (s); |
314 | } | 333 | } |
315 | 334 | ||
335 | /** | ||
336 | * Print debug information for given transition 't'. | ||
337 | * | ||
338 | * @param t transition for which to print debug info. | ||
339 | */ | ||
316 | void | 340 | void |
317 | debug_print_transition (struct Transition *t) | 341 | debug_print_transition (struct Transition *t) |
318 | { | 342 | { |
@@ -351,12 +375,13 @@ debug_print_transitions (struct GNUNET_REGEX_State *s) | |||
351 | debug_print_transition (t); | 375 | debug_print_transition (t); |
352 | } | 376 | } |
353 | 377 | ||
378 | |||
354 | /** | 379 | /** |
355 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside | 380 | * Recursive function doing DFS with 'v' as a start, detecting all SCCs inside |
356 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all | 381 | * the subgraph reachable from 'v'. Used with scc_tarjan function to detect all |
357 | * SCCs inside an automaton. | 382 | * SCCs inside an automaton. |
358 | * | 383 | * |
359 | * @param ctx context | 384 | * @param scc_counter counter for numbering the sccs |
360 | * @param v start vertex | 385 | * @param v start vertex |
361 | * @param index current index | 386 | * @param index current index |
362 | * @param stack stack for saving all SCCs | 387 | * @param stack stack for saving all SCCs |
@@ -408,12 +433,12 @@ scc_tarjan_strongconnect (unsigned int *scc_counter, | |||
408 | } | 433 | } |
409 | } | 434 | } |
410 | 435 | ||
436 | |||
411 | /** | 437 | /** |
412 | * Detect all SCCs (Strongly Connected Components) inside the given automaton. | 438 | * Detect all SCCs (Strongly Connected Components) inside the given automaton. |
413 | * SCCs will be marked using the scc_id on each state. | 439 | * SCCs will be marked using the scc_id on each state. |
414 | * | 440 | * |
415 | * @param ctx context | 441 | * @param a the automaton for which SCCs should be computed and assigned. |
416 | * @param a automaton | ||
417 | */ | 442 | */ |
418 | static void | 443 | static void |
419 | scc_tarjan (struct GNUNET_REGEX_Automaton *a) | 444 | scc_tarjan (struct GNUNET_REGEX_Automaton *a) |
@@ -781,10 +806,8 @@ typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, | |||
781 | * @param action_cls closure for action | 806 | * @param action_cls closure for action |
782 | */ | 807 | */ |
783 | static void | 808 | static void |
784 | automaton_state_traverse (struct GNUNET_REGEX_State *s, | 809 | automaton_state_traverse (struct GNUNET_REGEX_State *s, unsigned int *count, |
785 | unsigned int *count, | 810 | GNUNET_REGEX_traverse_action action, void *action_cls) |
786 | GNUNET_REGEX_traverse_action action, | ||
787 | void *action_cls) | ||
788 | { | 811 | { |
789 | struct Transition *t; | 812 | struct Transition *t; |
790 | 813 | ||
@@ -795,7 +818,7 @@ automaton_state_traverse (struct GNUNET_REGEX_State *s, | |||
795 | action (action_cls, *count, s); | 818 | action (action_cls, *count, s); |
796 | (*count)++; | 819 | (*count)++; |
797 | for (t = s->transitions_head; NULL != t; t = t->next) | 820 | for (t = s->transitions_head; NULL != t; t = t->next) |
798 | automaton_state_traverse (t->to_state, count, action, action_cls); | 821 | automaton_state_traverse (t->to_state, count, action, action_cls); |
799 | } | 822 | } |
800 | 823 | ||
801 | 824 | ||
@@ -809,8 +832,7 @@ automaton_state_traverse (struct GNUNET_REGEX_State *s, | |||
809 | */ | 832 | */ |
810 | static void | 833 | static void |
811 | automaton_traverse (struct GNUNET_REGEX_Automaton *a, | 834 | automaton_traverse (struct GNUNET_REGEX_Automaton *a, |
812 | GNUNET_REGEX_traverse_action action, | 835 | GNUNET_REGEX_traverse_action action, void *action_cls) |
813 | void *action_cls) | ||
814 | { | 836 | { |
815 | unsigned int count; | 837 | unsigned int count; |
816 | struct GNUNET_REGEX_State *s; | 838 | struct GNUNET_REGEX_State *s; |
@@ -827,7 +849,7 @@ automaton_traverse (struct GNUNET_REGEX_Automaton *a, | |||
827 | * using it to generate a regex. | 849 | * using it to generate a regex. |
828 | * | 850 | * |
829 | * Currently only tests for first and last characters being '()' respectively. | 851 | * Currently only tests for first and last characters being '()' respectively. |
830 | * FIXME: What about "(ab)|(cd)"? | 852 | * FIXME: What about "(ab)|(cd)"? |
831 | * | 853 | * |
832 | * @param str string | 854 | * @param str string |
833 | * | 855 | * |
@@ -842,10 +864,9 @@ needs_parentheses (const char *str) | |||
842 | const char *pos; | 864 | const char *pos; |
843 | unsigned int cnt; | 865 | unsigned int cnt; |
844 | 866 | ||
845 | if ( (NULL == str) || | 867 | if ((NULL == str) || ((slen = strlen (str)) < 2)) |
846 | ((slen = strlen(str)) < 2) ) | ||
847 | return GNUNET_NO; | 868 | return GNUNET_NO; |
848 | 869 | ||
849 | if ('(' != str[0]) | 870 | if ('(' != str[0]) |
850 | return GNUNET_YES; | 871 | return GNUNET_YES; |
851 | cnt = 1; | 872 | cnt = 1; |
@@ -859,7 +880,7 @@ needs_parentheses (const char *str) | |||
859 | return GNUNET_YES; | 880 | return GNUNET_YES; |
860 | } | 881 | } |
861 | op = strchr (pos, '('); | 882 | op = strchr (pos, '('); |
862 | if ( (NULL != op) && (op < cl)) | 883 | if ((NULL != op) && (op < cl)) |
863 | { | 884 | { |
864 | cnt++; | 885 | cnt++; |
865 | pos = op + 1; | 886 | pos = op + 1; |
@@ -879,7 +900,7 @@ needs_parentheses (const char *str) | |||
879 | * You need to GNUNET_free the returned string. | 900 | * You need to GNUNET_free the returned string. |
880 | * | 901 | * |
881 | * Currently only tests for first and last characters being '()' respectively. | 902 | * Currently only tests for first and last characters being '()' respectively. |
882 | * FIXME: What about "(ab)|(cd)"? | 903 | * FIXME: What about "(ab)|(cd)"? |
883 | * | 904 | * |
884 | * @param str string, free'd or re-used by this function, can be NULL | 905 | * @param str string, free'd or re-used by this function, can be NULL |
885 | * | 906 | * |
@@ -891,7 +912,8 @@ remove_parentheses (char *str) | |||
891 | { | 912 | { |
892 | size_t slen; | 913 | size_t slen; |
893 | 914 | ||
894 | if ( (NULL == str) || ('(' != str[0]) || (str[(slen = strlen(str)) - 1] != ')') ) | 915 | if ((NULL == str) || ('(' != str[0]) || |
916 | (str[(slen = strlen (str)) - 1] != ')')) | ||
895 | return str; | 917 | return str; |
896 | memmove (str, &str[1], slen - 2); | 918 | memmove (str, &str[1], slen - 2); |
897 | str[slen - 2] = '\0'; | 919 | str[slen - 2] = '\0'; |
@@ -910,7 +932,8 @@ remove_parentheses (char *str) | |||
910 | static int | 932 | static int |
911 | has_epsilon (const char *str) | 933 | has_epsilon (const char *str) |
912 | { | 934 | { |
913 | return (NULL != str) && ('(' == str[0]) && ('|' == str[1]) && (')' == str[strlen(str) - 1]); | 935 | return (NULL != str) && ('(' == str[0]) && ('|' == str[1]) && |
936 | (')' == str[strlen (str) - 1]); | ||
914 | } | 937 | } |
915 | 938 | ||
916 | 939 | ||
@@ -931,28 +954,28 @@ remove_epsilon (const char *str) | |||
931 | 954 | ||
932 | if (NULL == str) | 955 | if (NULL == str) |
933 | return NULL; | 956 | return NULL; |
934 | if ( ('(' == str[0]) && ('|' == str[1]) ) | 957 | if (('(' == str[0]) && ('|' == str[1])) |
935 | { | 958 | { |
936 | len = strlen (str); | 959 | len = strlen (str); |
937 | if (')' == str[len-1]) | 960 | if (')' == str[len - 1]) |
938 | return GNUNET_strndup (&str[2], len - 3); | 961 | return GNUNET_strndup (&str[2], len - 3); |
939 | } | 962 | } |
940 | return GNUNET_strdup (str); | 963 | return GNUNET_strdup (str); |
941 | } | 964 | } |
942 | 965 | ||
943 | /** | 966 | /** |
944 | * Compare 'str1', starting from position 'k', with whole 'str2' | 967 | * Compare 'str1', starting from position 'k', with whole 'str2' |
945 | * | 968 | * |
946 | * @param str1 first string to compare, starting from position 'k' | 969 | * @param str1 first string to compare, starting from position 'k' |
947 | * @param str2 second string for comparison | 970 | * @param str2 second string for comparison |
948 | * @param k starting position in 'str1' | 971 | * @param k starting position in 'str1' |
949 | * | 972 | * |
950 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise | 973 | * @return -1 if any of the strings is NULL, 0 if equal, non 0 otherwise |
951 | */ | 974 | */ |
952 | static int | 975 | static int |
953 | strkcmp (const char *str1, const char *str2, size_t k) | 976 | strkcmp (const char *str1, const char *str2, size_t k) |
954 | { | 977 | { |
955 | if ( (NULL == str1) || (NULL == str2) || (strlen(str1) < k) ) | 978 | if ((NULL == str1) || (NULL == str2) || (strlen (str1) < k)) |
956 | return -1; | 979 | return -1; |
957 | return strcmp (&str1[k], str2); | 980 | return strcmp (&str1[k], str2); |
958 | } | 981 | } |
@@ -962,20 +985,23 @@ strkcmp (const char *str1, const char *str2, size_t k) | |||
962 | * Compare two strings for equality. If either is NULL (or if both are | 985 | * Compare two strings for equality. If either is NULL (or if both are |
963 | * NULL), they are not equal. | 986 | * NULL), they are not equal. |
964 | * | 987 | * |
965 | * @return 0 if the strings are the same, 1 or -1 if not | 988 | * @param str1 first string for comparison. |
989 | * @param str2 second string for comparison. | ||
990 | * | ||
991 | * @return 0 if the strings are the same, 1 or -1 if not | ||
966 | */ | 992 | */ |
967 | static int | 993 | static int |
968 | nullstrcmp (const char *str1, const char *str2) | 994 | nullstrcmp (const char *str1, const char *str2) |
969 | { | 995 | { |
970 | if ( (NULL == str1) || (NULL == str2) ) | 996 | if ((NULL == str1) || (NULL == str2)) |
971 | return -1; | 997 | return -1; |
972 | return strcmp (str1, str2); | 998 | return strcmp (str1, str2); |
973 | } | 999 | } |
974 | 1000 | ||
975 | /** | 1001 | /** |
976 | * Helper function used as 'action' in 'automaton_traverse' function to create | 1002 | * Helper function used as 'action' in 'automaton_traverse' function to create |
977 | * the depth-first numbering of the states. | 1003 | * the depth-first numbering of the states. |
978 | * | 1004 | * |
979 | * @param cls states array. | 1005 | * @param cls states array. |
980 | * @param count current state counter. | 1006 | * @param count current state counter. |
981 | * @param s current state. | 1007 | * @param s current state. |
@@ -1036,37 +1062,37 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1036 | /* Compute regular expressions of length "1" between each pair of states */ | 1062 | /* Compute regular expressions of length "1" between each pair of states */ |
1037 | for (i = 0; i < n; i++) | 1063 | for (i = 0; i < n; i++) |
1038 | { | 1064 | { |
1039 | for (j=0;j<n;j++) | 1065 | for (j = 0; j < n; j++) |
1040 | { | 1066 | { |
1041 | R_cur[i][j] = NULL; | 1067 | R_cur[i][j] = NULL; |
1042 | R_last[i][j] = NULL; | 1068 | R_last[i][j] = NULL; |
1043 | } | 1069 | } |
1044 | for (t = states[i]->transitions_head; NULL != t; t = t->next) | 1070 | for (t = states[i]->transitions_head; NULL != t; t = t->next) |
1045 | { | 1071 | { |
1046 | j = t->to_state->proof_id; | 1072 | j = t->to_state->proof_id; |
1047 | if (NULL == R_last[i][j]) | 1073 | if (NULL == R_last[i][j]) |
1048 | GNUNET_asprintf (&R_last[i][j], "%c", t->label); | 1074 | GNUNET_asprintf (&R_last[i][j], "%c", t->label); |
1049 | else | 1075 | else |
1050 | { | 1076 | { |
1051 | temp_a = R_last[i][j]; | 1077 | temp_a = R_last[i][j]; |
1052 | GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); | 1078 | GNUNET_asprintf (&R_last[i][j], "%s|%c", R_last[i][j], t->label); |
1053 | GNUNET_free (temp_a); | 1079 | GNUNET_free (temp_a); |
1054 | } | 1080 | } |
1055 | if (GNUNET_YES == needs_parentheses (R_last[i][j])) | 1081 | if (GNUNET_YES == needs_parentheses (R_last[i][j])) |
1056 | { | 1082 | { |
1057 | temp_a = R_last[i][j]; | 1083 | temp_a = R_last[i][j]; |
1058 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); | 1084 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); |
1059 | GNUNET_free (temp_a); | 1085 | GNUNET_free (temp_a); |
1060 | } | 1086 | } |
1061 | } | 1087 | } |
1062 | if (NULL == R_last[i][i]) | 1088 | if (NULL == R_last[i][i]) |
1063 | GNUNET_asprintf (&R_last[i][i], ""); | 1089 | GNUNET_asprintf (&R_last[i][i], ""); |
1064 | else | 1090 | else |
1065 | { | 1091 | { |
1066 | temp_a = R_last[i][i]; | 1092 | temp_a = R_last[i][i]; |
1067 | GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]); | 1093 | GNUNET_asprintf (&R_last[i][i], "(|%s)", R_last[i][i]); |
1068 | GNUNET_free (temp_a); | 1094 | GNUNET_free (temp_a); |
1069 | } | 1095 | } |
1070 | } | 1096 | } |
1071 | 1097 | ||
1072 | 1098 | ||
@@ -1086,12 +1112,12 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1086 | R_cur_r = NULL; | 1112 | R_cur_r = NULL; |
1087 | R_cur_l = NULL; | 1113 | R_cur_l = NULL; |
1088 | 1114 | ||
1089 | // cache results from strcmp, we might need these many times | 1115 | // cache results from strcmp, we might need these many times |
1090 | ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]); | 1116 | ij_kj_cmp = nullstrcmp (R_last[i][j], R_last[k][j]); |
1091 | ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]); | 1117 | ij_ik_cmp = nullstrcmp (R_last[i][j], R_last[i][k]); |
1092 | ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]); | 1118 | ik_kk_cmp = nullstrcmp (R_last[i][k], R_last[k][k]); |
1093 | ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]); | 1119 | ik_kj_cmp = nullstrcmp (R_last[i][k], R_last[k][j]); |
1094 | kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]); | 1120 | kk_kj_cmp = nullstrcmp (R_last[k][k], R_last[k][j]); |
1095 | 1121 | ||
1096 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj} | 1122 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj} |
1097 | // With: R_cur[i][j] = R_cur_l | R_cur_r | 1123 | // With: R_cur[i][j] = R_cur_l | R_cur_r |
@@ -1116,19 +1142,19 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1116 | 1142 | ||
1117 | // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well | 1143 | // Assign R_temp_(ik|kk|kj) to R_last[][] and remove epsilon as well |
1118 | // as parentheses, so we can better compare the contents | 1144 | // as parentheses, so we can better compare the contents |
1119 | R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k])); | 1145 | R_temp_ik = remove_parentheses (remove_epsilon (R_last[i][k])); |
1120 | R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k])); | 1146 | R_temp_kk = remove_parentheses (remove_epsilon (R_last[k][k])); |
1121 | R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j])); | 1147 | R_temp_kj = remove_parentheses (remove_epsilon (R_last[k][j])); |
1122 | 1148 | ||
1123 | clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk); | 1149 | clean_ik_kk_cmp = nullstrcmp (R_last[i][k], R_temp_kk); |
1124 | clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]); | 1150 | clean_kk_kj_cmp = nullstrcmp (R_temp_kk, R_last[k][j]); |
1125 | 1151 | ||
1126 | // construct R_cur_l (and, if necessary R_cur_r) | 1152 | // construct R_cur_l (and, if necessary R_cur_r) |
1127 | if (NULL != R_last[i][j]) | 1153 | if (NULL != R_last[i][j]) |
1128 | { | 1154 | { |
1129 | // Assign R_temp_ij to R_last[i][j] and remove epsilon as well | 1155 | // Assign R_temp_ij to R_last[i][j] and remove epsilon as well |
1130 | // as parentheses, so we can better compare the contents | 1156 | // as parentheses, so we can better compare the contents |
1131 | R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j])); | 1157 | R_temp_ij = remove_parentheses (remove_epsilon (R_last[i][j])); |
1132 | 1158 | ||
1133 | if (0 == strcmp (R_temp_ij, R_temp_ik) && | 1159 | if (0 == strcmp (R_temp_ij, R_temp_ik) && |
1134 | 0 == strcmp (R_temp_ik, R_temp_kk) && | 1160 | 0 == strcmp (R_temp_ik, R_temp_kk) && |
@@ -1218,7 +1244,8 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1218 | else | 1244 | else |
1219 | { | 1245 | { |
1220 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */ | 1246 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */ |
1221 | temp_a = (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]); | 1247 | temp_a = |
1248 | (NULL == R_last[i][j]) ? NULL : GNUNET_strdup (R_last[i][j]); | ||
1222 | temp_a = remove_parentheses (temp_a); | 1249 | temp_a = remove_parentheses (temp_a); |
1223 | R_cur_l = temp_a; | 1250 | R_cur_l = temp_a; |
1224 | } | 1251 | } |
@@ -1431,8 +1458,8 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1431 | for (j = 0; j < n; j++) | 1458 | for (j = 0; j < n; j++) |
1432 | { | 1459 | { |
1433 | GNUNET_free_non_null (R_last[i][j]); | 1460 | GNUNET_free_non_null (R_last[i][j]); |
1434 | R_last[i][j] = R_cur[i][j]; | 1461 | R_last[i][j] = R_cur[i][j]; |
1435 | R_cur[i][j] = NULL; | 1462 | R_cur[i][j] = NULL; |
1436 | } | 1463 | } |
1437 | } | 1464 | } |
1438 | } | 1465 | } |
@@ -1466,7 +1493,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1466 | } | 1493 | } |
1467 | } | 1494 | } |
1468 | } | 1495 | } |
1469 | a->computed_regex = complete_regex; | 1496 | a->canonical_regex = complete_regex; |
1470 | 1497 | ||
1471 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 1498 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
1472 | "---------------------------------------------\n"); | 1499 | "---------------------------------------------\n"); |
@@ -2508,7 +2535,7 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) | |||
2508 | return; | 2535 | return; |
2509 | 2536 | ||
2510 | GNUNET_free_non_null (a->regex); | 2537 | GNUNET_free_non_null (a->regex); |
2511 | GNUNET_free_non_null (a->computed_regex); | 2538 | GNUNET_free_non_null (a->canonical_regex); |
2512 | 2539 | ||
2513 | for (s = a->states_head; NULL != s;) | 2540 | for (s = a->states_head; NULL != s;) |
2514 | { | 2541 | { |
@@ -2773,20 +2800,25 @@ GNUNET_REGEX_eval (struct GNUNET_REGEX_Automaton *a, const char *string) | |||
2773 | return result; | 2800 | return result; |
2774 | } | 2801 | } |
2775 | 2802 | ||
2803 | |||
2776 | /** | 2804 | /** |
2777 | * Get the computed regex of the given automaton. | 2805 | * Get the canonical regex of the given automaton. |
2778 | * When constructing the automaton a proof is computed for each state, | 2806 | * When constructing the automaton a proof is computed for each state, |
2779 | * consisting of the regular expression leading to this state. A complete | 2807 | * consisting of the regular expression leading to this state. A complete |
2780 | * regex for the automaton can be computed by combining these proofs. | 2808 | * regex for the automaton can be computed by combining these proofs. |
2781 | * As of now this computed regex is only useful for testing. | 2809 | * As of now this function is only useful for testing. |
2810 | * | ||
2811 | * @param a automaton for which the canonical regex should be returned. | ||
2812 | * | ||
2813 | * @return | ||
2782 | */ | 2814 | */ |
2783 | const char * | 2815 | const char * |
2784 | GNUNET_REGEX_get_computed_regex (struct GNUNET_REGEX_Automaton *a) | 2816 | GNUNET_REGEX_get_canonical_regex (struct GNUNET_REGEX_Automaton *a) |
2785 | { | 2817 | { |
2786 | if (NULL == a) | 2818 | if (NULL == a) |
2787 | return NULL; | 2819 | return NULL; |
2788 | 2820 | ||
2789 | return a->computed_regex; | 2821 | return a->canonical_regex; |
2790 | } | 2822 | } |
2791 | 2823 | ||
2792 | /** | 2824 | /** |
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index 1a25c4173..b6cdbe100 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -71,14 +71,14 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
71 | char current_char; | 71 | char current_char; |
72 | int eval; | 72 | int eval; |
73 | int eval_check; | 73 | int eval_check; |
74 | int eval_computed; | 74 | int eval_canonical; |
75 | struct GNUNET_REGEX_Automaton *dfa; | 75 | struct GNUNET_REGEX_Automaton *dfa; |
76 | regex_t rx; | 76 | regex_t rx; |
77 | regmatch_t matchptr[1]; | 77 | regmatch_t matchptr[1]; |
78 | char error[200]; | 78 | char error[200]; |
79 | int result; | 79 | int result; |
80 | unsigned int str_len; | 80 | unsigned int str_len; |
81 | char *computed_regex; | 81 | char *canonical_regex; |
82 | 82 | ||
83 | // At least one string is needed for matching | 83 | // At least one string is needed for matching |
84 | GNUNET_assert (str_count > 0); | 84 | GNUNET_assert (str_count > 0); |
@@ -164,7 +164,7 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
164 | } | 164 | } |
165 | 165 | ||
166 | eval = GNUNET_REGEX_eval (dfa, matching_str[i]); | 166 | eval = GNUNET_REGEX_eval (dfa, matching_str[i]); |
167 | computed_regex = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (dfa)); | 167 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); |
168 | GNUNET_REGEX_automaton_destroy (dfa); | 168 | GNUNET_REGEX_automaton_destroy (dfa); |
169 | 169 | ||
170 | // Match string using glibc regex | 170 | // Match string using glibc regex |
@@ -178,18 +178,18 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
178 | eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); | 178 | eval_check = regexec (&rx, matching_str[i], 1, matchptr, 0); |
179 | regfree (&rx); | 179 | regfree (&rx); |
180 | 180 | ||
181 | // Match computed regex | 181 | // Match canonical regex |
182 | if (0 != regcomp (&rx, computed_regex, REG_EXTENDED)) | 182 | if (0 != regcomp (&rx, canonical_regex, REG_EXTENDED)) |
183 | { | 183 | { |
184 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 184 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
185 | "Could not compile regex using regcomp: %s\n", | 185 | "Could not compile regex using regcomp: %s\n", |
186 | computed_regex); | 186 | canonical_regex); |
187 | return -1; | 187 | return -1; |
188 | } | 188 | } |
189 | 189 | ||
190 | eval_computed = regexec (&rx, matching_str[i], 1, matchptr, 0); | 190 | eval_canonical = regexec (&rx, matching_str[i], 1, matchptr, 0); |
191 | regfree (&rx); | 191 | regfree (&rx); |
192 | GNUNET_free (computed_regex); | 192 | GNUNET_free (canonical_regex); |
193 | 193 | ||
194 | // We only want to match the whole string, because that's what our DFA does, too. | 194 | // We only want to match the whole string, because that's what our DFA does, too. |
195 | if (eval_check == 0 && | 195 | if (eval_check == 0 && |
@@ -356,7 +356,7 @@ main (int argc, char *argv[]) | |||
356 | // DFA test | 356 | // DFA test |
357 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); | 357 | a = GNUNET_REGEX_construct_dfa (rxstr[i].regex, strlen (rxstr[i].regex)); |
358 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 358 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
359 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (a)); | 359 | check_proof = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (a)); |
360 | GNUNET_REGEX_automaton_destroy (a); | 360 | GNUNET_REGEX_automaton_destroy (a); |
361 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); | 361 | a = GNUNET_REGEX_construct_dfa (check_proof, strlen (check_proof)); |
362 | check_dfa += test_automaton (a, &rx, &rxstr[i]); | 362 | check_dfa += test_automaton (a, &rx, &rxstr[i]); |
@@ -369,8 +369,8 @@ main (int argc, char *argv[]) | |||
369 | } | 369 | } |
370 | 370 | ||
371 | srand (time (NULL)); | 371 | srand (time (NULL)); |
372 | for (i = 0; i < 50; i++) | 372 | for (i = 0; i < 50; i++) |
373 | check_rand += test_random (100, 120, 20); | 373 | check_rand += test_random (100, 120, 20); |
374 | 374 | ||
375 | return check_nfa + check_dfa + check_rand; | 375 | return check_nfa + check_dfa + check_rand; |
376 | } | 376 | } |
diff --git a/src/regex/test_regex_proofs.c b/src/regex/test_regex_proofs.c index 47cc4ee5b..5d0aabd00 100644 --- a/src/regex/test_regex_proofs.c +++ b/src/regex/test_regex_proofs.c | |||
@@ -40,6 +40,7 @@ main (int argc, char *argv[]) | |||
40 | 40 | ||
41 | int error; | 41 | int error; |
42 | int i; | 42 | int i; |
43 | |||
43 | const char *regex[21] = { | 44 | const char *regex[21] = { |
44 | "ab(c|d)+c*(a(b|c)+d)+(bla)+", | 45 | "ab(c|d)+c*(a(b|c)+d)+(bla)+", |
45 | "(bla)*", | 46 | "(bla)*", |
@@ -63,7 +64,7 @@ main (int argc, char *argv[]) | |||
63 | "(ab|cs|df|sdf)*", | 64 | "(ab|cs|df|sdf)*", |
64 | "a|aa*a" | 65 | "a|aa*a" |
65 | }; | 66 | }; |
66 | char *computed_regex; | 67 | char *canonical_regex; |
67 | struct GNUNET_REGEX_Automaton *dfa; | 68 | struct GNUNET_REGEX_Automaton *dfa; |
68 | 69 | ||
69 | error = 0; | 70 | error = 0; |
@@ -71,14 +72,18 @@ main (int argc, char *argv[]) | |||
71 | for (i = 0; i < 21; i++) | 72 | for (i = 0; i < 21; i++) |
72 | { | 73 | { |
73 | dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); | 74 | dfa = GNUNET_REGEX_construct_dfa (regex[i], strlen (regex[i])); |
74 | computed_regex = GNUNET_strdup (GNUNET_REGEX_get_computed_regex (dfa)); | 75 | canonical_regex = GNUNET_strdup (GNUNET_REGEX_get_canonical_regex (dfa)); |
75 | GNUNET_REGEX_automaton_destroy (dfa); | 76 | GNUNET_REGEX_automaton_destroy (dfa); |
76 | 77 | ||
77 | dfa = GNUNET_REGEX_construct_dfa (computed_regex, strlen (computed_regex)); | 78 | dfa = |
78 | error += (0 == strcmp (computed_regex, GNUNET_REGEX_get_computed_regex (dfa))) ? 0 : 1; | 79 | GNUNET_REGEX_construct_dfa (canonical_regex, strlen (canonical_regex)); |
79 | GNUNET_free (computed_regex); | 80 | error += |
81 | (0 == | ||
82 | strcmp (canonical_regex, | ||
83 | GNUNET_REGEX_get_canonical_regex (dfa))) ? 0 : 1; | ||
84 | GNUNET_free (canonical_regex); | ||
80 | GNUNET_REGEX_automaton_destroy (dfa); | 85 | GNUNET_REGEX_automaton_destroy (dfa); |
81 | } | 86 | } |
82 | 87 | ||
83 | return error; | 88 | return error; |
84 | } | 89 | } |