diff options
author | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-25 08:33:13 +0000 |
---|---|---|
committer | Maximilian Szengel <gnunet@maxsz.de> | 2012-06-25 08:33:13 +0000 |
commit | 6803579aeefc80a95cd2047901fc691fd03401a1 (patch) | |
tree | 8efdd648eeeae8bc76bd8f4a0bd0c62755a0260f /src/regex | |
parent | febbf34313acf57b3a1c336fe0eb2fed80d7a1e0 (diff) | |
download | gnunet-6803579aeefc80a95cd2047901fc691fd03401a1.tar.gz gnunet-6803579aeefc80a95cd2047901fc691fd03401a1.zip |
regex simplification wip
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/regex.c | 768 | ||||
-rw-r--r-- | src/regex/test_regex_eval_api.c | 26 | ||||
-rw-r--r-- | src/regex/test_regex_iterate_api.c | 48 |
3 files changed, 618 insertions, 224 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c index a3f44c3b0..4a381780f 100644 --- a/src/regex/regex.c +++ b/src/regex/regex.c | |||
@@ -195,6 +195,11 @@ struct GNUNET_REGEX_State | |||
195 | struct GNUNET_HashCode hash; | 195 | struct GNUNET_HashCode hash; |
196 | 196 | ||
197 | /** | 197 | /** |
198 | * State ID for proof creation. | ||
199 | */ | ||
200 | unsigned int proof_id; | ||
201 | |||
202 | /** | ||
198 | * Proof for this state. | 203 | * Proof for this state. |
199 | */ | 204 | */ |
200 | char *proof; | 205 | char *proof; |
@@ -762,10 +767,11 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a, | |||
762 | /** | 767 | /** |
763 | * Function that is called with each state, when traversing an automaton. | 768 | * Function that is called with each state, when traversing an automaton. |
764 | * | 769 | * |
765 | * @param cls closure | 770 | * @param cls closure. |
766 | * @param s state | 771 | * @param count current count of the state, from 0 to a->state_count -1. |
772 | * @param s state. | ||
767 | */ | 773 | */ |
768 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, | 774 | typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count, |
769 | struct GNUNET_REGEX_State * s); | 775 | struct GNUNET_REGEX_State * s); |
770 | 776 | ||
771 | /** | 777 | /** |
@@ -775,10 +781,12 @@ typedef void (*GNUNET_REGEX_traverse_action) (void *cls, | |||
775 | * | 781 | * |
776 | * @param cls closure. | 782 | * @param cls closure. |
777 | * @param s start state. | 783 | * @param s start state. |
784 | * @param count current count of the state. | ||
778 | * @param action action to be performed on each state. | 785 | * @param action action to be performed on each state. |
779 | */ | 786 | */ |
780 | static void | 787 | static void |
781 | automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s, | 788 | automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s, |
789 | unsigned int *count, | ||
782 | GNUNET_REGEX_traverse_action action) | 790 | GNUNET_REGEX_traverse_action action) |
783 | { | 791 | { |
784 | struct Transition *t; | 792 | struct Transition *t; |
@@ -788,10 +796,12 @@ automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s, | |||
788 | s->marked = GNUNET_YES; | 796 | s->marked = GNUNET_YES; |
789 | 797 | ||
790 | if (action > 0) | 798 | if (action > 0) |
791 | action (cls, s); | 799 | action (cls, *count, s); |
800 | |||
801 | (*count)++; | ||
792 | 802 | ||
793 | for (t = s->transitions_head; NULL != t; t = t->next) | 803 | for (t = s->transitions_head; NULL != t; t = t->next) |
794 | automaton_state_traverse (cls, t->to_state, action); | 804 | automaton_state_traverse (cls, t->to_state, count, action); |
795 | } | 805 | } |
796 | } | 806 | } |
797 | 807 | ||
@@ -807,16 +817,169 @@ static void | |||
807 | automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, | 817 | automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, |
808 | GNUNET_REGEX_traverse_action action) | 818 | GNUNET_REGEX_traverse_action action) |
809 | { | 819 | { |
820 | unsigned int count; | ||
810 | struct GNUNET_REGEX_State *s; | 821 | struct GNUNET_REGEX_State *s; |
811 | 822 | ||
812 | for (s = a->states_head; NULL != s; s = s->next) | 823 | for (s = a->states_head; NULL != s; s = s->next) |
813 | s->marked = GNUNET_NO; | 824 | s->marked = GNUNET_NO; |
814 | 825 | ||
815 | automaton_state_traverse (cls, a->start, action); | 826 | count = 0; |
827 | |||
828 | automaton_state_traverse (cls, a->start, &count, action); | ||
816 | } | 829 | } |
817 | 830 | ||
818 | /** | 831 | /** |
819 | * Create proofs for all states in the given automaton. Implementation of the | 832 | * Check if the given string 'str' needs parentheses around it when |
833 | * using it to generate a regex. | ||
834 | * | ||
835 | * @param str string | ||
836 | * | ||
837 | * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise | ||
838 | */ | ||
839 | static int | ||
840 | needs_parentheses (char *str) | ||
841 | { | ||
842 | int length; | ||
843 | |||
844 | if (NULL == str) | ||
845 | return GNUNET_NO; | ||
846 | |||
847 | length = strlen (str); | ||
848 | |||
849 | if (length < 2) | ||
850 | return GNUNET_NO; | ||
851 | |||
852 | if (str[0] == '(' && str[strlen (str) - 1] == ')') | ||
853 | return GNUNET_NO; | ||
854 | |||
855 | return GNUNET_YES; | ||
856 | } | ||
857 | |||
858 | /** | ||
859 | * Remove parentheses surrounding string 'str'. | ||
860 | * Example: "(a)" becomes "a". | ||
861 | * You need to GNUNET_free the retunred string. | ||
862 | * | ||
863 | * @param str string | ||
864 | * | ||
865 | * @return string without surrounding parentheses, string 'str' if no preceding | ||
866 | * epsilon could be found, NULL if 'str' was NULL | ||
867 | */ | ||
868 | static char * | ||
869 | remove_parentheses (char *str) | ||
870 | { | ||
871 | char *new_str; | ||
872 | int length; | ||
873 | int i; | ||
874 | int j; | ||
875 | |||
876 | if (NULL == str) | ||
877 | return NULL; | ||
878 | |||
879 | length = strlen (str); | ||
880 | |||
881 | if (str[0] == '(' && str[length - 1] == ')') | ||
882 | { | ||
883 | new_str = GNUNET_malloc (length - 1); | ||
884 | for (j = 0, i = 1; i < length - 1; i++, j++) | ||
885 | new_str[j] = str[i]; | ||
886 | new_str[j] = '\0'; | ||
887 | } | ||
888 | else | ||
889 | new_str = GNUNET_strdup (str); | ||
890 | |||
891 | return new_str; | ||
892 | } | ||
893 | |||
894 | /** | ||
895 | * Check if the string 'str' starts with an epsilon (empty string). | ||
896 | * Example: "(|a)" is starting with an epsilon. | ||
897 | * | ||
898 | * @param str | ||
899 | * | ||
900 | * @return | ||
901 | */ | ||
902 | static int | ||
903 | has_epsilon (char *str) | ||
904 | { | ||
905 | int len; | ||
906 | |||
907 | if (NULL == str) | ||
908 | return 0; | ||
909 | |||
910 | len = strlen (str); | ||
911 | |||
912 | return (0 == strncmp (str, "(|", 2) && str[len - 1] == ')'); | ||
913 | } | ||
914 | |||
915 | /** | ||
916 | * Remove an epsilon from the string str. Where epsilon is an empty string | ||
917 | * Example: str = "(|a|b|c)", result: "a|b|c" | ||
918 | * The returned string needs to be freed. | ||
919 | * | ||
920 | * @param str string | ||
921 | * | ||
922 | * @return string without preceding epsilon, string 'str' if no preceding epsilon | ||
923 | * could be found, NULL if 'str' was NULL | ||
924 | */ | ||
925 | static char * | ||
926 | remove_epsilon (char *str) | ||
927 | { | ||
928 | int len; | ||
929 | char *new_str; | ||
930 | int i; | ||
931 | int j; | ||
932 | |||
933 | if (NULL == str) | ||
934 | return NULL; | ||
935 | |||
936 | len = strlen (str); | ||
937 | |||
938 | if (len > 2 && 0 == strncmp (str, "(|", 2) && str[len - 1] == ')') | ||
939 | { | ||
940 | new_str = GNUNET_malloc (len - 1); | ||
941 | |||
942 | j = 0; | ||
943 | for (i = 2; i < len - 1; i++) | ||
944 | { | ||
945 | new_str[j] = str[i]; | ||
946 | j++; | ||
947 | } | ||
948 | new_str[j] = '\0'; | ||
949 | } | ||
950 | else | ||
951 | { | ||
952 | new_str = GNUNET_strdup (str); | ||
953 | } | ||
954 | |||
955 | return new_str; | ||
956 | } | ||
957 | |||
958 | static int | ||
959 | strkcmp (const char *str1, const char *str2, int k) | ||
960 | { | ||
961 | const char *new_str1; | ||
962 | |||
963 | if (NULL == str1 || NULL == str2) | ||
964 | return -1; | ||
965 | |||
966 | new_str1 = &str1[k]; | ||
967 | |||
968 | return strcmp (new_str1, str2); | ||
969 | } | ||
970 | |||
971 | void | ||
972 | number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s) | ||
973 | { | ||
974 | struct GNUNET_REGEX_State **states; | ||
975 | |||
976 | states = cls; | ||
977 | s->proof_id = count; | ||
978 | states[count] = s; | ||
979 | } | ||
980 | |||
981 | /** | ||
982 | * create proofs for all states in the given automaton. Implementation of the | ||
820 | * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and | 983 | * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and |
821 | * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. | 984 | * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. |
822 | * | 985 | * |
@@ -825,36 +988,41 @@ automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, | |||
825 | static void | 988 | static void |
826 | automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | 989 | automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) |
827 | { | 990 | { |
828 | struct GNUNET_REGEX_State *s; | ||
829 | struct Transition *t; | ||
830 | int i; | ||
831 | int j; | ||
832 | int k; | ||
833 | int n; | ||
834 | int cnt; | ||
835 | struct GNUNET_REGEX_State *states[a->state_count]; | 991 | struct GNUNET_REGEX_State *states[a->state_count]; |
992 | struct Transition *t; | ||
836 | char *R_last[a->state_count][a->state_count]; | 993 | char *R_last[a->state_count][a->state_count]; |
837 | char *R_cur[a->state_count][a->state_count]; | 994 | char *R_cur[a->state_count][a->state_count]; |
838 | char *R_cur_l; | 995 | char *R_cur_l; |
839 | char *R_cur_r; | 996 | char *R_cur_r; |
840 | char *temp_a; | 997 | char *temp_a; |
841 | char *temp_b; | 998 | char *temp_b; |
999 | char *R_temp_ij; | ||
1000 | char *R_temp_ik; | ||
1001 | char *R_temp_kj; | ||
1002 | char *R_temp_kk; | ||
1003 | char *complete_regex; | ||
1004 | int cnt; | ||
1005 | int eps_check; | ||
1006 | int i; | ||
1007 | int j; | ||
1008 | int k; | ||
1009 | int n; | ||
1010 | int ij_ik_cmp; | ||
1011 | int ij_kj_cmp; | ||
1012 | int ik_kj_cmp; | ||
1013 | int ik_kk_cmp; | ||
1014 | int kk_kj_cmp; | ||
1015 | int clean_ik_kk_cmp; | ||
1016 | int clean_kk_kj_cmp; | ||
842 | int length; | 1017 | int length; |
843 | int length_l; | 1018 | int length_l; |
844 | int length_r; | 1019 | int length_r; |
845 | int s_cnt; | ||
846 | int l_cnt; | ||
847 | char *complete_regex; | ||
848 | 1020 | ||
849 | k = 0; | 1021 | k = 0; |
850 | n = a->state_count; | 1022 | n = a->state_count; |
851 | 1023 | ||
852 | // number the states | 1024 | // number the states |
853 | for (i = 0, s = a->states_head; NULL != s; s = s->next, i++) | 1025 | automaton_traverse (states, a, number_states); |
854 | { | ||
855 | s->marked = i; | ||
856 | states[i] = s; | ||
857 | } | ||
858 | 1026 | ||
859 | // BASIS | 1027 | // BASIS |
860 | for (i = 0; i < n; i++) | 1028 | for (i = 0; i < n; i++) |
@@ -882,14 +1050,14 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
882 | { | 1050 | { |
883 | if (NULL == R_last[i][j]) | 1051 | if (NULL == R_last[i][j]) |
884 | GNUNET_asprintf (&R_last[i][j], ""); | 1052 | GNUNET_asprintf (&R_last[i][j], ""); |
885 | else if (NULL != R_last[i][j] && 1 < strlen (R_last[i][j])) | 1053 | else |
886 | { | 1054 | { |
887 | temp_a = R_last[i][j]; | 1055 | temp_a = R_last[i][j]; |
888 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); | 1056 | GNUNET_asprintf (&R_last[i][j], "(|%s)", R_last[i][j]); |
889 | GNUNET_free (temp_a); | 1057 | GNUNET_free (temp_a); |
890 | } | 1058 | } |
891 | } | 1059 | } |
892 | else if (NULL != R_last[i][j] && 1 < strlen (R_last[i][j])) | 1060 | else if (GNUNET_YES == needs_parentheses (R_last[i][j])) |
893 | { | 1061 | { |
894 | temp_a = R_last[i][j]; | 1062 | temp_a = R_last[i][j]; |
895 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); | 1063 | GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); |
@@ -898,6 +1066,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
898 | } | 1066 | } |
899 | } | 1067 | } |
900 | 1068 | ||
1069 | |||
901 | // INDUCTION | 1070 | // INDUCTION |
902 | for (k = 0; k < n; k++) | 1071 | for (k = 0; k < n; k++) |
903 | { | 1072 | { |
@@ -905,210 +1074,387 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
905 | { | 1074 | { |
906 | for (j = 0; j < n; j++) | 1075 | for (j = 0; j < n; j++) |
907 | { | 1076 | { |
908 | //construct R_cur | 1077 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */ |
1078 | /* ">>> R_last[i][j] = %s R_last[i][k] = %s " */ | ||
1079 | /* "R_last[k][k] = %s R_last[k][j] = %s\n", R_last[i][j], */ | ||
1080 | /* R_last[i][k], R_last[k][k], R_last[k][j]); */ | ||
1081 | |||
1082 | R_cur[i][j] = NULL; | ||
1083 | R_cur_r = NULL; | ||
1084 | R_cur_l = NULL; | ||
1085 | |||
1086 | // Assign R_temp_(ij|ik|kk|kj) to R_last[][] and remove epsilon as well | ||
1087 | // as parentheses, so we can better compare the contents | ||
1088 | temp_a = remove_epsilon (R_last[i][j]); | ||
1089 | R_temp_ij = remove_parentheses (temp_a); | ||
1090 | GNUNET_free_non_null (temp_a); | ||
1091 | temp_a = remove_epsilon (R_last[i][k]); | ||
1092 | R_temp_ik = remove_parentheses (temp_a); | ||
1093 | GNUNET_free_non_null (temp_a); | ||
1094 | temp_a = remove_epsilon (R_last[k][k]); | ||
1095 | R_temp_kk = remove_parentheses (temp_a); | ||
1096 | GNUNET_free_non_null (temp_a); | ||
1097 | temp_a = remove_epsilon (R_last[k][j]); | ||
1098 | R_temp_kj = remove_parentheses (temp_a); | ||
1099 | GNUNET_free_non_null (temp_a); | ||
1100 | |||
1101 | if (NULL != R_last[i][j] && NULL != R_last[k][j]) | ||
1102 | ij_kj_cmp = strcmp (R_last[i][j], R_last[k][j]); | ||
1103 | else | ||
1104 | ij_kj_cmp = -1; | ||
909 | 1105 | ||
910 | // 0*R = R*0 = 0 | 1106 | if (NULL != R_last[i][j] && NULL != R_last[i][k]) |
911 | // 0+R = R+0 = R | 1107 | ij_ik_cmp = strcmp (R_last[i][j], R_last[i][k]); |
912 | if (NULL == R_last[i][k] || NULL == R_last[k][j]) | 1108 | else |
913 | { | 1109 | ij_ik_cmp = -1; |
914 | if (NULL != R_last[i][j]) | ||
915 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); | ||
916 | } | ||
917 | // a(ba)*bx = (ab)+x | ||
918 | else if ((NULL == R_last[i][j] || 0 == strlen (R_last[i][j])) && | ||
919 | NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) && | ||
920 | NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && | ||
921 | 0 == strncmp (R_last[k][k], R_last[k][j], | ||
922 | (strlen (R_last[k][k]) - 1)) && | ||
923 | R_last[i][k][0] == R_last[k][k][strlen (R_last[k][k]) - 1]) | ||
924 | { | ||
925 | length = strlen (R_last[k][k]) - strlen (R_last[i][k]); | ||
926 | 1110 | ||
927 | temp_a = GNUNET_malloc (length + 1); | 1111 | if (NULL != R_last[i][k] && NULL != R_last[k][k]) |
928 | temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1); | 1112 | ik_kk_cmp = strcmp (R_last[i][k], R_last[k][k]); |
1113 | else | ||
1114 | ik_kk_cmp = -1; | ||
929 | 1115 | ||
930 | length_l = 0; | 1116 | if (NULL != R_last[i][k] && NULL != R_last[k][j]) |
931 | length_r = 0; | 1117 | ik_kj_cmp = strcmp (R_last[i][k], R_last[k][j]); |
1118 | else | ||
1119 | ik_kj_cmp = -1; | ||
932 | 1120 | ||
933 | for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) | 1121 | if (NULL != R_last[k][k] && NULL != R_last[k][j]) |
934 | { | 1122 | kk_kj_cmp = strcmp (R_last[k][k], R_last[k][j]); |
935 | if (cnt < length) | 1123 | else |
936 | { | 1124 | kk_kj_cmp = -1; |
937 | temp_a[length_l] = R_last[k][j][cnt]; | ||
938 | length_l++; | ||
939 | } | ||
940 | else | ||
941 | { | ||
942 | temp_b[length_r] = R_last[k][j][cnt]; | ||
943 | length_r++; | ||
944 | } | ||
945 | } | ||
946 | temp_a[length_l] = '\0'; | ||
947 | temp_b[length_r] = '\0'; | ||
948 | 1125 | ||
949 | GNUNET_asprintf (&R_cur[i][j], "(%s%s)+%s", R_last[i][k], temp_a, | 1126 | if (NULL != R_last[i][k] && NULL != R_temp_kk) |
950 | temp_b); | 1127 | clean_ik_kk_cmp = strcmp (R_last[i][k], R_temp_kk); |
951 | GNUNET_free (temp_a); | 1128 | else |
952 | GNUNET_free (temp_b); | 1129 | clean_ik_kk_cmp = 1; |
1130 | |||
1131 | if (NULL != R_temp_kk && NULL != R_last[k][j]) | ||
1132 | clean_kk_kj_cmp = strcmp (R_temp_kk, R_last[k][j]); | ||
1133 | else | ||
1134 | clean_kk_kj_cmp = -1; | ||
1135 | |||
1136 | |||
1137 | // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj} | ||
1138 | // With: R_cur[i][j] = R_cur_l | R_cur_r | ||
1139 | // Rij(k) = Rij(k-1), because right side (R_cur_r) is empty set (NULL) | ||
1140 | if ((NULL == R_last[i][k] || NULL == R_last[k][j] || | ||
1141 | NULL == R_last[k][k]) && NULL != R_last[i][j]) | ||
1142 | { | ||
1143 | R_cur[i][j] = GNUNET_strdup (R_last[i][j]); | ||
1144 | } | ||
1145 | // Everything is NULL, so Rij(k) = NULL | ||
1146 | else if ((NULL == R_last[i][k] || NULL == R_last[k][j] || | ||
1147 | NULL == R_last[k][k]) && NULL == R_last[i][j]) | ||
1148 | { | ||
1149 | R_cur[i][j] = NULL; | ||
953 | } | 1150 | } |
1151 | // Right side (R_cur_r) not NULL | ||
954 | else | 1152 | else |
955 | { | 1153 | { |
956 | // R(k)ij = R(k-1)ij + R(k-1)ik (R(k-1)kk)* R(k-1)kj | 1154 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */ |
957 | length_l = (NULL == R_last[i][j]) ? 1 : strlen (R_last[i][j]) + 1; | 1155 | /* "R_temp_ij = %s R_temp_ik = %s R_temp_kk = %s R_temp_kj = %s\n", */ |
958 | length_r = | 1156 | /* R_temp_ij, R_temp_ik, R_temp_kk, R_temp_kj); */ |
959 | snprintf (NULL, 0, "%s(%s)*%s", R_last[i][k], R_last[k][k], | ||
960 | R_last[k][j]) + 1; | ||
961 | R_cur_l = GNUNET_malloc (length_l); | ||
962 | R_cur_r = GNUNET_malloc (length_r); | ||
963 | 1157 | ||
964 | if (NULL != R_last[i][j]) | 1158 | GNUNET_assert (NULL != R_last[i][k] && NULL != R_last[k][k] && |
965 | strcat (R_cur_l, R_last[i][j]); | 1159 | NULL != R_last[k][j]); |
966 | 1160 | GNUNET_assert (NULL != R_temp_ik && NULL != R_temp_kk && | |
967 | if (NULL != R_last[i][k] && 0 != strcmp (R_last[i][k], R_last[k][k])) | 1161 | NULL != R_temp_kj); |
968 | strcat (R_cur_r, R_last[i][k]); | ||
969 | 1162 | ||
970 | if (NULL != R_last[k][k] && 0 != strcmp (R_last[k][k], "")) | 1163 | // construct R_cur_l (and, if necessary R_cur_r) |
1164 | if (NULL != R_last[i][j]) | ||
971 | { | 1165 | { |
972 | if (1 >= strlen (R_last[k][k]) || | 1166 | if (0 == strcmp (R_temp_ij, R_temp_ik) && |
973 | (R_last[k][k][0] == '(' && | 1167 | 0 == strcmp (R_temp_ik, R_temp_kk) && |
974 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | 1168 | 0 == strcmp (R_temp_kk, R_temp_kj)) |
1169 | { | ||
1170 | if (0 == strlen (R_temp_ij)) | ||
1171 | { | ||
1172 | GNUNET_asprintf (&R_cur_r, ""); | ||
1173 | } | ||
1174 | // a|(e|a)a*(e|a) = a* | ||
1175 | // a|(e|a)(e|a)*(e|a) = a* | ||
1176 | // (e|a)|aa*a = a* | ||
1177 | // (e|a)|aa*(e|a) = a* | ||
1178 | // (e|a)|(e|a)a*a = a* | ||
1179 | // (e|a)|(e|a)a*(e|a) = a* | ||
1180 | // (e|a)|(e|a)(e|a)*(e|a) = a* | ||
1181 | else if ((0 == strncmp (R_last[i][j], "(|", 2)) || | ||
1182 | (0 == strncmp (R_last[i][k], "(|", 2) && | ||
1183 | 0 == strncmp (R_last[k][j], "(|", 2))) | ||
1184 | { | ||
1185 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1186 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij); | ||
1187 | else | ||
1188 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij); | ||
1189 | } | ||
1190 | // a|aa*a = a+ | ||
1191 | // a|(e|a)a*a = a+ | ||
1192 | // a|aa*(e|a) = a+ | ||
1193 | // a|(e|a)(e|a)*a = a+ | ||
1194 | // a|a(e|a)*(e|a) = a+ | ||
1195 | else | ||
1196 | { | ||
1197 | if (GNUNET_YES == needs_parentheses (R_temp_ij)) | ||
1198 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij); | ||
1199 | else | ||
1200 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij); | ||
1201 | } | ||
1202 | } | ||
1203 | // a|ab*b = ab* | ||
1204 | else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp && | ||
1205 | 0 != clean_ik_kk_cmp) | ||
975 | { | 1206 | { |
976 | strcat (R_cur_r, R_last[k][k]); | 1207 | if (strlen (R_last[k][k]) < 1) |
1208 | R_cur_r = GNUNET_strdup (R_last[i][j]); | ||
1209 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) | ||
1210 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); | ||
1211 | else | ||
1212 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]); | ||
1213 | |||
1214 | R_cur_l = NULL; | ||
977 | } | 1215 | } |
978 | else | 1216 | // a|bb*a = b*a |
1217 | else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp && | ||
1218 | 0 != clean_kk_kj_cmp) | ||
979 | { | 1219 | { |
980 | strcat (R_cur_r, "("); | 1220 | if (strlen (R_last[k][k]) < 1) |
981 | strcat (R_cur_r, R_last[k][k]); | 1221 | R_cur_r = GNUNET_strdup (R_last[k][j]); |
982 | strcat (R_cur_r, ")"); | 1222 | else if (GNUNET_YES == needs_parentheses (R_temp_kk)) |
1223 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]); | ||
1224 | else | ||
1225 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); | ||
1226 | |||
1227 | R_cur_l = NULL; | ||
983 | } | 1228 | } |
1229 | // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab* | ||
1230 | else if (0 == ij_ik_cmp && 0 == kk_kj_cmp && | ||
1231 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) | ||
1232 | { | ||
1233 | if (needs_parentheses (R_temp_kk)) | ||
1234 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk); | ||
1235 | else | ||
1236 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk); | ||
984 | 1237 | ||
985 | if (0 == strcmp (R_last[i][k], R_last[k][k]) || | 1238 | R_cur_l = NULL; |
986 | 0 == strcmp (R_last[k][k], R_last[k][j])) | 1239 | } |
987 | strcat (R_cur_r, "+"); | 1240 | // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a |
988 | else | 1241 | else if (0 == ij_kj_cmp && 0 == ik_kk_cmp && |
989 | strcat (R_cur_r, "*"); | 1242 | !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k])) |
990 | } | 1243 | { |
991 | 1244 | if (needs_parentheses (R_temp_kk)) | |
992 | if (NULL != R_last[k][j] && 0 != strcmp (R_last[k][k], R_last[k][j])) | 1245 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]); |
993 | strcat (R_cur_r, R_last[k][j]); | 1246 | else |
994 | 1247 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]); | |
995 | // simplifications... | ||
996 | 1248 | ||
997 | // | is idempotent: a | a = a for all a in A | 1249 | R_cur_l = NULL; |
998 | if (0 == strcmp (R_cur_l, R_cur_r) || 0 == strcmp (R_cur_l, "") || | 1250 | } |
999 | 0 == strcmp (R_cur_r, "")) | ||
1000 | { | ||
1001 | if (0 == strcmp (R_cur_l, "")) | ||
1002 | GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_r); | ||
1003 | else | 1251 | else |
1004 | GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_l); | ||
1005 | } | ||
1006 | // TODO: in theory only applicable if (e + a) | (e + a)(e + a)*(e+a) | ||
1007 | // where e means epsilon... check if practical! | ||
1008 | // a | a a* a = a* | ||
1009 | else if (R_last[i][j] == R_last[i][k] && R_last[i][k] == R_last[k][k] | ||
1010 | && R_last[k][k] == R_last[k][j]) | ||
1011 | { | ||
1012 | if (1 >= strlen (R_last[k][k]) || | ||
1013 | (R_last[k][k][0] == '(' && | ||
1014 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | ||
1015 | { | 1252 | { |
1016 | GNUNET_asprintf (&R_cur[i][j], "%s*", R_last[k][k]); | 1253 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */ |
1254 | |||
1255 | temp_a = remove_parentheses (R_last[i][j]); | ||
1256 | R_cur_l = GNUNET_strdup (temp_a); | ||
1257 | GNUNET_free (temp_a); | ||
1017 | } | 1258 | } |
1018 | else | ||
1019 | GNUNET_asprintf (&R_cur[i][j], "(%s)*", R_last[k][k]); | ||
1020 | } | 1259 | } |
1021 | // a | a b* b => a | a b | a b b | ... => a b* | 1260 | // we have no left side |
1022 | else if (R_last[i][j] == R_last[i][k] && R_last[k][k] == R_last[k][j]) | 1261 | else |
1023 | { | 1262 | { |
1024 | // if a is xb then a b* becomes x b b* = x b+ | 1263 | R_cur_l = NULL; |
1264 | } | ||
1025 | 1265 | ||
1026 | s_cnt = strlen (R_last[k][k]); | 1266 | // construct R_cur_r, if not already constructed |
1027 | l_cnt = strlen (R_last[i][k]); | 1267 | if (NULL == R_cur_r) |
1028 | R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4); | 1268 | { |
1269 | length = strlen (R_temp_kk) - strlen (R_last[i][k]); | ||
1270 | |||
1271 | // a(ba)*bx = (ab)+x | ||
1272 | if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) | ||
1273 | && NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) && | ||
1274 | NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) && | ||
1275 | 0 == strkcmp (R_temp_kk, R_last[i][k], length) && | ||
1276 | 0 == strncmp (R_temp_kk, R_last[k][j], length)) | ||
1277 | { | ||
1278 | temp_a = GNUNET_malloc (length + 1); | ||
1279 | temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1); | ||
1029 | 1280 | ||
1030 | if (l_cnt > 0 && s_cnt >= l_cnt) | 1281 | length_l = 0; |
1031 | for (; s_cnt > 0; s_cnt--, l_cnt--) | 1282 | length_r = 0; |
1032 | if (R_last[i][k][l_cnt] != R_last[k][k][s_cnt]) | ||
1033 | break; | ||
1034 | 1283 | ||
1035 | if (strlen (R_last[i][k]) > 0 && 0 == s_cnt && 0 <= l_cnt) | 1284 | for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) |
1036 | strncat (R_cur[i][j], R_last[i][k], l_cnt); | 1285 | { |
1037 | else | 1286 | if (cnt < length) |
1038 | strcat (R_cur[i][j], R_last[i][k]); | 1287 | { |
1288 | temp_a[length_l] = R_last[k][j][cnt]; | ||
1289 | length_l++; | ||
1290 | } | ||
1291 | else | ||
1292 | { | ||
1293 | temp_b[length_r] = R_last[k][j][cnt]; | ||
1294 | length_r++; | ||
1295 | } | ||
1296 | } | ||
1297 | temp_a[length_l] = '\0'; | ||
1298 | temp_b[length_r] = '\0'; | ||
1039 | 1299 | ||
1040 | if (1 >= strlen (R_last[k][k]) || | 1300 | // e|(ab)+ = (ab)* |
1041 | (R_last[k][k][0] == '(' && | 1301 | if (NULL != R_cur_l && 0 == strlen (R_cur_l) && |
1042 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | 1302 | 0 == strlen (temp_b)) |
1303 | { | ||
1304 | GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a); | ||
1305 | GNUNET_free (R_cur_l); | ||
1306 | R_cur_l = NULL; | ||
1307 | } | ||
1308 | else | ||
1309 | { | ||
1310 | GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a, | ||
1311 | temp_b); | ||
1312 | } | ||
1313 | GNUNET_free (temp_a); | ||
1314 | GNUNET_free (temp_b); | ||
1315 | } | ||
1316 | else if (0 == strcmp (R_temp_ik, R_temp_kk) && | ||
1317 | 0 == strcmp (R_temp_kk, R_temp_kj)) | ||
1043 | { | 1318 | { |
1044 | strcat (R_cur[i][j], R_last[k][k]); | 1319 | // (e|a)a*(e|a) = a* |
1320 | // (e|a)(e|a)*(e|a) = a* | ||
1321 | if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j])) | ||
1322 | { | ||
1323 | if (needs_parentheses (R_temp_kk)) | ||
1324 | GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk); | ||
1325 | else | ||
1326 | GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk); | ||
1327 | } | ||
1328 | // aa*a = a+a | ||
1329 | else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp && | ||
1330 | !has_epsilon (R_last[i][k])) | ||
1331 | { | ||
1332 | if (needs_parentheses (R_temp_kk)) | ||
1333 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | ||
1334 | else | ||
1335 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk); | ||
1336 | } | ||
1337 | // (e|a)a*a = a+ | ||
1338 | // aa*(e|a) = a+ | ||
1339 | // a(e|a)*(e|a) = a+ | ||
1340 | // (e|a)a*a = a+ | ||
1341 | else | ||
1342 | { | ||
1343 | eps_check = | ||
1344 | (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) + | ||
1345 | has_epsilon (R_last[k][j])); | ||
1346 | |||
1347 | if (eps_check == 1) | ||
1348 | { | ||
1349 | if (needs_parentheses (R_temp_kk)) | ||
1350 | GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk); | ||
1351 | else | ||
1352 | GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk); | ||
1353 | } | ||
1354 | } | ||
1045 | } | 1355 | } |
1046 | else | 1356 | // aa*b = a+b |
1357 | // (e|a)(e|a)*b = a*b | ||
1358 | else if (0 == strcmp (R_temp_ik, R_temp_kk)) | ||
1047 | { | 1359 | { |
1048 | strcat (R_cur[i][j], "("); | 1360 | if (has_epsilon (R_last[i][k])) |
1049 | strcat (R_cur[i][j], R_last[k][k]); | 1361 | { |
1050 | strcat (R_cur[i][j], ")"); | 1362 | if (needs_parentheses (R_temp_kk)) |
1363 | GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, | ||
1364 | R_last[k][j]); | ||
1365 | else | ||
1366 | GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]); | ||
1367 | } | ||
1368 | else | ||
1369 | { | ||
1370 | if (needs_parentheses (R_temp_kk)) | ||
1371 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, | ||
1372 | R_last[k][j]); | ||
1373 | else | ||
1374 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]); | ||
1375 | } | ||
1051 | } | 1376 | } |
1052 | 1377 | // ba*a = ba+ | |
1053 | if (0 == s_cnt && 0 <= l_cnt) | 1378 | // b(e|a)*(e|a) = ba* |
1054 | strcat (R_cur[i][j], "+"); | 1379 | else if (0 == strcmp (R_temp_kk, R_temp_kj)) |
1055 | else | ||
1056 | strcat (R_cur[i][j], "*"); | ||
1057 | } | ||
1058 | // a | b b* a => a | b a | b b a | ... => b* a | ||
1059 | else if (R_last[i][j] == R_last[k][j] && R_last[i][k] == R_last[k][k]) | ||
1060 | { | ||
1061 | // if a is bx then b* a becomes b+ x | ||
1062 | |||
1063 | s_cnt = strlen (R_last[k][k]); | ||
1064 | l_cnt = strlen (R_last[k][j]); | ||
1065 | R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4); | ||
1066 | |||
1067 | if (l_cnt > 0 && s_cnt >= l_cnt) | ||
1068 | for (cnt = 0; cnt < s_cnt; cnt++) | ||
1069 | if (R_last[k][k][cnt] != R_last[k][j][cnt]) | ||
1070 | break; | ||
1071 | |||
1072 | if (1 >= strlen (R_last[k][k]) && | ||
1073 | !(R_last[k][k][0] == '(' && | ||
1074 | R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) | ||
1075 | { | 1380 | { |
1076 | strcat (R_cur[i][j], "("); | 1381 | if (has_epsilon (R_last[k][j])) |
1077 | strcat (R_cur[i][j], R_last[k][k]); | 1382 | { |
1078 | strcat (R_cur[i][j], ")"); | 1383 | if (needs_parentheses (R_temp_kk)) |
1384 | GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k], | ||
1385 | R_temp_kk); | ||
1386 | else | ||
1387 | GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk); | ||
1388 | } | ||
1389 | else | ||
1390 | { | ||
1391 | if (needs_parentheses (R_temp_kk)) | ||
1392 | GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k], | ||
1393 | R_temp_kk); | ||
1394 | else | ||
1395 | GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk); | ||
1396 | } | ||
1079 | } | 1397 | } |
1080 | else | 1398 | else |
1081 | strcat (R_cur[i][j], R_last[k][k]); | 1399 | { |
1400 | if (strlen (R_temp_kk) > 0) | ||
1401 | { | ||
1402 | if (needs_parentheses (R_temp_kk)) | ||
1403 | { | ||
1404 | GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k], | ||
1405 | R_temp_kk, R_last[k][j]); | ||
1406 | } | ||
1407 | else | ||
1408 | { | ||
1409 | GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk, | ||
1410 | R_last[k][j]); | ||
1411 | } | ||
1412 | } | ||
1413 | else | ||
1414 | { | ||
1415 | GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]); | ||
1416 | } | ||
1417 | } | ||
1418 | } | ||
1082 | 1419 | ||
1083 | if (cnt == s_cnt) | 1420 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */ |
1084 | strcat (R_cur[i][j], "+"); | 1421 | /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */ |
1085 | else | ||
1086 | strcat (R_cur[i][j], "*"); | ||
1087 | 1422 | ||
1088 | if (strlen (R_last[k][j]) > 0 && cnt == s_cnt) | 1423 | // putting it all together |
1089 | strcat (R_cur[i][j], &R_last[k][j][cnt]); | 1424 | if (NULL != R_cur_l && NULL != R_cur_r) |
1090 | else | ||
1091 | strcat (R_cur[i][j], R_last[k][j]); | ||
1092 | } | ||
1093 | else | ||
1094 | { | 1425 | { |
1095 | if (R_cur_l[0] == '(' && R_cur_l[strlen (R_cur_l) - 1] == ')') | 1426 | // a|a = a |
1427 | if (0 == strcmp (R_cur_l, R_cur_r)) | ||
1096 | { | 1428 | { |
1097 | temp_a = GNUNET_malloc (strlen (R_cur_l) - 1); | 1429 | R_cur[i][j] = GNUNET_strdup (R_cur_l); |
1098 | for (cnt = 0; cnt < strlen (R_cur_l) - 2; cnt++) | ||
1099 | { | ||
1100 | temp_a[cnt] = R_cur_l[cnt + 1]; | ||
1101 | } | ||
1102 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", temp_a, R_cur_r); | ||
1103 | GNUNET_free (temp_a); | ||
1104 | } | 1430 | } |
1431 | // R_cur_l | R_cur_r | ||
1105 | else | 1432 | else |
1433 | { | ||
1106 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); | 1434 | GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); |
1435 | } | ||
1436 | } | ||
1437 | else if (NULL != R_cur_l) | ||
1438 | { | ||
1439 | R_cur[i][j] = GNUNET_strdup (R_cur_l); | ||
1440 | } | ||
1441 | else if (NULL != R_cur_r) | ||
1442 | { | ||
1443 | R_cur[i][j] = GNUNET_strdup (R_cur_r); | ||
1444 | } | ||
1445 | else | ||
1446 | { | ||
1447 | R_cur[i][j] = NULL; | ||
1107 | } | 1448 | } |
1108 | 1449 | ||
1109 | GNUNET_free_non_null (R_cur_l); | 1450 | GNUNET_free_non_null (R_cur_l); |
1110 | GNUNET_free_non_null (R_cur_r); | 1451 | GNUNET_free_non_null (R_cur_r); |
1111 | } | 1452 | } |
1453 | |||
1454 | GNUNET_free_non_null (R_temp_ij); | ||
1455 | GNUNET_free_non_null (R_temp_ik); | ||
1456 | GNUNET_free_non_null (R_temp_kk); | ||
1457 | GNUNET_free_non_null (R_temp_kj); | ||
1112 | } | 1458 | } |
1113 | } | 1459 | } |
1114 | 1460 | ||
@@ -1132,9 +1478,12 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1132 | // assign proofs and hashes | 1478 | // assign proofs and hashes |
1133 | for (i = 0; i < n; i++) | 1479 | for (i = 0; i < n; i++) |
1134 | { | 1480 | { |
1135 | states[i]->proof = GNUNET_strdup (R_last[a->start->marked][i]); | 1481 | if (NULL != R_last[a->start->proof_id][i]) |
1136 | GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof), | 1482 | { |
1137 | &states[i]->hash); | 1483 | states[i]->proof = GNUNET_strdup (R_last[a->start->proof_id][i]); |
1484 | GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof), | ||
1485 | &states[i]->hash); | ||
1486 | } | ||
1138 | } | 1487 | } |
1139 | 1488 | ||
1140 | // complete regex for whole DFA: union of all pairs (start state/accepting state(s)). | 1489 | // complete regex for whole DFA: union of all pairs (start state/accepting state(s)). |
@@ -1143,20 +1492,27 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) | |||
1143 | { | 1492 | { |
1144 | if (states[i]->accepting) | 1493 | if (states[i]->accepting) |
1145 | { | 1494 | { |
1146 | if (NULL == complete_regex) | 1495 | if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i])) |
1147 | GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->marked][i]); | 1496 | GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]); |
1148 | else if (NULL != R_last[a->start->marked][i] && | 1497 | else if (NULL != R_last[a->start->proof_id][i] && |
1149 | 0 != strcmp (R_last[a->start->marked][i], "")) | 1498 | 0 < strlen (R_last[a->start->proof_id][i])) |
1150 | { | 1499 | { |
1151 | temp_a = complete_regex; | 1500 | temp_a = complete_regex; |
1152 | GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, | 1501 | GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, |
1153 | R_last[a->start->marked][i]); | 1502 | R_last[a->start->proof_id][i]); |
1154 | GNUNET_free (temp_a); | 1503 | GNUNET_free (temp_a); |
1155 | } | 1504 | } |
1156 | } | 1505 | } |
1157 | } | 1506 | } |
1158 | a->computed_regex = complete_regex; | 1507 | a->computed_regex = complete_regex; |
1159 | 1508 | ||
1509 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1510 | "---------------------------------------------\n"); | ||
1511 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex: %s\n", a->regex); | ||
1512 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Complete Regex: %s\n", complete_regex); | ||
1513 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
1514 | "---------------------------------------------\n"); | ||
1515 | |||
1160 | // cleanup | 1516 | // cleanup |
1161 | for (i = 0; i < n; i++) | 1517 | for (i = 0; i < n; i++) |
1162 | { | 1518 | { |
@@ -2211,11 +2567,13 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a) | |||
2211 | * an open file. Used only in conjunction with | 2567 | * an open file. Used only in conjunction with |
2212 | * GNUNET_REGEX_automaton_save_graph. | 2568 | * GNUNET_REGEX_automaton_save_graph. |
2213 | * | 2569 | * |
2214 | * @param cls file pointer | 2570 | * @param cls file pointer. |
2215 | * @param s state | 2571 | * @param count current count of the state, not used. |
2572 | * @param s state. | ||
2216 | */ | 2573 | */ |
2217 | void | 2574 | void |
2218 | GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s) | 2575 | GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count, |
2576 | struct GNUNET_REGEX_State *s) | ||
2219 | { | 2577 | { |
2220 | FILE *p; | 2578 | FILE *p; |
2221 | struct Transition *ctran; | 2579 | struct Transition *ctran; |
@@ -2227,13 +2585,13 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s) | |||
2227 | if (s->accepting) | 2585 | if (s->accepting) |
2228 | { | 2586 | { |
2229 | GNUNET_asprintf (&s_acc, | 2587 | GNUNET_asprintf (&s_acc, |
2230 | "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", | 2588 | "\"%s(%i)\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", |
2231 | s->name, s->scc_id); | 2589 | s->name, s->proof_id, s->scc_id); |
2232 | } | 2590 | } |
2233 | else | 2591 | else |
2234 | { | 2592 | { |
2235 | GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, | 2593 | GNUNET_asprintf (&s_acc, "\"%s(%i)\" [color=\"0.%i 0.8 0.95\"];\n", s->name, |
2236 | s->scc_id); | 2594 | s->proof_id, s->scc_id); |
2237 | } | 2595 | } |
2238 | 2596 | ||
2239 | if (NULL == s_acc) | 2597 | if (NULL == s_acc) |
@@ -2250,7 +2608,7 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s) | |||
2250 | if (NULL == ctran->to_state) | 2608 | if (NULL == ctran->to_state) |
2251 | { | 2609 | { |
2252 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, | 2610 | GNUNET_log (GNUNET_ERROR_TYPE_ERROR, |
2253 | "Transition from State %i has has no state for transitioning\n", | 2611 | "Transition from State %i has no state for transitioning\n", |
2254 | s->id); | 2612 | s->id); |
2255 | continue; | 2613 | continue; |
2256 | } | 2614 | } |
@@ -2258,14 +2616,16 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s) | |||
2258 | if (ctran->label == 0) | 2616 | if (ctran->label == 0) |
2259 | { | 2617 | { |
2260 | GNUNET_asprintf (&s_tran, | 2618 | GNUNET_asprintf (&s_tran, |
2261 | "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", | 2619 | "\"%s(%i)\" -> \"%s(%i)\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", |
2262 | s->name, ctran->to_state->name, s->scc_id); | 2620 | s->name, s->proof_id, ctran->to_state->name, |
2621 | ctran->to_state->proof_id, s->scc_id); | ||
2263 | } | 2622 | } |
2264 | else | 2623 | else |
2265 | { | 2624 | { |
2266 | GNUNET_asprintf (&s_tran, | 2625 | GNUNET_asprintf (&s_tran, |
2267 | "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", | 2626 | "\"%s(%i)\" -> \"%s(%i)\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", |
2268 | s->name, ctran->to_state->name, ctran->label, s->scc_id); | 2627 | s->name, s->proof_id, ctran->to_state->name, |
2628 | ctran->to_state->proof_id, ctran->label, s->scc_id); | ||
2269 | } | 2629 | } |
2270 | 2630 | ||
2271 | if (NULL == s_tran) | 2631 | if (NULL == s_tran) |
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c index b875f4088..7d68f84f4 100644 --- a/src/regex/test_regex_eval_api.c +++ b/src/regex/test_regex_eval_api.c | |||
@@ -44,6 +44,17 @@ struct Regex_String_Pair | |||
44 | static const char allowed_literals[] = | 44 | static const char allowed_literals[] = |
45 | "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; | 45 | "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; |
46 | 46 | ||
47 | /** | ||
48 | * Random regex test. Generate a random regex as well as 'str_count' strings to | ||
49 | * match it against. Will match using GNUNET_REGEX implementation and compare | ||
50 | * the result to glibc regex result. 'rx_length' has to be smaller then 'max_str_len'. | ||
51 | * | ||
52 | * @param rx_length length of the regular expression. | ||
53 | * @param max_str_len maximum length of the random strings. | ||
54 | * @param str_count number of generated random strings. | ||
55 | * | ||
56 | * @return 0 on success, non 0 otherwise. | ||
57 | */ | ||
47 | int | 58 | int |
48 | test_random (unsigned int rx_length, unsigned int max_str_len, | 59 | test_random (unsigned int rx_length, unsigned int max_str_len, |
49 | unsigned int str_count) | 60 | unsigned int str_count) |
@@ -199,6 +210,17 @@ test_random (unsigned int rx_length, unsigned int max_str_len, | |||
199 | return result; | 210 | return result; |
200 | } | 211 | } |
201 | 212 | ||
213 | /** | ||
214 | * Automaton test that compares the result of matching regular expression 'rx' | ||
215 | * with the strings and expected results in 'rxstr' with the result of matching | ||
216 | * the same strings with glibc regex. | ||
217 | * | ||
218 | * @param a automaton. | ||
219 | * @param rx compiled glibc regex. | ||
220 | * @param rxstr regular expression and strings with expected results to match against. | ||
221 | * | ||
222 | * @return 0 on successfull, non 0 otherwise | ||
223 | */ | ||
202 | int | 224 | int |
203 | test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx, | 225 | test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx, |
204 | struct Regex_String_Pair *rxstr) | 226 | struct Regex_String_Pair *rxstr) |
@@ -347,8 +369,8 @@ main (int argc, char *argv[]) | |||
347 | } | 369 | } |
348 | 370 | ||
349 | srand (time (NULL)); | 371 | srand (time (NULL)); |
350 | for (i = 0; i < 50; i++) | 372 | /* for (i = 0; i < 50; i++) */ |
351 | check_rand += test_random (100, 120, 20); | 373 | /* check_rand += test_random (100, 120, 20); */ |
352 | 374 | ||
353 | return check_nfa + check_dfa + check_rand; | 375 | return check_nfa + check_dfa + check_rand; |
354 | } | 376 | } |
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c index a36419a95..e9843e180 100644 --- a/src/regex/test_regex_iterate_api.c +++ b/src/regex/test_regex_iterate_api.c | |||
@@ -34,7 +34,8 @@ key_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof, | |||
34 | { | 34 | { |
35 | int i; | 35 | int i; |
36 | 36 | ||
37 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); | 37 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n", |
38 | accepting); | ||
38 | for (i = 0; i < num_edges; i++) | 39 | for (i = 0; i < num_edges; i++) |
39 | { | 40 | { |
40 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label); | 41 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label); |
@@ -60,26 +61,37 @@ main (int argc, char *argv[]) | |||
60 | struct GNUNET_REGEX_Automaton *dfa; | 61 | struct GNUNET_REGEX_Automaton *dfa; |
61 | 62 | ||
62 | error = 0; | 63 | error = 0; |
63 | regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; | 64 | /* regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; */ |
64 | /*regex = "(bla)+"; */ | 65 | /* regex = "(bla)*"; */ |
65 | /*regex = "b(lab)*la"; */ | 66 | /*regex = "b(lab)*la"; */ |
66 | /*regex = "(bla)*"; */ | 67 | /* regex = "(ab)*"; */ |
67 | /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */ | 68 | /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */ |
68 | /*regex = "z(abc|def)?xyz"; */ | 69 | /*regex = "z(abc|def)?xyz"; */ |
69 | /*regex = "1*0(0|1)*"; */ | 70 | /* regex = "1*0(0|1)*"; */ |
70 | /*regex = "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*"; */ | 71 | /* regex = "a*b*"; */ |
71 | /*regex = "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)"; */ | 72 | /* regex = "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*"; */ |
72 | /*regex = "abc(1|0)*def"; */ | 73 | /* regex = "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)"; */ |
73 | /*regex = "ab|ac"; */ | 74 | /* regex = "abc(1|0)*def"; */ |
74 | /*regex = "(ab)(ab)*"; */ | 75 | /* regex = "ab|ac"; */ |
75 | /*regex = "ab|cd|ef|gh"; */ | 76 | /* regex = "(ab)(ab)*"; */ |
76 | /*regex = "a|b|c|d|e|f|g"; */ | 77 | /* regex = "ab|cd|ef|gh"; */ |
77 | /*regex = "(ab)|(ac)"; */ | 78 | /* regex = "a|b|c|d|e|f|g"; */ |
78 | /*regex = "a(b|c)"; */ | 79 | /* regex = "(ab)|(ac)"; */ |
79 | /*regex = "a*a"; */ | 80 | /* regex = "a(b|c)"; */ |
80 | /*regex = "ab?(abcd)?"; */ | 81 | /* regex = "a*a"; */ |
81 | /*regex = "(ab)+"; */ | 82 | /* regex = "ab?(abcd)?"; */ |
82 | /*regex = "(abcsdfsdf)+"; */ | 83 | /* regex = "(ab)+"; */ |
84 | /* regex = "(ab|cs|df|sdf)*"; */ | ||
85 | /* regex = "(ab|cd)*"; */ | ||
86 | regex = "(cd|ab)*"; | ||
87 | regex = "(cd|ab)*"; | ||
88 | /* regex = "(ab|c)+"; */ | ||
89 | /* regex = "(a|bc)+"; */ | ||
90 | /* regex = "(ab|c)(ab|c)*"; */ | ||
91 | /* regex = "(a|bc)(a|bc)*"; */ | ||
92 | /* regex = "(ac|b)*"; */ | ||
93 | /* regex = "a|aa*a"; */ | ||
94 | |||
83 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); | 95 | dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); |
84 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot"); | 96 | GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot"); |
85 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL); | 97 | GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL); |