aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorMaximilian Szengel <gnunet@maxsz.de>2012-06-25 08:33:13 +0000
committerMaximilian Szengel <gnunet@maxsz.de>2012-06-25 08:33:13 +0000
commit6803579aeefc80a95cd2047901fc691fd03401a1 (patch)
tree8efdd648eeeae8bc76bd8f4a0bd0c62755a0260f /src/regex
parentfebbf34313acf57b3a1c336fe0eb2fed80d7a1e0 (diff)
downloadgnunet-6803579aeefc80a95cd2047901fc691fd03401a1.tar.gz
gnunet-6803579aeefc80a95cd2047901fc691fd03401a1.zip
regex simplification wip
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/regex.c768
-rw-r--r--src/regex/test_regex_eval_api.c26
-rw-r--r--src/regex/test_regex_iterate_api.c48
3 files changed, 618 insertions, 224 deletions
diff --git a/src/regex/regex.c b/src/regex/regex.c
index a3f44c3b0..4a381780f 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -195,6 +195,11 @@ struct GNUNET_REGEX_State
195 struct GNUNET_HashCode hash; 195 struct GNUNET_HashCode hash;
196 196
197 /** 197 /**
198 * State ID for proof creation.
199 */
200 unsigned int proof_id;
201
202 /**
198 * Proof for this state. 203 * Proof for this state.
199 */ 204 */
200 char *proof; 205 char *proof;
@@ -762,10 +767,11 @@ automaton_add_state (struct GNUNET_REGEX_Automaton *a,
762/** 767/**
763 * Function that is called with each state, when traversing an automaton. 768 * Function that is called with each state, when traversing an automaton.
764 * 769 *
765 * @param cls closure 770 * @param cls closure.
766 * @param s state 771 * @param count current count of the state, from 0 to a->state_count -1.
772 * @param s state.
767 */ 773 */
768typedef void (*GNUNET_REGEX_traverse_action) (void *cls, 774typedef void (*GNUNET_REGEX_traverse_action) (void *cls, unsigned int count,
769 struct GNUNET_REGEX_State * s); 775 struct GNUNET_REGEX_State * s);
770 776
771/** 777/**
@@ -775,10 +781,12 @@ typedef void (*GNUNET_REGEX_traverse_action) (void *cls,
775 * 781 *
776 * @param cls closure. 782 * @param cls closure.
777 * @param s start state. 783 * @param s start state.
784 * @param count current count of the state.
778 * @param action action to be performed on each state. 785 * @param action action to be performed on each state.
779 */ 786 */
780static void 787static void
781automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s, 788automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s,
789 unsigned int *count,
782 GNUNET_REGEX_traverse_action action) 790 GNUNET_REGEX_traverse_action action)
783{ 791{
784 struct Transition *t; 792 struct Transition *t;
@@ -788,10 +796,12 @@ automaton_state_traverse (void *cls, struct GNUNET_REGEX_State *s,
788 s->marked = GNUNET_YES; 796 s->marked = GNUNET_YES;
789 797
790 if (action > 0) 798 if (action > 0)
791 action (cls, s); 799 action (cls, *count, s);
800
801 (*count)++;
792 802
793 for (t = s->transitions_head; NULL != t; t = t->next) 803 for (t = s->transitions_head; NULL != t; t = t->next)
794 automaton_state_traverse (cls, t->to_state, action); 804 automaton_state_traverse (cls, t->to_state, count, action);
795 } 805 }
796} 806}
797 807
@@ -807,16 +817,169 @@ static void
807automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a, 817automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a,
808 GNUNET_REGEX_traverse_action action) 818 GNUNET_REGEX_traverse_action action)
809{ 819{
820 unsigned int count;
810 struct GNUNET_REGEX_State *s; 821 struct GNUNET_REGEX_State *s;
811 822
812 for (s = a->states_head; NULL != s; s = s->next) 823 for (s = a->states_head; NULL != s; s = s->next)
813 s->marked = GNUNET_NO; 824 s->marked = GNUNET_NO;
814 825
815 automaton_state_traverse (cls, a->start, action); 826 count = 0;
827
828 automaton_state_traverse (cls, a->start, &count, action);
816} 829}
817 830
818/** 831/**
819 * Create proofs for all states in the given automaton. Implementation of the 832 * Check if the given string 'str' needs parentheses around it when
833 * using it to generate a regex.
834 *
835 * @param str string
836 *
837 * @return GNUNET_YES if parentheses are needed, GNUNET_NO otherwise
838 */
839static int
840needs_parentheses (char *str)
841{
842 int length;
843
844 if (NULL == str)
845 return GNUNET_NO;
846
847 length = strlen (str);
848
849 if (length < 2)
850 return GNUNET_NO;
851
852 if (str[0] == '(' && str[strlen (str) - 1] == ')')
853 return GNUNET_NO;
854
855 return GNUNET_YES;
856}
857
858/**
859 * Remove parentheses surrounding string 'str'.
860 * Example: "(a)" becomes "a".
861 * You need to GNUNET_free the retunred string.
862 *
863 * @param str string
864 *
865 * @return string without surrounding parentheses, string 'str' if no preceding
866 * epsilon could be found, NULL if 'str' was NULL
867 */
868static char *
869remove_parentheses (char *str)
870{
871 char *new_str;
872 int length;
873 int i;
874 int j;
875
876 if (NULL == str)
877 return NULL;
878
879 length = strlen (str);
880
881 if (str[0] == '(' && str[length - 1] == ')')
882 {
883 new_str = GNUNET_malloc (length - 1);
884 for (j = 0, i = 1; i < length - 1; i++, j++)
885 new_str[j] = str[i];
886 new_str[j] = '\0';
887 }
888 else
889 new_str = GNUNET_strdup (str);
890
891 return new_str;
892}
893
894/**
895 * Check if the string 'str' starts with an epsilon (empty string).
896 * Example: "(|a)" is starting with an epsilon.
897 *
898 * @param str
899 *
900 * @return
901 */
902static int
903has_epsilon (char *str)
904{
905 int len;
906
907 if (NULL == str)
908 return 0;
909
910 len = strlen (str);
911
912 return (0 == strncmp (str, "(|", 2) && str[len - 1] == ')');
913}
914
915/**
916 * Remove an epsilon from the string str. Where epsilon is an empty string
917 * Example: str = "(|a|b|c)", result: "a|b|c"
918 * The returned string needs to be freed.
919 *
920 * @param str string
921 *
922 * @return string without preceding epsilon, string 'str' if no preceding epsilon
923 * could be found, NULL if 'str' was NULL
924 */
925static char *
926remove_epsilon (char *str)
927{
928 int len;
929 char *new_str;
930 int i;
931 int j;
932
933 if (NULL == str)
934 return NULL;
935
936 len = strlen (str);
937
938 if (len > 2 && 0 == strncmp (str, "(|", 2) && str[len - 1] == ')')
939 {
940 new_str = GNUNET_malloc (len - 1);
941
942 j = 0;
943 for (i = 2; i < len - 1; i++)
944 {
945 new_str[j] = str[i];
946 j++;
947 }
948 new_str[j] = '\0';
949 }
950 else
951 {
952 new_str = GNUNET_strdup (str);
953 }
954
955 return new_str;
956}
957
958static int
959strkcmp (const char *str1, const char *str2, int k)
960{
961 const char *new_str1;
962
963 if (NULL == str1 || NULL == str2)
964 return -1;
965
966 new_str1 = &str1[k];
967
968 return strcmp (new_str1, str2);
969}
970
971void
972number_states (void *cls, unsigned int count, struct GNUNET_REGEX_State *s)
973{
974 struct GNUNET_REGEX_State **states;
975
976 states = cls;
977 s->proof_id = count;
978 states[count] = s;
979}
980
981/**
982 * create proofs for all states in the given automaton. Implementation of the
820 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and 983 * algorithm descriped in chapter 3.2.1 of "Automata Theory, Languages, and
821 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman. 984 * Computation 3rd Edition" by Hopcroft, Motwani and Ullman.
822 * 985 *
@@ -825,36 +988,41 @@ automaton_traverse (void *cls, struct GNUNET_REGEX_Automaton *a,
825static void 988static void
826automaton_create_proofs (struct GNUNET_REGEX_Automaton *a) 989automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
827{ 990{
828 struct GNUNET_REGEX_State *s;
829 struct Transition *t;
830 int i;
831 int j;
832 int k;
833 int n;
834 int cnt;
835 struct GNUNET_REGEX_State *states[a->state_count]; 991 struct GNUNET_REGEX_State *states[a->state_count];
992 struct Transition *t;
836 char *R_last[a->state_count][a->state_count]; 993 char *R_last[a->state_count][a->state_count];
837 char *R_cur[a->state_count][a->state_count]; 994 char *R_cur[a->state_count][a->state_count];
838 char *R_cur_l; 995 char *R_cur_l;
839 char *R_cur_r; 996 char *R_cur_r;
840 char *temp_a; 997 char *temp_a;
841 char *temp_b; 998 char *temp_b;
999 char *R_temp_ij;
1000 char *R_temp_ik;
1001 char *R_temp_kj;
1002 char *R_temp_kk;
1003 char *complete_regex;
1004 int cnt;
1005 int eps_check;
1006 int i;
1007 int j;
1008 int k;
1009 int n;
1010 int ij_ik_cmp;
1011 int ij_kj_cmp;
1012 int ik_kj_cmp;
1013 int ik_kk_cmp;
1014 int kk_kj_cmp;
1015 int clean_ik_kk_cmp;
1016 int clean_kk_kj_cmp;
842 int length; 1017 int length;
843 int length_l; 1018 int length_l;
844 int length_r; 1019 int length_r;
845 int s_cnt;
846 int l_cnt;
847 char *complete_regex;
848 1020
849 k = 0; 1021 k = 0;
850 n = a->state_count; 1022 n = a->state_count;
851 1023
852 // number the states 1024 // number the states
853 for (i = 0, s = a->states_head; NULL != s; s = s->next, i++) 1025 automaton_traverse (states, a, number_states);
854 {
855 s->marked = i;
856 states[i] = s;
857 }
858 1026
859 // BASIS 1027 // BASIS
860 for (i = 0; i < n; i++) 1028 for (i = 0; i < n; i++)
@@ -882,14 +1050,14 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
882 { 1050 {
883 if (NULL == R_last[i][j]) 1051 if (NULL == R_last[i][j])
884 GNUNET_asprintf (&R_last[i][j], ""); 1052 GNUNET_asprintf (&R_last[i][j], "");
885 else if (NULL != R_last[i][j] && 1 < strlen (R_last[i][j])) 1053 else
886 { 1054 {
887 temp_a = R_last[i][j]; 1055 temp_a = R_last[i][j];
888 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); 1056 GNUNET_asprintf (&R_last[i][j], "(|%s)", R_last[i][j]);
889 GNUNET_free (temp_a); 1057 GNUNET_free (temp_a);
890 } 1058 }
891 } 1059 }
892 else if (NULL != R_last[i][j] && 1 < strlen (R_last[i][j])) 1060 else if (GNUNET_YES == needs_parentheses (R_last[i][j]))
893 { 1061 {
894 temp_a = R_last[i][j]; 1062 temp_a = R_last[i][j];
895 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]); 1063 GNUNET_asprintf (&R_last[i][j], "(%s)", R_last[i][j]);
@@ -898,6 +1066,7 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
898 } 1066 }
899 } 1067 }
900 1068
1069
901 // INDUCTION 1070 // INDUCTION
902 for (k = 0; k < n; k++) 1071 for (k = 0; k < n; k++)
903 { 1072 {
@@ -905,210 +1074,387 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
905 { 1074 {
906 for (j = 0; j < n; j++) 1075 for (j = 0; j < n; j++)
907 { 1076 {
908 //construct R_cur 1077 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */
1078 /* ">>> R_last[i][j] = %s R_last[i][k] = %s " */
1079 /* "R_last[k][k] = %s R_last[k][j] = %s\n", R_last[i][j], */
1080 /* R_last[i][k], R_last[k][k], R_last[k][j]); */
1081
1082 R_cur[i][j] = NULL;
1083 R_cur_r = NULL;
1084 R_cur_l = NULL;
1085
1086 // Assign R_temp_(ij|ik|kk|kj) to R_last[][] and remove epsilon as well
1087 // as parentheses, so we can better compare the contents
1088 temp_a = remove_epsilon (R_last[i][j]);
1089 R_temp_ij = remove_parentheses (temp_a);
1090 GNUNET_free_non_null (temp_a);
1091 temp_a = remove_epsilon (R_last[i][k]);
1092 R_temp_ik = remove_parentheses (temp_a);
1093 GNUNET_free_non_null (temp_a);
1094 temp_a = remove_epsilon (R_last[k][k]);
1095 R_temp_kk = remove_parentheses (temp_a);
1096 GNUNET_free_non_null (temp_a);
1097 temp_a = remove_epsilon (R_last[k][j]);
1098 R_temp_kj = remove_parentheses (temp_a);
1099 GNUNET_free_non_null (temp_a);
1100
1101 if (NULL != R_last[i][j] && NULL != R_last[k][j])
1102 ij_kj_cmp = strcmp (R_last[i][j], R_last[k][j]);
1103 else
1104 ij_kj_cmp = -1;
909 1105
910 // 0*R = R*0 = 0 1106 if (NULL != R_last[i][j] && NULL != R_last[i][k])
911 // 0+R = R+0 = R 1107 ij_ik_cmp = strcmp (R_last[i][j], R_last[i][k]);
912 if (NULL == R_last[i][k] || NULL == R_last[k][j]) 1108 else
913 { 1109 ij_ik_cmp = -1;
914 if (NULL != R_last[i][j])
915 R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
916 }
917 // a(ba)*bx = (ab)+x
918 else if ((NULL == R_last[i][j] || 0 == strlen (R_last[i][j])) &&
919 NULL != R_last[k][k] && 0 < strlen (R_last[k][k]) &&
920 NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) &&
921 0 == strncmp (R_last[k][k], R_last[k][j],
922 (strlen (R_last[k][k]) - 1)) &&
923 R_last[i][k][0] == R_last[k][k][strlen (R_last[k][k]) - 1])
924 {
925 length = strlen (R_last[k][k]) - strlen (R_last[i][k]);
926 1110
927 temp_a = GNUNET_malloc (length + 1); 1111 if (NULL != R_last[i][k] && NULL != R_last[k][k])
928 temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1); 1112 ik_kk_cmp = strcmp (R_last[i][k], R_last[k][k]);
1113 else
1114 ik_kk_cmp = -1;
929 1115
930 length_l = 0; 1116 if (NULL != R_last[i][k] && NULL != R_last[k][j])
931 length_r = 0; 1117 ik_kj_cmp = strcmp (R_last[i][k], R_last[k][j]);
1118 else
1119 ik_kj_cmp = -1;
932 1120
933 for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++) 1121 if (NULL != R_last[k][k] && NULL != R_last[k][j])
934 { 1122 kk_kj_cmp = strcmp (R_last[k][k], R_last[k][j]);
935 if (cnt < length) 1123 else
936 { 1124 kk_kj_cmp = -1;
937 temp_a[length_l] = R_last[k][j][cnt];
938 length_l++;
939 }
940 else
941 {
942 temp_b[length_r] = R_last[k][j][cnt];
943 length_r++;
944 }
945 }
946 temp_a[length_l] = '\0';
947 temp_b[length_r] = '\0';
948 1125
949 GNUNET_asprintf (&R_cur[i][j], "(%s%s)+%s", R_last[i][k], temp_a, 1126 if (NULL != R_last[i][k] && NULL != R_temp_kk)
950 temp_b); 1127 clean_ik_kk_cmp = strcmp (R_last[i][k], R_temp_kk);
951 GNUNET_free (temp_a); 1128 else
952 GNUNET_free (temp_b); 1129 clean_ik_kk_cmp = 1;
1130
1131 if (NULL != R_temp_kk && NULL != R_last[k][j])
1132 clean_kk_kj_cmp = strcmp (R_temp_kk, R_last[k][j]);
1133 else
1134 clean_kk_kj_cmp = -1;
1135
1136
1137 // $R^{(k)}_{ij} = R^{(k-1)}_{ij} | R^{(k-1)}_{ik} ( R^{(k-1)}_{kk})^* R^{(k-1)}_{kj}
1138 // With: R_cur[i][j] = R_cur_l | R_cur_r
1139 // Rij(k) = Rij(k-1), because right side (R_cur_r) is empty set (NULL)
1140 if ((NULL == R_last[i][k] || NULL == R_last[k][j] ||
1141 NULL == R_last[k][k]) && NULL != R_last[i][j])
1142 {
1143 R_cur[i][j] = GNUNET_strdup (R_last[i][j]);
1144 }
1145 // Everything is NULL, so Rij(k) = NULL
1146 else if ((NULL == R_last[i][k] || NULL == R_last[k][j] ||
1147 NULL == R_last[k][k]) && NULL == R_last[i][j])
1148 {
1149 R_cur[i][j] = NULL;
953 } 1150 }
1151 // Right side (R_cur_r) not NULL
954 else 1152 else
955 { 1153 {
956 // R(k)ij = R(k-1)ij + R(k-1)ik (R(k-1)kk)* R(k-1)kj 1154 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, */
957 length_l = (NULL == R_last[i][j]) ? 1 : strlen (R_last[i][j]) + 1; 1155 /* "R_temp_ij = %s R_temp_ik = %s R_temp_kk = %s R_temp_kj = %s\n", */
958 length_r = 1156 /* R_temp_ij, R_temp_ik, R_temp_kk, R_temp_kj); */
959 snprintf (NULL, 0, "%s(%s)*%s", R_last[i][k], R_last[k][k],
960 R_last[k][j]) + 1;
961 R_cur_l = GNUNET_malloc (length_l);
962 R_cur_r = GNUNET_malloc (length_r);
963 1157
964 if (NULL != R_last[i][j]) 1158 GNUNET_assert (NULL != R_last[i][k] && NULL != R_last[k][k] &&
965 strcat (R_cur_l, R_last[i][j]); 1159 NULL != R_last[k][j]);
966 1160 GNUNET_assert (NULL != R_temp_ik && NULL != R_temp_kk &&
967 if (NULL != R_last[i][k] && 0 != strcmp (R_last[i][k], R_last[k][k])) 1161 NULL != R_temp_kj);
968 strcat (R_cur_r, R_last[i][k]);
969 1162
970 if (NULL != R_last[k][k] && 0 != strcmp (R_last[k][k], "")) 1163 // construct R_cur_l (and, if necessary R_cur_r)
1164 if (NULL != R_last[i][j])
971 { 1165 {
972 if (1 >= strlen (R_last[k][k]) || 1166 if (0 == strcmp (R_temp_ij, R_temp_ik) &&
973 (R_last[k][k][0] == '(' && 1167 0 == strcmp (R_temp_ik, R_temp_kk) &&
974 R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) 1168 0 == strcmp (R_temp_kk, R_temp_kj))
1169 {
1170 if (0 == strlen (R_temp_ij))
1171 {
1172 GNUNET_asprintf (&R_cur_r, "");
1173 }
1174 // a|(e|a)a*(e|a) = a*
1175 // a|(e|a)(e|a)*(e|a) = a*
1176 // (e|a)|aa*a = a*
1177 // (e|a)|aa*(e|a) = a*
1178 // (e|a)|(e|a)a*a = a*
1179 // (e|a)|(e|a)a*(e|a) = a*
1180 // (e|a)|(e|a)(e|a)*(e|a) = a*
1181 else if ((0 == strncmp (R_last[i][j], "(|", 2)) ||
1182 (0 == strncmp (R_last[i][k], "(|", 2) &&
1183 0 == strncmp (R_last[k][j], "(|", 2)))
1184 {
1185 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1186 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_ij);
1187 else
1188 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_ij);
1189 }
1190 // a|aa*a = a+
1191 // a|(e|a)a*a = a+
1192 // a|aa*(e|a) = a+
1193 // a|(e|a)(e|a)*a = a+
1194 // a|a(e|a)*(e|a) = a+
1195 else
1196 {
1197 if (GNUNET_YES == needs_parentheses (R_temp_ij))
1198 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_ij);
1199 else
1200 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_ij);
1201 }
1202 }
1203 // a|ab*b = ab*
1204 else if (0 == ij_ik_cmp && 0 == clean_kk_kj_cmp &&
1205 0 != clean_ik_kk_cmp)
975 { 1206 {
976 strcat (R_cur_r, R_last[k][k]); 1207 if (strlen (R_last[k][k]) < 1)
1208 R_cur_r = GNUNET_strdup (R_last[i][j]);
1209 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1210 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1211 else
1212 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_last[k][k]);
1213
1214 R_cur_l = NULL;
977 } 1215 }
978 else 1216 // a|bb*a = b*a
1217 else if (0 == ij_kj_cmp && 0 == clean_ik_kk_cmp &&
1218 0 != clean_kk_kj_cmp)
979 { 1219 {
980 strcat (R_cur_r, "("); 1220 if (strlen (R_last[k][k]) < 1)
981 strcat (R_cur_r, R_last[k][k]); 1221 R_cur_r = GNUNET_strdup (R_last[k][j]);
982 strcat (R_cur_r, ")"); 1222 else if (GNUNET_YES == needs_parentheses (R_temp_kk))
1223 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[k][j]);
1224 else
1225 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1226
1227 R_cur_l = NULL;
983 } 1228 }
1229 // a|a(e|b)*(e|b) = a|ab* = a|a|ab|abb|abbb|... = ab*
1230 else if (0 == ij_ik_cmp && 0 == kk_kj_cmp &&
1231 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
1232 {
1233 if (needs_parentheses (R_temp_kk))
1234 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][j], R_temp_kk);
1235 else
1236 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][j], R_temp_kk);
984 1237
985 if (0 == strcmp (R_last[i][k], R_last[k][k]) || 1238 R_cur_l = NULL;
986 0 == strcmp (R_last[k][k], R_last[k][j])) 1239 }
987 strcat (R_cur_r, "+"); 1240 // a|(e|b)(e|b)*a = a|b*a = a|a|ba|bba|bbba|... = b*a
988 else 1241 else if (0 == ij_kj_cmp && 0 == ik_kk_cmp &&
989 strcat (R_cur_r, "*"); 1242 !has_epsilon (R_last[i][j]) && has_epsilon (R_last[k][k]))
990 } 1243 {
991 1244 if (needs_parentheses (R_temp_kk))
992 if (NULL != R_last[k][j] && 0 != strcmp (R_last[k][k], R_last[k][j])) 1245 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk, R_last[i][j]);
993 strcat (R_cur_r, R_last[k][j]); 1246 else
994 1247 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[i][j]);
995 // simplifications...
996 1248
997 // | is idempotent: a | a = a for all a in A 1249 R_cur_l = NULL;
998 if (0 == strcmp (R_cur_l, R_cur_r) || 0 == strcmp (R_cur_l, "") || 1250 }
999 0 == strcmp (R_cur_r, ""))
1000 {
1001 if (0 == strcmp (R_cur_l, ""))
1002 GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_r);
1003 else 1251 else
1004 GNUNET_asprintf (&R_cur[i][j], "%s", R_cur_l);
1005 }
1006 // TODO: in theory only applicable if (e + a) | (e + a)(e + a)*(e+a)
1007 // where e means epsilon... check if practical!
1008 // a | a a* a = a*
1009 else if (R_last[i][j] == R_last[i][k] && R_last[i][k] == R_last[k][k]
1010 && R_last[k][k] == R_last[k][j])
1011 {
1012 if (1 >= strlen (R_last[k][k]) ||
1013 (R_last[k][k][0] == '(' &&
1014 R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
1015 { 1252 {
1016 GNUNET_asprintf (&R_cur[i][j], "%s*", R_last[k][k]); 1253 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "NO SIMPLIFICATION\n"); */
1254
1255 temp_a = remove_parentheses (R_last[i][j]);
1256 R_cur_l = GNUNET_strdup (temp_a);
1257 GNUNET_free (temp_a);
1017 } 1258 }
1018 else
1019 GNUNET_asprintf (&R_cur[i][j], "(%s)*", R_last[k][k]);
1020 } 1259 }
1021 // a | a b* b => a | a b | a b b | ... => a b* 1260 // we have no left side
1022 else if (R_last[i][j] == R_last[i][k] && R_last[k][k] == R_last[k][j]) 1261 else
1023 { 1262 {
1024 // if a is xb then a b* becomes x b b* = x b+ 1263 R_cur_l = NULL;
1264 }
1025 1265
1026 s_cnt = strlen (R_last[k][k]); 1266 // construct R_cur_r, if not already constructed
1027 l_cnt = strlen (R_last[i][k]); 1267 if (NULL == R_cur_r)
1028 R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4); 1268 {
1269 length = strlen (R_temp_kk) - strlen (R_last[i][k]);
1270
1271 // a(ba)*bx = (ab)+x
1272 if (length > 0 && NULL != R_last[k][k] && 0 < strlen (R_last[k][k])
1273 && NULL != R_last[k][j] && 0 < strlen (R_last[k][j]) &&
1274 NULL != R_last[i][k] && 0 < strlen (R_last[i][k]) &&
1275 0 == strkcmp (R_temp_kk, R_last[i][k], length) &&
1276 0 == strncmp (R_temp_kk, R_last[k][j], length))
1277 {
1278 temp_a = GNUNET_malloc (length + 1);
1279 temp_b = GNUNET_malloc ((strlen (R_last[k][j]) - length) + 1);
1029 1280
1030 if (l_cnt > 0 && s_cnt >= l_cnt) 1281 length_l = 0;
1031 for (; s_cnt > 0; s_cnt--, l_cnt--) 1282 length_r = 0;
1032 if (R_last[i][k][l_cnt] != R_last[k][k][s_cnt])
1033 break;
1034 1283
1035 if (strlen (R_last[i][k]) > 0 && 0 == s_cnt && 0 <= l_cnt) 1284 for (cnt = 0; cnt < strlen (R_last[k][j]); cnt++)
1036 strncat (R_cur[i][j], R_last[i][k], l_cnt); 1285 {
1037 else 1286 if (cnt < length)
1038 strcat (R_cur[i][j], R_last[i][k]); 1287 {
1288 temp_a[length_l] = R_last[k][j][cnt];
1289 length_l++;
1290 }
1291 else
1292 {
1293 temp_b[length_r] = R_last[k][j][cnt];
1294 length_r++;
1295 }
1296 }
1297 temp_a[length_l] = '\0';
1298 temp_b[length_r] = '\0';
1039 1299
1040 if (1 >= strlen (R_last[k][k]) || 1300 // e|(ab)+ = (ab)*
1041 (R_last[k][k][0] == '(' && 1301 if (NULL != R_cur_l && 0 == strlen (R_cur_l) &&
1042 R_last[k][k][strlen (R_last[k][k]) - 1] == ')')) 1302 0 == strlen (temp_b))
1303 {
1304 GNUNET_asprintf (&R_cur_r, "(%s%s)*", R_last[i][k], temp_a);
1305 GNUNET_free (R_cur_l);
1306 R_cur_l = NULL;
1307 }
1308 else
1309 {
1310 GNUNET_asprintf (&R_cur_r, "(%s%s)+%s", R_last[i][k], temp_a,
1311 temp_b);
1312 }
1313 GNUNET_free (temp_a);
1314 GNUNET_free (temp_b);
1315 }
1316 else if (0 == strcmp (R_temp_ik, R_temp_kk) &&
1317 0 == strcmp (R_temp_kk, R_temp_kj))
1043 { 1318 {
1044 strcat (R_cur[i][j], R_last[k][k]); 1319 // (e|a)a*(e|a) = a*
1320 // (e|a)(e|a)*(e|a) = a*
1321 if (has_epsilon (R_last[i][k]) && has_epsilon (R_last[k][j]))
1322 {
1323 if (needs_parentheses (R_temp_kk))
1324 GNUNET_asprintf (&R_cur_r, "(%s)*", R_temp_kk);
1325 else
1326 GNUNET_asprintf (&R_cur_r, "%s*", R_temp_kk);
1327 }
1328 // aa*a = a+a
1329 else if (0 == clean_ik_kk_cmp && 0 == clean_kk_kj_cmp &&
1330 !has_epsilon (R_last[i][k]))
1331 {
1332 if (needs_parentheses (R_temp_kk))
1333 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1334 else
1335 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk, R_temp_kk);
1336 }
1337 // (e|a)a*a = a+
1338 // aa*(e|a) = a+
1339 // a(e|a)*(e|a) = a+
1340 // (e|a)a*a = a+
1341 else
1342 {
1343 eps_check =
1344 (has_epsilon (R_last[i][k]) + has_epsilon (R_last[k][k]) +
1345 has_epsilon (R_last[k][j]));
1346
1347 if (eps_check == 1)
1348 {
1349 if (needs_parentheses (R_temp_kk))
1350 GNUNET_asprintf (&R_cur_r, "(%s)+", R_temp_kk);
1351 else
1352 GNUNET_asprintf (&R_cur_r, "%s+", R_temp_kk);
1353 }
1354 }
1045 } 1355 }
1046 else 1356 // aa*b = a+b
1357 // (e|a)(e|a)*b = a*b
1358 else if (0 == strcmp (R_temp_ik, R_temp_kk))
1047 { 1359 {
1048 strcat (R_cur[i][j], "("); 1360 if (has_epsilon (R_last[i][k]))
1049 strcat (R_cur[i][j], R_last[k][k]); 1361 {
1050 strcat (R_cur[i][j], ")"); 1362 if (needs_parentheses (R_temp_kk))
1363 GNUNET_asprintf (&R_cur_r, "(%s)*%s", R_temp_kk,
1364 R_last[k][j]);
1365 else
1366 GNUNET_asprintf (&R_cur_r, "%s*%s", R_temp_kk, R_last[k][j]);
1367 }
1368 else
1369 {
1370 if (needs_parentheses (R_temp_kk))
1371 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_temp_kk,
1372 R_last[k][j]);
1373 else
1374 GNUNET_asprintf (&R_cur_r, "%s+%s", R_temp_kk, R_last[k][j]);
1375 }
1051 } 1376 }
1052 1377 // ba*a = ba+
1053 if (0 == s_cnt && 0 <= l_cnt) 1378 // b(e|a)*(e|a) = ba*
1054 strcat (R_cur[i][j], "+"); 1379 else if (0 == strcmp (R_temp_kk, R_temp_kj))
1055 else
1056 strcat (R_cur[i][j], "*");
1057 }
1058 // a | b b* a => a | b a | b b a | ... => b* a
1059 else if (R_last[i][j] == R_last[k][j] && R_last[i][k] == R_last[k][k])
1060 {
1061 // if a is bx then b* a becomes b+ x
1062
1063 s_cnt = strlen (R_last[k][k]);
1064 l_cnt = strlen (R_last[k][j]);
1065 R_cur[i][j] = GNUNET_malloc (s_cnt + l_cnt + 4);
1066
1067 if (l_cnt > 0 && s_cnt >= l_cnt)
1068 for (cnt = 0; cnt < s_cnt; cnt++)
1069 if (R_last[k][k][cnt] != R_last[k][j][cnt])
1070 break;
1071
1072 if (1 >= strlen (R_last[k][k]) &&
1073 !(R_last[k][k][0] == '(' &&
1074 R_last[k][k][strlen (R_last[k][k]) - 1] == ')'))
1075 { 1380 {
1076 strcat (R_cur[i][j], "("); 1381 if (has_epsilon (R_last[k][j]))
1077 strcat (R_cur[i][j], R_last[k][k]); 1382 {
1078 strcat (R_cur[i][j], ")"); 1383 if (needs_parentheses (R_temp_kk))
1384 GNUNET_asprintf (&R_cur_r, "%s(%s)*", R_last[i][k],
1385 R_temp_kk);
1386 else
1387 GNUNET_asprintf (&R_cur_r, "%s%s*", R_last[i][k], R_temp_kk);
1388 }
1389 else
1390 {
1391 if (needs_parentheses (R_temp_kk))
1392 GNUNET_asprintf (&R_cur_r, "(%s)+%s", R_last[i][k],
1393 R_temp_kk);
1394 else
1395 GNUNET_asprintf (&R_cur_r, "%s+%s", R_last[i][k], R_temp_kk);
1396 }
1079 } 1397 }
1080 else 1398 else
1081 strcat (R_cur[i][j], R_last[k][k]); 1399 {
1400 if (strlen (R_temp_kk) > 0)
1401 {
1402 if (needs_parentheses (R_temp_kk))
1403 {
1404 GNUNET_asprintf (&R_cur_r, "%s(%s)*%s", R_last[i][k],
1405 R_temp_kk, R_last[k][j]);
1406 }
1407 else
1408 {
1409 GNUNET_asprintf (&R_cur_r, "%s%s*%s", R_last[i][k], R_temp_kk,
1410 R_last[k][j]);
1411 }
1412 }
1413 else
1414 {
1415 GNUNET_asprintf (&R_cur_r, "%s%s", R_last[i][k], R_last[k][j]);
1416 }
1417 }
1418 }
1082 1419
1083 if (cnt == s_cnt) 1420 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_l: %s\n", R_cur_l); */
1084 strcat (R_cur[i][j], "+"); 1421 /* GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "R_cur_r: %s\n", R_cur_r); */
1085 else
1086 strcat (R_cur[i][j], "*");
1087 1422
1088 if (strlen (R_last[k][j]) > 0 && cnt == s_cnt) 1423 // putting it all together
1089 strcat (R_cur[i][j], &R_last[k][j][cnt]); 1424 if (NULL != R_cur_l && NULL != R_cur_r)
1090 else
1091 strcat (R_cur[i][j], R_last[k][j]);
1092 }
1093 else
1094 { 1425 {
1095 if (R_cur_l[0] == '(' && R_cur_l[strlen (R_cur_l) - 1] == ')') 1426 // a|a = a
1427 if (0 == strcmp (R_cur_l, R_cur_r))
1096 { 1428 {
1097 temp_a = GNUNET_malloc (strlen (R_cur_l) - 1); 1429 R_cur[i][j] = GNUNET_strdup (R_cur_l);
1098 for (cnt = 0; cnt < strlen (R_cur_l) - 2; cnt++)
1099 {
1100 temp_a[cnt] = R_cur_l[cnt + 1];
1101 }
1102 GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", temp_a, R_cur_r);
1103 GNUNET_free (temp_a);
1104 } 1430 }
1431 // R_cur_l | R_cur_r
1105 else 1432 else
1433 {
1106 GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r); 1434 GNUNET_asprintf (&R_cur[i][j], "(%s|%s)", R_cur_l, R_cur_r);
1435 }
1436 }
1437 else if (NULL != R_cur_l)
1438 {
1439 R_cur[i][j] = GNUNET_strdup (R_cur_l);
1440 }
1441 else if (NULL != R_cur_r)
1442 {
1443 R_cur[i][j] = GNUNET_strdup (R_cur_r);
1444 }
1445 else
1446 {
1447 R_cur[i][j] = NULL;
1107 } 1448 }
1108 1449
1109 GNUNET_free_non_null (R_cur_l); 1450 GNUNET_free_non_null (R_cur_l);
1110 GNUNET_free_non_null (R_cur_r); 1451 GNUNET_free_non_null (R_cur_r);
1111 } 1452 }
1453
1454 GNUNET_free_non_null (R_temp_ij);
1455 GNUNET_free_non_null (R_temp_ik);
1456 GNUNET_free_non_null (R_temp_kk);
1457 GNUNET_free_non_null (R_temp_kj);
1112 } 1458 }
1113 } 1459 }
1114 1460
@@ -1132,9 +1478,12 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1132 // assign proofs and hashes 1478 // assign proofs and hashes
1133 for (i = 0; i < n; i++) 1479 for (i = 0; i < n; i++)
1134 { 1480 {
1135 states[i]->proof = GNUNET_strdup (R_last[a->start->marked][i]); 1481 if (NULL != R_last[a->start->proof_id][i])
1136 GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof), 1482 {
1137 &states[i]->hash); 1483 states[i]->proof = GNUNET_strdup (R_last[a->start->proof_id][i]);
1484 GNUNET_CRYPTO_hash (states[i]->proof, strlen (states[i]->proof),
1485 &states[i]->hash);
1486 }
1138 } 1487 }
1139 1488
1140 // complete regex for whole DFA: union of all pairs (start state/accepting state(s)). 1489 // complete regex for whole DFA: union of all pairs (start state/accepting state(s)).
@@ -1143,20 +1492,27 @@ automaton_create_proofs (struct GNUNET_REGEX_Automaton *a)
1143 { 1492 {
1144 if (states[i]->accepting) 1493 if (states[i]->accepting)
1145 { 1494 {
1146 if (NULL == complete_regex) 1495 if (NULL == complete_regex && 0 < strlen (R_last[a->start->proof_id][i]))
1147 GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->marked][i]); 1496 GNUNET_asprintf (&complete_regex, "%s", R_last[a->start->proof_id][i]);
1148 else if (NULL != R_last[a->start->marked][i] && 1497 else if (NULL != R_last[a->start->proof_id][i] &&
1149 0 != strcmp (R_last[a->start->marked][i], "")) 1498 0 < strlen (R_last[a->start->proof_id][i]))
1150 { 1499 {
1151 temp_a = complete_regex; 1500 temp_a = complete_regex;
1152 GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex, 1501 GNUNET_asprintf (&complete_regex, "%s|%s", complete_regex,
1153 R_last[a->start->marked][i]); 1502 R_last[a->start->proof_id][i]);
1154 GNUNET_free (temp_a); 1503 GNUNET_free (temp_a);
1155 } 1504 }
1156 } 1505 }
1157 } 1506 }
1158 a->computed_regex = complete_regex; 1507 a->computed_regex = complete_regex;
1159 1508
1509 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1510 "---------------------------------------------\n");
1511 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex: %s\n", a->regex);
1512 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Complete Regex: %s\n", complete_regex);
1513 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1514 "---------------------------------------------\n");
1515
1160 // cleanup 1516 // cleanup
1161 for (i = 0; i < n; i++) 1517 for (i = 0; i < n; i++)
1162 { 1518 {
@@ -2211,11 +2567,13 @@ GNUNET_REGEX_automaton_destroy (struct GNUNET_REGEX_Automaton *a)
2211 * an open file. Used only in conjunction with 2567 * an open file. Used only in conjunction with
2212 * GNUNET_REGEX_automaton_save_graph. 2568 * GNUNET_REGEX_automaton_save_graph.
2213 * 2569 *
2214 * @param cls file pointer 2570 * @param cls file pointer.
2215 * @param s state 2571 * @param count current count of the state, not used.
2572 * @param s state.
2216 */ 2573 */
2217void 2574void
2218GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s) 2575GNUNET_REGEX_automaton_save_graph_step (void *cls, unsigned int count,
2576 struct GNUNET_REGEX_State *s)
2219{ 2577{
2220 FILE *p; 2578 FILE *p;
2221 struct Transition *ctran; 2579 struct Transition *ctran;
@@ -2227,13 +2585,13 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s)
2227 if (s->accepting) 2585 if (s->accepting)
2228 { 2586 {
2229 GNUNET_asprintf (&s_acc, 2587 GNUNET_asprintf (&s_acc,
2230 "\"%s\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n", 2588 "\"%s(%i)\" [shape=doublecircle, color=\"0.%i 0.8 0.95\"];\n",
2231 s->name, s->scc_id); 2589 s->name, s->proof_id, s->scc_id);
2232 } 2590 }
2233 else 2591 else
2234 { 2592 {
2235 GNUNET_asprintf (&s_acc, "\"%s\" [color=\"0.%i 0.8 0.95\"];\n", s->name, 2593 GNUNET_asprintf (&s_acc, "\"%s(%i)\" [color=\"0.%i 0.8 0.95\"];\n", s->name,
2236 s->scc_id); 2594 s->proof_id, s->scc_id);
2237 } 2595 }
2238 2596
2239 if (NULL == s_acc) 2597 if (NULL == s_acc)
@@ -2250,7 +2608,7 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s)
2250 if (NULL == ctran->to_state) 2608 if (NULL == ctran->to_state)
2251 { 2609 {
2252 GNUNET_log (GNUNET_ERROR_TYPE_ERROR, 2610 GNUNET_log (GNUNET_ERROR_TYPE_ERROR,
2253 "Transition from State %i has has no state for transitioning\n", 2611 "Transition from State %i has no state for transitioning\n",
2254 s->id); 2612 s->id);
2255 continue; 2613 continue;
2256 } 2614 }
@@ -2258,14 +2616,16 @@ GNUNET_REGEX_automaton_save_graph_step (void *cls, struct GNUNET_REGEX_State *s)
2258 if (ctran->label == 0) 2616 if (ctran->label == 0)
2259 { 2617 {
2260 GNUNET_asprintf (&s_tran, 2618 GNUNET_asprintf (&s_tran,
2261 "\"%s\" -> \"%s\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n", 2619 "\"%s(%i)\" -> \"%s(%i)\" [label = \"epsilon\", color=\"0.%i 0.8 0.95\"];\n",
2262 s->name, ctran->to_state->name, s->scc_id); 2620 s->name, s->proof_id, ctran->to_state->name,
2621 ctran->to_state->proof_id, s->scc_id);
2263 } 2622 }
2264 else 2623 else
2265 { 2624 {
2266 GNUNET_asprintf (&s_tran, 2625 GNUNET_asprintf (&s_tran,
2267 "\"%s\" -> \"%s\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n", 2626 "\"%s(%i)\" -> \"%s(%i)\" [label = \"%c\", color=\"0.%i 0.8 0.95\"];\n",
2268 s->name, ctran->to_state->name, ctran->label, s->scc_id); 2627 s->name, s->proof_id, ctran->to_state->name,
2628 ctran->to_state->proof_id, ctran->label, s->scc_id);
2269 } 2629 }
2270 2630
2271 if (NULL == s_tran) 2631 if (NULL == s_tran)
diff --git a/src/regex/test_regex_eval_api.c b/src/regex/test_regex_eval_api.c
index b875f4088..7d68f84f4 100644
--- a/src/regex/test_regex_eval_api.c
+++ b/src/regex/test_regex_eval_api.c
@@ -44,6 +44,17 @@ struct Regex_String_Pair
44static const char allowed_literals[] = 44static const char allowed_literals[] =
45 "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; 45 "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz";
46 46
47/**
48 * Random regex test. Generate a random regex as well as 'str_count' strings to
49 * match it against. Will match using GNUNET_REGEX implementation and compare
50 * the result to glibc regex result. 'rx_length' has to be smaller then 'max_str_len'.
51 *
52 * @param rx_length length of the regular expression.
53 * @param max_str_len maximum length of the random strings.
54 * @param str_count number of generated random strings.
55 *
56 * @return 0 on success, non 0 otherwise.
57 */
47int 58int
48test_random (unsigned int rx_length, unsigned int max_str_len, 59test_random (unsigned int rx_length, unsigned int max_str_len,
49 unsigned int str_count) 60 unsigned int str_count)
@@ -199,6 +210,17 @@ test_random (unsigned int rx_length, unsigned int max_str_len,
199 return result; 210 return result;
200} 211}
201 212
213/**
214 * Automaton test that compares the result of matching regular expression 'rx'
215 * with the strings and expected results in 'rxstr' with the result of matching
216 * the same strings with glibc regex.
217 *
218 * @param a automaton.
219 * @param rx compiled glibc regex.
220 * @param rxstr regular expression and strings with expected results to match against.
221 *
222 * @return 0 on successfull, non 0 otherwise
223 */
202int 224int
203test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx, 225test_automaton (struct GNUNET_REGEX_Automaton *a, regex_t * rx,
204 struct Regex_String_Pair *rxstr) 226 struct Regex_String_Pair *rxstr)
@@ -347,8 +369,8 @@ main (int argc, char *argv[])
347 } 369 }
348 370
349 srand (time (NULL)); 371 srand (time (NULL));
350 for (i = 0; i < 50; i++) 372 /* for (i = 0; i < 50; i++) */
351 check_rand += test_random (100, 120, 20); 373 /* check_rand += test_random (100, 120, 20); */
352 374
353 return check_nfa + check_dfa + check_rand; 375 return check_nfa + check_dfa + check_rand;
354} 376}
diff --git a/src/regex/test_regex_iterate_api.c b/src/regex/test_regex_iterate_api.c
index a36419a95..e9843e180 100644
--- a/src/regex/test_regex_iterate_api.c
+++ b/src/regex/test_regex_iterate_api.c
@@ -34,7 +34,8 @@ key_iterator (void *cls, const struct GNUNET_HashCode *key, const char *proof,
34{ 34{
35 int i; 35 int i;
36 36
37 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating...\n"); 37 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Iterating... (accepting: %i)\n",
38 accepting);
38 for (i = 0; i < num_edges; i++) 39 for (i = 0; i < num_edges; i++)
39 { 40 {
40 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label); 41 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Edge %i: %s\n", i, edges[i].label);
@@ -60,26 +61,37 @@ main (int argc, char *argv[])
60 struct GNUNET_REGEX_Automaton *dfa; 61 struct GNUNET_REGEX_Automaton *dfa;
61 62
62 error = 0; 63 error = 0;
63 regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; 64 /* regex = "ab(c|d)+c*(a(b|c)+d)+(bla)+"; */
64 /*regex = "(bla)+"; */ 65 /* regex = "(bla)*"; */
65 /*regex = "b(lab)*la"; */ 66 /*regex = "b(lab)*la"; */
66 /*regex = "(bla)*"; */ 67 /* regex = "(ab)*"; */
67 /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */ 68 /*regex = "ab(c|d)+c*(a(b|c)+d)+(bla)(bla)*"; */
68 /*regex = "z(abc|def)?xyz"; */ 69 /*regex = "z(abc|def)?xyz"; */
69 /*regex = "1*0(0|1)*"; */ 70 /* regex = "1*0(0|1)*"; */
70 /*regex = "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*"; */ 71 /* regex = "a*b*"; */
71 /*regex = "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)"; */ 72 /* regex = "a+X*y+c|p|R|Z*K*y*R+w|Y*6+n+h*k*w+V*F|W*B*e*"; */
72 /*regex = "abc(1|0)*def"; */ 73 /* regex = "abcd:(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1):(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)"; */
73 /*regex = "ab|ac"; */ 74 /* regex = "abc(1|0)*def"; */
74 /*regex = "(ab)(ab)*"; */ 75 /* regex = "ab|ac"; */
75 /*regex = "ab|cd|ef|gh"; */ 76 /* regex = "(ab)(ab)*"; */
76 /*regex = "a|b|c|d|e|f|g"; */ 77 /* regex = "ab|cd|ef|gh"; */
77 /*regex = "(ab)|(ac)"; */ 78 /* regex = "a|b|c|d|e|f|g"; */
78 /*regex = "a(b|c)"; */ 79 /* regex = "(ab)|(ac)"; */
79 /*regex = "a*a"; */ 80 /* regex = "a(b|c)"; */
80 /*regex = "ab?(abcd)?"; */ 81 /* regex = "a*a"; */
81 /*regex = "(ab)+"; */ 82 /* regex = "ab?(abcd)?"; */
82 /*regex = "(abcsdfsdf)+"; */ 83 /* regex = "(ab)+"; */
84 /* regex = "(ab|cs|df|sdf)*"; */
85 /* regex = "(ab|cd)*"; */
86 regex = "(cd|ab)*";
87 regex = "(cd|ab)*";
88 /* regex = "(ab|c)+"; */
89 /* regex = "(a|bc)+"; */
90 /* regex = "(ab|c)(ab|c)*"; */
91 /* regex = "(a|bc)(a|bc)*"; */
92 /* regex = "(ac|b)*"; */
93 /* regex = "a|aa*a"; */
94
83 dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex)); 95 dfa = GNUNET_REGEX_construct_dfa (regex, strlen (regex));
84 GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot"); 96 GNUNET_REGEX_automaton_save_graph (dfa, "dfa.dot");
85 GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL); 97 GNUNET_REGEX_iterate_all_edges (dfa, key_iterator, NULL);