aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorBart Polot <bart@net.in.tum.de>2012-12-13 20:21:45 +0000
committerBart Polot <bart@net.in.tum.de>2012-12-13 20:21:45 +0000
commit26805033dca64c1b6f3a2ac66067e81b6e53844a (patch)
treeb3fc4ac69be18170b1ad0fa6b02f2abf4dcad3a1 /src/regex
parent09a43a3deb45aba79a0e22c2e3ed455602a0e4f9 (diff)
downloadgnunet-26805033dca64c1b6f3a2ac66067e81b6e53844a.tar.gz
gnunet-26805033dca64c1b6f3a2ac66067e81b6e53844a.zip
- new perf
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/perf-regex.c57
-rw-r--r--src/regex/regex.c128
2 files changed, 176 insertions, 9 deletions
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c
index 72da6f270..18c51987d 100644
--- a/src/regex/perf-regex.c
+++ b/src/regex/perf-regex.c
@@ -46,12 +46,17 @@ usage(void)
46int 46int
47main (int argc, char *const *argv) 47main (int argc, char *const *argv)
48{ 48{
49 FILE *f;
50 struct GNUNET_REGEX_Automaton* dfa; 49 struct GNUNET_REGEX_Automaton* dfa;
51 long size; 50 char **regexes;
51 char *buffer;
52 char *regex; 52 char *regex;
53 unsigned int nr;
53 int compression; 54 int compression;
55 long size;
56 size_t len;
57 FILE *f;
54 58
59 GNUNET_log_setup ("perf-regex", "DEBUG", NULL);
55 exe = argv[0]; 60 exe = argv[0];
56 if (3 != argc) 61 if (3 != argc)
57 { 62 {
@@ -67,18 +72,52 @@ main (int argc, char *const *argv)
67 } 72 }
68 fseek (f, 0, SEEK_END); 73 fseek (f, 0, SEEK_END);
69 size = ftell (f); 74 size = ftell (f);
75 fprintf (stderr, "using file %s, size %ld\n", argv[1], size);
70 fseek (f, 0, SEEK_SET); 76 fseek (f, 0, SEEK_SET);
71 regex = GNUNET_malloc (size); 77 buffer = GNUNET_malloc (size + 1);
72 if (fread (regex, sizeof(char), size, f) != size) 78 regexes = GNUNET_malloc (sizeof (char *));
79 nr = 1;
80 do
73 { 81 {
74 fprintf (stderr, "Can't read file %s\n", argv[1]); 82 if (NULL == fgets (buffer, size + 1, f))
75 usage(); 83 {
76 return 3; 84 fprintf (stderr, "Can't read file %s\n", argv[1]);
77 } 85 usage();
86 return 3;
87 }
88 len = strlen (buffer);
89 if (len < 1)
90 continue;
91 if ('\n' == buffer[len - 1])
92 {
93 len--;
94 buffer[len] = '\0';
95 }
96 if (len < 6 || strncmp (&buffer[len - 6], "(0|1)*", 6) != 0)
97 {
98 fprintf (stderr, "\nWARNING:\n");
99 fprintf (stderr, "%s (line %u) does not end in (0|1)*\n", buffer, nr);
100 }
101 else
102 {
103 buffer[len - 6] = '\0';
104 }
105 GNUNET_array_grow (regexes, nr, nr+1);
106 regexes[nr - 2] = GNUNET_strdup (buffer);
107 regexes[nr - 1] = NULL;
108 } while (ftell(f) < size);
109 GNUNET_free (buffer);
110
111 buffer = GNUNET_REGEX_combine (regexes);
112
113 GNUNET_asprintf (&regex, "GNVPN-0001-PAD(%s)(0|1)*", buffer);
114
115// fprintf (stderr, "Combined regex:\n%s\n", regex);
116// return 0;
117
78 compression = atoi (argv[2]); 118 compression = atoi (argv[2]);
79 dfa = GNUNET_REGEX_construct_dfa (regex, size, compression); 119 dfa = GNUNET_REGEX_construct_dfa (regex, size, compression);
80 GNUNET_REGEX_automaton_destroy (dfa); 120 GNUNET_REGEX_automaton_destroy (dfa);
81 GNUNET_free (regex);
82 return 0; 121 return 0;
83} 122}
84 123
diff --git a/src/regex/regex.c b/src/regex/regex.c
index 511aa4141..19eea14d5 100644
--- a/src/regex/regex.c
+++ b/src/regex/regex.c
@@ -3052,3 +3052,131 @@ GNUNET_REGEX_ipv6toregex (const struct in6_addr *ipv6, unsigned int prefixlen,
3052 if (prefixlen < 128) 3052 if (prefixlen < 128)
3053 strcat (rxstr, "(0|1)+"); 3053 strcat (rxstr, "(0|1)+");
3054} 3054}
3055
3056
/**
 * Node of the prefix tree used to merge several regex strings into one.
 *
 * Each node is simultaneously an element of its parent's child list
 * (via next/prev) and the owner of its own child list (via head/tail).
 */
struct RegexCombineCtx
{
  /** Next sibling in the parent's list of children. */
  struct RegexCombineCtx *next;

  /** Previous sibling in the parent's list of children. */
  struct RegexCombineCtx *prev;

  /** First child of this node; children are walked through their next pointers. */
  struct RegexCombineCtx *head;

  /** Last child of this node. */
  struct RegexCombineCtx *tail;

  /**
   * String fragment contributed by this node.  Checked against NULL when
   * the tree is serialized; presumably only the root node leaves it unset.
   */
  char *s;
};
3066
3067
3068static char *
3069regex_combine (struct RegexCombineCtx *ctx)
3070{
3071 struct RegexCombineCtx *p;
3072 size_t len;
3073 char *regex;
3074 char *tmp;
3075 char *s;
3076
3077 if (NULL != ctx->s)
3078 GNUNET_asprintf (&regex, "%s(", ctx->s);
3079 else
3080 regex = GNUNET_strdup ("(");
3081 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "prefix: %s\n", regex);
3082
3083 for (p = ctx->head; NULL != p; p = p->next)
3084 {
3085 s = regex_combine (p);
3086 GNUNET_asprintf (&tmp, "%s%s|", regex, s);
3087 GNUNET_free_non_null (s);
3088 GNUNET_free_non_null (regex);
3089 regex = tmp;
3090 }
3091 len = strlen (regex);
3092 if (1 == len)
3093 return GNUNET_strdup ("");
3094
3095 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "pre-partial: %s\n", regex);
3096 if ('|' == regex[len - 1])
3097 regex[len - 1] = ')';
3098 if ('(' == regex[len - 1])
3099 regex[len - 1] = '\0';
3100
3101 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "partial: %s\n", regex);
3102 return regex;
3103}
3104
3105static void
3106regex_add (struct RegexCombineCtx *ctx, const char *regex)
3107{
3108 struct RegexCombineCtx *p;
3109 const char *rest;
3110
3111 rest = &regex[1];
3112 for (p = ctx->head; NULL != p; p = p->next)
3113 {
3114 if (p->s[0] == regex[0])
3115 {
3116 if (1 == strlen(p->s))
3117 {
3118 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "common char %s\n", p->s);
3119 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "adding %s\n", rest);
3120 regex_add (p, rest);
3121 }
3122 else
3123 {
3124 struct RegexCombineCtx *new;
3125 new = GNUNET_malloc (sizeof (struct RegexCombineCtx));
3126 new->s = GNUNET_strdup (&p->s[1]);
3127 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p has now %s\n", p->s);
3128 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " p will have %.1s\n", p->s);
3129 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " regex is %s\n", regex);
3130 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new has now %s\n", new->s);
3131 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " rest is now %s\n", rest);
3132 p->s[1] = '\0'; /* dont realloc */
3133 GNUNET_CONTAINER_DLL_insert (p->head, p->tail, new);
3134 regex_add (p, rest);
3135 }
3136 return;
3137 }
3138 }
3139 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " no match\n");
3140 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " new state %s\n", regex);
3141 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, " under %s\n", ctx->s);
3142 p = GNUNET_malloc (sizeof (struct RegexCombineCtx));
3143 p->s = GNUNET_strdup (regex);
3144 GNUNET_CONTAINER_DLL_insert (ctx->head, ctx->tail, p);
3145}
3146/*
3147static void
3148debug (struct RegexCombineCtx *ctx, int lvl)
3149{
3150 struct RegexCombineCtx *p;
3151 unsigned int i;
3152
3153 for (i = 0; i < lvl; i++) fprintf (stderr, " ");
3154 fprintf (stderr, "%s\n", ctx->s);
3155
3156 for (p = ctx->head; NULL != p; p = p->next)
3157 {
3158 debug (p, lvl + 2);
3159 }
3160}*/
3161
3162char *
3163GNUNET_REGEX_combine (char * const regexes[])
3164{
3165 unsigned int i;
3166 char *combined;
3167 const char *current;
3168 struct RegexCombineCtx *ctx;
3169
3170 ctx = GNUNET_malloc (sizeof (struct RegexCombineCtx));
3171 for (i = 0; regexes[i]; i++)
3172 {
3173 current = regexes[i];
3174 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Regex %u: %s\n", i, current);
3175 regex_add (ctx, current);
3176 }
3177 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "\nCombining...\n");
3178
3179 combined = regex_combine (ctx);
3180
3181 return combined;
3182}