aboutsummaryrefslogtreecommitdiff
path: root/src/regex
diff options
context:
space:
mode:
authorBart Polot <bart@net.in.tum.de>2013-09-26 12:45:31 +0000
committerBart Polot <bart@net.in.tum.de>2013-09-26 12:45:31 +0000
commit6612e2a579c9e42bc679df285edb193ed6d974dd (patch)
tree03dd758210c39a5e72e40041411ec266d46747d5 /src/regex
parent19d15fe2f575f4f6db2410d9de3ffa4de93a837b (diff)
downloadgnunet-6612e2a579c9e42bc679df285edb193ed6d974dd.tar.gz
gnunet-6612e2a579c9e42bc679df285edb193ed6d974dd.zip
Add REGEX_INTERNAL_iterate_reachable_edges which only reveals edges reachable from
states with proof longer or equal to REGEX_INITIAL_BYTES
Diffstat (limited to 'src/regex')
-rw-r--r--src/regex/perf-regex.c3
-rw-r--r--src/regex/regex_internal.c201
-rw-r--r--src/regex/regex_internal_lib.h16
3 files changed, 208 insertions, 12 deletions
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c
index c2ec7441e..163cacd1e 100644
--- a/src/regex/perf-regex.c
+++ b/src/regex/perf-regex.c
@@ -109,7 +109,10 @@ main (int argc, char *const *argv)
109 size, 109 size,
110 regex); 110 regex);
111 dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression); 111 dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression);
112 printf ("********* ALL EDGES *********'\n");
112 REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL); 113 REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL);
114 printf ("\n\n********* REACHABLE EDGES *********'\n");
115 REGEX_INTERNAL_iterate_reachable_edges (dfa, &print_edge, NULL);
113 REGEX_INTERNAL_automaton_destroy (dfa); 116 REGEX_INTERNAL_automaton_destroy (dfa);
114 GNUNET_free (buffer); 117 GNUNET_free (buffer);
115 REGEX_TEST_free_from_file (regexes); 118 REGEX_TEST_free_from_file (regexes);
diff --git a/src/regex/regex_internal.c b/src/regex/regex_internal.c
index eac420835..02bf5dc6f 100644
--- a/src/regex/regex_internal.c
+++ b/src/regex/regex_internal.c
@@ -3338,7 +3338,6 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
3338 char *consumed_string, struct REGEX_INTERNAL_State *state, 3338 char *consumed_string, struct REGEX_INTERNAL_State *state,
3339 REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) 3339 REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls)
3340{ 3340{
3341 unsigned int i;
3342 char *temp; 3341 char *temp;
3343 struct REGEX_INTERNAL_Transition *t; 3342 struct REGEX_INTERNAL_Transition *t;
3344 unsigned int num_edges = state->transition_count; 3343 unsigned int num_edges = state->transition_count;
@@ -3360,12 +3359,7 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
3360 { 3359 {
3361 if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof)) 3360 if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof))
3362 { 3361 {
3363 for (i = 0, t = state->transitions_head; NULL != t && i < num_edges; 3362 (void) state_get_edges (state, edges);
3364 t = t->next, i++)
3365 {
3366 edges[i].label = t->label;
3367 edges[i].destination = t->to_state->hash;
3368 }
3369 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); 3363 GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash);
3370 iterator (iterator_cls, &hash, consumed_string, state->accepting, 3364 iterator (iterator_cls, &hash, consumed_string, state->accepting,
3371 num_edges, edges); 3365 num_edges, edges);
@@ -3385,7 +3379,7 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
3385 GNUNET_free (temp); 3379 GNUNET_free (temp);
3386 } 3380 }
3387 } 3381 }
3388 else if (max_len < cur_len) 3382 else /* cur_len > max_len */
3389 { 3383 {
3390 /* Case where the concatenated labels are longer than max_len, then split. */ 3384 /* Case where the concatenated labels are longer than max_len, then split. */
3391 edge[0].label = &consumed_string[max_len]; 3385 edge[0].label = &consumed_string[max_len];
@@ -3425,8 +3419,8 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len,
3425 */ 3419 */
3426void 3420void
3427REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, 3421REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
3428 REGEX_INTERNAL_KeyIterator iterator, 3422 REGEX_INTERNAL_KeyIterator iterator,
3429 void *iterator_cls) 3423 void *iterator_cls)
3430{ 3424{
3431 struct REGEX_INTERNAL_State *s; 3425 struct REGEX_INTERNAL_State *s;
3432 3426
@@ -3437,19 +3431,202 @@ REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
3437 3431
3438 num_edges = state_get_edges (s, edges); 3432 num_edges = state_get_edges (s, edges);
3439 if ( ( (NULL != s->proof) && 3433 if ( ( (NULL != s->proof) &&
3440 (GNUNET_REGEX_INITIAL_BYTES <= strlen (s->proof)) ) || s->accepting) 3434 (0 <= strlen (s->proof)) ) || s->accepting)
3441 iterator (iterator_cls, &s->hash, s->proof, 3435 iterator (iterator_cls, &s->hash, s->proof,
3442 s->accepting, 3436 s->accepting,
3443 num_edges, edges); 3437 num_edges, edges);
3444 s->marked = GNUNET_NO; 3438 s->marked = GNUNET_NO;
3445 } 3439 }
3446 3440
3447 iterate_initial_edge (1, 3441 iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES,
3448 GNUNET_REGEX_INITIAL_BYTES, 3442 GNUNET_REGEX_INITIAL_BYTES,
3449 NULL, a->start, 3443 NULL, a->start,
3450 iterator, iterator_cls); 3444 iterator, iterator_cls);
3445}
3446
3447/**
3448 * Struct to hold all the relevant state information in the HashMap.
3449 *
3450 * Contains the same info as the Regex Iterator parametes except the key,
3451 * which comes directly from the HashMap iterator.
3452 */
3453struct temporal_state_store {
3454 int reachable;
3455 char *proof;
3456 int accepting;
3457 int num_edges;
3458 struct REGEX_BLOCK_Edge *edges;
3459};
3460
3461
3462/**
3463 * Store regex iterator and cls in one place to pass to the hashmap iterator.
3464 */
3465struct client_iterator {
3466 REGEX_INTERNAL_KeyIterator iterator;
3467 void *iterator_cls;
3468};
3469
3470
3471/**
3472 * Iterator over all edges of a dfa. Stores all of them in a HashMap
3473 * for later reachability marking.
3474 *
3475 * @param cls Closure (HashMap)
3476 * @param key hash for current state.
3477 * @param proof proof for current state
3478 * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not.
3479 * @param num_edges number of edges leaving current state.
3480 * @param edges edges leaving current state.
3481 */
3482static void
3483store_all_states (void *cls,
3484 const struct GNUNET_HashCode *key,
3485 const char *proof,
3486 int accepting,
3487 unsigned int num_edges,
3488 const struct REGEX_BLOCK_Edge *edges)
3489{
3490 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3491 struct temporal_state_store *tmp;
3492 size_t edges_size;
3493
3494 tmp = GNUNET_new (struct temporal_state_store);
3495 tmp->reachable = GNUNET_NO;
3496 tmp->proof = GNUNET_strdup (proof);
3497 tmp->accepting = accepting;
3498 tmp->num_edges = num_edges;
3499 edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges;
3500 tmp->edges = GNUNET_malloc (edges_size);
3501 memcpy(tmp->edges, edges, edges_size);
3502 GNUNET_CONTAINER_multihashmap_put (hm, key, tmp,
3503 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST);
3504}
3505
3506
3507/**
3508 * Mark state as reachable and call recursively on all its edges.
3509 *
3510 * If already marked as reachable, do nothing.
3511 *
3512 * @param state State to mark as reachable.
3513 * @param hm HashMap which stores all the states indexed by key.
3514 */
3515static void
3516mark_as_reachable (struct temporal_state_store *state,
3517 struct GNUNET_CONTAINER_MultiHashMap *hm)
3518{
3519 struct temporal_state_store *child;
3520 unsigned int i;
3521
3522 if (GNUNET_YES == state->reachable)
3523 /* visited */
3524 return;
3525
3526 state->reachable = GNUNET_YES;
3527 for (i = 0; i < state->num_edges; i++)
3528 {
3529 child = GNUNET_CONTAINER_multihashmap_get (hm,
3530 &state->edges[i].destination);
3531 if (NULL == child)
3532 {
3533 GNUNET_break (0);
3534 continue;
3535 }
3536 mark_as_reachable (child, hm);
3537 }
3538}
3539
3540
3541/**
3542 * Iterator over hash map entries to mark the ones that are reachable.
3543 *
3544 * @param cls closure
3545 * @param key current key code
3546 * @param value value in the hash map
3547 * @return #GNUNET_YES if we should continue to iterate,
3548 * #GNUNET_NO if not.
3549 */
3550static int
3551reachability_iterator (void *cls,
3552 const struct GNUNET_HashCode *key,
3553 void *value)
3554{
3555 struct GNUNET_CONTAINER_MultiHashMap *hm = cls;
3556 struct temporal_state_store *state = value;
3557
3558 if (GNUNET_YES == state->reachable)
3559 /* already visited and marked */
3560 return GNUNET_YES;
3561
3562 if (GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof) &&
3563 GNUNET_NO == state->accepting)
3564 /* not directly reachable */
3565 return GNUNET_YES;
3566
3567 mark_as_reachable (state, hm);
3568 return GNUNET_YES;
3569}
3570
3571
3572/**
3573 * Iterator over hash map entries.
3574 * Calling the callback on the ones marked as reachables.
3575 *
3576 * @param cls closure
3577 * @param key current key code
3578 * @param value value in the hash map
3579 * @return #GNUNET_YES if we should continue to iterate,
3580 * #GNUNET_NO if not.
3581 */
3582static int
3583iterate_reachables (void *cls,
3584 const struct GNUNET_HashCode *key,
3585 void *value)
3586{
3587 struct client_iterator *ci = cls;
3588 struct temporal_state_store *state = value;
3589
3590 if (GNUNET_YES == state->reachable)
3591 {
3592 ci->iterator (ci->iterator_cls, key,
3593 state->proof, state->accepting,
3594 state->num_edges, state->edges);
3595 }
3596 GNUNET_free (state->edges);
3597 GNUNET_free (state->proof);
3598 GNUNET_free (state);
3599 return GNUNET_YES;
3600
3601}
3602
3603/**
3604 * Iterate over all edges of automaton 'a' that are reachable from a state with
3605 * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
3606 *
3607 * Call the iterator for each such edge.
3608 *
3609 * @param a automaton.
3610 * @param iterator iterator called for each reachable edge.
3611 * @param iterator_cls closure.
3612 */
3613void
3614REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
3615 REGEX_INTERNAL_KeyIterator iterator,
3616 void *iterator_cls)
3617{
3618 struct GNUNET_CONTAINER_MultiHashMap *hm;
3619 struct client_iterator ci;
3620
3621 hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_YES);
3622 ci.iterator = iterator;
3623 ci.iterator_cls = iterator_cls;
3451 3624
3625 REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm);
3626 GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm);
3627 GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci);
3452 3628
3629 GNUNET_CONTAINER_multihashmap_destroy (hm);
3453} 3630}
3454 3631
3455 3632
diff --git a/src/regex/regex_internal_lib.h b/src/regex/regex_internal_lib.h
index 391a33fd2..96334c1c3 100644
--- a/src/regex/regex_internal_lib.h
+++ b/src/regex/regex_internal_lib.h
@@ -139,6 +139,22 @@ REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a,
139 void *iterator_cls); 139 void *iterator_cls);
140 140
141 141
142/**
143 * Iterate over all edges of automaton 'a' that are reachable from a state with
144 * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters.
145 *
146 * Call the iterator for each such edge.
147 *
148 * @param a automaton.
149 * @param iterator iterator called for each reachable edge.
150 * @param iterator_cls closure.
151 */
152void
153REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a,
154 REGEX_INTERNAL_KeyIterator iterator,
155 void *iterator_cls);
156
157
142 158
143/** 159/**
144 * Handle to store cached data about a regex announce. 160 * Handle to store cached data about a regex announce.