diff options
author | Bart Polot <bart@net.in.tum.de> | 2013-09-26 12:45:31 +0000 |
---|---|---|
committer | Bart Polot <bart@net.in.tum.de> | 2013-09-26 12:45:31 +0000 |
commit | 6612e2a579c9e42bc679df285edb193ed6d974dd (patch) | |
tree | 03dd758210c39a5e72e40041411ec266d46747d5 /src/regex | |
parent | 19d15fe2f575f4f6db2410d9de3ffa4de93a837b (diff) | |
download | gnunet-6612e2a579c9e42bc679df285edb193ed6d974dd.tar.gz gnunet-6612e2a579c9e42bc679df285edb193ed6d974dd.zip |
Add REGEX_INTERNAL_iterate_reachable_edges which only reveals edges reachable from
states with proof longer or equal to REGEX_INITIAL_BYTES
Diffstat (limited to 'src/regex')
-rw-r--r-- | src/regex/perf-regex.c | 3 | ||||
-rw-r--r-- | src/regex/regex_internal.c | 201 | ||||
-rw-r--r-- | src/regex/regex_internal_lib.h | 16 |
3 files changed, 208 insertions, 12 deletions
diff --git a/src/regex/perf-regex.c b/src/regex/perf-regex.c index c2ec7441e..163cacd1e 100644 --- a/src/regex/perf-regex.c +++ b/src/regex/perf-regex.c | |||
@@ -109,7 +109,10 @@ main (int argc, char *const *argv) | |||
109 | size, | 109 | size, |
110 | regex); | 110 | regex); |
111 | dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression); | 111 | dfa = REGEX_INTERNAL_construct_dfa (regex, size, compression); |
112 | printf ("********* ALL EDGES *********'\n"); | ||
112 | REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL); | 113 | REGEX_INTERNAL_iterate_all_edges (dfa, &print_edge, NULL); |
114 | printf ("\n\n********* REACHABLE EDGES *********'\n"); | ||
115 | REGEX_INTERNAL_iterate_reachable_edges (dfa, &print_edge, NULL); | ||
113 | REGEX_INTERNAL_automaton_destroy (dfa); | 116 | REGEX_INTERNAL_automaton_destroy (dfa); |
114 | GNUNET_free (buffer); | 117 | GNUNET_free (buffer); |
115 | REGEX_TEST_free_from_file (regexes); | 118 | REGEX_TEST_free_from_file (regexes); |
diff --git a/src/regex/regex_internal.c b/src/regex/regex_internal.c index eac420835..02bf5dc6f 100644 --- a/src/regex/regex_internal.c +++ b/src/regex/regex_internal.c | |||
@@ -3338,7 +3338,6 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, | |||
3338 | char *consumed_string, struct REGEX_INTERNAL_State *state, | 3338 | char *consumed_string, struct REGEX_INTERNAL_State *state, |
3339 | REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) | 3339 | REGEX_INTERNAL_KeyIterator iterator, void *iterator_cls) |
3340 | { | 3340 | { |
3341 | unsigned int i; | ||
3342 | char *temp; | 3341 | char *temp; |
3343 | struct REGEX_INTERNAL_Transition *t; | 3342 | struct REGEX_INTERNAL_Transition *t; |
3344 | unsigned int num_edges = state->transition_count; | 3343 | unsigned int num_edges = state->transition_count; |
@@ -3360,12 +3359,7 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, | |||
3360 | { | 3359 | { |
3361 | if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof)) | 3360 | if (state->proof != NULL && 0 != strcmp (consumed_string, state->proof)) |
3362 | { | 3361 | { |
3363 | for (i = 0, t = state->transitions_head; NULL != t && i < num_edges; | 3362 | (void) state_get_edges (state, edges); |
3364 | t = t->next, i++) | ||
3365 | { | ||
3366 | edges[i].label = t->label; | ||
3367 | edges[i].destination = t->to_state->hash; | ||
3368 | } | ||
3369 | GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); | 3363 | GNUNET_CRYPTO_hash (consumed_string, strlen (consumed_string), &hash); |
3370 | iterator (iterator_cls, &hash, consumed_string, state->accepting, | 3364 | iterator (iterator_cls, &hash, consumed_string, state->accepting, |
3371 | num_edges, edges); | 3365 | num_edges, edges); |
@@ -3385,7 +3379,7 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, | |||
3385 | GNUNET_free (temp); | 3379 | GNUNET_free (temp); |
3386 | } | 3380 | } |
3387 | } | 3381 | } |
3388 | else if (max_len < cur_len) | 3382 | else /* cur_len > max_len */ |
3389 | { | 3383 | { |
3390 | /* Case where the concatenated labels are longer than max_len, then split. */ | 3384 | /* Case where the concatenated labels are longer than max_len, then split. */ |
3391 | edge[0].label = &consumed_string[max_len]; | 3385 | edge[0].label = &consumed_string[max_len]; |
@@ -3425,8 +3419,8 @@ iterate_initial_edge (const unsigned int min_len, const unsigned int max_len, | |||
3425 | */ | 3419 | */ |
3426 | void | 3420 | void |
3427 | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | 3421 | REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, |
3428 | REGEX_INTERNAL_KeyIterator iterator, | 3422 | REGEX_INTERNAL_KeyIterator iterator, |
3429 | void *iterator_cls) | 3423 | void *iterator_cls) |
3430 | { | 3424 | { |
3431 | struct REGEX_INTERNAL_State *s; | 3425 | struct REGEX_INTERNAL_State *s; |
3432 | 3426 | ||
@@ -3437,19 +3431,202 @@ REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | |||
3437 | 3431 | ||
3438 | num_edges = state_get_edges (s, edges); | 3432 | num_edges = state_get_edges (s, edges); |
3439 | if ( ( (NULL != s->proof) && | 3433 | if ( ( (NULL != s->proof) && |
3440 | (GNUNET_REGEX_INITIAL_BYTES <= strlen (s->proof)) ) || s->accepting) | 3434 | (0 <= strlen (s->proof)) ) || s->accepting) |
3441 | iterator (iterator_cls, &s->hash, s->proof, | 3435 | iterator (iterator_cls, &s->hash, s->proof, |
3442 | s->accepting, | 3436 | s->accepting, |
3443 | num_edges, edges); | 3437 | num_edges, edges); |
3444 | s->marked = GNUNET_NO; | 3438 | s->marked = GNUNET_NO; |
3445 | } | 3439 | } |
3446 | 3440 | ||
3447 | iterate_initial_edge (1, | 3441 | iterate_initial_edge (GNUNET_REGEX_INITIAL_BYTES, |
3448 | GNUNET_REGEX_INITIAL_BYTES, | 3442 | GNUNET_REGEX_INITIAL_BYTES, |
3449 | NULL, a->start, | 3443 | NULL, a->start, |
3450 | iterator, iterator_cls); | 3444 | iterator, iterator_cls); |
3445 | } | ||
3446 | |||
3447 | /** | ||
3448 | * Struct to hold all the relevant state information in the HashMap. | ||
3449 | * | ||
3450 | * Contains the same info as the Regex Iterator parametes except the key, | ||
3451 | * which comes directly from the HashMap iterator. | ||
3452 | */ | ||
3453 | struct temporal_state_store { | ||
3454 | int reachable; | ||
3455 | char *proof; | ||
3456 | int accepting; | ||
3457 | int num_edges; | ||
3458 | struct REGEX_BLOCK_Edge *edges; | ||
3459 | }; | ||
3460 | |||
3461 | |||
3462 | /** | ||
3463 | * Store regex iterator and cls in one place to pass to the hashmap iterator. | ||
3464 | */ | ||
3465 | struct client_iterator { | ||
3466 | REGEX_INTERNAL_KeyIterator iterator; | ||
3467 | void *iterator_cls; | ||
3468 | }; | ||
3469 | |||
3470 | |||
3471 | /** | ||
3472 | * Iterator over all edges of a dfa. Stores all of them in a HashMap | ||
3473 | * for later reachability marking. | ||
3474 | * | ||
3475 | * @param cls Closure (HashMap) | ||
3476 | * @param key hash for current state. | ||
3477 | * @param proof proof for current state | ||
3478 | * @param accepting GNUNET_YES if this is an accepting state, GNUNET_NO if not. | ||
3479 | * @param num_edges number of edges leaving current state. | ||
3480 | * @param edges edges leaving current state. | ||
3481 | */ | ||
3482 | static void | ||
3483 | store_all_states (void *cls, | ||
3484 | const struct GNUNET_HashCode *key, | ||
3485 | const char *proof, | ||
3486 | int accepting, | ||
3487 | unsigned int num_edges, | ||
3488 | const struct REGEX_BLOCK_Edge *edges) | ||
3489 | { | ||
3490 | struct GNUNET_CONTAINER_MultiHashMap *hm = cls; | ||
3491 | struct temporal_state_store *tmp; | ||
3492 | size_t edges_size; | ||
3493 | |||
3494 | tmp = GNUNET_new (struct temporal_state_store); | ||
3495 | tmp->reachable = GNUNET_NO; | ||
3496 | tmp->proof = GNUNET_strdup (proof); | ||
3497 | tmp->accepting = accepting; | ||
3498 | tmp->num_edges = num_edges; | ||
3499 | edges_size = sizeof (struct REGEX_BLOCK_Edge) * num_edges; | ||
3500 | tmp->edges = GNUNET_malloc (edges_size); | ||
3501 | memcpy(tmp->edges, edges, edges_size); | ||
3502 | GNUNET_CONTAINER_multihashmap_put (hm, key, tmp, | ||
3503 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_FAST); | ||
3504 | } | ||
3505 | |||
3506 | |||
3507 | /** | ||
3508 | * Mark state as reachable and call recursively on all its edges. | ||
3509 | * | ||
3510 | * If already marked as reachable, do nothing. | ||
3511 | * | ||
3512 | * @param state State to mark as reachable. | ||
3513 | * @param hm HashMap which stores all the states indexed by key. | ||
3514 | */ | ||
3515 | static void | ||
3516 | mark_as_reachable (struct temporal_state_store *state, | ||
3517 | struct GNUNET_CONTAINER_MultiHashMap *hm) | ||
3518 | { | ||
3519 | struct temporal_state_store *child; | ||
3520 | unsigned int i; | ||
3521 | |||
3522 | if (GNUNET_YES == state->reachable) | ||
3523 | /* visited */ | ||
3524 | return; | ||
3525 | |||
3526 | state->reachable = GNUNET_YES; | ||
3527 | for (i = 0; i < state->num_edges; i++) | ||
3528 | { | ||
3529 | child = GNUNET_CONTAINER_multihashmap_get (hm, | ||
3530 | &state->edges[i].destination); | ||
3531 | if (NULL == child) | ||
3532 | { | ||
3533 | GNUNET_break (0); | ||
3534 | continue; | ||
3535 | } | ||
3536 | mark_as_reachable (child, hm); | ||
3537 | } | ||
3538 | } | ||
3539 | |||
3540 | |||
3541 | /** | ||
3542 | * Iterator over hash map entries to mark the ones that are reachable. | ||
3543 | * | ||
3544 | * @param cls closure | ||
3545 | * @param key current key code | ||
3546 | * @param value value in the hash map | ||
3547 | * @return #GNUNET_YES if we should continue to iterate, | ||
3548 | * #GNUNET_NO if not. | ||
3549 | */ | ||
3550 | static int | ||
3551 | reachability_iterator (void *cls, | ||
3552 | const struct GNUNET_HashCode *key, | ||
3553 | void *value) | ||
3554 | { | ||
3555 | struct GNUNET_CONTAINER_MultiHashMap *hm = cls; | ||
3556 | struct temporal_state_store *state = value; | ||
3557 | |||
3558 | if (GNUNET_YES == state->reachable) | ||
3559 | /* already visited and marked */ | ||
3560 | return GNUNET_YES; | ||
3561 | |||
3562 | if (GNUNET_REGEX_INITIAL_BYTES > strlen (state->proof) && | ||
3563 | GNUNET_NO == state->accepting) | ||
3564 | /* not directly reachable */ | ||
3565 | return GNUNET_YES; | ||
3566 | |||
3567 | mark_as_reachable (state, hm); | ||
3568 | return GNUNET_YES; | ||
3569 | } | ||
3570 | |||
3571 | |||
3572 | /** | ||
3573 | * Iterator over hash map entries. | ||
3574 | * Calling the callback on the ones marked as reachables. | ||
3575 | * | ||
3576 | * @param cls closure | ||
3577 | * @param key current key code | ||
3578 | * @param value value in the hash map | ||
3579 | * @return #GNUNET_YES if we should continue to iterate, | ||
3580 | * #GNUNET_NO if not. | ||
3581 | */ | ||
3582 | static int | ||
3583 | iterate_reachables (void *cls, | ||
3584 | const struct GNUNET_HashCode *key, | ||
3585 | void *value) | ||
3586 | { | ||
3587 | struct client_iterator *ci = cls; | ||
3588 | struct temporal_state_store *state = value; | ||
3589 | |||
3590 | if (GNUNET_YES == state->reachable) | ||
3591 | { | ||
3592 | ci->iterator (ci->iterator_cls, key, | ||
3593 | state->proof, state->accepting, | ||
3594 | state->num_edges, state->edges); | ||
3595 | } | ||
3596 | GNUNET_free (state->edges); | ||
3597 | GNUNET_free (state->proof); | ||
3598 | GNUNET_free (state); | ||
3599 | return GNUNET_YES; | ||
3600 | |||
3601 | } | ||
3602 | |||
3603 | /** | ||
3604 | * Iterate over all edges of automaton 'a' that are reachable from a state with | ||
3605 | * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. | ||
3606 | * | ||
3607 | * Call the iterator for each such edge. | ||
3608 | * | ||
3609 | * @param a automaton. | ||
3610 | * @param iterator iterator called for each reachable edge. | ||
3611 | * @param iterator_cls closure. | ||
3612 | */ | ||
3613 | void | ||
3614 | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, | ||
3615 | REGEX_INTERNAL_KeyIterator iterator, | ||
3616 | void *iterator_cls) | ||
3617 | { | ||
3618 | struct GNUNET_CONTAINER_MultiHashMap *hm; | ||
3619 | struct client_iterator ci; | ||
3620 | |||
3621 | hm = GNUNET_CONTAINER_multihashmap_create (a->state_count * 2, GNUNET_YES); | ||
3622 | ci.iterator = iterator; | ||
3623 | ci.iterator_cls = iterator_cls; | ||
3451 | 3624 | ||
3625 | REGEX_INTERNAL_iterate_all_edges (a, &store_all_states, hm); | ||
3626 | GNUNET_CONTAINER_multihashmap_iterate (hm, &reachability_iterator, hm); | ||
3627 | GNUNET_CONTAINER_multihashmap_iterate (hm, &iterate_reachables, &ci); | ||
3452 | 3628 | ||
3629 | GNUNET_CONTAINER_multihashmap_destroy (hm); | ||
3453 | } | 3630 | } |
3454 | 3631 | ||
3455 | 3632 | ||
diff --git a/src/regex/regex_internal_lib.h b/src/regex/regex_internal_lib.h index 391a33fd2..96334c1c3 100644 --- a/src/regex/regex_internal_lib.h +++ b/src/regex/regex_internal_lib.h | |||
@@ -139,6 +139,22 @@ REGEX_INTERNAL_iterate_all_edges (struct REGEX_INTERNAL_Automaton *a, | |||
139 | void *iterator_cls); | 139 | void *iterator_cls); |
140 | 140 | ||
141 | 141 | ||
142 | /** | ||
143 | * Iterate over all edges of automaton 'a' that are reachable from a state with | ||
144 | * a proof of at least GNUNET_REGEX_INITIAL_BYTES characters. | ||
145 | * | ||
146 | * Call the iterator for each such edge. | ||
147 | * | ||
148 | * @param a automaton. | ||
149 | * @param iterator iterator called for each reachable edge. | ||
150 | * @param iterator_cls closure. | ||
151 | */ | ||
152 | void | ||
153 | REGEX_INTERNAL_iterate_reachable_edges (struct REGEX_INTERNAL_Automaton *a, | ||
154 | REGEX_INTERNAL_KeyIterator iterator, | ||
155 | void *iterator_cls); | ||
156 | |||
157 | |||
142 | 158 | ||
143 | /** | 159 | /** |
144 | * Handle to store cached data about a regex announce. | 160 | * Handle to store cached data about a regex announce. |