diff options
author | Nathan S. Evans <evans@in.tum.de> | 2011-02-17 11:29:32 +0000 |
---|---|---|
committer | Nathan S. Evans <evans@in.tum.de> | 2011-02-17 11:29:32 +0000 |
commit | 98e091a064450600fc82eb86c269af514056e37c (patch) | |
tree | 836c31d2611a569b4ed5dfff10e61b06966162bd /src/dht | |
parent | 9706e822ff61a85bf2353d2c233eb766ffc13323 (diff) | |
download | gnunet-98e091a064450600fc82eb86c269af514056e37c.tar.gz gnunet-98e091a064450600fc82eb86c269af514056e37c.zip |
attempt to detect and debug doom spiral
Diffstat (limited to 'src/dht')
-rw-r--r-- | src/dht/gnunet-dht-driver.c | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/src/dht/gnunet-dht-driver.c b/src/dht/gnunet-dht-driver.c index 8cca5f0d6..14c119ff9 100644 --- a/src/dht/gnunet-dht-driver.c +++ b/src/dht/gnunet-dht-driver.c | |||
@@ -35,6 +35,9 @@ | |||
35 | #include "dhtlog.h" | 35 | #include "dhtlog.h" |
36 | #include "dht.h" | 36 | #include "dht.h" |
37 | 37 | ||
38 | /* Specific DEBUG hack, do not use normally (may leak memory, segfault, or eat children.) */ | ||
39 | #define ONLY_TESTING GNUNET_YES | ||
40 | |||
38 | /* DEFINES */ | 41 | /* DEFINES */ |
39 | #define VERBOSE GNUNET_NO | 42 | #define VERBOSE GNUNET_NO |
40 | 43 | ||
@@ -432,6 +435,21 @@ static struct GNUNET_TIME_Absolute connect_last_time; | |||
432 | */ | 435 | */ |
433 | static unsigned int do_find_peer; | 436 | static unsigned int do_find_peer; |
434 | 437 | ||
#if ONLY_TESTING
/**
 * Are we currently trying to connect two peers repeatedly?
 * Holds GNUNET_YES while repeat-connect mode is active, GNUNET_NO otherwise.
 */
static unsigned int repeat_connect_mode;

/**
 * Task for repeating connects.  File-local like the rest of the
 * repeat-connect state; nothing outside this file should touch it.
 */
static GNUNET_SCHEDULER_TaskIdentifier repeat_connect_task;

/**
 * First peer of the pair being re-connected while repeat-connect
 * mode is active; NULL when no pair is selected.
 */
static struct GNUNET_TESTING_Daemon *repeat_connect_peer1;

/**
 * Second peer of the pair being re-connected while repeat-connect
 * mode is active; NULL when no pair is selected.
 */
static struct GNUNET_TESTING_Daemon *repeat_connect_peer2;
#endif
452 | |||
435 | /** | 453 | /** |
436 | * Boolean value, should replication be done by the dht | 454 | * Boolean value, should replication be done by the dht |
437 | * service (GNUNET_YES) or by the driver (GNUNET_NO) | 455 | * service (GNUNET_YES) or by the driver (GNUNET_NO) |
@@ -2522,6 +2540,39 @@ setup_malicious_peers (void *cls, const struct GNUNET_SCHEDULER_TaskContext * tc | |||
2522 | } | 2540 | } |
2523 | #endif | 2541 | #endif |
2524 | 2542 | ||
2543 | /* Forward declaration */ | ||
2544 | static void | ||
2545 | topology_callback (void *cls, | ||
2546 | const struct GNUNET_PeerIdentity *first, | ||
2547 | const struct GNUNET_PeerIdentity *second, | ||
2548 | uint32_t distance, | ||
2549 | const struct GNUNET_CONFIGURATION_Handle *first_cfg, | ||
2550 | const struct GNUNET_CONFIGURATION_Handle *second_cfg, | ||
2551 | struct GNUNET_TESTING_Daemon *first_daemon, | ||
2552 | struct GNUNET_TESTING_Daemon *second_daemon, | ||
2553 | const char *emsg); | ||
2554 | |||
#if ONLY_TESTING
/**
 * Retry connecting two specific peers.  These two peers previously
 * failed to connect, and we hope they continue to fail so that we can
 * debug the reason they are having issues.
 *
 * The pair is taken from repeat_connect_peer1 / repeat_connect_peer2;
 * the outcome of the attempt is reported to topology_callback.
 *
 * Only compiled when ONLY_TESTING is enabled, because all of the state
 * it uses is declared under the same guard.
 *
 * @param cls closure (unused)
 * @param tc scheduler context (unused)
 */
static void
repeat_connect (void *cls, const struct GNUNET_SCHEDULER_TaskContext * tc)
{
  /* This task is running now, so its identifier is stale.  Clear it so
   * that the cancel in topology_callback (guarded by a check against
   * GNUNET_SCHEDULER_NO_TASK) never acts on a task that already ran. */
  repeat_connect_task = GNUNET_SCHEDULER_NO_TASK;

  /* topology_callback resets both peers to NULL once they connect;
   * never dereference them in that state. */
  if ((repeat_connect_peer1 == NULL) || (repeat_connect_peer2 == NULL))
    return;

  GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Repeating connect attempt between %s and %s.\n", repeat_connect_peer1->shortname, repeat_connect_peer2->shortname);
  GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Peer 1 configuration `%s'\n", repeat_connect_peer1->cfgfile);
  GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Peer 2 configuration `%s'\n", repeat_connect_peer2->cfgfile);

  /* NOTE(review): the 60 second value and the 2 are presumably the
   * connect timeout and the maximum number of attempts -- confirm
   * against the GNUNET_TESTING_daemons_connect declaration. */
  GNUNET_TESTING_daemons_connect(repeat_connect_peer1,
                                 repeat_connect_peer2,
                                 GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, 60),
                                 2, &topology_callback, NULL);
}
#endif

2574 | |||
2575 | |||
2525 | /** | 2576 | /** |
2526 | * This function is called whenever a connection attempt is finished between two of | 2577 | * This function is called whenever a connection attempt is finished between two of |
2527 | * the started peers (started with GNUNET_TESTING_daemons_start). The total | 2578 | * the started peers (started with GNUNET_TESTING_daemons_start). The total |
@@ -2546,6 +2597,28 @@ topology_callback (void *cls, | |||
2546 | unsigned long long duration; | 2597 | unsigned long long duration; |
2547 | unsigned long long total_duration; | 2598 | unsigned long long total_duration; |
2548 | unsigned int new_connections; | 2599 | unsigned int new_connections; |
2600 | float conns_per_sec_recent; | ||
2601 | float conns_per_sec_total; | ||
2602 | |||
2603 | #if ONLY_TESTING | ||
2604 | if (repeat_connect_mode == GNUNET_YES) | ||
2605 | { | ||
2606 | if ((first_daemon == repeat_connect_peer1) && | ||
2607 | (second_daemon == repeat_connect_peer2)) | ||
2608 | { | ||
2609 | if (emsg != NULL) /* Peers failed to connect again! */ | ||
2610 | return; | ||
2611 | else /* Repeat peers actually connected! */ | ||
2612 | { | ||
2613 | if (repeat_connect_task != GNUNET_SCHEDULER_NO_TASK) | ||
2614 | GNUNET_SCHEDULER_cancel(repeat_connect_task); | ||
2615 | repeat_connect_peer1 = NULL; | ||
2616 | repeat_connect_peer2 = NULL; | ||
2617 | repeat_connect_mode = GNUNET_NO; | ||
2618 | } | ||
2619 | } | ||
2620 | } | ||
2621 | #endif | ||
2549 | 2622 | ||
2550 | if (GNUNET_TIME_absolute_get_difference (connect_last_time, | 2623 | if (GNUNET_TIME_absolute_get_difference (connect_last_time, |
2551 | GNUNET_TIME_absolute_get()).rel_value > GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, CONN_UPDATE_DURATION).rel_value) | 2624 | GNUNET_TIME_absolute_get()).rel_value > GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, CONN_UPDATE_DURATION).rel_value) |
@@ -2557,10 +2630,29 @@ topology_callback (void *cls, | |||
2557 | GNUNET_TIME_absolute_get()).rel_value / 1000; | 2630 | GNUNET_TIME_absolute_get()).rel_value / 1000; |
2558 | total_duration = GNUNET_TIME_absolute_get_difference (connect_start_time, | 2631 | total_duration = GNUNET_TIME_absolute_get_difference (connect_start_time, |
2559 | GNUNET_TIME_absolute_get()).rel_value / 1000; | 2632 | GNUNET_TIME_absolute_get()).rel_value / 1000; |
2633 | conns_per_sec_recent = (float)new_connections / duration; | ||
2634 | conns_per_sec_total = (float)total_connections / total_duration; | ||
2560 | GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Conns/sec in last %d seconds: %f, Conns/sec for entire duration: %f\n", CONN_UPDATE_DURATION, (float)new_connections / duration, (float)total_connections / total_duration); | 2635 | GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Conns/sec in last %d seconds: %f, Conns/sec for entire duration: %f\n", CONN_UPDATE_DURATION, (float)new_connections / duration, (float)total_connections / total_duration); |
2561 | connect_last_time = GNUNET_TIME_absolute_get(); | 2636 | connect_last_time = GNUNET_TIME_absolute_get(); |
2562 | previous_connections = total_connections; | 2637 | previous_connections = total_connections; |
2563 | GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "have %u total_connections\n", total_connections); | 2638 | GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "have %u total_connections\n", total_connections); |
2639 | #if ONLY_TESTING | ||
2640 | /* These conditions likely mean we've entered the death spiral of doom */ | ||
2641 | if ((total_connections > 100000) && | ||
2642 | (conns_per_sec_recent < 5.0) && | ||
2643 | (conns_per_sec_total > 10.0) && | ||
2644 | (emsg != NULL) && | ||
2645 | (repeat_connect_mode == GNUNET_NO)) | ||
2646 | { | ||
2647 | GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Entering repeat connection attempt mode!\n"); | ||
2648 | |||
2649 | repeat_connect_peer1 = first_daemon; | ||
2650 | repeat_connect_peer2 = second_daemon; | ||
2651 | repeat_connect_mode = GNUNET_YES; | ||
2652 | repeat_connect_task = GNUNET_SCHEDULER_add_now(&repeat_connect, NULL); | ||
2653 | } | ||
2654 | |||
2655 | #endif | ||
2564 | } | 2656 | } |
2565 | if (emsg == NULL) | 2657 | if (emsg == NULL) |
2566 | { | 2658 | { |
@@ -2585,6 +2677,11 @@ topology_callback (void *cls, | |||
2585 | #endif | 2677 | #endif |
2586 | } | 2678 | } |
2587 | 2679 | ||
2680 | #if ONLY_TESTING | ||
2681 | if (repeat_connect_mode == GNUNET_YES) | ||
2682 | return; | ||
2683 | #endif | ||
2684 | |||
2588 | GNUNET_assert(peer_connect_meter != NULL); | 2685 | GNUNET_assert(peer_connect_meter != NULL); |
2589 | if (GNUNET_YES == update_meter(peer_connect_meter)) | 2686 | if (GNUNET_YES == update_meter(peer_connect_meter)) |
2590 | { | 2687 | { |