aboutsummaryrefslogtreecommitdiff
path: root/src/dht
diff options
context:
space:
mode:
author: Nathan S. Evans <evans@in.tum.de> 2011-02-17 11:29:32 +0000
committer: Nathan S. Evans <evans@in.tum.de> 2011-02-17 11:29:32 +0000
commit: 98e091a064450600fc82eb86c269af514056e37c (patch)
tree: 836c31d2611a569b4ed5dfff10e61b06966162bd /src/dht
parent: 9706e822ff61a85bf2353d2c233eb766ffc13323 (diff)
download: gnunet-98e091a064450600fc82eb86c269af514056e37c.tar.gz
download: gnunet-98e091a064450600fc82eb86c269af514056e37c.zip
attempt to detect and debug doom spiral
Diffstat (limited to 'src/dht')
-rw-r--r-- src/dht/gnunet-dht-driver.c 97
1 files changed, 97 insertions, 0 deletions
diff --git a/src/dht/gnunet-dht-driver.c b/src/dht/gnunet-dht-driver.c
index 8cca5f0d6..14c119ff9 100644
--- a/src/dht/gnunet-dht-driver.c
+++ b/src/dht/gnunet-dht-driver.c
@@ -35,6 +35,9 @@
35#include "dhtlog.h" 35#include "dhtlog.h"
36#include "dht.h" 36#include "dht.h"
37 37
38/* Specific DEBUG hack, do not use normally (may leak memory, segfault, or eat children.) */
39#define ONLY_TESTING GNUNET_YES
40
38/* DEFINES */ 41/* DEFINES */
39#define VERBOSE GNUNET_NO 42#define VERBOSE GNUNET_NO
40 43
@@ -432,6 +435,21 @@ static struct GNUNET_TIME_Absolute connect_last_time;
432 */ 435 */
433static unsigned int do_find_peer; 436static unsigned int do_find_peer;
434 437
#if ONLY_TESTING
/**
 * Are we currently trying to connect two peers repeatedly?
 */
static unsigned int repeat_connect_mode;

/**
 * Task for repeating connects.
 */
static GNUNET_SCHEDULER_TaskIdentifier repeat_connect_task;

/**
 * First peer of the pair we are repeatedly trying to connect
 * (NULL when no repeat connect is in progress).
 */
static struct GNUNET_TESTING_Daemon *repeat_connect_peer1;

/**
 * Second peer of the pair we are repeatedly trying to connect
 * (NULL when no repeat connect is in progress).
 */
static struct GNUNET_TESTING_Daemon *repeat_connect_peer2;
#endif
452
435/** 453/**
436 * Boolean value, should replication be done by the dht 454 * Boolean value, should replication be done by the dht
437 * service (GNUNET_YES) or by the driver (GNUNET_NO) 455 * service (GNUNET_YES) or by the driver (GNUNET_NO)
@@ -2522,6 +2540,39 @@ setup_malicious_peers (void *cls, const struct GNUNET_SCHEDULER_TaskContext * tc
2522} 2540}
2523#endif 2541#endif
2524 2542
2543/* Forward declaration */
2544static void
2545topology_callback (void *cls,
2546 const struct GNUNET_PeerIdentity *first,
2547 const struct GNUNET_PeerIdentity *second,
2548 uint32_t distance,
2549 const struct GNUNET_CONFIGURATION_Handle *first_cfg,
2550 const struct GNUNET_CONFIGURATION_Handle *second_cfg,
2551 struct GNUNET_TESTING_Daemon *first_daemon,
2552 struct GNUNET_TESTING_Daemon *second_daemon,
2553 const char *emsg);
2554
#if ONLY_TESTING
/**
 * Issue one connect attempt between the two peers stored in
 * repeat_connect_peer1 and repeat_connect_peer2.
 *
 * These two peers previously failed to connect, and we hope they
 * continue to fail so that we can debug the reason they are having
 * issues.  The outcome of the attempt is reported to topology_callback.
 *
 * NOTE(review): each invocation performs a single connect attempt and
 * does not reschedule itself; a periodic retry requires the caller to
 * schedule this task again.
 *
 * @param cls closure (unused)
 * @param tc scheduler context (unused)
 */
static void
repeat_connect (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
{
  /* This task is executing now, so its identifier must no longer be
     handed to GNUNET_SCHEDULER_cancel; forget it so that the check
     against GNUNET_SCHEDULER_NO_TASK in topology_callback is accurate. */
  repeat_connect_task = GNUNET_SCHEDULER_NO_TASK;

  /* Nothing to do if the peer pair was cleared before we got to run. */
  if ((NULL == repeat_connect_peer1) || (NULL == repeat_connect_peer2))
    return;

  GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
              "Repeating connect attempt between %s and %s.\n",
              repeat_connect_peer1->shortname,
              repeat_connect_peer2->shortname);
  GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
              "Peer 1 configuration `%s'\n",
              repeat_connect_peer1->cfgfile);
  GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
              "Peer 2 configuration `%s'\n",
              repeat_connect_peer2->cfgfile);

  /* 60 second timeout, at most 2 attempts; result goes to topology_callback. */
  GNUNET_TESTING_daemons_connect (repeat_connect_peer1,
                                  repeat_connect_peer2,
                                  GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_SECONDS, 60),
                                  2, &topology_callback, NULL);
}
#endif
2574
2575
2525/** 2576/**
2526 * This function is called whenever a connection attempt is finished between two of 2577 * This function is called whenever a connection attempt is finished between two of
2527 * the started peers (started with GNUNET_TESTING_daemons_start). The total 2578 * the started peers (started with GNUNET_TESTING_daemons_start). The total
@@ -2546,6 +2597,28 @@ topology_callback (void *cls,
2546 unsigned long long duration; 2597 unsigned long long duration;
2547 unsigned long long total_duration; 2598 unsigned long long total_duration;
2548 unsigned int new_connections; 2599 unsigned int new_connections;
2600 float conns_per_sec_recent;
2601 float conns_per_sec_total;
2602
2603#if ONLY_TESTING
2604 if (repeat_connect_mode == GNUNET_YES)
2605 {
2606 if ((first_daemon == repeat_connect_peer1) &&
2607 (second_daemon == repeat_connect_peer2))
2608 {
2609 if (emsg != NULL) /* Peers failed to connect again! */
2610 return;
2611 else /* Repeat peers actually connected! */
2612 {
2613 if (repeat_connect_task != GNUNET_SCHEDULER_NO_TASK)
2614 GNUNET_SCHEDULER_cancel(repeat_connect_task);
2615 repeat_connect_peer1 = NULL;
2616 repeat_connect_peer2 = NULL;
2617 repeat_connect_mode = GNUNET_NO;
2618 }
2619 }
2620 }
2621#endif
2549 2622
2550 if (GNUNET_TIME_absolute_get_difference (connect_last_time, 2623 if (GNUNET_TIME_absolute_get_difference (connect_last_time,
2551 GNUNET_TIME_absolute_get()).rel_value > GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, CONN_UPDATE_DURATION).rel_value) 2624 GNUNET_TIME_absolute_get()).rel_value > GNUNET_TIME_relative_multiply(GNUNET_TIME_UNIT_SECONDS, CONN_UPDATE_DURATION).rel_value)
@@ -2557,10 +2630,29 @@ topology_callback (void *cls,
2557 GNUNET_TIME_absolute_get()).rel_value / 1000; 2630 GNUNET_TIME_absolute_get()).rel_value / 1000;
2558 total_duration = GNUNET_TIME_absolute_get_difference (connect_start_time, 2631 total_duration = GNUNET_TIME_absolute_get_difference (connect_start_time,
2559 GNUNET_TIME_absolute_get()).rel_value / 1000; 2632 GNUNET_TIME_absolute_get()).rel_value / 1000;
2633 conns_per_sec_recent = (float)new_connections / duration;
2634 conns_per_sec_total = (float)total_connections / total_duration;
2560 GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Conns/sec in last %d seconds: %f, Conns/sec for entire duration: %f\n", CONN_UPDATE_DURATION, (float)new_connections / duration, (float)total_connections / total_duration); 2635 GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Conns/sec in last %d seconds: %f, Conns/sec for entire duration: %f\n", CONN_UPDATE_DURATION, (float)new_connections / duration, (float)total_connections / total_duration);
2561 connect_last_time = GNUNET_TIME_absolute_get(); 2636 connect_last_time = GNUNET_TIME_absolute_get();
2562 previous_connections = total_connections; 2637 previous_connections = total_connections;
2563 GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "have %u total_connections\n", total_connections); 2638 GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "have %u total_connections\n", total_connections);
2639#if ONLY_TESTING
2640 /* These conditions likely mean we've entered the death spiral of doom */
2641 if ((total_connections > 100000) &&
2642 (conns_per_sec_recent < 5.0) &&
2643 (conns_per_sec_total > 10.0) &&
2644 (emsg != NULL) &&
2645 (repeat_connect_mode == GNUNET_NO))
2646 {
2647 GNUNET_log(GNUNET_ERROR_TYPE_WARNING, "Entering repeat connection attempt mode!\n");
2648
2649 repeat_connect_peer1 = first_daemon;
2650 repeat_connect_peer2 = second_daemon;
2651 repeat_connect_mode = GNUNET_YES;
2652 repeat_connect_task = GNUNET_SCHEDULER_add_now(&repeat_connect, NULL);
2653 }
2654
2655#endif
2564 } 2656 }
2565 if (emsg == NULL) 2657 if (emsg == NULL)
2566 { 2658 {
@@ -2585,6 +2677,11 @@ topology_callback (void *cls,
2585#endif 2677#endif
2586 } 2678 }
2587 2679
2680#if ONLY_TESTING
2681 if (repeat_connect_mode == GNUNET_YES)
2682 return;
2683#endif
2684
2588 GNUNET_assert(peer_connect_meter != NULL); 2685 GNUNET_assert(peer_connect_meter != NULL);
2589 if (GNUNET_YES == update_meter(peer_connect_meter)) 2686 if (GNUNET_YES == update_meter(peer_connect_meter))
2590 { 2687 {