aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSafey A.Halim <safey.allah@gmail.com>2009-12-08 08:59:49 +0000
committerSafey A.Halim <safey.allah@gmail.com>2009-12-08 08:59:49 +0000
commit1253be546cc47789197480a82975aa65ad973b0d (patch)
tree16865dc4d6e4f94a05cebd43d148c7f7a8ebcaea /src
parentc73a3e5fafa6a55347642503ed77de7977def1cb (diff)
downloadgnunet-1253be546cc47789197480a82975aa65ad973b0d.tar.gz
gnunet-1253be546cc47789197480a82975aa65ad973b0d.zip
Arm Exponential backoff
Diffstat (limited to 'src')
-rw-r--r--src/arm/gnunet-service-arm.c431
1 files changed, 317 insertions, 114 deletions
diff --git a/src/arm/gnunet-service-arm.c b/src/arm/gnunet-service-arm.c
index cc9e2cd27..f525750cf 100644
--- a/src/arm/gnunet-service-arm.c
+++ b/src/arm/gnunet-service-arm.c
@@ -42,6 +42,7 @@
42#include "gnunet_os_lib.h" 42#include "gnunet_os_lib.h"
43#include "gnunet_protocols.h" 43#include "gnunet_protocols.h"
44#include "gnunet_service_lib.h" 44#include "gnunet_service_lib.h"
45#include "gnunet_signal_lib.h"
45#include "arm.h" 46#include "arm.h"
46 47
47 48
@@ -62,6 +63,13 @@
62 */ 63 */
63#define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES 64#define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES
64 65
66
67/**
68 * Threshold after which exponential backoff shouldn't increase
69 */
70#define EXPONENTIAL_BACKOFF_THRESHOLD 8
71
72
65/** 73/**
66 * List of our services. 74 * List of our services.
67 */ 75 */
@@ -123,6 +131,12 @@ struct ServiceList
123 */ 131 */
124 time_t mtime; 132 time_t mtime;
125 133
134 /* Process exponential backoff time */
135 struct GNUNET_TIME_Relative backoff;
136
137 /* Absolute time at which the process is scheduled to restart in case of death */
138 struct GNUNET_TIME_Absolute restartAt;
139
126 /** 140 /**
127 * Reference counter (counts how many times we've been 141 * Reference counter (counts how many times we've been
128 * asked to start the service). We only actually stop 142 * asked to start the service). We only actually stop
@@ -157,6 +171,7 @@ static char *prefix_command;
157 */ 171 */
158static int in_shutdown; 172static int in_shutdown;
159 173
174
160/** 175/**
161 * Handle to our server instance. Our server is a bit special in that 176 * Handle to our server instance. Our server is a bit special in that
162 * its service is not immediately stopped once we get a shutdown 177 * its service is not immediately stopped once we get a shutdown
@@ -175,8 +190,7 @@ static struct GNUNET_SERVER_Handle *server;
175 * @param cls closure, NULL if we need to self-restart 190 * @param cls closure, NULL if we need to self-restart
176 * @param tc context 191 * @param tc context
177 */ 192 */
178static void 193static void maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc);
179maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc);
180 194
181 195
182/** 196/**
@@ -197,7 +211,7 @@ write_result (void *cls, size_t size, void *buf)
197 { 211 {
198 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, 212 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
199 _("Could not send status result to client\n")); 213 _("Could not send status result to client\n"));
200 return 0; /* error, not much we can do */ 214 return 0; /* error, not much we can do */
201 } 215 }
202 GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader)); 216 GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader));
203 msg = buf; 217 msg = buf;
@@ -220,28 +234,29 @@ write_result (void *cls, size_t size, void *buf)
220 */ 234 */
221static void 235static void
222signal_result (struct GNUNET_SERVER_Client *client, 236signal_result (struct GNUNET_SERVER_Client *client,
223 const char *name, uint16_t result) 237 const char *name, uint16_t result)
224{ 238{
225 uint16_t *res; 239 uint16_t *res;
226 240
227 if (NULL == client) 241 if (NULL == client)
228 { 242 {
229 GNUNET_log (GNUNET_ERROR_TYPE_INFO, 243 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
230 _("Not sending status result to client: no client known\n")); 244 _
245 ("Not sending status result to client: no client known\n"));
231 return; 246 return;
232 } 247 }
233#if DEBUG_ARM 248#if DEBUG_ARM
234 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 249 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
235 "Telling client that service `%s' is now %s\n", 250 "Telling client that service `%s' is now %s\n",
236 name, 251 name,
237 result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up"); 252 result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up");
238#endif 253#endif
239 res = GNUNET_malloc (sizeof (uint16_t)); 254 res = GNUNET_malloc (sizeof (uint16_t));
240 *res = result; 255 *res = result;
241 GNUNET_SERVER_notify_transmit_ready (client, 256 GNUNET_SERVER_notify_transmit_ready (client,
242 sizeof (struct GNUNET_MessageHeader), 257 sizeof (struct GNUNET_MessageHeader),
243 GNUNET_TIME_UNIT_FOREVER_REL, 258 GNUNET_TIME_UNIT_FOREVER_REL,
244 &write_result, res); 259 &write_result, res);
245} 260}
246 261
247 262
@@ -263,14 +278,14 @@ find_name (const char *name)
263 while (pos != NULL) 278 while (pos != NULL)
264 { 279 {
265 if (0 == strcmp (pos->name, name)) 280 if (0 == strcmp (pos->name, name))
266 { 281 {
267 if (prev == NULL) 282 if (prev == NULL)
268 running = pos->next; 283 running = pos->next;
269 else 284 else
270 prev->next = pos->next; 285 prev->next = pos->next;
271 pos->next = NULL; 286 pos->next = NULL;
272 return pos; 287 return pos;
273 } 288 }
274 prev = pos; 289 prev = pos;
275 pos = pos->next; 290 pos = pos->next;
276 } 291 }
@@ -313,19 +328,19 @@ start_process (struct ServiceList *sl)
313 /* start service */ 328 /* start service */
314 if (GNUNET_OK != 329 if (GNUNET_OK !=
315 GNUNET_CONFIGURATION_get_value_string (cfg, 330 GNUNET_CONFIGURATION_get_value_string (cfg,
316 sl->name, "PREFIX", &loprefix)) 331 sl->name, "PREFIX", &loprefix))
317 loprefix = GNUNET_strdup (prefix_command); 332 loprefix = GNUNET_strdup (prefix_command);
318 if (GNUNET_OK != 333 if (GNUNET_OK !=
319 GNUNET_CONFIGURATION_get_value_string (cfg, 334 GNUNET_CONFIGURATION_get_value_string (cfg,
320 sl->name, "OPTIONS", &options)) 335 sl->name, "OPTIONS", &options))
321 options = GNUNET_strdup (""); 336 options = GNUNET_strdup ("");
322 use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG"); 337 use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG");
323 338
324 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name); 339 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name);
325#if DEBUG_ARM 340#if DEBUG_ARM
326 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 341 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
327 "Starting service `%s' using binary `%s' and configuration `%s'\n", 342 "Starting service `%s' using binary `%s' and configuration `%s'\n",
328 sl->name, sl->binary, sl->config); 343 sl->name, sl->binary, sl->config);
329#endif 344#endif
330 argv_size = 6; 345 argv_size = 6;
331 if (use_debug) 346 if (use_debug)
@@ -334,14 +349,14 @@ start_process (struct ServiceList *sl)
334 while ('\0' != *lopos) 349 while ('\0' != *lopos)
335 { 350 {
336 if (*lopos == ' ') 351 if (*lopos == ' ')
337 argv_size++; 352 argv_size++;
338 lopos++; 353 lopos++;
339 } 354 }
340 optpos = options; 355 optpos = options;
341 while ('\0' != *optpos) 356 while ('\0' != *optpos)
342 { 357 {
343 if (*optpos == ' ') 358 if (*optpos == ' ')
344 argv_size++; 359 argv_size++;
345 optpos++; 360 optpos++;
346 } 361 }
347 firstarg = NULL; 362 firstarg = NULL;
@@ -352,16 +367,16 @@ start_process (struct ServiceList *sl)
352 while ('\0' != *lopos) 367 while ('\0' != *lopos)
353 { 368 {
354 while (*lopos == ' ') 369 while (*lopos == ' ')
355 lopos++; 370 lopos++;
356 if (*lopos == '\0') 371 if (*lopos == '\0')
357 continue; 372 continue;
358 if (argv_size == 0) 373 if (argv_size == 0)
359 firstarg = lopos; 374 firstarg = lopos;
360 argv[argv_size++] = lopos; 375 argv[argv_size++] = lopos;
361 while (('\0' != *lopos) && (' ' != *lopos)) 376 while (('\0' != *lopos) && (' ' != *lopos))
362 lopos++; 377 lopos++;
363 if ('\0' == *lopos) 378 if ('\0' == *lopos)
364 continue; 379 continue;
365 *lopos = '\0'; 380 *lopos = '\0';
366 lopos++; 381 lopos++;
367 } 382 }
@@ -379,14 +394,14 @@ start_process (struct ServiceList *sl)
379 while ('\0' != *optpos) 394 while ('\0' != *optpos)
380 { 395 {
381 while (*optpos == ' ') 396 while (*optpos == ' ')
382 optpos++; 397 optpos++;
383 if (*optpos == '\0') 398 if (*optpos == '\0')
384 continue; 399 continue;
385 argv[argv_size++] = optpos; 400 argv[argv_size++] = optpos;
386 while (('\0' != *optpos) && (' ' != *optpos)) 401 while (('\0' != *optpos) && (' ' != *optpos))
387 optpos++; 402 optpos++;
388 if ('\0' == *optpos) 403 if ('\0' == *optpos)
389 continue; 404 continue;
390 *optpos = '\0'; 405 *optpos = '\0';
391 optpos++; 406 optpos++;
392 } 407 }
@@ -415,7 +430,8 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
415 if (GNUNET_YES == in_shutdown) 430 if (GNUNET_YES == in_shutdown)
416 { 431 {
417 GNUNET_log (GNUNET_ERROR_TYPE_INFO, 432 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
418 _("ARM is shutting down, service `%s' not started.\n"), servicename); 433 _("ARM is shutting down, service `%s' not started.\n"),
434 servicename);
419 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); 435 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
420 return; 436 return;
421 } 437 }
@@ -424,7 +440,7 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
424 { 440 {
425 /* already running, just increment RC */ 441 /* already running, just increment RC */
426 GNUNET_log (GNUNET_ERROR_TYPE_INFO, 442 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
427 _("Service `%s' already running.\n"), servicename); 443 _("Service `%s' already running.\n"), servicename);
428 sl->rc++; 444 sl->rc++;
429 sl->next = running; 445 sl->next = running;
430 running = sl; 446 running = sl;
@@ -433,24 +449,24 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
433 } 449 }
434 if (GNUNET_OK != 450 if (GNUNET_OK !=
435 GNUNET_CONFIGURATION_get_value_string (cfg, 451 GNUNET_CONFIGURATION_get_value_string (cfg,
436 servicename, "BINARY", &binary)) 452 servicename, "BINARY", &binary))
437 { 453 {
438 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, 454 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
439 _("Binary implementing service `%s' not known!\n"), 455 _("Binary implementing service `%s' not known!\n"),
440 servicename); 456 servicename);
441 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); 457 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
442 return; 458 return;
443 } 459 }
444 if ((GNUNET_OK != 460 if ((GNUNET_OK !=
445 GNUNET_CONFIGURATION_get_value_filename (cfg, 461 GNUNET_CONFIGURATION_get_value_filename (cfg,
446 servicename, 462 servicename,
447 "CONFIG", 463 "CONFIG",
448 &config)) || 464 &config)) ||
449 (0 != STAT (config, &sbuf))) 465 (0 != STAT (config, &sbuf)))
450 { 466 {
451 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, 467 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
452 _("Configuration file `%s' for service `%s' not known!\n"), 468 _("Configuration file `%s' for service `%s' not known!\n"),
453 config, servicename); 469 config, servicename);
454 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); 470 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN);
455 GNUNET_free (binary); 471 GNUNET_free (binary);
456 GNUNET_free_non_null (config); 472 GNUNET_free_non_null (config);
@@ -463,6 +479,9 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename)
463 sl->binary = binary; 479 sl->binary = binary;
464 sl->config = config; 480 sl->config = config;
465 sl->mtime = sbuf.st_mtime; 481 sl->mtime = sbuf.st_mtime;
482 sl->backoff = GNUNET_TIME_UNIT_MILLISECONDS;
483 sl->restartAt = GNUNET_TIME_UNIT_FOREVER_ABS;
484
466 running = sl; 485 running = sl;
467 start_process (sl); 486 start_process (sl);
468 if (NULL != client) 487 if (NULL != client)
@@ -497,13 +516,12 @@ free_and_signal (void *cls, struct ServiceList *pos)
497 * @param servicename name of the service to stop 516 * @param servicename name of the service to stop
498 */ 517 */
499static void 518static void
500stop_service (struct GNUNET_SERVER_Client *client, 519stop_service (struct GNUNET_SERVER_Client *client, const char *servicename)
501 const char *servicename)
502{ 520{
503 struct ServiceList *pos; 521 struct ServiceList *pos;
504 522
505 GNUNET_log (GNUNET_ERROR_TYPE_INFO, 523 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
506 _("Preparing to stop `%s'\n"), servicename); 524 _("Preparing to stop `%s'\n"), servicename);
507 pos = find_name (servicename); 525 pos = find_name (servicename);
508 if (pos == NULL) 526 if (pos == NULL)
509 { 527 {
@@ -520,15 +538,14 @@ stop_service (struct GNUNET_SERVER_Client *client,
520#if DEBUG_ARM 538#if DEBUG_ARM
521 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 539 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
522 "Service `%s' still used by %u clients, will keep it running!\n", 540 "Service `%s' still used by %u clients, will keep it running!\n",
523 servicename, 541 servicename, pos->rc);
524 pos->rc);
525#endif 542#endif
526 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP); 543 signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP);
527 GNUNET_SERVER_receive_done (client, GNUNET_OK); 544 GNUNET_SERVER_receive_done (client, GNUNET_OK);
528 return; 545 return;
529 } 546 }
530 if (pos->rc == 1) 547 if (pos->rc == 1)
531 pos->rc--; /* decrement RC to zero */ 548 pos->rc--; /* decrement RC to zero */
532 if (pos->kill_continuation != NULL) 549 if (pos->kill_continuation != NULL)
533 { 550 {
534 /* killing already in progress */ 551 /* killing already in progress */
@@ -582,8 +599,8 @@ stop_service (struct GNUNET_SERVER_Client *client,
582 */ 599 */
583static void 600static void
584handle_start (void *cls, 601handle_start (void *cls,
585 struct GNUNET_SERVER_Client *client, 602 struct GNUNET_SERVER_Client *client,
586 const struct GNUNET_MessageHeader *message) 603 const struct GNUNET_MessageHeader *message)
587{ 604{
588 const char *servicename; 605 const char *servicename;
589 uint16_t size; 606 uint16_t size;
@@ -613,8 +630,8 @@ handle_start (void *cls,
613 */ 630 */
614static void 631static void
615handle_stop (void *cls, 632handle_stop (void *cls,
616 struct GNUNET_SERVER_Client *client, 633 struct GNUNET_SERVER_Client *client,
617 const struct GNUNET_MessageHeader *message) 634 const struct GNUNET_MessageHeader *message)
618{ 635{
619 const char *servicename; 636 const char *servicename;
620 uint16_t size; 637 uint16_t size;
@@ -647,49 +664,183 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
647 const char *statstr; 664 const char *statstr;
648 int statcode; 665 int statcode;
649 struct stat sbuf; 666 struct stat sbuf;
667 struct GNUNET_TIME_Relative lowestRestartDelay;
650 int ret; 668 int ret;
651 669
670 lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL;
671
652 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN)) 672 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
653 { 673 {
654 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n")); 674 GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n"));
675
655 in_shutdown = GNUNET_YES; 676 in_shutdown = GNUNET_YES;
656 pos = running; 677 pos = running;
657 while (NULL != pos) 678 while (NULL != pos)
658 { 679 {
659 if (pos->pid != 0) 680 if (pos->pid != 0)
660 { 681 {
661#if DEBUG_ARM 682#if DEBUG_ARM
662 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 683 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
663 "Sending SIGTERM to `%s'\n", 684 "Sending SIGTERM to `%s'\n", pos->name);
664 pos->name);
665#endif 685#endif
666 if (0 != PLIBC_KILL (pos->pid, SIGTERM)) 686 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
667 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); 687 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
668 } 688 }
669 pos = pos->next; 689 pos = pos->next;
670 } 690 }
671 } 691 }
672 if (cls == NULL) 692 if (cls == NULL)
673 { 693 {
674 if ( (in_shutdown == GNUNET_YES) && 694 if ((in_shutdown == GNUNET_YES) && (running == NULL))
675 (running == NULL) )
676 { 695 {
677#if DEBUG_ARM 696#if DEBUG_ARM
678 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 697 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ARM service terminates.\n");
679 "ARM service terminates.\n");
680#endif 698#endif
681 GNUNET_assert (server != NULL); 699 GNUNET_assert (server != NULL);
682 GNUNET_SERVER_destroy (server); 700 GNUNET_SERVER_destroy (server);
683 server = NULL; 701 server = NULL;
684 return; /* we are done! */ 702 return; /* we are done! */
685 } 703 }
686 GNUNET_SCHEDULER_add_delayed (tc->sched, 704 GNUNET_SCHEDULER_add_delayed (tc->sched,
687 (in_shutdown == GNUNET_YES) 705 (in_shutdown == GNUNET_YES)
688 ? MAINT_FAST_FREQUENCY 706 ? MAINT_FAST_FREQUENCY
689 : MAINT_FREQUENCY, 707 : MAINT_FREQUENCY, &maint, NULL);
690 &maint, NULL); 708 }
709
710 /* check for services that died (WAITPID) */
711 prev = NULL;
712 next = running;
713 while (NULL != (pos = next))
714 {
715 enum GNUNET_OS_ProcessStatusType statusType;
716 unsigned long statusCode;
717
718 next = pos->next;
719 if ((NULL != pos->kill_continuation) ||
720 ((GNUNET_YES == in_shutdown) && (pos->pid == 0)))
721 {
722 if (prev == NULL)
723 running = next;
724 else
725 prev->next = next;
726 if (NULL != pos->kill_continuation)
727 pos->kill_continuation (pos->kill_continuation_cls, pos);
728 else
729 free_entry (pos);
730 continue;
731 }
732 if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
733 &statusType,
734 &statusCode))) ||
735 ((ret == GNUNET_NO) ||
736 (statusType == GNUNET_OS_PROCESS_STOPPED) ||
737 (statusType == GNUNET_OS_PROCESS_RUNNING)))
738 {
739 prev = pos;
740 continue;
741 }
742 if (statusType == GNUNET_OS_PROCESS_EXITED)
743 {
744 statstr = _( /* process termination method */ "exit");
745 statcode = statusCode;
746 }
747 else if (statusType == GNUNET_OS_PROCESS_SIGNALED)
748 {
749 statstr = _( /* process termination method */ "signal");
750 statcode = statusCode;
751 }
752 else
753 {
754 statstr = _( /* process termination method */ "unknown");
755 statcode = 0;
756 }
757 if (GNUNET_YES != in_shutdown)
758 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
759 _
760 ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
761 pos->name, statstr, statcode);
762#if DEBUG_ARM
763 else
764 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
765 "Service `%s' terminated with status %s/%d\n",
766 pos->name, statstr, statcode);
767#endif
768 /* schedule restart */
769 pos->pid = 0;
770 prev = pos;
691 } 771 }
692 772
773 /* check for services that need to be restarted due to
774 configuration changes or because the last restart failed */
775 pos = running;
776 while (pos != NULL)
777 {
778 if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
779 {
780 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
781 _
782 ("Restarting service `%s' due to configuration file change.\n"));
783 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
784 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
785 }
786 if ((pos->pid == 0) && (GNUNET_YES != in_shutdown))
787 {
788 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
789 _("Restarting service `%s'.\n"), pos->name);
790 /* FIXME: should have some exponentially
791 increasing timer to avoid tight restart loops */
792 if (pos->restartAt.value != GNUNET_TIME_UNIT_FOREVER_ABS.value)
793 {
794 /* Otherwise, the process died for the first time, backoff shouldn't increase */
795 if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
796 pos->backoff =
797 GNUNET_TIME_relative_multiply (pos->backoff, 2);
798 }
799
800 pos->restartAt = GNUNET_TIME_relative_to_absolute (pos->backoff);
801
802 lowestRestartDelay = GNUNET_TIME_relative_min (lowestRestartDelay,
803 GNUNET_TIME_absolute_get_remaining
804 (pos->restartAt));
805
806 if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)
807 start_process (pos);
808 }
809 pos = pos->next;
810 }
811}
812
813#if 0
814static GNUNET_SCHEDULER_TaskIdentifier child_death_task;
815
816static GNUNET_SCHEDULER_TaskIdentifier child_restart_task;
817
818
819
820/**
821 *
822 *
823 * @param cls closure, NULL if we need to self-restart
824 * @param tc context
825 */
826static void
827maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
828{
829 struct ServiceList *pos;
830 struct ServiceList *prev;
831 struct ServiceList *next;
832 const char *statstr;
833 int statcode;
834 struct stat sbuf;
835 struct GNUNET_TIME_Relative lowestRestartDelay;
836 int ret;
837
838 child_death_task = GNUNET_SCHEDULER_NO_TASK;
839 if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN))
840 return;
841 child_death_task =
842 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
843 &maint_child_death, NULL);
693 /* check for services that died (WAITPID) */ 844 /* check for services that died (WAITPID) */
694 prev = NULL; 845 prev = NULL;
695 next = running; 846 next = running;
@@ -697,12 +848,11 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
697 { 848 {
698 enum GNUNET_OS_ProcessStatusType statusType; 849 enum GNUNET_OS_ProcessStatusType statusType;
699 unsigned long statusCode; 850 unsigned long statusCode;
700 851
701 next = pos->next; 852 next = pos->next;
702 if ( (NULL != pos->kill_continuation) || 853 if ((NULL != pos->kill_continuation) ||
703 ( (GNUNET_YES == in_shutdown) && 854 ((GNUNET_YES == in_shutdown) && (pos->pid == 0)))
704 (pos->pid == 0) ) ) 855 {
705 {
706 if (prev == NULL) 856 if (prev == NULL)
707 running = next; 857 running = next;
708 else 858 else
@@ -712,13 +862,13 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
712 else 862 else
713 free_entry (pos); 863 free_entry (pos);
714 continue; 864 continue;
715 } 865 }
716 if ( (GNUNET_SYSERR == (ret = GNUNET_OS_process_status(pos->pid, 866 if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid,
717 &statusType, 867 &statusType,
718 &statusCode))) || 868 &statusCode))) ||
719 ( (ret == GNUNET_NO) || 869 ((ret == GNUNET_NO) ||
720 (statusType == GNUNET_OS_PROCESS_STOPPED) || 870 (statusType == GNUNET_OS_PROCESS_STOPPED) ||
721 (statusType == GNUNET_OS_PROCESS_RUNNING) ) ) 871 (statusType == GNUNET_OS_PROCESS_RUNNING)))
722 { 872 {
723 prev = pos; 873 prev = pos;
724 continue; 874 continue;
@@ -737,17 +887,18 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
737 { 887 {
738 statstr = _( /* process termination method */ "unknown"); 888 statstr = _( /* process termination method */ "unknown");
739 statcode = 0; 889 statcode = 0;
740 } 890 }
741 if (GNUNET_YES != in_shutdown) 891 if (GNUNET_YES != in_shutdown)
742 GNUNET_log (GNUNET_ERROR_TYPE_WARNING, 892 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
743 _("Service `%s' terminated with status %s/%d, will try to restart it!\n"), 893 _
894 ("Service `%s' terminated with status %s/%d, will try to restart it!\n"),
744 pos->name, statstr, statcode); 895 pos->name, statstr, statcode);
745#if DEBUG_ARM 896#if DEBUG_ARM
746 else 897 else
747 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 898 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
748 "Service `%s' terminated with status %s/%d\n", 899 "Service `%s' terminated with status %s/%d\n",
749 pos->name, statstr, statcode); 900 pos->name, statstr, statcode);
750#endif 901#endif
751 /* schedule restart */ 902 /* schedule restart */
752 pos->pid = 0; 903 pos->pid = 0;
753 prev = pos; 904 prev = pos;
@@ -759,25 +910,45 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
759 while (pos != NULL) 910 while (pos != NULL)
760 { 911 {
761 if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime)) 912 if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime))
762 { 913 {
763 GNUNET_log (GNUNET_ERROR_TYPE_INFO, 914 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
764 _("Restarting service `%s' due to configuration file change.\n")); 915 _
765 if (0 != PLIBC_KILL (pos->pid, SIGTERM)) 916 ("Restarting service `%s' due to configuration file change.\n"));
766 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); 917 if (0 != PLIBC_KILL (pos->pid, SIGTERM))
767 } 918 GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill");
768 if ( (pos->pid == 0) && 919 }
769 (GNUNET_YES != in_shutdown) ) 920 if ((pos->pid == 0) && (GNUNET_YES != in_shutdown))
770 { 921 {
771 GNUNET_log (GNUNET_ERROR_TYPE_INFO, 922 GNUNET_log (GNUNET_ERROR_TYPE_INFO,
772 _("Restarting service `%s'.\n"), pos->name); 923 _("Restarting service `%s'.\n"), pos->name);
773 /* FIXME: should have some exponentially 924 /* FIXME: should have some exponentially
774 increasing timer to avoid tight restart loops */ 925 increasing timer to avoid tight restart loops */
775 start_process (pos); 926 if (pos->restartAt.value != GNUNET_TIME_UNIT_FOREVER_ABS.value)
776 } 927 {
928 /* Otherwise, the process died for the first time, backoff shouldn't increase */
929 if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD)
930 pos->backoff =
931 GNUNET_TIME_relative_multiply (pos->backoff, 2);
932 }
933
934 pos->restartAt = GNUNET_TIME_relative_to_absolute (pos->backoff);
935
936 lowestRestartDelay = GNUNET_TIME_relative_min (lowestRestartDelay,
937 GNUNET_TIME_absolute_get_remaining
938 (pos->restartAt));
939
940 if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0)
941 start_process (pos);
942 }
777 pos = pos->next; 943 pos = pos->next;
778 } 944 }
779} 945}
780 946
947#endif
948
949
950
951
781 952
782/** 953/**
783 * List of handlers for the messages understood by this service. 954 * List of handlers for the messages understood by this service.
@@ -788,6 +959,28 @@ static struct GNUNET_SERVER_MessageHandler handlers[] = {
788 {NULL, NULL, 0, 0} 959 {NULL, NULL, 0, 0}
789}; 960};
790 961
962static struct GNUNET_SIGNAL_Context *shc_chld;
963
964/**
965 * Pipe written to by the SIGCHLD handler to signal that a child process died.
966 */
967static struct GNUNET_DISK_PipeHandle *sigpipe;
968
969static const struct GNUNET_DISK_FileHandle *pr;
970
971/**
972 * Signal handler installed for SIGCHLD; writes one byte to the write end of the signal pipe.
973 */
974static void
975sighandler_child_death ()
976{
977 static char c;
978
979 GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
980 (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c,
981 sizeof (c));
982}
983
791 984
792/** 985/**
793 * Process arm requests. 986 * Process arm requests.
@@ -806,41 +999,53 @@ run (void *cls,
806 char *defaultservices; 999 char *defaultservices;
807 char *pos; 1000 char *pos;
808 1001
1002 shc_chld = GNUNET_SIGNAL_handler_install (SIGCHLD, &sighandler_child_death);
1003 GNUNET_assert (sigpipe == NULL);
1004 sigpipe = GNUNET_DISK_pipe (GNUNET_NO);
1005 GNUNET_assert (sigpipe != NULL);
1006 pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ);
1007 GNUNET_assert (pr != NULL);
1008
809 GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES); 1009 GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES);
810 GNUNET_assert (serv != NULL); 1010 GNUNET_assert (serv != NULL);
811 cfg = c; 1011 cfg = c;
812 sched = s; 1012 sched = s;
813 server = serv; 1013 server = serv;
1014 /*
1015 * child_death_task =
1016 GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr,
1017 &maint_child_death, NULL);
1018*/
814 if (GNUNET_OK != 1019 if (GNUNET_OK !=
815 GNUNET_CONFIGURATION_get_value_string (cfg, 1020 GNUNET_CONFIGURATION_get_value_string (cfg,
816 "ARM", 1021 "ARM",
817 "GLOBAL_PREFIX", 1022 "GLOBAL_PREFIX",
818 &prefix_command)) 1023 &prefix_command))
819 prefix_command = GNUNET_strdup (""); 1024 prefix_command = GNUNET_strdup ("");
820 /* start default services... */ 1025 /* start default services... */
821 if (GNUNET_OK == 1026 if (GNUNET_OK ==
822 GNUNET_CONFIGURATION_get_value_string (cfg, 1027 GNUNET_CONFIGURATION_get_value_string (cfg,
823 "ARM", 1028 "ARM",
824 "DEFAULTSERVICES", 1029 "DEFAULTSERVICES",
825 &defaultservices)) 1030 &defaultservices))
826 { 1031 {
827#if DEBUG_ARM 1032#if DEBUG_ARM
828 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 1033 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
829 "Starting default services `%s'\n", defaultservices); 1034 "Starting default services `%s'\n", defaultservices);
830#endif 1035#endif
831 pos = strtok (defaultservices, " "); 1036 pos = strtok (defaultservices, " ");
832 while (pos != NULL) 1037 while (pos != NULL)
833 { 1038 {
834 start_service (NULL, pos); 1039 start_service (NULL, pos);
835 pos = strtok (NULL, " "); 1040 pos = strtok (NULL, " ");
836 } 1041 }
837 GNUNET_free (defaultservices); 1042 GNUNET_free (defaultservices);
838 } 1043 }
839 else 1044 else
840 { 1045 {
841#if DEBUG_ARM 1046#if DEBUG_ARM
842 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, 1047 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
843 "No default services configured.\n"); 1048 "No default services configured.\n");
844#endif 1049#endif
845 } 1050 }
846 1051
@@ -865,10 +1070,8 @@ int
865main (int argc, char *const *argv) 1070main (int argc, char *const *argv)
866{ 1071{
867 return (GNUNET_OK == 1072 return (GNUNET_OK ==
868 GNUNET_SERVICE_run (argc, 1073 GNUNET_SERVICE_run (argc,
869 argv, "arm", 1074 argv, "arm", GNUNET_YES, &run, NULL)) ? 0 : 1;
870 GNUNET_YES,
871 &run, NULL)) ? 0 : 1;
872} 1075}
873 1076
874/* end of gnunet-service-arm.c */ 1077/* end of gnunet-service-arm.c */