diff options
author | Safey A.Halim <safey.allah@gmail.com> | 2009-12-08 08:59:49 +0000 |
---|---|---|
committer | Safey A.Halim <safey.allah@gmail.com> | 2009-12-08 08:59:49 +0000 |
commit | 1253be546cc47789197480a82975aa65ad973b0d (patch) | |
tree | 16865dc4d6e4f94a05cebd43d148c7f7a8ebcaea /src | |
parent | c73a3e5fafa6a55347642503ed77de7977def1cb (diff) | |
download | gnunet-1253be546cc47789197480a82975aa65ad973b0d.tar.gz gnunet-1253be546cc47789197480a82975aa65ad973b0d.zip |
Arm Exponential backoff
Diffstat (limited to 'src')
-rw-r--r-- | src/arm/gnunet-service-arm.c | 431 |
1 files changed, 317 insertions, 114 deletions
diff --git a/src/arm/gnunet-service-arm.c b/src/arm/gnunet-service-arm.c index cc9e2cd27..f525750cf 100644 --- a/src/arm/gnunet-service-arm.c +++ b/src/arm/gnunet-service-arm.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include "gnunet_os_lib.h" | 42 | #include "gnunet_os_lib.h" |
43 | #include "gnunet_protocols.h" | 43 | #include "gnunet_protocols.h" |
44 | #include "gnunet_service_lib.h" | 44 | #include "gnunet_service_lib.h" |
45 | #include "gnunet_signal_lib.h" | ||
45 | #include "arm.h" | 46 | #include "arm.h" |
46 | 47 | ||
47 | 48 | ||
@@ -62,6 +63,13 @@ | |||
62 | */ | 63 | */ |
63 | #define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES | 64 | #define CHECK_TIMEOUT GNUNET_TIME_UNIT_MINUTES |
64 | 65 | ||
66 | |||
67 | /** | ||
68 | * Threshold after which exponential backoff shouldn't increase | ||
69 | */ | ||
70 | #define EXPONENTIAL_BACKOFF_THRESHOLD 8 | ||
71 | |||
72 | |||
65 | /** | 73 | /** |
66 | * List of our services. | 74 | * List of our services. |
67 | */ | 75 | */ |
@@ -123,6 +131,12 @@ struct ServiceList | |||
123 | */ | 131 | */ |
124 | time_t mtime; | 132 | time_t mtime; |
125 | 133 | ||
134 | /* Process exponential backoff time */ | ||
135 | struct GNUNET_TIME_Relative backoff; | ||
136 | |||
137 | /* Absolute time at which the process is scheduled to restart in case of death */ | ||
138 | struct GNUNET_TIME_Absolute restartAt; | ||
139 | |||
126 | /** | 140 | /** |
127 | * Reference counter (counts how many times we've been | 141 | * Reference counter (counts how many times we've been |
128 | * asked to start the service). We only actually stop | 142 | * asked to start the service). We only actually stop |
@@ -157,6 +171,7 @@ static char *prefix_command; | |||
157 | */ | 171 | */ |
158 | static int in_shutdown; | 172 | static int in_shutdown; |
159 | 173 | ||
174 | |||
160 | /** | 175 | /** |
161 | * Handle to our server instance. Our server is a bit special in that | 176 | * Handle to our server instance. Our server is a bit special in that |
162 | * its service is not immediately stopped once we get a shutdown | 177 | * its service is not immediately stopped once we get a shutdown |
@@ -175,8 +190,7 @@ static struct GNUNET_SERVER_Handle *server; | |||
175 | * @param cls closure, NULL if we need to self-restart | 190 | * @param cls closure, NULL if we need to self-restart |
176 | * @param tc context | 191 | * @param tc context |
177 | */ | 192 | */ |
178 | static void | 193 | static void maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc); |
179 | maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc); | ||
180 | 194 | ||
181 | 195 | ||
182 | /** | 196 | /** |
@@ -197,7 +211,7 @@ write_result (void *cls, size_t size, void *buf) | |||
197 | { | 211 | { |
198 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | 212 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, |
199 | _("Could not send status result to client\n")); | 213 | _("Could not send status result to client\n")); |
200 | return 0; /* error, not much we can do */ | 214 | return 0; /* error, not much we can do */ |
201 | } | 215 | } |
202 | GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader)); | 216 | GNUNET_assert (size >= sizeof (struct GNUNET_MessageHeader)); |
203 | msg = buf; | 217 | msg = buf; |
@@ -220,28 +234,29 @@ write_result (void *cls, size_t size, void *buf) | |||
220 | */ | 234 | */ |
221 | static void | 235 | static void |
222 | signal_result (struct GNUNET_SERVER_Client *client, | 236 | signal_result (struct GNUNET_SERVER_Client *client, |
223 | const char *name, uint16_t result) | 237 | const char *name, uint16_t result) |
224 | { | 238 | { |
225 | uint16_t *res; | 239 | uint16_t *res; |
226 | 240 | ||
227 | if (NULL == client) | 241 | if (NULL == client) |
228 | { | 242 | { |
229 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | 243 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, |
230 | _("Not sending status result to client: no client known\n")); | 244 | _ |
245 | ("Not sending status result to client: no client known\n")); | ||
231 | return; | 246 | return; |
232 | } | 247 | } |
233 | #if DEBUG_ARM | 248 | #if DEBUG_ARM |
234 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 249 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
235 | "Telling client that service `%s' is now %s\n", | 250 | "Telling client that service `%s' is now %s\n", |
236 | name, | 251 | name, |
237 | result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up"); | 252 | result == GNUNET_MESSAGE_TYPE_ARM_IS_DOWN ? "down" : "up"); |
238 | #endif | 253 | #endif |
239 | res = GNUNET_malloc (sizeof (uint16_t)); | 254 | res = GNUNET_malloc (sizeof (uint16_t)); |
240 | *res = result; | 255 | *res = result; |
241 | GNUNET_SERVER_notify_transmit_ready (client, | 256 | GNUNET_SERVER_notify_transmit_ready (client, |
242 | sizeof (struct GNUNET_MessageHeader), | 257 | sizeof (struct GNUNET_MessageHeader), |
243 | GNUNET_TIME_UNIT_FOREVER_REL, | 258 | GNUNET_TIME_UNIT_FOREVER_REL, |
244 | &write_result, res); | 259 | &write_result, res); |
245 | } | 260 | } |
246 | 261 | ||
247 | 262 | ||
@@ -263,14 +278,14 @@ find_name (const char *name) | |||
263 | while (pos != NULL) | 278 | while (pos != NULL) |
264 | { | 279 | { |
265 | if (0 == strcmp (pos->name, name)) | 280 | if (0 == strcmp (pos->name, name)) |
266 | { | 281 | { |
267 | if (prev == NULL) | 282 | if (prev == NULL) |
268 | running = pos->next; | 283 | running = pos->next; |
269 | else | 284 | else |
270 | prev->next = pos->next; | 285 | prev->next = pos->next; |
271 | pos->next = NULL; | 286 | pos->next = NULL; |
272 | return pos; | 287 | return pos; |
273 | } | 288 | } |
274 | prev = pos; | 289 | prev = pos; |
275 | pos = pos->next; | 290 | pos = pos->next; |
276 | } | 291 | } |
@@ -313,19 +328,19 @@ start_process (struct ServiceList *sl) | |||
313 | /* start service */ | 328 | /* start service */ |
314 | if (GNUNET_OK != | 329 | if (GNUNET_OK != |
315 | GNUNET_CONFIGURATION_get_value_string (cfg, | 330 | GNUNET_CONFIGURATION_get_value_string (cfg, |
316 | sl->name, "PREFIX", &loprefix)) | 331 | sl->name, "PREFIX", &loprefix)) |
317 | loprefix = GNUNET_strdup (prefix_command); | 332 | loprefix = GNUNET_strdup (prefix_command); |
318 | if (GNUNET_OK != | 333 | if (GNUNET_OK != |
319 | GNUNET_CONFIGURATION_get_value_string (cfg, | 334 | GNUNET_CONFIGURATION_get_value_string (cfg, |
320 | sl->name, "OPTIONS", &options)) | 335 | sl->name, "OPTIONS", &options)) |
321 | options = GNUNET_strdup (""); | 336 | options = GNUNET_strdup (""); |
322 | use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG"); | 337 | use_debug = GNUNET_CONFIGURATION_get_value_yesno (cfg, sl->name, "DEBUG"); |
323 | 338 | ||
324 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name); | 339 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Starting service `%s'\n"), sl->name); |
325 | #if DEBUG_ARM | 340 | #if DEBUG_ARM |
326 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 341 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
327 | "Starting service `%s' using binary `%s' and configuration `%s'\n", | 342 | "Starting service `%s' using binary `%s' and configuration `%s'\n", |
328 | sl->name, sl->binary, sl->config); | 343 | sl->name, sl->binary, sl->config); |
329 | #endif | 344 | #endif |
330 | argv_size = 6; | 345 | argv_size = 6; |
331 | if (use_debug) | 346 | if (use_debug) |
@@ -334,14 +349,14 @@ start_process (struct ServiceList *sl) | |||
334 | while ('\0' != *lopos) | 349 | while ('\0' != *lopos) |
335 | { | 350 | { |
336 | if (*lopos == ' ') | 351 | if (*lopos == ' ') |
337 | argv_size++; | 352 | argv_size++; |
338 | lopos++; | 353 | lopos++; |
339 | } | 354 | } |
340 | optpos = options; | 355 | optpos = options; |
341 | while ('\0' != *optpos) | 356 | while ('\0' != *optpos) |
342 | { | 357 | { |
343 | if (*optpos == ' ') | 358 | if (*optpos == ' ') |
344 | argv_size++; | 359 | argv_size++; |
345 | optpos++; | 360 | optpos++; |
346 | } | 361 | } |
347 | firstarg = NULL; | 362 | firstarg = NULL; |
@@ -352,16 +367,16 @@ start_process (struct ServiceList *sl) | |||
352 | while ('\0' != *lopos) | 367 | while ('\0' != *lopos) |
353 | { | 368 | { |
354 | while (*lopos == ' ') | 369 | while (*lopos == ' ') |
355 | lopos++; | 370 | lopos++; |
356 | if (*lopos == '\0') | 371 | if (*lopos == '\0') |
357 | continue; | 372 | continue; |
358 | if (argv_size == 0) | 373 | if (argv_size == 0) |
359 | firstarg = lopos; | 374 | firstarg = lopos; |
360 | argv[argv_size++] = lopos; | 375 | argv[argv_size++] = lopos; |
361 | while (('\0' != *lopos) && (' ' != *lopos)) | 376 | while (('\0' != *lopos) && (' ' != *lopos)) |
362 | lopos++; | 377 | lopos++; |
363 | if ('\0' == *lopos) | 378 | if ('\0' == *lopos) |
364 | continue; | 379 | continue; |
365 | *lopos = '\0'; | 380 | *lopos = '\0'; |
366 | lopos++; | 381 | lopos++; |
367 | } | 382 | } |
@@ -379,14 +394,14 @@ start_process (struct ServiceList *sl) | |||
379 | while ('\0' != *optpos) | 394 | while ('\0' != *optpos) |
380 | { | 395 | { |
381 | while (*optpos == ' ') | 396 | while (*optpos == ' ') |
382 | optpos++; | 397 | optpos++; |
383 | if (*optpos == '\0') | 398 | if (*optpos == '\0') |
384 | continue; | 399 | continue; |
385 | argv[argv_size++] = optpos; | 400 | argv[argv_size++] = optpos; |
386 | while (('\0' != *optpos) && (' ' != *optpos)) | 401 | while (('\0' != *optpos) && (' ' != *optpos)) |
387 | optpos++; | 402 | optpos++; |
388 | if ('\0' == *optpos) | 403 | if ('\0' == *optpos) |
389 | continue; | 404 | continue; |
390 | *optpos = '\0'; | 405 | *optpos = '\0'; |
391 | optpos++; | 406 | optpos++; |
392 | } | 407 | } |
@@ -415,7 +430,8 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename) | |||
415 | if (GNUNET_YES == in_shutdown) | 430 | if (GNUNET_YES == in_shutdown) |
416 | { | 431 | { |
417 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | 432 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, |
418 | _("ARM is shutting down, service `%s' not started.\n"), servicename); | 433 | _("ARM is shutting down, service `%s' not started.\n"), |
434 | servicename); | ||
419 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); | 435 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); |
420 | return; | 436 | return; |
421 | } | 437 | } |
@@ -424,7 +440,7 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename) | |||
424 | { | 440 | { |
425 | /* already running, just increment RC */ | 441 | /* already running, just increment RC */ |
426 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | 442 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, |
427 | _("Service `%s' already running.\n"), servicename); | 443 | _("Service `%s' already running.\n"), servicename); |
428 | sl->rc++; | 444 | sl->rc++; |
429 | sl->next = running; | 445 | sl->next = running; |
430 | running = sl; | 446 | running = sl; |
@@ -433,24 +449,24 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename) | |||
433 | } | 449 | } |
434 | if (GNUNET_OK != | 450 | if (GNUNET_OK != |
435 | GNUNET_CONFIGURATION_get_value_string (cfg, | 451 | GNUNET_CONFIGURATION_get_value_string (cfg, |
436 | servicename, "BINARY", &binary)) | 452 | servicename, "BINARY", &binary)) |
437 | { | 453 | { |
438 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | 454 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, |
439 | _("Binary implementing service `%s' not known!\n"), | 455 | _("Binary implementing service `%s' not known!\n"), |
440 | servicename); | 456 | servicename); |
441 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); | 457 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); |
442 | return; | 458 | return; |
443 | } | 459 | } |
444 | if ((GNUNET_OK != | 460 | if ((GNUNET_OK != |
445 | GNUNET_CONFIGURATION_get_value_filename (cfg, | 461 | GNUNET_CONFIGURATION_get_value_filename (cfg, |
446 | servicename, | 462 | servicename, |
447 | "CONFIG", | 463 | "CONFIG", |
448 | &config)) || | 464 | &config)) || |
449 | (0 != STAT (config, &sbuf))) | 465 | (0 != STAT (config, &sbuf))) |
450 | { | 466 | { |
451 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | 467 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, |
452 | _("Configuration file `%s' for service `%s' not known!\n"), | 468 | _("Configuration file `%s' for service `%s' not known!\n"), |
453 | config, servicename); | 469 | config, servicename); |
454 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); | 470 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_DOWN); |
455 | GNUNET_free (binary); | 471 | GNUNET_free (binary); |
456 | GNUNET_free_non_null (config); | 472 | GNUNET_free_non_null (config); |
@@ -463,6 +479,9 @@ start_service (struct GNUNET_SERVER_Client *client, const char *servicename) | |||
463 | sl->binary = binary; | 479 | sl->binary = binary; |
464 | sl->config = config; | 480 | sl->config = config; |
465 | sl->mtime = sbuf.st_mtime; | 481 | sl->mtime = sbuf.st_mtime; |
482 | sl->backoff = GNUNET_TIME_UNIT_MILLISECONDS; | ||
483 | sl->restartAt = GNUNET_TIME_UNIT_FOREVER_ABS; | ||
484 | |||
466 | running = sl; | 485 | running = sl; |
467 | start_process (sl); | 486 | start_process (sl); |
468 | if (NULL != client) | 487 | if (NULL != client) |
@@ -497,13 +516,12 @@ free_and_signal (void *cls, struct ServiceList *pos) | |||
497 | * @param servicename name of the service to stop | 516 | * @param servicename name of the service to stop |
498 | */ | 517 | */ |
499 | static void | 518 | static void |
500 | stop_service (struct GNUNET_SERVER_Client *client, | 519 | stop_service (struct GNUNET_SERVER_Client *client, const char *servicename) |
501 | const char *servicename) | ||
502 | { | 520 | { |
503 | struct ServiceList *pos; | 521 | struct ServiceList *pos; |
504 | 522 | ||
505 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | 523 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, |
506 | _("Preparing to stop `%s'\n"), servicename); | 524 | _("Preparing to stop `%s'\n"), servicename); |
507 | pos = find_name (servicename); | 525 | pos = find_name (servicename); |
508 | if (pos == NULL) | 526 | if (pos == NULL) |
509 | { | 527 | { |
@@ -520,15 +538,14 @@ stop_service (struct GNUNET_SERVER_Client *client, | |||
520 | #if DEBUG_ARM | 538 | #if DEBUG_ARM |
521 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 539 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
522 | "Service `%s' still used by %u clients, will keep it running!\n", | 540 | "Service `%s' still used by %u clients, will keep it running!\n", |
523 | servicename, | 541 | servicename, pos->rc); |
524 | pos->rc); | ||
525 | #endif | 542 | #endif |
526 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP); | 543 | signal_result (client, servicename, GNUNET_MESSAGE_TYPE_ARM_IS_UP); |
527 | GNUNET_SERVER_receive_done (client, GNUNET_OK); | 544 | GNUNET_SERVER_receive_done (client, GNUNET_OK); |
528 | return; | 545 | return; |
529 | } | 546 | } |
530 | if (pos->rc == 1) | 547 | if (pos->rc == 1) |
531 | pos->rc--; /* decrement RC to zero */ | 548 | pos->rc--; /* decrement RC to zero */ |
532 | if (pos->kill_continuation != NULL) | 549 | if (pos->kill_continuation != NULL) |
533 | { | 550 | { |
534 | /* killing already in progress */ | 551 | /* killing already in progress */ |
@@ -582,8 +599,8 @@ stop_service (struct GNUNET_SERVER_Client *client, | |||
582 | */ | 599 | */ |
583 | static void | 600 | static void |
584 | handle_start (void *cls, | 601 | handle_start (void *cls, |
585 | struct GNUNET_SERVER_Client *client, | 602 | struct GNUNET_SERVER_Client *client, |
586 | const struct GNUNET_MessageHeader *message) | 603 | const struct GNUNET_MessageHeader *message) |
587 | { | 604 | { |
588 | const char *servicename; | 605 | const char *servicename; |
589 | uint16_t size; | 606 | uint16_t size; |
@@ -613,8 +630,8 @@ handle_start (void *cls, | |||
613 | */ | 630 | */ |
614 | static void | 631 | static void |
615 | handle_stop (void *cls, | 632 | handle_stop (void *cls, |
616 | struct GNUNET_SERVER_Client *client, | 633 | struct GNUNET_SERVER_Client *client, |
617 | const struct GNUNET_MessageHeader *message) | 634 | const struct GNUNET_MessageHeader *message) |
618 | { | 635 | { |
619 | const char *servicename; | 636 | const char *servicename; |
620 | uint16_t size; | 637 | uint16_t size; |
@@ -647,49 +664,183 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | |||
647 | const char *statstr; | 664 | const char *statstr; |
648 | int statcode; | 665 | int statcode; |
649 | struct stat sbuf; | 666 | struct stat sbuf; |
667 | struct GNUNET_TIME_Relative lowestRestartDelay; | ||
650 | int ret; | 668 | int ret; |
651 | 669 | ||
670 | lowestRestartDelay = GNUNET_TIME_UNIT_FOREVER_REL; | ||
671 | |||
652 | if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN)) | 672 | if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN)) |
653 | { | 673 | { |
654 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n")); | 674 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, _("Stopping all services\n")); |
675 | |||
655 | in_shutdown = GNUNET_YES; | 676 | in_shutdown = GNUNET_YES; |
656 | pos = running; | 677 | pos = running; |
657 | while (NULL != pos) | 678 | while (NULL != pos) |
658 | { | 679 | { |
659 | if (pos->pid != 0) | 680 | if (pos->pid != 0) |
660 | { | 681 | { |
661 | #if DEBUG_ARM | 682 | #if DEBUG_ARM |
662 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 683 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
663 | "Sending SIGTERM to `%s'\n", | 684 | "Sending SIGTERM to `%s'\n", pos->name); |
664 | pos->name); | ||
665 | #endif | 685 | #endif |
666 | if (0 != PLIBC_KILL (pos->pid, SIGTERM)) | 686 | if (0 != PLIBC_KILL (pos->pid, SIGTERM)) |
667 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); | 687 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); |
668 | } | 688 | } |
669 | pos = pos->next; | 689 | pos = pos->next; |
670 | } | 690 | } |
671 | } | 691 | } |
672 | if (cls == NULL) | 692 | if (cls == NULL) |
673 | { | 693 | { |
674 | if ( (in_shutdown == GNUNET_YES) && | 694 | if ((in_shutdown == GNUNET_YES) && (running == NULL)) |
675 | (running == NULL) ) | ||
676 | { | 695 | { |
677 | #if DEBUG_ARM | 696 | #if DEBUG_ARM |
678 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 697 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ARM service terminates.\n"); |
679 | "ARM service terminates.\n"); | ||
680 | #endif | 698 | #endif |
681 | GNUNET_assert (server != NULL); | 699 | GNUNET_assert (server != NULL); |
682 | GNUNET_SERVER_destroy (server); | 700 | GNUNET_SERVER_destroy (server); |
683 | server = NULL; | 701 | server = NULL; |
684 | return; /* we are done! */ | 702 | return; /* we are done! */ |
685 | } | 703 | } |
686 | GNUNET_SCHEDULER_add_delayed (tc->sched, | 704 | GNUNET_SCHEDULER_add_delayed (tc->sched, |
687 | (in_shutdown == GNUNET_YES) | 705 | (in_shutdown == GNUNET_YES) |
688 | ? MAINT_FAST_FREQUENCY | 706 | ? MAINT_FAST_FREQUENCY |
689 | : MAINT_FREQUENCY, | 707 | : MAINT_FREQUENCY, &maint, NULL); |
690 | &maint, NULL); | 708 | } |
709 | |||
710 | /* check for services that died (WAITPID) */ | ||
711 | prev = NULL; | ||
712 | next = running; | ||
713 | while (NULL != (pos = next)) | ||
714 | { | ||
715 | enum GNUNET_OS_ProcessStatusType statusType; | ||
716 | unsigned long statusCode; | ||
717 | |||
718 | next = pos->next; | ||
719 | if ((NULL != pos->kill_continuation) || | ||
720 | ((GNUNET_YES == in_shutdown) && (pos->pid == 0))) | ||
721 | { | ||
722 | if (prev == NULL) | ||
723 | running = next; | ||
724 | else | ||
725 | prev->next = next; | ||
726 | if (NULL != pos->kill_continuation) | ||
727 | pos->kill_continuation (pos->kill_continuation_cls, pos); | ||
728 | else | ||
729 | free_entry (pos); | ||
730 | continue; | ||
731 | } | ||
732 | if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid, | ||
733 | &statusType, | ||
734 | &statusCode))) || | ||
735 | ((ret == GNUNET_NO) || | ||
736 | (statusType == GNUNET_OS_PROCESS_STOPPED) || | ||
737 | (statusType == GNUNET_OS_PROCESS_RUNNING))) | ||
738 | { | ||
739 | prev = pos; | ||
740 | continue; | ||
741 | } | ||
742 | if (statusType == GNUNET_OS_PROCESS_EXITED) | ||
743 | { | ||
744 | statstr = _( /* process termination method */ "exit"); | ||
745 | statcode = statusCode; | ||
746 | } | ||
747 | else if (statusType == GNUNET_OS_PROCESS_SIGNALED) | ||
748 | { | ||
749 | statstr = _( /* process termination method */ "signal"); | ||
750 | statcode = statusCode; | ||
751 | } | ||
752 | else | ||
753 | { | ||
754 | statstr = _( /* process termination method */ "unknown"); | ||
755 | statcode = 0; | ||
756 | } | ||
757 | if (GNUNET_YES != in_shutdown) | ||
758 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
759 | _ | ||
760 | ("Service `%s' terminated with status %s/%d, will try to restart it!\n"), | ||
761 | pos->name, statstr, statcode); | ||
762 | #if DEBUG_ARM | ||
763 | else | ||
764 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
765 | "Service `%s' terminated with status %s/%d\n", | ||
766 | pos->name, statstr, statcode); | ||
767 | #endif | ||
768 | /* schedule restart */ | ||
769 | pos->pid = 0; | ||
770 | prev = pos; | ||
691 | } | 771 | } |
692 | 772 | ||
773 | /* check for services that need to be restarted due to | ||
774 | configuration changes or because the last restart failed */ | ||
775 | pos = running; | ||
776 | while (pos != NULL) | ||
777 | { | ||
778 | if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime)) | ||
779 | { | ||
780 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
781 | _ | ||
782 | ("Restarting service `%s' due to configuration file change.\n")); | ||
783 | if (0 != PLIBC_KILL (pos->pid, SIGTERM)) | ||
784 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); | ||
785 | } | ||
786 | if ((pos->pid == 0) && (GNUNET_YES != in_shutdown)) | ||
787 | { | ||
788 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | ||
789 | _("Restarting service `%s'.\n"), pos->name); | ||
790 | /* FIXME: should have some exponentially | ||
791 | increasing timer to avoid tight restart loops */ | ||
792 | if (pos->restartAt.value != GNUNET_TIME_UNIT_FOREVER_ABS.value) | ||
793 | { | ||
794 | /* Otherwise, the process died for the first time, backoff shouldn't increase */ | ||
795 | if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD) | ||
796 | pos->backoff = | ||
797 | GNUNET_TIME_relative_multiply (pos->backoff, 2); | ||
798 | } | ||
799 | |||
800 | pos->restartAt = GNUNET_TIME_relative_to_absolute (pos->backoff); | ||
801 | |||
802 | lowestRestartDelay = GNUNET_TIME_relative_min (lowestRestartDelay, | ||
803 | GNUNET_TIME_absolute_get_remaining | ||
804 | (pos->restartAt)); | ||
805 | |||
806 | if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0) | ||
807 | start_process (pos); | ||
808 | } | ||
809 | pos = pos->next; | ||
810 | } | ||
811 | } | ||
812 | |||
813 | #if 0 | ||
814 | static GNUNET_SCHEDULER_TaskIdentifier child_death_task; | ||
815 | |||
816 | static GNUNET_SCHEDULER_TaskIdentifier child_restart_task; | ||
817 | |||
818 | |||
819 | |||
820 | /** | ||
821 | * | ||
822 | * | ||
823 | * @param cls closure, NULL if we need to self-restart | ||
824 | * @param tc context | ||
825 | */ | ||
826 | static void | ||
827 | maint_child_death (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
828 | { | ||
829 | struct ServiceList *pos; | ||
830 | struct ServiceList *prev; | ||
831 | struct ServiceList *next; | ||
832 | const char *statstr; | ||
833 | int statcode; | ||
834 | struct stat sbuf; | ||
835 | struct GNUNET_TIME_Relative lowestRestartDelay; | ||
836 | int ret; | ||
837 | |||
838 | child_death_task = GNUNET_SCHEDULER_NO_TASK; | ||
839 | if (0 != (tc->reason & GNUNET_SCHEDULER_REASON_SHUTDOWN)) | ||
840 | return; | ||
841 | child_death_task = | ||
842 | GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr, | ||
843 | &maint_child_death, NULL); | ||
693 | /* check for services that died (WAITPID) */ | 844 | /* check for services that died (WAITPID) */ |
694 | prev = NULL; | 845 | prev = NULL; |
695 | next = running; | 846 | next = running; |
@@ -697,12 +848,11 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | |||
697 | { | 848 | { |
698 | enum GNUNET_OS_ProcessStatusType statusType; | 849 | enum GNUNET_OS_ProcessStatusType statusType; |
699 | unsigned long statusCode; | 850 | unsigned long statusCode; |
700 | 851 | ||
701 | next = pos->next; | 852 | next = pos->next; |
702 | if ( (NULL != pos->kill_continuation) || | 853 | if ((NULL != pos->kill_continuation) || |
703 | ( (GNUNET_YES == in_shutdown) && | 854 | ((GNUNET_YES == in_shutdown) && (pos->pid == 0))) |
704 | (pos->pid == 0) ) ) | 855 | { |
705 | { | ||
706 | if (prev == NULL) | 856 | if (prev == NULL) |
707 | running = next; | 857 | running = next; |
708 | else | 858 | else |
@@ -712,13 +862,13 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | |||
712 | else | 862 | else |
713 | free_entry (pos); | 863 | free_entry (pos); |
714 | continue; | 864 | continue; |
715 | } | 865 | } |
716 | if ( (GNUNET_SYSERR == (ret = GNUNET_OS_process_status(pos->pid, | 866 | if ((GNUNET_SYSERR == (ret = GNUNET_OS_process_status (pos->pid, |
717 | &statusType, | 867 | &statusType, |
718 | &statusCode))) || | 868 | &statusCode))) || |
719 | ( (ret == GNUNET_NO) || | 869 | ((ret == GNUNET_NO) || |
720 | (statusType == GNUNET_OS_PROCESS_STOPPED) || | 870 | (statusType == GNUNET_OS_PROCESS_STOPPED) || |
721 | (statusType == GNUNET_OS_PROCESS_RUNNING) ) ) | 871 | (statusType == GNUNET_OS_PROCESS_RUNNING))) |
722 | { | 872 | { |
723 | prev = pos; | 873 | prev = pos; |
724 | continue; | 874 | continue; |
@@ -737,17 +887,18 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | |||
737 | { | 887 | { |
738 | statstr = _( /* process termination method */ "unknown"); | 888 | statstr = _( /* process termination method */ "unknown"); |
739 | statcode = 0; | 889 | statcode = 0; |
740 | } | 890 | } |
741 | if (GNUNET_YES != in_shutdown) | 891 | if (GNUNET_YES != in_shutdown) |
742 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | 892 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, |
743 | _("Service `%s' terminated with status %s/%d, will try to restart it!\n"), | 893 | _ |
894 | ("Service `%s' terminated with status %s/%d, will try to restart it!\n"), | ||
744 | pos->name, statstr, statcode); | 895 | pos->name, statstr, statcode); |
745 | #if DEBUG_ARM | 896 | #if DEBUG_ARM |
746 | else | 897 | else |
747 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 898 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
748 | "Service `%s' terminated with status %s/%d\n", | 899 | "Service `%s' terminated with status %s/%d\n", |
749 | pos->name, statstr, statcode); | 900 | pos->name, statstr, statcode); |
750 | #endif | 901 | #endif |
751 | /* schedule restart */ | 902 | /* schedule restart */ |
752 | pos->pid = 0; | 903 | pos->pid = 0; |
753 | prev = pos; | 904 | prev = pos; |
@@ -759,25 +910,45 @@ maint (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | |||
759 | while (pos != NULL) | 910 | while (pos != NULL) |
760 | { | 911 | { |
761 | if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime)) | 912 | if ((0 == STAT (pos->config, &sbuf)) && (pos->mtime < sbuf.st_mtime)) |
762 | { | 913 | { |
763 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | 914 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, |
764 | _("Restarting service `%s' due to configuration file change.\n")); | 915 | _ |
765 | if (0 != PLIBC_KILL (pos->pid, SIGTERM)) | 916 | ("Restarting service `%s' due to configuration file change.\n")); |
766 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); | 917 | if (0 != PLIBC_KILL (pos->pid, SIGTERM)) |
767 | } | 918 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_WARNING, "kill"); |
768 | if ( (pos->pid == 0) && | 919 | } |
769 | (GNUNET_YES != in_shutdown) ) | 920 | if ((pos->pid == 0) && (GNUNET_YES != in_shutdown)) |
770 | { | 921 | { |
771 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, | 922 | GNUNET_log (GNUNET_ERROR_TYPE_INFO, |
772 | _("Restarting service `%s'.\n"), pos->name); | 923 | _("Restarting service `%s'.\n"), pos->name); |
773 | /* FIXME: should have some exponentially | 924 | /* FIXME: should have some exponentially |
774 | increasing timer to avoid tight restart loops */ | 925 | increasing timer to avoid tight restart loops */ |
775 | start_process (pos); | 926 | if (pos->restartAt.value != GNUNET_TIME_UNIT_FOREVER_ABS.value) |
776 | } | 927 | { |
928 | /* Otherwise, the process died for the first time, backoff shouldn't increase */ | ||
929 | if (pos->backoff.value < EXPONENTIAL_BACKOFF_THRESHOLD) | ||
930 | pos->backoff = | ||
931 | GNUNET_TIME_relative_multiply (pos->backoff, 2); | ||
932 | } | ||
933 | |||
934 | pos->restartAt = GNUNET_TIME_relative_to_absolute (pos->backoff); | ||
935 | |||
936 | lowestRestartDelay = GNUNET_TIME_relative_min (lowestRestartDelay, | ||
937 | GNUNET_TIME_absolute_get_remaining | ||
938 | (pos->restartAt)); | ||
939 | |||
940 | if (GNUNET_TIME_absolute_get_remaining (pos->restartAt).value == 0) | ||
941 | start_process (pos); | ||
942 | } | ||
777 | pos = pos->next; | 943 | pos = pos->next; |
778 | } | 944 | } |
779 | } | 945 | } |
780 | 946 | ||
947 | #endif | ||
948 | |||
949 | |||
950 | |||
951 | |||
781 | 952 | ||
782 | /** | 953 | /** |
783 | * List of handlers for the messages understood by this service. | 954 | * List of handlers for the messages understood by this service. |
@@ -788,6 +959,28 @@ static struct GNUNET_SERVER_MessageHandler handlers[] = { | |||
788 | {NULL, NULL, 0, 0} | 959 | {NULL, NULL, 0, 0} |
789 | }; | 960 | }; |
790 | 961 | ||
962 | static struct GNUNET_SIGNAL_Context *shc_chld; | ||
963 | |||
964 | /** | ||
965 | * Pipe written to by the SIGCHLD handler (sighandler_child_death). | ||
966 | */ | ||
967 | static struct GNUNET_DISK_PipeHandle *sigpipe; | ||
968 | |||
969 | static const struct GNUNET_DISK_FileHandle *pr; | ||
970 | |||
971 | /** | ||
972 | * Signal handler for SIGCHLD; writes a single byte into the write end of sigpipe. | ||
973 | */ | ||
974 | static void | ||
975 | sighandler_child_death () | ||
976 | { | ||
977 | static char c; | ||
978 | |||
979 | GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle | ||
980 | (sigpipe, GNUNET_DISK_PIPE_END_WRITE), &c, | ||
981 | sizeof (c)); | ||
982 | } | ||
983 | |||
791 | 984 | ||
792 | /** | 985 | /** |
793 | * Process arm requests. | 986 | * Process arm requests. |
@@ -806,41 +999,53 @@ run (void *cls, | |||
806 | char *defaultservices; | 999 | char *defaultservices; |
807 | char *pos; | 1000 | char *pos; |
808 | 1001 | ||
1002 | shc_chld = GNUNET_SIGNAL_handler_install (SIGCHLD, &sighandler_child_death); | ||
1003 | GNUNET_assert (sigpipe == NULL); | ||
1004 | sigpipe = GNUNET_DISK_pipe (GNUNET_NO); | ||
1005 | GNUNET_assert (sigpipe != NULL); | ||
1006 | pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ); | ||
1007 | GNUNET_assert (pr != NULL); | ||
1008 | |||
809 | GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES); | 1009 | GNUNET_SERVER_ignore_shutdown (serv, GNUNET_YES); |
810 | GNUNET_assert (serv != NULL); | 1010 | GNUNET_assert (serv != NULL); |
811 | cfg = c; | 1011 | cfg = c; |
812 | sched = s; | 1012 | sched = s; |
813 | server = serv; | 1013 | server = serv; |
1014 | /* | ||
1015 | * child_death_task = | ||
1016 | GNUNET_SCHEDULER_add_read_file (sched, GNUNET_TIME_UNIT_FOREVER_REL, pr, | ||
1017 | &maint_child_death, NULL); | ||
1018 | */ | ||
814 | if (GNUNET_OK != | 1019 | if (GNUNET_OK != |
815 | GNUNET_CONFIGURATION_get_value_string (cfg, | 1020 | GNUNET_CONFIGURATION_get_value_string (cfg, |
816 | "ARM", | 1021 | "ARM", |
817 | "GLOBAL_PREFIX", | 1022 | "GLOBAL_PREFIX", |
818 | &prefix_command)) | 1023 | &prefix_command)) |
819 | prefix_command = GNUNET_strdup (""); | 1024 | prefix_command = GNUNET_strdup (""); |
820 | /* start default services... */ | 1025 | /* start default services... */ |
821 | if (GNUNET_OK == | 1026 | if (GNUNET_OK == |
822 | GNUNET_CONFIGURATION_get_value_string (cfg, | 1027 | GNUNET_CONFIGURATION_get_value_string (cfg, |
823 | "ARM", | 1028 | "ARM", |
824 | "DEFAULTSERVICES", | 1029 | "DEFAULTSERVICES", |
825 | &defaultservices)) | 1030 | &defaultservices)) |
826 | { | 1031 | { |
827 | #if DEBUG_ARM | 1032 | #if DEBUG_ARM |
828 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 1033 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
829 | "Starting default services `%s'\n", defaultservices); | 1034 | "Starting default services `%s'\n", defaultservices); |
830 | #endif | 1035 | #endif |
831 | pos = strtok (defaultservices, " "); | 1036 | pos = strtok (defaultservices, " "); |
832 | while (pos != NULL) | 1037 | while (pos != NULL) |
833 | { | 1038 | { |
834 | start_service (NULL, pos); | 1039 | start_service (NULL, pos); |
835 | pos = strtok (NULL, " "); | 1040 | pos = strtok (NULL, " "); |
836 | } | 1041 | } |
837 | GNUNET_free (defaultservices); | 1042 | GNUNET_free (defaultservices); |
838 | } | 1043 | } |
839 | else | 1044 | else |
840 | { | 1045 | { |
841 | #if DEBUG_ARM | 1046 | #if DEBUG_ARM |
842 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | 1047 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, |
843 | "No default services configured.\n"); | 1048 | "No default services configured.\n"); |
844 | #endif | 1049 | #endif |
845 | } | 1050 | } |
846 | 1051 | ||
@@ -865,10 +1070,8 @@ int | |||
865 | main (int argc, char *const *argv) | 1070 | main (int argc, char *const *argv) |
866 | { | 1071 | { |
867 | return (GNUNET_OK == | 1072 | return (GNUNET_OK == |
868 | GNUNET_SERVICE_run (argc, | 1073 | GNUNET_SERVICE_run (argc, |
869 | argv, "arm", | 1074 | argv, "arm", GNUNET_YES, &run, NULL)) ? 0 : 1; |
870 | GNUNET_YES, | ||
871 | &run, NULL)) ? 0 : 1; | ||
872 | } | 1075 | } |
873 | 1076 | ||
874 | /* end of gnunet-service-arm.c */ | 1077 | /* end of gnunet-service-arm.c */ |