aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSree Harsha Totakura <totakura@in.tum.de>2013-03-06 10:52:53 +0000
committerSree Harsha Totakura <totakura@in.tum.de>2013-03-06 10:52:53 +0000
commitabbe7e59aef50c7899948d2cdd3343ef0946b62f (patch)
tree461886f7d50539684f2c949969e86624de44c2ba
parentbea6a5e6fe596fe974bd6febbb17ce4731d1f7d8 (diff)
downloadgnunet-abbe7e59aef50c7899948d2cdd3343ef0946b62f.tar.gz
gnunet-abbe7e59aef50c7899948d2cdd3343ef0946b62f.zip
Remove MPI code
Starts the child process in only one instance (checks for the lowest host IP; that host ensures all instances coordinate via explicit lock file creation). Implemented basic ARM-like functionality for dealing with child processes.
-rw-r--r--src/testbed/gnunet_testbed_mpi_spawn.c393
1 files changed, 336 insertions, 57 deletions
diff --git a/src/testbed/gnunet_testbed_mpi_spawn.c b/src/testbed/gnunet_testbed_mpi_spawn.c
index cb382b987..05b42a531 100644
--- a/src/testbed/gnunet_testbed_mpi_spawn.c
+++ b/src/testbed/gnunet_testbed_mpi_spawn.c
@@ -1,12 +1,13 @@
1#include "platform.h" 1#include "platform.h"
2#include "gnunet_util_lib.h" 2#include "gnunet_util_lib.h"
3#include <mpi.h> 3#include "gnunet_testbed_service.h"
4
4 5
5/** 6/**
6 * Generic logging shorthand 7 * Generic logging shorthand
7 */ 8 */
8#define LOG(kind,...) \ 9#define LOG(kind,...) \
9 fprintf (stderr, __VA_ARGS__) 10 GNUNET_log (kind, __VA_ARGS__)
10 11
11/** 12/**
12 * Debug logging shorthand 13 * Debug logging shorthand
@@ -19,6 +20,317 @@
19 */ 20 */
20static int ret; 21static int ret;
21 22
23/**
24 * The child process we spawn
25 */
26static struct GNUNET_OS_Process *child;
27
28/**
29 * The arguments including the binary to spawn
30 */
31static char **argv2;
32
33/**
34 * All our IP addresses
35 */
36static char **our_addrs;
37
38/**
39 * Pipe used to communicate shutdown via signal.
40 */
41static struct GNUNET_DISK_PipeHandle *sigpipe;
42
43/**
44 * Filename of the unique file
45 */
46static char *fn;
47
48/**
49 * Handle to the unique file
50 */
51static int fh;
52
53/**
54 * The return code of the binary
55 */
56static unsigned long child_exit_code;
57
58/**
59 * The process status of the child
60 */
61static enum GNUNET_OS_ProcessStatusType child_status;
62
63/**
64 * how many IP addresses are currently assigned to us
65 */
66static unsigned int num_addrs;
67
68/**
69 * The shutdown task
70 */
71static GNUNET_SCHEDULER_TaskIdentifier shutdown_task_id;
72
73/**
74 * Task to kill the child
75 */
76static GNUNET_SCHEDULER_TaskIdentifier terminate_task_id;
77
78/**
79 * Task that reacts to the death of the child
80 */
81static GNUNET_SCHEDULER_TaskIdentifier child_death_task_id;
82
83/**
84 * The shutdown task
85 */
86static void
87shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
88{
89 shutdown_task_id = GNUNET_SCHEDULER_NO_TASK;
90 if (0 != child_exit_code)
91 {
92 LOG (GNUNET_ERROR_TYPE_WARNING, "Child exited with error code: %lu\n",
93 child_exit_code);
94 ret = 128 + (int) child_exit_code;
95 }
96 if (0 != fh)
97 {
98 close (fh);
99 }
100 if ((NULL != fn) && (0 != unlink (fn)))
101 {
102 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "open");
103 ret = GNUNET_SYSERR;
104 }
105}
106
107
108static void
109terminate_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
110{
111 static int hard_kill;
112
113 GNUNET_assert (NULL != child);
114 terminate_task_id =
115 GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL,
116 &terminate_task, NULL);
117 if (0 != hard_kill)
118 {
119 switch (hard_kill)
120 {
121 case 1:
122 case 2:
123 LOG (GNUNET_ERROR_TYPE_WARNING,
124 "%d more interrupts needed to send SIGKILL to the child\n",
125 3 - hard_kill);
126 hard_kill++;
127 return;
128 case 3:
129 GNUNET_break (0 == GNUNET_OS_process_kill (child, SIGKILL));
130 return;
131 }
132 }
133 hard_kill++;
134 GNUNET_break (0 == GNUNET_OS_process_kill (child, SIGTERM));
135 LOG (GNUNET_ERROR_TYPE_INFO, _("Waiting for child to exit.\n"));
136}
137
138
139/**
140 * Task triggered whenever we receive a SIGCHLD (child
141 * process died).
142 *
143 * @param cls closure, NULL if we need to self-restart
144 * @param tc context
145 */
146static void
147child_death_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
148{
149 const struct GNUNET_DISK_FileHandle *pr;
150 char c[16];
151
152 pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ);
153 child_death_task_id = GNUNET_SCHEDULER_NO_TASK;
154 if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY))
155 {
156 child_death_task_id =
157 GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
158 pr, &child_death_task, NULL);
159 return;
160 }
161 /* consume the signal */
162 GNUNET_break (0 < GNUNET_DISK_file_read (pr, &c, sizeof (c)));
163 LOG_DEBUG ("Child died\n");
164 GNUNET_SCHEDULER_cancel (terminate_task_id);
165 terminate_task_id = GNUNET_SCHEDULER_NO_TASK;
166 GNUNET_assert (GNUNET_OK == GNUNET_OS_process_status (child, &child_status,
167 &child_exit_code));
168 GNUNET_OS_process_destroy (child);
169 child = NULL;
170 shutdown_task_id = GNUNET_SCHEDULER_add_now (&shutdown_task, NULL);
171}
172
173
174/**
175 * Callback function invoked for each interface found.
176 *
177 * @param cls NULL
178 * @param name name of the interface (can be NULL for unknown)
179 * @param isDefault is this presumably the default interface
180 * @param addr address of this interface (can be NULL for unknown or unassigned)
181 * @param broadcast_addr the broadcast address (can be NULL for unknown or unassigned)
182 * @param netmask the network mask (can be NULL for unknown or unassigned))
183 * @param addrlen length of the address
184 * @return GNUNET_OK to continue iteration, GNUNET_SYSERR to abort
185 */
186static int
187addr_proc (void *cls, const char *name, int isDefault,
188 const struct sockaddr *addr,
189 const struct sockaddr *broadcast_addr,
190 const struct sockaddr *netmask, socklen_t addrlen)
191{
192 const struct sockaddr_in *in_addr;
193 char *ipaddr;
194
195 if (sizeof (struct sockaddr_in) != addrlen)
196 return GNUNET_OK;
197 in_addr = (const struct sockaddr_in *) addr;
198 if (NULL == (ipaddr = inet_ntoa (in_addr->sin_addr)))
199 return GNUNET_OK;
200 GNUNET_array_append (our_addrs, num_addrs, GNUNET_strdup (ipaddr));
201 return GNUNET_OK;
202}
203
204
205static void
206destroy_hosts(struct GNUNET_TESTBED_Host **hosts, unsigned int nhosts)
207{
208 unsigned int host;
209
210 GNUNET_assert (NULL != hosts);
211 for (host = 0; host < nhosts; host++)
212 if (NULL != hosts[host])
213 GNUNET_TESTBED_host_destroy (hosts[host]);
214 GNUNET_free (hosts);
215 hosts = NULL;
216}
217
218
219/**
220 * The main scheduler run task
221 *
222 * @param cls NULL
223 * @param tc scheduler task context
224 */
225static void
226run (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
227{
228 struct GNUNET_TESTBED_Host **hosts;
229 const struct GNUNET_CONFIGURATION_Handle *null_cfg;
230 const char *host_ip;
231 char *tmpdir;
232 unsigned int nhosts;
233 unsigned int host_cnt;
234 unsigned int addr_cnt;
235
236 GNUNET_OS_network_interfaces_list (&addr_proc, NULL);
237 if (0 == num_addrs)
238 {
239 GNUNET_break (0);
240 ret = GNUNET_SYSERR;
241 return;
242 }
243 null_cfg = GNUNET_CONFIGURATION_create ();
244 nhosts = GNUNET_TESTBED_hosts_load_from_loadleveler (null_cfg, &hosts);
245 if (0 == nhosts)
246 {
247 GNUNET_break (0);
248 ret = GNUNET_SYSERR;
249 return;
250 }
251 for (host_cnt = 0; host_cnt < nhosts; host_cnt++)
252 {
253 host_ip = GNUNET_TESTBED_host_get_hostname (hosts[host_cnt]);
254 for (addr_cnt = 0; addr_cnt < num_addrs; addr_cnt++)
255 if (0 == strcmp (host_ip, our_addrs[addr_cnt]))
256 goto proceed;
257 }
258 GNUNET_break (0);
259 ret = GNUNET_SYSERR;
260 destroy_hosts (hosts, nhosts);
261 return;
262
263 proceed:
264 destroy_hosts (hosts, nhosts);
265 if (0 != host_cnt)
266 {
267 LOG_DEBUG ("Exiting as we are not the lowest host\n");
268 ret = GNUNET_OK;
269 return;
270 }
271 tmpdir = getenv ("TMPDIR");
272 if (NULL == tmpdir)
273 tmpdir = getenv ("TMP");
274 if (NULL == tmpdir)
275 tmpdir = getenv ("TEMP");
276 if (NULL == tmpdir)
277 tmpdir = "/tmp";
278 (void) GNUNET_asprintf (&fn, "%s/gnunet-testbed-spawn.lock", tmpdir);
279 /* Open the unique file; we can create it then we can spawn the child process
280 else we exit */
281 fh = open (fn, O_CREAT | O_EXCL | O_CLOEXEC,
282 S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
283 if (-1 == fh)
284 {
285 if (EEXIST == errno)
286 {
287 LOG_DEBUG ("Lock file already created by other process. Exiting\n");
288 ret = GNUNET_OK;
289 return;
290 }
291 GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "open");
292 ret = GNUNET_SYSERR;
293 return;
294 }
295 /* Spawn the new process here */
296 LOG (GNUNET_ERROR_TYPE_INFO, _("Spawning process `%s'\n"), argv2[0]);
297 child = GNUNET_OS_start_process_vap (GNUNET_NO, GNUNET_OS_INHERIT_STD_ALL, NULL,
298 NULL,
299 argv2[0], argv2);
300 if (NULL == child)
301 {
302 GNUNET_break (0);
303 ret = GNUNET_SYSERR;
304 return;
305 }
306 ret = GNUNET_OK;
307 terminate_task_id =
308 GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL,
309 &terminate_task, NULL);
310 child_death_task_id =
311 GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL,
312 GNUNET_DISK_pipe_handle (sigpipe,
313 GNUNET_DISK_PIPE_END_READ),
314 &child_death_task, NULL);
315}
316
317
318/**
319 * Signal handler called for SIGCHLD.
320 */
321static void
322sighandler_child_death ()
323{
324 static char c;
325 int old_errno = errno; /* back-up errno */
326
327 GNUNET_break (1 ==
328 GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle
329 (sigpipe, GNUNET_DISK_PIPE_END_WRITE),
330 &c, sizeof (c)));
331 errno = old_errno; /* restore errno */
332}
333
22 334
23/** 335/**
24 * Execution start point 336 * Execution start point
@@ -26,77 +338,44 @@ static int ret;
26int 338int
27main (int argc, char *argv[]) 339main (int argc, char *argv[])
28{ 340{
29 static const struct GNUNET_GETOPT_CommandLineOption options[] = { 341 struct GNUNET_SIGNAL_Context *shc_chld;
30 GNUNET_GETOPT_OPTION_END
31 };
32 struct GNUNET_OS_Process *proc;
33 char **argv2;
34 unsigned long code;
35 enum GNUNET_OS_ProcessStatusType proc_status;
36 int rank;
37 int chstat;
38 unsigned int host;
39 unsigned int cnt; 342 unsigned int cnt;
40 343
41 ret = -1; 344 ret = -1;
42 if (argc < 2) 345 if (argc < 2)
43 { 346 {
44 printf ("Need arguments: gnunet-testbed-mpi-spawn <cmd> <cmd_args>"); 347 printf ("Need arguments: gnunet-testbed-mpi-spawn <cmd> <cmd_args>");
45 return 1; 348 return 1;
46 } 349 }
47 if (MPI_SUCCESS != MPI_Init (&argc, &argv)) 350 if (GNUNET_OK != GNUNET_log_setup ("gnunet-testbed-spawn", NULL, NULL))
48 { 351 {
49 GNUNET_break (0); 352 GNUNET_break (0);
50 return 2; 353 return 1;
51 } 354 }
52 if (MPI_SUCCESS != MPI_Comm_rank (MPI_COMM_WORLD, &rank)) 355 if (NULL == (sigpipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO,
356 GNUNET_NO, GNUNET_NO)))
53 { 357 {
54 GNUNET_break (0); 358 GNUNET_break (0);
55 ret = 3; 359 ret = GNUNET_SYSERR;
56 (void) MPI_Finalize (); 360 return 1;
57 goto end;
58 } 361 }
59 if (0 != rank) 362 shc_chld =
363 GNUNET_SIGNAL_handler_install (GNUNET_SIGCHLD, &sighandler_child_death);
364 if (NULL == shc_chld)
60 { 365 {
61 ret = 0; 366 LOG (GNUNET_ERROR_TYPE_ERROR, "Cannot install a signal handler\n");
62 (void) MPI_Finalize (); 367 return 1;
63 goto end;
64 } 368 }
65 (void) MPI_Finalize ();
66 PRINTF ("Spawning process\n");
67 argv2 = GNUNET_malloc (sizeof (char *) * argc); 369 argv2 = GNUNET_malloc (sizeof (char *) * argc);
68 for (cnt = 1; cnt < argc; cnt++) 370 for (cnt = 1; cnt < argc; cnt++)
69 argv2[cnt - 1] = argv[cnt]; 371 argv2[cnt - 1] = argv[cnt];
70 proc = 372 GNUNET_SCHEDULER_run (run, NULL);
71 GNUNET_OS_start_process_vap (GNUNET_NO, GNUNET_OS_INHERIT_STD_ALL, NULL, 373 GNUNET_free (argv2);
72 NULL, argv2[0], argv2); 374 GNUNET_SIGNAL_handler_uninstall (shc_chld);
73 if (NULL == proc) 375 shc_chld = NULL;
74 { 376 GNUNET_DISK_pipe_close (sigpipe);
75 LOG (GNUNET_ERROR_TYPE_ERROR, "Cannot exec\n"); 377 GNUNET_free_non_null (fn);
76 ret = 5; 378 if (GNUNET_OK != ret)
77 goto end; 379 return ret;
78 } 380 return 0;
79 do
80 {
81 (void) sleep (1);
82 chstat = GNUNET_OS_process_status (proc, &proc_status, &code);
83 }
84 while (GNUNET_NO == chstat);
85 if (GNUNET_OK != chstat)
86 {
87 ret = 6;
88 goto end;
89 }
90 if (0 != code)
91 {
92 LOG (GNUNET_ERROR_TYPE_WARNING, "Child terminated abnormally\n");
93 ret = 50 + (int) code;
94 goto end;
95 }
96 ret = 0;
97
98 end:
99 if (0 != ret)
100 LOG (GNUNET_ERROR_TYPE_ERROR, "Something went wrong. Error: %d\n", ret);
101 return ret;
102} 381}