diff options
author | Sree Harsha Totakura <totakura@in.tum.de> | 2013-03-06 10:52:53 +0000 |
---|---|---|
committer | Sree Harsha Totakura <totakura@in.tum.de> | 2013-03-06 10:52:53 +0000 |
commit | abbe7e59aef50c7899948d2cdd3343ef0946b62f (patch) | |
tree | 461886f7d50539684f2c949969e86624de44c2ba | |
parent | bea6a5e6fe596fe974bd6febbb17ce4731d1f7d8 (diff) | |
download | gnunet-abbe7e59aef50c7899948d2cdd3343ef0946b62f.tar.gz gnunet-abbe7e59aef50c7899948d2cdd3343ef0946b62f.zip |
Remove MPI code
Starts the child process only in one instance (checks for lowest host ip; and
that host ensures all instances co-ordinate via explicit lock file creation)
Implemented basic ARM-like functionality for dealing with child processes
-rw-r--r-- | src/testbed/gnunet_testbed_mpi_spawn.c | 393 |
1 files changed, 336 insertions, 57 deletions
diff --git a/src/testbed/gnunet_testbed_mpi_spawn.c b/src/testbed/gnunet_testbed_mpi_spawn.c index cb382b987..05b42a531 100644 --- a/src/testbed/gnunet_testbed_mpi_spawn.c +++ b/src/testbed/gnunet_testbed_mpi_spawn.c | |||
@@ -1,12 +1,13 @@ | |||
1 | #include "platform.h" | 1 | #include "platform.h" |
2 | #include "gnunet_util_lib.h" | 2 | #include "gnunet_util_lib.h" |
3 | #include <mpi.h> | 3 | #include "gnunet_testbed_service.h" |
4 | |||
4 | 5 | ||
5 | /** | 6 | /** |
6 | * Generic logging shorthand | 7 | * Generic logging shorthand |
7 | */ | 8 | */ |
8 | #define LOG(kind,...) \ | 9 | #define LOG(kind,...) \ |
9 | fprintf (stderr, __VA_ARGS__) | 10 | GNUNET_log (kind, __VA_ARGS__) |
10 | 11 | ||
11 | /** | 12 | /** |
12 | * Debug logging shorthand | 13 | * Debug logging shorthand |
@@ -19,6 +20,317 @@ | |||
19 | */ | 20 | */ |
20 | static int ret; | 21 | static int ret; |
21 | 22 | ||
23 | /** | ||
24 | * The child process we spawn | ||
25 | */ | ||
26 | static struct GNUNET_OS_Process *child; | ||
27 | |||
28 | /** | ||
29 | * The arguments including the binary to spawn | ||
30 | */ | ||
31 | static char **argv2; | ||
32 | |||
33 | /** | ||
34 | * All our IP addresses | ||
35 | */ | ||
36 | static char **our_addrs; | ||
37 | |||
38 | /** | ||
39 | * Pipe used to communicate shutdown via signal. | ||
40 | */ | ||
41 | static struct GNUNET_DISK_PipeHandle *sigpipe; | ||
42 | |||
43 | /** | ||
44 | * Filename of the unique file | ||
45 | */ | ||
46 | static char *fn; | ||
47 | |||
48 | /** | ||
49 | * Handle to the unique file | ||
50 | */ | ||
51 | static int fh; | ||
52 | |||
53 | /** | ||
54 | * The return code of the binary | ||
55 | */ | ||
56 | static unsigned long child_exit_code; | ||
57 | |||
58 | /** | ||
59 | * The process status of the child | ||
60 | */ | ||
61 | static enum GNUNET_OS_ProcessStatusType child_status; | ||
62 | |||
63 | /** | ||
64 | * how many IP addresses are currently assigned to us | ||
65 | */ | ||
66 | static unsigned int num_addrs; | ||
67 | |||
68 | /** | ||
69 | * The shutdown task | ||
70 | */ | ||
71 | static GNUNET_SCHEDULER_TaskIdentifier shutdown_task_id; | ||
72 | |||
73 | /** | ||
74 | * Task to kill the child | ||
75 | */ | ||
76 | static GNUNET_SCHEDULER_TaskIdentifier terminate_task_id; | ||
77 | |||
78 | /** | ||
79 | * Task to handle the death of the child | ||
80 | */ | ||
81 | static GNUNET_SCHEDULER_TaskIdentifier child_death_task_id; | ||
82 | |||
83 | /** | ||
84 | * The shutdown task | ||
85 | */ | ||
86 | static void | ||
87 | shutdown_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
88 | { | ||
89 | shutdown_task_id = GNUNET_SCHEDULER_NO_TASK; | ||
90 | if (0 != child_exit_code) | ||
91 | { | ||
92 | LOG (GNUNET_ERROR_TYPE_WARNING, "Child exited with error code: %lu\n", | ||
93 | child_exit_code); | ||
94 | ret = 128 + (int) child_exit_code; | ||
95 | } | ||
96 | if (0 != fh) | ||
97 | { | ||
98 | close (fh); | ||
99 | } | ||
100 | if ((NULL != fn) && (0 != unlink (fn))) | ||
101 | { | ||
102 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "open"); | ||
103 | ret = GNUNET_SYSERR; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | |||
108 | static void | ||
109 | terminate_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
110 | { | ||
111 | static int hard_kill; | ||
112 | |||
113 | GNUNET_assert (NULL != child); | ||
114 | terminate_task_id = | ||
115 | GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL, | ||
116 | &terminate_task, NULL); | ||
117 | if (0 != hard_kill) | ||
118 | { | ||
119 | switch (hard_kill) | ||
120 | { | ||
121 | case 1: | ||
122 | case 2: | ||
123 | LOG (GNUNET_ERROR_TYPE_WARNING, | ||
124 | "%d more interrupts needed to send SIGKILL to the child\n", | ||
125 | 3 - hard_kill); | ||
126 | hard_kill++; | ||
127 | return; | ||
128 | case 3: | ||
129 | GNUNET_break (0 == GNUNET_OS_process_kill (child, SIGKILL)); | ||
130 | return; | ||
131 | } | ||
132 | } | ||
133 | hard_kill++; | ||
134 | GNUNET_break (0 == GNUNET_OS_process_kill (child, SIGTERM)); | ||
135 | LOG (GNUNET_ERROR_TYPE_INFO, _("Waiting for child to exit.\n")); | ||
136 | } | ||
137 | |||
138 | |||
139 | /** | ||
140 | * Task triggered whenever we receive a SIGCHLD (child | ||
141 | * process died). | ||
142 | * | ||
143 | * @param cls closure, unused (always NULL) | ||
144 | * @param tc context | ||
145 | */ | ||
146 | static void | ||
147 | child_death_task (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
148 | { | ||
149 | const struct GNUNET_DISK_FileHandle *pr; | ||
150 | char c[16]; | ||
151 | |||
152 | pr = GNUNET_DISK_pipe_handle (sigpipe, GNUNET_DISK_PIPE_END_READ); | ||
153 | child_death_task_id = GNUNET_SCHEDULER_NO_TASK; | ||
154 | if (0 == (tc->reason & GNUNET_SCHEDULER_REASON_READ_READY)) | ||
155 | { | ||
156 | child_death_task_id = | ||
157 | GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, | ||
158 | pr, &child_death_task, NULL); | ||
159 | return; | ||
160 | } | ||
161 | /* consume the signal */ | ||
162 | GNUNET_break (0 < GNUNET_DISK_file_read (pr, &c, sizeof (c))); | ||
163 | LOG_DEBUG ("Child died\n"); | ||
164 | GNUNET_SCHEDULER_cancel (terminate_task_id); | ||
165 | terminate_task_id = GNUNET_SCHEDULER_NO_TASK; | ||
166 | GNUNET_assert (GNUNET_OK == GNUNET_OS_process_status (child, &child_status, | ||
167 | &child_exit_code)); | ||
168 | GNUNET_OS_process_destroy (child); | ||
169 | child = NULL; | ||
170 | shutdown_task_id = GNUNET_SCHEDULER_add_now (&shutdown_task, NULL); | ||
171 | } | ||
172 | |||
173 | |||
174 | /** | ||
175 | * Callback function invoked for each interface found. | ||
176 | * | ||
177 | * @param cls NULL | ||
178 | * @param name name of the interface (can be NULL for unknown) | ||
179 | * @param isDefault is this presumably the default interface | ||
180 | * @param addr address of this interface (can be NULL for unknown or unassigned) | ||
181 | * @param broadcast_addr the broadcast address (can be NULL for unknown or unassigned) | ||
182 | * @param netmask the network mask (can be NULL for unknown or unassigned) | ||
183 | * @param addrlen length of the address | ||
184 | * @return GNUNET_OK to continue iteration, GNUNET_SYSERR to abort | ||
185 | */ | ||
186 | static int | ||
187 | addr_proc (void *cls, const char *name, int isDefault, | ||
188 | const struct sockaddr *addr, | ||
189 | const struct sockaddr *broadcast_addr, | ||
190 | const struct sockaddr *netmask, socklen_t addrlen) | ||
191 | { | ||
192 | const struct sockaddr_in *in_addr; | ||
193 | char *ipaddr; | ||
194 | |||
195 | if (sizeof (struct sockaddr_in) != addrlen) | ||
196 | return GNUNET_OK; | ||
197 | in_addr = (const struct sockaddr_in *) addr; | ||
198 | if (NULL == (ipaddr = inet_ntoa (in_addr->sin_addr))) | ||
199 | return GNUNET_OK; | ||
200 | GNUNET_array_append (our_addrs, num_addrs, GNUNET_strdup (ipaddr)); | ||
201 | return GNUNET_OK; | ||
202 | } | ||
203 | |||
204 | |||
205 | static void | ||
206 | destroy_hosts(struct GNUNET_TESTBED_Host **hosts, unsigned int nhosts) | ||
207 | { | ||
208 | unsigned int host; | ||
209 | |||
210 | GNUNET_assert (NULL != hosts); | ||
211 | for (host = 0; host < nhosts; host++) | ||
212 | if (NULL != hosts[host]) | ||
213 | GNUNET_TESTBED_host_destroy (hosts[host]); | ||
214 | GNUNET_free (hosts); | ||
215 | hosts = NULL; | ||
216 | } | ||
217 | |||
218 | |||
219 | /** | ||
220 | * The main scheduler run task | ||
221 | * | ||
222 | * @param cls NULL | ||
223 | * @param tc scheduler task context | ||
224 | */ | ||
225 | static void | ||
226 | run (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc) | ||
227 | { | ||
228 | struct GNUNET_TESTBED_Host **hosts; | ||
229 | const struct GNUNET_CONFIGURATION_Handle *null_cfg; | ||
230 | const char *host_ip; | ||
231 | char *tmpdir; | ||
232 | unsigned int nhosts; | ||
233 | unsigned int host_cnt; | ||
234 | unsigned int addr_cnt; | ||
235 | |||
236 | GNUNET_OS_network_interfaces_list (&addr_proc, NULL); | ||
237 | if (0 == num_addrs) | ||
238 | { | ||
239 | GNUNET_break (0); | ||
240 | ret = GNUNET_SYSERR; | ||
241 | return; | ||
242 | } | ||
243 | null_cfg = GNUNET_CONFIGURATION_create (); | ||
244 | nhosts = GNUNET_TESTBED_hosts_load_from_loadleveler (null_cfg, &hosts); | ||
245 | if (0 == nhosts) | ||
246 | { | ||
247 | GNUNET_break (0); | ||
248 | ret = GNUNET_SYSERR; | ||
249 | return; | ||
250 | } | ||
251 | for (host_cnt = 0; host_cnt < nhosts; host_cnt++) | ||
252 | { | ||
253 | host_ip = GNUNET_TESTBED_host_get_hostname (hosts[host_cnt]); | ||
254 | for (addr_cnt = 0; addr_cnt < num_addrs; addr_cnt++) | ||
255 | if (0 == strcmp (host_ip, our_addrs[addr_cnt])) | ||
256 | goto proceed; | ||
257 | } | ||
258 | GNUNET_break (0); | ||
259 | ret = GNUNET_SYSERR; | ||
260 | destroy_hosts (hosts, nhosts); | ||
261 | return; | ||
262 | |||
263 | proceed: | ||
264 | destroy_hosts (hosts, nhosts); | ||
265 | if (0 != host_cnt) | ||
266 | { | ||
267 | LOG_DEBUG ("Exiting as we are not the lowest host\n"); | ||
268 | ret = GNUNET_OK; | ||
269 | return; | ||
270 | } | ||
271 | tmpdir = getenv ("TMPDIR"); | ||
272 | if (NULL == tmpdir) | ||
273 | tmpdir = getenv ("TMP"); | ||
274 | if (NULL == tmpdir) | ||
275 | tmpdir = getenv ("TEMP"); | ||
276 | if (NULL == tmpdir) | ||
277 | tmpdir = "/tmp"; | ||
278 | (void) GNUNET_asprintf (&fn, "%s/gnunet-testbed-spawn.lock", tmpdir); | ||
279 | /* Open the unique file; if we can create it then we can spawn the child process, | ||
280 | else we exit */ | ||
281 | fh = open (fn, O_CREAT | O_EXCL | O_CLOEXEC, | ||
282 | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); | ||
283 | if (-1 == fh) | ||
284 | { | ||
285 | if (EEXIST == errno) | ||
286 | { | ||
287 | LOG_DEBUG ("Lock file already created by other process. Exiting\n"); | ||
288 | ret = GNUNET_OK; | ||
289 | return; | ||
290 | } | ||
291 | GNUNET_log_strerror (GNUNET_ERROR_TYPE_ERROR, "open"); | ||
292 | ret = GNUNET_SYSERR; | ||
293 | return; | ||
294 | } | ||
295 | /* Spawn the new process here */ | ||
296 | LOG (GNUNET_ERROR_TYPE_INFO, _("Spawning process `%s'\n"), argv2[0]); | ||
297 | child = GNUNET_OS_start_process_vap (GNUNET_NO, GNUNET_OS_INHERIT_STD_ALL, NULL, | ||
298 | NULL, | ||
299 | argv2[0], argv2); | ||
300 | if (NULL == child) | ||
301 | { | ||
302 | GNUNET_break (0); | ||
303 | ret = GNUNET_SYSERR; | ||
304 | return; | ||
305 | } | ||
306 | ret = GNUNET_OK; | ||
307 | terminate_task_id = | ||
308 | GNUNET_SCHEDULER_add_delayed (GNUNET_TIME_UNIT_FOREVER_REL, | ||
309 | &terminate_task, NULL); | ||
310 | child_death_task_id = | ||
311 | GNUNET_SCHEDULER_add_read_file (GNUNET_TIME_UNIT_FOREVER_REL, | ||
312 | GNUNET_DISK_pipe_handle (sigpipe, | ||
313 | GNUNET_DISK_PIPE_END_READ), | ||
314 | &child_death_task, NULL); | ||
315 | } | ||
316 | |||
317 | |||
318 | /** | ||
319 | * Signal handler called for SIGCHLD. | ||
320 | */ | ||
321 | static void | ||
322 | sighandler_child_death () | ||
323 | { | ||
324 | static char c; | ||
325 | int old_errno = errno; /* back-up errno */ | ||
326 | |||
327 | GNUNET_break (1 == | ||
328 | GNUNET_DISK_file_write (GNUNET_DISK_pipe_handle | ||
329 | (sigpipe, GNUNET_DISK_PIPE_END_WRITE), | ||
330 | &c, sizeof (c))); | ||
331 | errno = old_errno; /* restore errno */ | ||
332 | } | ||
333 | |||
22 | 334 | ||
23 | /** | 335 | /** |
24 | * Execution start point | 336 | * Execution start point |
@@ -26,77 +338,44 @@ static int ret; | |||
26 | int | 338 | int |
27 | main (int argc, char *argv[]) | 339 | main (int argc, char *argv[]) |
28 | { | 340 | { |
29 | static const struct GNUNET_GETOPT_CommandLineOption options[] = { | 341 | struct GNUNET_SIGNAL_Context *shc_chld; |
30 | GNUNET_GETOPT_OPTION_END | ||
31 | }; | ||
32 | struct GNUNET_OS_Process *proc; | ||
33 | char **argv2; | ||
34 | unsigned long code; | ||
35 | enum GNUNET_OS_ProcessStatusType proc_status; | ||
36 | int rank; | ||
37 | int chstat; | ||
38 | unsigned int host; | ||
39 | unsigned int cnt; | 342 | unsigned int cnt; |
40 | 343 | ||
41 | ret = -1; | 344 | ret = -1; |
42 | if (argc < 2) | 345 | if (argc < 2) |
43 | { | 346 | { |
44 | printf ("Need arguments: gnunet-testbed-mpi-spawn <cmd> <cmd_args>"); | 347 | printf ("Need arguments: gnunet-testbed-mpi-spawn <cmd> <cmd_args>"); |
45 | return 1; | 348 | return 1; |
46 | } | 349 | } |
47 | if (MPI_SUCCESS != MPI_Init (&argc, &argv)) | 350 | if (GNUNET_OK != GNUNET_log_setup ("gnunet-testbed-spawn", NULL, NULL)) |
48 | { | 351 | { |
49 | GNUNET_break (0); | 352 | GNUNET_break (0); |
50 | return 2; | 353 | return 1; |
51 | } | 354 | } |
52 | if (MPI_SUCCESS != MPI_Comm_rank (MPI_COMM_WORLD, &rank)) | 355 | if (NULL == (sigpipe = GNUNET_DISK_pipe (GNUNET_NO, GNUNET_NO, |
356 | GNUNET_NO, GNUNET_NO))) | ||
53 | { | 357 | { |
54 | GNUNET_break (0); | 358 | GNUNET_break (0); |
55 | ret = 3; | 359 | ret = GNUNET_SYSERR; |
56 | (void) MPI_Finalize (); | 360 | return 1; |
57 | goto end; | ||
58 | } | 361 | } |
59 | if (0 != rank) | 362 | shc_chld = |
363 | GNUNET_SIGNAL_handler_install (GNUNET_SIGCHLD, &sighandler_child_death); | ||
364 | if (NULL == shc_chld) | ||
60 | { | 365 | { |
61 | ret = 0; | 366 | LOG (GNUNET_ERROR_TYPE_ERROR, "Cannot install a signal handler\n"); |
62 | (void) MPI_Finalize (); | 367 | return 1; |
63 | goto end; | ||
64 | } | 368 | } |
65 | (void) MPI_Finalize (); | ||
66 | PRINTF ("Spawning process\n"); | ||
67 | argv2 = GNUNET_malloc (sizeof (char *) * argc); | 369 | argv2 = GNUNET_malloc (sizeof (char *) * argc); |
68 | for (cnt = 1; cnt < argc; cnt++) | 370 | for (cnt = 1; cnt < argc; cnt++) |
69 | argv2[cnt - 1] = argv[cnt]; | 371 | argv2[cnt - 1] = argv[cnt]; |
70 | proc = | 372 | GNUNET_SCHEDULER_run (run, NULL); |
71 | GNUNET_OS_start_process_vap (GNUNET_NO, GNUNET_OS_INHERIT_STD_ALL, NULL, | 373 | GNUNET_free (argv2); |
72 | NULL, argv2[0], argv2); | 374 | GNUNET_SIGNAL_handler_uninstall (shc_chld); |
73 | if (NULL == proc) | 375 | shc_chld = NULL; |
74 | { | 376 | GNUNET_DISK_pipe_close (sigpipe); |
75 | LOG (GNUNET_ERROR_TYPE_ERROR, "Cannot exec\n"); | 377 | GNUNET_free_non_null (fn); |
76 | ret = 5; | 378 | if (GNUNET_OK != ret) |
77 | goto end; | 379 | return ret; |
78 | } | 380 | return 0; |
79 | do | ||
80 | { | ||
81 | (void) sleep (1); | ||
82 | chstat = GNUNET_OS_process_status (proc, &proc_status, &code); | ||
83 | } | ||
84 | while (GNUNET_NO == chstat); | ||
85 | if (GNUNET_OK != chstat) | ||
86 | { | ||
87 | ret = 6; | ||
88 | goto end; | ||
89 | } | ||
90 | if (0 != code) | ||
91 | { | ||
92 | LOG (GNUNET_ERROR_TYPE_WARNING, "Child terminated abnormally\n"); | ||
93 | ret = 50 + (int) code; | ||
94 | goto end; | ||
95 | } | ||
96 | ret = 0; | ||
97 | |||
98 | end: | ||
99 | if (0 != ret) | ||
100 | LOG (GNUNET_ERROR_TYPE_ERROR, "Something went wrong. Error: %d\n", ret); | ||
101 | return ret; | ||
102 | } | 381 | } |