diff options
Diffstat (limited to 'src/service/fs/gnunet-helper-fs-publish.c')
-rw-r--r-- | src/service/fs/gnunet-helper-fs-publish.c | 579 |
1 files changed, 579 insertions, 0 deletions
diff --git a/src/service/fs/gnunet-helper-fs-publish.c b/src/service/fs/gnunet-helper-fs-publish.c new file mode 100644 index 000000000..0e07b79dc --- /dev/null +++ b/src/service/fs/gnunet-helper-fs-publish.c | |||
@@ -0,0 +1,579 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file src/service/fs/gnunet-helper-fs-publish.c | ||
23 | * @brief Tool to help extract meta data asynchronously | ||
24 | * @author Christian Grothoff | ||
25 | * | ||
26 | * This program will scan a directory for files with meta data | ||
27 | * and report the results to stdout. | ||
28 | */ | ||
29 | #include "platform.h" | ||
30 | |||
31 | #include "gnunet_fs_service.h" | ||
32 | |||
33 | |||
/**
 * One entry (file or directory) of the tree that is built while
 * scanning.
 */
struct ScanTreeNode
{
  /**
   * Next sibling; siblings are kept in a doubly-linked list.
   */
  struct ScanTreeNode *next;

  /**
   * Previous sibling; siblings are kept in a doubly-linked list.
   */
  struct ScanTreeNode *prev;

  /**
   * Node this entry belongs to; NULL for top-level entries.
   */
  struct ScanTreeNode *parent;

  /**
   * First element of the doubly-linked list of children;
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_head;

  /**
   * Last element of the doubly-linked list of children;
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_tail;

  /**
   * Name of the file or directory.
   */
  char *filename;

  /**
   * Size of the file in bytes (only meaningful for files);
   * currently left at 0 for directories.
   */
  uint64_t file_size;

  /**
   * #GNUNET_YES if this entry is a directory.
   */
  int is_directory;
};
82 | |||
83 | |||
#if HAVE_LIBEXTRACTOR
/**
 * libextractor plugins that are used to extract meta data.
 */
static struct EXTRACTOR_PluginList *plugins;
#endif

/**
 * Descriptor on which all messages for the parent process are
 * written (the IPC channel).
 */
static int output_stream;
95 | |||
96 | |||
#if HAVE_LIBEXTRACTOR
/**
 * Add meta data that libextractor finds to our meta data
 * container.
 *
 * Textual items (UTF-8 or C string) that are not 0-terminated are
 * copied and terminated before they are inserted, so the inserted
 * size is then @a data_len + 1.  Empty items are ignored.
 *
 * @param cls closure, our meta data container
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (e.g. '<zlib>' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about data
 * @param data_mime_type mime-type of data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return always 0 to continue extracting
 */
static int
add_to_md (void *cls,
           const char *plugin_name,
           enum EXTRACTOR_MetaType type,
           enum EXTRACTOR_MetaFormat format,
           const char *data_mime_type,
           const char *data,
           size_t data_len)
{
  struct GNUNET_FS_MetaData *md = cls;
  int is_text;

  /* Nothing to record; this also keeps the terminator check below
     from reading data[-1] when data_len is 0. */
  if ((NULL == data) || (0 == data_len))
    return 0;
  is_text = (EXTRACTOR_METAFORMAT_UTF8 == format) ||
            (EXTRACTOR_METAFORMAT_C_STRING == format);
  if (is_text && ('\0' != data[data_len - 1]))
  {
    /* Heap buffer instead of a variable-length array: the length is
       dictated by the scanned file and must not be able to exhaust
       the stack. */
    char *zdata = GNUNET_malloc (data_len + 1);

    GNUNET_memcpy (zdata, data, data_len);
    zdata[data_len] = '\0';
    (void) GNUNET_FS_meta_data_insert (md,
                                       plugin_name,
                                       type,
                                       format,
                                       data_mime_type,
                                       zdata,
                                       data_len + 1);
    GNUNET_free (zdata);
  }
  else
  {
    (void) GNUNET_FS_meta_data_insert (md,
                                       plugin_name,
                                       type,
                                       format,
                                       data_mime_type,
                                       data,
                                       data_len);
  }
  return 0;
}


#endif
156 | |||
157 | |||
158 | /** | ||
159 | * Free memory of the @a tree structure | ||
160 | * | ||
161 | * @param tree tree to free | ||
162 | */ | ||
163 | static void | ||
164 | free_tree (struct ScanTreeNode *tree) | ||
165 | { | ||
166 | struct ScanTreeNode *pos; | ||
167 | |||
168 | while (NULL != (pos = tree->children_head)) | ||
169 | free_tree (pos); | ||
170 | if (NULL != tree->parent) | ||
171 | GNUNET_CONTAINER_DLL_remove (tree->parent->children_head, | ||
172 | tree->parent->children_tail, | ||
173 | tree); | ||
174 | GNUNET_free (tree->filename); | ||
175 | GNUNET_free (tree); | ||
176 | } | ||
177 | |||
178 | |||
179 | /** | ||
180 | * Write @a size bytes from @a buf into the #output_stream. | ||
181 | * | ||
182 | * @param buf buffer with data to write | ||
183 | * @param size number of bytes to write | ||
184 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on error | ||
185 | */ | ||
186 | static int | ||
187 | write_all (const void *buf, size_t size) | ||
188 | { | ||
189 | const char *cbuf = buf; | ||
190 | size_t total; | ||
191 | ssize_t wr; | ||
192 | |||
193 | total = 0; | ||
194 | do | ||
195 | { | ||
196 | wr = write (output_stream, &cbuf[total], size - total); | ||
197 | if (wr > 0) | ||
198 | total += wr; | ||
199 | } | ||
200 | while ((wr > 0) && (total < size)); | ||
201 | if (wr <= 0) | ||
202 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
203 | "Failed to write to stdout: %s\n", | ||
204 | strerror (errno)); | ||
205 | return (total == size) ? GNUNET_OK : GNUNET_SYSERR; | ||
206 | } | ||
207 | |||
208 | |||
209 | /** | ||
210 | * Write message to the master process. | ||
211 | * | ||
212 | * @param message_type message type to use | ||
213 | * @param data data to append, NULL for none | ||
214 | * @param data_length number of bytes in @a data | ||
215 | * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow) | ||
216 | */ | ||
217 | static int | ||
218 | write_message (uint16_t message_type, const char *data, size_t data_length) | ||
219 | { | ||
220 | struct GNUNET_MessageHeader hdr; | ||
221 | |||
222 | #if 0 | ||
223 | fprintf (stderr, | ||
224 | "Helper sends %u-byte message of type %u\n", | ||
225 | (unsigned int) (sizeof(struct GNUNET_MessageHeader) + data_length), | ||
226 | (unsigned int) message_type); | ||
227 | #endif | ||
228 | hdr.type = htons (message_type); | ||
229 | hdr.size = htons (sizeof(struct GNUNET_MessageHeader) + data_length); | ||
230 | if ((GNUNET_OK != write_all (&hdr, sizeof(hdr))) || | ||
231 | (GNUNET_OK != write_all (data, data_length))) | ||
232 | return GNUNET_SYSERR; | ||
233 | return GNUNET_OK; | ||
234 | } | ||
235 | |||
236 | |||
/**
 * Forward declaration; needed because the directory iteration
 * callback and this function call each other.
 *
 * Adds @a filename (and, for directories, recursively everything
 * inside) to the tree.  Does NOT yet add any metadata.
 *
 * @param filename file or directory to scan
 * @param dst where to store the resulting share tree item;
 *         NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
 */
static int
preprocess_file (const char *filename, struct ScanTreeNode **dst);
249 | |||
250 | |||
/**
 * State handed to 'scan_callback' while a directory is iterated.
 */
struct RecursionContext
{
  /**
   * Directory node that receives the entries found.
   */
  struct ScanTreeNode *parent;

  /**
   * Set to GNUNET_YES once a serious error was encountered.
   */
  int stop;
};
266 | |||
267 | |||
268 | /** | ||
269 | * Function called by the directory iterator to (recursively) add all | ||
270 | * of the files in the directory to the tree. Called by the directory | ||
271 | * scanner to initiate the scan. Does NOT yet add any metadata. | ||
272 | * | ||
273 | * @param cls the `struct RecursionContext` | ||
274 | * @param filename file or directory to scan | ||
275 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on error | ||
276 | */ | ||
277 | static int | ||
278 | scan_callback (void *cls, const char *filename) | ||
279 | { | ||
280 | struct RecursionContext *rc = cls; | ||
281 | struct ScanTreeNode *chld; | ||
282 | |||
283 | if (GNUNET_OK != preprocess_file (filename, &chld)) | ||
284 | { | ||
285 | rc->stop = GNUNET_YES; | ||
286 | return GNUNET_SYSERR; | ||
287 | } | ||
288 | if (NULL == chld) | ||
289 | return GNUNET_OK; | ||
290 | chld->parent = rc->parent; | ||
291 | GNUNET_CONTAINER_DLL_insert (rc->parent->children_head, | ||
292 | rc->parent->children_tail, | ||
293 | chld); | ||
294 | return GNUNET_OK; | ||
295 | } | ||
296 | |||
297 | |||
298 | /** | ||
299 | * Function called to (recursively) add all of the files in the | ||
300 | * directory to the tree. Called by the directory scanner to initiate | ||
301 | * the scan. Does NOT yet add any metadata. | ||
302 | * | ||
303 | * @param filename file or directory to scan | ||
304 | * @param dst where to store the resulting share tree item; | ||
305 | * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned) | ||
306 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on error | ||
307 | */ | ||
308 | static int | ||
309 | preprocess_file (const char *filename, struct ScanTreeNode **dst) | ||
310 | { | ||
311 | struct ScanTreeNode *item; | ||
312 | struct stat sbuf; | ||
313 | uint64_t fsize = 0; | ||
314 | |||
315 | if ((0 != stat (filename, &sbuf)) || | ||
316 | ((! S_ISDIR (sbuf.st_mode)) && | ||
317 | (GNUNET_OK != | ||
318 | GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES)))) | ||
319 | { | ||
320 | /* If the file doesn't exist (or is not stat-able for any other reason) | ||
321 | skip it (but report it), but do continue. */ | ||
322 | if (GNUNET_OK != | ||
323 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE, | ||
324 | filename, | ||
325 | strlen (filename) + 1)) | ||
326 | return GNUNET_SYSERR; | ||
327 | /* recoverable error, store 'NULL' in *dst */ | ||
328 | *dst = NULL; | ||
329 | return GNUNET_OK; | ||
330 | } | ||
331 | |||
332 | /* Report the progress */ | ||
333 | if ( | ||
334 | GNUNET_OK != | ||
335 | write_message (S_ISDIR (sbuf.st_mode) | ||
336 | ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY | ||
337 | : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE, | ||
338 | filename, | ||
339 | strlen (filename) + 1)) | ||
340 | return GNUNET_SYSERR; | ||
341 | item = GNUNET_new (struct ScanTreeNode); | ||
342 | item->filename = GNUNET_strdup (filename); | ||
343 | item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO; | ||
344 | item->file_size = fsize; | ||
345 | if (GNUNET_YES == item->is_directory) | ||
346 | { | ||
347 | struct RecursionContext rc; | ||
348 | |||
349 | rc.parent = item; | ||
350 | rc.stop = GNUNET_NO; | ||
351 | GNUNET_DISK_directory_scan (filename, &scan_callback, &rc); | ||
352 | if ( | ||
353 | (GNUNET_YES == rc.stop) || | ||
354 | (GNUNET_OK != | ||
355 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY, | ||
356 | "..", | ||
357 | 3))) | ||
358 | { | ||
359 | free_tree (item); | ||
360 | return GNUNET_SYSERR; | ||
361 | } | ||
362 | } | ||
363 | *dst = item; | ||
364 | return GNUNET_OK; | ||
365 | } | ||
366 | |||
367 | |||
368 | /** | ||
369 | * Extract metadata from files. | ||
370 | * | ||
371 | * @param item entry we are processing | ||
372 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors | ||
373 | */ | ||
374 | static int | ||
375 | extract_files (struct ScanTreeNode *item) | ||
376 | { | ||
377 | struct GNUNET_FS_MetaData *meta; | ||
378 | ssize_t size; | ||
379 | size_t slen; | ||
380 | |||
381 | if (GNUNET_YES == item->is_directory) | ||
382 | { | ||
383 | /* for directories, we simply only descent, no extraction, no | ||
384 | progress reporting */ | ||
385 | struct ScanTreeNode *pos; | ||
386 | |||
387 | for (pos = item->children_head; NULL != pos; pos = pos->next) | ||
388 | if (GNUNET_OK != extract_files (pos)) | ||
389 | return GNUNET_SYSERR; | ||
390 | return GNUNET_OK; | ||
391 | } | ||
392 | |||
393 | /* this is the expensive operation, *afterwards* we'll check for aborts */ | ||
394 | meta = GNUNET_FS_meta_data_create (); | ||
395 | #if HAVE_LIBEXTRACTOR | ||
396 | EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta); | ||
397 | #endif | ||
398 | slen = strlen (item->filename) + 1; | ||
399 | size = GNUNET_FS_meta_data_get_serialized_size (meta); | ||
400 | if (-1 == size) | ||
401 | { | ||
402 | /* no meta data */ | ||
403 | GNUNET_FS_meta_data_destroy (meta); | ||
404 | if (GNUNET_OK != | ||
405 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA, | ||
406 | item->filename, | ||
407 | slen)) | ||
408 | return GNUNET_SYSERR; | ||
409 | return GNUNET_OK; | ||
410 | } | ||
411 | else if (size > (UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen)) | ||
412 | { | ||
413 | /* We can't transfer more than 64k bytes in one message. */ | ||
414 | size = UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen; | ||
415 | } | ||
416 | { | ||
417 | char buf[size + slen]; | ||
418 | char *dst = &buf[slen]; | ||
419 | |||
420 | GNUNET_memcpy (buf, item->filename, slen); | ||
421 | size = GNUNET_FS_meta_data_serialize ( | ||
422 | meta, | ||
423 | &dst, | ||
424 | size, | ||
425 | GNUNET_FS_META_DATA_SERIALIZE_PART); | ||
426 | if (size < 0) | ||
427 | { | ||
428 | GNUNET_break (0); | ||
429 | size = 0; | ||
430 | } | ||
431 | GNUNET_FS_meta_data_destroy (meta); | ||
432 | if (GNUNET_OK != | ||
433 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA, | ||
434 | buf, | ||
435 | slen + size)) | ||
436 | return GNUNET_SYSERR; | ||
437 | } | ||
438 | return GNUNET_OK; | ||
439 | } | ||
440 | |||
441 | |||
442 | /** | ||
443 | * Install a signal handler to ignore SIGPIPE. | ||
444 | */ | ||
445 | static void | ||
446 | ignore_sigpipe () | ||
447 | { | ||
448 | struct sigaction oldsig; | ||
449 | struct sigaction sig; | ||
450 | |||
451 | memset (&sig, 0, sizeof(struct sigaction)); | ||
452 | sig.sa_handler = SIG_IGN; | ||
453 | sigemptyset (&sig.sa_mask); | ||
454 | #ifdef SA_INTERRUPT | ||
455 | sig.sa_flags = SA_INTERRUPT; /* SunOS */ | ||
456 | #else | ||
457 | sig.sa_flags = SA_RESTART; | ||
458 | #endif | ||
459 | if (0 != sigaction (SIGPIPE, &sig, &oldsig)) | ||
460 | fprintf (stderr, | ||
461 | "Failed to install SIGPIPE handler: %s\n", | ||
462 | strerror (errno)); | ||
463 | } | ||
464 | |||
465 | |||
/**
 * Rebind the given file descriptor to '/dev/null' (despite the
 * function's name, it is not '/dev/zero' that is opened).
 *
 * @param fd fd to bind to /dev/null
 * @param flags flags to use (O_RDONLY or O_WRONLY)
 */
static void
make_dev_zero (int fd, int flags)
{
  int null_fd;

  GNUNET_assert (0 == close (fd));
  null_fd = open ("/dev/null", flags);
  GNUNET_assert (-1 != null_fd);
  if (fd != null_fd)
  {
    /* open() handed out a different descriptor: copy it onto fd and
       drop the temporary one */
    GNUNET_break (fd == dup2 (null_fd, fd));
    GNUNET_assert (0 == close (null_fd));
  }
}
485 | |||
486 | |||
487 | /** | ||
488 | * Main function of the helper process to extract meta data. | ||
489 | * | ||
490 | * @param argc should be 3 | ||
491 | * @param argv [0] our binary name | ||
492 | * [1] name of the file or directory to process | ||
493 | * [2] "-" to disable extraction, NULL for defaults, | ||
494 | * otherwise custom plugins to load from LE | ||
495 | * @return 0 on success | ||
496 | */ | ||
497 | int | ||
498 | main (int argc, char *const *argv) | ||
499 | { | ||
500 | const char *filename_expanded; | ||
501 | const char *ex; | ||
502 | struct ScanTreeNode *root; | ||
503 | |||
504 | ignore_sigpipe (); | ||
505 | /* move stdout to some other FD for IPC, bind | ||
506 | stdout/stderr to /dev/null */ | ||
507 | output_stream = dup (1); | ||
508 | make_dev_zero (1, O_WRONLY); | ||
509 | make_dev_zero (2, O_WRONLY); | ||
510 | |||
511 | /* parse command line */ | ||
512 | if ((3 != argc) && (2 != argc)) | ||
513 | { | ||
514 | fprintf (stderr, | ||
515 | "%s", | ||
516 | "gnunet-helper-fs-publish needs exactly one or two arguments\n"); | ||
517 | return 1; | ||
518 | } | ||
519 | filename_expanded = argv[1]; | ||
520 | ex = argv[2]; | ||
521 | if ((NULL == ex) || (0 != strcmp (ex, "-"))) | ||
522 | { | ||
523 | #if HAVE_LIBEXTRACTOR | ||
524 | plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
525 | if (NULL != ex) | ||
526 | plugins = EXTRACTOR_plugin_add_config (plugins, | ||
527 | ex, | ||
528 | EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
529 | #endif | ||
530 | } | ||
531 | |||
532 | /* scan tree to find out how much work there is to be done */ | ||
533 | if (GNUNET_OK != preprocess_file (filename_expanded, &root)) | ||
534 | { | ||
535 | (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0); | ||
536 | #if HAVE_LIBEXTRACTOR | ||
537 | EXTRACTOR_plugin_remove_all (plugins); | ||
538 | #endif | ||
539 | return 2; | ||
540 | } | ||
541 | /* signal that we're done counting files, so that a percentage of | ||
542 | progress can now be calculated */ | ||
543 | if (GNUNET_OK != | ||
544 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE, | ||
545 | NULL, | ||
546 | 0)) | ||
547 | { | ||
548 | #if HAVE_LIBEXTRACTOR | ||
549 | EXTRACTOR_plugin_remove_all (plugins); | ||
550 | #endif | ||
551 | return 3; | ||
552 | } | ||
553 | if (NULL != root) | ||
554 | { | ||
555 | if (GNUNET_OK != extract_files (root)) | ||
556 | { | ||
557 | (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, | ||
558 | NULL, | ||
559 | 0); | ||
560 | free_tree (root); | ||
561 | #if HAVE_LIBEXTRACTOR | ||
562 | EXTRACTOR_plugin_remove_all (plugins); | ||
563 | #endif | ||
564 | return 4; | ||
565 | } | ||
566 | free_tree (root); | ||
567 | } | ||
568 | /* enable "clean" shutdown by telling parent that we are done */ | ||
569 | (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED, | ||
570 | NULL, | ||
571 | 0); | ||
572 | #if HAVE_LIBEXTRACTOR | ||
573 | EXTRACTOR_plugin_remove_all (plugins); | ||
574 | #endif | ||
575 | return 0; | ||
576 | } | ||
577 | |||
578 | |||
579 | /* end of gnunet-helper-fs-publish.c */ | ||