diff options
Diffstat (limited to 'src/fs/gnunet-helper-fs-publish.c')
-rw-r--r-- | src/fs/gnunet-helper-fs-publish.c | 578 |
1 files changed, 0 insertions, 578 deletions
diff --git a/src/fs/gnunet-helper-fs-publish.c b/src/fs/gnunet-helper-fs-publish.c deleted file mode 100644 index ef1a9ce4b..000000000 --- a/src/fs/gnunet-helper-fs-publish.c +++ /dev/null | |||
@@ -1,578 +0,0 @@ | |||
1 | /* | ||
2 | This file is part of GNUnet. | ||
3 | Copyright (C) 2012 GNUnet e.V. | ||
4 | |||
5 | GNUnet is free software: you can redistribute it and/or modify it | ||
6 | under the terms of the GNU Affero General Public License as published | ||
7 | by the Free Software Foundation, either version 3 of the License, | ||
8 | or (at your option) any later version. | ||
9 | |||
10 | GNUnet is distributed in the hope that it will be useful, but | ||
11 | WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
13 | Affero General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU Affero General Public License | ||
16 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
17 | |||
18 | SPDX-License-Identifier: AGPL-3.0-or-later | ||
19 | */ | ||
20 | |||
21 | /** | ||
22 | * @file src/fs/gnunet-helper-fs-publish.c | ||
23 | * @brief Tool to help extract meta data asynchronously | ||
24 | * @author Christian Grothoff | ||
25 | * | ||
26 | * This program will scan a directory for files with meta data | ||
27 | * and report the results to stdout. | ||
28 | */ | ||
29 | #include "platform.h" | ||
30 | #include "gnunet_fs_service.h" | ||
31 | |||
32 | |||
/**
 * One entry (file or directory) of the tree that is built while
 * scanning.
 */
struct ScanTreeNode
{
  /**
   * Next entry in the list of children this node belongs to.
   */
  struct ScanTreeNode *next;

  /**
   * Previous entry in the list of children this node belongs to.
   */
  struct ScanTreeNode *prev;

  /**
   * Directory node containing this entry; NULL for top-level entries.
   */
  struct ScanTreeNode *parent;

  /**
   * First child of this node (head of a doubly-linked list);
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_head;

  /**
   * Last child of this node (tail of a doubly-linked list);
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_tail;

  /**
   * Name of the file or directory.
   */
  char *filename;

  /**
   * Size of the file in bytes (for regular files).
   * Currently left at 0 for directories.
   */
  uint64_t file_size;

  /**
   * #GNUNET_YES if this entry is a directory.
   */
  int is_directory;
};
81 | |||
82 | |||
#if HAVE_LIBEXTRACTOR
/**
 * Libextractor plugins used when extracting meta data.
 */
static struct EXTRACTOR_PluginList *plugins;
#endif

/**
 * Descriptor over which we send messages (IPC) to the parent.
 */
static int output_stream;
94 | |||
95 | |||
#if HAVE_LIBEXTRACTOR
/**
 * Add meta data that libextractor finds to our meta data
 * container.
 *
 * String-typed values (UTF-8 or C string) that are not 0-terminated
 * are copied and 0-terminated before being inserted; this includes
 * the empty value (@a data_len of zero), which is inserted as "".
 *
 * @param cls closure, our meta data container
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (e.g. '<zlib>' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about data
 * @param data_mime_type mime-type of data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return always 0 to continue extracting
 */
static int
add_to_md (void *cls,
           const char *plugin_name,
           enum EXTRACTOR_MetaType type,
           enum EXTRACTOR_MetaFormat format,
           const char *data_mime_type,
           const char *data,
           size_t data_len)
{
  struct GNUNET_CONTAINER_MetaData *md = cls;
  const int is_text = (EXTRACTOR_METAFORMAT_UTF8 == format) ||
                      (EXTRACTOR_METAFORMAT_C_STRING == format);

  /* Check the length first: with data_len == 0 the expression
     data[data_len - 1] would read far outside the buffer. */
  if (is_text &&
      ((0 == data_len) || ('\0' != data[data_len - 1])))
  {
    char zdata[data_len + 1];

    if (0 != data_len)
      GNUNET_memcpy (zdata, data, data_len);
    zdata[data_len] = '\0';
    (void) GNUNET_CONTAINER_meta_data_insert (md,
                                              plugin_name,
                                              type,
                                              format,
                                              data_mime_type,
                                              zdata,
                                              data_len + 1);
  }
  else
  {
    (void) GNUNET_CONTAINER_meta_data_insert (md,
                                              plugin_name,
                                              type,
                                              format,
                                              data_mime_type,
                                              data,
                                              data_len);
  }
  return 0;
}


#endif
155 | |||
156 | |||
157 | /** | ||
158 | * Free memory of the @a tree structure | ||
159 | * | ||
160 | * @param tree tree to free | ||
161 | */ | ||
162 | static void | ||
163 | free_tree (struct ScanTreeNode *tree) | ||
164 | { | ||
165 | struct ScanTreeNode *pos; | ||
166 | |||
167 | while (NULL != (pos = tree->children_head)) | ||
168 | free_tree (pos); | ||
169 | if (NULL != tree->parent) | ||
170 | GNUNET_CONTAINER_DLL_remove (tree->parent->children_head, | ||
171 | tree->parent->children_tail, | ||
172 | tree); | ||
173 | GNUNET_free (tree->filename); | ||
174 | GNUNET_free (tree); | ||
175 | } | ||
176 | |||
177 | |||
178 | /** | ||
179 | * Write @a size bytes from @a buf into the #output_stream. | ||
180 | * | ||
181 | * @param buf buffer with data to write | ||
182 | * @param size number of bytes to write | ||
183 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on error | ||
184 | */ | ||
185 | static int | ||
186 | write_all (const void *buf, size_t size) | ||
187 | { | ||
188 | const char *cbuf = buf; | ||
189 | size_t total; | ||
190 | ssize_t wr; | ||
191 | |||
192 | total = 0; | ||
193 | do | ||
194 | { | ||
195 | wr = write (output_stream, &cbuf[total], size - total); | ||
196 | if (wr > 0) | ||
197 | total += wr; | ||
198 | } | ||
199 | while ((wr > 0) && (total < size)); | ||
200 | if (wr <= 0) | ||
201 | GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, | ||
202 | "Failed to write to stdout: %s\n", | ||
203 | strerror (errno)); | ||
204 | return (total == size) ? GNUNET_OK : GNUNET_SYSERR; | ||
205 | } | ||
206 | |||
207 | |||
208 | /** | ||
209 | * Write message to the master process. | ||
210 | * | ||
211 | * @param message_type message type to use | ||
212 | * @param data data to append, NULL for none | ||
213 | * @param data_length number of bytes in @a data | ||
214 | * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow) | ||
215 | */ | ||
216 | static int | ||
217 | write_message (uint16_t message_type, const char *data, size_t data_length) | ||
218 | { | ||
219 | struct GNUNET_MessageHeader hdr; | ||
220 | |||
221 | #if 0 | ||
222 | fprintf (stderr, | ||
223 | "Helper sends %u-byte message of type %u\n", | ||
224 | (unsigned int) (sizeof(struct GNUNET_MessageHeader) + data_length), | ||
225 | (unsigned int) message_type); | ||
226 | #endif | ||
227 | hdr.type = htons (message_type); | ||
228 | hdr.size = htons (sizeof(struct GNUNET_MessageHeader) + data_length); | ||
229 | if ((GNUNET_OK != write_all (&hdr, sizeof(hdr))) || | ||
230 | (GNUNET_OK != write_all (data, data_length))) | ||
231 | return GNUNET_SYSERR; | ||
232 | return GNUNET_OK; | ||
233 | } | ||
234 | |||
235 | |||
/**
 * Build the tree entry for @a filename; for a directory, all of its
 * entries are added (recursively) as well.  Progress is reported to
 * the parent process, meta data is NOT extracted at this stage.
 * Declared here because #scan_callback() and this function call
 * each other.
 *
 * @param filename file or directory to scan
 * @param dst set to the resulting tree entry; set to NULL if the
 *        entry had to be skipped (#GNUNET_OK is returned in that case)
 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
 */
static int
preprocess_file (const char *filename, struct ScanTreeNode **dst);
248 | |||
249 | |||
/**
 * State handed to #scan_callback() while a directory is scanned.
 */
struct RecursionContext
{
  /**
   * Directory node that receives the entries found.
   */
  struct ScanTreeNode *parent;

  /**
   * Set to #GNUNET_YES once a serious error was encountered.
   */
  int stop;
};
265 | |||
266 | |||
267 | /** | ||
268 | * Function called by the directory iterator to (recursively) add all | ||
269 | * of the files in the directory to the tree. Called by the directory | ||
270 | * scanner to initiate the scan. Does NOT yet add any metadata. | ||
271 | * | ||
272 | * @param cls the `struct RecursionContext` | ||
273 | * @param filename file or directory to scan | ||
274 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on error | ||
275 | */ | ||
276 | static int | ||
277 | scan_callback (void *cls, const char *filename) | ||
278 | { | ||
279 | struct RecursionContext *rc = cls; | ||
280 | struct ScanTreeNode *chld; | ||
281 | |||
282 | if (GNUNET_OK != preprocess_file (filename, &chld)) | ||
283 | { | ||
284 | rc->stop = GNUNET_YES; | ||
285 | return GNUNET_SYSERR; | ||
286 | } | ||
287 | if (NULL == chld) | ||
288 | return GNUNET_OK; | ||
289 | chld->parent = rc->parent; | ||
290 | GNUNET_CONTAINER_DLL_insert (rc->parent->children_head, | ||
291 | rc->parent->children_tail, | ||
292 | chld); | ||
293 | return GNUNET_OK; | ||
294 | } | ||
295 | |||
296 | |||
297 | /** | ||
298 | * Function called to (recursively) add all of the files in the | ||
299 | * directory to the tree. Called by the directory scanner to initiate | ||
300 | * the scan. Does NOT yet add any metadata. | ||
301 | * | ||
302 | * @param filename file or directory to scan | ||
303 | * @param dst where to store the resulting share tree item; | ||
304 | * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned) | ||
305 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on error | ||
306 | */ | ||
307 | static int | ||
308 | preprocess_file (const char *filename, struct ScanTreeNode **dst) | ||
309 | { | ||
310 | struct ScanTreeNode *item; | ||
311 | struct stat sbuf; | ||
312 | uint64_t fsize = 0; | ||
313 | |||
314 | if ((0 != stat (filename, &sbuf)) || | ||
315 | ((! S_ISDIR (sbuf.st_mode)) && | ||
316 | (GNUNET_OK != | ||
317 | GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES)))) | ||
318 | { | ||
319 | /* If the file doesn't exist (or is not stat-able for any other reason) | ||
320 | skip it (but report it), but do continue. */ | ||
321 | if (GNUNET_OK != | ||
322 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE, | ||
323 | filename, | ||
324 | strlen (filename) + 1)) | ||
325 | return GNUNET_SYSERR; | ||
326 | /* recoverable error, store 'NULL' in *dst */ | ||
327 | *dst = NULL; | ||
328 | return GNUNET_OK; | ||
329 | } | ||
330 | |||
331 | /* Report the progress */ | ||
332 | if ( | ||
333 | GNUNET_OK != | ||
334 | write_message (S_ISDIR (sbuf.st_mode) | ||
335 | ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY | ||
336 | : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE, | ||
337 | filename, | ||
338 | strlen (filename) + 1)) | ||
339 | return GNUNET_SYSERR; | ||
340 | item = GNUNET_new (struct ScanTreeNode); | ||
341 | item->filename = GNUNET_strdup (filename); | ||
342 | item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO; | ||
343 | item->file_size = fsize; | ||
344 | if (GNUNET_YES == item->is_directory) | ||
345 | { | ||
346 | struct RecursionContext rc; | ||
347 | |||
348 | rc.parent = item; | ||
349 | rc.stop = GNUNET_NO; | ||
350 | GNUNET_DISK_directory_scan (filename, &scan_callback, &rc); | ||
351 | if ( | ||
352 | (GNUNET_YES == rc.stop) || | ||
353 | (GNUNET_OK != | ||
354 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY, | ||
355 | "..", | ||
356 | 3))) | ||
357 | { | ||
358 | free_tree (item); | ||
359 | return GNUNET_SYSERR; | ||
360 | } | ||
361 | } | ||
362 | *dst = item; | ||
363 | return GNUNET_OK; | ||
364 | } | ||
365 | |||
366 | |||
367 | /** | ||
368 | * Extract metadata from files. | ||
369 | * | ||
370 | * @param item entry we are processing | ||
371 | * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors | ||
372 | */ | ||
373 | static int | ||
374 | extract_files (struct ScanTreeNode *item) | ||
375 | { | ||
376 | struct GNUNET_CONTAINER_MetaData *meta; | ||
377 | ssize_t size; | ||
378 | size_t slen; | ||
379 | |||
380 | if (GNUNET_YES == item->is_directory) | ||
381 | { | ||
382 | /* for directories, we simply only descent, no extraction, no | ||
383 | progress reporting */ | ||
384 | struct ScanTreeNode *pos; | ||
385 | |||
386 | for (pos = item->children_head; NULL != pos; pos = pos->next) | ||
387 | if (GNUNET_OK != extract_files (pos)) | ||
388 | return GNUNET_SYSERR; | ||
389 | return GNUNET_OK; | ||
390 | } | ||
391 | |||
392 | /* this is the expensive operation, *afterwards* we'll check for aborts */ | ||
393 | meta = GNUNET_CONTAINER_meta_data_create (); | ||
394 | #if HAVE_LIBEXTRACTOR | ||
395 | EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta); | ||
396 | #endif | ||
397 | slen = strlen (item->filename) + 1; | ||
398 | size = GNUNET_CONTAINER_meta_data_get_serialized_size (meta); | ||
399 | if (-1 == size) | ||
400 | { | ||
401 | /* no meta data */ | ||
402 | GNUNET_CONTAINER_meta_data_destroy (meta); | ||
403 | if (GNUNET_OK != | ||
404 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA, | ||
405 | item->filename, | ||
406 | slen)) | ||
407 | return GNUNET_SYSERR; | ||
408 | return GNUNET_OK; | ||
409 | } | ||
410 | else if (size > (UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen)) | ||
411 | { | ||
412 | /* We can't transfer more than 64k bytes in one message. */ | ||
413 | size = UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen; | ||
414 | } | ||
415 | { | ||
416 | char buf[size + slen]; | ||
417 | char *dst = &buf[slen]; | ||
418 | |||
419 | GNUNET_memcpy (buf, item->filename, slen); | ||
420 | size = GNUNET_CONTAINER_meta_data_serialize ( | ||
421 | meta, | ||
422 | &dst, | ||
423 | size, | ||
424 | GNUNET_CONTAINER_META_DATA_SERIALIZE_PART); | ||
425 | if (size < 0) | ||
426 | { | ||
427 | GNUNET_break (0); | ||
428 | size = 0; | ||
429 | } | ||
430 | GNUNET_CONTAINER_meta_data_destroy (meta); | ||
431 | if (GNUNET_OK != | ||
432 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA, | ||
433 | buf, | ||
434 | slen + size)) | ||
435 | return GNUNET_SYSERR; | ||
436 | } | ||
437 | return GNUNET_OK; | ||
438 | } | ||
439 | |||
440 | |||
441 | /** | ||
442 | * Install a signal handler to ignore SIGPIPE. | ||
443 | */ | ||
444 | static void | ||
445 | ignore_sigpipe () | ||
446 | { | ||
447 | struct sigaction oldsig; | ||
448 | struct sigaction sig; | ||
449 | |||
450 | memset (&sig, 0, sizeof(struct sigaction)); | ||
451 | sig.sa_handler = SIG_IGN; | ||
452 | sigemptyset (&sig.sa_mask); | ||
453 | #ifdef SA_INTERRUPT | ||
454 | sig.sa_flags = SA_INTERRUPT; /* SunOS */ | ||
455 | #else | ||
456 | sig.sa_flags = SA_RESTART; | ||
457 | #endif | ||
458 | if (0 != sigaction (SIGPIPE, &sig, &oldsig)) | ||
459 | fprintf (stderr, | ||
460 | "Failed to install SIGPIPE handler: %s\n", | ||
461 | strerror (errno)); | ||
462 | } | ||
463 | |||
464 | |||
/**
 * Rebind the given file descriptor to '/dev/null'.
 *
 * The descriptor is closed and '/dev/null' is opened; if the open did
 * not already yield @a fd, the new descriptor is duplicated onto @a fd
 * and then closed.
 *
 * @param fd fd to bind to /dev/null
 * @param flags flags to use (O_RDONLY or O_WRONLY)
 */
static void
make_dev_zero (int fd, int flags)
{
  int null_fd;

  GNUNET_assert (0 == close (fd));
  null_fd = open ("/dev/null", flags);
  GNUNET_assert (-1 != null_fd);
  if (null_fd != fd)
  {
    GNUNET_break (fd == dup2 (null_fd, fd));
    GNUNET_assert (0 == close (null_fd));
  }
}
484 | |||
485 | |||
486 | /** | ||
487 | * Main function of the helper process to extract meta data. | ||
488 | * | ||
489 | * @param argc should be 3 | ||
490 | * @param argv [0] our binary name | ||
491 | * [1] name of the file or directory to process | ||
492 | * [2] "-" to disable extraction, NULL for defaults, | ||
493 | * otherwise custom plugins to load from LE | ||
494 | * @return 0 on success | ||
495 | */ | ||
496 | int | ||
497 | main (int argc, char *const *argv) | ||
498 | { | ||
499 | const char *filename_expanded; | ||
500 | const char *ex; | ||
501 | struct ScanTreeNode *root; | ||
502 | |||
503 | ignore_sigpipe (); | ||
504 | /* move stdout to some other FD for IPC, bind | ||
505 | stdout/stderr to /dev/null */ | ||
506 | output_stream = dup (1); | ||
507 | make_dev_zero (1, O_WRONLY); | ||
508 | make_dev_zero (2, O_WRONLY); | ||
509 | |||
510 | /* parse command line */ | ||
511 | if ((3 != argc) && (2 != argc)) | ||
512 | { | ||
513 | fprintf (stderr, | ||
514 | "%s", | ||
515 | "gnunet-helper-fs-publish needs exactly one or two arguments\n"); | ||
516 | return 1; | ||
517 | } | ||
518 | filename_expanded = argv[1]; | ||
519 | ex = argv[2]; | ||
520 | if ((NULL == ex) || (0 != strcmp (ex, "-"))) | ||
521 | { | ||
522 | #if HAVE_LIBEXTRACTOR | ||
523 | plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
524 | if (NULL != ex) | ||
525 | plugins = EXTRACTOR_plugin_add_config (plugins, | ||
526 | ex, | ||
527 | EXTRACTOR_OPTION_DEFAULT_POLICY); | ||
528 | #endif | ||
529 | } | ||
530 | |||
531 | /* scan tree to find out how much work there is to be done */ | ||
532 | if (GNUNET_OK != preprocess_file (filename_expanded, &root)) | ||
533 | { | ||
534 | (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0); | ||
535 | #if HAVE_LIBEXTRACTOR | ||
536 | EXTRACTOR_plugin_remove_all (plugins); | ||
537 | #endif | ||
538 | return 2; | ||
539 | } | ||
540 | /* signal that we're done counting files, so that a percentage of | ||
541 | progress can now be calculated */ | ||
542 | if (GNUNET_OK != | ||
543 | write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE, | ||
544 | NULL, | ||
545 | 0)) | ||
546 | { | ||
547 | #if HAVE_LIBEXTRACTOR | ||
548 | EXTRACTOR_plugin_remove_all (plugins); | ||
549 | #endif | ||
550 | return 3; | ||
551 | } | ||
552 | if (NULL != root) | ||
553 | { | ||
554 | if (GNUNET_OK != extract_files (root)) | ||
555 | { | ||
556 | (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, | ||
557 | NULL, | ||
558 | 0); | ||
559 | free_tree (root); | ||
560 | #if HAVE_LIBEXTRACTOR | ||
561 | EXTRACTOR_plugin_remove_all (plugins); | ||
562 | #endif | ||
563 | return 4; | ||
564 | } | ||
565 | free_tree (root); | ||
566 | } | ||
567 | /* enable "clean" shutdown by telling parent that we are done */ | ||
568 | (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED, | ||
569 | NULL, | ||
570 | 0); | ||
571 | #if HAVE_LIBEXTRACTOR | ||
572 | EXTRACTOR_plugin_remove_all (plugins); | ||
573 | #endif | ||
574 | return 0; | ||
575 | } | ||
576 | |||
577 | |||
578 | /* end of gnunet-helper-fs-publish.c */ | ||