aboutsummaryrefslogtreecommitdiff
path: root/src/service/fs/gnunet-helper-fs-publish.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/service/fs/gnunet-helper-fs-publish.c')
-rw-r--r--src/service/fs/gnunet-helper-fs-publish.c579
1 files changed, 579 insertions, 0 deletions
diff --git a/src/service/fs/gnunet-helper-fs-publish.c b/src/service/fs/gnunet-helper-fs-publish.c
new file mode 100644
index 000000000..0e07b79dc
--- /dev/null
+++ b/src/service/fs/gnunet-helper-fs-publish.c
@@ -0,0 +1,579 @@
1/*
2 This file is part of GNUnet.
3 Copyright (C) 2012 GNUnet e.V.
4
5 GNUnet is free software: you can redistribute it and/or modify it
6 under the terms of the GNU Affero General Public License as published
7 by the Free Software Foundation, either version 3 of the License,
8 or (at your option) any later version.
9
10 GNUnet is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Affero General Public License for more details.
14
15 You should have received a copy of the GNU Affero General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18 SPDX-License-Identifier: AGPL3.0-or-later
19 */
20
/**
 * @file src/service/fs/gnunet-helper-fs-publish.c
 * @brief Tool to help extract meta data asynchronously
 * @author Christian Grothoff
 *
 * This program will scan a directory for files with meta data
 * and report the results to stdout.
 */
29#include "platform.h"
30
31#include "gnunet_fs_service.h"
32
33
/**
 * One entry (file or directory) of the tree that is built while
 * scanning.
 */
struct ScanTreeNode
{
  /**
   * Next sibling in the parent's list of children.
   */
  struct ScanTreeNode *next;

  /**
   * Previous sibling in the parent's list of children.
   */
  struct ScanTreeNode *prev;

  /**
   * Directory this entry was found in, NULL for top-level entries.
   */
  struct ScanTreeNode *parent;

  /**
   * Head of the doubly-linked list of children;
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_head;

  /**
   * Tail of the doubly-linked list of children;
   * NULL for files and empty directories.
   */
  struct ScanTreeNode *children_tail;

  /**
   * Name of the file/directory (heap-allocated copy owned by this
   * node).
   */
  char *filename;

  /**
   * Size of the file in bytes (if it is a file).
   * At the moment it is left at 0 for directories.
   */
  uint64_t file_size;

  /**
   * #GNUNET_YES if this entry is a directory.
   */
  int is_directory;
};
82
83
#if HAVE_LIBEXTRACTOR
/**
 * Plugin list that is handed to libextractor when extracting
 * meta data from a file.
 */
static struct EXTRACTOR_PluginList *plugins;
#endif

/**
 * File descriptor on which we send our messages to the parent
 * process.
 */
static int output_stream;
95
96
#if HAVE_LIBEXTRACTOR
/**
 * Callback for libextractor: add one item of meta data that
 * libextractor found to our meta data container.
 *
 * Text items (UTF-8 or C string) that do not end in a 0-terminator
 * are copied and terminated before they are inserted.  Empty items
 * are ignored.
 *
 * @param cls closure, our meta data container
 * @param plugin_name name of the plugin that produced this value;
 *        special values can be used (e.g. '&lt;zlib&gt;' for zlib being
 *        used in the main libextractor library and yielding
 *        meta data).
 * @param type libextractor-type describing the meta data
 * @param format basic format information about data
 * @param data_mime_type mime-type of data (not of the original file);
 *        can be NULL (if mime-type is not known)
 * @param data actual meta-data found
 * @param data_len number of bytes in @a data
 * @return always 0 to continue extracting
 */
static int
add_to_md (void *cls,
           const char *plugin_name,
           enum EXTRACTOR_MetaType type,
           enum EXTRACTOR_MetaFormat format,
           const char *data_mime_type,
           const char *data,
           size_t data_len)
{
  struct GNUNET_FS_MetaData *md = cls;
  int is_text;

  /* Nothing to insert; this also keeps the data[data_len - 1]
     access below within bounds. */
  if ((NULL == data) || (0 == data_len))
    return 0;
  is_text = (EXTRACTOR_METAFORMAT_UTF8 == format) ||
            (EXTRACTOR_METAFORMAT_C_STRING == format);
  if (is_text && ('\0' != data[data_len - 1]))
  {
    /* text without 0-terminator: insert a terminated copy */
    char zdata[data_len + 1];

    GNUNET_memcpy (zdata, data, data_len);
    zdata[data_len] = '\0';
    (void) GNUNET_FS_meta_data_insert (md,
                                       plugin_name,
                                       type,
                                       format,
                                       data_mime_type,
                                       zdata,
                                       data_len + 1);
  }
  else
  {
    (void) GNUNET_FS_meta_data_insert (md,
                                       plugin_name,
                                       type,
                                       format,
                                       data_mime_type,
                                       data,
                                       data_len);
  }
  return 0;
}


#endif
156
157
158/**
159 * Free memory of the @a tree structure
160 *
161 * @param tree tree to free
162 */
163static void
164free_tree (struct ScanTreeNode *tree)
165{
166 struct ScanTreeNode *pos;
167
168 while (NULL != (pos = tree->children_head))
169 free_tree (pos);
170 if (NULL != tree->parent)
171 GNUNET_CONTAINER_DLL_remove (tree->parent->children_head,
172 tree->parent->children_tail,
173 tree);
174 GNUNET_free (tree->filename);
175 GNUNET_free (tree);
176}
177
178
179/**
180 * Write @a size bytes from @a buf into the #output_stream.
181 *
182 * @param buf buffer with data to write
183 * @param size number of bytes to write
184 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
185 */
186static int
187write_all (const void *buf, size_t size)
188{
189 const char *cbuf = buf;
190 size_t total;
191 ssize_t wr;
192
193 total = 0;
194 do
195 {
196 wr = write (output_stream, &cbuf[total], size - total);
197 if (wr > 0)
198 total += wr;
199 }
200 while ((wr > 0) && (total < size));
201 if (wr <= 0)
202 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
203 "Failed to write to stdout: %s\n",
204 strerror (errno));
205 return (total == size) ? GNUNET_OK : GNUNET_SYSERR;
206}
207
208
209/**
210 * Write message to the master process.
211 *
212 * @param message_type message type to use
213 * @param data data to append, NULL for none
214 * @param data_length number of bytes in @a data
215 * @return #GNUNET_SYSERR to stop scanning (the pipe was broken somehow)
216 */
217static int
218write_message (uint16_t message_type, const char *data, size_t data_length)
219{
220 struct GNUNET_MessageHeader hdr;
221
222#if 0
223 fprintf (stderr,
224 "Helper sends %u-byte message of type %u\n",
225 (unsigned int) (sizeof(struct GNUNET_MessageHeader) + data_length),
226 (unsigned int) message_type);
227#endif
228 hdr.type = htons (message_type);
229 hdr.size = htons (sizeof(struct GNUNET_MessageHeader) + data_length);
230 if ((GNUNET_OK != write_all (&hdr, sizeof(hdr))) ||
231 (GNUNET_OK != write_all (data, data_length)))
232 return GNUNET_SYSERR;
233 return GNUNET_OK;
234}
235
236
/**
 * Forward declaration: preprocess_file() hands scan_callback() to the
 * directory scanner and scan_callback() in turn calls
 * preprocess_file() for every entry it is given.
 *
 * Builds the tree item for @a filename (recursively for directories).
 * Does NOT yet add any metadata.
 *
 * @param filename file or directory to scan
 * @param dst where to store the resulting share tree item;
 *         NULL is stored in @a dst upon recoverable errors
 *         (#GNUNET_OK is returned)
 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
 */
static int
preprocess_file (const char *filename,
                 struct ScanTreeNode **dst);
249
250
/**
 * Closure passed to scan_callback() while a directory is scanned.
 */
struct RecursionContext
{
  /**
   * Directory node that receives the entries that are found.
   */
  struct ScanTreeNode *parent;

  /**
   * Set to #GNUNET_YES by the callback on serious errors.
   */
  int stop;
};
266
267
268/**
269 * Function called by the directory iterator to (recursively) add all
270 * of the files in the directory to the tree. Called by the directory
271 * scanner to initiate the scan. Does NOT yet add any metadata.
272 *
273 * @param cls the `struct RecursionContext`
274 * @param filename file or directory to scan
275 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
276 */
277static int
278scan_callback (void *cls, const char *filename)
279{
280 struct RecursionContext *rc = cls;
281 struct ScanTreeNode *chld;
282
283 if (GNUNET_OK != preprocess_file (filename, &chld))
284 {
285 rc->stop = GNUNET_YES;
286 return GNUNET_SYSERR;
287 }
288 if (NULL == chld)
289 return GNUNET_OK;
290 chld->parent = rc->parent;
291 GNUNET_CONTAINER_DLL_insert (rc->parent->children_head,
292 rc->parent->children_tail,
293 chld);
294 return GNUNET_OK;
295}
296
297
298/**
299 * Function called to (recursively) add all of the files in the
300 * directory to the tree. Called by the directory scanner to initiate
301 * the scan. Does NOT yet add any metadata.
302 *
303 * @param filename file or directory to scan
304 * @param dst where to store the resulting share tree item;
305 * NULL is stored in @a dst upon recoverable errors (#GNUNET_OK is returned)
306 * @return #GNUNET_OK on success, #GNUNET_SYSERR on error
307 */
308static int
309preprocess_file (const char *filename, struct ScanTreeNode **dst)
310{
311 struct ScanTreeNode *item;
312 struct stat sbuf;
313 uint64_t fsize = 0;
314
315 if ((0 != stat (filename, &sbuf)) ||
316 ((! S_ISDIR (sbuf.st_mode)) &&
317 (GNUNET_OK !=
318 GNUNET_DISK_file_size (filename, &fsize, GNUNET_NO, GNUNET_YES))))
319 {
320 /* If the file doesn't exist (or is not stat-able for any other reason)
321 skip it (but report it), but do continue. */
322 if (GNUNET_OK !=
323 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_SKIP_FILE,
324 filename,
325 strlen (filename) + 1))
326 return GNUNET_SYSERR;
327 /* recoverable error, store 'NULL' in *dst */
328 *dst = NULL;
329 return GNUNET_OK;
330 }
331
332 /* Report the progress */
333 if (
334 GNUNET_OK !=
335 write_message (S_ISDIR (sbuf.st_mode)
336 ? GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY
337 : GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_FILE,
338 filename,
339 strlen (filename) + 1))
340 return GNUNET_SYSERR;
341 item = GNUNET_new (struct ScanTreeNode);
342 item->filename = GNUNET_strdup (filename);
343 item->is_directory = (S_ISDIR (sbuf.st_mode)) ? GNUNET_YES : GNUNET_NO;
344 item->file_size = fsize;
345 if (GNUNET_YES == item->is_directory)
346 {
347 struct RecursionContext rc;
348
349 rc.parent = item;
350 rc.stop = GNUNET_NO;
351 GNUNET_DISK_directory_scan (filename, &scan_callback, &rc);
352 if (
353 (GNUNET_YES == rc.stop) ||
354 (GNUNET_OK !=
355 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_PROGRESS_DIRECTORY,
356 "..",
357 3)))
358 {
359 free_tree (item);
360 return GNUNET_SYSERR;
361 }
362 }
363 *dst = item;
364 return GNUNET_OK;
365}
366
367
368/**
369 * Extract metadata from files.
370 *
371 * @param item entry we are processing
372 * @return #GNUNET_OK on success, #GNUNET_SYSERR on fatal errors
373 */
374static int
375extract_files (struct ScanTreeNode *item)
376{
377 struct GNUNET_FS_MetaData *meta;
378 ssize_t size;
379 size_t slen;
380
381 if (GNUNET_YES == item->is_directory)
382 {
383 /* for directories, we simply only descent, no extraction, no
384 progress reporting */
385 struct ScanTreeNode *pos;
386
387 for (pos = item->children_head; NULL != pos; pos = pos->next)
388 if (GNUNET_OK != extract_files (pos))
389 return GNUNET_SYSERR;
390 return GNUNET_OK;
391 }
392
393 /* this is the expensive operation, *afterwards* we'll check for aborts */
394 meta = GNUNET_FS_meta_data_create ();
395#if HAVE_LIBEXTRACTOR
396 EXTRACTOR_extract (plugins, item->filename, NULL, 0, &add_to_md, meta);
397#endif
398 slen = strlen (item->filename) + 1;
399 size = GNUNET_FS_meta_data_get_serialized_size (meta);
400 if (-1 == size)
401 {
402 /* no meta data */
403 GNUNET_FS_meta_data_destroy (meta);
404 if (GNUNET_OK !=
405 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
406 item->filename,
407 slen))
408 return GNUNET_SYSERR;
409 return GNUNET_OK;
410 }
411 else if (size > (UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen))
412 {
413 /* We can't transfer more than 64k bytes in one message. */
414 size = UINT16_MAX - sizeof(struct GNUNET_MessageHeader) - slen;
415 }
416 {
417 char buf[size + slen];
418 char *dst = &buf[slen];
419
420 GNUNET_memcpy (buf, item->filename, slen);
421 size = GNUNET_FS_meta_data_serialize (
422 meta,
423 &dst,
424 size,
425 GNUNET_FS_META_DATA_SERIALIZE_PART);
426 if (size < 0)
427 {
428 GNUNET_break (0);
429 size = 0;
430 }
431 GNUNET_FS_meta_data_destroy (meta);
432 if (GNUNET_OK !=
433 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_META_DATA,
434 buf,
435 slen + size))
436 return GNUNET_SYSERR;
437 }
438 return GNUNET_OK;
439}
440
441
442/**
443 * Install a signal handler to ignore SIGPIPE.
444 */
445static void
446ignore_sigpipe ()
447{
448 struct sigaction oldsig;
449 struct sigaction sig;
450
451 memset (&sig, 0, sizeof(struct sigaction));
452 sig.sa_handler = SIG_IGN;
453 sigemptyset (&sig.sa_mask);
454#ifdef SA_INTERRUPT
455 sig.sa_flags = SA_INTERRUPT; /* SunOS */
456#else
457 sig.sa_flags = SA_RESTART;
458#endif
459 if (0 != sigaction (SIGPIPE, &sig, &oldsig))
460 fprintf (stderr,
461 "Failed to install SIGPIPE handler: %s\n",
462 strerror (errno));
463}
464
465
/**
 * Close the given file descriptor and re-open it on '/dev/null'
 * (despite the name of the function, '/dev/zero' is not involved).
 *
 * @param fd fd to bind to /dev/null
 * @param flags flags to use (O_RDONLY or O_WRONLY)
 */
static void
make_dev_zero (int fd, int flags)
{
  int null_fd;

  GNUNET_assert (0 == close (fd));
  null_fd = open ("/dev/null", flags);
  GNUNET_assert (-1 != null_fd);
  if (null_fd != fd)
  {
    /* open() did not hand us the slot we just freed: move it there */
    GNUNET_break (fd == dup2 (null_fd, fd));
    GNUNET_assert (0 == close (null_fd));
  }
}
485
486
487/**
488 * Main function of the helper process to extract meta data.
489 *
490 * @param argc should be 3
491 * @param argv [0] our binary name
492 * [1] name of the file or directory to process
493 * [2] "-" to disable extraction, NULL for defaults,
494 * otherwise custom plugins to load from LE
495 * @return 0 on success
496 */
497int
498main (int argc, char *const *argv)
499{
500 const char *filename_expanded;
501 const char *ex;
502 struct ScanTreeNode *root;
503
504 ignore_sigpipe ();
505 /* move stdout to some other FD for IPC, bind
506 stdout/stderr to /dev/null */
507 output_stream = dup (1);
508 make_dev_zero (1, O_WRONLY);
509 make_dev_zero (2, O_WRONLY);
510
511 /* parse command line */
512 if ((3 != argc) && (2 != argc))
513 {
514 fprintf (stderr,
515 "%s",
516 "gnunet-helper-fs-publish needs exactly one or two arguments\n");
517 return 1;
518 }
519 filename_expanded = argv[1];
520 ex = argv[2];
521 if ((NULL == ex) || (0 != strcmp (ex, "-")))
522 {
523#if HAVE_LIBEXTRACTOR
524 plugins = EXTRACTOR_plugin_add_defaults (EXTRACTOR_OPTION_DEFAULT_POLICY);
525 if (NULL != ex)
526 plugins = EXTRACTOR_plugin_add_config (plugins,
527 ex,
528 EXTRACTOR_OPTION_DEFAULT_POLICY);
529#endif
530 }
531
532 /* scan tree to find out how much work there is to be done */
533 if (GNUNET_OK != preprocess_file (filename_expanded, &root))
534 {
535 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR, NULL, 0);
536#if HAVE_LIBEXTRACTOR
537 EXTRACTOR_plugin_remove_all (plugins);
538#endif
539 return 2;
540 }
541 /* signal that we're done counting files, so that a percentage of
542 progress can now be calculated */
543 if (GNUNET_OK !=
544 write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_COUNTING_DONE,
545 NULL,
546 0))
547 {
548#if HAVE_LIBEXTRACTOR
549 EXTRACTOR_plugin_remove_all (plugins);
550#endif
551 return 3;
552 }
553 if (NULL != root)
554 {
555 if (GNUNET_OK != extract_files (root))
556 {
557 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_ERROR,
558 NULL,
559 0);
560 free_tree (root);
561#if HAVE_LIBEXTRACTOR
562 EXTRACTOR_plugin_remove_all (plugins);
563#endif
564 return 4;
565 }
566 free_tree (root);
567 }
568 /* enable "clean" shutdown by telling parent that we are done */
569 (void) write_message (GNUNET_MESSAGE_TYPE_FS_PUBLISH_HELPER_FINISHED,
570 NULL,
571 0);
572#if HAVE_LIBEXTRACTOR
573 EXTRACTOR_plugin_remove_all (plugins);
574#endif
575 return 0;
576}
577
578
579/* end of gnunet-helper-fs-publish.c */