aboutsummaryrefslogtreecommitdiff
path: root/src/fs/fs_file_information.c
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2012-01-28 14:52:48 +0000
committerChristian Grothoff <christian@grothoff.org>2012-01-28 14:52:48 +0000
commit4b9e92b8f0c33dd655abff1cda89b33aaf90ef16 (patch)
tree4dbe0b3109daad81b6f7c6845b152afd787576fc /src/fs/fs_file_information.c
parent0f5c5a8d55e8f3df8dcb8ddff61da621b6a482af (diff)
downloadgnunet-4b9e92b8f0c33dd655abff1cda89b33aaf90ef16.tar.gz
gnunet-4b9e92b8f0c33dd655abff1cda89b33aaf90ef16.zip
-change callback argument from char to int, as that is cleaner for a boolean; document API some
Diffstat (limited to 'src/fs/fs_file_information.c')
-rw-r--r--src/fs/fs_file_information.c512
1 files changed, 0 insertions, 512 deletions
diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c
index 94b4c1866..01ce6f54b 100644
--- a/src/fs/fs_file_information.c
+++ b/src/fs/fs_file_information.c
@@ -313,518 +313,6 @@ GNUNET_FS_file_information_create_from_reader (struct GNUNET_FS_Handle *h,
313 313
314 314
315/** 315/**
316 * Closure for "dir_scan_cb".
317 */
318struct DirScanCls
319{
320 /**
321 * Metadata extractors to use.
322 */
323 struct EXTRACTOR_PluginList *extractors;
324
325 /**
326 * Master context.
327 */
328 struct GNUNET_FS_Handle *h;
329
330 /**
331 * Function to call on each directory entry.
332 */
333 GNUNET_FS_FileProcessor proc;
334
335 /**
336 * Closure for proc.
337 */
338 void *proc_cls;
339
340 /**
341 * Scanner to use for subdirectories.
342 */
343 GNUNET_FS_DirectoryScanner scanner;
344
345 /**
346 * Closure for scanner.
347 */
348 void *scanner_cls;
349
350 /**
351 * Set to an error message (if any).
352 */
353 char *emsg;
354
355 /**
356 * Block options.
357 */
358 const struct GNUNET_FS_BlockOptions *bo;
359
360 /**
361 * Should files be indexed?
362 */
363 int do_index;
364
365};
366
367
368/**
369 * Function called on each entry in a file to cause
370 * default-publishing.
371 *
372 * @param cls closure (struct DirScanCls)
373 * @param filename name of the file to be published
374 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
375 */
376static int
377dir_scan_cb (void *cls, const char *filename)
378{
379 struct DirScanCls *dsc = cls;
380 struct stat sbuf;
381 struct GNUNET_FS_FileInformation *fi;
382 struct GNUNET_FS_Uri *keywords;
383 struct GNUNET_CONTAINER_MetaData *meta;
384
385 if (0 != STAT (filename, &sbuf))
386 {
387 GNUNET_asprintf (&dsc->emsg, _("`%s' failed on file `%s': %s"), "stat",
388 filename, STRERROR (errno));
389 return GNUNET_SYSERR;
390 }
391 if (S_ISDIR (sbuf.st_mode))
392 {
393 fi = GNUNET_FS_file_information_create_from_directory (dsc->h, NULL,
394 filename,
395 dsc->scanner,
396 dsc->scanner_cls,
397 dsc->do_index,
398 dsc->bo, &dsc->emsg);
399 if (NULL == fi)
400 {
401 GNUNET_assert (NULL != dsc->emsg);
402 return GNUNET_SYSERR;
403 }
404 }
405 else
406 {
407 meta = GNUNET_CONTAINER_meta_data_create ();
408 GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors);
409 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
410 fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename,
411 keywords, meta,
412 dsc->do_index, dsc->bo);
413 GNUNET_CONTAINER_meta_data_destroy (meta);
414 GNUNET_FS_uri_destroy (keywords);
415 }
416 dsc->proc (dsc->proc_cls, filename, fi);
417 return GNUNET_OK;
418}
419
420
421/**
422 * Simple, useful default implementation of a directory scanner
423 * (GNUNET_FS_DirectoryScanner). This implementation expects to get a
424 * UNIX filename, will publish all files in the directory except hidden
425 * files (those starting with a "."). Metadata will be extracted
426 * using GNU libextractor; the specific list of plugins should be
427 * specified in "cls", passing NULL will disable (!) metadata
428 * extraction. Keywords will be derived from the metadata and be
429 * subject to default canonicalization. This is strictly a
430 * convenience function.
431 *
432 * @param cls must be of type "struct EXTRACTOR_Extractor*"
433 * @param h handle to the file sharing subsystem
434 * @param dirname name of the directory to scan
435 * @param do_index should files be indexed or inserted
436 * @param bo block options
437 * @param proc function called on each entry
438 * @param proc_cls closure for proc
439 * @param emsg where to store an error message (on errors)
440 * @return GNUNET_OK on success
441 */
442int
443GNUNET_FS_directory_scanner_default (void *cls, struct GNUNET_FS_Handle *h,
444 const char *dirname, int do_index,
445 const struct GNUNET_FS_BlockOptions *bo,
446 GNUNET_FS_FileProcessor proc,
447 void *proc_cls, char **emsg)
448{
449 struct EXTRACTOR_PluginList *ex = cls;
450 struct DirScanCls dsc;
451
452 dsc.h = h;
453 dsc.extractors = ex;
454 dsc.proc = proc;
455 dsc.proc_cls = proc_cls;
456 dsc.scanner = &GNUNET_FS_directory_scanner_default;
457 dsc.scanner_cls = cls;
458 dsc.do_index = do_index;
459 dsc.bo = bo;
460 if (-1 == GNUNET_DISK_directory_scan (dirname, &dir_scan_cb, &dsc))
461 {
462 GNUNET_assert (NULL != dsc.emsg);
463 *emsg = dsc.emsg;
464 return GNUNET_SYSERR;
465 }
466 return GNUNET_OK;
467}
468
469
470/**
471 * Aggregate information we keep for meta data in each directory.
472 */
473struct MetaValueInformation
474{
475
476 /**
477 * Mime-type of data.
478 */
479 const char *mime_type;
480
481 /**
482 * The actual meta data.
483 */
484 const char *data;
485
486 /**
487 * Number of bytes in 'data'.
488 */
489 size_t data_size;
490
491 /**
492 * Type of the meta data.
493 */
494 enum EXTRACTOR_MetaType type;
495
496 /**
497 * Format of the meta data.
498 */
499 enum EXTRACTOR_MetaFormat format;
500
501 /**
502 * How often does this meta value occur in this directory?
503 */
504 unsigned int frequency;
505
506};
507
508
509/**
510 * Type of a function that libextractor calls for each
511 * meta data item found.
512 *
513 * @param cls the container multihashmap to update
514 * @param plugin_name name of the plugin that produced this value;
515 * special values can be used (i.e. '&lt;zlib&gt;' for zlib being
516 * used in the main libextractor library and yielding
517 * meta data).
518 * @param type libextractor-type describing the meta data
519 * @param format basic format information about data
520 * @param data_mime_type mime-type of data (not of the original file);
521 * can be NULL (if mime-type is not known)
522 * @param data actual meta-data found
523 * @param data_len number of bytes in data
524 * @return 0 to continue extracting / iterating
525 */
526static int
527update_metamap (void *cls, const char *plugin_name,
528 enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format,
529 const char *data_mime_type, const char *data, size_t data_len)
530{
531 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
532 GNUNET_HashCode key;
533 struct MetaValueInformation *mvi;
534
535 GNUNET_CRYPTO_hash (data, data_len, &key);
536 mvi = GNUNET_CONTAINER_multihashmap_get (map, &key);
537 if (mvi == NULL)
538 {
539 mvi = GNUNET_malloc (sizeof (struct MetaValueInformation));
540 mvi->mime_type = data_mime_type;
541 mvi->data = data;
542 mvi->data_size = data_len;
543 mvi->type = type;
544 mvi->format = format;
545 GNUNET_CONTAINER_multihashmap_put (map, &key, mvi,
546 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
547 }
548 mvi->frequency++;
549 return 0;
550}
551
552
/**
 * Per-directory statistics record for a single keyword.
 */
struct KeywordInformation
{

  /**
   * The keyword text.
   */
  const char *keyword;

  /**
   * Number of times this keyword was seen in the directory.
   */
  unsigned int frequency;

};
570
571
/**
 * Closure for 'dirproc_add'.
 */
struct EntryProcCls
{
  /**
   * Head of the linked list of directory entries collected so far.
   */
  struct GNUNET_FS_FileInformation *entries;

  /**
   * Statistics on the meta data of all entries in the directory.
   * Keys are the hash of the meta-value, values are of type
   * 'struct MetaValueInformation'.
   */
  struct GNUNET_CONTAINER_MultiHashMap *metamap;

  /**
   * Statistics on the keywords of all entries in the directory.
   * Keys are the hash of the keyword, values are of type
   * 'struct KeywordInformation'.
   */
  struct GNUNET_CONTAINER_MultiHashMap *keywordmap;

  /**
   * Length of the 'entries' list.
   */
  unsigned int count;

};
603
604
605/**
606 * Function that processes a directory entry that
607 * was obtained from the scanner. Adds each entry to
608 * the directory and computes directroy meta map.
609 *
610 * @param cls our closure
611 * @param filename name of the file (unused, why there???)
612 * @param fi information for publishing the file
613 */
614static void
615dirproc_add (void *cls, const char *filename,
616 struct GNUNET_FS_FileInformation *fi)
617{
618 struct EntryProcCls *dc = cls;
619 unsigned int i;
620 const char *kw;
621 struct KeywordInformation *ki;
622 GNUNET_HashCode key;
623
624 GNUNET_assert (fi->next == NULL);
625 GNUNET_assert (fi->dir == NULL);
626 fi->next = dc->entries;
627 dc->entries = fi;
628 dc->count++;
629 if (NULL != fi->meta)
630 GNUNET_CONTAINER_meta_data_iterate (fi->meta, &update_metamap, dc->metamap);
631 for (i = 0; i < fi->keywords->data.ksk.keywordCount; i++)
632 {
633 kw = fi->keywords->data.ksk.keywords[i];
634 GNUNET_CRYPTO_hash (kw, strlen (kw), &key);
635 ki = GNUNET_CONTAINER_multihashmap_get (dc->keywordmap, &key);
636 if (ki == NULL)
637 {
638 ki = GNUNET_malloc (sizeof (struct KeywordInformation));
639 ki->keyword = &kw[1];
640 GNUNET_CONTAINER_multihashmap_put (dc->keywordmap, &key, ki,
641 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
642 }
643 ki->frequency++;
644 }
645}
646
647
/**
 * Closure for 'compute_directory_metadata' and
 * 'compute_directory_keywords'.
 */
struct ComputeDirectoryMetadataContext
{
  /**
   * Keyword URI that receives the selected keywords.
   */
  struct GNUNET_FS_Uri *ksk;

  /**
   * Meta data container that receives the selected meta data.
   */
  struct GNUNET_CONTAINER_MetaData *meta;

  /**
   * A value is only selected if its frequency exceeds this threshold.
   */
  unsigned int threshold;
};
668
669
670/**
671 * Add metadata that occurs in more than the threshold entries of the
672 * directory to the directory itself. For example, if most files in a
673 * directory are of the same mime-type, the directory should have that
674 * mime-type as a keyword.
675 *
676 * @param cls the 'struct ComputeDirectoryMetadataContext'
677 * @param key unused
678 * @param value the 'struct MetaValueInformation' (to be freed as well)
679 * @return GNUNET_OK
680 */
681static int
682compute_directory_metadata (void *cls, const GNUNET_HashCode * key, void *value)
683{
684 struct ComputeDirectoryMetadataContext *cdmc = cls;
685 struct MetaValueInformation *mvi = value;
686
687 if (mvi->frequency > cdmc->threshold)
688 {
689 if (mvi->type != EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME)
690 (void) GNUNET_CONTAINER_meta_data_insert (cdmc->meta, "<children>",
691 mvi->type, mvi->format,
692 mvi->mime_type, mvi->data,
693 mvi->data_size);
694 if ((mvi->format == EXTRACTOR_METAFORMAT_UTF8) ||
695 (mvi->format == EXTRACTOR_METAFORMAT_C_STRING))
696 GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, mvi->data, GNUNET_NO);
697 }
698 GNUNET_free (mvi);
699 return GNUNET_OK;
700}
701
702
703/**
704 * Add keywords that occur in more than the threshold entries of the
705 * directory to the directory itself.
706 *
707 * @param cls the 'struct ComputeDirectoryMetadataContext'
708 * @param key unused
709 * @param value the 'struct Keywordnformation' (to be freed as well)
710 * @return GNUNET_OK
711 */
712static int
713compute_directory_keywords (void *cls, const GNUNET_HashCode * key, void *value)
714{
715 struct ComputeDirectoryMetadataContext *cdmc = cls;
716 struct KeywordInformation *ki = value;
717
718 if (ki->frequency > cdmc->threshold)
719 (void) GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, ki->keyword, GNUNET_NO);
720 GNUNET_free (ki);
721 return GNUNET_OK;
722}
723
724
725/**
726 * Create a publish-structure from an existing file hierarchy, inferring
727 * and organizing keywords and metadata as much as possible. This
728 * function primarily performs the recursive build and re-organizes
729 * keywords and metadata; for automatically getting metadata
730 * extraction, scanning of directories and creation of the respective
731 * GNUNET_FS_FileInformation entries the default scanner should be
732 * passed (GNUNET_FS_directory_scanner_default). This is strictly a
733 * convenience function.
734 *
735 * @param h handle to the file sharing subsystem
736 * @param client_info initial value for the client-info value for this entry
737 * @param filename name of the top-level file or directory
738 * @param scanner function used to get a list of files in a directory
739 * @param scanner_cls closure for scanner
740 * @param do_index should files in the hierarchy be indexed?
741 * @param bo block options
742 * @param emsg where to store an error message
743 * @return publish structure entry for the directory, NULL on error
744 */
745struct GNUNET_FS_FileInformation *
746GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
747 void *client_info,
748 const char *filename,
749 GNUNET_FS_DirectoryScanner
750 scanner, void *scanner_cls,
751 int do_index,
752 const struct
753 GNUNET_FS_BlockOptions *bo,
754 char **emsg)
755{
756 struct GNUNET_FS_FileInformation *ret;
757 struct ComputeDirectoryMetadataContext cdmc;
758 struct EntryProcCls dc;
759 const char *fn;
760 const char *ss;
761 char *dn;
762 struct GNUNET_FS_FileInformation *epos;
763 unsigned int i;
764 const char *kw;
765
766 dc.entries = NULL;
767 dc.count = 0;
768 dc.metamap = GNUNET_CONTAINER_multihashmap_create (64);
769 dc.keywordmap = GNUNET_CONTAINER_multihashmap_create (64);
770 /* update children to point to directory and generate statistics
771 * on all meta data in children */
772 scanner (scanner_cls, h, filename, do_index, bo, &dirproc_add, &dc, emsg);
773 cdmc.meta = GNUNET_CONTAINER_meta_data_create ();
774 cdmc.ksk = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
775 cdmc.ksk->type = ksk;
776 cdmc.threshold = 1 + dc.count / 2; /* 50% threshold for now */
777 GNUNET_FS_meta_data_make_directory (cdmc.meta);
778 GNUNET_CONTAINER_multihashmap_iterate (dc.metamap,
779 &compute_directory_metadata, &cdmc);
780 GNUNET_CONTAINER_multihashmap_iterate (dc.keywordmap,
781 &compute_directory_keywords, &cdmc);
782 GNUNET_CONTAINER_multihashmap_destroy (dc.metamap);
783 GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap);
784
785 /* remove keywords in children that are already in the
786 * parent */
787 for (epos = dc.entries; NULL != epos; epos = epos->next)
788 {
789 for (i = 0; i < cdmc.ksk->data.ksk.keywordCount; i++)
790 {
791 kw = cdmc.ksk->data.ksk.keywords[i];
792 GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]);
793 }
794 }
795 GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO);
796 ret =
797 GNUNET_FS_file_information_create_empty_directory (h, client_info, cdmc.ksk,
798 cdmc.meta, bo, filename);
799 GNUNET_CONTAINER_meta_data_destroy (cdmc.meta);
800 GNUNET_FS_uri_destroy (cdmc.ksk);
801 ret->data.dir.entries = dc.entries;
802 while (dc.entries != NULL)
803 {
804 dc.entries->dir = ret;
805 dc.entries = dc.entries->next;
806 }
807 fn = filename;
808 while ((NULL != (ss = strstr (fn, DIR_SEPARATOR_STR))) && (strlen (ss) > 1))
809 fn = ss + 1;
810 GNUNET_asprintf (&dn, "%s/", fn);
811#if !WINDOWS
812 GNUNET_CONTAINER_meta_data_insert (ret->meta, "<gnunet>",
813 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
814 EXTRACTOR_METAFORMAT_C_STRING,
815 "text/plain", dn, strlen (dn) + 1);
816#else
817 GNUNET_CONTAINER_meta_data_insert (ret->meta, "<gnunet>",
818 EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME,
819 EXTRACTOR_METAFORMAT_UTF8,
820 "text/plain", dn, strlen (dn) + 1);
821#endif
822 GNUNET_free (dn);
823 return ret;
824}
825
826
827/**
828 * Test if a given entry represents a directory. 316 * Test if a given entry represents a directory.
829 * 317 *
830 * @param ent check if this FI represents a directory 318 * @param ent check if this FI represents a directory