diff options
Diffstat (limited to 'src/fs/fs_file_information.c')
-rw-r--r-- | src/fs/fs_file_information.c | 112 |
1 files changed, 45 insertions, 67 deletions
diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c index ce4f189a9..bb84b948e 100644 --- a/src/fs/fs_file_information.c +++ b/src/fs/fs_file_information.c | |||
@@ -437,7 +437,7 @@ GNUNET_FS_directory_scanner_default (void *cls, struct GNUNET_FS_Handle *h, | |||
437 | */ | 437 | */ |
438 | struct MetaValueInformation | 438 | struct MetaValueInformation |
439 | { | 439 | { |
440 | 440 | ||
441 | /** | 441 | /** |
442 | * Mime-type of data. | 442 | * Mime-type of data. |
443 | */ | 443 | */ |
@@ -481,21 +481,17 @@ struct MetaValueInformation | |||
481 | * used in the main libextractor library and yielding | 481 | * used in the main libextractor library and yielding |
482 | * meta data). | 482 | * meta data). |
483 | * @param type libextractor-type describing the meta data | 483 | * @param type libextractor-type describing the meta data |
484 | * @param format basic format information about data | 484 | * @param format basic format information about data |
485 | * @param data_mime_type mime-type of data (not of the original file); | 485 | * @param data_mime_type mime-type of data (not of the original file); |
486 | * can be NULL (if mime-type is not known) | 486 | * can be NULL (if mime-type is not known) |
487 | * @param data actual meta-data found | 487 | * @param data actual meta-data found |
488 | * @param data_len number of bytes in data | 488 | * @param data_len number of bytes in data |
489 | * @return 0 to continue extracting / iterating | 489 | * @return 0 to continue extracting / iterating |
490 | */ | 490 | */ |
491 | static int | 491 | static int |
492 | update_metamap (void *cls, | 492 | update_metamap (void *cls, const char *plugin_name, |
493 | const char *plugin_name, | 493 | enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, |
494 | enum EXTRACTOR_MetaType type, | 494 | const char *data_mime_type, const char *data, size_t data_len) |
495 | enum EXTRACTOR_MetaFormat format, | ||
496 | const char *data_mime_type, | ||
497 | const char *data, | ||
498 | size_t data_len) | ||
499 | { | 495 | { |
500 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; | 496 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; |
501 | GNUNET_HashCode key; | 497 | GNUNET_HashCode key; |
@@ -512,10 +508,10 @@ update_metamap (void *cls, | |||
512 | mvi->type = type; | 508 | mvi->type = type; |
513 | mvi->format = format; | 509 | mvi->format = format; |
514 | GNUNET_CONTAINER_multihashmap_put (map, &key, mvi, | 510 | GNUNET_CONTAINER_multihashmap_put (map, &key, mvi, |
515 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); | 511 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); |
516 | } | 512 | } |
517 | mvi->frequency++; | 513 | mvi->frequency++; |
518 | return 0; | 514 | return 0; |
519 | } | 515 | } |
520 | 516 | ||
521 | 517 | ||
@@ -524,7 +520,7 @@ update_metamap (void *cls, | |||
524 | */ | 520 | */ |
525 | struct KeywordInformation | 521 | struct KeywordInformation |
526 | { | 522 | { |
527 | 523 | ||
528 | /** | 524 | /** |
529 | * Mime-type of keyword. | 525 | * Mime-type of keyword. |
530 | */ | 526 | */ |
@@ -581,37 +577,35 @@ struct EntryProcCls | |||
581 | * @param fi information for publishing the file | 577 | * @param fi information for publishing the file |
582 | */ | 578 | */ |
583 | static void | 579 | static void |
584 | dirproc_add (void *cls, const char *filename, | 580 | dirproc_add (void *cls, const char *filename, |
585 | struct GNUNET_FS_FileInformation *fi) | 581 | struct GNUNET_FS_FileInformation *fi) |
586 | { | 582 | { |
587 | struct EntryProcCls *dc = cls; | 583 | struct EntryProcCls *dc = cls; |
588 | unsigned int i; | 584 | unsigned int i; |
589 | const char *kw; | 585 | const char *kw; |
590 | struct KeywordInformation *ki; | 586 | struct KeywordInformation *ki; |
591 | GNUNET_HashCode key; | 587 | GNUNET_HashCode key; |
592 | 588 | ||
593 | GNUNET_assert (fi->next == NULL); | 589 | GNUNET_assert (fi->next == NULL); |
594 | GNUNET_assert (fi->dir == NULL); | 590 | GNUNET_assert (fi->dir == NULL); |
595 | fi->next = dc->entries; | 591 | fi->next = dc->entries; |
596 | dc->entries = fi; | 592 | dc->entries = fi; |
597 | dc->count++; | 593 | dc->count++; |
598 | if (NULL != fi->meta) | 594 | if (NULL != fi->meta) |
599 | GNUNET_CONTAINER_meta_data_iterate (fi->meta, | 595 | GNUNET_CONTAINER_meta_data_iterate (fi->meta, &update_metamap, dc->metamap); |
600 | &update_metamap, | 596 | for (i = 0; i < fi->keywords->data.ksk.keywordCount; i++) |
601 | dc->metamap); | ||
602 | for (i=0;i<fi->keywords->data.ksk.keywordCount;i++) | ||
603 | { | 597 | { |
604 | kw = fi->keywords->data.ksk.keywords[i]; | 598 | kw = fi->keywords->data.ksk.keywords[i]; |
605 | GNUNET_CRYPTO_hash (kw, strlen(kw), &key); | 599 | GNUNET_CRYPTO_hash (kw, strlen (kw), &key); |
606 | ki = GNUNET_CONTAINER_multihashmap_get (dc->keywordmap, &key); | 600 | ki = GNUNET_CONTAINER_multihashmap_get (dc->keywordmap, &key); |
607 | if (ki == NULL) | 601 | if (ki == NULL) |
608 | { | 602 | { |
609 | ki = GNUNET_malloc (sizeof (struct KeywordInformation)); | 603 | ki = GNUNET_malloc (sizeof (struct KeywordInformation)); |
610 | ki->keyword = &kw[1]; | 604 | ki->keyword = &kw[1]; |
611 | GNUNET_CONTAINER_multihashmap_put (dc->keywordmap, &key, ki, | 605 | GNUNET_CONTAINER_multihashmap_put (dc->keywordmap, &key, ki, |
612 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); | 606 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); |
613 | } | 607 | } |
614 | ki->frequency++; | 608 | ki->frequency++; |
615 | } | 609 | } |
616 | } | 610 | } |
617 | 611 | ||
@@ -633,7 +627,7 @@ struct ComputeDirectoryMetadataContext | |||
633 | 627 | ||
634 | /** | 628 | /** |
635 | * Threshold to apply for adding meta data. | 629 | * Threshold to apply for adding meta data. |
636 | */ | 630 | */ |
637 | unsigned int threshold; | 631 | unsigned int threshold; |
638 | }; | 632 | }; |
639 | 633 | ||
@@ -650,29 +644,22 @@ struct ComputeDirectoryMetadataContext | |||
650 | * @return GNUNET_OK | 644 | * @return GNUNET_OK |
651 | */ | 645 | */ |
652 | static int | 646 | static int |
653 | compute_directory_metadata (void *cls, | 647 | compute_directory_metadata (void *cls, const GNUNET_HashCode * key, void *value) |
654 | const GNUNET_HashCode *key, | ||
655 | void *value) | ||
656 | { | 648 | { |
657 | struct ComputeDirectoryMetadataContext *cdmc = cls; | 649 | struct ComputeDirectoryMetadataContext *cdmc = cls; |
658 | struct MetaValueInformation *mvi = value; | 650 | struct MetaValueInformation *mvi = value; |
659 | 651 | ||
660 | if (mvi->frequency > cdmc->threshold) | 652 | if (mvi->frequency > cdmc->threshold) |
661 | { | 653 | { |
662 | if (mvi->type != EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME) | 654 | if (mvi->type != EXTRACTOR_METATYPE_GNUNET_ORIGINAL_FILENAME) |
663 | (void) GNUNET_CONTAINER_meta_data_insert (cdmc->meta, | 655 | (void) GNUNET_CONTAINER_meta_data_insert (cdmc->meta, "<children>", |
664 | "<children>", | 656 | mvi->type, mvi->format, |
665 | mvi->type, | 657 | mvi->mime_type, mvi->data, |
666 | mvi->format, | 658 | mvi->data_size); |
667 | mvi->mime_type, | 659 | if ((mvi->format == EXTRACTOR_METAFORMAT_UTF8) || |
668 | mvi->data, | 660 | (mvi->format == EXTRACTOR_METAFORMAT_C_STRING)) |
669 | mvi->data_size); | 661 | GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, mvi->data, GNUNET_NO); |
670 | if ( (mvi->format == EXTRACTOR_METAFORMAT_UTF8) || | 662 | } |
671 | (mvi->format == EXTRACTOR_METAFORMAT_C_STRING) ) | ||
672 | GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, | ||
673 | mvi->data, | ||
674 | GNUNET_NO); | ||
675 | } | ||
676 | GNUNET_free (mvi); | 663 | GNUNET_free (mvi); |
677 | return GNUNET_OK; | 664 | return GNUNET_OK; |
678 | } | 665 | } |
@@ -680,7 +667,7 @@ compute_directory_metadata (void *cls, | |||
680 | 667 | ||
681 | /** | 668 | /** |
682 | * Add keywords that occur in more than the threshold entries of the | 669 | * Add keywords that occur in more than the threshold entries of the |
683 | * directory to the directory itself. | 670 | * directory to the directory itself. |
684 | * | 671 | * |
685 | * @param cls the 'struct ComputeDirectoryMetadataContext' | 672 | * @param cls the 'struct ComputeDirectoryMetadataContext' |
686 | * @param key unused | 673 | * @param key unused |
@@ -688,17 +675,13 @@ compute_directory_metadata (void *cls, | |||
688 | * @return GNUNET_OK | 675 | * @return GNUNET_OK |
689 | */ | 676 | */ |
690 | static int | 677 | static int |
691 | compute_directory_keywords (void *cls, | 678 | compute_directory_keywords (void *cls, const GNUNET_HashCode * key, void *value) |
692 | const GNUNET_HashCode *key, | ||
693 | void *value) | ||
694 | { | 679 | { |
695 | struct ComputeDirectoryMetadataContext *cdmc = cls; | 680 | struct ComputeDirectoryMetadataContext *cdmc = cls; |
696 | struct KeywordInformation *ki = value; | 681 | struct KeywordInformation *ki = value; |
697 | 682 | ||
698 | if (ki->frequency > cdmc->threshold) | 683 | if (ki->frequency > cdmc->threshold) |
699 | (void) GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, | 684 | (void) GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, ki->keyword, GNUNET_NO); |
700 | ki->keyword, | ||
701 | GNUNET_NO); | ||
702 | GNUNET_free (ki); | 685 | GNUNET_free (ki); |
703 | return GNUNET_OK; | 686 | return GNUNET_OK; |
704 | } | 687 | } |
@@ -751,36 +734,31 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h, | |||
751 | dc.metamap = GNUNET_CONTAINER_multihashmap_create (64); | 734 | dc.metamap = GNUNET_CONTAINER_multihashmap_create (64); |
752 | dc.keywordmap = GNUNET_CONTAINER_multihashmap_create (64); | 735 | dc.keywordmap = GNUNET_CONTAINER_multihashmap_create (64); |
753 | /* update children to point to directory and generate statistics | 736 | /* update children to point to directory and generate statistics |
754 | on all meta data in children */ | 737 | * on all meta data in children */ |
755 | scanner (scanner_cls, h, filename, do_index, bo, &dirproc_add, &dc, emsg); | 738 | scanner (scanner_cls, h, filename, do_index, bo, &dirproc_add, &dc, emsg); |
756 | cdmc.meta = GNUNET_CONTAINER_meta_data_create (); | 739 | cdmc.meta = GNUNET_CONTAINER_meta_data_create (); |
757 | cdmc.ksk = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri)); | 740 | cdmc.ksk = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri)); |
758 | cdmc.ksk->type = ksk; | 741 | cdmc.ksk->type = ksk; |
759 | cdmc.threshold = 1 + dc.count / 2; /* 50% threshold for now */ | 742 | cdmc.threshold = 1 + dc.count / 2; /* 50% threshold for now */ |
760 | GNUNET_FS_meta_data_make_directory (cdmc.meta); | 743 | GNUNET_FS_meta_data_make_directory (cdmc.meta); |
761 | GNUNET_CONTAINER_multihashmap_iterate (dc.metamap, | 744 | GNUNET_CONTAINER_multihashmap_iterate (dc.metamap, |
762 | &compute_directory_metadata, | 745 | &compute_directory_metadata, &cdmc); |
763 | &cdmc); | ||
764 | GNUNET_CONTAINER_multihashmap_iterate (dc.keywordmap, | 746 | GNUNET_CONTAINER_multihashmap_iterate (dc.keywordmap, |
765 | &compute_directory_keywords, | 747 | &compute_directory_keywords, &cdmc); |
766 | &cdmc); | ||
767 | GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); | 748 | GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); |
768 | GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap); | 749 | GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap); |
769 | GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, | 750 | GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO); |
770 | GNUNET_FS_DIRECTORY_MIME, | ||
771 | GNUNET_NO); | ||
772 | cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk); | 751 | cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk); |
773 | 752 | ||
774 | /* remove keywords in children that are already in the | 753 | /* remove keywords in children that are already in the |
775 | parent */ | 754 | * parent */ |
776 | for (epos = dc.entries; NULL != epos; epos = epos->next) | 755 | for (epos = dc.entries; NULL != epos; epos = epos->next) |
777 | { | 756 | { |
778 | for (i=0;i<cksk->data.ksk.keywordCount;i++) | 757 | for (i = 0; i < cksk->data.ksk.keywordCount; i++) |
779 | { | 758 | { |
780 | kw = cksk->data.ksk.keywords[i]; | 759 | kw = cksk->data.ksk.keywords[i]; |
781 | GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, | 760 | GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]); |
782 | &kw[1]); | 761 | } |
783 | } | ||
784 | } | 762 | } |
785 | ret = | 763 | ret = |
786 | GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk, | 764 | GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk, |