aboutsummaryrefslogtreecommitdiff
path: root/src/fs/fs_file_information.c
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2011-10-21 12:54:44 +0000
committerChristian Grothoff <christian@grothoff.org>2011-10-21 12:54:44 +0000
commit59d63f89e82ccf724ac7c95f2e589d75a4eb0cc8 (patch)
treeb2de864167c12f9f2f165b9827b06e3a3006efe0 /src/fs/fs_file_information.c
parentfef75e2cf4dbd36a09d7f853f91a7a7f2542d34b (diff)
downloadgnunet-59d63f89e82ccf724ac7c95f2e589d75a4eb0cc8.tar.gz
gnunet-59d63f89e82ccf724ac7c95f2e589d75a4eb0cc8.zip
fix #1784
Diffstat (limited to 'src/fs/fs_file_information.c')
-rw-r--r--src/fs/fs_file_information.c205
1 files changed, 193 insertions, 12 deletions
diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c
index fadcfea05..a07ebe2cf 100644
--- a/src/fs/fs_file_information.c
+++ b/src/fs/fs_file_information.c
@@ -328,8 +328,9 @@ struct DirScanCls
328 328
329 329
330/** 330/**
331 * Function called on each entry in a file to 331 * Function called on each entry in a file to cause
332 * cause default-publishing. 332 * default-publishing.
333 *
333 * @param cls closure (struct DirScanCls) 334 * @param cls closure (struct DirScanCls)
334 * @param filename name of the file to be published 335 * @param filename name of the file to be published
335 * @return GNUNET_OK on success, GNUNET_SYSERR to abort 336 * @return GNUNET_OK on success, GNUNET_SYSERR to abort
@@ -432,6 +433,93 @@ GNUNET_FS_directory_scanner_default (void *cls, struct GNUNET_FS_Handle *h,
432 433
433 434
434/** 435/**
436 * Aggregate information we keep for meta data in each directory.
437 */
438struct MetaValueInformation
439{
440
441 /**
442 * Mime-type of data.
443 */
444 const char *mime_type;
445
446 /**
447 * The actual meta data.
448 */
449 const char *data;
450
451 /**
452 * Number of bytes in 'data'.
453 */
454 size_t data_size;
455
456 /**
457 * Type of the meta data.
458 */
459 enum EXTRACTOR_MetaType type;
460
461 /**
462 * Format of the meta data.
463 */
464 enum EXTRACTOR_MetaFormat format;
465
466 /**
467 * How often does this meta value occur in this directory?
468 */
469 unsigned int frequency;
470
471};
472
473
474/**
475 * Type of a function that libextractor calls for each
476 * meta data item found.
477 *
478 * @param cls the container multihashmap to update
479 * @param plugin_name name of the plugin that produced this value;
480 * special values can be used (i.e. '<zlib>' for zlib being
481 * used in the main libextractor library and yielding
482 * meta data).
483 * @param type libextractor-type describing the meta data
484 * @param format basic format information about data
485 * @param data_mime_type mime-type of data (not of the original file);
486 * can be NULL (if mime-type is not known)
487 * @param data actual meta-data found
488 * @param data_len number of bytes in data
489 * @return 0 to continue extracting / iterating
490 */
491static int
492update_metamap (void *cls,
493 const char *plugin_name,
494 enum EXTRACTOR_MetaType type,
495 enum EXTRACTOR_MetaFormat format,
496 const char *data_mime_type,
497 const char *data,
498 size_t data_len)
499{
500 struct GNUNET_CONTAINER_MultiHashMap *map = cls;
501 GNUNET_HashCode key;
502 struct MetaValueInformation *mvi;
503
504 GNUNET_CRYPTO_hash (data, data_len, &key);
505 mvi = GNUNET_CONTAINER_multihashmap_get (map, &key);
506 if (mvi == NULL)
507 {
508 mvi = GNUNET_malloc (sizeof (struct MetaValueInformation));
509 mvi->mime_type = data_mime_type;
510 mvi->data = data;
511 mvi->data_size = data_len;
512 mvi->type = type;
513 mvi->format = format;
514 GNUNET_CONTAINER_multihashmap_put (map, &key, mvi,
515 GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY);
516 }
517 mvi->frequency++;
518 return 0;
519}
520
521
522/**
435 * Closure for dirproc function. 523 * Closure for dirproc function.
436 */ 524 */
437struct EntryProcCls 525struct EntryProcCls
@@ -442,6 +530,18 @@ struct EntryProcCls
442 */ 530 */
443 struct GNUNET_FS_FileInformation *entries; 531 struct GNUNET_FS_FileInformation *entries;
444 532
533 /**
534 * Map describing the meta data for all entries in the
535 * directory. Keys are the hash of the meta-value,
536 * values are of type 'struct MetaValueInformation'.
537 */
538 struct GNUNET_CONTAINER_MultiHashMap *metamap;
539
540 /**
541 * Number of entries in 'entries'.
542 */
543 unsigned int count;
544
445}; 545};
446 546
447 547
@@ -453,7 +553,8 @@ struct EntryProcCls
453 * @param fi information for publishing the file 553 * @param fi information for publishing the file
454 */ 554 */
455static void 555static void
456dirproc (void *cls, const char *filename, struct GNUNET_FS_FileInformation *fi) 556dirproc (void *cls, const char *filename,
557 struct GNUNET_FS_FileInformation *fi)
457{ 558{
458 struct EntryProcCls *dc = cls; 559 struct EntryProcCls *dc = cls;
459 560
@@ -461,6 +562,72 @@ dirproc (void *cls, const char *filename, struct GNUNET_FS_FileInformation *fi)
461 GNUNET_assert (fi->dir == NULL); 562 GNUNET_assert (fi->dir == NULL);
462 fi->next = dc->entries; 563 fi->next = dc->entries;
463 dc->entries = fi; 564 dc->entries = fi;
565 dc->count++;
566 if (NULL != fi->meta)
567 GNUNET_CONTAINER_meta_data_iterate (fi->meta,
568 &update_metamap,
569 dc->metamap);
570}
571
572
/**
 * State handed to 'compute_directory_metadata' while it walks
 * the map of meta values.
 */
struct ComputeDirectoryMetadataContext
{
  /**
   * Keyword URI that receives the extracted keywords.
   */
  struct GNUNET_FS_Uri *ksk;

  /**
   * Meta data container that receives the extracted meta data.
   */
  struct GNUNET_CONTAINER_MetaData *meta;

  /**
   * A meta value is only added if its frequency is strictly
   * above this threshold.
   */
  unsigned int threshold;
};
593
594
595/**
596 * Add metadata that occurs in more than the threshold entries of the
597 * directory to the directory itself. For example, if most files in a
598 * directory are of the same mime-type, the directory should have that
599 * mime-type as a keyword.
600 *
601 * @param cls the 'struct ComputeDirectoryMetadataContext'
602 * @param key unused
603 * @param value the 'struct MetaValueInformation' (to be freed as well)
604 * @return GNUNET_OK
605 */
606static int
607compute_directory_metadata (void *cls,
608 const GNUNET_HashCode *key,
609 void *value)
610{
611 struct ComputeDirectoryMetadataContext *cdmc = cls;
612 struct MetaValueInformation *mvi = value;
613
614 if (mvi->frequency > cdmc->threshold)
615 {
616 (void) GNUNET_CONTAINER_meta_data_insert (cdmc->meta,
617 "<children>",
618 mvi->type,
619 mvi->format,
620 mvi->mime_type,
621 mvi->data,
622 mvi->data_size);
623 if ( (mvi->format == EXTRACTOR_METAFORMAT_UTF8) ||
624 (mvi->format == EXTRACTOR_METAFORMAT_C_STRING) )
625 GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk,
626 mvi->data,
627 GNUNET_NO);
628 }
629 GNUNET_free (mvi);
630 return GNUNET_OK;
464} 631}
465 632
466 633
@@ -496,23 +663,37 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
496 char **emsg) 663 char **emsg)
497{ 664{
498 struct GNUNET_FS_FileInformation *ret; 665 struct GNUNET_FS_FileInformation *ret;
666 struct ComputeDirectoryMetadataContext cdmc;
499 struct EntryProcCls dc; 667 struct EntryProcCls dc;
500 struct GNUNET_FS_Uri *ksk;
501 struct GNUNET_CONTAINER_MetaData *meta;
502 const char *fn; 668 const char *fn;
503 const char *ss; 669 const char *ss;
670 struct GNUNET_FS_Uri *cksk;
504 char *dn; 671 char *dn;
505 672
506 dc.entries = NULL; 673 dc.entries = NULL;
507 meta = GNUNET_CONTAINER_meta_data_create (); 674 dc.count = 0;
508 GNUNET_FS_meta_data_make_directory (meta); 675 dc.metamap = GNUNET_CONTAINER_multihashmap_create (64);
509 scanner (scanner_cls, h, filename, do_index, bo, &dirproc, &dc, emsg); 676 scanner (scanner_cls, h, filename, do_index, bo, &dirproc, &dc, emsg);
510 ksk = NULL; // FIXME... 677 cdmc.meta = GNUNET_CONTAINER_meta_data_create ();
511 // FIXME: create meta! 678 cdmc.ksk = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
679 cdmc.ksk->type = ksk;
680 cdmc.threshold = dc.count / 2; /* 50% threshold for now */
681 GNUNET_FS_meta_data_make_directory (cdmc.meta);
682 /* FIXME: remove meta data above a certain threshold from files
683 to *only* have it for the directory? */
684 GNUNET_CONTAINER_multihashmap_iterate (dc.metamap,
685 &compute_directory_metadata,
686 &cdmc);
687 GNUNET_CONTAINER_multihashmap_destroy (dc.metamap);
688 GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk,
689 GNUNET_FS_DIRECTORY_MIME,
690 GNUNET_NO);
691 cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk);
512 ret = 692 ret =
513 GNUNET_FS_file_information_create_empty_directory (h, client_info, ksk, 693 GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk,
514 meta, bo); 694 cdmc.meta, bo);
515 GNUNET_CONTAINER_meta_data_destroy (meta); 695 GNUNET_CONTAINER_meta_data_destroy (cdmc.meta);
696 GNUNET_FS_uri_destroy (cdmc.ksk);
516 ret->data.dir.entries = dc.entries; 697 ret->data.dir.entries = dc.entries;
517 while (dc.entries != NULL) 698 while (dc.entries != NULL)
518 { 699 {