diff options
author | Christian Grothoff <christian@grothoff.org> | 2011-10-21 12:54:44 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2011-10-21 12:54:44 +0000 |
commit | 59d63f89e82ccf724ac7c95f2e589d75a4eb0cc8 (patch) | |
tree | b2de864167c12f9f2f165b9827b06e3a3006efe0 /src/fs/fs_file_information.c | |
parent | fef75e2cf4dbd36a09d7f853f91a7a7f2542d34b (diff) | |
download | gnunet-59d63f89e82ccf724ac7c95f2e589d75a4eb0cc8.tar.gz gnunet-59d63f89e82ccf724ac7c95f2e589d75a4eb0cc8.zip |
fix #1784
Diffstat (limited to 'src/fs/fs_file_information.c')
-rw-r--r-- | src/fs/fs_file_information.c | 205 |
1 files changed, 193 insertions, 12 deletions
diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c index fadcfea05..a07ebe2cf 100644 --- a/src/fs/fs_file_information.c +++ b/src/fs/fs_file_information.c | |||
@@ -328,8 +328,9 @@ struct DirScanCls | |||
328 | 328 | ||
329 | 329 | ||
330 | /** | 330 | /** |
331 | * Function called on each entry in a file to | 331 | * Function called on each entry in a file to cause |
332 | * cause default-publishing. | 332 | * default-publishing. |
333 | * | ||
333 | * @param cls closure (struct DirScanCls) | 334 | * @param cls closure (struct DirScanCls) |
334 | * @param filename name of the file to be published | 335 | * @param filename name of the file to be published |
335 | * @return GNUNET_OK on success, GNUNET_SYSERR to abort | 336 | * @return GNUNET_OK on success, GNUNET_SYSERR to abort |
@@ -432,6 +433,93 @@ GNUNET_FS_directory_scanner_default (void *cls, struct GNUNET_FS_Handle *h, | |||
432 | 433 | ||
433 | 434 | ||
434 | /** | 435 | /** |
436 | * Aggregate information we keep for meta data in each directory. | ||
437 | */ | ||
438 | struct MetaValueInformation | ||
439 | { | ||
440 | |||
441 | /** | ||
442 | * Mime-type of data. | ||
443 | */ | ||
444 | const char *mime_type; | ||
445 | |||
446 | /** | ||
447 | * The actual meta data. | ||
448 | */ | ||
449 | const char *data; | ||
450 | |||
451 | /** | ||
452 | * Number of bytes in 'data'. | ||
453 | */ | ||
454 | size_t data_size; | ||
455 | |||
456 | /** | ||
457 | * Type of the meta data. | ||
458 | */ | ||
459 | enum EXTRACTOR_MetaType type; | ||
460 | |||
461 | /** | ||
462 | * Format of the meta data. | ||
463 | */ | ||
464 | enum EXTRACTOR_MetaFormat format; | ||
465 | |||
466 | /** | ||
467 | * How often does this meta value occur in this directory? | ||
468 | */ | ||
469 | unsigned int frequency; | ||
470 | |||
471 | }; | ||
472 | |||
473 | |||
474 | /** | ||
475 | * Callback invoked for each meta data item of a directory | ||
476 | * entry; counts how often each distinct value occurs. | ||
477 | * | ||
478 | * @param cls the container multihashmap to update | ||
479 | * @param plugin_name name of the plugin that produced this value; | ||
480 | * special values can be used (e.g. '<zlib>' for zlib being | ||
481 | * used in the main libextractor library and yielding | ||
482 | * meta data). | ||
483 | * @param type libextractor-type describing the meta data | ||
484 | * @param format basic format information about data | ||
485 | * @param data_mime_type mime-type of data (not of the original file); | ||
486 | * can be NULL (if mime-type is not known) | ||
487 | * @param data actual meta-data found | ||
488 | * @param data_len number of bytes in data | ||
489 | * @return 0 to continue extracting / iterating | ||
490 | */ | ||
491 | static int | ||
492 | update_metamap (void *cls, | ||
493 | const char *plugin_name, | ||
494 | enum EXTRACTOR_MetaType type, | ||
495 | enum EXTRACTOR_MetaFormat format, | ||
496 | const char *data_mime_type, | ||
497 | const char *data, | ||
498 | size_t data_len) | ||
499 | { | ||
500 | struct GNUNET_CONTAINER_MultiHashMap *map = cls; | ||
501 | GNUNET_HashCode key; | ||
502 | struct MetaValueInformation *mvi; | ||
503 | |||
504 | GNUNET_CRYPTO_hash (data, data_len, &key); | ||
505 | mvi = GNUNET_CONTAINER_multihashmap_get (map, &key); | ||
506 | if (mvi == NULL) | ||
507 | { | ||
508 | mvi = GNUNET_malloc (sizeof (struct MetaValueInformation)); | ||
509 | mvi->mime_type = data_mime_type; | ||
510 | mvi->data = data; | ||
511 | mvi->data_size = data_len; | ||
512 | mvi->type = type; | ||
513 | mvi->format = format; | ||
514 | GNUNET_CONTAINER_multihashmap_put (map, &key, mvi, | ||
515 | GNUNET_CONTAINER_MULTIHASHMAPOPTION_UNIQUE_ONLY); | ||
516 | } | ||
517 | mvi->frequency++; | ||
518 | return 0; | ||
519 | } | ||
520 | |||
521 | |||
522 | /** | ||
435 | * Closure for dirproc function. | 523 | * Closure for dirproc function. |
436 | */ | 524 | */ |
437 | struct EntryProcCls | 525 | struct EntryProcCls |
@@ -442,6 +530,18 @@ struct EntryProcCls | |||
442 | */ | 530 | */ |
443 | struct GNUNET_FS_FileInformation *entries; | 531 | struct GNUNET_FS_FileInformation *entries; |
444 | 532 | ||
533 | /** | ||
534 | * Map describing the meta data for all entries in the | ||
535 | * directory. Keys are the hash of the meta-value, | ||
536 | * values are of type 'struct MetaValueInformation'. | ||
537 | */ | ||
538 | struct GNUNET_CONTAINER_MultiHashMap *metamap; | ||
539 | |||
540 | /** | ||
541 | * Number of entries in 'entries'. | ||
542 | */ | ||
543 | unsigned int count; | ||
544 | |||
445 | }; | 545 | }; |
446 | 546 | ||
447 | 547 | ||
@@ -453,7 +553,8 @@ struct EntryProcCls | |||
453 | * @param fi information for publishing the file | 553 | * @param fi information for publishing the file |
454 | */ | 554 | */ |
455 | static void | 555 | static void |
456 | dirproc (void *cls, const char *filename, struct GNUNET_FS_FileInformation *fi) | 556 | dirproc (void *cls, const char *filename, |
557 | struct GNUNET_FS_FileInformation *fi) | ||
457 | { | 558 | { |
458 | struct EntryProcCls *dc = cls; | 559 | struct EntryProcCls *dc = cls; |
459 | 560 | ||
@@ -461,6 +562,72 @@ dirproc (void *cls, const char *filename, struct GNUNET_FS_FileInformation *fi) | |||
461 | GNUNET_assert (fi->dir == NULL); | 562 | GNUNET_assert (fi->dir == NULL); |
462 | fi->next = dc->entries; | 563 | fi->next = dc->entries; |
463 | dc->entries = fi; | 564 | dc->entries = fi; |
565 | dc->count++; | ||
566 | if (NULL != fi->meta) | ||
567 | GNUNET_CONTAINER_meta_data_iterate (fi->meta, | ||
568 | &update_metamap, | ||
569 | dc->metamap); | ||
570 | } | ||
571 | |||
572 | |||
573 | /** | ||
574 | * Closure for 'compute_directory_metadata'. | ||
575 | */ | ||
576 | struct ComputeDirectoryMetadataContext | ||
577 | { | ||
578 | /** | ||
579 | * Where to store the extracted keywords. | ||
580 | */ | ||
581 | struct GNUNET_FS_Uri *ksk; | ||
582 | |||
583 | /** | ||
584 | * Where to store the extracted meta data. | ||
585 | */ | ||
586 | struct GNUNET_CONTAINER_MetaData *meta; | ||
587 | |||
588 | /** | ||
589 | * Threshold to apply for adding meta data. | ||
590 | */ | ||
591 | unsigned int threshold; | ||
592 | }; | ||
593 | |||
594 | |||
595 | /** | ||
596 | * Add metadata that occurs in more than the threshold entries of the | ||
597 | * directory to the directory itself. For example, if most files in a | ||
598 | * directory are of the same mime-type, the directory should have that | ||
599 | * mime-type as a keyword. | ||
600 | * | ||
601 | * @param cls the 'struct ComputeDirectoryMetadataContext' | ||
602 | * @param key unused | ||
603 | * @param value the 'struct MetaValueInformation' (to be freed as well) | ||
604 | * @return GNUNET_OK | ||
605 | */ | ||
606 | static int | ||
607 | compute_directory_metadata (void *cls, | ||
608 | const GNUNET_HashCode *key, | ||
609 | void *value) | ||
610 | { | ||
611 | struct ComputeDirectoryMetadataContext *cdmc = cls; | ||
612 | struct MetaValueInformation *mvi = value; | ||
613 | |||
614 | if (mvi->frequency > cdmc->threshold) | ||
615 | { | ||
616 | (void) GNUNET_CONTAINER_meta_data_insert (cdmc->meta, | ||
617 | "<children>", | ||
618 | mvi->type, | ||
619 | mvi->format, | ||
620 | mvi->mime_type, | ||
621 | mvi->data, | ||
622 | mvi->data_size); | ||
623 | if ( (mvi->format == EXTRACTOR_METAFORMAT_UTF8) || | ||
624 | (mvi->format == EXTRACTOR_METAFORMAT_C_STRING) ) | ||
625 | GNUNET_FS_uri_ksk_add_keyword (cdmc->ksk, | ||
626 | mvi->data, | ||
627 | GNUNET_NO); | ||
628 | } | ||
629 | GNUNET_free (mvi); | ||
630 | return GNUNET_OK; | ||
464 | } | 631 | } |
465 | 632 | ||
466 | 633 | ||
@@ -496,23 +663,37 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h, | |||
496 | char **emsg) | 663 | char **emsg) |
497 | { | 664 | { |
498 | struct GNUNET_FS_FileInformation *ret; | 665 | struct GNUNET_FS_FileInformation *ret; |
666 | struct ComputeDirectoryMetadataContext cdmc; | ||
499 | struct EntryProcCls dc; | 667 | struct EntryProcCls dc; |
500 | struct GNUNET_FS_Uri *ksk; | ||
501 | struct GNUNET_CONTAINER_MetaData *meta; | ||
502 | const char *fn; | 668 | const char *fn; |
503 | const char *ss; | 669 | const char *ss; |
670 | struct GNUNET_FS_Uri *cksk; | ||
504 | char *dn; | 671 | char *dn; |
505 | 672 | ||
506 | dc.entries = NULL; | 673 | dc.entries = NULL; |
507 | meta = GNUNET_CONTAINER_meta_data_create (); | 674 | dc.count = 0; |
508 | GNUNET_FS_meta_data_make_directory (meta); | 675 | dc.metamap = GNUNET_CONTAINER_multihashmap_create (64); |
509 | scanner (scanner_cls, h, filename, do_index, bo, &dirproc, &dc, emsg); | 676 | scanner (scanner_cls, h, filename, do_index, bo, &dirproc, &dc, emsg); |
510 | ksk = NULL; // FIXME... | 677 | cdmc.meta = GNUNET_CONTAINER_meta_data_create (); |
511 | // FIXME: create meta! | 678 | cdmc.ksk = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri)); |
679 | cdmc.ksk->type = ksk; | ||
680 | cdmc.threshold = dc.count / 2; /* 50% threshold for now */ | ||
681 | GNUNET_FS_meta_data_make_directory (cdmc.meta); | ||
682 | /* FIXME: remove meta data above a certain threshold from files | ||
683 | to *only* have it for the directory? */ | ||
684 | GNUNET_CONTAINER_multihashmap_iterate (dc.metamap, | ||
685 | &compute_directory_metadata, | ||
686 | &cdmc); | ||
687 | GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); | ||
688 | GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, | ||
689 | GNUNET_FS_DIRECTORY_MIME, | ||
690 | GNUNET_NO); | ||
691 | cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk); | ||
512 | ret = | 692 | ret = |
513 | GNUNET_FS_file_information_create_empty_directory (h, client_info, ksk, | 693 | GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk, |
514 | meta, bo); | 694 | cdmc.meta, bo); |
515 | GNUNET_CONTAINER_meta_data_destroy (meta); | 695 | GNUNET_CONTAINER_meta_data_destroy (cdmc.meta); |
696 | GNUNET_FS_uri_destroy (cdmc.ksk); | ||
516 | ret->data.dir.entries = dc.entries; | 697 | ret->data.dir.entries = dc.entries; |
517 | while (dc.entries != NULL) | 698 | while (dc.entries != NULL) |
518 | { | 699 | { |