diff options
author | Christian Grothoff <christian@grothoff.org> | 2009-08-30 21:07:10 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2009-08-30 21:07:10 +0000 |
commit | c3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5 (patch) | |
tree | ace615d5745cbd037c6534abdc3b5f94a2bba7d6 /src/fs | |
parent | a095a849fcd95efeb57db80b4346e4f2eedf9899 (diff) | |
download | gnunet-c3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5.tar.gz gnunet-c3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5.zip |
adding indexing support
Diffstat (limited to 'src/fs')
-rw-r--r-- | src/fs/fs.h | 94 | ||||
-rw-r--r-- | src/fs/fs_publish.c | 230 |
2 files changed, 304 insertions, 20 deletions
diff --git a/src/fs/fs.h b/src/fs/fs.h index 288903418..e4eee7fd0 100644 --- a/src/fs/fs.h +++ b/src/fs/fs.h | |||
@@ -289,6 +289,21 @@ struct GNUNET_FS_FileInformation | |||
289 | void *reader_cls; | 289 | void *reader_cls; |
290 | 290 | ||
291 | /** | 291 | /** |
292 | * Name of the file (must be an absolute path). | ||
293 | * Only required for indexing. FIXME: not yet | ||
294 | * initialized! | ||
295 | */ | ||
296 | char *filename; | ||
297 | |||
298 | /** | ||
299 | * If this file is being indexed, this value | ||
300 | * is set to the hash over the entire file | ||
301 | * (when the indexing process is started). | ||
302 | * Otherwise this field is not used. | ||
303 | */ | ||
304 | GNUNET_HashCode file_id; | ||
305 | |||
306 | /** | ||
292 | * Size of the file (in bytes). | 307 | * Size of the file (in bytes). |
293 | */ | 308 | */ |
294 | uint64_t file_size; | 309 | uint64_t file_size; |
@@ -430,6 +445,13 @@ struct GNUNET_FS_PublishContext | |||
430 | GNUNET_SCHEDULER_TaskIdentifier upload_task; | 445 | GNUNET_SCHEDULER_TaskIdentifier upload_task; |
431 | 446 | ||
432 | /** | 447 | /** |
448 | * Our own client handle for the FS service; | ||
449 | * only briefly used when we start to index a | ||
450 | * file, otherwise NULL. | ||
451 | */ | ||
452 | struct GNUNET_CLIENT_Connection *client; | ||
453 | |||
454 | /** | ||
433 | * Typically GNUNET_NO. Set to GNUNET_YES if | 455 | * Typically GNUNET_NO. Set to GNUNET_YES if |
434 | * "upload_task" is GNUNET_SCHEDULER_NO_TASK | 456 | * "upload_task" is GNUNET_SCHEDULER_NO_TASK |
435 | * and we're waiting for a response from the | 457 | * and we're waiting for a response from the |
@@ -507,6 +529,29 @@ struct GNUNET_FS_Namespace | |||
507 | 529 | ||
508 | 530 | ||
509 | /** | 531 | /** |
532 | * @brief index block (indexing a DBlock that | ||
533 | * can be obtained directly from reading | ||
534 | * the plaintext file) | ||
535 | */ | ||
536 | struct OnDemandBlock | ||
537 | { | ||
538 | /** | ||
539 | * Hash code of the entire content of the | ||
540 | * file that was indexed (used to uniquely | ||
541 | * identify the plaintext file). | ||
542 | */ | ||
543 | GNUNET_HashCode file_id; | ||
544 | |||
545 | /** | ||
546 | * At which offset should we be able to find | ||
547 | * this on-demand encoded block? | ||
548 | */ | ||
549 | uint64_t offset; | ||
550 | |||
551 | }; | ||
552 | |||
553 | |||
554 | /** | ||
510 | * @brief keyword block (advertising data under a keyword) | 555 | * @brief keyword block (advertising data under a keyword) |
511 | */ | 556 | */ |
512 | struct KBlock | 557 | struct KBlock |
@@ -571,9 +616,58 @@ struct SBlock | |||
571 | }; | 616 | }; |
572 | 617 | ||
573 | 618 | ||
619 | /** | ||
620 | * Message sent from a GNUnet (fs) publishing | ||
621 | * activity to the gnunet-fs-service to | ||
622 | * initiate indexing of a file. The service | ||
623 | * is supposed to check if the specified file | ||
624 | * is available and has the same cryptographic | ||
625 | * hash. It should then respond with either | ||
626 | * a confirmation or a denial. | ||
627 | * | ||
628 | * On OSes where this works, it is considered | ||
629 | * acceptable if the service only checks that | ||
630 | * the path, device and inode match (it can | ||
631 | * then be assumed that the hash will also match | ||
632 | * without actually computing it; this is an | ||
633 | * optimization that should be safe given that | ||
634 | * the client is not our adversary). | ||
635 | */ | ||
574 | struct IndexStartMessage | 636 | struct IndexStartMessage |
575 | { | 637 | { |
576 | 638 | ||
639 | /** | ||
640 | * Message type will be | ||
641 | * GNUNET_MESSAGE_TYPE_FS_INDEX_START. | ||
642 | */ | ||
643 | struct GNUNET_MessageHeader header; | ||
644 | |||
645 | /** | ||
646 | * ID of device containing the file, as seen by the client. This | ||
647 | * device ID is obtained using a call like "statvfs" (and converting | ||
648 | * the "f_fsid" field to a 32-bit big-endian number). Use 0 if the | ||
649 | * OS does not support this, in which case the service must do a | ||
650 | * full hash recomputation. | ||
651 | */ | ||
652 | uint32_t device; | ||
653 | |||
654 | /** | ||
655 | * Inode of the file on the given device, as seen by the client | ||
656 | * ("st_ino" field from "struct stat", converted to a 64-bit | ||
657 | * big-endian number). Use 0 if the OS does not support this, in | ||
658 | * which case the service must do a full hash recomputation. | ||
659 | */ | ||
660 | uint64_t inode; | ||
661 | |||
662 | /** | ||
663 | * Hash of the file that we would like to index. | ||
664 | */ | ||
665 | GNUNET_HashCode file_id; | ||
666 | |||
667 | /* this is followed by a 0-terminated | ||
668 | filename of a file with the hash | ||
669 | "file_id" as seen by the client */ | ||
670 | |||
577 | }; | 671 | }; |
578 | 672 | ||
579 | 673 | ||
diff --git a/src/fs/fs_publish.c b/src/fs/fs_publish.c index 91ca3240a..13ce4d5aa 100644 --- a/src/fs/fs_publish.c +++ b/src/fs/fs_publish.c | |||
@@ -26,7 +26,7 @@ | |||
26 | * @author Christian Grothoff | 26 | * @author Christian Grothoff |
27 | * | 27 | * |
28 | * TODO: | 28 | * TODO: |
29 | * - indexing support | 29 | * - indexing cleanup: unindex on failure (can wait) |
30 | * - code-sharing with unindex (can wait) | 30 | * - code-sharing with unindex (can wait) |
31 | * - persistence support (can wait) | 31 | * - persistence support (can wait) |
32 | * - datastore reservation support (optimization) | 32 | * - datastore reservation support (optimization) |
@@ -52,6 +52,14 @@ | |||
52 | */ | 52 | */ |
53 | #define MAX_SBLOCK_SIZE 60000 | 53 | #define MAX_SBLOCK_SIZE 60000 |
54 | 54 | ||
55 | /** | ||
56 | * Blocksize to use when hashing files | ||
57 | * for indexing (blocksize for IO, not for | ||
58 | * the DBlocks). Larger blocksizes can | ||
59 | * be more efficient but will be more disruptive | ||
60 | * as far as the scheduler is concerned. | ||
61 | */ | ||
62 | #define HASHING_BLOCKSIZE (1024 * 1024) | ||
55 | 63 | ||
56 | /** | 64 | /** |
57 | * Main function that performs the upload. | 65 | * Main function that performs the upload. |
@@ -471,6 +479,7 @@ publish_content (struct GNUNET_FS_PublishContext *sc, | |||
471 | void *raw_data; | 479 | void *raw_data; |
472 | char *dd; | 480 | char *dd; |
473 | struct PutContCtx * dpc_cls; | 481 | struct PutContCtx * dpc_cls; |
482 | struct OnDemandBlock odb; | ||
474 | 483 | ||
475 | // FIXME: figure out how to share this code | 484 | // FIXME: figure out how to share this code |
476 | // with unindex! | 485 | // with unindex! |
@@ -593,8 +602,6 @@ publish_content (struct GNUNET_FS_PublishContext *sc, | |||
593 | enc); | 602 | enc); |
594 | // NOTE: this block below is all that really differs | 603 | // NOTE: this block below is all that really differs |
595 | // between publish/unindex! Parameterize & move this code! | 604 | // between publish/unindex! Parameterize & move this code! |
596 | // FIXME: something around here would need to change | ||
597 | // for indexing! | ||
598 | if (NULL == sc->dsh) | 605 | if (NULL == sc->dsh) |
599 | { | 606 | { |
600 | sc->upload_task | 607 | sc->upload_task |
@@ -614,20 +621,42 @@ publish_content (struct GNUNET_FS_PublishContext *sc, | |||
614 | dpc_cls->cont = &do_upload; | 621 | dpc_cls->cont = &do_upload; |
615 | dpc_cls->cont_cls = sc; | 622 | dpc_cls->cont_cls = sc; |
616 | dpc_cls->p = p; | 623 | dpc_cls->p = p; |
617 | GNUNET_DATASTORE_put (sc->dsh, | 624 | if ( (p->is_directory) && |
618 | sc->rid, | 625 | (p->data.file.do_index) && |
619 | &mychk->query, | 626 | (p->current_depth == p->chk_tree_depth) ) |
620 | pt_size, | 627 | { |
621 | enc, | 628 | odb.offset = p->publish_offset; |
622 | (p->current_depth == p->chk_tree_depth) | 629 | odb.file_id = p->data.file.file_id; |
623 | ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK | 630 | GNUNET_DATASTORE_put (sc->dsh, |
624 | : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK, | 631 | sc->rid, |
625 | p->priority, | 632 | &mychk->query, |
626 | p->anonymity, | 633 | sizeof(struct OnDemandBlock), |
627 | p->expirationTime, | 634 | &odb, |
628 | GNUNET_CONSTANTS_SERVICE_TIMEOUT, | 635 | GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND, |
629 | &ds_put_cont, | 636 | p->priority, |
630 | dpc_cls); | 637 | p->anonymity, |
638 | p->expirationTime, | ||
639 | GNUNET_CONSTANTS_SERVICE_TIMEOUT, | ||
640 | &ds_put_cont, | ||
641 | dpc_cls); | ||
642 | } | ||
643 | else | ||
644 | { | ||
645 | GNUNET_DATASTORE_put (sc->dsh, | ||
646 | sc->rid, | ||
647 | &mychk->query, | ||
648 | pt_size, | ||
649 | enc, | ||
650 | (p->current_depth == p->chk_tree_depth) | ||
651 | ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK | ||
652 | : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK, | ||
653 | p->priority, | ||
654 | p->anonymity, | ||
655 | p->expirationTime, | ||
656 | GNUNET_CONSTANTS_SERVICE_TIMEOUT, | ||
657 | &ds_put_cont, | ||
658 | dpc_cls); | ||
659 | } | ||
631 | } | 660 | } |
632 | if (p->current_depth == p->chk_tree_depth) | 661 | if (p->current_depth == p->chk_tree_depth) |
633 | { | 662 | { |
@@ -668,6 +697,153 @@ publish_content (struct GNUNET_FS_PublishContext *sc, | |||
668 | } | 697 | } |
669 | 698 | ||
670 | 699 | ||
700 | |||
701 | |||
702 | /** | ||
703 | * Process the response (or lack thereof) from | ||
704 | * the "fs" service to our 'start index' request. | ||
705 | * | ||
706 | * @param cls closure (of type "struct GNUNET_FS_PublishContext*") | ||
707 | * @param msg the response we got | ||
708 | */ | ||
709 | static void | ||
710 | process_index_start_response (void *cls, | ||
711 | const struct GNUNET_MessageHeader *msg) | ||
712 | { | ||
713 | struct GNUNET_FS_PublishContext *sc = cls; | ||
714 | struct GNUNET_FS_FileInformation *p; | ||
715 | const char *emsg; | ||
716 | uint16_t msize; | ||
717 | |||
718 | GNUNET_CLIENT_disconnect (sc->client); | ||
719 | sc->client = NULL; | ||
720 | p = sc->fi_pos; | ||
721 | if (msg == NULL) | ||
722 | { | ||
723 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
724 | _("Can not index file `%s': %s. Will try to insert instead.\n"), | ||
725 | p->data.file.filename, | ||
726 | _("timeout on index-start request to `fs' service")); | ||
727 | p->data.file.do_index = GNUNET_NO; | ||
728 | publish_content (sc, p); | ||
729 | return; | ||
730 | } | ||
731 | if (ntohs (msg->type) != GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK) | ||
732 | { | ||
733 | msize = ntohs (msg->size); | ||
734 | emsg = (const char *) &msg[1]; | ||
735 | if ( (msize <= sizeof (struct GNUNET_MessageHeader)) || | ||
736 | (emsg[msize - sizeof(struct GNUNET_MessageHeader) - 1] != '\0') ) | ||
737 | emsg = gettext_noop ("unknown error"); | ||
738 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
739 | _("Can not index file `%s': %s. Will try to insert instead.\n"), | ||
740 | p->data.file.filename, | ||
741 | gettext (emsg)); | ||
742 | p->data.file.do_index = GNUNET_NO; | ||
743 | publish_content (sc, p); | ||
744 | return; | ||
745 | } | ||
746 | /* success! continue with indexing */ | ||
747 | publish_content (sc, p); | ||
748 | } | ||
749 | |||
750 | |||
751 | #if LINUX | ||
752 | #include <sys/statvfs.h> | ||
753 | #endif | ||
754 | |||
755 | /** | ||
756 | * Function called once the hash computation over an | ||
757 | * indexed file has completed. | ||
758 | * | ||
759 | * @param cls closure, our publishing context | ||
760 | * @param res resulting hash, NULL on error | ||
761 | */ | ||
762 | static void | ||
763 | hash_for_index_cb (void *cls, | ||
764 | const GNUNET_HashCode * | ||
765 | res) | ||
766 | { | ||
767 | struct GNUNET_FS_PublishContext *sc = cls; | ||
768 | struct GNUNET_FS_FileInformation *p; | ||
769 | struct IndexStartMessage *ism; | ||
770 | size_t slen; | ||
771 | struct GNUNET_CLIENT_Connection *client; | ||
772 | #if LINUX | ||
773 | struct stat sbuf; | ||
774 | struct statvfs fbuf; | ||
775 | #endif | ||
776 | |||
777 | p = sc->fi_pos; | ||
778 | if (NULL == res) | ||
779 | { | ||
780 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
781 | _("Can not index file `%s': %s. Will try to insert instead.\n"), | ||
782 | p->data.file.filename, | ||
783 | _("failed to compute hash")); | ||
784 | p->data.file.do_index = GNUNET_NO; | ||
785 | publish_content (sc, p); | ||
786 | return; | ||
787 | } | ||
788 | slen = strlen (p->data.file.filename) + 1; | ||
789 | if (slen > GNUNET_SERVER_MAX_MESSAGE_SIZE - sizeof(struct IndexStartMessage)) | ||
790 | { | ||
791 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
792 | _("Can not index file `%s': %s. Will try to insert instead.\n"), | ||
793 | p->data.file.filename, | ||
794 | _("filename too long")); | ||
795 | p->data.file.do_index = GNUNET_NO; | ||
796 | publish_content (sc, p); | ||
797 | return; | ||
798 | } | ||
799 | client = GNUNET_CLIENT_connect (sc->h->sched, | ||
800 | "fs", | ||
801 | sc->h->cfg); | ||
802 | if (NULL == client) | ||
803 | { | ||
804 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
805 | _("Can not index file `%s': %s. Will try to insert instead.\n"), | ||
806 | p->data.file.filename, | ||
807 | _("could not connect to `fs' service")); | ||
808 | p->data.file.do_index = GNUNET_NO; | ||
809 | publish_content (sc, p); | ||
810 | return; | ||
811 | } | ||
812 | p->data.file.file_id = *res; | ||
813 | ism = GNUNET_malloc (sizeof(struct IndexStartMessage) + | ||
814 | slen); | ||
815 | ism->header.size = htons(sizeof(struct IndexStartMessage) + | ||
816 | slen); | ||
817 | ism->header.type = htons(GNUNET_MESSAGE_TYPE_FS_INDEX_START); | ||
818 | /* FIXME: activate this on other OSes that | ||
819 | support it (or something very similar; make | ||
820 | sure to also adjust corresponding code | ||
821 | on the service-side) */ | ||
822 | /* FIXME: the block below should probably be | ||
823 | abstracted into a function in the DISK API */ | ||
824 | #if LINUX | ||
825 | if ( (0 == stat(p->data.file.filename, | ||
826 | &sbuf)) && | ||
827 | (0 == statvfs (p->data.file.filename, | ||
828 | &fbuf) ) ) | ||
829 | { | ||
830 | ism->device = htonl ((uint32_t) fbuf.f_fsid); | ||
831 | ism->inode = GNUNET_htonll( (uint64_t) sbuf.st_ino); | ||
832 | } | ||
833 | #endif | ||
834 | memcpy (&ism[1], | ||
835 | p->data.file.filename, | ||
836 | slen); | ||
837 | sc->client = client; | ||
838 | GNUNET_CLIENT_transmit_and_get_response (client, | ||
839 | &ism->header, | ||
840 | GNUNET_TIME_UNIT_FOREVER_REL, | ||
841 | &process_index_start_response, | ||
842 | sc); | ||
843 | GNUNET_free (ism); | ||
844 | } | ||
845 | |||
846 | |||
671 | /** | 847 | /** |
672 | * Main function that performs the upload. | 848 | * Main function that performs the upload. |
673 | * @param cls "struct GNUNET_FS_PublishContext" identifies the upload | 849 | * @param cls "struct GNUNET_FS_PublishContext" identifies the upload |
@@ -744,9 +920,23 @@ do_upload (void *cls, | |||
744 | if ( (!p->is_directory) && | 920 | if ( (!p->is_directory) && |
745 | (p->data.file.do_index) ) | 921 | (p->data.file.do_index) ) |
746 | { | 922 | { |
747 | // FIXME: need to pre-compute hash over | 923 | if (NULL == p->data.file.filename) |
748 | // the entire file and ask FS to prepare | 924 | { |
749 | // for indexing! | 925 | p->data.file.do_index = GNUNET_NO; |
926 | GNUNET_log (GNUNET_ERROR_TYPE_WARNING, | ||
927 | _("Can not index file `%s': %s. Will try to insert instead.\n"), | ||
928 | "<no-name>", | ||
929 | _("needs to be an actual file")); | ||
930 | publish_content (sc, p); | ||
931 | return; | ||
932 | } | ||
933 | GNUNET_CRYPTO_hash_file (sc->h->sched, | ||
934 | GNUNET_SCHEDULER_PRIORITY_IDLE, | ||
935 | GNUNET_NO, | ||
936 | p->data.file.filename, | ||
937 | HASHING_BLOCKSIZE, | ||
938 | &hash_for_index_cb, | ||
939 | sc); | ||
750 | return; | 940 | return; |
751 | } | 941 | } |
752 | publish_content (sc, p); | 942 | publish_content (sc, p); |