aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2009-08-30 21:07:10 +0000
committerChristian Grothoff <christian@grothoff.org>2009-08-30 21:07:10 +0000
commitc3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5 (patch)
treeace615d5745cbd037c6534abdc3b5f94a2bba7d6 /src
parenta095a849fcd95efeb57db80b4346e4f2eedf9899 (diff)
downloadgnunet-c3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5.tar.gz
gnunet-c3d7c40c3cd0ec03c7f6b27e6b5f7eac1aa80ed5.zip
adding indexing support
Diffstat (limited to 'src')
-rw-r--r--src/fs/fs.h94
-rw-r--r--src/fs/fs_publish.c230
-rw-r--r--src/include/gnunet_datastore_service.h3
-rw-r--r--src/include/gnunet_protocols.h18
4 files changed, 324 insertions, 21 deletions
diff --git a/src/fs/fs.h b/src/fs/fs.h
index 288903418..e4eee7fd0 100644
--- a/src/fs/fs.h
+++ b/src/fs/fs.h
@@ -289,6 +289,21 @@ struct GNUNET_FS_FileInformation
289 void *reader_cls; 289 void *reader_cls;
290 290
291 /** 291 /**
292 * Name of the file (must be an absolute path).
293 * Only required for indexing. FIXME: not yet
294 * initialized!
295 */
296 char *filename;
297
298 /**
299 * If this file is being indexed, this value
300 * is set to the hash over the entire file
301 * (when the indexing process is started).
302 * Otherwise this field is not used.
303 */
304 GNUNET_HashCode file_id;
305
306 /**
292 * Size of the file (in bytes). 307 * Size of the file (in bytes).
293 */ 308 */
294 uint64_t file_size; 309 uint64_t file_size;
@@ -430,6 +445,13 @@ struct GNUNET_FS_PublishContext
430 GNUNET_SCHEDULER_TaskIdentifier upload_task; 445 GNUNET_SCHEDULER_TaskIdentifier upload_task;
431 446
432 /** 447 /**
448 * Our own client handle for the FS service;
449 * only briefly used when we start to index a
450 * file, otherwise NULL.
451 */
452 struct GNUNET_CLIENT_Connection *client;
453
454 /**
433 * Typically GNUNET_NO. Set to GNUNET_YES if 455 * Typically GNUNET_NO. Set to GNUNET_YES if
434 * "upload_task" is GNUNET_SCHEDULER_NO_TASK 456 * "upload_task" is GNUNET_SCHEDULER_NO_TASK
435 * and we're waiting for a response from the 457 * and we're waiting for a response from the
@@ -507,6 +529,29 @@ struct GNUNET_FS_Namespace
507 529
508 530
509/** 531/**
532 * @brief on-demand index block (stands in for a DBlock
533 * whose content can be obtained directly by reading
534 * the plaintext file)
535 */
536struct OnDemandBlock
537{
538 /**
539 * Hash code over the entire content of the
540 * file that was indexed (used to uniquely
541 * identify the plaintext file).
542 */
543 GNUNET_HashCode file_id;
544
545 /**
546 * Offset at which we should be able to find this on-demand
547 * encoded block (filled in from the file's publish offset).
548 */
549 uint64_t offset;
550
551};
552
553
554/**
510 * @brief keyword block (advertising data under a keyword) 555 * @brief keyword block (advertising data under a keyword)
511 */ 556 */
512struct KBlock 557struct KBlock
@@ -571,9 +616,58 @@ struct SBlock
571}; 616};
572 617
573 618
619/**
620 * Message sent from a GNUnet (fs) publishing
621 * activity to the gnunet-fs-service to
622 * initiate indexing of a file. The service
623 * is supposed to check if the specified file
624 * is available and has the same cryptographic
625 * hash. It should then respond with either a confirmation
626 * (..._FS_INDEX_START_OK) or a denial (..._FS_INDEX_START_FAILED).
627 *
628 * On OSes where this works, it is considered
629 * acceptable if the service only checks that
630 * the path, device and inode match (it can
631 * then be assumed that the hash will also match
632 * without actually computing it; this is an
633 * optimization that should be safe given that
634 * the client is not our adversary).
635 */
574struct IndexStartMessage 636struct IndexStartMessage
575{ 637{
576 638
639 /**
640 * Message type will be
641 * GNUNET_MESSAGE_TYPE_FS_INDEX_START.
642 */
643 struct GNUNET_MessageHeader header;
644
645 /**
646 * ID of device containing the file, as seen by the client. This
647 * device ID is obtained using a call like "statvfs" (and converting
648 * the "f_fsid" field to a 32-bit big-endian number). Use 0 if the
649 * OS does not support this, in which case the service must do a
650 * full hash recomputation.
651 */
652 uint32_t device;
653
654 /**
655 * Inode of the file on the given device, as seen by the client
656 * ("st_ino" field from "struct stat"), in network byte order.
657 * Use 0 if the OS does not support this, in which case the service
658 * must do a full hash recomputation.
659 */
660 uint64_t inode;
661
662 /**
663 * Hash of the file that we would like to index.
664 */
665 GNUNET_HashCode file_id;
666
667 /* this is followed by the 0-terminated
668 filename (as seen by the client) of the
669 file whose hash is "file_id" */
670
577}; 671};
578 672
579 673
diff --git a/src/fs/fs_publish.c b/src/fs/fs_publish.c
index 91ca3240a..13ce4d5aa 100644
--- a/src/fs/fs_publish.c
+++ b/src/fs/fs_publish.c
@@ -26,7 +26,7 @@
26 * @author Christian Grothoff 26 * @author Christian Grothoff
27 * 27 *
28 * TODO: 28 * TODO:
29 * - indexing support 29 * - indexing cleanup: unindex on failure (can wait)
30 * - code-sharing with unindex (can wait) 30 * - code-sharing with unindex (can wait)
31 * - persistence support (can wait) 31 * - persistence support (can wait)
32 * - datastore reservation support (optimization) 32 * - datastore reservation support (optimization)
@@ -52,6 +52,14 @@
52 */ 52 */
53#define MAX_SBLOCK_SIZE 60000 53#define MAX_SBLOCK_SIZE 60000
54 54
55/**
56 * Blocksize to use when hashing files
57 * for indexing (blocksize for IO, not for
58 * the DBlocks). Larger blocksizes can
59 * be more efficient but will be more disruptive
60 * as far as the scheduler is concerned.
61 */
62#define HASHING_BLOCKSIZE (1024 * 1024)
55 63
56/** 64/**
57 * Main function that performs the upload. 65 * Main function that performs the upload.
@@ -471,6 +479,7 @@ publish_content (struct GNUNET_FS_PublishContext *sc,
471 void *raw_data; 479 void *raw_data;
472 char *dd; 480 char *dd;
473 struct PutContCtx * dpc_cls; 481 struct PutContCtx * dpc_cls;
482 struct OnDemandBlock odb;
474 483
475 // FIXME: figure out how to share this code 484 // FIXME: figure out how to share this code
476 // with unindex! 485 // with unindex!
@@ -593,8 +602,6 @@ publish_content (struct GNUNET_FS_PublishContext *sc,
593 enc); 602 enc);
594 // NOTE: this block below is all that really differs 603 // NOTE: this block below is all that really differs
595 // between publish/unindex! Parameterize & move this code! 604 // between publish/unindex! Parameterize & move this code!
596 // FIXME: something around here would need to change
597 // for indexing!
598 if (NULL == sc->dsh) 605 if (NULL == sc->dsh)
599 { 606 {
600 sc->upload_task 607 sc->upload_task
@@ -614,20 +621,42 @@ publish_content (struct GNUNET_FS_PublishContext *sc,
614 dpc_cls->cont = &do_upload; 621 dpc_cls->cont = &do_upload;
615 dpc_cls->cont_cls = sc; 622 dpc_cls->cont_cls = sc;
616 dpc_cls->p = p; 623 dpc_cls->p = p;
617 GNUNET_DATASTORE_put (sc->dsh, 624 if ( (p->is_directory) &&
618 sc->rid, 625 (p->data.file.do_index) &&
619 &mychk->query, 626 (p->current_depth == p->chk_tree_depth) )
620 pt_size, 627 {
621 enc, 628 odb.offset = p->publish_offset;
622 (p->current_depth == p->chk_tree_depth) 629 odb.file_id = p->data.file.file_id;
623 ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK 630 GNUNET_DATASTORE_put (sc->dsh,
624 : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK, 631 sc->rid,
625 p->priority, 632 &mychk->query,
626 p->anonymity, 633 sizeof(struct OnDemandBlock),
627 p->expirationTime, 634 &odb,
628 GNUNET_CONSTANTS_SERVICE_TIMEOUT, 635 GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND,
629 &ds_put_cont, 636 p->priority,
630 dpc_cls); 637 p->anonymity,
638 p->expirationTime,
639 GNUNET_CONSTANTS_SERVICE_TIMEOUT,
640 &ds_put_cont,
641 dpc_cls);
642 }
643 else
644 {
645 GNUNET_DATASTORE_put (sc->dsh,
646 sc->rid,
647 &mychk->query,
648 pt_size,
649 enc,
650 (p->current_depth == p->chk_tree_depth)
651 ? GNUNET_DATASTORE_BLOCKTYPE_DBLOCK
652 : GNUNET_DATASTORE_BLOCKTYPE_IBLOCK,
653 p->priority,
654 p->anonymity,
655 p->expirationTime,
656 GNUNET_CONSTANTS_SERVICE_TIMEOUT,
657 &ds_put_cont,
658 dpc_cls);
659 }
631 } 660 }
632 if (p->current_depth == p->chk_tree_depth) 661 if (p->current_depth == p->chk_tree_depth)
633 { 662 {
@@ -668,6 +697,153 @@ publish_content (struct GNUNET_FS_PublishContext *sc,
668} 697}
669 698
670 699
700
701
702/**
703 * Process the response (or lack thereof) from
704 * the "fs" service to our 'start index' request.
705 *
706 * @param cls closure (of type "struct GNUNET_FS_PublishContext*"_)
707 * @param msg the response we got
708 */
709static void
710process_index_start_response (void *cls,
711 const struct GNUNET_MessageHeader *msg)
712{
713 struct GNUNET_FS_PublishContext *sc = cls;
714 struct GNUNET_FS_FileInformation *p;
715 const char *emsg;
716 uint16_t msize;
717
718 GNUNET_CLIENT_disconnect (sc->client);
719 sc->client = NULL;
720 p = sc->fi_pos;
721 if (msg == NULL)
722 {
723 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
724 _("Can not index file `%s': %s. Will try to insert instead.\n"),
725 p->data.file.filename,
726 _("timeout on index-start request to `fs' service"));
727 p->data.file.do_index = GNUNET_NO;
728 publish_content (sc, p);
729 return;
730 }
731 if (ntohs (msg->type) != GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK)
732 {
733 msize = ntohs (msg->size);
734 emsg = (const char *) &msg[1];
735 if ( (msize <= sizeof (struct GNUNET_MessageHeader)) ||
736 (emsg[msize - sizeof(struct GNUNET_MessageHeader) - 1] != '\0') )
737 emsg = gettext_noop ("unknown error");
738 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
739 _("Can not index file `%s': %s. Will try to insert instead.\n"),
740 p->data.file.filename,
741 gettext (emsg));
742 p->data.file.do_index = GNUNET_NO;
743 publish_content (sc, p);
744 return;
745 }
746 /* success! continue with indexing */
747 publish_content (sc, p);
748}
749
750
751#if LINUX
752#include <sys/statvfs.h>
753#endif
754
755/**
756 * Function called once the hash computation over an
757 * indexed file has completed.
758 *
759 * @param cls closure, our publishing context
760 * @param res resulting hash, NULL on error
761 */
762static void
763hash_for_index_cb (void *cls,
764 const GNUNET_HashCode *
765 res)
766{
767 struct GNUNET_FS_PublishContext *sc = cls;
768 struct GNUNET_FS_FileInformation *p;
769 struct IndexStartMessage *ism;
770 size_t slen;
771 struct GNUNET_CLIENT_Connection *client;
772#if LINUX
773 struct stat sbuf;
774 struct statvfs fbuf;
775#endif
776
777 p = sc->fi_pos;
778 if (NULL == res)
779 {
780 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
781 _("Can not index file `%s': %s. Will try to insert instead.\n"),
782 p->data.file.filename,
783 _("failed to compute hash"));
784 p->data.file.do_index = GNUNET_NO;
785 publish_content (sc, p);
786 return;
787 }
788 slen = strlen (p->data.file.filename) + 1;
789 if (slen > GNUNET_SERVER_MAX_MESSAGE_SIZE - sizeof(struct IndexStartMessage))
790 {
791 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
792 _("Can not index file `%s': %s. Will try to insert instead.\n"),
793 p->data.file.filename,
794 _("filename too long"));
795 p->data.file.do_index = GNUNET_NO;
796 publish_content (sc, p);
797 return;
798 }
799 client = GNUNET_CLIENT_connect (sc->h->sched,
800 "fs",
801 sc->h->cfg);
802 if (NULL == client)
803 {
804 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
805 _("Can not index file `%s': %s. Will try to insert instead.\n"),
806 p->data.file.filename,
807 _("could not connect to `fs' service"));
808 p->data.file.do_index = GNUNET_NO;
809 publish_content (sc, p);
810 return;
811 }
812 p->data.file.file_id = *res;
813 ism = GNUNET_malloc (sizeof(struct IndexStartMessage) +
814 slen);
815 ism->header.size = htons(sizeof(struct IndexStartMessage) +
816 slen);
817 ism->header.type = htons(GNUNET_MESSAGE_TYPE_FS_INDEX_START);
818 /* FIXME: activate this on other OSes that
819 support it (or something very similar; make
820 sure to also adjust corresponding code
821 on the service-side) */
822 /* FIXME: the block below should probably be
823 abstracted into a function in the DISK API */
824#if LINUX
825 if ( (0 == stat(p->data.file.filename,
826 &sbuf)) &&
827 (0 == statvfs (p->data.file.filename,
828 &fbuf) ) )
829 {
830 ism->device = htonl ((uint32_t) fbuf.f_fsid);
831 ism->inode = GNUNET_htonll( (uint64_t) sbuf.st_ino);
832 }
833#endif
834 memcpy (&ism[1],
835 p->data.file.filename,
836 slen);
837 sc->client = client;
838 GNUNET_CLIENT_transmit_and_get_response (client,
839 &ism->header,
840 GNUNET_TIME_UNIT_FOREVER_REL,
841 &process_index_start_response,
842 sc);
843 GNUNET_free (ism);
844}
845
846
671/** 847/**
672 * Main function that performs the upload. 848 * Main function that performs the upload.
673 * @param cls "struct GNUNET_FS_PublishContext" identifies the upload 849 * @param cls "struct GNUNET_FS_PublishContext" identifies the upload
@@ -744,9 +920,23 @@ do_upload (void *cls,
744 if ( (!p->is_directory) && 920 if ( (!p->is_directory) &&
745 (p->data.file.do_index) ) 921 (p->data.file.do_index) )
746 { 922 {
747 // FIXME: need to pre-compute hash over 923 if (NULL == p->data.file.filename)
748 // the entire file and ask FS to prepare 924 {
749 // for indexing! 925 p->data.file.do_index = GNUNET_NO;
926 GNUNET_log (GNUNET_ERROR_TYPE_WARNING,
927 _("Can not index file `%s': %s. Will try to insert instead.\n"),
928 "<no-name>",
929 _("needs to be an actual file"));
930 publish_content (sc, p);
931 return;
932 }
933 GNUNET_CRYPTO_hash_file (sc->h->sched,
934 GNUNET_SCHEDULER_PRIORITY_IDLE,
935 GNUNET_NO,
936 p->data.file.filename,
937 HASHING_BLOCKSIZE,
938 &hash_for_index_cb,
939 sc);
750 return; 940 return;
751 } 941 }
752 publish_content (sc, p); 942 publish_content (sc, p);
diff --git a/src/include/gnunet_datastore_service.h b/src/include/gnunet_datastore_service.h
index 3805dea97..acd9af1de 100644
--- a/src/include/gnunet_datastore_service.h
+++ b/src/include/gnunet_datastore_service.h
@@ -46,7 +46,8 @@ extern "C"
46#define GNUNET_DATASTORE_BLOCKTYPE_IBLOCK 2 46#define GNUNET_DATASTORE_BLOCKTYPE_IBLOCK 2
47#define GNUNET_DATASTORE_BLOCKTYPE_KBLOCK 3 47#define GNUNET_DATASTORE_BLOCKTYPE_KBLOCK 3
48#define GNUNET_DATASTORE_BLOCKTYPE_SBLOCK 4 48#define GNUNET_DATASTORE_BLOCKTYPE_SBLOCK 4
49#define GNUNET_DATASTORE_BLOCKTYPE_SKBLOCK 5 49#define GNUNET_DATASTORE_BLOCKTYPE_ONDEMAND 5
50#define GNUNET_DATASTORE_BLOCKTYPE_SKBLOCK 6 /* not yet used */
50 51
51/** 52/**
52 * Handle to the datastore service. 53 * Handle to the datastore service.
diff --git a/src/include/gnunet_protocols.h b/src/include/gnunet_protocols.h
index 419bbe28d..686205c31 100644
--- a/src/include/gnunet_protocols.h
+++ b/src/include/gnunet_protocols.h
@@ -367,6 +367,24 @@ extern "C"
367 */ 367 */
368#define GNUNET_MESSAGE_TYPE_DATASTORE_DROP 102 368#define GNUNET_MESSAGE_TYPE_DATASTORE_DROP 102
369 369
370
371/**
372 * Message sent by fs client to start indexing.
373 */
374#define GNUNET_MESSAGE_TYPE_FS_INDEX_START 128
375
376/**
377 * Affirmative response to a request for start indexing.
378 */
379#define GNUNET_MESSAGE_TYPE_FS_INDEX_START_OK 129
380
381
382/**
383 * Response to a request for start indexing that
384 * refuses.
385 */
386#define GNUNET_MESSAGE_TYPE_FS_INDEX_START_FAILED 130
387
370/* 388/*
371 TODO: 389 TODO:
372 - DV 390 - DV