diff options
author | Christian Grothoff <christian@grothoff.org> | 2011-12-25 21:16:11 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2011-12-25 21:16:11 +0000 |
commit | cdee03748e83189713b32bb87bc77cde659c20d9 (patch) | |
tree | ddafc550fe8b6038106ba81140cf6e94e52f6858 /src | |
parent | fd20a7d04e49f9317de85b7f40f79d447d6e1715 (diff) | |
download | gnunet-cdee03748e83189713b32bb87bc77cde659c20d9.tar.gz gnunet-cdee03748e83189713b32bb87bc77cde659c20d9.zip |
-remove code for keyword canonicalization/normalization, makes no sense in international application, normalization methods are questionable to begin with
Diffstat (limited to 'src')
-rw-r--r-- | src/fs/fs_file_information.c | 15 | ||||
-rw-r--r-- | src/fs/fs_uri.c | 123 | ||||
-rw-r--r-- | src/include/gnunet_fs_service.h | 24 |
3 files changed, 11 insertions, 151 deletions
diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c index 0925d57ec..4ea264892 100644 --- a/src/fs/fs_file_information.c +++ b/src/fs/fs_file_information.c | |||
@@ -341,7 +341,6 @@ dir_scan_cb (void *cls, const char *filename) | |||
341 | struct DirScanCls *dsc = cls; | 341 | struct DirScanCls *dsc = cls; |
342 | struct stat sbuf; | 342 | struct stat sbuf; |
343 | struct GNUNET_FS_FileInformation *fi; | 343 | struct GNUNET_FS_FileInformation *fi; |
344 | struct GNUNET_FS_Uri *ksk_uri; | ||
345 | struct GNUNET_FS_Uri *keywords; | 344 | struct GNUNET_FS_Uri *keywords; |
346 | struct GNUNET_CONTAINER_MetaData *meta; | 345 | struct GNUNET_CONTAINER_MetaData *meta; |
347 | 346 | ||
@@ -370,13 +369,11 @@ dir_scan_cb (void *cls, const char *filename) | |||
370 | meta = GNUNET_CONTAINER_meta_data_create (); | 369 | meta = GNUNET_CONTAINER_meta_data_create (); |
371 | GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors); | 370 | GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors); |
372 | keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta); | 371 | keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta); |
373 | ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords); | ||
374 | fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename, | 372 | fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename, |
375 | ksk_uri, meta, | 373 | keywords, meta, |
376 | dsc->do_index, dsc->bo); | 374 | dsc->do_index, dsc->bo); |
377 | GNUNET_CONTAINER_meta_data_destroy (meta); | 375 | GNUNET_CONTAINER_meta_data_destroy (meta); |
378 | GNUNET_FS_uri_destroy (keywords); | 376 | GNUNET_FS_uri_destroy (keywords); |
379 | GNUNET_FS_uri_destroy (ksk_uri); | ||
380 | } | 377 | } |
381 | dsc->proc (dsc->proc_cls, filename, fi); | 378 | dsc->proc (dsc->proc_cls, filename, fi); |
382 | return GNUNET_OK; | 379 | return GNUNET_OK; |
@@ -723,7 +720,6 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h, | |||
723 | struct EntryProcCls dc; | 720 | struct EntryProcCls dc; |
724 | const char *fn; | 721 | const char *fn; |
725 | const char *ss; | 722 | const char *ss; |
726 | struct GNUNET_FS_Uri *cksk; | ||
727 | char *dn; | 723 | char *dn; |
728 | struct GNUNET_FS_FileInformation *epos; | 724 | struct GNUNET_FS_FileInformation *epos; |
729 | unsigned int i; | 725 | unsigned int i; |
@@ -747,21 +743,20 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h, | |||
747 | &compute_directory_keywords, &cdmc); | 743 | &compute_directory_keywords, &cdmc); |
748 | GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); | 744 | GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); |
749 | GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap); | 745 | GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap); |
750 | GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO); | ||
751 | cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk); | ||
752 | 746 | ||
753 | /* remove keywords in children that are already in the | 747 | /* remove keywords in children that are already in the |
754 | * parent */ | 748 | * parent */ |
755 | for (epos = dc.entries; NULL != epos; epos = epos->next) | 749 | for (epos = dc.entries; NULL != epos; epos = epos->next) |
756 | { | 750 | { |
757 | for (i = 0; i < cksk->data.ksk.keywordCount; i++) | 751 | for (i = 0; i < cdmc.ksk->data.ksk.keywordCount; i++) |
758 | { | 752 | { |
759 | kw = cksk->data.ksk.keywords[i]; | 753 | kw = cdmc.ksk->data.ksk.keywords[i]; |
760 | GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]); | 754 | GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]); |
761 | } | 755 | } |
762 | } | 756 | } |
757 | GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO); | ||
763 | ret = | 758 | ret = |
764 | GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk, | 759 | GNUNET_FS_file_information_create_empty_directory (h, client_info, cdmc.ksk, |
765 | cdmc.meta, bo); | 760 | cdmc.meta, bo); |
766 | GNUNET_CONTAINER_meta_data_destroy (cdmc.meta); | 761 | GNUNET_CONTAINER_meta_data_destroy (cdmc.meta); |
767 | GNUNET_FS_uri_destroy (cdmc.ksk); | 762 | GNUNET_FS_uri_destroy (cdmc.ksk); |
diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c index 55503b71b..d3fcdd8ca 100644 --- a/src/fs/fs_uri.c +++ b/src/fs/fs_uri.c | |||
@@ -970,129 +970,6 @@ GNUNET_FS_uri_sks_create_from_nsid (GNUNET_HashCode * nsid, const char *id) | |||
970 | 970 | ||
971 | 971 | ||
972 | /** | 972 | /** |
973 | * Canonicalize a keyword. | ||
974 | * | ||
975 | * @param in input string (the keyword) | ||
976 | * @return canonicalized keyword | ||
977 | */ | ||
978 | static char * | ||
979 | canonicalize_keyword (const char *in) | ||
980 | { | ||
981 | char *ret; | ||
982 | char *wpos; | ||
983 | const char *rpos; | ||
984 | |||
985 | ret = GNUNET_strdup (in); | ||
986 | wpos = ret; | ||
987 | rpos = in; | ||
988 | while ('\0' != *rpos) | ||
989 | { | ||
990 | switch (tolower ((unsigned char) *rpos)) | ||
991 | { | ||
992 | case 'a': | ||
993 | case 'e': | ||
994 | case 'i': | ||
995 | case 'o': | ||
996 | case 'u': | ||
997 | case ' ': | ||
998 | case '\t': | ||
999 | case '\n': | ||
1000 | case '\r': | ||
1001 | /* skip characters listed above */ | ||
1002 | break; | ||
1003 | case 'b': | ||
1004 | case 'c': | ||
1005 | case 'd': | ||
1006 | case 'f': | ||
1007 | case 'g': | ||
1008 | case 'h': | ||
1009 | case 'j': | ||
1010 | case 'k': | ||
1011 | case 'l': | ||
1012 | case 'm': | ||
1013 | case 'n': | ||
1014 | case 'p': | ||
1015 | case 'r': | ||
1016 | case 's': | ||
1017 | case 't': | ||
1018 | case 'v': | ||
1019 | case 'w': | ||
1020 | case 'x': | ||
1021 | case 'y': | ||
1022 | case 'z': | ||
1023 | /* convert characters listed above to lower case */ | ||
1024 | *wpos = tolower ((unsigned char) *rpos); | ||
1025 | wpos++; | ||
1026 | break; | ||
1027 | case '!': | ||
1028 | case '.': | ||
1029 | case '?': | ||
1030 | case '-': | ||
1031 | /* keep characters listed above without changes */ | ||
1032 | *wpos = *rpos; | ||
1033 | wpos++; | ||
1034 | break; | ||
1035 | default: | ||
1036 | if (isspace ((unsigned char) *rpos) || isdigit ((unsigned char) *rpos)) | ||
1037 | break; | ||
1038 | /* replace characters listed above with '_' */ | ||
1039 | *wpos = '_'; | ||
1040 | wpos++; | ||
1041 | break; | ||
1042 | } | ||
1043 | rpos++; | ||
1044 | } | ||
1045 | *wpos = '\0'; | ||
1046 | return ret; | ||
1047 | } | ||
1048 | |||
1049 | |||
1050 | /** | ||
1051 | * Canonicalize keyword URI. Performs operations such | ||
1052 | * as decapitalization and removal of certain characters. | ||
1053 | * (useful for search). | ||
1054 | * | ||
1055 | * @param uri the URI to canonicalize | ||
1056 | * @return canonicalized version of the URI, NULL on error | ||
1057 | */ | ||
1058 | struct GNUNET_FS_Uri * | ||
1059 | GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri) | ||
1060 | { | ||
1061 | struct GNUNET_FS_Uri *ret; | ||
1062 | unsigned int kc; | ||
1063 | unsigned int i; | ||
1064 | const char *in; | ||
1065 | char *sb; | ||
1066 | char *cc; | ||
1067 | const char *tok; | ||
1068 | |||
1069 | ret = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri)); | ||
1070 | ret->type = ksk; | ||
1071 | kc = uri->data.ksk.keywordCount; | ||
1072 | for (i = 0; i < kc; i++) | ||
1073 | { | ||
1074 | in = uri->data.ksk.keywords[i]; | ||
1075 | GNUNET_FS_uri_ksk_add_keyword (ret, &in[1], | ||
1076 | (in[0] == '+') ? GNUNET_YES : GNUNET_NO); | ||
1077 | sb = GNUNET_strdup (&in[1]); | ||
1078 | #define DELIMS " \\|\"'`/&@-_,.;!?+-*^$#~=[]{}()<>" | ||
1079 | for (tok = strtok (sb, DELIMS); NULL != tok; tok = strtok (NULL, DELIMS)) | ||
1080 | #undef DELIMS | ||
1081 | { | ||
1082 | if (strlen (tok) < 3) | ||
1083 | continue; | ||
1084 | GNUNET_FS_uri_ksk_add_keyword (ret, tok, GNUNET_NO); | ||
1085 | cc = canonicalize_keyword (tok); | ||
1086 | if (strlen (cc) > 2) | ||
1087 | GNUNET_FS_uri_ksk_add_keyword (ret, cc, GNUNET_NO); | ||
1088 | } | ||
1089 | GNUNET_free (sb); | ||
1090 | } | ||
1091 | return ret; | ||
1092 | } | ||
1093 | |||
1094 | |||
1095 | /** | ||
1096 | * Merge the sets of keywords from two KSK URIs. | 973 | * Merge the sets of keywords from two KSK URIs. |
1097 | * (useful for merging the canonicalized keywords with | 974 | * (useful for merging the canonicalized keywords with |
1098 | * the original keywords for sharing). | 975 | * the original keywords for sharing). |
diff --git a/src/include/gnunet_fs_service.h b/src/include/gnunet_fs_service.h index db1d74589..ad4441bd3 100644 --- a/src/include/gnunet_fs_service.h +++ b/src/include/gnunet_fs_service.h | |||
@@ -53,7 +53,7 @@ extern "C" | |||
53 | * 6.1.x: with simplified namespace support | 53 | * 6.1.x: with simplified namespace support |
54 | * 9.0.0: CPS-style integrated API | 54 | * 9.0.0: CPS-style integrated API |
55 | */ | 55 | */ |
56 | #define GNUNET_FS_VERSION 0x00090000 | 56 | #define GNUNET_FS_VERSION 0x00090001 |
57 | 57 | ||
58 | 58 | ||
59 | /* ******************** URI API *********************** */ | 59 | /* ******************** URI API *********************** */ |
@@ -228,21 +228,7 @@ GNUNET_FS_uri_loc_create (const struct GNUNET_FS_Uri *baseUri, | |||
228 | 228 | ||
229 | 229 | ||
230 | /** | 230 | /** |
231 | * Canonicalize keyword URI. Performs operations such | ||
232 | * as decapitalization and removal of certain characters. | ||
233 | * (useful for search). | ||
234 | * | ||
235 | * @param uri the URI to canonicalize | ||
236 | * @return canonicalized version of the URI, NULL on error | ||
237 | */ | ||
238 | struct GNUNET_FS_Uri * | ||
239 | GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri); | ||
240 | |||
241 | |||
242 | /** | ||
243 | * Merge the sets of keywords from two KSK URIs. | 231 | * Merge the sets of keywords from two KSK URIs. |
244 | * (useful for merging the canonicalized keywords with | ||
245 | * the original keywords for sharing). | ||
246 | * | 232 | * |
247 | * @param u1 first uri | 233 | * @param u1 first uri |
248 | * @param u2 second uri | 234 | * @param u2 second uri |
@@ -1898,9 +1884,11 @@ typedef int (*GNUNET_FS_DirectoryScanner) (void *cls, | |||
1898 | * files (those starting with a "."). Metadata will be extracted | 1884 | * files (those starting with a "."). Metadata will be extracted |
1899 | * using GNU libextractor; the specific list of plugins should be | 1885 | * using GNU libextractor; the specific list of plugins should be |
1900 | * specified in "cls", passing NULL will disable (!) metadata | 1886 | * specified in "cls", passing NULL will disable (!) metadata |
1901 | * extraction. Keywords will be derived from the metadata and be | 1887 | * extraction. Keywords will be derived from the metadata and |
1902 | * subject to default canonicalization. This is strictly a | 1888 | * associated with directories as appropriate. This is strictly a |
1903 | * convenience function. | 1889 | * convenience function (however, if all tools use it, there will |
1890 | * be less of a chance of distinguishing users by the specific | ||
1891 | * user-interface they were using). | ||
1904 | * | 1892 | * |
1905 | * @param cls must be of type "struct EXTRACTOR_Extractor*" | 1893 | * @param cls must be of type "struct EXTRACTOR_Extractor*" |
1906 | * @param h handle to the file sharing subsystem | 1894 | * @param h handle to the file sharing subsystem |