aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2011-12-25 21:16:11 +0000
committerChristian Grothoff <christian@grothoff.org>2011-12-25 21:16:11 +0000
commitcdee03748e83189713b32bb87bc77cde659c20d9 (patch)
treeddafc550fe8b6038106ba81140cf6e94e52f6858 /src
parentfd20a7d04e49f9317de85b7f40f79d447d6e1715 (diff)
downloadgnunet-cdee03748e83189713b32bb87bc77cde659c20d9.tar.gz
gnunet-cdee03748e83189713b32bb87bc77cde659c20d9.zip
-remove code for keyword canonicalization/normalization; it makes no sense in an international application, and the normalization methods are questionable to begin with
Diffstat (limited to 'src')
-rw-r--r--src/fs/fs_file_information.c15
-rw-r--r--src/fs/fs_uri.c123
-rw-r--r--src/include/gnunet_fs_service.h24
3 files changed, 11 insertions, 151 deletions
diff --git a/src/fs/fs_file_information.c b/src/fs/fs_file_information.c
index 0925d57ec..4ea264892 100644
--- a/src/fs/fs_file_information.c
+++ b/src/fs/fs_file_information.c
@@ -341,7 +341,6 @@ dir_scan_cb (void *cls, const char *filename)
341 struct DirScanCls *dsc = cls; 341 struct DirScanCls *dsc = cls;
342 struct stat sbuf; 342 struct stat sbuf;
343 struct GNUNET_FS_FileInformation *fi; 343 struct GNUNET_FS_FileInformation *fi;
344 struct GNUNET_FS_Uri *ksk_uri;
345 struct GNUNET_FS_Uri *keywords; 344 struct GNUNET_FS_Uri *keywords;
346 struct GNUNET_CONTAINER_MetaData *meta; 345 struct GNUNET_CONTAINER_MetaData *meta;
347 346
@@ -370,13 +369,11 @@ dir_scan_cb (void *cls, const char *filename)
370 meta = GNUNET_CONTAINER_meta_data_create (); 369 meta = GNUNET_CONTAINER_meta_data_create ();
371 GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors); 370 GNUNET_FS_meta_data_extract_from_file (meta, filename, dsc->extractors);
372 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta); 371 keywords = GNUNET_FS_uri_ksk_create_from_meta_data (meta);
373 ksk_uri = GNUNET_FS_uri_ksk_canonicalize (keywords);
374 fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename, 372 fi = GNUNET_FS_file_information_create_from_file (dsc->h, NULL, filename,
375 ksk_uri, meta, 373 keywords, meta,
376 dsc->do_index, dsc->bo); 374 dsc->do_index, dsc->bo);
377 GNUNET_CONTAINER_meta_data_destroy (meta); 375 GNUNET_CONTAINER_meta_data_destroy (meta);
378 GNUNET_FS_uri_destroy (keywords); 376 GNUNET_FS_uri_destroy (keywords);
379 GNUNET_FS_uri_destroy (ksk_uri);
380 } 377 }
381 dsc->proc (dsc->proc_cls, filename, fi); 378 dsc->proc (dsc->proc_cls, filename, fi);
382 return GNUNET_OK; 379 return GNUNET_OK;
@@ -723,7 +720,6 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
723 struct EntryProcCls dc; 720 struct EntryProcCls dc;
724 const char *fn; 721 const char *fn;
725 const char *ss; 722 const char *ss;
726 struct GNUNET_FS_Uri *cksk;
727 char *dn; 723 char *dn;
728 struct GNUNET_FS_FileInformation *epos; 724 struct GNUNET_FS_FileInformation *epos;
729 unsigned int i; 725 unsigned int i;
@@ -747,21 +743,20 @@ GNUNET_FS_file_information_create_from_directory (struct GNUNET_FS_Handle *h,
747 &compute_directory_keywords, &cdmc); 743 &compute_directory_keywords, &cdmc);
748 GNUNET_CONTAINER_multihashmap_destroy (dc.metamap); 744 GNUNET_CONTAINER_multihashmap_destroy (dc.metamap);
749 GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap); 745 GNUNET_CONTAINER_multihashmap_destroy (dc.keywordmap);
750 GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO);
751 cksk = GNUNET_FS_uri_ksk_canonicalize (cdmc.ksk);
752 746
753 /* remove keywords in children that are already in the 747 /* remove keywords in children that are already in the
754 * parent */ 748 * parent */
755 for (epos = dc.entries; NULL != epos; epos = epos->next) 749 for (epos = dc.entries; NULL != epos; epos = epos->next)
756 { 750 {
757 for (i = 0; i < cksk->data.ksk.keywordCount; i++) 751 for (i = 0; i < cdmc.ksk->data.ksk.keywordCount; i++)
758 { 752 {
759 kw = cksk->data.ksk.keywords[i]; 753 kw = cdmc.ksk->data.ksk.keywords[i];
760 GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]); 754 GNUNET_FS_uri_ksk_remove_keyword (epos->keywords, &kw[1]);
761 } 755 }
762 } 756 }
757 GNUNET_FS_uri_ksk_add_keyword (cdmc.ksk, GNUNET_FS_DIRECTORY_MIME, GNUNET_NO);
763 ret = 758 ret =
764 GNUNET_FS_file_information_create_empty_directory (h, client_info, cksk, 759 GNUNET_FS_file_information_create_empty_directory (h, client_info, cdmc.ksk,
765 cdmc.meta, bo); 760 cdmc.meta, bo);
766 GNUNET_CONTAINER_meta_data_destroy (cdmc.meta); 761 GNUNET_CONTAINER_meta_data_destroy (cdmc.meta);
767 GNUNET_FS_uri_destroy (cdmc.ksk); 762 GNUNET_FS_uri_destroy (cdmc.ksk);
diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c
index 55503b71b..d3fcdd8ca 100644
--- a/src/fs/fs_uri.c
+++ b/src/fs/fs_uri.c
@@ -970,129 +970,6 @@ GNUNET_FS_uri_sks_create_from_nsid (GNUNET_HashCode * nsid, const char *id)
970 970
971 971
972/** 972/**
973 * Canonicalize a keyword.
974 *
975 * @param in input string (the keyword)
976 * @return canonicalized keyword
977 */
static char *
canonicalize_keyword (const char *in)
{
  /* Character classes, matched against the lower-cased input byte.
   * NOTE(review): 'q' is not part of the consonant set, so 'q'/'Q' end up
   * in the catch-all branch and are emitted as '_' -- confirm whether that
   * omission is intentional before changing it. */
  static const char dropped[] = "aeiou \t\n\r";
  static const char consonants[] = "bcdfghjklmnprstvwxyz";
  static const char verbatim[] = "!.?-";
  char *out;
  char *dst;
  const char *src;

  /* The result is never longer than the input, so a copy of the input
   * serves as the output buffer. */
  out = GNUNET_strdup (in);
  dst = out;
  for (src = in; '\0' != *src; src++)
  {
    const unsigned char raw = (unsigned char) *src;
    const int lower = tolower (raw);

    if (NULL != memchr (dropped, lower, sizeof (dropped) - 1))
      continue;                 /* vowels and blanks are discarded */
    if (NULL != memchr (consonants, lower, sizeof (consonants) - 1))
    {
      /* listed consonants are stored in lower case */
      *dst = lower;
      dst++;
    }
    else if (NULL != memchr (verbatim, lower, sizeof (verbatim) - 1))
    {
      /* selected punctuation is copied unchanged */
      *dst = *src;
      dst++;
    }
    else if (! (isspace (raw) || isdigit (raw)))
    {
      /* anything else that is neither whitespace nor a digit becomes '_';
       * remaining whitespace and digits are discarded */
      *dst = '_';
      dst++;
    }
  }
  *dst = '\0';
  return out;
}
1048
1049
1050/**
1051 * Canonicalize keyword URI. Performs operations such
1052 * as decapitalization and removal of certain characters.
1053 * (useful for search).
1054 *
1055 * @param uri the URI to canonicalize
1056 * @return canonicalized version of the URI, NULL on error
1057 */
1058struct GNUNET_FS_Uri *
1059GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri)
1060{
1061 struct GNUNET_FS_Uri *ret;
1062 unsigned int kc;
1063 unsigned int i;
1064 const char *in;
1065 char *sb;
1066 char *cc;
1067 const char *tok;
1068
1069 ret = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri));
1070 ret->type = ksk;
1071 kc = uri->data.ksk.keywordCount;
1072 for (i = 0; i < kc; i++)
1073 {
1074 in = uri->data.ksk.keywords[i];
1075 GNUNET_FS_uri_ksk_add_keyword (ret, &in[1],
1076 (in[0] == '+') ? GNUNET_YES : GNUNET_NO);
1077 sb = GNUNET_strdup (&in[1]);
1078#define DELIMS " \\|\"'`/&@-_,.;!?+-*^$#~=[]{}()<>"
1079 for (tok = strtok (sb, DELIMS); NULL != tok; tok = strtok (NULL, DELIMS))
1080#undef DELIMS
1081 {
1082 if (strlen (tok) < 3)
1083 continue;
1084 GNUNET_FS_uri_ksk_add_keyword (ret, tok, GNUNET_NO);
1085 cc = canonicalize_keyword (tok);
1086 if (strlen (cc) > 2)
1087 GNUNET_FS_uri_ksk_add_keyword (ret, cc, GNUNET_NO);
1088 }
1089 GNUNET_free (sb);
1090 }
1091 return ret;
1092}
1093
1094
1095/**
1096 * Merge the sets of keywords from two KSK URIs. 973 * Merge the sets of keywords from two KSK URIs.
1097 * (useful for merging the canonicalized keywords with 974 * (useful for merging the canonicalized keywords with
1098 * the original keywords for sharing). 975 * the original keywords for sharing).
diff --git a/src/include/gnunet_fs_service.h b/src/include/gnunet_fs_service.h
index db1d74589..ad4441bd3 100644
--- a/src/include/gnunet_fs_service.h
+++ b/src/include/gnunet_fs_service.h
@@ -53,7 +53,7 @@ extern "C"
53 * 6.1.x: with simplified namespace support 53 * 6.1.x: with simplified namespace support
54 * 9.0.0: CPS-style integrated API 54 * 9.0.0: CPS-style integrated API
55 */ 55 */
56#define GNUNET_FS_VERSION 0x00090000 56#define GNUNET_FS_VERSION 0x00090001
57 57
58 58
59/* ******************** URI API *********************** */ 59/* ******************** URI API *********************** */
@@ -228,21 +228,7 @@ GNUNET_FS_uri_loc_create (const struct GNUNET_FS_Uri *baseUri,
228 228
229 229
230/** 230/**
231 * Canonicalize keyword URI. Performs operations such
232 * as decapitalization and removal of certain characters.
233 * (useful for search).
234 *
235 * @param uri the URI to canonicalize
236 * @return canonicalized version of the URI, NULL on error
237 */
238struct GNUNET_FS_Uri *
239GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri);
240
241
242/**
243 * Merge the sets of keywords from two KSK URIs. 231 * Merge the sets of keywords from two KSK URIs.
244 * (useful for merging the canonicalized keywords with
245 * the original keywords for sharing).
246 * 232 *
247 * @param u1 first uri 233 * @param u1 first uri
248 * @param u2 second uri 234 * @param u2 second uri
@@ -1898,9 +1884,11 @@ typedef int (*GNUNET_FS_DirectoryScanner) (void *cls,
1898 * files (those starting with a "."). Metadata will be extracted 1884 * files (those starting with a "."). Metadata will be extracted
1899 * using GNU libextractor; the specific list of plugins should be 1885 * using GNU libextractor; the specific list of plugins should be
1900 * specified in "cls", passing NULL will disable (!) metadata 1886 * specified in "cls", passing NULL will disable (!) metadata
1901 * extraction. Keywords will be derived from the metadata and be 1887 * extraction. Keywords will be derived from the metadata and
1902 * subject to default canonicalization. This is strictly a 1888 * associated with directories as appropriate. This is strictly a
1903 * convenience function. 1889 * convenience function (however, if all tools use it, there will
1890 * be less of a chance of distinguishing users by the specific
1891 * user-interface they were using).
1904 * 1892 *
1905 * @param cls must be of type "struct EXTRACTOR_Extractor*" 1893 * @param cls must be of type "struct EXTRACTOR_Extractor*"
1906 * @param h handle to the file sharing subsystem 1894 * @param h handle to the file sharing subsystem