From cdee03748e83189713b32bb87bc77cde659c20d9 Mon Sep 17 00:00:00 2001 From: Christian Grothoff Date: Sun, 25 Dec 2011 21:16:11 +0000 Subject: -remove code for keyword canonicalization/normalization, makes no sense in international application, normalization methods are questionable to begin with --- src/fs/fs_uri.c | 123 -------------------------------------------------------- 1 file changed, 123 deletions(-) (limited to 'src/fs/fs_uri.c') diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c index 55503b71b..d3fcdd8ca 100644 --- a/src/fs/fs_uri.c +++ b/src/fs/fs_uri.c @@ -969,129 +969,6 @@ GNUNET_FS_uri_sks_create_from_nsid (GNUNET_HashCode * nsid, const char *id) } -/** - * Canonicalize a keyword. - * - * @param in input string (the keyword) - * @return canonicalized keyword - */ -static char * -canonicalize_keyword (const char *in) -{ - char *ret; - char *wpos; - const char *rpos; - - ret = GNUNET_strdup (in); - wpos = ret; - rpos = in; - while ('\0' != *rpos) - { - switch (tolower ((unsigned char) *rpos)) - { - case 'a': - case 'e': - case 'i': - case 'o': - case 'u': - case ' ': - case '\t': - case '\n': - case '\r': - /* skip characters listed above */ - break; - case 'b': - case 'c': - case 'd': - case 'f': - case 'g': - case 'h': - case 'j': - case 'k': - case 'l': - case 'm': - case 'n': - case 'p': - case 'r': - case 's': - case 't': - case 'v': - case 'w': - case 'x': - case 'y': - case 'z': - /* convert characters listed above to lower case */ - *wpos = tolower ((unsigned char) *rpos); - wpos++; - break; - case '!': - case '.': - case '?': - case '-': - /* keep characters listed above without changes */ - *wpos = *rpos; - wpos++; - break; - default: - if (isspace ((unsigned char) *rpos) || isdigit ((unsigned char) *rpos)) - break; - /* replace characters listed above with '_' */ - *wpos = '_'; - wpos++; - break; - } - rpos++; - } - *wpos = '\0'; - return ret; -} - - -/** - * Canonicalize keyword URI. 
Performs operations such - * as decapitalization and removal of certain characters. - * (useful for search). - * - * @param uri the URI to canonicalize - * @return canonicalized version of the URI, NULL on error - */ -struct GNUNET_FS_Uri * -GNUNET_FS_uri_ksk_canonicalize (const struct GNUNET_FS_Uri *uri) -{ - struct GNUNET_FS_Uri *ret; - unsigned int kc; - unsigned int i; - const char *in; - char *sb; - char *cc; - const char *tok; - - ret = GNUNET_malloc (sizeof (struct GNUNET_FS_Uri)); - ret->type = ksk; - kc = uri->data.ksk.keywordCount; - for (i = 0; i < kc; i++) - { - in = uri->data.ksk.keywords[i]; - GNUNET_FS_uri_ksk_add_keyword (ret, &in[1], - (in[0] == '+') ? GNUNET_YES : GNUNET_NO); - sb = GNUNET_strdup (&in[1]); -#define DELIMS " \\|\"'`/&@-_,.;!?+-*^$#~=[]{}()<>" - for (tok = strtok (sb, DELIMS); NULL != tok; tok = strtok (NULL, DELIMS)) -#undef DELIMS - { - if (strlen (tok) < 3) - continue; - GNUNET_FS_uri_ksk_add_keyword (ret, tok, GNUNET_NO); - cc = canonicalize_keyword (tok); - if (strlen (cc) > 2) - GNUNET_FS_uri_ksk_add_keyword (ret, cc, GNUNET_NO); - } - GNUNET_free (sb); - } - return ret; -} - - /** * Merge the sets of keywords from two KSK URIs. * (useful for merging the canonicalized keywords with -- cgit v1.2.3