-LRN: use correct character counting, instead of byte counting

author: Christian Grothoff <christian@grothoff.org> 2012-01-14 15:25:14 +0000
committer: Christian Grothoff <christian@grothoff.org> 2012-01-14 15:25:14 +0000
commit: 429c91dd404c7d938004e9f3c90ed5dcd29823ff (patch)
tree: c43a4514a9f7acb010b8e959c547530e04abebc3 /src
parent: f83ef0e823c657c6bdb0b8e98b16b234b49777f3 (diff)
download: gnunet-429c91dd404c7d938004e9f3c90ed5dcd29823ff.tar.gz
gnunet-429c91dd404c7d938004e9f3c90ed5dcd29823ff.zip
1 files changed, 20 insertions, 3 deletions
diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c
index 157295b09..fac478d38 100644
--- a/src/fs/fs_uri.c
+++ b/src/fs/fs_uri.c
@@ -1541,6 +1541,23 @@ normalize_metadata (enum EXTRACTOR_MetaFormat format, const char *data,
  return (char *) normalized;
 }
+/**
+ * Counts the number of UTF-8 characters (not bytes) in the string @a s,
+ * which must not be NULL (asserted); an empty string yields 0.
+ */
+static size_t
+u8_strcount (const uint8_t *s)
+{
+  size_t count;
+  ucs4_t c; /* receives each decoded character; its value is not used */
+  GNUNET_assert (s != NULL);
+  if (s[0] == 0)
+    return 0; /* first byte is 0: nothing to count */
+  for (count = 0; s != NULL; count++) /* one u8_next() step per pass */
+    s = u8_next (&c, s); /* the loop ends once this returns NULL */
+  return count - 1; /* the call that returned NULL was counted as well */
+}
 /**
 * Break the filename up by matching [], () and {} pairs to make
@@ -1600,7 +1617,7 @@ get_keywords_from_parens (const char *s, char **array, int index)
      tmp = close_paren[0];
      close_paren[0] = '\0';
      /* Keywords must be at least 3 characters long */
-      if (u8_strlen ((const uint8_t *) &open_paren[1]) <= 2)
+      if (u8_strcount ((const uint8_t *) &open_paren[1]) <= 2)
      {
        close_paren[0] = tmp;
        continue;
@@ -1669,7 +1686,7 @@ get_keywords_from_tokens (const char *s, char **array, int index)
  for (p = strtok (ss, TOKENS); p != NULL; p = strtok (NULL, TOKENS))
  {
    /* Keywords must be at least 3 characters long */
-    if (u8_strlen ((const uint8_t *) p) <= 2)
+    if (u8_strcount ((const uint8_t *) p) <= 2)
      continue;
    if (NULL != array)
    {
@@ -1735,7 +1752,7 @@ gather_uri_data (void *cls, const char *plugin_name,
   * and will return the length of its valid part, skipping the keyword.
   * If it does - fix the extractor, not this check!
   */
-  if (u8_strlen ((const uint8_t *) data) <= 2)
+  if (u8_strcount ((const uint8_t *) data) <= 2)
  {
    return 0;
  }
author	Christian Grothoff <christian@grothoff.org>	2012-01-14 15:25:14 +0000
committer	Christian Grothoff <christian@grothoff.org>	2012-01-14 15:25:14 +0000
commit	429c91dd404c7d938004e9f3c90ed5dcd29823ff (patch)
tree	c43a4514a9f7acb010b8e959c547530e04abebc3 /src
parent	f83ef0e823c657c6bdb0b8e98b16b234b49777f3 (diff)
download	gnunet-429c91dd404c7d938004e9f3c90ed5dcd29823ff.tar.gz gnunet-429c91dd404c7d938004e9f3c90ed5dcd29823ff.zip