diff options
author | Christian Grothoff <christian@grothoff.org> | 2012-01-14 15:25:14 +0000 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2012-01-14 15:25:14 +0000 |
commit | 429c91dd404c7d938004e9f3c90ed5dcd29823ff (patch) | |
tree | c43a4514a9f7acb010b8e959c547530e04abebc3 /src | |
parent | f83ef0e823c657c6bdb0b8e98b16b234b49777f3 (diff) | |
download | gnunet-429c91dd404c7d938004e9f3c90ed5dcd29823ff.tar.gz gnunet-429c91dd404c7d938004e9f3c90ed5dcd29823ff.zip |
-LRN: use correct character counting, instead of byte counting
Diffstat (limited to 'src')
-rw-r--r-- | src/fs/fs_uri.c | 23 |
1 files changed, 20 insertions, 3 deletions
diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c
index 157295b09..fac478d38 100644
--- a/src/fs/fs_uri.c
+++ b/src/fs/fs_uri.c
@@ -1541,6 +1541,23 @@ normalize_metadata (enum EXTRACTOR_MetaFormat format, const char *data, | |||
1541 | return (char *) normalized; | 1541 | return (char *) normalized; |
1542 | } | 1542 | } |
1543 | 1543 | ||
1544 | /** | ||
1545 | * Counts the number of UTF-8 characters (not bytes) in the string, | ||
1546 | * returns that count. | ||
1547 | */ | ||
1548 | static size_t | ||
1549 | u8_strcount (const uint8_t *s) | ||
1550 | { | ||
1551 | size_t count; | ||
1552 | ucs4_t c; | ||
1553 | GNUNET_assert (s != NULL); | ||
1554 | if (s[0] == 0) | ||
1555 | return 0; | ||
1556 | for (count = 0; s != NULL; count++) | ||
1557 | s = u8_next (&c, s); | ||
1558 | return count - 1; | ||
1559 | } | ||
1560 | |||
1544 | 1561 | ||
1545 | /** | 1562 | /** |
1546 | * Break the filename up by matching [], () and {} pairs to make | 1563 | * Break the filename up by matching [], () and {} pairs to make |
@@ -1600,7 +1617,7 @@ get_keywords_from_parens (const char *s, char **array, int index) | |||
1600 | tmp = close_paren[0]; | 1617 | tmp = close_paren[0]; |
1601 | close_paren[0] = '\0'; | 1618 | close_paren[0] = '\0'; |
1602 | /* Keywords must be at least 3 characters long */ | 1619 | /* Keywords must be at least 3 characters long */ |
1603 | if (u8_strlen ((const uint8_t *) &open_paren[1]) <= 2) | 1620 | if (u8_strcount ((const uint8_t *) &open_paren[1]) <= 2) |
1604 | { | 1621 | { |
1605 | close_paren[0] = tmp; | 1622 | close_paren[0] = tmp; |
1606 | continue; | 1623 | continue; |
@@ -1669,7 +1686,7 @@ get_keywords_from_tokens (const char *s, char **array, int index) | |||
1669 | for (p = strtok (ss, TOKENS); p != NULL; p = strtok (NULL, TOKENS)) | 1686 | for (p = strtok (ss, TOKENS); p != NULL; p = strtok (NULL, TOKENS)) |
1670 | { | 1687 | { |
1671 | /* Keywords must be at least 3 characters long */ | 1688 | /* Keywords must be at least 3 characters long */ |
1672 | if (u8_strlen ((const uint8_t *) p) <= 2) | 1689 | if (u8_strcount ((const uint8_t *) p) <= 2) |
1673 | continue; | 1690 | continue; |
1674 | if (NULL != array) | 1691 | if (NULL != array) |
1675 | { | 1692 | { |
@@ -1735,7 +1752,7 @@ gather_uri_data (void *cls, const char *plugin_name, | |||
1735 | * and will return the length of its valid part, skipping the keyword. | 1752 | * and will return the length of its valid part, skipping the keyword. |
1736 | * If it does - fix the extractor, not this check! | 1753 | * If it does - fix the extractor, not this check! |
1737 | */ | 1754 | */ |
1738 | if (u8_strlen ((const uint8_t *) data) <= 2) | 1755 | if (u8_strcount ((const uint8_t *) data) <= 2) |
1739 | { | 1756 | { |
1740 | return 0; | 1757 | return 0; |
1741 | } | 1758 | } |