aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/fs/Makefile.am2
-rw-r--r--src/fs/fs_uri.c105
2 files changed, 98 insertions, 9 deletions
diff --git a/src/fs/Makefile.am b/src/fs/Makefile.am
index bf355e92f..f480f2845 100644
--- a/src/fs/Makefile.am
+++ b/src/fs/Makefile.am
@@ -43,7 +43,7 @@ libgnunetfs_la_LIBADD = \
43 $(top_builddir)/src/datastore/libgnunetdatastore.la \ 43 $(top_builddir)/src/datastore/libgnunetdatastore.la \
44 $(top_builddir)/src/util/libgnunetutil.la \ 44 $(top_builddir)/src/util/libgnunetutil.la \
45 -lextractor \ 45 -lextractor \
46 $(GN_LIBINTL) $(XLIB) 46 $(GN_LIBINTL) $(XLIB) -lunistring
47 47
48libgnunetfs_la_LDFLAGS = \ 48libgnunetfs_la_LDFLAGS = \
49 $(GN_LIB_LDFLAGS) $(WINFLAGS) \ 49 $(GN_LIB_LDFLAGS) $(WINFLAGS) \
diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c
index a39eba8de..93f936653 100644
--- a/src/fs/fs_uri.c
+++ b/src/fs/fs_uri.c
@@ -82,6 +82,11 @@
82#include "gnunet_fs_service.h" 82#include "gnunet_fs_service.h"
83#include "gnunet_signatures.h" 83#include "gnunet_signatures.h"
84#include "fs_api.h" 84#include "fs_api.h"
85#include <unicase.h>
86#include <unistr.h>
87#include <unistdio.h>
88#include <uniconv.h>
89
85 90
86 91
87/** 92/**
@@ -1493,6 +1498,48 @@ find_duplicate (const char *s, const char **array, int array_length)
1493 return GNUNET_NO; 1498 return GNUNET_NO;
1494} 1499}
1495 1500
1501static char *
1502normalize_metadata (enum EXTRACTOR_MetaFormat format, const char *data,
1503 size_t data_len)
1504{
1505 uint8_t *free_str = NULL;
1506 uint8_t *str_to_normalize = (uint8_t *) data;
1507 uint8_t *normalized;
1508 size_t r_len;
1509 if (str_to_normalize == NULL)
1510 return NULL;
1511 /* Don't trust libextractor */
1512 if (format == EXTRACTOR_METAFORMAT_UTF8)
1513 {
1514 free_str = (uint8_t *) u8_check ((const uint8_t *) data, data_len);
1515 if (free_str == NULL)
1516 free_str = NULL;
1517 else
1518 format = EXTRACTOR_METAFORMAT_C_STRING;
1519 }
1520 if (format == EXTRACTOR_METAFORMAT_C_STRING)
1521 {
1522 free_str = u8_strconv_from_encoding (data, locale_charset (), iconveh_escape_sequence);
1523 if (free_str == NULL)
1524 return NULL;
1525 }
1526
1527 normalized = u8_tolower (str_to_normalize, strlen ((char *) str_to_normalize), NULL, UNINORM_NFD, NULL, &r_len);
1528 /* free_str is allocated by libunistring internally, use free() */
1529 if (free_str != NULL)
1530 free (free_str);
1531 if (normalized != NULL)
1532 {
1533 /* u8_tolower allocates a non-NULL-terminated string! */
1534 free_str = GNUNET_malloc (r_len + 1);
1535 memcpy (free_str, normalized, r_len);
1536 free_str[r_len] = '\0';
1537 free (normalized);
1538 normalized = free_str;
1539 }
1540 return (char *) normalized;
1541}
1542
1496 1543
1497/** 1544/**
1498 * Break the filename up by matching [], () and {} pairs to make 1545 * Break the filename up by matching [], () and {} pairs to make
@@ -1551,14 +1598,29 @@ get_keywords_from_parens (const char *s, char **array, int index)
1551 { 1598 {
1552 if (NULL != array) 1599 if (NULL != array)
1553 { 1600 {
1601 char *normalized;
1554 tmp = close_paren[0]; 1602 tmp = close_paren[0];
1555 close_paren[0] = '\0'; 1603 close_paren[0] = '\0';
1556 if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1], (const char **) array, index + count)) 1604 if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1],
1605 (const char **) array, index + count))
1557 { 1606 {
1558 insert_non_mandatory_keyword ((const char *) &open_paren[1], array, 1607 insert_non_mandatory_keyword ((const char *) &open_paren[1], array,
1559 index + count); 1608 index + count);
1560 count++; 1609 count++;
1561 } 1610 }
1611 normalized = normalize_metadata (EXTRACTOR_METAFORMAT_UTF8,
1612 &open_paren[1], close_paren - &open_paren[1]);
1613 if (normalized != NULL)
1614 {
1615 if (GNUNET_NO == find_duplicate ((const char *) normalized,
1616 (const char **) array, index + count))
1617 {
1618 insert_non_mandatory_keyword ((const char *) normalized, array,
1619 index + count);
1620 count++;
1621 }
1622 GNUNET_free (normalized);
1623 }
1562 close_paren[0] = tmp; 1624 close_paren[0] = tmp;
1563 } 1625 }
1564 else 1626 else
@@ -1601,12 +1663,26 @@ get_keywords_from_tokens (const char *s, char **array, int index)
1601 { 1663 {
1602 if (NULL != array) 1664 if (NULL != array)
1603 { 1665 {
1666 char *normalized;
1604 if (GNUNET_NO == find_duplicate (p, (const char **) array, index + seps)) 1667 if (GNUNET_NO == find_duplicate (p, (const char **) array, index + seps))
1605 { 1668 {
1606 insert_non_mandatory_keyword (p, array, 1669 insert_non_mandatory_keyword (p, array,
1607 index + seps); 1670 index + seps);
1608 seps++; 1671 seps++;
1609 } 1672 }
1673 normalized = normalize_metadata (EXTRACTOR_METAFORMAT_UTF8,
1674 p, strlen (p));
1675 if (normalized != NULL)
1676 {
1677 if (GNUNET_NO == find_duplicate ((const char *) normalized,
1678 (const char **) array, index + seps))
1679 {
1680 insert_non_mandatory_keyword ((const char *) normalized, array,
1681 index + seps);
1682 seps++;
1683 }
1684 GNUNET_free (normalized);
1685 }
1610 } 1686 }
1611 else 1687 else
1612 seps++; 1688 seps++;
@@ -1616,7 +1692,6 @@ get_keywords_from_tokens (const char *s, char **array, int index)
1616} 1692}
1617#undef TOKENS 1693#undef TOKENS
1618 1694
1619
1620/** 1695/**
1621 * Function called on each value in the meta data. 1696 * Function called on each value in the meta data.
1622 * Adds it to the URI. 1697 * Adds it to the URI.
@@ -1640,15 +1715,28 @@ gather_uri_data (void *cls, const char *plugin_name,
1640 const char *data_mime_type, const char *data, size_t data_len) 1715 const char *data_mime_type, const char *data, size_t data_len)
1641{ 1716{
1642 struct GNUNET_FS_Uri *uri = cls; 1717 struct GNUNET_FS_Uri *uri = cls;
1718 char *normalized_data;
1643 1719
1644 if ((format != EXTRACTOR_METAFORMAT_UTF8) && 1720 if ((format != EXTRACTOR_METAFORMAT_UTF8) &&
1645 (format != EXTRACTOR_METAFORMAT_C_STRING)) 1721 (format != EXTRACTOR_METAFORMAT_C_STRING))
1646 return 0; 1722 return 0;
1647 if (find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount)) 1723 normalized_data = normalize_metadata (format, data, data_len);
1648 return GNUNET_OK; 1724 if (!find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount))
1649 insert_non_mandatory_keyword (data, 1725 {
1650 uri->data.ksk.keywords, uri->data.ksk.keywordCount); 1726 insert_non_mandatory_keyword (data,
1651 uri->data.ksk.keywordCount++; 1727 uri->data.ksk.keywords, uri->data.ksk.keywordCount);
1728 uri->data.ksk.keywordCount++;
1729 }
1730 if (normalized_data != NULL)
1731 {
1732 if (!find_duplicate (normalized_data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount))
1733 {
1734 insert_non_mandatory_keyword (normalized_data,
1735 uri->data.ksk.keywords, uri->data.ksk.keywordCount);
1736 uri->data.ksk.keywordCount++;
1737 }
1738 GNUNET_free (normalized_data);
1739 }
1652 return 0; 1740 return 0;
1653} 1741}
1654 1742
@@ -1690,8 +1778,9 @@ GNUNET_FS_uri_ksk_create_from_meta_data (const struct GNUNET_CONTAINER_MetaData
1690 tok_keywords = get_keywords_from_tokens (filename, NULL, 0); 1778 tok_keywords = get_keywords_from_tokens (filename, NULL, 0);
1691 paren_keywords = get_keywords_from_parens (filename, NULL, 0); 1779 paren_keywords = get_keywords_from_parens (filename, NULL, 0);
1692 } 1780 }
1781 /* x2 because there might be a normalized variant of every keyword */
1693 ret->data.ksk.keywords = GNUNET_malloc (sizeof (char *) * (ent 1782 ret->data.ksk.keywords = GNUNET_malloc (sizeof (char *) * (ent
1694 + tok_keywords + paren_keywords)); 1783 + tok_keywords + paren_keywords) * 2);
1695 GNUNET_CONTAINER_meta_data_iterate (md, &gather_uri_data, ret); 1784 GNUNET_CONTAINER_meta_data_iterate (md, &gather_uri_data, ret);
1696 } 1785 }
1697 if (tok_keywords > 0) 1786 if (tok_keywords > 0)