diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/fs/Makefile.am | 2 | ||||
-rw-r--r-- | src/fs/fs_uri.c | 105 |
2 files changed, 98 insertions, 9 deletions
diff --git a/src/fs/Makefile.am b/src/fs/Makefile.am index bf355e92f..f480f2845 100644 --- a/src/fs/Makefile.am +++ b/src/fs/Makefile.am | |||
@@ -43,7 +43,7 @@ libgnunetfs_la_LIBADD = \ | |||
43 | $(top_builddir)/src/datastore/libgnunetdatastore.la \ | 43 | $(top_builddir)/src/datastore/libgnunetdatastore.la \ |
44 | $(top_builddir)/src/util/libgnunetutil.la \ | 44 | $(top_builddir)/src/util/libgnunetutil.la \ |
45 | -lextractor \ | 45 | -lextractor \ |
46 | $(GN_LIBINTL) $(XLIB) | 46 | $(GN_LIBINTL) $(XLIB) -lunistring |
47 | 47 | ||
48 | libgnunetfs_la_LDFLAGS = \ | 48 | libgnunetfs_la_LDFLAGS = \ |
49 | $(GN_LIB_LDFLAGS) $(WINFLAGS) \ | 49 | $(GN_LIB_LDFLAGS) $(WINFLAGS) \ |
diff --git a/src/fs/fs_uri.c b/src/fs/fs_uri.c index a39eba8de..93f936653 100644 --- a/src/fs/fs_uri.c +++ b/src/fs/fs_uri.c | |||
@@ -82,6 +82,11 @@ | |||
82 | #include "gnunet_fs_service.h" | 82 | #include "gnunet_fs_service.h" |
83 | #include "gnunet_signatures.h" | 83 | #include "gnunet_signatures.h" |
84 | #include "fs_api.h" | 84 | #include "fs_api.h" |
85 | #include <unicase.h> | ||
86 | #include <unistr.h> | ||
87 | #include <unistdio.h> | ||
88 | #include <uniconv.h> | ||
89 | |||
85 | 90 | ||
86 | 91 | ||
87 | /** | 92 | /** |
@@ -1493,6 +1498,48 @@ find_duplicate (const char *s, const char **array, int array_length) | |||
1493 | return GNUNET_NO; | 1498 | return GNUNET_NO; |
1494 | } | 1499 | } |
1495 | 1500 | ||
1501 | static char * | ||
1502 | normalize_metadata (enum EXTRACTOR_MetaFormat format, const char *data, | ||
1503 | size_t data_len) | ||
1504 | { | ||
1505 | uint8_t *free_str = NULL; | ||
1506 | uint8_t *str_to_normalize = (uint8_t *) data; | ||
1507 | uint8_t *normalized; | ||
1508 | size_t r_len; | ||
1509 | if (str_to_normalize == NULL) | ||
1510 | return NULL; | ||
1511 | /* Don't trust libextractor */ | ||
1512 | if (format == EXTRACTOR_METAFORMAT_UTF8) | ||
1513 | { | ||
1514 | free_str = (uint8_t *) u8_check ((const uint8_t *) data, data_len); | ||
1515 | if (free_str == NULL) | ||
1516 | free_str = NULL; | ||
1517 | else | ||
1518 | format = EXTRACTOR_METAFORMAT_C_STRING; | ||
1519 | } | ||
1520 | if (format == EXTRACTOR_METAFORMAT_C_STRING) | ||
1521 | { | ||
1522 | free_str = u8_strconv_from_encoding (data, locale_charset (), iconveh_escape_sequence); | ||
1523 | if (free_str == NULL) | ||
1524 | return NULL; | ||
1525 | } | ||
1526 | |||
1527 | normalized = u8_tolower (str_to_normalize, strlen ((char *) str_to_normalize), NULL, UNINORM_NFD, NULL, &r_len); | ||
1528 | /* free_str is allocated by libunistring internally, use free() */ | ||
1529 | if (free_str != NULL) | ||
1530 | free (free_str); | ||
1531 | if (normalized != NULL) | ||
1532 | { | ||
1533 | /* u8_tolower allocates a non-NULL-terminated string! */ | ||
1534 | free_str = GNUNET_malloc (r_len + 1); | ||
1535 | memcpy (free_str, normalized, r_len); | ||
1536 | free_str[r_len] = '\0'; | ||
1537 | free (normalized); | ||
1538 | normalized = free_str; | ||
1539 | } | ||
1540 | return (char *) normalized; | ||
1541 | } | ||
1542 | |||
1496 | 1543 | ||
1497 | /** | 1544 | /** |
1498 | * Break the filename up by matching [], () and {} pairs to make | 1545 | * Break the filename up by matching [], () and {} pairs to make |
@@ -1551,14 +1598,29 @@ get_keywords_from_parens (const char *s, char **array, int index) | |||
1551 | { | 1598 | { |
1552 | if (NULL != array) | 1599 | if (NULL != array) |
1553 | { | 1600 | { |
1601 | char *normalized; | ||
1554 | tmp = close_paren[0]; | 1602 | tmp = close_paren[0]; |
1555 | close_paren[0] = '\0'; | 1603 | close_paren[0] = '\0'; |
1556 | if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1], (const char **) array, index + count)) | 1604 | if (GNUNET_NO == find_duplicate ((const char *) &open_paren[1], |
1605 | (const char **) array, index + count)) | ||
1557 | { | 1606 | { |
1558 | insert_non_mandatory_keyword ((const char *) &open_paren[1], array, | 1607 | insert_non_mandatory_keyword ((const char *) &open_paren[1], array, |
1559 | index + count); | 1608 | index + count); |
1560 | count++; | 1609 | count++; |
1561 | } | 1610 | } |
1611 | normalized = normalize_metadata (EXTRACTOR_METAFORMAT_UTF8, | ||
1612 | &open_paren[1], close_paren - &open_paren[1]); | ||
1613 | if (normalized != NULL) | ||
1614 | { | ||
1615 | if (GNUNET_NO == find_duplicate ((const char *) normalized, | ||
1616 | (const char **) array, index + count)) | ||
1617 | { | ||
1618 | insert_non_mandatory_keyword ((const char *) normalized, array, | ||
1619 | index + count); | ||
1620 | count++; | ||
1621 | } | ||
1622 | GNUNET_free (normalized); | ||
1623 | } | ||
1562 | close_paren[0] = tmp; | 1624 | close_paren[0] = tmp; |
1563 | } | 1625 | } |
1564 | else | 1626 | else |
@@ -1601,12 +1663,26 @@ get_keywords_from_tokens (const char *s, char **array, int index) | |||
1601 | { | 1663 | { |
1602 | if (NULL != array) | 1664 | if (NULL != array) |
1603 | { | 1665 | { |
1666 | char *normalized; | ||
1604 | if (GNUNET_NO == find_duplicate (p, (const char **) array, index + seps)) | 1667 | if (GNUNET_NO == find_duplicate (p, (const char **) array, index + seps)) |
1605 | { | 1668 | { |
1606 | insert_non_mandatory_keyword (p, array, | 1669 | insert_non_mandatory_keyword (p, array, |
1607 | index + seps); | 1670 | index + seps); |
1608 | seps++; | 1671 | seps++; |
1609 | } | 1672 | } |
1673 | normalized = normalize_metadata (EXTRACTOR_METAFORMAT_UTF8, | ||
1674 | p, strlen (p)); | ||
1675 | if (normalized != NULL) | ||
1676 | { | ||
1677 | if (GNUNET_NO == find_duplicate ((const char *) normalized, | ||
1678 | (const char **) array, index + seps)) | ||
1679 | { | ||
1680 | insert_non_mandatory_keyword ((const char *) normalized, array, | ||
1681 | index + seps); | ||
1682 | seps++; | ||
1683 | } | ||
1684 | GNUNET_free (normalized); | ||
1685 | } | ||
1610 | } | 1686 | } |
1611 | else | 1687 | else |
1612 | seps++; | 1688 | seps++; |
@@ -1616,7 +1692,6 @@ get_keywords_from_tokens (const char *s, char **array, int index) | |||
1616 | } | 1692 | } |
1617 | #undef TOKENS | 1693 | #undef TOKENS |
1618 | 1694 | ||
1619 | |||
1620 | /** | 1695 | /** |
1621 | * Function called on each value in the meta data. | 1696 | * Function called on each value in the meta data. |
1622 | * Adds it to the URI. | 1697 | * Adds it to the URI. |
@@ -1640,15 +1715,28 @@ gather_uri_data (void *cls, const char *plugin_name, | |||
1640 | const char *data_mime_type, const char *data, size_t data_len) | 1715 | const char *data_mime_type, const char *data, size_t data_len) |
1641 | { | 1716 | { |
1642 | struct GNUNET_FS_Uri *uri = cls; | 1717 | struct GNUNET_FS_Uri *uri = cls; |
1718 | char *normalized_data; | ||
1643 | 1719 | ||
1644 | if ((format != EXTRACTOR_METAFORMAT_UTF8) && | 1720 | if ((format != EXTRACTOR_METAFORMAT_UTF8) && |
1645 | (format != EXTRACTOR_METAFORMAT_C_STRING)) | 1721 | (format != EXTRACTOR_METAFORMAT_C_STRING)) |
1646 | return 0; | 1722 | return 0; |
1647 | if (find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount)) | 1723 | normalized_data = normalize_metadata (format, data, data_len); |
1648 | return GNUNET_OK; | 1724 | if (!find_duplicate (data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount)) |
1649 | insert_non_mandatory_keyword (data, | 1725 | { |
1650 | uri->data.ksk.keywords, uri->data.ksk.keywordCount); | 1726 | insert_non_mandatory_keyword (data, |
1651 | uri->data.ksk.keywordCount++; | 1727 | uri->data.ksk.keywords, uri->data.ksk.keywordCount); |
1728 | uri->data.ksk.keywordCount++; | ||
1729 | } | ||
1730 | if (normalized_data != NULL) | ||
1731 | { | ||
1732 | if (!find_duplicate (normalized_data, (const char **) uri->data.ksk.keywords, uri->data.ksk.keywordCount)) | ||
1733 | { | ||
1734 | insert_non_mandatory_keyword (normalized_data, | ||
1735 | uri->data.ksk.keywords, uri->data.ksk.keywordCount); | ||
1736 | uri->data.ksk.keywordCount++; | ||
1737 | } | ||
1738 | GNUNET_free (normalized_data); | ||
1739 | } | ||
1652 | return 0; | 1740 | return 0; |
1653 | } | 1741 | } |
1654 | 1742 | ||
@@ -1690,8 +1778,9 @@ GNUNET_FS_uri_ksk_create_from_meta_data (const struct GNUNET_CONTAINER_MetaData | |||
1690 | tok_keywords = get_keywords_from_tokens (filename, NULL, 0); | 1778 | tok_keywords = get_keywords_from_tokens (filename, NULL, 0); |
1691 | paren_keywords = get_keywords_from_parens (filename, NULL, 0); | 1779 | paren_keywords = get_keywords_from_parens (filename, NULL, 0); |
1692 | } | 1780 | } |
1781 | /* x2 because there might be a normalized variant of every keyword */ | ||
1693 | ret->data.ksk.keywords = GNUNET_malloc (sizeof (char *) * (ent | 1782 | ret->data.ksk.keywords = GNUNET_malloc (sizeof (char *) * (ent |
1694 | + tok_keywords + paren_keywords)); | 1783 | + tok_keywords + paren_keywords) * 2); |
1695 | GNUNET_CONTAINER_meta_data_iterate (md, &gather_uri_data, ret); | 1784 | GNUNET_CONTAINER_meta_data_iterate (md, &gather_uri_data, ret); |
1696 | } | 1785 | } |
1697 | if (tok_keywords > 0) | 1786 | if (tok_keywords > 0) |