From 754d8c1b496624e5c879af7d142fc9fd34de3a21 Mon Sep 17 00:00:00 2001
From: Martin Schanzenbach
Date: Sat, 1 Aug 2020 16:07:08 +0200
Subject: util: add percent/url encoding

---
 src/include/gnunet_strings_lib.h |  30 +++++++-
 src/util/strings.c               | 158 +++++++++++++++++++++++++++++++++++++++
 src/util/test_strings.c          |  14 ++++
 3 files changed, 199 insertions(+), 3 deletions(-)

diff --git a/src/include/gnunet_strings_lib.h b/src/include/gnunet_strings_lib.h
index 663b44194..bd3ac9dbf 100644
--- a/src/include/gnunet_strings_lib.h
+++ b/src/include/gnunet_strings_lib.h
@@ -350,16 +350,28 @@ GNUNET_STRINGS_base64_encode (const void *in,
 
 
 /**
- * Encode into Base64url. RFC7515
+ * url/percent encode (RFC3986).
  *
- * @param in the data to encode
+ * @param data the data to encode
  * @param len the length of the input
  * @param output where to write the output (*output should be NULL,
  *   is allocated)
  * @return the size of the output
  */
 size_t
-GNUNET_STRINGS_base64url_encode (const void *in, size_t len, char **output);
+GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out);
+
+/**
+ * Decode from Base64url. RFC7515
+ *
+ * @param data the data to decode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ *   is allocated)
+ * @return the size of the output
+ */
+size_t
+GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out);
 
 
 /**
@@ -389,6 +401,18 @@ GNUNET_STRINGS_base64_decode (const char *data,
 size_t
 GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out);
 
+/**
+ * url/percent decode (RFC3986).
+ *
+ * @param data the data to decode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ *   is allocated)
+ * @return the size of the output
+ */
+size_t
+GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out);
+
 
 /**
  * Convert a peer path to a human-readable string.
diff --git a/src/util/strings.c b/src/util/strings.c
index 41180dd71..d5e2f4878 100644
--- a/src/util/strings.c
+++ b/src/util/strings.c
@@ -29,6 +29,7 @@
 #include <iconv.h>
 #endif
 #include "gnunet_crypto_lib.h"
+#include "gnunet_buffer_lib.h"
 #include "gnunet_strings_lib.h"
 #include <unicase.h>
 #include <unistr.h>
@@ -2088,4 +2089,163 @@ GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out)
 }
 
 
+/**
+ * url/percent decode (RFC3986).
+ *
+ * Every "%XX" triple with two hexadecimal digits becomes the byte it
+ * names; any other byte -- including a '%' that does not start such a
+ * triple, and '+' -- is copied unchanged.  Decoding stops after @a len
+ * bytes or at the first 0-byte of @a data, whichever comes first.
+ *
+ * @param data the data to decode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ *   is allocated); the result is always 0-terminated
+ * @return the size of the output, not counting the 0-terminator
+ */
+size_t
+GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out)
+{
+  const char *rpos = data;
+  const char *end = data + len;
+  char *wpos;
+  size_t resl = 0;
+
+  *out = GNUNET_malloc (len + 1); /* output never exceeds the input */
+  wpos = *out;
+  while ( (end != rpos) &&
+          ('\0' != *rpos) )
+  {
+    unsigned int num;
+
+    /* sscanf() alone would also accept whitespace, a sign or a single
+       digit, so insist on exactly two hex digits inside the input */
+    if ( ('%' == *rpos) &&
+         (3 <= end - rpos) &&
+         isxdigit ((unsigned char) rpos[1]) &&
+         isxdigit ((unsigned char) rpos[2]) &&
+         (1 == sscanf (rpos + 1, "%2x", &num)) )
+    {
+      *wpos = (char) ((unsigned char) num);
+      rpos += 3;
+    }
+    else
+    {
+      /* ordinary byte or malformed escape: copy it verbatim, so the
+         read position advances on every iteration */
+      *wpos = *rpos;
+      rpos++;
+    }
+    wpos++;
+    resl++;
+  }
+  *wpos = '\0'; /* add 0-terminator */
+  return resl;
+}
+
+
+/**
+ * Append the percent-encoded form ("%XX", upper-case hex) of one byte.
+ *
+ * @param buf buffer to append to
+ * @param byte the byte to encode
+ */
+static void
+urlencode_write_escaped (struct GNUNET_Buffer *buf, uint8_t byte)
+{
+  GNUNET_buffer_write_fstr (buf,
+                            "%%%X%X",
+                            (unsigned int) (byte >> 4),
+                            (unsigned int) (byte & 15));
+}
+
+
+/**
+ * Number of bytes in the multi-byte sequence announced by a lead byte.
+ *
+ * @param lead first byte of the sequence (high bit set)
+ * @return 2 to 6 (5 and 6 are outside of the modern UTF-8 standard,
+ *         but so what), 0 if @a lead cannot start a sequence
+ */
+static unsigned int
+urlencode_sequence_length (uint8_t lead)
+{
+  if (0xC0 == (0xE0 & lead))
+    return 2;
+  if (0xE0 == (0xF0 & lead))
+    return 3;
+  if (0xF0 == (0xF8 & lead))
+    return 4;
+  if (0xF8 == (0xFC & lead))
+    return 5;
+  if (0xFC == (0xFE & lead))
+    return 6;
+  return 0;
+}
+
+
+/**
+ * url/percent encode (RFC3986).
+ *
+ * ASCII letters, digits, '-', '_', '.' and '~' are copied, a space
+ * becomes '+', and every other byte becomes "%XX".  Encoding stops
+ * after @a len bytes or at the first 0-byte of @a data, whichever
+ * comes first.
+ *
+ * @param data the data to encode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ *   is allocated); set to NULL on failure
+ * @return the size of the output; 0 for empty input or if a byte with
+ *         the high bit set cannot start a multi-byte sequence
+ */
+size_t
+GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out)
+{
+  struct GNUNET_Buffer buf = { 0 };
+  const uint8_t *i8 = (const uint8_t *) data;
+  const uint8_t *end = i8 + len;
+
+  while ( (end != i8) &&
+          (0 != *i8) )
+  {
+    unsigned int seqlen;
+
+    if (0 == (0x80 & *i8))
+    {
+      /* traditional ASCII */
+      if (isalnum (*i8) || (*i8 == '-') || (*i8 == '_') || (*i8 == '.') ||
+          (*i8 == '~') )
+        GNUNET_buffer_write (&buf, (const char *) i8, 1);
+      else if (*i8 == ' ')
+        GNUNET_buffer_write (&buf, "+", 1);
+      else
+        urlencode_write_escaped (&buf, *i8);
+      i8++;
+      continue;
+    }
+    seqlen = urlencode_sequence_length (*i8);
+    if (0 == seqlen)
+    {
+      /* really, really invalid UTF-8: fail */
+      GNUNET_break (0);
+      GNUNET_buffer_clear (&buf);
+      *out = NULL;
+      return 0;
+    }
+    /* percent-encode the whole sequence, but stop at the end of the
+       input in case the sequence is truncated */
+    for (unsigned int i = 0;
+         (i < seqlen) && (end != i8) && (0 != *i8);
+         i++)
+    {
+      urlencode_write_escaped (&buf, *i8);
+      i8++;
+    }
+  }
+  *out = GNUNET_buffer_reap_str (&buf);
+  return strlen (*out);
+}
+
+
 /* end of strings.c */
diff --git a/src/util/test_strings.c b/src/util/test_strings.c
index 90d06a473..28b1bb6f8 100644
--- a/src/util/test_strings.c
+++ b/src/util/test_strings.c
@@ -39,6 +39,10 @@
 #define WANTB(a, b, l) if (0 != memcmp (a, b, l)) { GNUNET_break (0); return 1; \
 } else { }
 
+#define URLENCODE_TEST_VECTOR_PLAIN "Asbjlaw=ljsdlasjd?aslkdsa"
+
+#define URLENCODE_TEST_VECTOR_ENCODED "Asbjlaw%3Dljsdlasjd%3Faslkdsa"
+
 int
 main (int argc, char *argv[])
 {
@@ -137,6 +141,24 @@ main (int argc, char *argv[])
                  GNUNET_STRINGS_fancy_time_to_relative ("15 m", &rtx));
   GNUNET_assert (rt.rel_value_us == rtx.rel_value_us);
 
+  GNUNET_assert (0 != GNUNET_STRINGS_urlencode (URLENCODE_TEST_VECTOR_PLAIN,
+                                                strlen (URLENCODE_TEST_VECTOR_PLAIN),
+                                                &b));
+  WANT (URLENCODE_TEST_VECTOR_ENCODED, b);
+  GNUNET_free (b);
+  GNUNET_assert (0 != GNUNET_STRINGS_urldecode (URLENCODE_TEST_VECTOR_ENCODED,
+                                                strlen (URLENCODE_TEST_VECTOR_ENCODED),
+                                                &b));
+  WANT (URLENCODE_TEST_VECTOR_PLAIN, b);
+  GNUNET_free (b);
+  /* a 3-byte UTF-8 sequence must yield exactly three escapes */
+  GNUNET_assert (9 == GNUNET_STRINGS_urlencode ("\xE2\x82\xAC", 3, &b));
+  WANT ("%E2%82%AC", b);
+  GNUNET_free (b);
+  /* a malformed escape must be copied verbatim instead of looping forever */
+  GNUNET_assert (4 == GNUNET_STRINGS_urldecode ("100%", 4, &b));
+  WANT ("100%", b);
+  GNUNET_free (b);
   return 0;
 }
 
-- 
cgit v1.2.3