summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Schanzenbach <mschanzenbach@posteo.de>2020-08-01 16:07:08 +0200
committerMartin Schanzenbach <mschanzenbach@posteo.de>2020-08-01 16:07:08 +0200
commit754d8c1b496624e5c879af7d142fc9fd34de3a21 (patch)
treec8fbe2e357c37de4dcabdaaae790fb5f83bdd40c
parent7f4ddbcab8598e3d5e29c23ce883cdfa664408f1 (diff)
util: add percent/url encoding
-rw-r--r--src/include/gnunet_strings_lib.h30
-rw-r--r--src/util/strings.c158
-rw-r--r--src/util/test_strings.c14
3 files changed, 199 insertions, 3 deletions
diff --git a/src/include/gnunet_strings_lib.h b/src/include/gnunet_strings_lib.h
index 663b44194..bd3ac9dbf 100644
--- a/src/include/gnunet_strings_lib.h
+++ b/src/include/gnunet_strings_lib.h
@@ -350,16 +350,28 @@ GNUNET_STRINGS_base64_encode (const void *in,
/**
- * Encode into Base64url. RFC7515
+ * url/percent encode (RFC3986).
*
- * @param in the data to encode
+ * @param data the data to encode
* @param len the length of the input
* @param output where to write the output (*output should be NULL,
* is allocated)
* @return the size of the output
*/
size_t
-GNUNET_STRINGS_base64url_encode (const void *in, size_t len, char **output);
+GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out);
+
+/**
+ * Decode from Base64url. RFC7515
+ *
+ * @param data the data to decode
+ * @param len the length of the input
+ * @param output where to write the output (*output should be NULL,
+ * is allocated)
+ * @return the size of the output
+ */
+size_t
+GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out);
/**
@@ -389,6 +401,18 @@ GNUNET_STRINGS_base64_decode (const char *data,
size_t
GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out);
+/**
+ * url/percent decode (RFC3986).
+ *
+ * @param data the data to decode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ * is allocated)
+ * @return the size of the output
+ */
+size_t
+GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out);
+
/**
* Convert a peer path to a human-readable string.
diff --git a/src/util/strings.c b/src/util/strings.c
index 41180dd71..d5e2f4878 100644
--- a/src/util/strings.c
+++ b/src/util/strings.c
@@ -29,6 +29,7 @@
#include <iconv.h>
#endif
#include "gnunet_crypto_lib.h"
+#include "gnunet_buffer_lib.h"
#include "gnunet_strings_lib.h"
#include <unicase.h>
#include <unistr.h>
@@ -2088,4 +2089,161 @@ GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out)
}
+/**
+ * url/percent decode (RFC3986).
+ *
+ * At most @a len bytes of @a data are consumed; decoding also stops at the
+ * first 0-byte. A '%' followed by two hexadecimal digits is replaced by the
+ * byte it denotes. Every other byte -- including a '%' that does not start
+ * a complete escape -- is copied to the output unchanged.
+ *
+ * NOTE(review): '+' is copied verbatim and not mapped back to ' ', although
+ * GNUNET_STRINGS_urlencode() writes ' ' as '+' -- confirm this is intended.
+ *
+ * @param data the data to decode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ *   is allocated); the result is 0-terminated
+ * @return the size of the output (excluding the 0-terminator)
+ */
+size_t
+GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out)
+{
+  const char *rpos = data;
+  const char *const end = data + len;
+  char *wpos;
+  size_t resl = 0;
+
+  *out = GNUNET_malloc (len + 1); /* output should always fit into input */
+  wpos = *out;
+  while ( (rpos < end) &&
+          ('\0' != *rpos) )
+  {
+    /* Only treat '%' as an escape if two hex digits follow within bounds;
+       the checks short-circuit, so we never read past @a end or the
+       0-terminator. */
+    if ( ('%' == *rpos) &&
+         (end - rpos >= 3) &&
+         isxdigit ((unsigned char) rpos[1]) &&
+         isxdigit ((unsigned char) rpos[2]) )
+    {
+      const char hex[3] = { rpos[1], rpos[2], '\0' };
+      unsigned int num = 0;
+
+      /* cannot fail: both characters were verified to be hex digits */
+      (void) sscanf (hex, "%2x", &num);
+      *wpos = (char) ((unsigned char) num);
+      rpos += 3;
+    }
+    else
+    {
+      /* ordinary byte or malformed escape: copy verbatim, always advancing
+         so that bad input cannot stall the loop */
+      *wpos = *rpos;
+      rpos++;
+    }
+    wpos++;
+    resl++;
+  }
+  *wpos = '\0'; /* add 0-terminator */
+  return resl;
+}
+
+
+/**
+ * url/percent encode (RFC3986).
+ *
+ * At most @a len bytes of @a data are consumed; encoding also stops at the
+ * first 0-byte. ASCII letters, digits, '-', '_', '.' and '~' are copied
+ * unchanged, a space is written as '+', and every other ASCII byte is
+ * written as "%XX". Each byte of a multi-byte UTF-8 sequence (lead bytes
+ * announcing 2 to 6 bytes are accepted) is written as "%XX"; the
+ * continuation bytes themselves are not validated.
+ *
+ * @param data the data to encode
+ * @param len the length of the input
+ * @param out where to write the output (*out should be NULL,
+ *   is allocated); left untouched on failure
+ * @return the size of the output, 0 if the input starts a multi-byte
+ *   sequence with an invalid lead byte or ends in the middle of one
+ */
+size_t
+GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out)
+{
+  struct GNUNET_Buffer buf = { 0 };
+  const uint8_t *i8 = (const uint8_t *) data;
+  const uint8_t *const end = i8 + len;
+
+  while ( (i8 < end) &&
+          (0 != *i8) )
+  {
+    unsigned int seq_len;
+
+    if (0 == (0x80 & *i8))
+    {
+      /* traditional ASCII */
+      if (isalnum (*i8) || ('-' == *i8) || ('_' == *i8) || ('.' == *i8) ||
+          ('~' == *i8) )
+        GNUNET_buffer_write (&buf, (const char *) i8, 1);
+      else if (' ' == *i8)
+        GNUNET_buffer_write (&buf, "+", 1);
+      else
+        GNUNET_buffer_write_fstr (&buf,
+                                  "%%%X%X",
+                                  *i8 >> 4,
+                                  *i8 & 15);
+      i8++;
+      continue;
+    }
+    /* The number of leading 1-bits in the lead byte is the length of the
+       sequence; 5 and 6 are outside of modern UTF-8 but tolerated. */
+    if (0xC0 == (0xE0 & *i8))
+      seq_len = 2;
+    else if (0xE0 == (0xF0 & *i8))
+      seq_len = 3;
+    else if (0xF0 == (0xF8 & *i8))
+      seq_len = 4;
+    else if (0xF8 == (0xFC & *i8))
+      seq_len = 5;
+    else if (0xFC == (0xFE & *i8))
+      seq_len = 6;
+    else
+      goto invalid; /* stray continuation byte, 0xFE or 0xFF */
+    for (unsigned int i = 0; i < seq_len; i++)
+    {
+      /* never read past the input: a truncated sequence is invalid */
+      if ( (i8 == end) ||
+           (0 == *i8) )
+        goto invalid;
+      GNUNET_buffer_write_fstr (&buf,
+                                "%%%X%X",
+                                *i8 >> 4,
+                                *i8 & 15);
+      i8++;
+    }
+  }
+  *out = GNUNET_buffer_reap_str (&buf);
+  return strlen (*out);
+invalid:
+  /* really, really invalid UTF-8: fail */
+  GNUNET_break (0);
+  GNUNET_buffer_clear (&buf);
+  return 0;
+}
+
+
/* end of strings.c */
diff --git a/src/util/test_strings.c b/src/util/test_strings.c
index 90d06a473..28b1bb6f8 100644
--- a/src/util/test_strings.c
+++ b/src/util/test_strings.c
@@ -39,6 +39,10 @@
#define WANTB(a, b, l) if (0 != memcmp (a, b, l)) { GNUNET_break (0); return 1; \
} else { }
+/* '=' and '?' are not unreserved characters, so the encoded form spells
+   them as %3D and %3F. A plain '%' needs no backslash in a C string. */
+#define URLENCODE_TEST_VECTOR_PLAIN "Asbjlaw=ljsdlasjd?aslkdsa"
+
+#define URLENCODE_TEST_VECTOR_ENCODED "Asbjlaw%3Dljsdlasjd%3Faslkdsa"
+
int
main (int argc, char *argv[])
{
@@ -137,6 +141,16 @@ main (int argc, char *argv[])
GNUNET_STRINGS_fancy_time_to_relative ("15 m", &rtx));
GNUNET_assert (rt.rel_value_us == rtx.rel_value_us);
+ GNUNET_assert (0 != GNUNET_STRINGS_urlencode (URLENCODE_TEST_VECTOR_PLAIN,
+ strlen (URLENCODE_TEST_VECTOR_PLAIN),
+ &b));
+ WANT (URLENCODE_TEST_VECTOR_ENCODED, b);
+ GNUNET_free (b);
+ GNUNET_assert (0 != GNUNET_STRINGS_urldecode (URLENCODE_TEST_VECTOR_ENCODED,
+ strlen (URLENCODE_TEST_VECTOR_ENCODED),
+ &b));
+ WANT (URLENCODE_TEST_VECTOR_PLAIN, b);
+ GNUNET_free (b);
return 0;
}