aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMartin Schanzenbach <mschanzenbach@posteo.de>2020-08-01 16:07:08 +0200
committerMartin Schanzenbach <mschanzenbach@posteo.de>2020-08-01 16:07:08 +0200
commit754d8c1b496624e5c879af7d142fc9fd34de3a21 (patch)
treec8fbe2e357c37de4dcabdaaae790fb5f83bdd40c /src
parent7f4ddbcab8598e3d5e29c23ce883cdfa664408f1 (diff)
downloadgnunet-754d8c1b496624e5c879af7d142fc9fd34de3a21.tar.gz
gnunet-754d8c1b496624e5c879af7d142fc9fd34de3a21.zip
util: add percent/url encoding
Diffstat (limited to 'src')
-rw-r--r--src/include/gnunet_strings_lib.h30
-rw-r--r--src/util/strings.c158
-rw-r--r--src/util/test_strings.c14
3 files changed, 199 insertions, 3 deletions
diff --git a/src/include/gnunet_strings_lib.h b/src/include/gnunet_strings_lib.h
index 663b44194..bd3ac9dbf 100644
--- a/src/include/gnunet_strings_lib.h
+++ b/src/include/gnunet_strings_lib.h
@@ -350,16 +350,28 @@ GNUNET_STRINGS_base64_encode (const void *in,
350 350
351 351
352/** 352/**
353 * Encode into Base64url. RFC7515 353 * url/percent encode (RFC3986).
354 * 354 *
355 * @param in the data to encode 355 * @param data the data to encode
356 * @param len the length of the input 356 * @param len the length of the input
357 * @param output where to write the output (*output should be NULL, 357 * @param output where to write the output (*output should be NULL,
358 * is allocated) 358 * is allocated)
359 * @return the size of the output 359 * @return the size of the output
360 */ 360 */
361size_t 361size_t
362GNUNET_STRINGS_base64url_encode (const void *in, size_t len, char **output); 362GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out);
363
364/**
365 * Decode from Base64url. RFC7515
366 *
367 * @param data the data to decode
368 * @param len the length of the input
369 * @param output where to write the output (*output should be NULL,
370 * is allocated)
371 * @return the size of the output
372 */
373size_t
374GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out);
363 375
364 376
365/** 377/**
@@ -389,6 +401,18 @@ GNUNET_STRINGS_base64_decode (const char *data,
389size_t 401size_t
390GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out); 402GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out);
391 403
404/**
405 * url/percent decode (RFC3986).
406 *
407 * @param data the data to decode
408 * @param len the length of the input
409 * @param out where to write the output (*out should be NULL,
410 * is allocated)
411 * @return the size of the output
412 */
413size_t
414GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out);
415
392 416
393/** 417/**
394 * Convert a peer path to a human-readable string. 418 * Convert a peer path to a human-readable string.
diff --git a/src/util/strings.c b/src/util/strings.c
index 41180dd71..d5e2f4878 100644
--- a/src/util/strings.c
+++ b/src/util/strings.c
@@ -29,6 +29,7 @@
29#include <iconv.h> 29#include <iconv.h>
30#endif 30#endif
31#include "gnunet_crypto_lib.h" 31#include "gnunet_crypto_lib.h"
32#include "gnunet_buffer_lib.h"
32#include "gnunet_strings_lib.h" 33#include "gnunet_strings_lib.h"
33#include <unicase.h> 34#include <unicase.h>
34#include <unistr.h> 35#include <unistr.h>
@@ -2088,4 +2089,161 @@ GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out)
2088} 2089}
2089 2090
2090 2091
/**
 * Convert a single hexadecimal digit to its numeric value.
 *
 * @param c the character to convert
 * @return the value (0..15), or -1 if @a c is not a hexadecimal digit
 */
static int
urldecode_hex_value (char c)
{
  if ((c >= '0') && (c <= '9'))
    return c - '0';
  if ((c >= 'a') && (c <= 'f'))
    return c - 'a' + 10;
  if ((c >= 'A') && (c <= 'F'))
    return c - 'A' + 10;
  return -1;
}


/**
 * url/percent decode (RFC3986).
 *
 * Decoding stops at the first 0-byte in @a data or after @a len bytes,
 * whichever comes first.  A '%' that is not followed by two hexadecimal
 * digits is copied to the output unchanged.  A '+' is decoded to a space,
 * mirroring what #GNUNET_STRINGS_urlencode() emits for a space.
 *
 * @param data the data to decode
 * @param len the length of the input
 * @param out where to write the output (*out should be NULL,
 *            is allocated; always 0-terminated)
 * @return the size of the output, not counting the 0-terminator
 */
size_t
GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out)
{
  const char *rpos = data;
  const char *end = data + len;
  char *wpos;

  /* Decoding never produces more bytes than it consumes, so the output
     always fits into len bytes plus the 0-terminator. */
  *out = GNUNET_malloc (len + 1);
  wpos = *out;
  while ( (rpos < end) &&
          ('\0' != *rpos) )
  {
    /* Only look at the two bytes after '%' if they are inside the input. */
    if ( ('%' == *rpos) &&
         (end - rpos >= 3) )
    {
      int hi = urldecode_hex_value (rpos[1]);
      int lo = urldecode_hex_value (rpos[2]);

      if ( (hi >= 0) &&
           (lo >= 0) )
      {
        *wpos++ = (char) (unsigned char) ((hi << 4) | lo);
        rpos += 3;
        continue;
      }
      /* Bad escape sequence: fall through and copy the '%' literally.
         This always advances rpos, so the loop is guaranteed to end. */
    }
    *wpos++ = ('+' == *rpos) ? ' ' : *rpos;
    rpos++;
  }
  *wpos = '\0'; /* add 0-terminator */
  return (size_t) (wpos - *out);
}
2134
2135
2136/**
2137 * url/percent encode (RFC3986).
2138 *
2139 * @param data the data to decode
2140 * @param len the length of the input
2141 * @param output where to write the output (*output should be NULL,
2142 * is allocated)
2143 * @return the size of the output
2144 */
2145size_t
2146GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out)
2147{
2148 struct GNUNET_Buffer buf = { 0 };
2149 const uint8_t *i8 = (uint8_t *) data;
2150
2151 while (0 != *i8)
2152 {
2153 if (0 == (0x80 & *i8))
2154 {
2155 /* traditional ASCII */
2156 if (isalnum (*i8) || (*i8 == '-') || (*i8 == '_') || (*i8 == '.') ||
2157 (*i8 == '~') )
2158 GNUNET_buffer_write (&buf, (const char*) i8, 1);
2159 else if (*i8 == ' ')
2160 GNUNET_buffer_write (&buf, "+", 1);
2161 else
2162 GNUNET_buffer_write_fstr (&buf,
2163 "%%%X%X",
2164 *i8 >> 4,
2165 *i8 & 15);
2166 i8++;
2167 continue;
2168 }
2169 if (0x80 + 0x40 == ((0x80 + 0x40 + 0x20) & *i8))
2170 {
2171 /* 2-byte value, percent-encode */
2172 GNUNET_buffer_write_fstr (&buf,
2173 "%%%X%X",
2174 *i8 >> 4,
2175 *i8 & 15);
2176 i8++;
2177 GNUNET_buffer_write_fstr (&buf,
2178 "%%%X%X",
2179 *i8 >> 4,
2180 *i8 & 15);
2181 i8++;
2182 continue;
2183 }
2184 if (0x80 + 0x40 + 0x20 == ((0x80 + 0x40 + 0x20 + 0x10) & *i8))
2185 {
2186 /* 3-byte value, percent-encode */
2187 for (unsigned int i = 0; i<4; i++)
2188 {
2189 GNUNET_buffer_write_fstr (&buf,
2190 "%%%X%X",
2191 *i8 >> 4,
2192 *i8 & 15);
2193 i8++;
2194 }
2195 continue;
2196 }
2197 if (0x80 + 0x40 + 0x20 + 0x10 == ((0x80 + 0x40 + 0x20 + 0x10 + 0x08) & *i8))
2198 {
2199 /* 4-byte value, percent-encode */
2200 for (unsigned int i = 0; i<4; i++)
2201 {
2202 GNUNET_buffer_write_fstr (&buf,
2203 "%%%X%X",
2204 *i8 >> 4,
2205 *i8 & 15);
2206 i8++;
2207 }
2208 continue;
2209 }
2210 if (0x80 + 0x40 + 0x20 + 0x10 + 0x08 == ((0x80 + 0x40 + 0x20 + 0x10 + 0x08
2211 + 0x04) & *i8))
2212 {
2213 /* 5-byte value, percent-encode (outside of UTF-8 modern standard, but so what) */
2214 for (unsigned int i = 0; i<5; i++)
2215 {
2216 GNUNET_buffer_write_fstr (&buf,
2217 "%%%X%X",
2218 *i8 >> 4,
2219 *i8 & 15);
2220 i8++;
2221 }
2222 continue;
2223 }
2224 if (0x80 + 0x40 + 0x20 + 0x10 + 0x08 + 0x04 == ((0x80 + 0x40 + 0x20 + 0x10
2225 + 0x08 + 0x04 + 0x02)
2226 & *i8))
2227 {
2228 /* 6-byte value, percent-encode (outside of UTF-8 modern standard, but so what) */
2229 for (unsigned int i = 0; i<6; i++)
2230 {
2231 GNUNET_buffer_write_fstr (&buf,
2232 "%%%X%X",
2233 *i8 >> 4,
2234 *i8 & 15);
2235 i8++;
2236 }
2237 continue;
2238 }
2239 /* really, really invalid UTF-8: fail */
2240 GNUNET_break (0);
2241 GNUNET_buffer_clear (&buf);
2242 return 0;
2243 }
2244 *out = GNUNET_buffer_reap_str (&buf);
2245 return strlen (*out);
2246}
2247
2248
2091/* end of strings.c */ 2249/* end of strings.c */
diff --git a/src/util/test_strings.c b/src/util/test_strings.c
index 90d06a473..28b1bb6f8 100644
--- a/src/util/test_strings.c
+++ b/src/util/test_strings.c
@@ -39,6 +39,10 @@
39#define WANTB(a, b, l) if (0 != memcmp (a, b, l)) { GNUNET_break (0); return 1; \ 39#define WANTB(a, b, l) if (0 != memcmp (a, b, l)) { GNUNET_break (0); return 1; \
40} else { } 40} else { }
41 41
/* Plain-text input for the url/percent encoding round-trip test. */
#define URLENCODE_TEST_VECTOR_PLAIN "Asbjlaw=ljsdlasjd?aslkdsa"

/* Expected encoding of the plain vector: '=' becomes %3D and '?' becomes
   %3F.  A '%' needs no backslash in a C string literal; "\%" is not a
   valid escape sequence. */
#define URLENCODE_TEST_VECTOR_ENCODED "Asbjlaw%3Dljsdlasjd%3Faslkdsa"
45
42int 46int
43main (int argc, char *argv[]) 47main (int argc, char *argv[])
44{ 48{
@@ -137,6 +141,16 @@ main (int argc, char *argv[])
137 GNUNET_STRINGS_fancy_time_to_relative ("15 m", &rtx)); 141 GNUNET_STRINGS_fancy_time_to_relative ("15 m", &rtx));
138 GNUNET_assert (rt.rel_value_us == rtx.rel_value_us); 142 GNUNET_assert (rt.rel_value_us == rtx.rel_value_us);
139 143
144 GNUNET_assert (0 != GNUNET_STRINGS_urlencode (URLENCODE_TEST_VECTOR_PLAIN,
145 strlen (URLENCODE_TEST_VECTOR_PLAIN),
146 &b));
147 WANT (URLENCODE_TEST_VECTOR_ENCODED, b);
148 GNUNET_free (b);
149 GNUNET_assert (0 != GNUNET_STRINGS_urldecode (URLENCODE_TEST_VECTOR_ENCODED,
150 strlen (URLENCODE_TEST_VECTOR_ENCODED),
151 &b));
152 WANT (URLENCODE_TEST_VECTOR_PLAIN, b);
153 GNUNET_free (b);
140 return 0; 154 return 0;
141} 155}
142 156