diff options
author | Martin Schanzenbach <mschanzenbach@posteo.de> | 2020-08-01 16:07:08 +0200 |
---|---|---|
committer | Martin Schanzenbach <mschanzenbach@posteo.de> | 2020-08-01 16:07:08 +0200 |
commit | 754d8c1b496624e5c879af7d142fc9fd34de3a21 (patch) | |
tree | c8fbe2e357c37de4dcabdaaae790fb5f83bdd40c /src | |
parent | 7f4ddbcab8598e3d5e29c23ce883cdfa664408f1 (diff) | |
download | gnunet-754d8c1b496624e5c879af7d142fc9fd34de3a21.tar.gz gnunet-754d8c1b496624e5c879af7d142fc9fd34de3a21.zip |
util: add percent/url encoding
Diffstat (limited to 'src')
-rw-r--r-- | src/include/gnunet_strings_lib.h | 30 | ||||
-rw-r--r-- | src/util/strings.c | 158 | ||||
-rw-r--r-- | src/util/test_strings.c | 14 |
3 files changed, 199 insertions, 3 deletions
diff --git a/src/include/gnunet_strings_lib.h b/src/include/gnunet_strings_lib.h index 663b44194..bd3ac9dbf 100644 --- a/src/include/gnunet_strings_lib.h +++ b/src/include/gnunet_strings_lib.h | |||
@@ -350,16 +350,28 @@ GNUNET_STRINGS_base64_encode (const void *in, | |||
350 | 350 | ||
351 | 351 | ||
352 | /** | 352 | /** |
353 | * Encode into Base64url. RFC7515 | 353 | * url/percent encode (RFC3986). |
354 | * | 354 | * |
355 | * @param in the data to encode | 355 | * @param data the data to encode |
356 | * @param len the length of the input | 356 | * @param len the length of the input |
357 | * @param output where to write the output (*output should be NULL, | 357 | * @param output where to write the output (*output should be NULL, |
358 | * is allocated) | 358 | * is allocated) |
359 | * @return the size of the output | 359 | * @return the size of the output |
360 | */ | 360 | */ |
361 | size_t | 361 | size_t |
362 | GNUNET_STRINGS_base64url_encode (const void *in, size_t len, char **output); | 362 | GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out); |
363 | |||
364 | /** | ||
365 | * Decode from Base64url. RFC7515 | ||
366 | * | ||
367 | * @param data the data to decode | ||
368 | * @param len the length of the input | ||
369 | * @param output where to write the output (*output should be NULL, | ||
370 | * is allocated) | ||
371 | * @return the size of the output | ||
372 | */ | ||
373 | size_t | ||
374 | GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out); | ||
363 | 375 | ||
364 | 376 | ||
365 | /** | 377 | /** |
@@ -389,6 +401,18 @@ GNUNET_STRINGS_base64_decode (const char *data, | |||
389 | size_t | 401 | size_t |
390 | GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out); | 402 | GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out); |
391 | 403 | ||
404 | /** | ||
405 | * url/percent decode (RFC3986). | |
406 | * | |
407 | * @param data the data to decode | |
408 | * @param len the length of the input | |
409 | * @param out where to write the output (*out should be NULL, | |
410 | * is allocated) | ||
411 | * @return the size of the output | ||
412 | */ | ||
413 | size_t | ||
414 | GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out); | ||
415 | |||
392 | 416 | ||
393 | /** | 417 | /** |
394 | * Convert a peer path to a human-readable string. | 418 | * Convert a peer path to a human-readable string. |
diff --git a/src/util/strings.c b/src/util/strings.c index 41180dd71..d5e2f4878 100644 --- a/src/util/strings.c +++ b/src/util/strings.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <iconv.h> | 29 | #include <iconv.h> |
30 | #endif | 30 | #endif |
31 | #include "gnunet_crypto_lib.h" | 31 | #include "gnunet_crypto_lib.h" |
32 | #include "gnunet_buffer_lib.h" | ||
32 | #include "gnunet_strings_lib.h" | 33 | #include "gnunet_strings_lib.h" |
33 | #include <unicase.h> | 34 | #include <unicase.h> |
34 | #include <unistr.h> | 35 | #include <unistr.h> |
@@ -2088,4 +2089,161 @@ GNUNET_STRINGS_base64url_decode (const char *data, size_t len, void **out) | |||
2088 | } | 2089 | } |
2089 | 2090 | ||
2090 | 2091 | ||
2092 | /** | |
2093 | * url/percent decode (RFC3986). | |
2094 | * | |
2095 | * @param data the data to decode; it is read up to its 0-terminator | |
2096 | * @param len the length of the input; only used to size the output buffer | |
2097 | * @param out where to write the output (*out is overwritten with a buffer | |
2098 | * of len + 1 bytes obtained from GNUNET_malloc) | |
2099 | * @return the size of the output, not counting the 0-terminator | |
2100 | */ | |
2101 | size_t | |
2102 | GNUNET_STRINGS_urldecode (const char *data, size_t len, char **out) | |
2103 | { | |
2104 | const char *rpos = data; | |
2105 | *out = GNUNET_malloc (len + 1); /* output should always fit into input */ | |
2106 | char *wpos = *out; | |
2107 | size_t resl = 0; | |
2108 | |||
2109 | while ('\0' != *rpos) | |
2110 | { | |
2111 | unsigned int num; | |
2112 | switch (*rpos) | |
2113 | { | |
2114 | case '%': | |
2115 | if (1 != sscanf (rpos + 1, "%2x", &num)) | |
2116 | break; | |
2117 | *wpos = (char) ((unsigned char) num); | |
2118 | wpos++; | |
2119 | resl++; | |
2120 | rpos += 3; /* NOTE(review): skips three bytes even though the 2-wide hex conversion also matches a single digit; this may step past the terminator, confirm */ | |
2121 | break; | |
2122 | /* FIXME: bad sequence handling is missing; the early break above leaves rpos unchanged, so the enclosing while loop never advances */ | |
2123 | /* NOTE: unreachable; every path of the percent case ends in break, so nothing falls through into default */ | |
2124 | default: | |
2125 | *wpos = *rpos; | |
2126 | wpos++; | |
2127 | resl++; | |
2128 | rpos++; | |
2129 | } | |
2130 | } | |
2131 | *wpos = '\0'; /* add 0-terminator */ | |
2132 | return resl; | |
2133 | } | |
2134 | |||
2135 | |||
2136 | /** | |
2137 | * url/percent encode (RFC3986). | |
2138 | * | |
2139 | * @param data the data to encode; it is read up to its 0-terminator | |
2140 | * @param len the length of the input (not read by this implementation) | |
2141 | * @param out where to write the output (*out is assigned only on success, | |
2142 | * from GNUNET_buffer_reap_str) | |
2143 | * @return the string length of the output; 0 if a byte with the high bit set matches none of the lead-byte patterns below (or if the input is empty) | |
2144 | */ | |
2145 | size_t | |
2146 | GNUNET_STRINGS_urlencode (const char *data, size_t len, char **out) | |
2147 | { | |
2148 | struct GNUNET_Buffer buf = { 0 }; | |
2149 | const uint8_t *i8 = (uint8_t *) data; | |
2150 | |||
2151 | while (0 != *i8) | |
2152 | { | |
2153 | if (0 == (0x80 & *i8)) | |
2154 | { | |
2155 | /* 7-bit ASCII: alphanumerics and - _ . ~ are copied, a space becomes a plus sign, everything else is percent-encoded (note: GNUNET_STRINGS_urldecode above does not map the plus sign back) */ | |
2156 | if (isalnum (*i8) || (*i8 == '-') || (*i8 == '_') || (*i8 == '.') || | |
2157 | (*i8 == '~') ) | |
2158 | GNUNET_buffer_write (&buf, (const char*) i8, 1); | |
2159 | else if (*i8 == ' ') | |
2160 | GNUNET_buffer_write (&buf, "+", 1); | |
2161 | else | |
2162 | GNUNET_buffer_write_fstr (&buf, | |
2163 | "%%%X%X", | |
2164 | *i8 >> 4, | |
2165 | *i8 & 15); | |
2166 | i8++; | |
2167 | continue; | |
2168 | } | |
2169 | if (0x80 + 0x40 == ((0x80 + 0x40 + 0x20) & *i8)) | |
2170 | { | |
2171 | /* 2-byte value, percent-encode; NOTE(review): the second byte is consumed without checking for the 0-terminator (same in the longer branches below) */ | |
2172 | GNUNET_buffer_write_fstr (&buf, | |
2173 | "%%%X%X", | |
2174 | *i8 >> 4, | |
2175 | *i8 & 15); | |
2176 | i8++; | |
2177 | GNUNET_buffer_write_fstr (&buf, | |
2178 | "%%%X%X", | |
2179 | *i8 >> 4, | |
2180 | *i8 & 15); | |
2181 | i8++; | |
2182 | continue; | |
2183 | } | |
2184 | if (0x80 + 0x40 + 0x20 == ((0x80 + 0x40 + 0x20 + 0x10) & *i8)) | |
2185 | { | |
2186 | /* 3-byte value, percent-encode; FIXME: the loop below iterates 4 times, consuming one byte more than the sequence length */ | |
2187 | for (unsigned int i = 0; i<4; i++) | |
2188 | { | |
2189 | GNUNET_buffer_write_fstr (&buf, | |
2190 | "%%%X%X", | |
2191 | *i8 >> 4, | |
2192 | *i8 & 15); | |
2193 | i8++; | |
2194 | } | |
2195 | continue; | |
2196 | } | |
2197 | if (0x80 + 0x40 + 0x20 + 0x10 == ((0x80 + 0x40 + 0x20 + 0x10 + 0x08) & *i8)) | |
2198 | { | |
2199 | /* 4-byte value, percent-encode */ | |
2200 | for (unsigned int i = 0; i<4; i++) | |
2201 | { | |
2202 | GNUNET_buffer_write_fstr (&buf, | |
2203 | "%%%X%X", | |
2204 | *i8 >> 4, | |
2205 | *i8 & 15); | |
2206 | i8++; | |
2207 | } | |
2208 | continue; | |
2209 | } | |
2210 | if (0x80 + 0x40 + 0x20 + 0x10 + 0x08 == ((0x80 + 0x40 + 0x20 + 0x10 + 0x08 | |
2211 | + 0x04) & *i8)) | |
2212 | { | |
2213 | /* 5-byte value, percent-encode (outside of UTF-8 modern standard, but so what) */ | |
2214 | for (unsigned int i = 0; i<5; i++) | |
2215 | { | |
2216 | GNUNET_buffer_write_fstr (&buf, | |
2217 | "%%%X%X", | |
2218 | *i8 >> 4, | |
2219 | *i8 & 15); | |
2220 | i8++; | |
2221 | } | |
2222 | continue; | |
2223 | } | |
2224 | if (0x80 + 0x40 + 0x20 + 0x10 + 0x08 + 0x04 == ((0x80 + 0x40 + 0x20 + 0x10 | |
2225 | + 0x08 + 0x04 + 0x02) | |
2226 | & *i8)) | |
2227 | { | |
2228 | /* 6-byte value, percent-encode (outside of UTF-8 modern standard, but so what) */ | |
2229 | for (unsigned int i = 0; i<6; i++) | |
2230 | { | |
2231 | GNUNET_buffer_write_fstr (&buf, | |
2232 | "%%%X%X", | |
2233 | *i8 >> 4, | |
2234 | *i8 & 15); | |
2235 | i8++; | |
2236 | } | |
2237 | continue; | |
2238 | } | |
2239 | /* really, really invalid UTF-8 (no lead-byte pattern matched): fail; *out is not assigned on this path */ | |
2240 | GNUNET_break (0); | |
2241 | GNUNET_buffer_clear (&buf); | |
2242 | return 0; | |
2243 | } | |
2244 | *out = GNUNET_buffer_reap_str (&buf); | |
2245 | return strlen (*out); | |
2246 | } | |
2247 | |||
2248 | |||
2091 | /* end of strings.c */ | 2249 | /* end of strings.c */ |
diff --git a/src/util/test_strings.c b/src/util/test_strings.c index 90d06a473..28b1bb6f8 100644 --- a/src/util/test_strings.c +++ b/src/util/test_strings.c | |||
@@ -39,6 +39,10 @@ | |||
39 | #define WANTB(a, b, l) if (0 != memcmp (a, b, l)) { GNUNET_break (0); return 1; \ | 39 | #define WANTB(a, b, l) if (0 != memcmp (a, b, l)) { GNUNET_break (0); return 1; \ |
40 | } else { } | 40 | } else { } |
41 | 41 | ||
42 | #define URLENCODE_TEST_VECTOR_PLAIN "Asbjlaw=ljsdlasjd?aslkdsa" | ||
43 | |||
44 | #define URLENCODE_TEST_VECTOR_ENCODED "Asbjlaw\%3Dljsdlasjd\%3Faslkdsa" | ||
45 | |||
42 | int | 46 | int |
43 | main (int argc, char *argv[]) | 47 | main (int argc, char *argv[]) |
44 | { | 48 | { |
@@ -137,6 +141,16 @@ main (int argc, char *argv[]) | |||
137 | GNUNET_STRINGS_fancy_time_to_relative ("15 m", &rtx)); | 141 | GNUNET_STRINGS_fancy_time_to_relative ("15 m", &rtx)); |
138 | GNUNET_assert (rt.rel_value_us == rtx.rel_value_us); | 142 | GNUNET_assert (rt.rel_value_us == rtx.rel_value_us); |
139 | 143 | ||
144 | GNUNET_assert (0 != GNUNET_STRINGS_urlencode (URLENCODE_TEST_VECTOR_PLAIN, | ||
145 | strlen (URLENCODE_TEST_VECTOR_PLAIN), | ||
146 | &b)); | ||
147 | WANT (URLENCODE_TEST_VECTOR_ENCODED, b); | ||
148 | GNUNET_free (b); | ||
149 | GNUNET_assert (0 != GNUNET_STRINGS_urldecode (URLENCODE_TEST_VECTOR_ENCODED, | ||
150 | strlen (URLENCODE_TEST_VECTOR_ENCODED), | ||
151 | &b)); | ||
152 | WANT (URLENCODE_TEST_VECTOR_PLAIN, b); | ||
153 | GNUNET_free (b); | ||
140 | return 0; | 154 | return 0; |
141 | } | 155 | } |
142 | 156 | ||