From 49d74db2e9bd8418f15b560e36e1f27661f65361 Mon Sep 17 00:00:00 2001 From: Jonathan Buchanan Date: Wed, 26 Aug 2020 14:49:02 -0400 Subject: use (and "GNUnet-ify") libyuarel as a basepoint for uri parsing --- src/include/gnunet_uri_lib.h | 170 +++++---- src/util/.gitignore | 3 +- src/util/Makefile.am | 7 + src/util/test_uri.c | 837 +++++++++++++++++++++++++++++++++++++++++++ src/util/uri.c | 344 ++++++++++++++++++ 5 files changed, 1288 insertions(+), 73 deletions(-) create mode 100644 src/util/test_uri.c create mode 100644 src/util/uri.c diff --git a/src/include/gnunet_uri_lib.h b/src/include/gnunet_uri_lib.h index 48db0ac85..e5f144591 100644 --- a/src/include/gnunet_uri_lib.h +++ b/src/include/gnunet_uri_lib.h @@ -1,96 +1,122 @@ -/* - This file is part of GNUnet - Copyright (C) 2020 GNUnet e.V. - - GNUnet is free software: you can redistribute it and/or modify it - under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, - or (at your option) any later version. - - GNUnet is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . 
- - SPDX-License-Identifier: AGPL3.0-or-later - */ /** - * @file include/gnunet_uri_lib.h - * @brief generic parser for URIs - * @author Jonathan Buchanan + * Copyright (C) 2016 Jack Engqvist Johansson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ - #ifndef GNUNET_URI_LIB_H #define GNUNET_URI_LIB_H + /** - * A Universal Resource Identifier (URI). + * The struct where the parsed values will be stored: + * + * scheme ":" [ "//" ] [ username ":" password "@" ] host [ ":" port ] [ "/" ] [ path ] [ "?" query ] + * + * Note: to make sure that no strings are copied, the first slash "/" in the + * path will be used to null terminate the hostname if no port is supplied. */ -struct GNUNET_Uri -{ - /** - * The scheme of the uri. - */ - char *scheme; - - - /** - * The authority of the uri. If not present in the uri, NULL. - */ - char *authority; - - - /** - * The list of path segments in the URI. 
Note that if the path ends with a - * '/', then this array will end with an empty string to indicate the empty - * segment following the '/'. - */ - char **path_segments; - - - /** - * The length of @e path_segments. - */ - unsigned int path_segments_count; +struct GNUNET_uri { + char *scheme; /* scheme, without ":" and "//" */ + char *username; /* username, default: NULL */ + char *password; /* password, default: NULL */ + char *host; /* hostname or IP address */ + int port; /* port, default: 0 */ + char *path; /* path, without leading "/", default: NULL */ + char *query; /* query, default: NULL */ + char *fragment; /* fragment, default: NULL */ +}; - /** - * The query of the uri. If not present in the uri, NULL. - */ - char *query; +/* A struct to hold the query string parameter values. */ +struct GNUNET_uri_param { + char *key; + char *val; +}; - /** - * The fragment of the uri. If not present in the uri, NULL. - */ - char *fragment; -}; +/** + * Parse a URL to a struct. + * + * The URL string should be in one of the following formats: + * + * Absolute URL: + * scheme ":" [ "//" ] [ username ":" password "@" ] host [ ":" port ] [ "/" ] [ path ] [ "?" query ] [ "#" fragment ] + * + * Relative URL: + * path [ "?" query ] [ "#" fragment ] + * + * The following parts will be parsed to the corresponding struct member. + * + * *url: a pointer to the struct where to store the parsed values. + * *url_str: a pointer to the url to be parsed (null terminated). The string + * will be modified. + * + * Returns 0 on success, otherwise -1. + */ +int +GNUNET_uri_parse (struct GNUNET_uri *url, + char *url_str); /** - * Parse a URI from a string into an internal representation. + * Split a path into several strings. + * + * No data is copied, the slashed are used as null terminators and then + * pointers to each path part will be stored in **parts. Double slashes will be + * treated as one. + * + * *path: the path to split. The string will be modified. 
+ * **parts: a pointer to an array of (char *) where to store the result. + * max_parts: max number of parts to parse. * - * @param uri string to parse - * @param emsg where to store the parser error message (if any) - * @return handle to the internal representation of the URI, or NULL on error + * Returns the number of parsed items. -1 on error. */ -struct GNUNET_Uri * -GNUNET_uri_parse (const char *uri, - char **emsg); +int +GNUNET_uri_split_path (char *path, + char **parts, + int max_parts); /** - * Free URI. + * Parse a query string into a key/value struct. * - * @param uri uri to free + * The query string should be a null terminated string of parameters separated by + * a delimiter. Each parameter is checked for the equal sign character. If it + * appears in the parameter, it will be used as a null terminator and the part + * that comes after it will be the value of the parameter. + * + * No data are copied, the equal sign and delimiters are used as null + * terminators and then pointers to each parameter key and value will be stored + * in the GNUNET_uri_param struct. + * + * *query: the query string to parse. The string will be modified. + * delimiter: the character that separates the key/value pairs from each other. + * *params: an array of (struct GNUNET_uri_param) where to store the result. + * max_params: max number of parameters to parse. + * + * Returns the number of parsed items. -1 on error. 
*/ -void -GNUNET_uri_destroy (struct GNUNET_Uri *uri); +int +GNUNET_uri_parse_query (char *query, + char delimiter, + struct GNUNET_uri_param *params, + int max_params); #endif /* GNUNET_URI_LIB_H */ - -/* end of include/gnunet_uri_lib.h */ diff --git a/src/util/.gitignore b/src/util/.gitignore index 05f187869..8556ee7b8 100644 --- a/src/util/.gitignore +++ b/src/util/.gitignore @@ -71,9 +71,10 @@ perf_crypto_hash perf_crypto_symmetric perf_crypto_rsa perf_crypto_ecc_dlog -test_hexcoder +test_hexcoder test_regex test_tun +test_uri gnunet-timeout python27_location perf_malloc diff --git a/src/util/Makefile.am b/src/util/Makefile.am index 83b3b9c3d..c5059bbb1 100644 --- a/src/util/Makefile.am +++ b/src/util/Makefile.am @@ -96,6 +96,7 @@ libgnunetutil_la_SOURCES = \ strings.c \ time.c \ tun.c \ + uri.c \ speedup.c speedup.h \ proc_compat.c @@ -302,6 +303,7 @@ check_PROGRAMS = \ test_speedup \ test_time \ test_tun \ + test_uri \ $(BENCHMARKS) \ test_os_start_process \ test_common_logging_runtime_loglevels @@ -573,6 +575,11 @@ test_speedup_SOURCES = \ test_speedup_LDADD = \ libgnunetutil.la +test_uri_SOURCES = \ + test_uri.c +test_uri_LDADD = \ + libgnunetutil.la + perf_crypto_hash_SOURCES = \ perf_crypto_hash.c perf_crypto_hash_LDADD = \ diff --git a/src/util/test_uri.c b/src/util/test_uri.c new file mode 100644 index 000000000..f9e38383a --- /dev/null +++ b/src/util/test_uri.c @@ -0,0 +1,837 @@ +#include +#include +#include +#include "gnunet_uri_lib.h" + +#define KNRM "\x1B[0m" +#define KBLU "\x1B[34m" +#define KGRN "\x1B[32m" +#define KERR "\x1B[5;31;50m" + +/* macro to print out the header for a new group of tests */ +#define mu_group(name) printf ("%s • %s%s\n", KBLU, name, KNRM) + +/* macro for asserting a statement */ +#define mu_assert(message, test) do { \ + if (!(test)) { \ + printf ("\t%s× %s%s\n", KERR, message, KNRM); \ + return message; \ + } \ + printf ("\t%s• %s%s\n", KGRN, message, KNRM); \ + } while (0) + +/* macro for asserting a statement without 
printing it unless it is a failure */ +#define mu_silent_assert(message, test) do { \ + if (!(test)) { \ + printf ("\t\t%s× %s%s\n", KERR, message, KNRM); \ + return message; \ + } \ + } while (0) + +/* run a test function and return result */ +#define mu_run_test(test) do { \ + char *message = test (); tests_run++; \ + if (message) { return message; } \ + } while (0) + + +int tests_run; + +static int +strcmp_wrap (const char *str, + const char *str2) +{ + if (NULL == str && NULL == str2) { + return 0; + } + if (NULL == str) { + return 1; + } + if (NULL == str2) { + return -1; + } + + return strcmp (str, str2); +} + +#define assert_struct(as_url, \ + as_scheme, \ + as_user, \ + as_pass, \ + as_host, \ + as_port, \ + as_path, \ + as_query, \ + as_fragment) \ + mu_silent_assert ("should set the scheme attribute correctly", \ + 0 == strcmp_wrap (as_url.scheme, as_scheme)); \ + mu_silent_assert ("should set the username attribute correctly", \ + 0 == strcmp_wrap (as_url.username, as_user)); \ + mu_silent_assert ("should set the password attribute correctly", \ + 0 == strcmp_wrap (as_url.password, as_pass)); \ + mu_silent_assert ("should set the host attribute correctly", \ + 0 == strcmp_wrap (as_url.host, as_host)); \ + mu_silent_assert ("should set the port attribute correctly", \ + as_port == as_url.port); \ + mu_silent_assert ("should set the path attribute correctly", \ + 0 == strcmp_wrap (as_url.path, as_path)); \ + mu_silent_assert ("should set the query attribute correctly", \ + 0 == strcmp_wrap (as_url.query, as_query)); \ + mu_silent_assert ("should set the fragment attribute correctly", \ + 0 == strcmp_wrap (as_url.fragment, as_fragment)); + +static char * +test_parse_http_url_ok (void) +{ + int rc; + struct GNUNET_uri url; + char *url_string; + + /* Minimal URL */ + url_string = strdup ("http://example.com"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("minimal HTTP URL", -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + 
"example.com", + 0, + NULL, + NULL, + NULL); + free (url_string); + + /* With path (/) */ + url_string = strdup ("http://example.com/"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with path ('/')", -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + "", + NULL, + NULL); + free (url_string); + + /* With path */ + url_string = strdup ("http://example.com/path"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with path ('/path')", -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + "path", + NULL, + NULL); + free (url_string); + + /* With port */ + url_string = strdup ("http://example.com:80"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with port only", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 80, + NULL, + NULL, + NULL); + free (url_string); + + /* With query */ + url_string = strdup ("http://example.com?query=only"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with query only", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + NULL, + "query=only", + NULL); + free (url_string); + + /* With fragment */ + url_string = strdup ("http://example.com#frag=f1"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with fragment only", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + NULL, + NULL, + "frag=f1"); + free (url_string); + + /* With credentials */ + url_string = strdup ("http://u:p@example.com"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with credentials only", + -1 != rc); + assert_struct (url, + "http", + "u", + "p", + "example.com", + 0, + NULL, + NULL, + NULL); + free (url_string); + + /* With port and path */ + url_string = strdup ("http://example.com:8080/port/and/path"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with port and path", + -1 != rc); + assert_struct (url, + "http", + NULL, 
+ NULL, + "example.com", + 8080, + "port/and/path", + NULL, + NULL); + free (url_string); + + /* With port and query */ + url_string = strdup ("http://example.com:8080?query=portANDquery"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with port and query", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 8080, + NULL, + "query=portANDquery", + NULL); + free (url_string); + + /* With port and fragment */ + url_string = strdup ("http://example.com:8080#f1"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with port and fragment", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 8080, + NULL, + NULL, + "f1"); + free (url_string); + + /* With port and credentials */ + url_string = strdup ("http://u:p@example.com:8080"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with port and credentials", + -1 != rc); + assert_struct (url, + "http", + "u", + "p", + "example.com", + 8080, + NULL, + NULL, + NULL); + free (url_string); + + /* With path and query */ + url_string = strdup ("http://example.com/path/and/query?q=yes"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with path and query", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + "path/and/query", + "q=yes", + NULL); + free (url_string); + + /* With path and fragment */ + url_string = strdup ("http://example.com/path/and#fragment"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with path and fragment", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + "path/and", + NULL, + "fragment"); + free (url_string); + + /* With query and fragment */ + url_string = strdup ("http://example.com?q=yes#f1"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with query and fragment", + -1 != rc); + assert_struct (url, + "http", + NULL, + NULL, + "example.com", + 0, + NULL, + "q=yes", + "f1"); + free (url_string); + + /* With 
query and credentials */ + url_string = strdup ("http://u:p@example.com?q=yes"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with query and credentials", + -1 != rc); + assert_struct (url, + "http", + "u", + "p", + "example.com", + 0, + NULL, + "q=yes", + NULL); + free (url_string); + + /* With empty credentials */ + url_string = strdup ("http://:@example.com"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with empty credentials", + -1 != rc); + assert_struct (url, + "http", + "", + "", + "example.com", + 0, + NULL, + NULL, + NULL); + free (url_string); + + /* With empty credentials and port */ + url_string = strdup ("http://:@example.com:89"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with empty credentials and port", + -1 != rc); + assert_struct (url, + "http", + "", + "", + "example.com", + 89, + NULL, + NULL, + NULL); + free (url_string); + + /* Full URL */ + url_string = strdup ("https://jack:password@localhost:8989/path/to/test?query=yes&q=jack#fragment1"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("with port, path and query", + -1 != rc); + assert_struct (url, + "https", + "jack", + "password", + "localhost", + 8989, + "path/to/test", + "query=yes&q=jack", + "fragment1"); + free (url_string); + + return NULL; +} + +static char * +test_parse_http_rel_url_ok (void) +{ + int rc; + struct GNUNET_uri url; + char *url_string; + + /* Minimal relative URL */ + url_string = strdup ("/"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("minimal relative URL", + -1 != rc); + assert_struct (url, + NULL, + NULL, + NULL, + NULL, + 0, + "", + NULL, + NULL); + free (url_string); + + /* Path only */ + url_string = strdup ("/hejsan"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("path only", + -1 != rc); + assert_struct (url, + NULL, + NULL, + NULL, + NULL, + 0, + "hejsan", + NULL, + NULL); + free (url_string); + + /* Path and query */ + url_string = strdup ("/hejsan?q=yes"); + rc = 
GNUNET_uri_parse (&url, + url_string); + mu_assert ("path only", + -1 != rc); + assert_struct (url, + NULL, + NULL, + NULL, + NULL, + 0, + "hejsan", + "q=yes", + NULL); + free (url_string); + + /* Path and fragment */ + url_string = strdup ("/hejsan#fragment"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("path and fragment", + -1 != rc); + assert_struct (url, + NULL, + NULL, + NULL, + NULL, + 0, + "hejsan", + NULL, + "fragment"); + free (url_string); + + /* Path, query and fragment */ + url_string = strdup ("/?q=yes&q2=no#fragment"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("path, query and fragment", + -1 != rc); + assert_struct (url, + NULL, + NULL, + NULL, + NULL, + 0, + "", + "q=yes&q2=no", + "fragment"); + free (url_string); + + return NULL; +} + +static char * +test_parse_url_fail (void) +{ + int rc; + struct GNUNET_uri url; + char *url_string; + + /* Empty */ + url_string = strdup (""); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("empty string should return -1", + -1 == rc); + free (url_string); + + /* Scheme only */ + url_string = strdup ("rtsp://"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("scheme only should return -1", + -1 == rc); + free (url_string); + + /* Hostname only */ + url_string = strdup ("hostname"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("hostname only should return -1", + -1 == rc); + free (url_string); + + /* Query only */ + url_string = strdup ("?query=only"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("query only should return -1", + -1 == rc); + free (url_string); + + /* Missing scheme */ + url_string = strdup ("://"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("missing scheme should return -1", + -1 == rc); + free (url_string); + + /* Missing hostname */ + url_string = strdup ("rtsp://:8910/path"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("missing hostname should return -1", + -1 == rc); + free 
(url_string); + + /* Missing credentials */ + url_string = strdup ("rtsp://@hostname:8910/path"); + rc = GNUNET_uri_parse (&url, + url_string); + mu_assert ("missing credentials should return -1", + -1 == rc); + free (url_string); + + return NULL; +} + +static char * +test_split_path_ok (void) +{ + int rc; + char *path; + char *parts[10]; + + /* Simple path */ + path = strdup ("/this/is/a/path"); + rc = GNUNET_uri_split_path (path, + parts, + 10); + mu_assert ("should be able to parse a regular path", + 4 == rc); + mu_silent_assert ("first part should be 'this'", + 0 == strcmp ("this", parts[0])); + mu_silent_assert ("second part should be 'is'", + 0 == strcmp ("is", parts[1])); + mu_silent_assert ("third part should be 'a'", + 0 == strcmp ("a", parts[2])); + mu_silent_assert ("fourth part should be 'path'", + 0 == strcmp ("path", parts[3])); + free (path); + + /* Relative path */ + path = strdup ("this/is/a/path"); + rc = GNUNET_uri_split_path (path, + parts, + 10); + mu_assert ("should be able to parse a relative path", + 4 == rc); + mu_silent_assert ("first part should be 'this'", + 0 == strcmp ("this", parts[0])); + mu_silent_assert ("second part should be 'is'", + 0 == strcmp ("is", parts[1])); + mu_silent_assert ("third part should be 'a'", + 0 == strcmp ("a", parts[2])); + mu_silent_assert ("fourth part should be 'path'", + 0 == strcmp ("path", parts[3])); + free (path); + + /* Path with empty parts */ + path = strdup ("//this//is/a/path/"); + rc = GNUNET_uri_split_path (path, + parts, + 10); + mu_assert ("should treat multiple slashes as one", + 4 == rc); + mu_silent_assert ("first part should be 'this'", + 0 == strcmp("this", parts[0])); + mu_silent_assert ("second part should be 'is'", + 0 == strcmp("is", parts[1])); + mu_silent_assert ("third part should be 'a'", + 0 == strcmp("a", parts[2])); + mu_silent_assert ("fourth part should be 'path'", + 0 == strcmp("path", parts[3])); + free (path); + + /* Just one level */ + path = strdup("/one_level"); + rc = 
GNUNET_uri_split_path(path, parts, 10); + mu_assert("should be able to parse a path with one level", 1 == rc); + mu_silent_assert("first part should be 'this'", 0 == strcmp("one_level", parts[0])); + free(path); + + return NULL; +} + +static char * +test_parse_query_ok (void) +{ + int rc; + char *q; + struct GNUNET_uri_param params[10]; + + /* One param query */ + q = strdup ("q=yes"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("single parameter with value", + 1 == rc); + mu_silent_assert ("first param key should be 'q'", + 0 == strcmp ("q", params[0].key)); + mu_silent_assert ("first param val should be 'yes'", + 0 == strcmp ("yes", params[0].val)); + free (q); + + /* One param query without value */ + q = strdup ("q"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("single parameter without value", + 1 == rc); + mu_silent_assert ("first param key should be 'q'", + 0 == strcmp ("q", params[0].key)); + mu_silent_assert ("first param val should be NULL", + NULL == params[0].val); + free (q); + + /* Two param query */ + q = strdup ("query=yes&a1=hello"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("multiple params with value", + 2 == rc); + mu_silent_assert ("first param key should be 'query'", + 0 == strcmp ("query", params[0].key)); + mu_silent_assert ("first param val should be 'yes'", + 0 == strcmp ("yes", params[0].val)); + mu_silent_assert ("second param key should be 'a1'", + 0 == strcmp ("a1", params[1].key)); + mu_silent_assert ("second param val should be 'hello'", + 0 == strcmp ("hello", params[1].val)); + free (q); + + /* Two param query, one without value */ + q = strdup ("query=yes&forceHttps"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("multiple params one without value", + 2 == rc); + mu_silent_assert ("first param key should be 'query'", + 0 == strcmp ("query", params[0].key)); + mu_silent_assert ("first param val should be 'yes'", + 0 == 
strcmp ("yes", params[0].val)); + mu_silent_assert ("second param key should be 'forceHttps'", + 0 == strcmp ("forceHttps", params[1].key)); + mu_silent_assert ("second param val should be NULL", + NULL == params[1].val); + free (q); + + /* Three param query, all without value */ + q = strdup ("query&forceHttps&log"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("multiple params all without value", + 3 == rc); + mu_silent_assert ("first param key should be 'query'", + 0 == strcmp ("query", params[0].key)); + mu_silent_assert ("first param val should be NULL", + NULL == params[0].val); + mu_silent_assert ("second param key should be 'forceHttps'", + 0 == strcmp ("forceHttps", params[1].key)); + mu_silent_assert ("second param val should be NULL", + NULL == params[1].val); + mu_silent_assert ("third param key should be 'log'", + 0 == strcmp ("log", params[2].key)); + mu_silent_assert ("third param val should be NULL", + NULL == params[2].val); + free (q); + + /* Param with empty value */ + q = strdup ("param=&query=no"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("param with empty value", + 2 == rc); + mu_silent_assert ("first param key should be 'param'", + 0 == strcmp ("param", params[0].key)); + mu_silent_assert ("first param val should be ''", + 0 == strcmp ("", params[0].val)); + mu_silent_assert ("second param key should be 'query'", + 0 == strcmp ("query", params[1].key)); + mu_silent_assert ("second param val should be 'no'", + 0 == strcmp ("no", params[1].val)); + free (q); + + /* Double delimiter */ + q = strdup ("param=jack&&query=no"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("double delimiter", + 3 == rc); + mu_silent_assert ("first param key should be 'param'", + 0 == strcmp ("param", params[0].key)); + mu_silent_assert ("first param val should be 'jack'", + 0 == strcmp ("jack", params[0].val)); + mu_silent_assert ("second param key should be ''", + 0 == strcmp ("", 
params[1].key)); + mu_silent_assert ("second param val should be NULL", + NULL == params[1].val); + mu_silent_assert ("third param key should be 'query'", + 0 == strcmp ("query", params[2].key)); + mu_silent_assert ("third param val should be 'no'", + 0 == strcmp ("no", params[2].val)); + free (q); + + /* Delimiter in beginning */ + q = strdup ("&param=jack&query=no"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("delimiter in beginning", + 3 == rc); + mu_silent_assert ("first param key should be ''", + 0 == strcmp ("", params[0].key)); + mu_silent_assert ("first param val should be NULL", + NULL == params[0].val); + mu_silent_assert ("second param key should be 'param'", + 0 == strcmp ("param", params[1].key)); + mu_silent_assert ("second param val should be 'jack'", + 0 == strcmp ("jack", params[1].val)); + mu_silent_assert ("third param key should be 'query'", + 0 == strcmp ("query", params[2].key)); + mu_silent_assert ("third param val should be 'no'", + 0 == strcmp ("no", params[2].val)); + free (q); + + /* Delimiter at the end */ + q = strdup ("param=jack&query=no&"); + rc = GNUNET_uri_parse_query (q, + '&', + params, + 10); + mu_assert ("delimiter at the end", + 3 == rc); + mu_silent_assert ("first param key should be 'param'", + 0 == strcmp ("param", params[0].key)); + mu_silent_assert ("first param val should be 'jack'", + 0 == strcmp ("jack", params[0].val)); + mu_silent_assert ("second param key should be 'query'", + 0 == strcmp ("query", params[1].key)); + mu_silent_assert ("second param val should be 'no'", + 0 == strcmp ("no", params[1].val)); + mu_silent_assert ("third param key should be ''", + 0 == strcmp ("", params[2].key)); + mu_silent_assert ("third param val should be NULL", + NULL == params[2].val); + free (q); + + return NULL; +} + +static char * +all_tests (void) +{ + mu_group ("GNUNET_uri_parse () with an HTTP URL"); + mu_run_test (test_parse_http_url_ok); + + mu_group ("GNUNET_uri_parse () with an relative URL"); + 
mu_run_test (test_parse_http_rel_url_ok); + + mu_group ("GNUNET_uri_parse () with faulty values"); + mu_run_test (test_parse_url_fail); + + mu_group ("GNUNET_uri_split_path ()"); + mu_run_test (test_split_path_ok); + + mu_group ("GNUNET_uri_parse_query ()"); + mu_run_test (test_parse_query_ok); + + return NULL; +} + +int +main (void) +{ + char *result; + + result = all_tests (); + if (result != NULL) { + exit (EXIT_FAILURE); + } + + exit (EXIT_SUCCESS); +} diff --git a/src/util/uri.c b/src/util/uri.c new file mode 100644 index 000000000..ba5c0f716 --- /dev/null +++ b/src/util/uri.c @@ -0,0 +1,344 @@ +/** + * Copyright (C) 2016,2017 Jack Engqvist Johansson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include "gnunet_uri_lib.h" + + +/** + * Parse a non null terminated string into an integer. + * + * str: the string containing the number. + * len: Number of characters to parse. 
/**
 * Parse a length-bounded (not necessarily null terminated) run of decimal
 * digits into an integer.
 *
 * Like atoi (), parsing stops at the first character that is not a decimal
 * digit, so "80ab" yields 80 and "ab" yields 0. Unlike atoi (), no sign and
 * no leading whitespace are accepted. A value that does not fit into an int
 * saturates at the largest int instead of overflowing.
 *
 * str: the string containing the number.
 * len: maximum number of characters to parse.
 *
 * Returns the parsed, non-negative value.
 */
static inline int
natoi (const char *str,
       size_t len)
{
  /* Largest int value, computed without needing <limits.h>; this equals
     INT_MAX wherever unsigned int has no padding bits. */
  const int max = (int) (~0u >> 1);
  size_t i;
  int r = 0;

  for (i = 0; i < len; i++) {
    int digit;

    if (str[i] < '0' || str[i] > '9') {
      break;
    }
    digit = str[i] - '0';

    /* Would r * 10 + digit exceed max? Saturate instead of overflowing. */
    if (r > (max - digit) / 10) {
      return max;
    }
    r = r * 10 + digit;
  }

  return r;
}


/**
 * Check if a URL is relative (no scheme and hostname).
 *
 * A URL is considered relative exactly when its first character is '/'.
 *
 * url: the string containing the URL to check.
 *
 * Returns 1 if relative, otherwise 0.
 */
static inline int
is_relative (const char *url)
{
  return ('/' == *url) ? 1 : 0;
}


/**
 * Parse the scheme of a URL by replacing the ':' of the "://" separator with
 * a null terminator.
 *
 * The scheme must be non-empty and must be followed by "://"; a bare
 * "scheme:" without the two slashes is rejected.
 *
 * str: the string containing the URL to parse. Will be modified on success.
 *
 * Returns a pointer to the first character after "://" (the start of the
 * authority part, possibly an empty string) on success, otherwise NULL.
 */
static inline char *
parse_scheme (char *str)
{
  char *colon;

  /* If not found or first in string, return error */
  colon = strchr (str, ':');
  if (NULL == colon || colon == str) {
    return NULL;
  }

  /* If not followed by two slashes, return error. The || short-circuits,
     so colon[2] is only read when colon[1] is '/' (i.e. not the NUL). */
  if ('/' != colon[1] || '/' != colon[2]) {
    return NULL;
  }

  *colon = '\0'; /* Replace ':' with NUL to terminate the scheme */

  return colon + 3;
}


/**
 * Find a character in a string, replace it with '\0' and return the next
 * character in the string.
 *
 * str: the string to search in. Will be modified if the character is found.
 * find: the character to search for.
 *
 * Returns a pointer to the character after the one to search for. If not
 * found, NULL is returned and the string is left untouched.
 */
static inline char *
find_and_terminate (char *str,
                    char find)
{
  char *hit;

  hit = strchr (str, find);
  if (NULL == hit) {
    return NULL;
  }

  *hit = '\0';
  return hit + 1;
}


/* Yes, the following functions could be implemented as preprocessor macros
   instead of inline functions, but I think that this approach will be more
   clean in this case. */
*/ +static inline char * +find_fragment (char *str) +{ + return find_and_terminate (str, '#'); +} + + +static inline char * +find_query (char *str) +{ + return find_and_terminate (str, '?'); +} + + +static inline char * +find_path (char *str) +{ + return find_and_terminate (str, '/'); +} + + +/** + * Parse a URL to a struct. + * + * The URL string should be in one of the following formats: + * + * Absolute URL: + * scheme ":" [ "//" ] [ username ":" password "@" ] host [ ":" port ] [ "/" ] [ path ] [ "?" query ] [ "#" fragment ] + * + * Relative URL: + * path [ "?" query ] [ "#" fragment ] + * + * The following parts will be parsed to the corresponding struct member. + * + * *url: a pointer to the struct where to store the parsed values. + * *url_str: a pointer to the url to be parsed (null terminated). The string + * will be modified. + * + * Returns 0 on success, otherwise -1. + */ +int +GNUNET_uri_parse (struct GNUNET_uri *url, + char *u) +{ + if (NULL == url || NULL == u) { + return -1; + } + + memset(url, 0, sizeof (struct GNUNET_uri)); + + /* (Fragment) */ + url->fragment = find_fragment (u); + + /* (Query) */ + url->query = find_query (u); + + /* Relative URL? Parse scheme and hostname */ + if (!is_relative (u)) { + /* Scheme */ + url->scheme = u; + u = parse_scheme (u); + if (u == NULL) { + return -1; + } + + /* Host */ + if ('\0' == *u) { + return -1; + } + url->host = u; + + /* (Path) */ + url->path = find_path (u); + + /* (Credentials) */ + u = strchr (url->host, '@'); + if (NULL != u) { + /* Missing credentials? */ + if (u == url->host) { + return -1; + } + + url->username = url->host; + url->host = u + 1; + *u = '\0'; + + u = strchr (url->username, ':'); + if (NULL == u) { + return -1; + } + + url->password = u + 1; + *u = '\0'; + } + + /* Missing hostname? 
*/ + if ('\0' == *url->host) { + return -1; + } + + /* (Port) */ + u = strchr (url->host, ':'); + if (NULL != u && (NULL == url->path || u < url->path)) { + *(u++) = '\0'; + if ('\0' == *u) { + return -1; + } + + if (url->path) { + url->port = natoi (u, url->path - u - 1); + } else { + url->port = atoi (u); + } + } + + /* Missing hostname? */ + if ('\0' == *url->host) { + return -1; + } + } else { + /* (Path) */ + url->path = find_path (u); + } + + return 0; +} + + +/** + * Split a path into several strings. + * + * No data is copied, the slashed are used as null terminators and then + * pointers to each path part will be stored in **parts. Double slashes will be + * treated as one. + * + * *path: the path to split. The string will be modified. + * **parts: a pointer to an array of (char *) where to store the result. + * max_parts: max number of parts to parse. + * + * Returns the number of parsed items. -1 on error. + */ +int +GNUNET_uri_split_path (char *path, + char **parts, + int max_parts) +{ + int i = 0; + + if (NULL == path || '\0' == *path) { + return -1; + } + + do { + /* Forward to after slashes */ + while (*path == '/') path++; + + if ('\0' == *path) { + break; + } + + parts[i++] = path; + + path = strchr (path, '/'); + if (NULL == path) { + break; + } + + *(path++) = '\0'; + } while (i < max_parts); + + return i; +} + + +/** + * Parse a query string into a key/value struct. + * + * The query string should be a null terminated string of parameters separated by + * a delimiter. Each parameter are checked for the equal sign character. If it + * appears in the parameter, it will be used as a null terminator and the part + * that comes after it will be the value of the parameter. + * + * No data are copied, the equal sign and delimiters are used as null + * terminators and then pointers to each parameter key and value will be stored + * in the yuarel_param struct. + * + * *query: the query string to parse. The string will be modified. 
+ * delimiter: the character that separates the key/value pairs from eachother. + * *params: an array of (struct yuarel_param) where to store the result. + * max_values: max number of parameters to parse. + * + * Returns the number of parsed items. -1 on error. + */ +int +GNUNET_uri_parse_query (char *query, + char delimiter, + struct GNUNET_uri_param *params, + int max_params) +{ + int i = 0; + + if (NULL == query || '\0' == *query) { + return -1; + } + + params[i++].key = query; + while (i < max_params && NULL != (query = strchr (query, delimiter))) { + *query = '\0'; + params[i].key = ++query; + params[i].val = NULL; + + /* Go back and split previous param */ + if (i > 0) { + if ((params[i - 1].val = strchr (params[i - 1].key, '=')) != NULL) { + *(params[i - 1].val)++ = '\0'; + } + } + i++; + } + + /* Go back and split last param */ + if ((params[i - 1].val = strchr (params[i - 1].key, '=')) != NULL) { + *(params[i - 1].val)++ = '\0'; + } + + return i; +} -- cgit v1.2.3