diff options
author | Jonathan Buchanan <jonathan.russ.buchanan@gmail.com> | 2020-08-26 14:49:02 -0400 |
---|---|---|
committer | Jonathan Buchanan <jonathan.russ.buchanan@gmail.com> | 2020-08-26 14:49:02 -0400 |
commit | 49d74db2e9bd8418f15b560e36e1f27661f65361 (patch) | |
tree | 48612892b643b0abbc63d99c8e2bcacfc1d6e396 /src/util/uri.c | |
parent | e36180a17c8da8d3639f42924ce56c11fec1fddd (diff) | |
download | gnunet-49d74db2e9bd8418f15b560e36e1f27661f65361.tar.gz gnunet-49d74db2e9bd8418f15b560e36e1f27661f65361.zip |
use (and "GNUnet-ify") libyuarel as a basepoint for uri parsing
Diffstat (limited to 'src/util/uri.c')
-rw-r--r-- | src/util/uri.c | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/util/uri.c b/src/util/uri.c new file mode 100644 index 000000000..ba5c0f716 --- /dev/null +++ b/src/util/uri.c | |||
@@ -0,0 +1,344 @@ | |||
1 | /** | ||
2 | * Copyright (C) 2016,2017 Jack Engqvist Johansson | ||
3 | * | ||
4 | * Permission is hereby granted, free of charge, to any person obtaining a copy | ||
5 | * of this software and associated documentation files (the "Software"), to deal | ||
6 | * in the Software without restriction, including without limitation the rights | ||
7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
8 | * copies of the Software, and to permit persons to whom the Software is | ||
9 | * furnished to do so, subject to the following conditions: | ||
10 | * | ||
11 | * The above copyright notice and this permission notice shall be included in all | ||
12 | * copies or substantial portions of the Software. | ||
13 | * | ||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
20 | * SOFTWARE. | ||
21 | */ | ||
22 | #include <stdlib.h> | ||
23 | #include <stdio.h> | ||
24 | #include <string.h> | ||
25 | #include "gnunet_uri_lib.h" | ||
26 | |||
27 | |||
/**
 * Convert a run of decimal digits that is not NUL-terminated into an integer.
 *
 * The characters are not validated: each of the len bytes is treated as a
 * decimal digit, so the caller has to make sure that the range only holds
 * '0'..'9' and that the resulting value fits into an int.
 *
 * str: start of the digit sequence.
 * len: number of characters to consume.
 *
 * Returns the accumulated value (0 if len is 0).
 */
static inline int
natoi (const char *str,
       size_t len)
{
  size_t i; /* same type as len: avoids a signed/unsigned comparison */
  int r = 0;

  for (i = 0; i < len; i++) {
    r *= 10;
    r += str[i] - '0';
  }

  return r;
}
46 | |||
47 | |||
/**
 * Tell whether a URL is relative, i.e. carries neither scheme nor hostname.
 *
 * Only the first character is inspected: a leading '/' marks a relative URL.
 *
 * url: the NUL-terminated URL to inspect.
 *
 * Returns 1 if the URL is relative, otherwise 0.
 */
static inline int
is_relative (const char *url)
{
  if ('/' == url[0]) {
    return 1;
  }

  return 0;
}
60 | |||
61 | |||
/**
 * Split the scheme off a URL by overwriting the ':' of "://" with '\0'.
 *
 * str: the NUL-terminated URL to parse. Modified in place on success.
 *
 * Returns a pointer to the first character after "://" on success. Returns
 * NULL (leaving str untouched) if there is no ':', if the ':' is the very
 * first character, or if the ':' is not directly followed by "//".
 */
static inline char *
parse_scheme (char *str)
{
  char *colon = strchr (str, ':');

  /* A scheme needs at least one character in front of the ':' */
  if (NULL == colon || colon == str) {
    return NULL;
  }

  /* strncmp() stops at the terminator, so a string that ends right after
     the ':' or after a single '/' is rejected without reading past it */
  if (0 != strncmp (colon + 1, "//", 2)) {
    return NULL;
  }

  *colon = '\0';

  return colon + 3;
}
89 | |||
90 | |||
/**
 * Cut a string in two at the first occurrence of a character.
 *
 * The first occurrence of find is overwritten with '\0', so that str ends
 * right before it.
 *
 * str: the NUL-terminated string to search. Modified in place on a match.
 * find: the character to look for.
 *
 * Returns a pointer to the character following the match, or NULL (with str
 * left untouched) if find does not occur in str.
 */
static inline char *
find_and_terminate (char *str,
                    char find)
{
  char *hit = strchr (str, find);

  if (NULL != hit) {
    *hit++ = '\0';
  }

  return hit;
}
113 | |||
114 | |||
/* The helpers below are one-line wrappers around find_and_terminate(). They
   could just as well be preprocessor macros; inline functions were chosen
   because they read more cleanly here. */

/**
 * Cut the fragment off a URL at the first '#'.
 *
 * str: the NUL-terminated string to search. Modified in place on a match.
 *
 * Returns a pointer to the text after the '#', or NULL if there is no '#'.
 */
static inline char *
find_fragment (char *str)
{
  char *fragment = find_and_terminate (str, '#');

  return fragment;
}
123 | |||
124 | |||
/**
 * Cut the query off a URL at the first '?'.
 *
 * str: the NUL-terminated string to search. Modified in place on a match.
 *
 * Returns a pointer to the text after the '?', or NULL if there is no '?'.
 */
static inline char *
find_query (char *str)
{
  char *query = find_and_terminate (str, '?');

  return query;
}
130 | |||
131 | |||
/**
 * Cut a string at its first '/' and locate the path that follows it.
 *
 * str: the NUL-terminated string to search. Modified in place on a match.
 *
 * Returns a pointer to the text after the first '/' (so the path does not
 * include that slash), or NULL if there is no '/'.
 */
static inline char *
find_path (char *str)
{
  char *path = find_and_terminate (str, '/');

  return path;
}
137 | |||
138 | |||
139 | /** | ||
140 | * Parse a URL to a struct. | ||
141 | * | ||
142 | * The URL string should be in one of the following formats: | ||
143 | * | ||
144 | * Absolute URL: | ||
145 | * scheme ":" [ "//" ] [ username ":" password "@" ] host [ ":" port ] [ "/" ] [ path ] [ "?" query ] [ "#" fragment ] | ||
146 | * | ||
147 | * Relative URL: | ||
148 | * path [ "?" query ] [ "#" fragment ] | ||
149 | * | ||
150 | * The following parts will be parsed to the corresponding struct member. | ||
151 | * | ||
152 | * *url: a pointer to the struct where to store the parsed values. | ||
153 | * *url_str: a pointer to the url to be parsed (null terminated). The string | ||
154 | * will be modified. | ||
155 | * | ||
156 | * Returns 0 on success, otherwise -1. | ||
157 | */ | ||
158 | int | ||
159 | GNUNET_uri_parse (struct GNUNET_uri *url, | ||
160 | char *u) | ||
161 | { | ||
162 | if (NULL == url || NULL == u) { | ||
163 | return -1; | ||
164 | } | ||
165 | |||
166 | memset(url, 0, sizeof (struct GNUNET_uri)); | ||
167 | |||
168 | /* (Fragment) */ | ||
169 | url->fragment = find_fragment (u); | ||
170 | |||
171 | /* (Query) */ | ||
172 | url->query = find_query (u); | ||
173 | |||
174 | /* Relative URL? Parse scheme and hostname */ | ||
175 | if (!is_relative (u)) { | ||
176 | /* Scheme */ | ||
177 | url->scheme = u; | ||
178 | u = parse_scheme (u); | ||
179 | if (u == NULL) { | ||
180 | return -1; | ||
181 | } | ||
182 | |||
183 | /* Host */ | ||
184 | if ('\0' == *u) { | ||
185 | return -1; | ||
186 | } | ||
187 | url->host = u; | ||
188 | |||
189 | /* (Path) */ | ||
190 | url->path = find_path (u); | ||
191 | |||
192 | /* (Credentials) */ | ||
193 | u = strchr (url->host, '@'); | ||
194 | if (NULL != u) { | ||
195 | /* Missing credentials? */ | ||
196 | if (u == url->host) { | ||
197 | return -1; | ||
198 | } | ||
199 | |||
200 | url->username = url->host; | ||
201 | url->host = u + 1; | ||
202 | *u = '\0'; | ||
203 | |||
204 | u = strchr (url->username, ':'); | ||
205 | if (NULL == u) { | ||
206 | return -1; | ||
207 | } | ||
208 | |||
209 | url->password = u + 1; | ||
210 | *u = '\0'; | ||
211 | } | ||
212 | |||
213 | /* Missing hostname? */ | ||
214 | if ('\0' == *url->host) { | ||
215 | return -1; | ||
216 | } | ||
217 | |||
218 | /* (Port) */ | ||
219 | u = strchr (url->host, ':'); | ||
220 | if (NULL != u && (NULL == url->path || u < url->path)) { | ||
221 | *(u++) = '\0'; | ||
222 | if ('\0' == *u) { | ||
223 | return -1; | ||
224 | } | ||
225 | |||
226 | if (url->path) { | ||
227 | url->port = natoi (u, url->path - u - 1); | ||
228 | } else { | ||
229 | url->port = atoi (u); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | /* Missing hostname? */ | ||
234 | if ('\0' == *url->host) { | ||
235 | return -1; | ||
236 | } | ||
237 | } else { | ||
238 | /* (Path) */ | ||
239 | url->path = find_path (u); | ||
240 | } | ||
241 | |||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | |||
/**
 * Split a path into several strings.
 *
 * No data is copied: the slashes are used as NUL terminators and pointers to
 * each path part are stored in parts. Consecutive slashes are treated as
 * one, and leading slashes are skipped. Once max_parts parts have been
 * stored, the rest of the path is left unsplit and is not reported.
 *
 * path: the path to split. The string will be modified.
 * parts: an array of (char *) with room for at least max_parts entries,
 *        where the result is stored.
 * max_parts: max number of parts to parse. Nothing is stored if this is
 *            zero or negative.
 *
 * Returns the number of parsed items (possibly 0). -1 if path is NULL or
 * empty.
 */
int
GNUNET_uri_split_path (char *path,
                       char **parts,
                       int max_parts)
{
  int count = 0;

  if (NULL == path || '\0' == *path) {
    return -1;
  }

  /* The capacity is checked before every store, so that parts is never
     written to when max_parts is not positive */
  while (count < max_parts) {
    /* Forward to after slashes */
    while ('/' == *path) {
      path++;
    }

    if ('\0' == *path) {
      break;
    }

    parts[count++] = path;

    path = strchr (path, '/');
    if (NULL == path) {
      break;
    }

    *(path++) = '\0';
  }

  return count;
}
290 | |||
291 | |||
292 | /** | ||
293 | * Parse a query string into a key/value struct. | ||
294 | * | ||
295 | * The query string should be a null terminated string of parameters separated by | ||
296 | * a delimiter. Each parameter are checked for the equal sign character. If it | ||
297 | * appears in the parameter, it will be used as a null terminator and the part | ||
298 | * that comes after it will be the value of the parameter. | ||
299 | * | ||
300 | * No data are copied, the equal sign and delimiters are used as null | ||
301 | * terminators and then pointers to each parameter key and value will be stored | ||
302 | * in the yuarel_param struct. | ||
303 | * | ||
304 | * *query: the query string to parse. The string will be modified. | ||
305 | * delimiter: the character that separates the key/value pairs from eachother. | ||
306 | * *params: an array of (struct yuarel_param) where to store the result. | ||
307 | * max_values: max number of parameters to parse. | ||
308 | * | ||
309 | * Returns the number of parsed items. -1 on error. | ||
310 | */ | ||
311 | int | ||
312 | GNUNET_uri_parse_query (char *query, | ||
313 | char delimiter, | ||
314 | struct GNUNET_uri_param *params, | ||
315 | int max_params) | ||
316 | { | ||
317 | int i = 0; | ||
318 | |||
319 | if (NULL == query || '\0' == *query) { | ||
320 | return -1; | ||
321 | } | ||
322 | |||
323 | params[i++].key = query; | ||
324 | while (i < max_params && NULL != (query = strchr (query, delimiter))) { | ||
325 | *query = '\0'; | ||
326 | params[i].key = ++query; | ||
327 | params[i].val = NULL; | ||
328 | |||
329 | /* Go back and split previous param */ | ||
330 | if (i > 0) { | ||
331 | if ((params[i - 1].val = strchr (params[i - 1].key, '=')) != NULL) { | ||
332 | *(params[i - 1].val)++ = '\0'; | ||
333 | } | ||
334 | } | ||
335 | i++; | ||
336 | } | ||
337 | |||
338 | /* Go back and split last param */ | ||
339 | if ((params[i - 1].val = strchr (params[i - 1].key, '=')) != NULL) { | ||
340 | *(params[i - 1].val)++ = '\0'; | ||
341 | } | ||
342 | |||
343 | return i; | ||
344 | } | ||