libmicrohttpd

HTTP/1.x server C library (MHD 1.x, stable)
Log | Files | Refs | Submodules | README | LICENSE

commit ada4c2ed55dd14f68430e9518f2882226a7b07cd
parent 63dd1bff0fe85142967612346c7b580b9af336ac
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
Date:   Wed, 16 Jun 2021 16:41:58 +0300

Added MHD_str_remove_token_caseless_() function

The function removes the specified token from the input string and
normalizes the list of tokens.

Diffstat:
Msrc/microhttpd/.gitignore | 1+
Msrc/microhttpd/Makefile.am | 4++++
Msrc/microhttpd/mhd_str.c | 169++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Msrc/microhttpd/mhd_str.h | 38++++++++++++++++++++++++++++++++++++++
Asrc/microhttpd/test_str_token_remove.c | 248+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 459 insertions(+), 1 deletion(-)

diff --git a/src/microhttpd/.gitignore b/src/microhttpd/.gitignore @@ -53,6 +53,7 @@ test_http_reasons /test_options /test_start_stop /test_str_token +/test_str_token_remove test_shutdown_poll test_shutdown_select test_md5 diff --git a/src/microhttpd/Makefile.am b/src/microhttpd/Makefile.am @@ -158,6 +158,7 @@ check_PROGRAMS = \ test_str_compare \ test_str_to_value \ test_str_token \ + test_str_token_remove \ test_http_reasons \ test_md5 \ test_sha1 \ @@ -353,6 +354,9 @@ test_str_to_value_SOURCES = \ test_str_token_SOURCES = \ test_str_token.c mhd_str.c mhd_str.h +test_str_token_remove_SOURCES = \ + test_str_token_remove.c mhd_str.c mhd_str.h mhd_assert.h mhd_options.h + test_http_reasons_SOURCES = \ test_http_reasons.c \ reason_phrase.c mhd_str.c mhd_str.h diff --git a/src/microhttpd/mhd_str.c b/src/microhttpd/mhd_str.c @@ -27,8 +27,10 @@ #ifdef HAVE_STDBOOL_H #include <stdbool.h> -#endif +#endif /* HAVE_STDBOOL_H */ +#include <string.h> +#include "mhd_assert.h" #include "mhd_limits.h" #ifdef MHD_FAVOR_SMALL_CODE @@ -521,6 +523,171 @@ MHD_str_has_token_caseless_ (const char *str, } +/** + * Remove case-insensitive @a token from the @a str and put result + * to the output @a buf. + * + * Token could be surrounded by spaces and tabs and delimited by comma. + * The token match succeed if substring between start, end (of string) or + * comma contains only case-insensitive token and optional spaces and tabs. + * The quoted strings and comments are not supported by this function. + * + * The output string is normalised: empty tokens and repeated whitespaces + * are removed, no whitespaces before commas, exactly one space is used after + * each comma. + * + * @param str the string to process + * @param str_len the length of the @a str, not including optional + * terminating null-character. + * @param token the token to find + * @param token_len the length of @a token, not including optional + * terminating null-character. 
+ * @param[out] buf the output buffer, not null-terminated. + * @param[in,out] buf_size pointer to the size variable, at input it + * is the size of allocated buffer, at output + * it is the size of the resulting string (can + * be up to 50% larger than input) or negative value + * if there is not enough space for the result + * @return 'true' if token has been removed, + * 'false' otherwise. + */ +bool +MHD_str_remove_token_caseless_ (const char *str, + size_t str_len, + const char *const token, + const size_t token_len, + char *buf, + ssize_t *buf_size) +{ + const char *s1; /**< the "input" string / character */ + char *s2; /**< the "output" string / character */ + size_t t_pos; /**< position of matched character in the token */ + bool token_removed; + + mhd_assert (NULL == memchr (token, 0, token_len)); + mhd_assert (NULL == memchr (token, ' ', token_len)); + mhd_assert (NULL == memchr (token, '\t', token_len)); + mhd_assert (NULL == memchr (token, ',', token_len)); + mhd_assert (0 <= *buf_size); + + s1 = str; + s2 = buf; + token_removed = false; + + while ((size_t) (s1 - str) < str_len) + { + const char *cur_token; /**< the first char of current token */ + size_t copy_size; + + /* Skip any initial whitespaces and empty tokens */ + while ( ((size_t) (s1 - str) < str_len) && + ((' ' == *s1) || ('\t' == *s1) || (',' == *s1)) ) + s1++; + + /* 's1' points to the first char of token in the input string or + * points just beyond the end of the input string */ + + if ((size_t) (s1 - str) >= str_len) + break; /* Nothing to copy, end of the input string */ + + cur_token = s1; /* the first char of input token or + the char after the input buffer */ + + /* Check the token with case-insensitive match */ + t_pos = 0; + while ( ((size_t) (s1 - str) < str_len) && (token_len > t_pos) && + (charsequalcaseless (*s1, token[t_pos])) ) + { + s1++; + t_pos++; + } + if ( (token_len == t_pos) && (0 != token_len) ) + { + /* 'token' matched, check that current input token does not have + 
* any suffixes */ + while ( ((size_t) (s1 - str) < str_len) && + ((' ' == *s1) || ('\t' == *s1)) ) + s1++; + /* 's1' points to the first non-whitespace char after the token matched + * requested token or points just beyond the end of the input string after + * the requested token */ + if (((size_t) (s1 - str) == str_len) || (',' == *s1)) + {/* full token match, do not copy current token to the output */ + token_removed = true; + continue; + } + } + + /* 's1' points to some non-whitespace char in the token in the input + * string, to the ',', or just beyond the end of the input string */ + /* The current token in the input string does not match excluded token, + * it must be copied to the output string */ + /* the current token size excluding leading whitespaces and current char */ + copy_size = (size_t) (s1 - cur_token); + if (buf == s2) + { /* The first token to copy to the output */ + if (buf + *buf_size < s2 + copy_size) + { /* Not enough space in the output buffer */ + *buf_size = (ssize_t) -1; + return false; + } + } + else + { /* Some token was already copied to the output buffer */ + if (buf + *buf_size < s2 + copy_size + 2) + { /* Not enough space in the output buffer */ + *buf_size = (ssize_t) -1; + return false; + } + *(s2++) = ','; + *(s2++) = ' '; + } + /* Copy non-matched token to the output */ + if (0 != copy_size) + { + memcpy (s2, cur_token, copy_size); + s2 += copy_size; + } + + while ( ((size_t) (s1 - str) < str_len) && (',' != *s1)) + { + /* 's1' points to some non-whitespace char in the token */ + while ( ((size_t) (s1 - str) < str_len) && + (',' != *s1) && (' ' != *s1) && ('\t' != *s1) ) + { + if (buf + *buf_size <= s2) /* '<= s2' equals '< s2 + 1' */ + { /* Not enough space in the output buffer */ + *buf_size = (ssize_t) -1; + return false; + } + *(s2++) = *(s1++); + } + /* 's1' points to some whitespace char in the token in the input + * string, to the ',', or just beyond the end of the input string */ + /* Skip all whitespaces */ + while ( 
((size_t) (s1 - str) < str_len) && + ((' ' == *s1) || ('\t' == *s1)) ) + s1++; + + /* 's1' points to the first non-whitespace char in the input string + * after whitespace chars or just beyond the end of the input string */ + if (((size_t) (s1 - str) < str_len) && (',' != *s1)) + { /* Not the end of the current token */ + if (buf + *buf_size <= s2) /* '<= s2' equals '< s2 + 1' */ + { /* Not enough space in the output buffer */ + *buf_size = (ssize_t) -1; + return false; + } + *(s2++) = ' '; + } + } + } + mhd_assert (((ssize_t) (s2 - buf)) <= *buf_size); + *buf_size = (ssize_t) (s2 - buf); + return token_removed; +} + + #ifndef MHD_FAVOR_SMALL_CODE /* Use individual function for each case */ diff --git a/src/microhttpd/mhd_str.h b/src/microhttpd/mhd_str.h @@ -130,6 +130,44 @@ MHD_str_has_token_caseless_ (const char *str, #define MHD_str_has_s_token_caseless_(str,tkn) \ MHD_str_has_token_caseless_ ((str),(tkn),MHD_STATICSTR_LEN_ (tkn)) + +/** + * Remove case-insensitive @a token from the @a str and put result + * to the output @a buf. + * + * Token could be surrounded by spaces and tabs and delimited by comma. + * The token match succeed if substring between start, end (of string) or + * comma contains only case-insensitive token and optional spaces and tabs. + * The quoted strings and comments are not supported by this function. + * + * The output string is normalised: empty tokens and repeated whitespaces + * are removed, no whitespaces before commas, exactly one space is used after + * each comma. + * + * @param str the string to process + * @param str_len the length of the @a str, not including optional + * terminating null-character. + * @param token the token to find + * @param token_len the length of @a token, not including optional + * terminating null-character. + * @param[out] buf the output buffer, not null-terminated. 
+ * @param[in,out] buf_size pointer to the size variable, at input it + * is the size of allocated buffer, at output + * it is the size of the resulting string (can + * be up to 50% larger than input) or negative value + * if there is not enough space for the result + * @return 'true' if token has been removed, + * 'false' otherwise. + */ +bool +MHD_str_remove_token_caseless_ (const char *str, + size_t str_len, + const char *const token, + const size_t token_len, + char *buf, + ssize_t *buf_size); + + #ifndef MHD_FAVOR_SMALL_CODE /* Use individual function for each case to improve speed */ diff --git a/src/microhttpd/test_str_token_remove.c b/src/microhttpd/test_str_token_remove.c @@ -0,0 +1,248 @@ +/* + This file is part of libmicrohttpd + Copyright (C) 2017 Karlson2k (Evgeny Grin) + + This test tool is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or + (at your option) any later version. + + This test tool is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/** + * @file microhttpd/test_str_token_remove.c + * @brief Unit tests for some mhd_str functions + * @author Karlson2k (Evgeny Grin) + */ + +#include <string.h> +#include <stdio.h> +#include "mhd_options.h" +#include "mhd_str.h" +#include "mhd_assert.h" + + +static int +expect_result_n (const char *str, size_t str_len, + const char *token, size_t token_len, + const char *expected, size_t expected_len, + const bool expected_removed) +{ + char buf_in[1024]; + char buf_token[256]; + char buf_out[1024]; + size_t buf_len; + + mhd_assert (sizeof(buf_in) > str_len + 2); + mhd_assert (sizeof(buf_token) > token_len + 2); + mhd_assert (sizeof(buf_out) > expected_len + 2); + + memset (buf_in, '#', sizeof(buf_in)); + memset (buf_token, '#', sizeof(buf_token)); + memcpy (buf_in, str, str_len); /* Copy without zero-termination */ + memcpy (buf_token, token, token_len); /* Copy without zero-termination */ + + for (buf_len = 0; buf_len <= expected_len + 3; ++buf_len) + { + bool res; + ssize_t result_len; + memset (buf_out, '$', sizeof(buf_out)); + + result_len = buf_len; + + res = MHD_str_remove_token_caseless_ (buf_in, str_len, buf_token, token_len, + buf_out, &result_len); + if (buf_len < expected_len) + { /* The result should not fit into the buffer */ + if (res || (0 < result_len)) + { + fprintf (stderr, + "MHD_str_remove_token_caseless_() FAILED:\n" + "\tMHD_str_remove_token_caseless_(\"%.*s\", %lu," + " \"%.*s\", %lu, buf, &(%ld->%ld)) returned %s\n", + (int) str_len + 2, buf_in, (unsigned long) str_len, + (int) token_len + 2, buf_token, (unsigned long) token_len, + (long) buf_len, (long) result_len, res ? 
"true" : "false"); + return 1; + } + } + else + { /* The result should fit into the buffer */ + if ( (expected_removed != res) || + (expected_len != (size_t) result_len) || + ((0 != result_len) && (0 != memcmp (expected, buf_out, + result_len))) || + ('$' != buf_out[result_len])) + { + fprintf (stderr, + "MHD_str_remove_token_caseless_() FAILED:\n" + "\tMHD_str_remove_token_caseless_(\"%.*s\", %lu," + " \"%.*s\", %lu, \"%.*s\", &(%ld->%ld)) returned %s\n", + (int) str_len + 2, buf_in, (unsigned long) str_len, + (int) token_len + 2, buf_token, (unsigned long) token_len, + (int) expected_len + 2, buf_out, + (long) buf_len, (long) result_len, + res ? "true" : "false"); + return 1; + } + } + } + return 0; +} + + +#define expect_result(s,t,e,found) \ + expect_result_n ((s),MHD_STATICSTR_LEN_ (s), \ + (t),MHD_STATICSTR_LEN_ (t), \ + (e),MHD_STATICSTR_LEN_ (e), found) + +int +check_result (void) +{ + int errcount = 0; + errcount += expect_result ("string", "string", "", true); + errcount += expect_result ("String", "string", "", true); + errcount += expect_result ("string", "String", "", true); + errcount += expect_result ("strinG", "String", "", true); + errcount += expect_result ("\t strinG", "String", "", true); + errcount += expect_result ("strinG\t ", "String", "", true); + errcount += expect_result (" \t tOkEn ", "toKEN", "", true); + errcount += expect_result ("not token\t, tOkEn ", "toKEN", "not token", + true); + errcount += expect_result ("not token,\t tOkEn, more token", "toKEN", + "not token, more token", true); + errcount += expect_result ("not token,\t tOkEn\t, more token", "toKEN", + "not token, more token", true); + errcount += expect_result (",,,,,,test,,,,", "TESt", "", true); + errcount += expect_result (",,,,,\t,test,,,,", "TESt", "", true); + errcount += expect_result (",,,,,,test, ,,,", "TESt", "", true); + errcount += expect_result (",,,,,, test,,,,", "TESt", "", true); + errcount += expect_result (",,,,,, test not,test,,", "TESt", "test not", + 
true); + errcount += expect_result (",,,,,, test not,,test,,", "TESt", "test not", + true); + errcount += expect_result (",,,,,, test not ,test,,", "TESt", "test not", + true); + errcount += expect_result (",,,,,, test", "TESt", "", true); + errcount += expect_result (",,,,,, test ", "TESt", "", true); + errcount += expect_result ("no test,,,,,, test ", "TESt", "no test", + true); + errcount += expect_result ("the-token,, the-token , the-token" \ + ",the-token ,the-token", "the-token", "", true); + errcount += expect_result (" the-token,, the-token , the-token," \ + "the-token ,the-token ", "the-token", "", true); + errcount += expect_result (" the-token ,, the-token , the-token," \ + "the-token , the-token ", "the-token", "", true); + errcount += expect_result ("the-token,a, the-token , the-token,b," \ + "the-token , c,the-token", "the-token", "a, b, c", + true); + errcount += expect_result (" the-token, a, the-token , the-token, b," \ + "the-token ,c ,the-token ", "the-token", + "a, b, c", true); + errcount += expect_result (" the-token , a , the-token , the-token, b ," \ + "the-token , c , the-token ", "the-token", + "a, b, c",true); + errcount += expect_result ("the-token,aa, the-token , the-token,bb," \ + "the-token , cc,the-token", "the-token", + "aa, bb, cc", true); + errcount += expect_result (" the-token, aa, the-token , the-token, bb," \ + "the-token ,cc ,the-token ", "the-token", + "aa, bb, cc", true); + errcount += expect_result (" the-token , aa , the-token , the-token, bb ," \ + "the-token , cc , the-token ", "the-token", + "aa, bb, cc", true); + + errcount += expect_result ("strin", "string", "strin", false); + errcount += expect_result ("Stringer", "string", "Stringer", false); + errcount += expect_result ("sstring", "String", "sstring", false); + errcount += expect_result ("string", "Strin", "string", false); + errcount += expect_result ("\t( strinG", "String", "( strinG", false); + errcount += expect_result (")strinG\t ", "String", ")strinG", 
false); + errcount += expect_result (" \t tOkEn t ", "toKEN", "tOkEn t", false); + errcount += expect_result ("not token\t, tOkEner ", "toKEN", + "not token, tOkEner", false); + errcount += expect_result ("not token,\t tOkEns, more token", "toKEN", + "not token, tOkEns, more token", false); + errcount += expect_result ("not token,\t tOkEns\t, more token", "toKEN", + "not token, tOkEns, more token", false); + errcount += expect_result (",,,,,,testing,,,,", "TESt", "testing", false); + errcount += expect_result (",,,,,\t,test,,,,", "TESting", "test", false); + errcount += expect_result ("tests,,,,,,quest, ,,,", "TESt", "tests, quest", + false); + errcount += expect_result (",,,,,, testы,,,,", "TESt", "testы", false); + errcount += expect_result (",,,,,, test not,хtest,,", "TESt", + "test not, хtest", false); + errcount += expect_result ("testing,,,,,, test not,,test2,,", "TESt", + "testing, test not, test2", false); + errcount += expect_result (",testi,,,,, test not ,test,,", "TESting", + "testi, test not, test", false); + errcount += expect_result (",,,,,,2 test", "TESt", "2 test", false); + errcount += expect_result (",,,,,,test test ", "test", "test test", + false); + errcount += expect_result ("no test,,,,,,test test", "test", + "no test, test test", false); + errcount += expect_result (",,,,,,,,,,,,,,,,,,,", "the-token", "", false); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,", "the-token", + "a, b, c, d, e, f, g", false); + errcount += expect_result (",,,,,,,,,,,,,,,,,,,", "", "", false); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,", "", + "a, b, c, d, e, f, g", false); + errcount += expect_result ("a,b,c,d,e,f,g", "", "a, b, c, d, e, f, g", + false); + errcount += expect_result ("a1,b1,c1,d1,e1,f1,g1", "", + "a1, b1, c1, d1, e1, f1, g1", false); + + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token", + "the-token", "a, b, c, d, e, f, g", true); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token,", + 
"the-token", "a, b, c, d, e, f, g", true); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token,x", + "the-token", "a, b, c, d, e, f, g, x", true); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token x", + "the-token", "a, b, c, d, e, f, g, the-token x", + false); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token x,", + "the-token", "a, b, c, d, e, f, g, the-token x", + false); + errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token x,x", + "the-token", "a, b, c, d, e, f, g," \ + " the-token x, x", false); + errcount += expect_result ("the-token,a,b,c,d,e,f,g,,,,,,,,,,,,the-token", + "the-token", "a, b, c, d, e, f, g", true); + errcount += expect_result ("the-token ,a,b,c,d,e,f,g,,,,,,,,,,,,the-token,", + "the-token", "a, b, c, d, e, f, g", true); + errcount += expect_result ("the-token,a,b,c,d,e,f,g,,,,,,,,,,,,the-token,x", + "the-token", "a, b, c, d, e, f, g, x", true); + errcount += expect_result ("the-token x,a,b,c,d,e,f,g,,,,,,,,,,,," \ + "the-token x", "the-token", + "the-token x, a, b, c, d, e, f, g, the-token x", + false); + errcount += expect_result ("the-token x,a,b,c,d,e,f,g,,,,,,,,,,,," \ + "the-token x,", "the-token", + "the-token x, a, b, c, d, e, f, g, the-token x", + false); + errcount += expect_result ("the-token x,a,b,c,d,e,f,g,,,,,,,,,,,," \ + "the-token x,x", "the-token", + "the-token x, a, b, c, d, e, f, g, " \ + "the-token x, x", false); + + return errcount; +} + + +int +main (int argc, char *argv[]) +{ + int errcount = 0; + (void) argc; (void) argv; /* Unused. Silent compiler warning. */ + errcount += check_result (); + return errcount == 0 ? 0 : 1; +}