Added MHD_str_remove_token_caseless_() function - libmicrohttpd - HTTP/1.x server C library (MHD 1.x, stable)

commit ada4c2ed55dd14f68430e9518f2882226a7b07cd
parent 63dd1bff0fe85142967612346c7b580b9af336ac
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
Date:   Wed, 16 Jun 2021 16:41:58 +0300

Added MHD_str_remove_token_caseless_() function

The function removes the specified token from the input string and
normalizes the list of remaining tokens.

Diffstat:
M src/microhttpd/.gitignore  | 1 +
M src/microhttpd/Makefile.am  | 4 ++++
M src/microhttpd/mhd_str.c  | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/microhttpd/mhd_str.h  | 38 ++++++++++++++++++++++++++++++++++++++
A src/microhttpd/test_str_token_remove.c  | 248 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 459 insertions(+), 1 deletion(-)
diff --git a/src/microhttpd/.gitignore b/src/microhttpd/.gitignore
@@ -53,6 +53,7 @@ test_http_reasons
 /test_options
 /test_start_stop
 /test_str_token
+/test_str_token_remove
 test_shutdown_poll
 test_shutdown_select
 test_md5
diff --git a/src/microhttpd/Makefile.am b/src/microhttpd/Makefile.am
@@ -158,6 +158,7 @@ check_PROGRAMS = \
   test_str_compare \
   test_str_to_value \
   test_str_token \
+  test_str_token_remove \
   test_http_reasons \
   test_md5 \
   test_sha1 \
@@ -353,6 +354,9 @@ test_str_to_value_SOURCES = \
 test_str_token_SOURCES = \
   test_str_token.c mhd_str.c mhd_str.h
 
+test_str_token_remove_SOURCES = \
+  test_str_token_remove.c mhd_str.c mhd_str.h mhd_assert.h mhd_options.h
+
 test_http_reasons_SOURCES = \
   test_http_reasons.c \
   reason_phrase.c mhd_str.c mhd_str.h
diff --git a/src/microhttpd/mhd_str.c b/src/microhttpd/mhd_str.c
@@ -27,8 +27,10 @@
 
 #ifdef HAVE_STDBOOL_H
 #include <stdbool.h>
-#endif
+#endif /* HAVE_STDBOOL_H */
+#include <string.h>
 
+#include "mhd_assert.h"
 #include "mhd_limits.h"
 
 #ifdef MHD_FAVOR_SMALL_CODE
@@ -521,6 +523,171 @@ MHD_str_has_token_caseless_ (const char *str,
 }
 
 
+/**
+ * Remove case-insensitive @a token from the @a str and put result
+ * to the output @a buf.
+ *
+ * Token could be surrounded by spaces and tabs and delimited by comma.
+ * The token match succeeds if substring between start, end (of string) or
+ * comma contains only case-insensitive token and optional spaces and tabs.
+ * The quoted strings and comments are not supported by this function.
+ *
+ * The output string is normalised: empty tokens and repeated whitespaces
+ * are removed, no whitespaces before commas, exactly one space is used after
+ * each comma.
+ *
+ * @param str the string to process
+ * @param str_len the length of the @a str, not including optional
+ *                terminating null-character.
+ * @param token the token to find, without whitespaces, commas and zeros
+ * @param token_len the length of @a token, not including optional
+ *                  terminating null-character.
+ * @param[out] buf the output buffer, not null-terminated.
+ * @param[in,out] buf_size pointer to the size variable, at input it
+ *                         is the size of allocated buffer, at output
+ *                         it is the size of the resulting string (can
+ *                         be up to 50% larger than input) or negative value
+ *                         if there is not enough space for the result
+ * @return 'true' if token has been removed and the result fits @a buf,
+ *         'false' otherwise (including the not enough space case).
+ */
+bool
+MHD_str_remove_token_caseless_ (const char *str,
+                                size_t str_len,
+                                const char *const token,
+                                const size_t token_len,
+                                char *buf,
+                                ssize_t *buf_size)
+{
+  const char *s1; /**< the "input" string / character */
+  char *s2;       /**< the "output" string / character */
+  size_t t_pos;   /**< position of matched character in the token */
+  bool token_removed;
+
+  mhd_assert (NULL == memchr (token, 0, token_len));
+  mhd_assert (NULL == memchr (token, ' ', token_len));
+  mhd_assert (NULL == memchr (token, '\t', token_len));
+  mhd_assert (NULL == memchr (token, ',', token_len));
+  mhd_assert (0 <= *buf_size);
+
+  s1 = str;
+  s2 = buf;
+  token_removed = false;
+
+  while ((size_t) (s1 - str) < str_len)
+  {
+    const char *cur_token; /**< the first char of current token */
+    size_t copy_size;
+
+    /* Skip any initial whitespaces and empty tokens */
+    while ( ((size_t) (s1 - str) < str_len) &&
+            ((' ' == *s1) || ('\t' == *s1) || (',' == *s1)) )
+      s1++;
+
+    /* 's1' points to the first char of token in the input string or
+     * points just beyond the end of the input string */
+
+    if ((size_t) (s1 - str) >= str_len)
+      break; /* Nothing to copy, end of the input string */
+
+    cur_token = s1; /* the first char of the current input token
+                       (not a whitespace and not a comma) */
+
+    /* Check the token with case-insensitive match */
+    t_pos = 0;
+    while ( ((size_t) (s1 - str) < str_len) && (token_len > t_pos) &&
+            (charsequalcaseless (*s1, token[t_pos])) )
+    {
+      s1++;
+      t_pos++;
+    }
+    if ( (token_len == t_pos) && (0 != token_len) )
+    {
+      /* 'token' matched, check that current input token does not have
+       * any suffixes */
+      while ( ((size_t) (s1 - str) < str_len) &&
+              ((' ' == *s1) || ('\t' == *s1)) )
+        s1++;
+      /* 's1' points to the first non-whitespace char after the matched
+       * token or just beyond the end of the input string */
+      if (((size_t) (s1 - str) == str_len) || (',' == *s1))
+      {/* full token match, do not copy current token to the output */
+        token_removed = true;
+        continue;
+      }
+      s1 = cur_token + token_len; /* has a suffix: rewind before whitespaces */
+    }
+
+    /* 's1' points just after the chars matched so far (before any whitespace
+     * following them), to the ',', or just beyond the end of the input */
+    /* Not the excluded token: it must be copied to the output string.
+     * The free space is checked with integer arithmetic only, so no pointer
+     * past 'buf + *buf_size' is ever formed. */
+    copy_size = (size_t) (s1 - cur_token); /* the already checked part */
+    if (buf == s2)
+    { /* The first token to copy to the output */
+      if (*buf_size < (ssize_t) copy_size)
+      { /* Not enough space in the output buffer */
+        *buf_size = (ssize_t) -1;
+        return false;
+      }
+    }
+    else
+    { /* Some token was already copied to the output buffer */
+      if ((*buf_size - (ssize_t) (s2 - buf) - 2) < (ssize_t) copy_size)
+      { /* Not enough space in the output buffer */
+        *buf_size = (ssize_t) -1;
+        return false;
+      }
+      *(s2++) = ',';
+      *(s2++) = ' ';
+    }
+    /* Copy non-matched token to the output */
+    if (0 != copy_size)
+    {
+      memcpy (s2, cur_token, copy_size);
+      s2 += copy_size;
+    }
+
+    while ( ((size_t) (s1 - str) < str_len) && (',' != *s1))
+    {
+      /* 's1' points to some char of the token or to a whitespace in it */
+      while ( ((size_t) (s1 - str) < str_len) &&
+              (',' != *s1) && (' ' != *s1) && ('\t' != *s1) )
+      {
+        if (buf + *buf_size <= s2) /* '<= s2' equals '< s2 + 1' */
+        { /* Not enough space in the output buffer */
+          *buf_size = (ssize_t) -1;
+          return false;
+        }
+        *(s2++) = *(s1++);
+      }
+      /* 's1' points to some whitespace char in the token in the input
+       * string, to the ',', or just beyond the end of the input string */
+      /* Skip all whitespaces */
+      while ( ((size_t) (s1 - str) < str_len) &&
+              ((' ' == *s1) || ('\t' == *s1)) )
+        s1++;
+
+      /* 's1' points to the first non-whitespace char in the input string
+       * after whitespace chars or just beyond the end of the input string */
+      if (((size_t) (s1 - str) < str_len) && (',' != *s1))
+      { /* Not the end of the current token */
+        if (buf + *buf_size <= s2) /* '<= s2' equals '< s2 + 1' */
+        { /* Not enough space in the output buffer */
+          *buf_size = (ssize_t) -1;
+          return false;
+        }
+        *(s2++) = ' ';
+      }
+    }
+  }
+  mhd_assert (((ssize_t) (s2 - buf)) <= *buf_size);
+  *buf_size = (ssize_t) (s2 - buf);
+  return token_removed;
+}
+
+
 #ifndef MHD_FAVOR_SMALL_CODE
 /* Use individual function for each case */
 
diff --git a/src/microhttpd/mhd_str.h b/src/microhttpd/mhd_str.h
@@ -130,6 +130,44 @@ MHD_str_has_token_caseless_ (const char *str,
 #define MHD_str_has_s_token_caseless_(str,tkn) \
   MHD_str_has_token_caseless_ ((str),(tkn),MHD_STATICSTR_LEN_ (tkn))
 
+
+/**
+ * Remove case-insensitive @a token from the @a str and put result
+ * to the output @a buf.
+ *
+ * Token could be surrounded by spaces and tabs and delimited by comma.
+ * The token match succeeds if substring between start, end (of string) or
+ * comma contains only case-insensitive token and optional spaces and tabs.
+ * The quoted strings and comments are not supported by this function.
+ *
+ * The output string is normalised: empty tokens and repeated whitespaces
+ * are removed, no whitespaces before commas, exactly one space is used after
+ * each comma.
+ *
+ * @param str the string to process
+ * @param str_len the length of the @a str, not including optional
+ *                terminating null-character.
+ * @param token the token to find
+ * @param token_len the length of @a token, not including optional
+ *                  terminating null-character.
+ * @param[out] buf the output buffer, not null-terminated.
+ * @param[in,out] buf_size pointer to the size variable, at input it
+ *                         is the size of allocated buffer, at output
+ *                         it is the size of the resulting string (can
+ *                         be up to 50% larger than input) or negative value
+ *                         if there is not enough space for the result
+ * @return 'true' if token has been removed,
+ *         'false' otherwise (also when the result does not fit @a buf).
+ */
+bool
+MHD_str_remove_token_caseless_ (const char *str,
+                                size_t str_len,
+                                const char *const token,
+                                const size_t token_len,
+                                char *buf,
+                                ssize_t *buf_size);
+
+
 #ifndef MHD_FAVOR_SMALL_CODE
 /* Use individual function for each case to improve speed */
 
diff --git a/src/microhttpd/test_str_token_remove.c b/src/microhttpd/test_str_token_remove.c
@@ -0,0 +1,248 @@
+/*
+  This file is part of libmicrohttpd
+  Copyright (C) 2017 Karlson2k (Evgeny Grin)
+
+  This test tool is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2, or
+  (at your option) any later version.
+
+  This test tool is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+/**
+ * @file microhttpd/test_str_token_remove.c
+ * @brief  Unit tests for MHD_str_remove_token_caseless_() function
+ * @author Karlson2k (Evgeny Grin)
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include "mhd_options.h"
+#include "mhd_str.h"
+#include "mhd_assert.h"
+/** Check a single case using all output buffer sizes from zero to
+ *  expected_len + 3; @return zero if succeed, one if any check failed */
+static int
+expect_result_n (const char *str, size_t str_len,
+                 const char *token, size_t token_len,
+                 const char *expected, size_t expected_len,
+                 const bool expected_removed)
+{
+  char buf_in[1024];
+  char buf_token[256];
+  char buf_out[1024];
+  size_t buf_len;
+  /* Failure reports print two chars beyond each string, keep them in-bounds */
+  mhd_assert (sizeof(buf_in) > str_len + 2);
+  mhd_assert (sizeof(buf_token) > token_len + 2);
+  mhd_assert (sizeof(buf_out) > expected_len + 2);
+  /* The '#' padding replaces zero-termination of the input strings */
+  memset (buf_in, '#', sizeof(buf_in));
+  memset (buf_token, '#', sizeof(buf_token));
+  memcpy (buf_in, str, str_len); /* Copy without zero-termination */
+  memcpy (buf_token, token, token_len); /* Copy without zero-termination */
+
+  for (buf_len = 0; buf_len <= expected_len + 3; ++buf_len)
+  {
+    bool res;
+    ssize_t result_len;
+    memset (buf_out, '$', sizeof(buf_out));
+    /* On input 'result_len' is the size of the output buffer to use */
+    result_len = (ssize_t) buf_len;
+
+    res = MHD_str_remove_token_caseless_ (buf_in, str_len, buf_token, token_len,
+                                          buf_out, &result_len);
+    if (buf_len < expected_len)
+    { /* The result should not fit into the buffer */
+      if (res || (0 <= result_len)) /* only a negative size reports failure */
+      {
+        fprintf (stderr,
+                 "MHD_str_remove_token_caseless_() FAILED:\n"
+                 "\tMHD_str_remove_token_caseless_(\"%.*s\", %lu,"
+                 " \"%.*s\", %lu, buf, &(%ld->%ld)) returned %s\n",
+                 (int) str_len + 2, buf_in, (unsigned long) str_len,
+                 (int) token_len + 2, buf_token, (unsigned long) token_len,
+                 (long) buf_len, (long) result_len, res ? "true" : "false");
+        return 1;
+      }
+    }
+    else
+    { /* The result should fit into the buffer */
+      if ( (expected_removed != res) ||
+           (expected_len != (size_t) result_len) ||
+           ((0 != result_len) && (0 != memcmp (expected, buf_out,
+                                               (size_t) result_len))) ||
+           ('$' != buf_out[result_len])) /* nothing written past the result */
+      {
+        fprintf (stderr,
+                 "MHD_str_remove_token_caseless_() FAILED:\n"
+                 "\tMHD_str_remove_token_caseless_(\"%.*s\", %lu,"
+                 " \"%.*s\", %lu, \"%.*s\", &(%ld->%ld)) returned %s\n",
+                 (int) str_len + 2, buf_in, (unsigned long) str_len,
+                 (int) token_len + 2, buf_token, (unsigned long) token_len,
+                 (int) expected_len + 2, buf_out,
+                 (long) buf_len, (long) result_len,
+                 res ? "true" : "false");
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* Call expect_result_n() with lengths found by MHD_STATICSTR_LEN_() */
+#define expect_result(s,t,e,found) \
+  expect_result_n ((s),MHD_STATICSTR_LEN_ (s), \
+                   (t),MHD_STATICSTR_LEN_ (t), \
+                   (e),MHD_STATICSTR_LEN_ (e), found)
+/* Run all check cases, return the number of failed cases */
+int
+check_result (void)
+{
+  int errcount = 0;
+  errcount += expect_result ("string", "string", "", true);
+  errcount += expect_result ("String", "string", "", true);
+  errcount += expect_result ("string", "String", "", true);
+  errcount += expect_result ("strinG", "String", "", true);
+  errcount += expect_result ("\t strinG", "String", "", true);
+  errcount += expect_result ("strinG\t ", "String", "", true);
+  errcount += expect_result (" \t tOkEn  ", "toKEN", "", true);
+  errcount += expect_result ("not token\t,  tOkEn  ", "toKEN", "not token",
+                             true);
+  errcount += expect_result ("not token,\t  tOkEn, more token", "toKEN",
+                             "not token, more token", true);
+  errcount += expect_result ("not token,\t  tOkEn\t, more token", "toKEN",
+                             "not token, more token", true);
+  errcount += expect_result (",,,,,,test,,,,", "TESt", "", true);
+  errcount += expect_result (",,,,,\t,test,,,,", "TESt", "", true);
+  errcount += expect_result (",,,,,,test, ,,,", "TESt", "", true);
+  errcount += expect_result (",,,,,, test,,,,", "TESt", "", true);
+  errcount += expect_result (",,,,,, test not,test,,", "TESt", "test not",
+                             true);
+  errcount += expect_result (",,,,,, test not,,test,,", "TESt", "test not",
+                             true);
+  errcount += expect_result (",,,,,, test not ,test,,", "TESt", "test not",
+                             true);
+  errcount += expect_result (",,,,,, test", "TESt", "", true);
+  errcount += expect_result (",,,,,, test      ", "TESt", "", true);
+  errcount += expect_result ("no test,,,,,, test      ", "TESt", "no test",
+                             true);
+  errcount += expect_result ("the-token,, the-token , the-token" \
+                             ",the-token ,the-token", "the-token", "", true);
+  errcount += expect_result (" the-token,, the-token , the-token," \
+                             "the-token ,the-token ", "the-token", "", true);
+  errcount += expect_result (" the-token ,, the-token , the-token," \
+                             "the-token , the-token ", "the-token", "", true);
+  errcount += expect_result ("the-token,a, the-token , the-token,b," \
+                             "the-token , c,the-token", "the-token", "a, b, c",
+                             true);
+  errcount += expect_result (" the-token, a, the-token , the-token, b," \
+                             "the-token ,c ,the-token ", "the-token",
+                             "a, b, c", true);
+  errcount += expect_result (" the-token , a , the-token , the-token, b ," \
+                             "the-token , c , the-token ", "the-token",
+                             "a, b, c",true);
+  errcount += expect_result ("the-token,aa, the-token , the-token,bb," \
+                             "the-token , cc,the-token", "the-token",
+                             "aa, bb, cc", true);
+  errcount += expect_result (" the-token, aa, the-token , the-token, bb," \
+                             "the-token ,cc ,the-token ", "the-token",
+                             "aa, bb, cc", true);
+  errcount += expect_result (" the-token , aa , the-token , the-token, bb ," \
+                             "the-token , cc , the-token ", "the-token",
+                             "aa, bb, cc", true);
+  /* The token is not in the list: the output is the normalised input */
+  errcount += expect_result ("strin", "string", "strin", false);
+  errcount += expect_result ("Stringer", "string", "Stringer", false);
+  errcount += expect_result ("sstring", "String", "sstring", false);
+  errcount += expect_result ("string", "Strin", "string", false);
+  errcount += expect_result ("\t( strinG", "String", "( strinG", false);
+  errcount += expect_result (")strinG\t ", "String", ")strinG", false);
+  errcount += expect_result (" \t tOkEn t ", "toKEN", "tOkEn t", false);
+  errcount += expect_result ("not token\t,  tOkEner  ", "toKEN",
+                             "not token, tOkEner", false);
+  errcount += expect_result ("not token,\t  tOkEns, more token", "toKEN",
+                             "not token, tOkEns, more token", false);
+  errcount += expect_result ("not token,\t  tOkEns\t, more token", "toKEN",
+                             "not token, tOkEns, more token", false);
+  errcount += expect_result (",,,,,,testing,,,,", "TESt", "testing", false);
+  errcount += expect_result (",,,,,\t,test,,,,", "TESting", "test", false);
+  errcount += expect_result ("tests,,,,,,quest, ,,,", "TESt", "tests, quest",
+                             false);
+  errcount += expect_result (",,,,,, testы,,,,", "TESt", "testы", false);
+  errcount += expect_result (",,,,,, test not,хtest,,", "TESt",
+                             "test not, хtest", false);
+  errcount += expect_result ("testing,,,,,, test not,,test2,,", "TESt",
+                             "testing, test not, test2", false);
+  errcount += expect_result (",testi,,,,, test not ,test,,", "TESting",
+                             "testi, test not, test", false);
+  errcount += expect_result (",,,,,,2 test", "TESt", "2 test", false);
+  errcount += expect_result (",,,,,,test test      ", "test", "test test",
+                             false);
+  errcount += expect_result ("no test,,,,,,test test", "test",
+                             "no test, test test", false);
+  errcount += expect_result (",,,,,,,,,,,,,,,,,,,", "the-token", "", false);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,", "the-token",
+                             "a, b, c, d, e, f, g", false);
+  errcount += expect_result (",,,,,,,,,,,,,,,,,,,", "", "", false);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,", "",
+                             "a, b, c, d, e, f, g", false);
+  errcount += expect_result ("a,b,c,d,e,f,g", "", "a, b, c, d, e, f, g",
+                             false);
+  errcount += expect_result ("a1,b1,c1,d1,e1,f1,g1", "",
+                             "a1, b1, c1, d1, e1, f1, g1", false);
+  /* Long lists with and without the token to remove */
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token",
+                             "the-token", "a, b, c, d, e, f, g", true);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token,",
+                             "the-token", "a, b, c, d, e, f, g", true);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token,x",
+                             "the-token", "a, b, c, d, e, f, g, x", true);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token x",
+                             "the-token", "a, b, c, d, e, f, g, the-token x",
+                             false);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token x,",
+                             "the-token", "a, b, c, d, e, f, g, the-token x",
+                             false);
+  errcount += expect_result (",a,b,c,d,e,f,g,,,,,,,,,,,,the-token x,x",
+                             "the-token", "a, b, c, d, e, f, g," \
+                             " the-token x, x", false);
+  errcount += expect_result ("the-token,a,b,c,d,e,f,g,,,,,,,,,,,,the-token",
+                             "the-token", "a, b, c, d, e, f, g", true);
+  errcount += expect_result ("the-token ,a,b,c,d,e,f,g,,,,,,,,,,,,the-token,",
+                             "the-token", "a, b, c, d, e, f, g", true);
+  errcount += expect_result ("the-token,a,b,c,d,e,f,g,,,,,,,,,,,,the-token,x",
+                             "the-token", "a, b, c, d, e, f, g, x", true);
+  errcount += expect_result ("the-token x,a,b,c,d,e,f,g,,,,,,,,,,,," \
+                             "the-token x", "the-token",
+                             "the-token x, a, b, c, d, e, f, g, the-token x",
+                             false);
+  errcount += expect_result ("the-token x,a,b,c,d,e,f,g,,,,,,,,,,,," \
+                             "the-token x,", "the-token",
+                             "the-token x, a, b, c, d, e, f, g, the-token x",
+                             false);
+  errcount += expect_result ("the-token x,a,b,c,d,e,f,g,,,,,,,,,,,," \
+                             "the-token x,x", "the-token",
+                             "the-token x, a, b, c, d, e, f, g, " \
+                             "the-token x, x", false);
+
+  return errcount;
+}
+
+
+int
+main (int argc, char *argv[])
+{
+  /* The command line is not used, the casts silence compiler warnings */
+  (void) argc;
+  (void) argv;
+  return (0 != check_result ()) ? 1 : 0;
+}

	libmicrohttpd HTTP/1.x server C library (MHD 1.x, stable)
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	src/microhttpd/.gitignore	\|	1	+
M	src/microhttpd/Makefile.am	\|	4	++++
M	src/microhttpd/mhd_str.c	\|	169	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M	src/microhttpd/mhd_str.h	\|	38	++++++++++++++++++++++++++++++++++++++
A	src/microhttpd/test_str_token_remove.c	\|	248	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++