Added MHD_str_remove_tokens_caseless_() function - libmicrohttpd - HTTP/1.x server C library (MHD 1.x, stable)

commit ed3daed30580e6c4ae62d4114c58e38b6f0f9f85
parent ada4c2ed55dd14f68430e9518f2882226a7b07cd
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
Date:   Wed, 23 Jun 2021 10:47:41 +0300

Added MHD_str_remove_tokens_caseless_() function

Diffstat:
M src/microhttpd/.gitignore  | 1 +
M src/microhttpd/Makefile.am  | 4 ++++
M src/microhttpd/mhd_str.c  | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/microhttpd/mhd_str.h  | 30 ++++++++++++++++++++++++++++++
A src/microhttpd/test_str_tokens_remove.c  | 282 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

5 files changed, 483 insertions(+), 0 deletions(-)
diff --git a/src/microhttpd/.gitignore b/src/microhttpd/.gitignore
@@ -54,6 +54,7 @@ test_http_reasons
 /test_start_stop
 /test_str_token
 /test_str_token_remove
+/test_str_tokens_remove
 test_shutdown_poll
 test_shutdown_select
 test_md5
diff --git a/src/microhttpd/Makefile.am b/src/microhttpd/Makefile.am
@@ -159,6 +159,7 @@ check_PROGRAMS = \
   test_str_to_value \
   test_str_token \
   test_str_token_remove \
+  test_str_tokens_remove \
   test_http_reasons \
   test_md5 \
   test_sha1 \
@@ -357,6 +358,9 @@ test_str_token_SOURCES = \
 test_str_token_remove_SOURCES = \
   test_str_token_remove.c mhd_str.c mhd_str.h mhd_assert.h mhd_options.h
 
+test_str_tokens_remove_SOURCES = \
+  test_str_tokens_remove.c mhd_str.c mhd_str.h mhd_assert.h mhd_options.h
+
 test_http_reasons_SOURCES = \
   test_http_reasons.c \
   reason_phrase.c mhd_str.c mhd_str.h
diff --git a/src/microhttpd/mhd_str.c b/src/microhttpd/mhd_str.c
@@ -688,6 +688,172 @@ MHD_str_remove_token_caseless_ (const char *str,
 }
 
 
+/**
+ * Perform in-place case-insensitive removal of @a tokens from the @a str.
+ *
+ * Tokens in @a tokens are delimited by commas and may be surrounded by
+ * spaces and tabs; empty tokens in @a tokens are skipped.
+ * A token matches only if it is equal, ignoring case, to a whole element
+ * of @a str (the elements of @a str are separated by ", ").
+ * Quoted strings and comments are not supported by this function.
+ *
+ * The input string must be normalised: empty tokens and repeated whitespaces
+ * are removed, no whitespaces before commas, exactly one space is used after
+ * each comma. The string is updated in-place.
+ *
+ * Behavior is undefined if the input string is not normalised.
+ *
+ * @param[in,out] str the string to update, not required to be
+ *                    null-terminated; no terminating null-character is
+ *                    written by this function
+ * @param[in,out] str_len the pointer to the length of the @a str, not
+ *                        including optional terminating null-character;
+ *                        updated with the length of the resulting string
+ * @param tokens the list of tokens to remove, must not contain
+ *               null-characters
+ * @param tokens_len the length of @a tokens, not including optional
+ *                   terminating null-character.
+ * @return 'true' if any token has been removed,
+ *         'false' otherwise.
+ */
+bool
+MHD_str_remove_tokens_caseless_ (char *str,
+                                 size_t *str_len,
+                                 const char *const tokens,
+                                 const size_t tokens_len)
+{
+  const char *t;   /**< position in the @a tokens string */
+  bool token_removed;
+
+  /* Embedded null-characters in 'tokens' are not allowed */
+  mhd_assert (NULL == memchr (tokens, 0, tokens_len));
+
+  token_removed = false;
+  t = tokens;
+
+  /* Process 'tokens' one token at a time; stop early when nothing is left
+   * in the input string */
+  while ((size_t) (t - tokens) < tokens_len && *str_len != 0)
+  {
+    const char *tkn; /**< the current token */
+    size_t tkn_len;
+
+    /* Skip any initial whitespaces and empty tokens in 'tokens' */
+    while ( ((size_t) (t - tokens) < tokens_len) &&
+            ((' ' == *t) || ('\t' == *t) || (',' == *t)) )
+      t++;
+
+    if ((size_t) (t - tokens) >= tokens_len)
+      break; /* No more tokens, nothing to remove */
+
+    /* Found non-whitespace char which is not a comma */
+    tkn = t;
+    /* Find the end of the token. Whitespace inside the token is kept as
+     * a part of the token, trailing whitespace is not counted in 'tkn_len'. */
+    do
+    {
+      do
+      {
+        t++;
+      } while ((size_t) (t - tokens) < tokens_len && (' ' != *t && '\t' != *t &&
+                                                      ',' != *t));
+      /* Found end of token string, space, tab, or comma */
+      tkn_len = t - tkn;
+
+      /* Skip all spaces and tabs */
+      while ((size_t) (t - tokens) < tokens_len && (' ' == *t || '\t' == *t))
+        t++;
+      /* Found end of token string or non-whitespace char */
+    } while((size_t) (t - tokens) < tokens_len && ',' != *t);
+
+    /* 'tkn' is the input token with 'tkn_len' chars */
+    mhd_assert (0 != tkn_len);
+
+    /* Same length: the token can match only the whole input string */
+    if (*str_len == tkn_len)
+    {
+      if (MHD_str_equal_caseless_bin_n_ (str, tkn, tkn_len))
+      {
+        *str_len = 0;
+        token_removed = true;
+      }
+      continue;
+    }
+    /* A longer normalised string can contain 'tkn' only together with
+     * the ", " separator and at least one char of another element */
+    if (*str_len > tkn_len + 2)
+    { /* Remove 'tkn' from the input string */
+      const char *s1;  /**< the "input" string / character */
+      char *s2;        /**< the "output" string / character */
+
+      s1 = str;
+      s2 = str;
+
+      /* Each iteration handles one element of the input string:
+       * the element is either skipped or moved to the output position */
+      do
+      {
+        mhd_assert (s1 >= s2);
+        mhd_assert ((str + *str_len) >= (s1 + tkn_len));
+        /* The element matches only if it ends exactly after 'tkn_len' chars
+         * (end of the string or comma) and is equal to 'tkn' ignoring case */
+        if ( ( ((str + *str_len) == (s1 + tkn_len)) || (',' == s1[tkn_len]) ) &&
+             MHD_str_equal_caseless_bin_n_ (s1, tkn, tkn_len) )
+        {
+          /* current token in the input string matches the 'tkn', skip it */
+          mhd_assert ((str + *str_len == s1 + tkn_len) || \
+                      (',' == s1[tkn_len]));
+          mhd_assert ((str + *str_len == s1 + tkn_len) || \
+                      (' ' == s1[tkn_len + 1]));
+          token_removed = true;
+          /* Advance to the next token in the input string or beyond
+           * the end of the input string. */
+          s1 += tkn_len + 2;
+        }
+        else
+        {
+          /* current token in the input string does not match the 'tkn',
+           * copy to the output */
+          if (str != s2)
+          { /* not the first output token, add ", " to separate */
+            if (s1 != s2 + 2)
+            {
+              *(s2++) = ',';
+              *(s2++) = ' ';
+            }
+            else
+              s2 += 2; /* the two chars at 's2' are kept as-is (for normalised
+                        * input they already hold ", ") */
+          }
+          /* Copy the element char by char up to the comma or the end of
+           * the string; nothing is written while 's1' and 's2' coincide */
+          do
+          {
+            if (s1 != s2)
+              *s2 = *s1;
+            s1++;
+            s2++;
+          } while (s1 < str + *str_len && ',' != *s1);
+          /* Advance to the next token in the input string or beyond
+           * the end of the input string. */
+          s1 += 2;
+        }
+        /* s1 should point to the next token in the input string or beyond
+         * the end of the input string */
+        if ((str + *str_len) < (s1 + tkn_len))
+        { /* The rest of the 's1' is too small to match 'tkn' */
+          if ((str + *str_len) > s1)
+          { /* Copy the rest of the string */
+            size_t copy_size;
+            copy_size = *str_len - (size_t) (s1 - str);
+            if (str != s2)
+            { /* not the first output token, add ", " to separate */
+              if (s1 != s2 + 2)
+              {
+                *(s2++) = ',';
+                *(s2++) = ' ';
+              }
+              else
+                s2 += 2; /* the two chars at 's2' are kept as-is */
+            }
+            /* Source and destination may overlap, so memmove() is used */
+            if (s1 != s2)
+              memmove (s2, s1, copy_size);
+            s2 += copy_size;
+          }
+          /* Store the new length of the string; done with this 'tkn' */
+          *str_len = s2 - str;
+          break;
+        }
+        mhd_assert ((' ' != s1[0]) && ('\t' != s1[0]));
+        mhd_assert ((s1 == str) || (' ' == *(s1 - 1)));
+        mhd_assert ((s1 == str) || (',' == *(s1 - 2)));
+      } while (1);
+    }
+  }
+
+  return token_removed;
+}
+
+
 #ifndef MHD_FAVOR_SMALL_CODE
 /* Use individual function for each case */
 
diff --git a/src/microhttpd/mhd_str.h b/src/microhttpd/mhd_str.h
@@ -168,6 +168,36 @@ MHD_str_remove_token_caseless_ (const char *str,
                                 ssize_t *buf_size);
 
 
+/**
+ * Perform in-place case-insensitive removal of @a tokens from the @a str.
+ *
+ * Tokens in @a tokens are delimited by commas and may be surrounded by
+ * spaces and tabs.
+ * A token matches only if it is equal, ignoring case, to a whole element
+ * of @a str (the elements of @a str are separated by ", ").
+ * Quoted strings and comments are not supported by this function.
+ *
+ * The input string must be normalised: empty tokens and repeated whitespaces
+ * are removed, no whitespaces before commas, exactly one space is used after
+ * each comma. The string is updated in-place.
+ *
+ * Behavior is undefined if the input string is not normalised.
+ *
+ * @param[in,out] str the string to update, not required to be
+ *                    null-terminated
+ * @param[in,out] str_len the pointer to the length of the @a str, not
+ *                        including optional terminating null-character
+ * @param tokens the list of tokens to remove
+ * @param tokens_len the length of @a tokens, not including optional
+ *                   terminating null-character.
+ * @return 'true' if any token has been removed,
+ *         'false' otherwise.
+ */
+bool
+MHD_str_remove_tokens_caseless_ (char *str,
+                                 size_t *str_len,
+                                 const char *const tokens,
+                                 const size_t tokens_len);
+
+
 #ifndef MHD_FAVOR_SMALL_CODE
 /* Use individual function for each case to improve speed */
 
diff --git a/src/microhttpd/test_str_tokens_remove.c b/src/microhttpd/test_str_tokens_remove.c
@@ -0,0 +1,282 @@
+/*
+  This file is part of libmicrohttpd
+  Copyright (C) 2017 Karlson2k (Evgeny Grin)
+
+  This test tool is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2, or
+  (at your option) any later version.
+
+  This test tool is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+/**
+ * @file microhttpd/test_str_tokens_remove.c
+ * @brief  Unit tests for some mhd_str functions
+ * @author Karlson2k (Evgeny Grin)
+ */
+
+#include <string.h>
+#include <stdio.h>
+#include "mhd_options.h"
+#include "mhd_str.h"
+#include "mhd_assert.h"
+
+
+/**
+ * Run MHD_str_remove_tokens_caseless_() on copies of the inputs and compare
+ * the outcome with the expected one.
+ *
+ * The inputs are copied into local buffers without zero-termination and
+ * the rest of each buffer is filled with a marker character. This way the
+ * tested function cannot rely on a terminating null-character, and a write
+ * to the char right after the input string is detected.
+ *
+ * @param str the input string; must be zero-terminated as it is printed
+ *            with "%s" when the check fails
+ * @param str_len the length of @a str
+ * @param tokens the list of tokens to remove
+ * @param tokens_len the length of @a tokens
+ * @param expected the expected resulting string; must be zero-terminated
+ *                 as it is printed with "%s" when the check fails
+ * @param expected_len the length of @a expected
+ * @param expected_removed the expected return value of the tested function
+ * @return zero if the result matches the expectation,
+ *         one (after printing the details to stderr) otherwise
+ */
+static int
+expect_result_n (const char *str, size_t str_len,
+                 const char *tokens, size_t tokens_len,
+                 const char *expected, size_t expected_len,
+                 const bool expected_removed)
+{
+  char buf_in[1024];
+  char buf_tokens[256];
+  bool res;
+  size_t result_len;
+
+  mhd_assert (sizeof(buf_in) > str_len + 2);
+  mhd_assert (sizeof(buf_tokens) > tokens_len + 2);
+
+  memset (buf_tokens, '#', sizeof(buf_tokens));
+  memcpy (buf_tokens, tokens, tokens_len); /* Copy without zero-termination */
+  memset (buf_in, '$', sizeof(buf_in));
+  memcpy (buf_in, str, str_len); /* Copy without zero-termination */
+
+  result_len = str_len;
+
+  res = MHD_str_remove_tokens_caseless_ (buf_in, &result_len,
+                                         buf_tokens, tokens_len);
+
+  /* Fail on a wrong return value, a wrong length, wrong content or
+   * a modified marker char right after the original input string */
+  if ( (expected_removed != res) ||
+       (expected_len != result_len) ||
+       ((0 != result_len) && (0 != memcmp (expected, buf_in, result_len))) ||
+       ('$' != buf_in[str_len]))
+  {
+    /* The buffers are not zero-terminated, print them with explicit length */
+    fprintf (stderr,
+             "MHD_str_remove_tokens_caseless_() FAILED:\n"
+             "\tRESULT: "
+             "\tMHD_str_remove_tokens_caseless_(\"%s\"->\"%.*s\", &(%lu->%lu),"
+             " \"%.*s\", %lu) returned %s\n",
+             str,
+             (int) result_len, buf_in,
+             (unsigned long) str_len, (unsigned long) result_len,
+             (int) tokens_len, buf_tokens, (unsigned long) tokens_len,
+             res ? "true" : "false");
+    fprintf (stderr,
+             "\tEXPECTED: "
+             "\tMHD_str_remove_tokens_caseless_(\"%s\"->\"%s\", &(%lu->%lu),"
+             " \"%.*s\", %lu) returned %s\n",
+             str,
+             expected,
+             (unsigned long) str_len, (unsigned long) expected_len,
+             (int) tokens_len, buf_tokens, (unsigned long) tokens_len,
+             expected_removed ? "true" : "false");
+    return 1;
+  }
+  return 0;
+}
+
+
+/* Convenience wrapper for expect_result_n(): takes the strings only and
+ * passes each of them together with its MHD_STATICSTR_LEN_() value.
+ * NOTE(review): MHD_STATICSTR_LEN_() is not defined in this file; presumably
+ * it gives the length of a string literal without the terminating
+ * null-character, so the arguments must be string literals - confirm. */
+#define expect_result(s,t,e,found) \
+  expect_result_n ((s),MHD_STATICSTR_LEN_ (s), \
+                   (t),MHD_STATICSTR_LEN_ (t), \
+                   (e),MHD_STATICSTR_LEN_ (e), found)
+
+/**
+ * Run all test cases for MHD_str_remove_tokens_caseless_().
+ *
+ * Every expect_result() call adds one to the error counter if the tested
+ * function produced an unexpected result.
+ *
+ * @return the number of failed test cases, zero if all cases passed
+ */
+int
+check_result (void)
+{
+  int errcount = 0;
+  /* Cases where at least one token must be removed */
+  errcount += expect_result ("string", "string", "", true);
+  errcount += expect_result ("String", "string", "", true);
+  errcount += expect_result ("string", "String", "", true);
+  errcount += expect_result ("strinG", "String", "", true);
+  errcount += expect_result ("strinG", "String\t", "", true);
+  errcount += expect_result ("strinG", "\tString", "", true);
+  errcount += expect_result ("tOkEn", " \t toKEN  ", "", true);
+  errcount += expect_result ("not-token, tOkEn", "token", "not-token",
+                             true);
+  errcount += expect_result ("not-token, tOkEn, toke", "token",
+                             "not-token, toke",
+                             true);
+  errcount += expect_result ("toke, tOkEn", "token", "toke",
+                             true);
+  errcount += expect_result ("not-token, tOkEn", " \t toKEN", "not-token",
+                             true);
+  errcount += expect_result ("not-token, tOkEn, more-token", "toKEN\t",
+                             "not-token, more-token", true);
+  errcount += expect_result ("not-token, tOkEn, more-token", "\t  toKEN,,,,,",
+                             "not-token, more-token", true);
+  errcount += expect_result ("a, b, c, d", ",,,,,a", "b, c, d", true);
+  errcount += expect_result ("a, b, c, d", "a,,,,,,", "b, c, d", true);
+  errcount += expect_result ("a, b, c, d", ",,,,a,,,,,,", "b, c, d", true);
+  errcount += expect_result ("a, b, c, d", "\t \t,,,,a,,   ,   ,,,\t",
+                             "b, c, d", true);
+  errcount += expect_result ("a, b, c, d", "b, c, d", "a", true);
+  errcount += expect_result ("a, b, c, d", "a, b, c, d", "", true);
+  errcount += expect_result ("a, b, c, d", "d, c, b, a", "", true);
+  errcount += expect_result ("a, b, c, d", "b, d, a, c", "", true);
+  errcount += expect_result ("a, b, c, d, e", "b, d, a, c", "e", true);
+  errcount += expect_result ("e, a, b, c, d", "b, d, a, c", "e", true);
+  errcount += expect_result ("e, a, b, c, d, e", "b, d, a, c", "e, e", true);
+  errcount += expect_result ("a, b, c, d", "b,c,d", "a", true);
+  errcount += expect_result ("a, b, c, d", "a,b,c,d", "", true);
+  errcount += expect_result ("a, b, c, d", "d,c,b,a", "", true);
+  errcount += expect_result ("a, b, c, d", "b,d,a,c", "", true);
+  errcount += expect_result ("a, b, c, d, e", "b,d,a,c", "e", true);
+  errcount += expect_result ("e, a, b, c, d", "b,d,a,c", "e", true);
+  errcount += expect_result ("e, a, b, c, d, e", "b,d,a,c", "e, e", true);
+  errcount += expect_result ("a, b, c, d", "d,,,,,,,,,c,b,a", "", true);
+  errcount += expect_result ("a, b, c, d", "b,d,a,c,,,,,,,,,,", "", true);
+  errcount += expect_result ("a, b, c, d, e", ",,,,\t,,,,b,d,a,c,\t", "e",
+                             true);
+  errcount += expect_result ("e, a, b, c, d", "b,d,a,c", "e", true);
+  errcount += expect_result ("token, a, b, c, d", "token", "a, b, c, d", true);
+  errcount += expect_result ("token1, a, b, c, d", "token1", "a, b, c, d",
+                             true);
+  errcount += expect_result ("token12, a, b, c, d", "token12", "a, b, c, d",
+                             true);
+  errcount += expect_result ("token123, a, b, c, d", "token123", "a, b, c, d",
+                             true);
+  errcount += expect_result ("token1234, a, b, c, d", "token1234", "a, b, c, d",
+                             true);
+  errcount += expect_result ("token12345, a, b, c, d", "token12345",
+                             "a, b, c, d", true);
+  errcount += expect_result ("token123456, a, b, c, d", "token123456",
+                             "a, b, c, d", true);
+  errcount += expect_result ("token1234567, a, b, c, d", "token1234567",
+                             "a, b, c, d", true);
+  errcount += expect_result ("token12345678, a, b, c, d", "token12345678",
+                             "a, b, c, d", true);
+
+  /* Cases where nothing must be removed: empty input or no matching token */
+  errcount += expect_result ("", "a", "", false);
+  errcount += expect_result ("", "", "", false);
+  errcount += expect_result ("a, b, c, d", "bb, dd, aa, cc", "a, b, c, d",
+                             false);
+  errcount += expect_result ("a, b, c, d, e", "bb, dd, aa, cc", "a, b, c, d, e",
+                             false);
+  errcount += expect_result ("e, a, b, c, d", "bb, dd, aa, cc", "e, a, b, c, d",
+                             false);
+  errcount += expect_result ("e, a, b, c, d, e", "bb, dd, aa, cc",
+                             "e, a, b, c, d, e", false);
+  errcount += expect_result ("aa, bb, cc, dd", "b, d, a, c", "aa, bb, cc, dd",
+                             false);
+  errcount += expect_result ("aa, bb, cc, dd, ee", "b, d, a, c",
+                             "aa, bb, cc, dd, ee", false);
+  errcount += expect_result ("ee, aa, bb, cc, dd", "b, d, a, c",
+                             "ee, aa, bb, cc, dd", false);
+  errcount += expect_result ("ee, aa, bb, cc, dd, ee", "b, d, a, c",
+                             "ee, aa, bb, cc, dd, ee", false);
+
+  /* Token lists with empty tokens, extra whitespace and non-matching items */
+  errcount += expect_result ("TESt", ",,,,,,test,,,,", "", true);
+  errcount += expect_result ("TESt", ",,,,,\t,test,,,,", "", true);
+  errcount += expect_result ("TESt", ",,,,,,test, ,,,", "", true);
+  errcount += expect_result ("TESt", ",,,,,, test,,,,", "", true);
+  errcount += expect_result ("TESt", ",,,,,, test-not,test,,", "",
+                             true);
+  errcount += expect_result ("TESt", ",,,,,, test-not,,test,,", "",
+                             true);
+  errcount += expect_result ("TESt", ",,,,,, test-not ,test,,", "",
+                             true);
+  errcount += expect_result ("TESt", ",,,,,, test", "", true);
+  errcount += expect_result ("TESt", ",,,,,, test      ", "", true);
+  errcount += expect_result ("TESt", "no-test,,,,,, test      ", "",
+                             true);
+
+  /* Input strings with the same element repeated several times */
+  errcount += expect_result ("the-token, a, the-token, b, the-token, " \
+                             "the-token, c, the-token", "the-token", "a, b, c",
+                             true);
+  errcount += expect_result ("aa, the-token, bb, the-token, cc, the-token, " \
+                             "the-token, dd, the-token", "the-token",
+                             "aa, bb, cc, dd", true);
+  errcount += expect_result ("the-token, a, the-token, b, the-token, " \
+                             "the-token, c, the-token, e", "the-token",
+                             "a, b, c, e", true);
+  errcount += expect_result ("aa, the-token, bb, the-token, cc, the-token, " \
+                             "the-token, dd, the-token, ee", "the-token",
+                             "aa, bb, cc, dd, ee", true);
+  errcount += expect_result ("the-token, the-token, the-token, " \
+                             "the-token, the-token", "the-token", "", true);
+  errcount += expect_result ("the-token, a, the-token, the-token, b, " \
+                             "the-token, c, the-token, a", "c,a,b",
+                             "the-token, the-token, the-token, the-token, the-token",
+                             true);
+  errcount += expect_result ("the-token, xx, the-token, the-token, zz, " \
+                             "the-token, yy, the-token, ww", "ww,zz,yy",
+                             "the-token, xx, the-token, the-token, the-token, the-token",
+                             true);
+  errcount += expect_result ("the-token, a, the-token, the-token, b, " \
+                             "the-token, c, the-token, a", " c,\t a,b,,,",
+                             "the-token, the-token, the-token, the-token, the-token",
+                             true);
+  errcount += expect_result ("the-token, xx, the-token, the-token, zz, " \
+                             "the-token, yy, the-token, ww",
+                             ",,,,ww,\t zz,  yy",
+                             "the-token, xx, the-token, the-token, the-token, the-token",
+                             true);
+  errcount += expect_result ("the-token, a, the-token, the-token, b, " \
+                             "the-token, c, the-token, a", ",,,,c,\t a,b",
+                             "the-token, the-token, the-token, the-token, the-token",
+                             true);
+  errcount += expect_result ("the-token, xx, the-token, the-token, zz, " \
+                             "the-token, yy, the-token, ww", " ww,\t zz,yy,,,,",
+                             "the-token, xx, the-token, the-token, the-token, the-token",
+                             true);
+  errcount += expect_result ("close, 2", "close",
+                             "2", true);
+  errcount += expect_result ("close, 22", "close",
+                             "22", true);
+  errcount += expect_result ("close, nothing", "close",
+                             "nothing", true);
+  errcount += expect_result ("close, 2", "2",
+                             "close", true);
+  errcount += expect_result ("close", "close",
+                             "", true);
+  errcount += expect_result ("close, nothing", "close, token",
+                             "nothing", true);
+  errcount += expect_result ("close, nothing", "nothing, token",
+                             "close", true);
+  errcount += expect_result ("close, 2", "close, 10, 12, 22, nothing",
+                             "2", true);
+
+  /* Near-matches (prefixes, suffixes, extra chars) that must be kept */
+  errcount += expect_result ("strin", "string", "strin", false);
+  errcount += expect_result ("Stringer", "string", "Stringer", false);
+  errcount += expect_result ("sstring", "String", "sstring", false);
+  errcount += expect_result ("string", "Strin", "string", false);
+  errcount += expect_result ("String", "\t(-strinG", "String", false);
+  errcount += expect_result ("String", ")strinG\t ", "String", false);
+  errcount += expect_result ("not-token, tOkEner", "toKEN",
+                             "not-token, tOkEner", false);
+  errcount += expect_result ("not-token, tOkEns, more-token", "toKEN",
+                             "not-token, tOkEns, more-token", false);
+  errcount += expect_result ("tests, quest", "TESt", "tests, quest",
+                             false);
+  errcount += expect_result ("testы", "TESt", "testы", false);
+  errcount += expect_result ("test-not, хtest", "TESt",
+                             "test-not, хtest", false);
+  errcount += expect_result ("testing, test not, test2", "TESt",
+                             "testing, test not, test2", false);
+  errcount += expect_result ("", ",,,,,,,,,,,,,,,,,,,the-token", "", false);
+  errcount += expect_result ("a1, b1, c1, d1, e1, f1, g1", "",
+                             "a1, b1, c1, d1, e1, f1, g1", false);
+
+  return errcount;
+}
+
+
+/**
+ * Test entry point: runs all checks and reports the overall result.
+ *
+ * @param argc the number of command-line arguments (not used)
+ * @param argv the command-line arguments (not used)
+ * @return zero if all checks passed, one otherwise
+ */
+int
+main (int argc, char *argv[])
+{
+  int failures;
+
+  /* The command line is ignored; the casts silence compiler warnings. */
+  (void) argc;
+  (void) argv;
+
+  failures = check_result ();
+  if (0 != failures)
+    return 1;
+
+  printf ("All tests were passed without errors.\n");
+  return 0;
+}

	libmicrohttpd HTTP/1.x server C library (MHD 1.x, stable)
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	src/microhttpd/.gitignore	\|	1	+
M	src/microhttpd/Makefile.am	\|	4	++++
M	src/microhttpd/mhd_str.c	\|	166	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/microhttpd/mhd_str.h	\|	30	++++++++++++++++++++++++++++++
A	src/microhttpd/test_str_tokens_remove.c	\|	282	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++