mhd_str: added functions for percent-decoding - libmicrohttpd - HTTP/1.x server C library (MHD 1.x, stable)

commit 420ed698d63401e4cc6d51913145067691f050b7
parent d629ada1a20018c1cbc7f7e0badf0f8eca9b0fd7
Author: Evgeny Grin (Karlson2k) <k2k@narod.ru>
Date:   Mon, 20 Jun 2022 18:25:57 +0300

mhd_str: added functions for percent-decoding

Diffstat:
M src/microhttpd/mhd_str.c  | 308 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/microhttpd/mhd_str.h  | 93 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

2 files changed, 401 insertions(+), 0 deletions(-)
diff --git a/src/microhttpd/mhd_str.c b/src/microhttpd/mhd_str.c
@@ -1387,6 +1387,314 @@ MHD_bin_to_hex (const void *bin,
 }
 
 
+size_t
+MHD_str_pct_decode_strict_n_ (const char *pct_encoded,
+                              size_t pct_encoded_len,
+                              char *decoded,
+                              size_t buf_size)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  bool broken;
+  size_t res;
+
+  res = MHD_str_pct_decode_lenient_n_ (pct_encoded, pct_encoded_len, decoded,
+                                       buf_size, &broken);
+  if (broken)
+    return 0;
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+
+  if (buf_size >= pct_encoded_len)
+  {
+    while (r < pct_encoded_len)
+    {
+      const char chr = pct_encoded[r];
+      if ('%' == chr)
+      {
+        if (2 > pct_encoded_len - r)
+          return 0;
+        else
+        {
+          const int h = toxdigitvalue (pct_encoded[++r]);
+          const int l = toxdigitvalue (pct_encoded[++r]);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+            return 0;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          decoded[w] = (char) out;
+        }
+      }
+      else
+        decoded[w] = chr;
+      ++r;
+      ++w;
+    }
+    return w;
+  }
+
+  while (r < pct_encoded_len)
+  {
+    const char chr = pct_encoded[r];
+    if (w >= buf_size)
+      return 0;
+    if ('%' == chr)
+    {
+      if (2 > pct_encoded_len - r)
+        return 0;
+      else
+      {
+        const int h = toxdigitvalue (pct_encoded[++r]);
+        const int l = toxdigitvalue (pct_encoded[++r]);
+        unsigned char out;
+        if ((0 > h) || (0 > l))
+          return 0;
+        out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                | ((uint8_t) ((unsigned int) l)) );
+        decoded[w] = (char) out;
+      }
+    }
+    else
+      decoded[w] = chr;
+    ++r;
+    ++w;
+  }
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
+size_t
+MHD_str_pct_decode_lenient_n_ (const char *pct_encoded,
+                               size_t pct_encoded_len,
+                               char *decoded,
+                               size_t buf_size,
+                               bool *broken_encoding)
+{
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+  if (NULL != broken_encoding)
+    *broken_encoding = false;
+#ifndef MHD_FAVOR_SMALL_CODE
+  if (buf_size >= pct_encoded_len)
+  {
+    while (r < pct_encoded_len)
+    {
+      const char chr = pct_encoded[r];
+      if ('%' == chr)
+      {
+        if (2 > pct_encoded_len - r)
+        {
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          decoded[w] = chr; /* Copy "as is" */
+        }
+        else
+        {
+          const int h = toxdigitvalue (pct_encoded[++r]);
+          const int l = toxdigitvalue (pct_encoded[++r]);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+          {
+            r -= 2;
+            if (NULL != broken_encoding)
+              *broken_encoding = true;
+            decoded[w] = chr; /* Copy "as is" */
+          }
+          else
+          {
+            out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                    | ((uint8_t) ((unsigned int) l)) );
+            decoded[w] = (char) out;
+          }
+        }
+      }
+      else
+        decoded[w] = chr;
+      ++r;
+      ++w;
+    }
+    return w;
+  }
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+  while (r < pct_encoded_len)
+  {
+    const char chr = pct_encoded[r];
+    if (w >= buf_size)
+      return 0;
+    if ('%' == chr)
+    {
+      if (2 > pct_encoded_len - r)
+      {
+        if (NULL != broken_encoding)
+          *broken_encoding = true;
+        decoded[w] = chr; /* Copy "as is" */
+      }
+      else
+      {
+        const int h = toxdigitvalue (pct_encoded[++r]);
+        const int l = toxdigitvalue (pct_encoded[++r]);
+        if ((0 > h) || (0 > l))
+        {
+          r -= 2;
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          decoded[w] = chr; /* Copy "as is" */
+        }
+        else
+        {
+          unsigned char out;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          decoded[w] = (char) out;
+        }
+      }
+    }
+    else
+      decoded[w] = chr;
+    ++r;
+    ++w;
+  }
+  return w;
+}
+
+
+size_t
+MHD_str_pct_decode_in_place_strict_ (char *str)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  size_t res;
+  bool broken;
+
+  res = MHD_str_pct_decode_in_place_lenient_ (str, &broken);
+  if (broken)
+  {
+    res = 0;
+    str[0] = 0;
+  }
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  r = 0;
+  w = 0;
+
+  while (0 != str[r])
+  {
+    const char chr = str[r++];
+    if ('%' == chr)
+    {
+      const char d1 = str[r++];
+      if (0 == d1)
+        return 0;
+      else
+      {
+        const char d2 = str[r++];
+        if (0 == d2)
+          return 0;
+        else
+        {
+          const int h = toxdigitvalue (d1);
+          const int l = toxdigitvalue (d2);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+            return 0;
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          str[w++] = (char) out;
+        }
+      }
+    }
+    else
+      str[w++] = chr;
+  }
+  str[w] = 0;
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
+size_t
+MHD_str_pct_decode_in_place_lenient_ (char *str,
+                                      bool *broken_encoding)
+{
+#ifdef MHD_FAVOR_SMALL_CODE
+  size_t len;
+  size_t res;
+
+  len = strlen (str);
+  res = MHD_str_pct_decode_lenient_n_ (str, len, str, len, broken_encoding);
+  str[res] = 0;
+
+  return res;
+#else  /* ! MHD_FAVOR_SMALL_CODE */
+  size_t r;
+  size_t w;
+  if (NULL != broken_encoding)
+    *broken_encoding = false;
+  r = 0;
+  w = 0;
+  while (0 != str[r])
+  {
+    const char chr = str[r++];
+    if ('%' == chr)
+    {
+      const char d1 = str[r++];
+      if (0 == d1)
+      {
+        if (NULL != broken_encoding)
+          *broken_encoding = true;
+        str[w++] = chr; /* Copy "as is" */
+        str[w] = 0;
+        return w;
+      }
+      else
+      {
+        const char d2 = str[r++];
+        if (0 == d2)
+        {
+          if (NULL != broken_encoding)
+            *broken_encoding = true;
+          str[w++] = chr; /* Copy "as is" */
+          str[w++] = d1; /* Copy "as is" */
+          str[w] = 0;
+          return w;
+        }
+        else
+        {
+          const int h = toxdigitvalue (d1);
+          const int l = toxdigitvalue (d2);
+          unsigned char out;
+          if ((0 > h) || (0 > l))
+          {
+            if (NULL != broken_encoding)
+              *broken_encoding = true;
+            str[w++] = chr; /* Copy "as is" */
+            str[w++] = d1;
+            str[w++] = d2;
+            continue;
+          }
+          out = (unsigned char) ( (((uint8_t) ((unsigned int) h)) << 4)
+                                  | ((uint8_t) ((unsigned int) l)) );
+          str[w++] = (char) out;
+          continue;
+        }
+      }
+    }
+    str[w++] = chr;
+  }
+  str[w] = 0;
+  return w;
+#endif /* ! MHD_FAVOR_SMALL_CODE */
+}
+
+
 #ifdef DAUTH_SUPPORT
 bool
 MHD_str_equal_quoted_bin_n (const char *quoted,
diff --git a/src/microhttpd/mhd_str.h b/src/microhttpd/mhd_str.h
@@ -493,6 +493,99 @@ MHD_bin_to_hex (const void *bin,
                 size_t size,
                 char *hex);
 
+/**
+ * Decode string with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying all other characters without extra
+ * processing.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of the @a pct_encoded
+ * @param[out] decoded the output buffer, NOT zero-terminated, can point
+ *                     to the same buffer as @a pct_encoded
+ * @param buf_size the size of the output buffer
+ * @return the number of characters written to the output buffer or
+ *         zero if any percent-encoded characters is broken ('%' followed
+ *         by less than two hexadecimal digits) or output buffer is too
+ *         small to hold the result
+ */
+size_t
+MHD_str_pct_decode_strict_n_ (const char *pct_encoded,
+                              size_t pct_encoded_len,
+                              char *decoded,
+                              size_t buf_size);
+
+/**
+ * Decode string with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying all other characters without extra
+ * processing.
+ *
+ * Any invalid percent-encoding sequences ('%' symbol not followed by two
+ * valid hexadecimal digits) are copied to the output string without decoding.
+ *
+ * @param pct_encoded the input string to be decoded
+ * @param pct_encoded_len the length of the @a pct_encoded
+ * @param[out] decoded the output buffer, NOT zero-terminated, can point
+ *                     to the same buffer as @a pct_encoded
+ * @param buf_size the size of the output buffer
+ * @param[out] broken_encoding will be set to true if any '%' symbol is not
+ *                             followed by two valid hexadecimal digits,
+ *                             optional, can be NULL
+ * @return the number of characters written to the output buffer or
+ *         zero if output buffer is too small to hold the result
+ */
+size_t
+MHD_str_pct_decode_lenient_n_ (const char *pct_encoded,
+                               size_t pct_encoded_len,
+                               char *decoded,
+                               size_t buf_size,
+                               bool *broken_encoding);
+
+
+/**
+ * Decode string in-place with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying back all other characters without extra
+ * processing.
+ *
+ * @param[in,out] str the string to be updated in-place, must be zero-terminated
+ *                    on input, the output is zero-terminated; the string is
+ *                    truncated to zero length if broken encoding is found
+ * @return the number of character in decoded string
+ */
+size_t
+MHD_str_pct_decode_in_place_strict_ (char *str);
+
+
+/**
+ * Decode string in-place with percent-encoded characters as defined by
+ * RFC 3986 #section-2.1.
+ *
+ * This function decode string by converting percent-encoded characters to
+ * their decoded versions and copying back all other characters without extra
+ * processing.
+ *
+ * Any invalid percent-encoding sequences ('%' symbol not followed by two
+ * valid hexadecimal digits) are copied to the output string without decoding.
+ *
+ * @param[in,out] str the string to be updated in-place, must be zero-terminated
+ *                    on input, the output is zero-terminated
+ * @param[out] broken_encoding will be set to true if any '%' symbol is not
+ *                             followed by two valid hexadecimal digits,
+ *                             optional, can be NULL
+ * @return the number of character in decoded string
+ */
+size_t
+MHD_str_pct_decode_in_place_lenient_ (char *str,
+                                      bool *broken_encoding);
+
 #ifdef DAUTH_SUPPORT
 /**
  * Check two strings for equality, "unquoting" the first string from quoted

	libmicrohttpd HTTP/1.x server C library (MHD 1.x, stable)
	Log \| Files \| Refs \| Submodules \| README \| LICENSE

M	src/microhttpd/mhd_str.c	\|	308	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/microhttpd/mhd_str.h	\|	93	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++