commit 757c62c9ba8790626889553516420e294ba95137
parent aba1c8bd98795cb1eec679b762228524733e954f
Author: Evgeny Grin (Karlson2k) <k2k@drgrin.dev>
Date: Tue, 18 Nov 2025 19:11:33 +0100
Improved readability of URI normalisation function
Diffstat:
| M | src/mhd2/mhd_str.c | | | 255 | ++++++++++++++++++++++++++++++++++++------------------------------------------- |
1 file changed, 116 insertions(+), 139 deletions(-)
diff --git a/src/mhd2/mhd_str.c b/src/mhd2/mhd_str.c
@@ -2333,92 +2333,109 @@ mhd_str_pct_decode_in_place_lenient (char *restrict str,
}
+mhd_static_inline MHD_FN_PAR_NONNULL_ALL_
+MHD_FN_PAR_IN_SIZE_ (2, 1) bool
+pct_decode_no_slash (const size_t str_len,
+ const char *restrict str,
+ const size_t chr_pos,
+ char *restrict chr)
+{ /* Decode "%XX" at str[chr_pos] into *chr unless it is malformed or decodes to '/' */
+ mhd_assert ('%' == *chr); /* '*chr' must hold the '%' found at str[chr_pos] */
+ mhd_assert (*chr == str[chr_pos]);
+ mhd_ASSUME (str_len > chr_pos); /* So the subtraction below cannot wrap */
+
+ if ((str_len - chr_pos) <= 2u) /* Overflow-safe form of 'str_len <= chr_pos + 2' */
+ return false; /* Fewer than two chars follow the '%' */
+ else
+ {
+ const char d1 = str[chr_pos + 1u];
+ const char d2 = str[chr_pos + 2u];
+ const int h = xdigittovalue (d1); /* High nibble; tested below for '< 0' (presumably "not a hex digit") */
+ const int l = xdigittovalue (d2); /* Low nibble; same convention */
+
+ if ((0 <= h) && (0 <= l))
+ {
+ char dec;
+ mhd_ASSUME (15 >= h);
+ mhd_ASSUME (15 >= l);
+ dec = (char) ((((unsigned char) h) << 4u) | ((unsigned char) l));
+ if ('/' != dec)
+ {
+ *chr = dec; /* '*chr' is modified only on success */
+ return true; /* Callers then skip the two consumed hex digits */
+ }
+ }
+ }
+ /* Not a valid hex pair, or it decodes to '/' which is deliberately left encoded */
+ return false;
+}
+
+
MHD_INTERNAL
MHD_FN_PAR_NONNULL_ (2) MHD_FN_PAR_INOUT_SIZE_ (2,1) size_t
mhd_str_dec_norm_uri_path (size_t str_len,
char *restrict str)
{
- size_t r;
- size_t w;
-
- mhd_assert ((str_len + 3u) > str_len); /* Algo does not work with str_len close to max */
+ size_t r; /**< "read" position */
+ size_t w; /**< "write" position */
- for (r = 0u, w = 0u; str_len > r; ++r)
+ w = 0u;
+ r = 0u;
+ while (str_len > r)
{
- /* Process all segments not started with "/" if any */
+ /* Process all segments not started with "/" (if any) */
char c;
mhd_ASSUME (w <= r);
c = str[r];
if ('/' == c)
break; /* Processed after this loop */
- if ('%' == c && str_len > (r + 2u))
- {
- const int h = xdigittovalue (str[r + 1u]);
- const int l = xdigittovalue (str[r + 2u]);
- if ((0 <= h) && (0 <= l))
- {
- char dec;
- dec = (char) ((unsigned char) l | (((unsigned char) h) << 4u));
- if ('/' != dec)
- {
- c = dec;
- r += 2u;
- }
- }
- }
+ if (('%' == c) &&
+ pct_decode_no_slash (str_len,
+ str,
+ r,
+ &c))
+ r += 2u;
if ('.' == c)
{
char c2;
- if (str_len == r + 1u)
- continue; /* Skip "."; actually stop */
+ if (str_len == r + 1u) /* overflow-safe as 'str_len > r' */
+ {
+ ++r; /* Skip "." */
+ break; /* At the edge, stop */
+ }
mhd_ASSUME (w <= r);
c2 = str[r + 1u];
if ('/' == c2)
{
- ++r;
- continue; /* Skip "./" */
- }
- if ('%' == c2 && str_len > (r + 3u))
- {
- const int h = xdigittovalue (str[r + 2u]);
- const int l = xdigittovalue (str[r + 3u]);
- if ((0 <= h) && (0 <= l))
- {
- char dec;
- dec = (char) ((unsigned char) l | (((unsigned char) h) << 4u));
- if ('/' != dec)
- {
- c2 = dec;
- r += 2u;
- }
- }
+ r += 2u; /* Skip "./" */
+ continue;
}
+ if (('%' == c2) &&
+ pct_decode_no_slash (str_len,
+ str,
+ r + 1u,
+ &c2))
+ r += 2u;
if ('.' == c2)
{
char c3;
- if (str_len == r + 2u)
+ if (str_len == r + 2u) /* overflow-safe as 'str_len > r + 1' */
{
- ++r;
- continue; /* Skip ".."; actually stop */
+ r += 2u; /* Skip ".." */
+ break; /* At the edge, stop */
}
mhd_ASSUME (w <= r);
c3 = str[r + 2u];
if ('/' == c3)
{
- r += 2u;
- continue; /* Skip "../" */
+ r += 3u; /* Skip "../" */
+ continue;
}
- str[w++] = c;
- str[w++] = c2;
- /* c3 has not been percent-decoded */
- r += 2u;
- }
- else
- {
- str[w++] = c;
- str[w++] = c2;
- r += 2u;
+ /* Do not write 'c3' as it has not been percent-decoded */
}
+ str[w++] = c;
+ str[w++] = c2;
+ r += 2u;
}
else
{
@@ -2428,30 +2445,22 @@ mhd_str_dec_norm_uri_path (size_t str_len,
break;
}
mhd_ASSUME (w <= r);
- /* Found first segment with is not "../" and is not "./" */
+ /* Found first segment which is not "../" and is not "./" OR the end of the string */
for ((void) r; str_len > r && '/' != str[r]; ++r)
{
char c;
mhd_ASSUME (w <= r);
c = str[r];
- if ('%' == c && str_len > (r + 2u))
- {
- const int h = xdigittovalue (str[r + 1u]);
- const int l = xdigittovalue (str[r + 2u]);
- if ((0 <= h) && (0 <= l))
- {
- char dec;
- dec = (char) ((unsigned char) l | (((unsigned char) h) << 4u));
- if ('/' != dec)
- {
- c = dec;
- r += 2u;
- }
- }
- }
+ if (('%' == c) &&
+ pct_decode_no_slash (str_len,
+ str,
+ r,
+ &c))
+ r += 2u;
mhd_ASSUME (w <= r);
str[w++] = c;
}
+ /* Found first '/' which is not skipped OR the end of the string */
if (str_len > r)
{
/* Found first segment started with '/' */
@@ -2470,66 +2479,49 @@ mhd_str_dec_norm_uri_path (size_t str_len,
c = str[r];
if ('/' == c)
continue;
- if ('%' == c && str_len > (r + 2u))
- {
- const int h = xdigittovalue (str[r + 1u]);
- const int l = xdigittovalue (str[r + 2u]);
- if ((0 <= h) && (0 <= l))
- {
- char dec;
- dec = (char) ((unsigned char) l | (((unsigned char) h) << 4u));
- if ('/' != dec)
- {
- c = dec;
- r += 2u;
- }
- }
- }
+ if (('%' == c) &&
+ pct_decode_no_slash (str_len,
+ str,
+ r,
+ &c))
+ r += 2u;
if ('.' == c)
{
char c2;
- if (str_len == r + 1u)
+ if (str_len == r + 1u) /* overflow-safe as 'str_len > r' */
{
- ++r;
- break; /* Skip ".", leave bare '/' */
+ ++r; /* Skip ".", leave bare '/' */
+ break; /* At the edge, stop */
}
mhd_ASSUME (w <= r);
c2 = str[r + 1u];
if ('/' == c2)
{
w = seg_start;
- ++r;
- continue; /* Skip "."; go to the next "/", which will be written again */
- }
- if ('%' == c2 && str_len > (r + 3u))
- {
- const int h = xdigittovalue (str[r + 2u]);
- const int l = xdigittovalue (str[r + 3u]);
- if ((0 <= h) && (0 <= l))
- {
- char dec;
- dec = (char) ((unsigned char) l | (((unsigned char) h) << 4u));
- if ('/' != dec)
- {
- c2 = dec;
- r += 2u;
- }
- }
+ ++r; /* Skip "." */
+ continue; /* Go to the next "/", which will be written again */
}
+ if (('%' == c2) &&
+ pct_decode_no_slash (str_len,
+ str,
+ r + 1u,
+ &c2))
+ r += 2u;
if ('.' == c2)
{
char c3;
- if (str_len == r + 2u)
+ if (str_len == r + 2u) /* overflow-safe as 'str_len > r + 1' */
{
w = seg_start;
if (0 < w)
do
- {
+ { /* Rewind output to the start of prev segment */
--w;
} while (0 < w && '/' != str[w]);
- str[w++] = '/';
- r += 2u;
- break; /* Skip ".."; replace prev segment with '/' */
+ mhd_ASSUME (w < r);
+ str[w++] = '/'; /* Replace prev segment with '/' */
+ r += 2u; /* Skip ".." */
+ break; /* At the edge, stop */
}
mhd_ASSUME (w <= r);
c3 = str[r + 2u];
@@ -2538,23 +2530,17 @@ mhd_str_dec_norm_uri_path (size_t str_len,
w = seg_start;
if (0 < w)
do
- {
+ { /* Rewind output to the start of prev segment */
--w;
} while (0 < w && '/' != str[w]);
- r += 2u;
- continue; /* Skip ".."; put next '/' to the start of prev segment */
+ r += 2u; /* Skip ".."; put next '/' to the start of prev segment */
+ continue;
}
- str[w++] = c;
- str[w++] = c2;
- /* c3 has not been percent-decoded */
- r += 2u;
- }
- else
- {
- str[w++] = c;
- str[w++] = c2;
- r += 2u;
+ /* Do not write 'c3' as it has not been percent-decoded */
}
+ str[w++] = c;
+ str[w++] = c2;
+ r += 2u;
}
else
{
@@ -2569,21 +2555,12 @@ mhd_str_dec_norm_uri_path (size_t str_len,
char c;
mhd_ASSUME (w <= r);
c = str[r];
- if ('%' == c && str_len > (r + 2u))
- {
- const int h = xdigittovalue (str[r + 1u]);
- const int l = xdigittovalue (str[r + 2u]);
- if ((0 <= h) && (0 <= l))
- {
- char dec;
- dec = (char) ((unsigned char) l | (((unsigned char) h) << 4u));
- if ('/' != dec)
- {
- c = dec;
- r += 2u;
- }
- }
- }
+ if (('%' == c) &&
+ pct_decode_no_slash (str_len,
+ str,
+ r,
+ &c))
+ r += 2u;
mhd_ASSUME (w <= r);
str[w++] = c;
}