libmicrohttpd2

HTTP server C library (MHD 2.x, alpha)
Log | Files | Refs | README | LICENSE

commit c5a7c98ea78305569d8fa261ec973cfe7d3e917c
parent c36e910f5689c94528beabfa3fc619aa09f5a312
Author: Evgeny Grin (Karlson2k) <k2k@drgrin.dev>
Date:   Fri, 13 Jun 2025 19:48:58 +0200

POST parser: improved parsing performance by storing complete delimiter instead of boundary

Diffstat:
M src/mhd2/mhd_post_parser.h | 6 +++---
M src/mhd2/post_parser_funcs.c | 214 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
2 files changed, 113 insertions(+), 107 deletions(-)

diff --git a/src/mhd2/mhd_post_parser.h b/src/mhd2/mhd_post_parser.h @@ -410,10 +410,10 @@ struct mhd_PostParserMPartFormData size_t delim_check_start; /** - * The boundary marker. - * Allocated in the stream's memory pool + * Multi-part delimited. + * Consists of CRLF + "--" + boundary marker. */ - struct mhd_BufferConst bound; + struct mhd_BufferConst delim; }; diff --git a/src/mhd2/post_parser_funcs.c b/src/mhd2/post_parser_funcs.c @@ -96,6 +96,7 @@ process_mpart_header (struct MHD_Connection *restrict c, struct mhd_BufferConst mpart_bound; bool mpart_bound_quoted; enum mhd_StingStartsWithTokenResult res; + char *buf; mhd_assert (NULL != h_cnt_tp->cstr); @@ -140,39 +141,49 @@ process_mpart_header (struct MHD_Connection *restrict c, mhd_assert (NULL != mpart_bound.data); + buf = (char *) + mhd_stream_alloc_memory (c, + mpart_bound.size + 4); + if (NULL == buf) + { + /* It is very low probability that pool would not have memory just + * to held the small boundary string. While it could be possible + * to allocate memory from "large buffer", it would over-complicate + * code here and at freeing part. */ + mhd_LOG_MSG (c->daemon, MHD_SC_REQ_POST_PARSE_FAILED_NO_POOL_MEM, \ + "The request POST data cannot be parsed because " \ + "there is not enough pool memory."); + c->rq.u_proc.post.parse_result = MHD_POST_PARSE_RES_FAILED_NO_POOL_MEM; + return mhd_MPART_DET_ERROR_SET; + } + + c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA; + + buf[0] = '\r'; + buf[1] = '\n'; + buf[2] = '-'; + buf[3] = '-'; + if (! 
mpart_bound_quoted) { - c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA; - c->rq.u_proc.post.e_d.m_form.bound = mpart_bound; + memcpy (buf + 4, + mpart_bound.data, + mpart_bound.size); + c->rq.u_proc.post.e_d.m_form.delim.data = buf; + c->rq.u_proc.post.e_d.m_form.delim.size = mpart_bound.size + 4; } else { - char *buf; - + size_t unq_size; mhd_assert (2 <= mpart_bound.size); /* At least one char and at least one '\' */ - buf = (char *) - mhd_stream_alloc_memory (c, - mpart_bound.size); - if (NULL == buf) - { - /* It is very low probability that pool would not have memory just - * to held the small boundary string. While it could be possible - * to allocate memory from "large buffer", it would over-complicate - * code here and at freeing part. */ - mhd_LOG_MSG (c->daemon, MHD_SC_REQ_POST_PARSE_FAILED_NO_POOL_MEM, \ - "The request POST data cannot be parsed because " \ - "there is not enough pool memory."); - c->rq.u_proc.post.parse_result = MHD_POST_PARSE_RES_FAILED_NO_POOL_MEM; - return mhd_MPART_DET_ERROR_SET; - } - c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA; - c->rq.u_proc.post.e_d.m_form.bound.size = - mhd_str_unquote (mpart_bound.data, - mpart_bound.size, - buf); - mhd_assert (0 != c->rq.u_proc.post.e_d.m_form.bound.size); + unq_size = mhd_str_unquote (mpart_bound.data, + mpart_bound.size, + buf + 4); + c->rq.u_proc.post.e_d.m_form.delim.data = buf; + c->rq.u_proc.post.e_d.m_form.delim.size = unq_size + 4; } + mhd_assert (4 < c->rq.u_proc.post.e_d.m_form.delim.size); return mhd_MPART_DET_OK; } @@ -333,12 +344,13 @@ reset_parse_field_data_mpart_init (struct mhd_PostParserData *pdata) pdata->e_d.m_form.st = mhd_POST_MPART_ST_NOT_STARTED; pdata->e_d.m_form.line_start = mhd_POST_INVALID_POS; pdata->e_d.m_form.delim_check_start = mhd_POST_INVALID_POS; - mhd_assert (NULL != pdata->e_d.m_form.bound.data); - mhd_assert (0 != pdata->e_d.m_form.bound.size); - mhd_assert (NULL == memchr (pdata->e_d.m_form.bound.data, '\r', \ - 
pdata->e_d.m_form.bound.size)); - mhd_assert (NULL == memchr (pdata->e_d.m_form.bound.data, '\n', \ - pdata->e_d.m_form.bound.size)); + mhd_assert (NULL != pdata->e_d.m_form.delim.data); + mhd_assert (4 < pdata->e_d.m_form.delim.size); + mhd_assert (0 == memcmp (pdata->e_d.m_form.delim.data, "\r\n--", 4)); + mhd_assert (NULL == memchr (pdata->e_d.m_form.delim.data + 4, '\r', \ + pdata->e_d.m_form.delim.size - 4)); + mhd_assert (NULL == memchr (pdata->e_d.m_form.delim.data + 4, '\n', \ + pdata->e_d.m_form.delim.size - 4)); pdata->field_start = 0; } @@ -445,7 +457,7 @@ mhd_stream_prepare_for_post_parse (struct MHD_Connection *restrict c) c->rq.u_proc.post.enc); mhd_assert ((MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA != \ c->rq.u_proc.post.enc) || \ - (0 != c->rq.u_proc.post.e_d.m_form.bound.size)); + (4 < c->rq.u_proc.post.e_d.m_form.delim.size)); init_post_parse_data (c); @@ -1430,9 +1442,11 @@ parse_post_mpart (struct MHD_Connection *restrict c, struct mhd_PostParserMPartFormData *const mf = &(p_data->e_d.m_form); /**< the current "form-data" parsing details */ size_t i; - mhd_assert (NULL != mf->bound.data); - mhd_assert (NULL == memchr (mf->bound.data, '\r', mf->bound.size)); - mhd_assert (NULL == memchr (mf->bound.data, '\n', mf->bound.size)); + mhd_assert (NULL != mf->delim.data); + mhd_assert (4 < mf->delim.size); + mhd_assert (0 == memcmp (mf->delim.data, "\r\n--", 4)); + mhd_assert (NULL == memchr (mf->delim.data + 4, '\r', mf->delim.size - 4)); + mhd_assert (NULL == memchr (mf->delim.data + 4, '\n', mf->delim.size - 4)); mhd_assert (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA == \ c->rq.u_proc.post.enc); mhd_assert (MHD_POST_PARSE_RES_OK == p_data->parse_result); @@ -1528,14 +1542,12 @@ parse_post_mpart (struct MHD_Connection *restrict c, mhd_assert (mhd_POST_INVALID_POS == mf->line_start); mf->line_start = i; #ifndef MHD_FAVOR_SMALL_CODE - if (*pdata_size - i >= mf->bound.size + 2) - { - if (('-' == buf[i]) && - ('-' == buf[i + 1]) && - (0 == memcmp (buf + i 
+ 2, mf->bound.data, mf->bound.size))) + if (*pdata_size - i >= mf->delim.size - 2) /* Exclude CRLF prefix for the first delimiter */ + { /* Exclude CRLF prefix for the first delimiter */ + if (0 == memcmp (buf + i, mf->delim.data + 2, mf->delim.size - 2)) { mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND; - i += 2 + mf->bound.size + 1; + i += mf->delim.size - 2; } else mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL; @@ -1548,32 +1560,31 @@ parse_post_mpart (struct MHD_Connection *restrict c, case mhd_POST_MPART_ST_PREAMBL_CHECKING_FOR_DELIM: mhd_assert (mhd_POST_INVALID_POS == mf->delim_check_start); /* Ignored for first delimiter */ mhd_assert (i >= mf->line_start); - do /* Fast local loop */ + mhd_assert (*pdata_size >= mf->line_start); + mhd_assert (i < mf->line_start + (mf->delim.size - 2)); + if (*pdata_size - mf->line_start >= (mf->delim.size - 2)) { - mhd_assert (i - mf->line_start < mf->bound.size + 2); - if (i < mf->line_start + 2) + /* Enough data for the delimiter */ + if (0 == memcmp (buf + mf->line_start, + mf->delim.data + 2, + mf->delim.size - 2)) { - if ('-' != buf[i]) - { - mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL; - break; - } - } - else if (i <= mf->line_start + mf->bound.size + 1) - { - if (mf->bound.data[i - (mf->line_start + 2)] != buf[i]) - { - mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL; - break; - } - else if (i == mf->line_start + mf->bound.size + 1) - { - mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND; - ++i; - break; - } + mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND; + i = mf->line_start + (mf->delim.size - 2); } - } while (*pdata_size > ++i); + else + mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL; + } + else + { + /* Not enough data for the delimiter */ + if (0 == memcmp (buf + mf->line_start, + mf->delim.data + 2, + *pdata_size - mf->line_start)) + i = *pdata_size; + else + mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL; + } mhd_assert ((*pdata_size == i) || \ (mhd_POST_MPART_ST_FIRST_DELIM_FOUND == mf->st) || \ (mhd_POST_MPART_ST_BACK_TO_PREAMBL 
== mf->st)); @@ -1581,7 +1592,7 @@ parse_post_mpart (struct MHD_Connection *restrict c, case mhd_POST_MPART_ST_FIRST_DELIM_FOUND: mhd_assert (mhd_POST_INVALID_POS == mf->delim_check_start); /* Ignored for first delimiter */ mhd_assert (mhd_POST_INVALID_POS != mf->line_start); - mhd_assert (i >= mf->line_start + mf->bound.size + 2); + mhd_assert (i >= mf->line_start + mf->delim.size - 2); do /* Fast local loop */ { if ('\n' == buf[i]) @@ -1602,7 +1613,7 @@ parse_post_mpart (struct MHD_Connection *restrict c, mf->st = mhd_POST_MPART_ST_FORMAT_ERROR; break; } - else if ((i == mf->line_start + mf->bound.size + 3) && + else if ((i == mf->line_start + (mf->delim.size - 2) + 1) && ('-' == buf [i - 1]) && ('-' == buf [i])) { @@ -1666,13 +1677,11 @@ parse_post_mpart (struct MHD_Connection *restrict c, mf->st = mhd_POST_MPART_ST_FORMAT_ERROR; break; } - else if (mf->line_start + mf->bound.size + 1 == i) + else if (mf->line_start + (mf->delim.size - 2) == i + 1) { - if (('-' == buf[mf->line_start]) && - ('-' == buf[mf->line_start + 1]) && - (0 == memcmp (buf + mf->line_start + 2, - mf->bound.data, - mf->bound.size))) + if (0 == memcmp (buf + mf->line_start, + mf->delim.data + 2, + mf->delim.size - 2)) { /* The delimiter before the end of the header */ if (2 > mf->line_start) @@ -1989,14 +1998,12 @@ parse_post_mpart (struct MHD_Connection *restrict c, mhd_assert (mhd_POST_INVALID_POS != p_data->field_start); mf->line_start = i; #ifndef MHD_FAVOR_SMALL_CODE - if (*pdata_size - i >= mf->bound.size + 2) + if (*pdata_size - i >= mf->delim.size - 2) { - if (('-' == buf[i]) && - ('-' == buf[i + 1]) && - (0 == memcmp (buf + i + 2, mf->bound.data, mf->bound.size))) + if (0 == memcmp (buf + i, mf->delim.data + 2, mf->delim.size - 2)) { mf->st = mhd_POST_MPART_ST_DELIM_FOUND; - i += 2 + mf->bound.size; + i += mf->delim.size - 2; } else mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE; @@ -2009,41 +2016,40 @@ parse_post_mpart (struct MHD_Connection *restrict c, case 
mhd_POST_MPART_ST_VALUE_CHECKING_FOR_DELIM: mhd_assert (mhd_POST_INVALID_POS != p_data->field_start); mhd_assert (i >= mf->line_start); - do /* Fast local loop */ + mhd_assert (*pdata_size >= mf->line_start); + mhd_assert (i < mf->line_start + (mf->delim.size - 2)); + if (*pdata_size - mf->line_start >= (mf->delim.size - 2)) { - mhd_assert (i - mf->line_start < mf->bound.size + 2); - if (i < mf->line_start + 2) - { - if ('-' != buf[i]) - { - mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE; - break; - } - } - else if (i <= mf->line_start + mf->bound.size + 1) + /* Enough data for the delimiter */ + if (0 == memcmp (buf + mf->line_start, + mf->delim.data + 2, + mf->delim.size - 2)) { - if (mf->bound.data[i - (mf->line_start + 2)] != buf[i]) - { - mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE; - break; - } - if (i == mf->line_start + mf->bound.size + 1) - { - mf->st = mhd_POST_MPART_ST_DELIM_FOUND; - ++i; - break; - } + mf->st = mhd_POST_MPART_ST_DELIM_FOUND; + i = mf->line_start + (mf->delim.size - 2); } - } while (*pdata_size > ++i); + else + mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE; + } + else + { + /* Not enough data for the delimiter */ + if (0 == memcmp (buf + mf->line_start, + mf->delim.data + 2, + *pdata_size - mf->line_start)) + i = *pdata_size; + else + mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE; + } mhd_assert ((*pdata_size == i) || \ - (mhd_POST_MPART_ST_BACK_TO_VALUE == mf->st) || \ - (mhd_POST_MPART_ST_DELIM_FOUND == mf->st)); + (mhd_POST_MPART_ST_DELIM_FOUND == mf->st) || \ + (mhd_POST_MPART_ST_BACK_TO_VALUE == mf->st)); continue; case mhd_POST_MPART_ST_DELIM_FOUND: mhd_assert (mhd_POST_INVALID_POS != mf->delim_check_start); mhd_assert (mhd_POST_INVALID_POS != mf->line_start); mhd_assert (mhd_POST_INVALID_POS != p_data->field_start); - mhd_assert (i >= mf->line_start + mf->bound.size + 2); + mhd_assert (i >= mf->line_start + mf->delim.size - 2); do /* Fast local loop */ { if ('\n' == buf[i]) @@ -2061,7 +2067,7 @@ parse_post_mpart (struct MHD_Connection *restrict c, 
mf->st = mhd_POST_MPART_ST_FORMAT_ERROR; break; } - else if ((i == mf->line_start + mf->bound.size + 3) && + else if ((i == mf->line_start + (mf->delim.size - 2) + 1) && ('-' == buf [i - 1]) && ('-' == buf [i])) {