[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[libmicrohttpd2] 15/21: POST parser: improved parsing performance by storing complete delimiter instead of boundary
From: |
Admin |
Subject: |
[libmicrohttpd2] 15/21: POST parser: improved parsing performance by storing complete delimiter instead of boundary |
Date: |
Fri, 13 Jun 2025 23:38:23 +0200 |
This is an automated email from the git hooks/post-receive script.
karlson2k pushed a commit to branch master
in repository libmicrohttpd2.
commit c5a7c98ea78305569d8fa261ec973cfe7d3e917c
Author: Evgeny Grin (Karlson2k) <k2k@drgrin.dev>
AuthorDate: Fri Jun 13 19:48:58 2025 +0200
POST parser: improved parsing performance by storing complete delimiter
instead of boundary
---
src/mhd2/mhd_post_parser.h | 6 +-
src/mhd2/post_parser_funcs.c | 214 ++++++++++++++++++++++---------------------
2 files changed, 113 insertions(+), 107 deletions(-)
diff --git a/src/mhd2/mhd_post_parser.h b/src/mhd2/mhd_post_parser.h
index 3e70cd3..378464c 100644
--- a/src/mhd2/mhd_post_parser.h
+++ b/src/mhd2/mhd_post_parser.h
@@ -410,10 +410,10 @@ struct mhd_PostParserMPartFormData
size_t delim_check_start;
/**
- * The boundary marker.
- * Allocated in the stream's memory pool
+ * Multi-part delimiter.
+ * Consists of CRLF + "--" + boundary marker.
*/
- struct mhd_BufferConst bound;
+ struct mhd_BufferConst delim;
};
diff --git a/src/mhd2/post_parser_funcs.c b/src/mhd2/post_parser_funcs.c
index 97317fb..9be8860 100644
--- a/src/mhd2/post_parser_funcs.c
+++ b/src/mhd2/post_parser_funcs.c
@@ -96,6 +96,7 @@ process_mpart_header (struct MHD_Connection *restrict c,
struct mhd_BufferConst mpart_bound;
bool mpart_bound_quoted;
enum mhd_StingStartsWithTokenResult res;
+ char *buf;
mhd_assert (NULL != h_cnt_tp->cstr);
@@ -140,39 +141,49 @@ process_mpart_header (struct MHD_Connection *restrict c,
mhd_assert (NULL != mpart_bound.data);
+ buf = (char *)
+ mhd_stream_alloc_memory (c,
+ mpart_bound.size + 4);
+ if (NULL == buf)
+ {
+ /* It is very unlikely that the pool would not have enough memory just
+ * to hold the small boundary string. While it could be possible
+ * to allocate memory from "large buffer", it would over-complicate
+ * code here and at freeing part. */
+ mhd_LOG_MSG (c->daemon, MHD_SC_REQ_POST_PARSE_FAILED_NO_POOL_MEM, \
+ "The request POST data cannot be parsed because " \
+ "there is not enough pool memory.");
+ c->rq.u_proc.post.parse_result = MHD_POST_PARSE_RES_FAILED_NO_POOL_MEM;
+ return mhd_MPART_DET_ERROR_SET;
+ }
+
+ c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA;
+
+ buf[0] = '\r';
+ buf[1] = '\n';
+ buf[2] = '-';
+ buf[3] = '-';
+
if (! mpart_bound_quoted)
{
- c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA;
- c->rq.u_proc.post.e_d.m_form.bound = mpart_bound;
+ memcpy (buf + 4,
+ mpart_bound.data,
+ mpart_bound.size);
+ c->rq.u_proc.post.e_d.m_form.delim.data = buf;
+ c->rq.u_proc.post.e_d.m_form.delim.size = mpart_bound.size + 4;
}
else
{
- char *buf;
-
+ size_t unq_size;
mhd_assert (2 <= mpart_bound.size); /* At least one char and at least one
'\' */
- buf = (char *)
- mhd_stream_alloc_memory (c,
- mpart_bound.size);
- if (NULL == buf)
- {
- /* It is very low probability that pool would not have memory just
- * to held the small boundary string. While it could be possible
- * to allocate memory from "large buffer", it would over-complicate
- * code here and at freeing part. */
- mhd_LOG_MSG (c->daemon, MHD_SC_REQ_POST_PARSE_FAILED_NO_POOL_MEM, \
- "The request POST data cannot be parsed because " \
- "there is not enough pool memory.");
- c->rq.u_proc.post.parse_result = MHD_POST_PARSE_RES_FAILED_NO_POOL_MEM;
- return mhd_MPART_DET_ERROR_SET;
- }
- c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA;
- c->rq.u_proc.post.e_d.m_form.bound.size =
- mhd_str_unquote (mpart_bound.data,
- mpart_bound.size,
- buf);
- mhd_assert (0 != c->rq.u_proc.post.e_d.m_form.bound.size);
+ unq_size = mhd_str_unquote (mpart_bound.data,
+ mpart_bound.size,
+ buf + 4);
+ c->rq.u_proc.post.e_d.m_form.delim.data = buf;
+ c->rq.u_proc.post.e_d.m_form.delim.size = unq_size + 4;
}
+ mhd_assert (4 < c->rq.u_proc.post.e_d.m_form.delim.size);
return mhd_MPART_DET_OK;
}
@@ -333,12 +344,13 @@ reset_parse_field_data_mpart_init (struct
mhd_PostParserData *pdata)
pdata->e_d.m_form.st = mhd_POST_MPART_ST_NOT_STARTED;
pdata->e_d.m_form.line_start = mhd_POST_INVALID_POS;
pdata->e_d.m_form.delim_check_start = mhd_POST_INVALID_POS;
- mhd_assert (NULL != pdata->e_d.m_form.bound.data);
- mhd_assert (0 != pdata->e_d.m_form.bound.size);
- mhd_assert (NULL == memchr (pdata->e_d.m_form.bound.data, '\r', \
- pdata->e_d.m_form.bound.size));
- mhd_assert (NULL == memchr (pdata->e_d.m_form.bound.data, '\n', \
- pdata->e_d.m_form.bound.size));
+ mhd_assert (NULL != pdata->e_d.m_form.delim.data);
+ mhd_assert (4 < pdata->e_d.m_form.delim.size);
+ mhd_assert (0 == memcmp (pdata->e_d.m_form.delim.data, "\r\n--", 4));
+ mhd_assert (NULL == memchr (pdata->e_d.m_form.delim.data + 4, '\r', \
+ pdata->e_d.m_form.delim.size - 4));
+ mhd_assert (NULL == memchr (pdata->e_d.m_form.delim.data + 4, '\n', \
+ pdata->e_d.m_form.delim.size - 4));
pdata->field_start = 0;
}
@@ -445,7 +457,7 @@ mhd_stream_prepare_for_post_parse (struct MHD_Connection
*restrict c)
c->rq.u_proc.post.enc);
mhd_assert ((MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA != \
c->rq.u_proc.post.enc) || \
- (0 != c->rq.u_proc.post.e_d.m_form.bound.size));
+ (4 < c->rq.u_proc.post.e_d.m_form.delim.size));
init_post_parse_data (c);
@@ -1430,9 +1442,11 @@ parse_post_mpart (struct MHD_Connection *restrict c,
struct mhd_PostParserMPartFormData *const mf = &(p_data->e_d.m_form); /**<
the current "form-data" parsing details */
size_t i;
- mhd_assert (NULL != mf->bound.data);
- mhd_assert (NULL == memchr (mf->bound.data, '\r', mf->bound.size));
- mhd_assert (NULL == memchr (mf->bound.data, '\n', mf->bound.size));
+ mhd_assert (NULL != mf->delim.data);
+ mhd_assert (4 < mf->delim.size);
+ mhd_assert (0 == memcmp (mf->delim.data, "\r\n--", 4));
+ mhd_assert (NULL == memchr (mf->delim.data + 4, '\r', mf->delim.size - 4));
+ mhd_assert (NULL == memchr (mf->delim.data + 4, '\n', mf->delim.size - 4));
mhd_assert (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA == \
c->rq.u_proc.post.enc);
mhd_assert (MHD_POST_PARSE_RES_OK == p_data->parse_result);
@@ -1528,14 +1542,12 @@ parse_post_mpart (struct MHD_Connection *restrict c,
mhd_assert (mhd_POST_INVALID_POS == mf->line_start);
mf->line_start = i;
#ifndef MHD_FAVOR_SMALL_CODE
- if (*pdata_size - i >= mf->bound.size + 2)
- {
- if (('-' == buf[i]) &&
- ('-' == buf[i + 1]) &&
- (0 == memcmp (buf + i + 2, mf->bound.data, mf->bound.size)))
+ if (*pdata_size - i >= mf->delim.size - 2) /* Exclude CRLF prefix for
the first delimiter */
+ { /* Exclude CRLF prefix for the first delimiter */
+ if (0 == memcmp (buf + i, mf->delim.data + 2, mf->delim.size - 2))
{
mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND;
- i += 2 + mf->bound.size + 1;
+ i += mf->delim.size - 2;
}
else
mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
@@ -1548,32 +1560,31 @@ parse_post_mpart (struct MHD_Connection *restrict c,
case mhd_POST_MPART_ST_PREAMBL_CHECKING_FOR_DELIM:
mhd_assert (mhd_POST_INVALID_POS == mf->delim_check_start); /* Ignored
for first delimiter */
mhd_assert (i >= mf->line_start);
- do /* Fast local loop */
+ mhd_assert (*pdata_size >= mf->line_start);
+ mhd_assert (i < mf->line_start + (mf->delim.size - 2));
+ if (*pdata_size - mf->line_start >= (mf->delim.size - 2))
{
- mhd_assert (i - mf->line_start < mf->bound.size + 2);
- if (i < mf->line_start + 2)
+ /* Enough data for the delimiter */
+ if (0 == memcmp (buf + mf->line_start,
+ mf->delim.data + 2,
+ mf->delim.size - 2))
{
- if ('-' != buf[i])
- {
- mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
- break;
- }
- }
- else if (i <= mf->line_start + mf->bound.size + 1)
- {
- if (mf->bound.data[i - (mf->line_start + 2)] != buf[i])
- {
- mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
- break;
- }
- else if (i == mf->line_start + mf->bound.size + 1)
- {
- mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND;
- ++i;
- break;
- }
+ mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND;
+ i = mf->line_start + (mf->delim.size - 2);
}
- } while (*pdata_size > ++i);
+ else
+ mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
+ }
+ else
+ {
+ /* Not enough data for the delimiter */
+ if (0 == memcmp (buf + mf->line_start,
+ mf->delim.data + 2,
+ *pdata_size - mf->line_start))
+ i = *pdata_size;
+ else
+ mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
+ }
mhd_assert ((*pdata_size == i) || \
(mhd_POST_MPART_ST_FIRST_DELIM_FOUND == mf->st) || \
(mhd_POST_MPART_ST_BACK_TO_PREAMBL == mf->st));
@@ -1581,7 +1592,7 @@ parse_post_mpart (struct MHD_Connection *restrict c,
case mhd_POST_MPART_ST_FIRST_DELIM_FOUND:
mhd_assert (mhd_POST_INVALID_POS == mf->delim_check_start); /* Ignored
for first delimiter */
mhd_assert (mhd_POST_INVALID_POS != mf->line_start);
- mhd_assert (i >= mf->line_start + mf->bound.size + 2);
+ mhd_assert (i >= mf->line_start + mf->delim.size - 2);
do /* Fast local loop */
{
if ('\n' == buf[i])
@@ -1602,7 +1613,7 @@ parse_post_mpart (struct MHD_Connection *restrict c,
mf->st = mhd_POST_MPART_ST_FORMAT_ERROR;
break;
}
- else if ((i == mf->line_start + mf->bound.size + 3) &&
+ else if ((i == mf->line_start + (mf->delim.size - 2) + 1) &&
('-' == buf [i - 1]) &&
('-' == buf [i]))
{
@@ -1666,13 +1677,11 @@ parse_post_mpart (struct MHD_Connection *restrict c,
mf->st = mhd_POST_MPART_ST_FORMAT_ERROR;
break;
}
- else if (mf->line_start + mf->bound.size + 1 == i)
+ else if (mf->line_start + (mf->delim.size - 2) == i + 1)
{
- if (('-' == buf[mf->line_start]) &&
- ('-' == buf[mf->line_start + 1]) &&
- (0 == memcmp (buf + mf->line_start + 2,
- mf->bound.data,
- mf->bound.size)))
+ if (0 == memcmp (buf + mf->line_start,
+ mf->delim.data + 2,
+ mf->delim.size - 2))
{
/* The delimiter before the end of the header */
if (2 > mf->line_start)
@@ -1989,14 +1998,12 @@ parse_post_mpart (struct MHD_Connection *restrict c,
mhd_assert (mhd_POST_INVALID_POS != p_data->field_start);
mf->line_start = i;
#ifndef MHD_FAVOR_SMALL_CODE
- if (*pdata_size - i >= mf->bound.size + 2)
+ if (*pdata_size - i >= mf->delim.size - 2)
{
- if (('-' == buf[i]) &&
- ('-' == buf[i + 1]) &&
- (0 == memcmp (buf + i + 2, mf->bound.data, mf->bound.size)))
+ if (0 == memcmp (buf + i, mf->delim.data + 2, mf->delim.size - 2))
{
mf->st = mhd_POST_MPART_ST_DELIM_FOUND;
- i += 2 + mf->bound.size;
+ i += mf->delim.size - 2;
}
else
mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
@@ -2009,41 +2016,40 @@ parse_post_mpart (struct MHD_Connection *restrict c,
case mhd_POST_MPART_ST_VALUE_CHECKING_FOR_DELIM:
mhd_assert (mhd_POST_INVALID_POS != p_data->field_start);
mhd_assert (i >= mf->line_start);
- do /* Fast local loop */
+ mhd_assert (*pdata_size >= mf->line_start);
+ mhd_assert (i < mf->line_start + (mf->delim.size - 2));
+ if (*pdata_size - mf->line_start >= (mf->delim.size - 2))
{
- mhd_assert (i - mf->line_start < mf->bound.size + 2);
- if (i < mf->line_start + 2)
- {
- if ('-' != buf[i])
- {
- mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
- break;
- }
- }
- else if (i <= mf->line_start + mf->bound.size + 1)
+ /* Enough data for the delimiter */
+ if (0 == memcmp (buf + mf->line_start,
+ mf->delim.data + 2,
+ mf->delim.size - 2))
{
- if (mf->bound.data[i - (mf->line_start + 2)] != buf[i])
- {
- mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
- break;
- }
- if (i == mf->line_start + mf->bound.size + 1)
- {
- mf->st = mhd_POST_MPART_ST_DELIM_FOUND;
- ++i;
- break;
- }
+ mf->st = mhd_POST_MPART_ST_DELIM_FOUND;
+ i = mf->line_start + (mf->delim.size - 2);
}
- } while (*pdata_size > ++i);
+ else
+ mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
+ }
+ else
+ {
+ /* Not enough data for the delimiter */
+ if (0 == memcmp (buf + mf->line_start,
+ mf->delim.data + 2,
+ *pdata_size - mf->line_start))
+ i = *pdata_size;
+ else
+ mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
+ }
mhd_assert ((*pdata_size == i) || \
- (mhd_POST_MPART_ST_BACK_TO_VALUE == mf->st) || \
- (mhd_POST_MPART_ST_DELIM_FOUND == mf->st));
+ (mhd_POST_MPART_ST_DELIM_FOUND == mf->st) || \
+ (mhd_POST_MPART_ST_BACK_TO_VALUE == mf->st));
continue;
case mhd_POST_MPART_ST_DELIM_FOUND:
mhd_assert (mhd_POST_INVALID_POS != mf->delim_check_start);
mhd_assert (mhd_POST_INVALID_POS != mf->line_start);
mhd_assert (mhd_POST_INVALID_POS != p_data->field_start);
- mhd_assert (i >= mf->line_start + mf->bound.size + 2);
+ mhd_assert (i >= mf->line_start + mf->delim.size - 2);
do /* Fast local loop */
{
if ('\n' == buf[i])
@@ -2061,7 +2067,7 @@ parse_post_mpart (struct MHD_Connection *restrict c,
mf->st = mhd_POST_MPART_ST_FORMAT_ERROR;
break;
}
- else if ((i == mf->line_start + mf->bound.size + 3) &&
+ else if ((i == mf->line_start + (mf->delim.size - 2) + 1) &&
('-' == buf [i - 1]) &&
('-' == buf [i]))
{
--
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.
- [libmicrohttpd2] 11/21: mhd_str: optimised caseless comparisons and case transformations, (continued)
- [libmicrohttpd2] 11/21: mhd_str: optimised caseless comparisons and case transformations, Admin, 2025/06/13
- [libmicrohttpd2] 13/21: parse_http_std_method(): optimised, Admin, 2025/06/13
- [libmicrohttpd2] 21/21: perf_replies: added response sizes 8 MiB and 101 MiB, Admin, 2025/06/13
- [libmicrohttpd2] 02/21: bootstrap: make sure that pre-commit hook really used, Admin, 2025/06/13
- [libmicrohttpd2] 01/21: conn_data_send.c: fixed large sending, added some asserts, Admin, 2025/06/13
- [libmicrohttpd2] 04/21: xdigittovalue(): optimised., Admin, 2025/06/13
- [libmicrohttpd2] 18/21: Renamed test_postprocessor -> test_postparser to match API naming, Admin, 2025/06/13
- [libmicrohttpd2] 16/21: configure: minor check improvement, Admin, 2025/06/13
- [libmicrohttpd2] 12/21: mhd_locks: added W32 implementation based on SRW locks (and minor improvements), Admin, 2025/06/13
- [libmicrohttpd2] 10/21: configure: added release build linker flags, Admin, 2025/06/13
- [libmicrohttpd2] 15/21: POST parser: improved parsing performance by storing complete delimiter instead of boundary,
Admin <=
- [libmicrohttpd2] 14/21: POST parser: optimised large upload processing, Admin, 2025/06/13
- [libmicrohttpd2] 07/21: daemon_start: cosmetics, fixed code style, Admin, 2025/06/13
- [libmicrohttpd2] 06/21: Fixed compiler warnings, Admin, 2025/06/13
- [libmicrohttpd2] 08/21: mhd_str: added functions attributes, fixed doxy, removed extra checks in functions, Admin, 2025/06/13
- [libmicrohttpd2] 03/21: bootstrap: English fixes, Admin, 2025/06/13
- [libmicrohttpd2] 05/21: mhd_str.c: minor readability improvements, Admin, 2025/06/13
- [libmicrohttpd2] 20/21: perf_replies: fixed formatting, Admin, 2025/06/13
- [libmicrohttpd2] 19/21: conn_data_send.c: fixed formatting, Admin, 2025/06/13
- [libmicrohttpd2] 17/21: POST parser: accelerate by using memmem() for delimiters, Admin, 2025/06/13