gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[libmicrohttpd2] 15/21: POST parser: improved parsing performance by storing complete delimiter instead of boundary


From: Admin
Subject: [libmicrohttpd2] 15/21: POST parser: improved parsing performance by storing complete delimiter instead of boundary
Date: Fri, 13 Jun 2025 23:38:23 +0200

This is an automated email from the git hooks/post-receive script.

karlson2k pushed a commit to branch master
in repository libmicrohttpd2.

commit c5a7c98ea78305569d8fa261ec973cfe7d3e917c
Author: Evgeny Grin (Karlson2k) <k2k@drgrin.dev>
AuthorDate: Fri Jun 13 19:48:58 2025 +0200

    POST parser: improved parsing performance by storing complete delimiter instead of boundary
---
 src/mhd2/mhd_post_parser.h   |   6 +-
 src/mhd2/post_parser_funcs.c | 214 ++++++++++++++++++++++---------------------
 2 files changed, 113 insertions(+), 107 deletions(-)

diff --git a/src/mhd2/mhd_post_parser.h b/src/mhd2/mhd_post_parser.h
index 3e70cd3..378464c 100644
--- a/src/mhd2/mhd_post_parser.h
+++ b/src/mhd2/mhd_post_parser.h
@@ -410,10 +410,10 @@ struct mhd_PostParserMPartFormData
   size_t delim_check_start;
 
   /**
-   * The boundary marker.
-   * Allocated in the stream's memory pool
+   * Multi-part delimited.
+   * Consists of CRLF + "--" + boundary marker.
    */
-  struct mhd_BufferConst bound;
+  struct mhd_BufferConst delim;
 };
 
 
diff --git a/src/mhd2/post_parser_funcs.c b/src/mhd2/post_parser_funcs.c
index 97317fb..9be8860 100644
--- a/src/mhd2/post_parser_funcs.c
+++ b/src/mhd2/post_parser_funcs.c
@@ -96,6 +96,7 @@ process_mpart_header (struct MHD_Connection *restrict c,
   struct mhd_BufferConst mpart_bound;
   bool mpart_bound_quoted;
   enum mhd_StingStartsWithTokenResult res;
+  char *buf;
 
   mhd_assert (NULL != h_cnt_tp->cstr);
 
@@ -140,39 +141,49 @@ process_mpart_header (struct MHD_Connection *restrict c,
 
   mhd_assert (NULL != mpart_bound.data);
 
+  buf = (char *)
+        mhd_stream_alloc_memory (c,
+                                 mpart_bound.size + 4);
+  if (NULL == buf)
+  {
+    /* It is very low probability that pool would not have memory just
+     * to held the small boundary string. While it could be possible
+     * to allocate memory from "large buffer", it would over-complicate
+     * code here and at freeing part. */
+    mhd_LOG_MSG (c->daemon, MHD_SC_REQ_POST_PARSE_FAILED_NO_POOL_MEM, \
+                 "The request POST data cannot be parsed because " \
+                 "there is not enough pool memory.");
+    c->rq.u_proc.post.parse_result = MHD_POST_PARSE_RES_FAILED_NO_POOL_MEM;
+    return mhd_MPART_DET_ERROR_SET;
+  }
+
+  c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA;
+
+  buf[0] = '\r';
+  buf[1] = '\n';
+  buf[2] = '-';
+  buf[3] = '-';
+
   if (! mpart_bound_quoted)
   {
-    c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA;
-    c->rq.u_proc.post.e_d.m_form.bound = mpart_bound;
+    memcpy (buf + 4,
+            mpart_bound.data,
+            mpart_bound.size);
+    c->rq.u_proc.post.e_d.m_form.delim.data = buf;
+    c->rq.u_proc.post.e_d.m_form.delim.size = mpart_bound.size + 4;
   }
   else
   {
-    char *buf;
-
+    size_t unq_size;
     mhd_assert (2 <= mpart_bound.size); /* At least one char and at least one '\' */
 
-    buf = (char *)
-          mhd_stream_alloc_memory (c,
-                                   mpart_bound.size);
-    if (NULL == buf)
-    {
-      /* It is very low probability that pool would not have memory just
-       * to held the small boundary string. While it could be possible
-       * to allocate memory from "large buffer", it would over-complicate
-       * code here and at freeing part. */
-      mhd_LOG_MSG (c->daemon, MHD_SC_REQ_POST_PARSE_FAILED_NO_POOL_MEM, \
-                   "The request POST data cannot be parsed because " \
-                   "there is not enough pool memory.");
-      c->rq.u_proc.post.parse_result = MHD_POST_PARSE_RES_FAILED_NO_POOL_MEM;
-      return mhd_MPART_DET_ERROR_SET;
-    }
-    c->rq.u_proc.post.enc = MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA;
-    c->rq.u_proc.post.e_d.m_form.bound.size =
-      mhd_str_unquote (mpart_bound.data,
-                       mpart_bound.size,
-                       buf);
-    mhd_assert (0 != c->rq.u_proc.post.e_d.m_form.bound.size);
+    unq_size = mhd_str_unquote (mpart_bound.data,
+                                mpart_bound.size,
+                                buf + 4);
+    c->rq.u_proc.post.e_d.m_form.delim.data = buf;
+    c->rq.u_proc.post.e_d.m_form.delim.size = unq_size + 4;
   }
+  mhd_assert (4 < c->rq.u_proc.post.e_d.m_form.delim.size);
   return mhd_MPART_DET_OK;
 }
 
@@ -333,12 +344,13 @@ reset_parse_field_data_mpart_init (struct mhd_PostParserData *pdata)
   pdata->e_d.m_form.st = mhd_POST_MPART_ST_NOT_STARTED;
   pdata->e_d.m_form.line_start = mhd_POST_INVALID_POS;
   pdata->e_d.m_form.delim_check_start = mhd_POST_INVALID_POS;
-  mhd_assert (NULL != pdata->e_d.m_form.bound.data);
-  mhd_assert (0 != pdata->e_d.m_form.bound.size);
-  mhd_assert (NULL == memchr (pdata->e_d.m_form.bound.data, '\r', \
-                              pdata->e_d.m_form.bound.size));
-  mhd_assert (NULL == memchr (pdata->e_d.m_form.bound.data, '\n', \
-                              pdata->e_d.m_form.bound.size));
+  mhd_assert (NULL != pdata->e_d.m_form.delim.data);
+  mhd_assert (4 < pdata->e_d.m_form.delim.size);
+  mhd_assert (0 == memcmp (pdata->e_d.m_form.delim.data, "\r\n--", 4));
+  mhd_assert (NULL == memchr (pdata->e_d.m_form.delim.data + 4, '\r', \
+                              pdata->e_d.m_form.delim.size - 4));
+  mhd_assert (NULL == memchr (pdata->e_d.m_form.delim.data + 4, '\n', \
+                              pdata->e_d.m_form.delim.size - 4));
   pdata->field_start = 0;
 }
 
@@ -445,7 +457,7 @@ mhd_stream_prepare_for_post_parse (struct MHD_Connection *restrict c)
               c->rq.u_proc.post.enc);
   mhd_assert ((MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA != \
                c->rq.u_proc.post.enc) || \
-              (0 != c->rq.u_proc.post.e_d.m_form.bound.size));
+              (4 < c->rq.u_proc.post.e_d.m_form.delim.size));
 
   init_post_parse_data (c);
 
@@ -1430,9 +1442,11 @@ parse_post_mpart (struct MHD_Connection *restrict c,
   struct mhd_PostParserMPartFormData *const mf = &(p_data->e_d.m_form); /**< the current "form-data" parsing details */
   size_t i;
 
-  mhd_assert (NULL != mf->bound.data);
-  mhd_assert (NULL == memchr (mf->bound.data, '\r', mf->bound.size));
-  mhd_assert (NULL == memchr (mf->bound.data, '\n', mf->bound.size));
+  mhd_assert (NULL != mf->delim.data);
+  mhd_assert (4 < mf->delim.size);
+  mhd_assert (0 == memcmp (mf->delim.data, "\r\n--", 4));
+  mhd_assert (NULL == memchr (mf->delim.data + 4, '\r', mf->delim.size - 4));
+  mhd_assert (NULL == memchr (mf->delim.data + 4, '\n', mf->delim.size - 4));
   mhd_assert (MHD_HTTP_POST_ENCODING_MULTIPART_FORMDATA == \
               c->rq.u_proc.post.enc);
   mhd_assert (MHD_POST_PARSE_RES_OK == p_data->parse_result);
@@ -1528,14 +1542,12 @@ parse_post_mpart (struct MHD_Connection *restrict c,
       mhd_assert (mhd_POST_INVALID_POS == mf->line_start);
       mf->line_start = i;
 #ifndef MHD_FAVOR_SMALL_CODE
-      if (*pdata_size - i >= mf->bound.size + 2)
-      {
-        if (('-' == buf[i]) &&
-            ('-' == buf[i + 1]) &&
-            (0 == memcmp (buf + i + 2, mf->bound.data, mf->bound.size)))
+      if (*pdata_size - i >= mf->delim.size - 2) /* Exclude CRLF prefix for the first delimiter */
+      { /* Exclude CRLF prefix for the first delimiter */
+        if (0 == memcmp (buf + i, mf->delim.data + 2, mf->delim.size - 2))
         {
           mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND;
-          i += 2 + mf->bound.size + 1;
+          i += mf->delim.size - 2;
         }
         else
           mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
@@ -1548,32 +1560,31 @@ parse_post_mpart (struct MHD_Connection *restrict c,
     case mhd_POST_MPART_ST_PREAMBL_CHECKING_FOR_DELIM:
       mhd_assert (mhd_POST_INVALID_POS == mf->delim_check_start); /* Ignored for first delimiter */
       mhd_assert (i >= mf->line_start);
-      do /* Fast local loop */
+      mhd_assert (*pdata_size >= mf->line_start);
+      mhd_assert (i < mf->line_start + (mf->delim.size - 2));
+      if (*pdata_size - mf->line_start >= (mf->delim.size - 2))
       {
-        mhd_assert (i - mf->line_start < mf->bound.size + 2);
-        if (i < mf->line_start + 2)
+        /* Enough data for the delimiter */
+        if (0 == memcmp (buf + mf->line_start,
+                         mf->delim.data + 2,
+                         mf->delim.size - 2))
         {
-          if ('-' != buf[i])
-          {
-            mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
-            break;
-          }
-        }
-        else if (i <= mf->line_start + mf->bound.size + 1)
-        {
-          if (mf->bound.data[i - (mf->line_start + 2)] != buf[i])
-          {
-            mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
-            break;
-          }
-          else if (i == mf->line_start + mf->bound.size + 1)
-          {
-            mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND;
-            ++i;
-            break;
-          }
+          mf->st = mhd_POST_MPART_ST_FIRST_DELIM_FOUND;
+          i = mf->line_start + (mf->delim.size - 2);
         }
-      } while (*pdata_size > ++i);
+        else
+          mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
+      }
+      else
+      {
+        /* Not enough data for the delimiter */
+        if (0 == memcmp (buf + mf->line_start,
+                         mf->delim.data + 2,
+                         *pdata_size - mf->line_start))
+          i = *pdata_size;
+        else
+          mf->st = mhd_POST_MPART_ST_BACK_TO_PREAMBL;
+      }
       mhd_assert ((*pdata_size == i) || \
                   (mhd_POST_MPART_ST_FIRST_DELIM_FOUND == mf->st) || \
                   (mhd_POST_MPART_ST_BACK_TO_PREAMBL == mf->st));
@@ -1581,7 +1592,7 @@ parse_post_mpart (struct MHD_Connection *restrict c,
     case mhd_POST_MPART_ST_FIRST_DELIM_FOUND:
       mhd_assert (mhd_POST_INVALID_POS == mf->delim_check_start); /* Ignored for first delimiter */
       mhd_assert (mhd_POST_INVALID_POS != mf->line_start);
-      mhd_assert (i >= mf->line_start + mf->bound.size + 2);
+      mhd_assert (i >= mf->line_start + mf->delim.size - 2);
       do /* Fast local loop */
       {
         if ('\n' == buf[i])
@@ -1602,7 +1613,7 @@ parse_post_mpart (struct MHD_Connection *restrict c,
           mf->st = mhd_POST_MPART_ST_FORMAT_ERROR;
           break;
         }
-        else if ((i == mf->line_start + mf->bound.size + 3) &&
+        else if ((i == mf->line_start + (mf->delim.size - 2) + 1) &&
                  ('-' == buf [i - 1]) &&
                  ('-' == buf [i]))
         {
@@ -1666,13 +1677,11 @@ parse_post_mpart (struct MHD_Connection *restrict c,
             mf->st = mhd_POST_MPART_ST_FORMAT_ERROR;
           break;
         }
-        else if (mf->line_start + mf->bound.size + 1 == i)
+        else if (mf->line_start + (mf->delim.size - 2) == i + 1)
         {
-          if (('-' == buf[mf->line_start]) &&
-              ('-' == buf[mf->line_start + 1]) &&
-              (0 == memcmp (buf + mf->line_start + 2,
-                            mf->bound.data,
-                            mf->bound.size)))
+          if (0 == memcmp (buf + mf->line_start,
+                           mf->delim.data + 2,
+                           mf->delim.size - 2))
           {
             /* The delimiter before the end of the header */
             if (2 > mf->line_start)
@@ -1989,14 +1998,12 @@ parse_post_mpart (struct MHD_Connection *restrict c,
       mhd_assert (mhd_POST_INVALID_POS != p_data->field_start);
       mf->line_start = i;
 #ifndef MHD_FAVOR_SMALL_CODE
-      if (*pdata_size - i >= mf->bound.size + 2)
+      if (*pdata_size - i >= mf->delim.size - 2)
       {
-        if (('-' == buf[i]) &&
-            ('-' == buf[i + 1]) &&
-            (0 == memcmp (buf + i + 2, mf->bound.data, mf->bound.size)))
+        if (0 == memcmp (buf + i, mf->delim.data + 2, mf->delim.size - 2))
         {
           mf->st = mhd_POST_MPART_ST_DELIM_FOUND;
-          i += 2 + mf->bound.size;
+          i += mf->delim.size - 2;
         }
         else
           mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
@@ -2009,41 +2016,40 @@ parse_post_mpart (struct MHD_Connection *restrict c,
     case mhd_POST_MPART_ST_VALUE_CHECKING_FOR_DELIM:
       mhd_assert (mhd_POST_INVALID_POS != p_data->field_start);
       mhd_assert (i >= mf->line_start);
-      do /* Fast local loop */
+      mhd_assert (*pdata_size >= mf->line_start);
+      mhd_assert (i < mf->line_start + (mf->delim.size - 2));
+      if (*pdata_size - mf->line_start >= (mf->delim.size - 2))
       {
-        mhd_assert (i - mf->line_start < mf->bound.size + 2);
-        if (i < mf->line_start + 2)
-        {
-          if ('-' != buf[i])
-          {
-            mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
-            break;
-          }
-        }
-        else if (i <= mf->line_start + mf->bound.size + 1)
+        /* Enough data for the delimiter */
+        if (0 == memcmp (buf + mf->line_start,
+                         mf->delim.data + 2,
+                         mf->delim.size - 2))
         {
-          if (mf->bound.data[i - (mf->line_start + 2)] != buf[i])
-          {
-            mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
-            break;
-          }
-          if (i == mf->line_start + mf->bound.size + 1)
-          {
-            mf->st = mhd_POST_MPART_ST_DELIM_FOUND;
-            ++i;
-            break;
-          }
+          mf->st = mhd_POST_MPART_ST_DELIM_FOUND;
+          i = mf->line_start + (mf->delim.size - 2);
         }
-      } while (*pdata_size > ++i);
+        else
+          mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
+      }
+      else
+      {
+        /* Not enough data for the delimiter */
+        if (0 == memcmp (buf + mf->line_start,
+                         mf->delim.data + 2,
+                         *pdata_size - mf->line_start))
+          i = *pdata_size;
+        else
+          mf->st = mhd_POST_MPART_ST_BACK_TO_VALUE;
+      }
       mhd_assert ((*pdata_size == i) || \
-                  (mhd_POST_MPART_ST_BACK_TO_VALUE == mf->st) || \
-                  (mhd_POST_MPART_ST_DELIM_FOUND == mf->st));
+                  (mhd_POST_MPART_ST_DELIM_FOUND == mf->st) || \
+                  (mhd_POST_MPART_ST_BACK_TO_VALUE == mf->st));
       continue;
     case mhd_POST_MPART_ST_DELIM_FOUND:
       mhd_assert (mhd_POST_INVALID_POS != mf->delim_check_start);
       mhd_assert (mhd_POST_INVALID_POS != mf->line_start);
       mhd_assert (mhd_POST_INVALID_POS != p_data->field_start);
-      mhd_assert (i >= mf->line_start + mf->bound.size + 2);
+      mhd_assert (i >= mf->line_start + mf->delim.size - 2);
       do /* Fast local loop */
       {
         if ('\n' == buf[i])
@@ -2061,7 +2067,7 @@ parse_post_mpart (struct MHD_Connection *restrict c,
           mf->st = mhd_POST_MPART_ST_FORMAT_ERROR;
           break;
         }
-        else if ((i == mf->line_start + mf->bound.size + 3) &&
+        else if ((i == mf->line_start + (mf->delim.size - 2) + 1) &&
                  ('-' == buf [i - 1]) &&
                  ('-' == buf [i]))
         {

-- 
To stop receiving notification emails like this one, please contact
gnunet@gnunet.org.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]