grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-32-g92956f9


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-32-g92956f9
Date: Mon, 24 Mar 2014 06:05:12 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  92956f9eafd470076170c05953c45dcbb41e7bd8 (commit)
      from  bc0732da3a2c636b87a0173c2129162dbd806037 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=92956f9eafd470076170c05953c45dcbb41e7bd8


commit 92956f9eafd470076170c05953c45dcbb41e7bd8
Author: Paul Eggert <address@hidden>
Date:   Sun Mar 23 23:04:26 2014 -0700

    dfa: avoid undefined behavior
    
    * src/dfa.c (FETCH_WC, addtok_wc): Don't rely on undefined behavior
    when converting an out-of-range value to 'int'.
    (FETCH_WC, prepare_wc_buf): Don't rely on conversion state after
    mbrtowc returns a special value, as it's undefined for (size_t) -1.
    (prepare_wc_buf): Simplify test for valid character.

diff --git a/src/dfa.c b/src/dfa.c
index 92ac1b9..0a2b8b8 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -807,7 +807,7 @@ static int minrep, maxrep;      /* Repeat counts for {m,n}.  */
 static int cur_mb_len = 1;      /* Length of the multibyte representation of
                                    wctok.  */
 /* These variables are used only if (MB_CUR_MAX > 1).  */
-static mbstate_t mbs;           /* Mbstate for mbrlen.  */
+static mbstate_t mbs;           /* mbstate for mbrtowc.  */
 static wchar_t wctok;           /* Wide character representation of the current
                                    multibyte character.  */
 static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec.
@@ -844,15 +844,18 @@ static unsigned char const *buf_end;    /* reference to end in dfaexec.  */
     else                                       \
       {                                                \
         wchar_t _wc;                           \
-        cur_mb_len = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
-        if (cur_mb_len <= 0)                   \
+        size_t nbytes = mbrtowc (&_wc, lexptr, lexleft, &mbs); \
+        bool valid_char = 1 <= nbytes && nbytes < (size_t) -2; \
+        if (! valid_char)                      \
           {                                    \
+            memset (&mbs, 0, sizeof mbs);      \
             cur_mb_len = 1;                    \
             --lexleft;                         \
             (wc) = (c) = to_uchar (*lexptr++);  \
           }                                    \
         else                                   \
           {                                    \
+            cur_mb_len = nbytes;               \
             lexptr += cur_mb_len;              \
             lexleft -= cur_mb_len;             \
             (wc) = _wc;                                \
@@ -1685,16 +1688,19 @@ static void
 addtok_wc (wint_t wc)
 {
   unsigned char buf[MB_LEN_MAX];
-  mbstate_t s;
+  mbstate_t s = { 0 };
   int i;
-  memset (&s, 0, sizeof s);
-  cur_mb_len = wcrtomb ((char *) buf, wc, &s);
+  size_t stored_bytes = wcrtomb ((char *) buf, wc, &s);
 
-  /* This is merely stop-gap.  When cur_mb_len is 0 or negative,
-     buf[0] is undefined, yet skipping the addtok_mb call altogether
-     can result in heap corruption.  */
-  if (cur_mb_len <= 0)
-    buf[0] = 0;
+  if (stored_bytes != (size_t) -1)
+    cur_mb_len = stored_bytes;
+  else
+    {
+      /* This is merely stop-gap.  buf[0] is undefined, yet skipping
+         the addtok_mb call altogether can corrupt the heap.  */
+      cur_mb_len = 1;
+      buf[0] = 0;
+    }
 
   addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
   for (i = 1; i < cur_mb_len; i++)
@@ -3328,13 +3334,13 @@ prepare_wc_buf (const char *begin, const char *end)
     {
       if (remain_bytes == 0)
         {
-          remain_bytes
+          size_t nbytes
             = mbrtowc (inputwcs + i, begin + i, end - begin - i + 1, &mbs);
-          if (remain_bytes < 1
-              || remain_bytes == (size_t) -1
-              || remain_bytes == (size_t) -2
-              || (remain_bytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
+          if (! (1 <= nbytes && nbytes < (size_t) -2)
+              || (nbytes == 1 && inputwcs[i] == (wchar_t) begin[i]))
             {
+              if ((size_t) -2 <= nbytes)
+                memset (&mbs, 0, sizeof mbs);
               remain_bytes = 0;
               inputwcs[i] = (wchar_t) begin[i];
               mblen_buf[i] = 0;
@@ -3343,8 +3349,8 @@ prepare_wc_buf (const char *begin, const char *end)
             }
           else
             {
-              mblen_buf[i] = remain_bytes;
-              remain_bytes--;
+              mblen_buf[i] = nbytes;
+              remain_bytes = nbytes - 1;
             }
         }
       else

-----------------------------------------------------------------------

Summary of changes:
 src/dfa.c |   42 ++++++++++++++++++++++++------------------
 1 files changed, 24 insertions(+), 18 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]