grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-17-gd9f7791


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-17-gd9f7791
Date: Mon, 03 Mar 2014 07:03:45 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  d9f7791cf94cfe7ac8227298aa780c4462cfc312 (commit)
       via  41c233f4fa77a1aec09639b656f6a29d6e6f325f (commit)
       via  0ffd7d26dc8f7627d2b4bd745ede7049a09b0dd9 (commit)
      from  248f8490f1cf4e3e4f4e1b79a4c3fd61c23251c3 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=d9f7791cf94cfe7ac8227298aa780c4462cfc312


commit d9f7791cf94cfe7ac8227298aa780c4462cfc312
Author: Paul Eggert <address@hidden>
Date:   Sun Mar 2 23:02:22 2014 -0800

    grep: fix some unlikely bugs in trivial_case_ignore
    
    * src/main.c (MBRTOWC, WCRTOMB): Reformat as per usual GNU style.
    (trivial_case_ignore): Don't overrun buffer in the unusual case
    when a character has both lowercase and uppercase counterparts.
    Don't rely on undefined behavior when assigning out-of-range value
    to an 'int'.  Simplify by avoiding unnecessary buffer copies.
    Work even with shift encodings, by using mbsinit to
    disable the optimization if we are not in the initial state
    when we replace B by [BCD].

diff --git a/src/main.c b/src/main.c
index 2ee585a..14b7be2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1867,19 +1867,20 @@ parse_grep_colors (void)
       return;
 }
 
+#define MBRTOWC(pwc, s, n, ps) \
+  (MB_CUR_MAX == 1 \
+   ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+   : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) \
+  (MB_CUR_MAX == 1 \
+   ? (*(s) = wctob ((wint_t) (wc)), 1) \
+   : wcrtomb (s, wc, ps))
+
 /* If the newline-separated regular expressions, KEYS (with length, LEN
    and no trailing NUL byte), are amenable to transformation into
    otherwise equivalent case-ignoring ones, perform the transformation,
    put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
    and return true.  Otherwise, return false.  */
-#define MBRTOWC(pwc, s, n, ps) \
-  (MB_CUR_MAX == 1 ? \
-   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
-   mbrtowc ((pwc), (s), (n), (ps)))
-#define WCRTOMB(s, wc, ps) \
-  (MB_CUR_MAX == 1 ? \
-   (*(s) = wctob ((wint_t) (wc)), 1) : \
-   wcrtomb ((s), (wc), (ps)))
 
 static bool
 trivial_case_ignore (size_t len, char const *keys,
@@ -1890,21 +1891,23 @@ trivial_case_ignore (size_t len, char const *keys,
   if (memchr (keys, '\\', len) || memchr (keys, '[', len))
     return false;
 
-  /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
-     other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
-     maps to [BC], which requires MB_CUR_MAX + 3 bytes.   */
-  *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
+  /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+     the two other_case(B) characters, C and D, each occupies
+     MB_CUR_MAX bytes, so each B maps to [BCD], which requires 2 *
+     MB_CUR_MAX + 3 bytes; this is bounded above by the constant
+     expression 2 * MB_LEN_MAX + 3.  */
+  *new_keys = xnmalloc (len + 1, 2 * MB_LEN_MAX + 3);
   char *p = *new_keys;
 
-  mbstate_t mb_state;
-  memset (&mb_state, 0, sizeof mb_state);
+  mbstate_t mb_state = { 0 };
   while (len)
     {
+      bool initial_state = mbsinit (&mb_state) != 0;
       wchar_t wc;
-      int n = MBRTOWC (&wc, keys, len, &mb_state);
+      size_t n = MBRTOWC (&wc, keys, len, &mb_state);
 
       /* For an invalid, incomplete or L'\0', skip this optimization.  */
-      if (n <= 0)
+      if ((size_t) -2 <= n)
         {
         skip_case_ignore_optimization:
           free (*new_keys);
@@ -1915,39 +1918,30 @@ trivial_case_ignore (size_t len, char const *keys,
       keys += n;
       len -= n;
 
-      if (!iswalpha (wc))
+      wint_t lc = towlower (wc);
+      wint_t uc = towupper (wc);
+      if (lc == wc && uc == wc)
         {
           memcpy (p, orig, n);
           p += n;
         }
+      else if (! initial_state)
+        goto skip_case_ignore_optimization;
       else
         {
           *p++ = '[';
           memcpy (p, orig, n);
           p += n;
 
-          wint_t folded = towlower (wc);
-          if (folded != wc)
-            {
-              char buf[MB_CUR_MAX];
-              int n2 = WCRTOMB (buf, folded, &mb_state);
-              if (n2 <= 0)
-                goto skip_case_ignore_optimization;
-              assert (n2 <= MB_CUR_MAX);
-              memcpy (p, buf, n2);
-              p += n2;
-            }
-          folded = towupper (wc);
-          if (folded != wc)
-            {
-              char buf[MB_CUR_MAX];
-              int n2 = WCRTOMB (buf, folded, &mb_state);
-              if (n2 <= 0)
-                goto skip_case_ignore_optimization;
-              assert (n2 <= MB_CUR_MAX);
-              memcpy (p, buf, n2);
-              p += n2;
-            }
+          size_t lcbytes = WCRTOMB (p, lc, &mb_state);
+          if (lcbytes == (size_t) -1)
+            goto skip_case_ignore_optimization;
+          p += lcbytes;
+
+          size_t ucbytes = WCRTOMB (p, uc, &mb_state);
+          if (ucbytes == (size_t) -1 || ! mbsinit (&mb_state))
+            goto skip_case_ignore_optimization;
+          p += ucbytes;
 
           *p++ = ']';
         }

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=41c233f4fa77a1aec09639b656f6a29d6e6f325f


commit d9f7791cf94cfe7ac8227298aa780c4462cfc312
Author: Paul Eggert <address@hidden>
Date:   Sun Mar 2 23:02:22 2014 -0800

    grep: fix some unlikely bugs in trivial_case_ignore
    
    * src/main.c (MBRTOWC, WCRTOMB): Reformat as per usual GNU style.
    (trivial_case_ignore): Don't overrun buffer in the unusual case
    when a character has both lowercase and uppercase counterparts.
    Don't rely on undefined behavior when assigning out-of-range value
    to an 'int'.  Simplify by avoiding unnecessary buffer copies.
    Work even with shift encodings, by using mbsinit to
    disable the optimization if we are not in the initial state
    when we replace B by [BCD].

diff --git a/src/main.c b/src/main.c
index 2ee585a..14b7be2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1867,19 +1867,20 @@ parse_grep_colors (void)
       return;
 }
 
+#define MBRTOWC(pwc, s, n, ps) \
+  (MB_CUR_MAX == 1 \
+   ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+   : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) \
+  (MB_CUR_MAX == 1 \
+   ? (*(s) = wctob ((wint_t) (wc)), 1) \
+   : wcrtomb (s, wc, ps))
+
 /* If the newline-separated regular expressions, KEYS (with length, LEN
    and no trailing NUL byte), are amenable to transformation into
    otherwise equivalent case-ignoring ones, perform the transformation,
    put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
    and return true.  Otherwise, return false.  */
-#define MBRTOWC(pwc, s, n, ps) \
-  (MB_CUR_MAX == 1 ? \
-   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
-   mbrtowc ((pwc), (s), (n), (ps)))
-#define WCRTOMB(s, wc, ps) \
-  (MB_CUR_MAX == 1 ? \
-   (*(s) = wctob ((wint_t) (wc)), 1) : \
-   wcrtomb ((s), (wc), (ps)))
 
 static bool
 trivial_case_ignore (size_t len, char const *keys,
@@ -1890,21 +1891,23 @@ trivial_case_ignore (size_t len, char const *keys,
   if (memchr (keys, '\\', len) || memchr (keys, '[', len))
     return false;
 
-  /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
-     other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
-     maps to [BC], which requires MB_CUR_MAX + 3 bytes.   */
-  *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
+  /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+     the two other_case(B) characters, C and D, each occupies
+     MB_CUR_MAX bytes, so each B maps to [BCD], which requires 2 *
+     MB_CUR_MAX + 3 bytes; this is bounded above by the constant
+     expression 2 * MB_LEN_MAX + 3.  */
+  *new_keys = xnmalloc (len + 1, 2 * MB_LEN_MAX + 3);
   char *p = *new_keys;
 
-  mbstate_t mb_state;
-  memset (&mb_state, 0, sizeof mb_state);
+  mbstate_t mb_state = { 0 };
   while (len)
     {
+      bool initial_state = mbsinit (&mb_state) != 0;
       wchar_t wc;
-      int n = MBRTOWC (&wc, keys, len, &mb_state);
+      size_t n = MBRTOWC (&wc, keys, len, &mb_state);
 
       /* For an invalid, incomplete or L'\0', skip this optimization.  */
-      if (n <= 0)
+      if ((size_t) -2 <= n)
         {
         skip_case_ignore_optimization:
           free (*new_keys);
@@ -1915,39 +1918,30 @@ trivial_case_ignore (size_t len, char const *keys,
       keys += n;
       len -= n;
 
-      if (!iswalpha (wc))
+      wint_t lc = towlower (wc);
+      wint_t uc = towupper (wc);
+      if (lc == wc && uc == wc)
         {
           memcpy (p, orig, n);
           p += n;
         }
+      else if (! initial_state)
+        goto skip_case_ignore_optimization;
       else
         {
           *p++ = '[';
           memcpy (p, orig, n);
           p += n;
 
-          wint_t folded = towlower (wc);
-          if (folded != wc)
-            {
-              char buf[MB_CUR_MAX];
-              int n2 = WCRTOMB (buf, folded, &mb_state);
-              if (n2 <= 0)
-                goto skip_case_ignore_optimization;
-              assert (n2 <= MB_CUR_MAX);
-              memcpy (p, buf, n2);
-              p += n2;
-            }
-          folded = towupper (wc);
-          if (folded != wc)
-            {
-              char buf[MB_CUR_MAX];
-              int n2 = WCRTOMB (buf, folded, &mb_state);
-              if (n2 <= 0)
-                goto skip_case_ignore_optimization;
-              assert (n2 <= MB_CUR_MAX);
-              memcpy (p, buf, n2);
-              p += n2;
-            }
+          size_t lcbytes = WCRTOMB (p, lc, &mb_state);
+          if (lcbytes == (size_t) -1)
+            goto skip_case_ignore_optimization;
+          p += lcbytes;
+
+          size_t ucbytes = WCRTOMB (p, uc, &mb_state);
+          if (ucbytes == (size_t) -1 || ! mbsinit (&mb_state))
+            goto skip_case_ignore_optimization;
+          p += ucbytes;
 
           *p++ = ']';
         }

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=0ffd7d26dc8f7627d2b4bd745ede7049a09b0dd9


commit d9f7791cf94cfe7ac8227298aa780c4462cfc312
Author: Paul Eggert <address@hidden>
Date:   Sun Mar 2 23:02:22 2014 -0800

    grep: fix some unlikely bugs in trivial_case_ignore
    
    * src/main.c (MBRTOWC, WCRTOMB): Reformat as per usual GNU style.
    (trivial_case_ignore): Don't overrun buffer in the unusual case
    when a character has both lowercase and uppercase counterparts.
    Don't rely on undefined behavior when assigning out-of-range value
    to an 'int'.  Simplify by avoiding unnecessary buffer copies.
    Work even with shift encodings, by using mbsinit to
    disable the optimization if we are not in the initial state
    when we replace B by [BCD].

diff --git a/src/main.c b/src/main.c
index 2ee585a..14b7be2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1867,19 +1867,20 @@ parse_grep_colors (void)
       return;
 }
 
+#define MBRTOWC(pwc, s, n, ps) \
+  (MB_CUR_MAX == 1 \
+   ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
+   : mbrtowc (pwc, s, n, ps))
+#define WCRTOMB(s, wc, ps) \
+  (MB_CUR_MAX == 1 \
+   ? (*(s) = wctob ((wint_t) (wc)), 1) \
+   : wcrtomb (s, wc, ps))
+
 /* If the newline-separated regular expressions, KEYS (with length, LEN
    and no trailing NUL byte), are amenable to transformation into
    otherwise equivalent case-ignoring ones, perform the transformation,
    put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
    and return true.  Otherwise, return false.  */
-#define MBRTOWC(pwc, s, n, ps) \
-  (MB_CUR_MAX == 1 ? \
-   (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
-   mbrtowc ((pwc), (s), (n), (ps)))
-#define WCRTOMB(s, wc, ps) \
-  (MB_CUR_MAX == 1 ? \
-   (*(s) = wctob ((wint_t) (wc)), 1) : \
-   wcrtomb ((s), (wc), (ps)))
 
 static bool
 trivial_case_ignore (size_t len, char const *keys,
@@ -1890,21 +1891,23 @@ trivial_case_ignore (size_t len, char const *keys,
   if (memchr (keys, '\\', len) || memchr (keys, '[', len))
     return false;
 
-  /* Worst case is that each byte B of KEYS is ASCII alphabetic and each
-     other_case(B) character, C, occupies MB_CUR_MAX bytes, so each B
-     maps to [BC], which requires MB_CUR_MAX + 3 bytes.   */
-  *new_keys = xnmalloc (MB_CUR_MAX + 3, len + 1);
+  /* Worst case is that each byte B of KEYS is ASCII alphabetic and
+     the two other_case(B) characters, C and D, each occupies
+     MB_CUR_MAX bytes, so each B maps to [BCD], which requires 2 *
+     MB_CUR_MAX + 3 bytes; this is bounded above by the constant
+     expression 2 * MB_LEN_MAX + 3.  */
+  *new_keys = xnmalloc (len + 1, 2 * MB_LEN_MAX + 3);
   char *p = *new_keys;
 
-  mbstate_t mb_state;
-  memset (&mb_state, 0, sizeof mb_state);
+  mbstate_t mb_state = { 0 };
   while (len)
     {
+      bool initial_state = mbsinit (&mb_state) != 0;
       wchar_t wc;
-      int n = MBRTOWC (&wc, keys, len, &mb_state);
+      size_t n = MBRTOWC (&wc, keys, len, &mb_state);
 
       /* For an invalid, incomplete or L'\0', skip this optimization.  */
-      if (n <= 0)
+      if ((size_t) -2 <= n)
         {
         skip_case_ignore_optimization:
           free (*new_keys);
@@ -1915,39 +1918,30 @@ trivial_case_ignore (size_t len, char const *keys,
       keys += n;
       len -= n;
 
-      if (!iswalpha (wc))
+      wint_t lc = towlower (wc);
+      wint_t uc = towupper (wc);
+      if (lc == wc && uc == wc)
         {
           memcpy (p, orig, n);
           p += n;
         }
+      else if (! initial_state)
+        goto skip_case_ignore_optimization;
       else
         {
           *p++ = '[';
           memcpy (p, orig, n);
           p += n;
 
-          wint_t folded = towlower (wc);
-          if (folded != wc)
-            {
-              char buf[MB_CUR_MAX];
-              int n2 = WCRTOMB (buf, folded, &mb_state);
-              if (n2 <= 0)
-                goto skip_case_ignore_optimization;
-              assert (n2 <= MB_CUR_MAX);
-              memcpy (p, buf, n2);
-              p += n2;
-            }
-          folded = towupper (wc);
-          if (folded != wc)
-            {
-              char buf[MB_CUR_MAX];
-              int n2 = WCRTOMB (buf, folded, &mb_state);
-              if (n2 <= 0)
-                goto skip_case_ignore_optimization;
-              assert (n2 <= MB_CUR_MAX);
-              memcpy (p, buf, n2);
-              p += n2;
-            }
+          size_t lcbytes = WCRTOMB (p, lc, &mb_state);
+          if (lcbytes == (size_t) -1)
+            goto skip_case_ignore_optimization;
+          p += lcbytes;
+
+          size_t ucbytes = WCRTOMB (p, uc, &mb_state);
+          if (ucbytes == (size_t) -1 || ! mbsinit (&mb_state))
+            goto skip_case_ignore_optimization;
+          p += ucbytes;
 
           *p++ = ']';
         }

-----------------------------------------------------------------------

Summary of changes:
 src/dfa.c  |    8 ++--
 src/main.c |  114 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+), 4 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]