grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-53-ge9cf5ae


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-53-ge9cf5ae
Date: Tue, 08 Apr 2014 03:29:18 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  e9cf5ae927a462ac0b224668b39e0a778e2661c5 (commit)
      from  bf34bedae7276c0f5522ad40db55a7c0d23c739d (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=e9cf5ae927a462ac0b224668b39e0a778e2661c5


commit e9cf5ae927a462ac0b224668b39e0a778e2661c5
Author: Paul Eggert <address@hidden>
Date:   Mon Apr 7 20:28:26 2014 -0700

    grep: remove trivial_case_ignore
    
    This optimization is no longer needed, given the other
    optimizations recently installed.  Derived from a patch by
    Norihiro Tanaka; see <http://bugs.gnu.org/17019>.
    * bootstrap.conf (gnulib_modules): Remove assert-h.
    * src/dfa.c (CASE_FOLDED_BUFSIZE): Move here from dfa.h.
    Remove now-unnecessary static assert.
    (case_folded_counterparts): Now static.
    * src/dfa.h (CASE_FOLDED_BUFSIZE, case_folded_counterparts):
    Remove decls; no longer public.
    * src/dfasearch.c (kwsmusts): Use kwset even if MB_CUR_MAX > 1
    and case-insensitive.
    * src/grep.c (MBRTOWC, WCRTOMB): Remove.
    (fgrep_to_grep_pattern): Use mbrtowc, not MBRTOWC.
    (trivial_case_ignore): Remove; this optimization is no longer needed.
    All uses removed.

diff --git a/bootstrap.conf b/bootstrap.conf
index 86cd81d..367427d 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -24,7 +24,6 @@ gnulib_modules='
 alloca
 announce-gen
 argmatch
-assert-h
 binary-io
 btowc
 c-ctype
diff --git a/src/dfa.c b/src/dfa.c
index b6c1250..d8744d7 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -933,14 +933,17 @@ static short const lonesome_lower[] =
     0x03F5, 0x1E9B, 0x1FBE,
   };
 
-static_assert ((sizeof lonesome_lower / sizeof *lonesome_lower + 2
-                == CASE_FOLDED_BUFSIZE),
-               "CASE_FOLDED_BUFSIZE is wrong");
+/* Maximum number of characters that can be the case-folded
+   counterparts of a single character, not counting the character
+   itself.  This is 1 for towupper, 1 for towlower, and 1 for each
+   entry in LONESOME_LOWER.  */
+enum
+{ CASE_FOLDED_BUFSIZE = 2 + sizeof lonesome_lower / sizeof *lonesome_lower };
 
 /* Find the characters equal to C after case-folding, other than C
    itself, and store them into FOLDED.  Return the number of characters
    stored.  */
-int
+static int
 case_folded_counterparts (wchar_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
 {
   int i;
diff --git a/src/dfa.h b/src/dfa.h
index 6ed2231..db29a62 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -112,11 +112,3 @@ extern void dfawarn (const char *);
 extern _Noreturn void dfaerror (const char *);
 
 extern int using_utf8 (void);
-
-/* Maximum number of characters that can be the case-folded
-   counterparts of a single character, not counting the character
-   itself.  This is 1 for towupper, 1 for towlower, and 1 for each
-   entry in LONESOME_LOWER; see dfa.c.  */
-enum { CASE_FOLDED_BUFSIZE = 1 + 1 + 19 };
-
-extern int case_folded_counterparts (wchar_t, wchar_t[CASE_FOLDED_BUFSIZE]);
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 44360b6..2ae0a4a 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -81,12 +81,6 @@ dfawarn (char const *mesg)
 static void
 kwsmusts (void)
 {
-  /* With case-insensitive matching in a multi-byte locale, do not
-     use kwsearch, because in that case, it would be too expensive,
-     requiring that we case-convert all searched input.  */
-  if (MB_CUR_MAX > 1 && match_icase)
-    return;
-
   struct dfamust const *dm = dfamusts (dfa);
   if (dm)
     {
diff --git a/src/grep.c b/src/grep.c
index 7033730..8bd6c49 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1894,15 +1894,6 @@ parse_grep_colors (void)
       return;
 }
 
-#define MBRTOWC(pwc, s, n, ps) \
-  (MB_CUR_MAX == 1 \
-   ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \
-   : mbrtowc (pwc, s, n, ps))
-#define WCRTOMB(s, wc, ps) \
-  (MB_CUR_MAX == 1 \
-   ? (*(s) = wctob ((wint_t) (wc)), 1) \
-   : wcrtomb (s, wc, ps))
-
 /* Change a pattern for fgrep into grep.  */
 static void
 fgrep_to_grep_pattern (size_t len, char const *keys,
@@ -1915,7 +1906,7 @@ fgrep_to_grep_pattern (size_t len, char const *keys,
   for (; len; keys += n, len -= n)
     {
       wchar_t wc;
-      n = MBRTOWC (&wc, keys, len, &mb_state);
+      n = mbrtowc (&wc, keys, len, &mb_state);
       switch (n)
         {
         case (size_t) -2:
@@ -1942,86 +1933,6 @@ fgrep_to_grep_pattern (size_t len, char const *keys,
   *new_len = p - *new_keys;
 }
 
-/* If the newline-separated regular expressions, KEYS (with length, LEN
-   and no trailing NUL byte), are amenable to transformation into
-   otherwise equivalent case-ignoring ones, perform the transformation,
-   put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN,
-   and return true.  Otherwise, return false.  */
-
-static bool
-trivial_case_ignore (size_t len, char const *keys,
-                     size_t *new_len, char **new_keys)
-{
-  /* FIXME: consider removing the following restriction:
-     Reject if KEYS contain ASCII '\\' or '['.  */
-  if (memchr (keys, '\\', len) || memchr (keys, '[', len))
-    return false;
-
-  /* Worst case is that each byte B of KEYS is ASCII alphabetic and
-     CASE_FOLDED_BUFSIZE other_case(B) characters, C through Z, each
-     occupying MB_CUR_MAX bytes, so each B maps to [BC...Z], which
-     requires CASE_FOLDED_BUFSIZE * MB_CUR_MAX + 3 bytes; this is
-     bounded above by the constant expression CASE_FOLDED_BUFSIZE *
-     MB_LEN_MAX + 3.  */
-  *new_keys = xnmalloc (len + 1, CASE_FOLDED_BUFSIZE * MB_LEN_MAX + 3);
-  char *p = *new_keys;
-
-  mbstate_t mb_state = { 0 };
-  while (len)
-    {
-      bool initial_state = mbsinit (&mb_state) != 0;
-      wchar_t wc;
-      size_t n = MBRTOWC (&wc, keys, len, &mb_state);
-
-      /* For an invalid, incomplete or L'\0', skip this optimization.  */
-      if ((size_t) -2 <= n)
-        {
-        skip_case_ignore_optimization:
-          free (*new_keys);
-          return false;
-        }
-
-      char const *orig = keys;
-      keys += n;
-      len -= n;
-
-      wchar_t folded[CASE_FOLDED_BUFSIZE];
-      int nfolded = case_folded_counterparts (wc, folded);
-      if (nfolded <= 0)
-        {
-          memcpy (p, orig, n);
-          p += n;
-        }
-      else if (! initial_state)
-        goto skip_case_ignore_optimization;
-      else
-        {
-          *p++ = '[';
-          memcpy (p, orig, n);
-          p += n;
-
-          int i = 0;
-          do
-            {
-              size_t nbytes = WCRTOMB (p, folded[i], &mb_state);
-              if (nbytes == (size_t) -1)
-                goto skip_case_ignore_optimization;
-              p += nbytes;
-            }
-          while (++i < nfolded);
-
-          if (! mbsinit (&mb_state))
-            goto skip_case_ignore_optimization;
-
-          *p++ = ']';
-        }
-    }
-
-  *new_len = p - *new_keys;
-
-  return true;
-}
-
 int
 main (int argc, char **argv)
 {
@@ -2432,34 +2343,6 @@ main (int argc, char **argv)
       execute = EGexecute;
     }
 
-  /* Case-insensitive matching is expensive in multibyte locales
-     because a few characters may change size when converted to upper
-     or lower case.  To accommodate those, search the input one line
-     at a time, rather than using the much more efficient buffer search.
-
-     Try to convert a regular expression 'foo' (ignoring case) to an
-     equivalent regular expression '[fF][oO][oO]' (where case matters).
-     Not only does this avoid the expensive requirement to read and
-     process a line at a time, it also allows use of the kwset engine,
-     a win in non-UTF-8 multibyte locales.  */
-  if (match_icase)
-    {
-      size_t new_keycc;
-      char *new_keys;
-      /* It is not possible with -F, not useful with -P (pcre) and there is no
-         point when there is no regexp.  It also depends on which constructs
-         appear in the regexp.  See trivial_case_ignore for those details.  */
-      if (keycc
-          && ! (compile == Fcompile || compile == Pcompile)
-          && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys))
-        {
-          match_icase = 0;
-          free (keys);
-          keys = new_keys;
-          keycc = new_keycc;
-        }
-    }
-
   if (MB_CUR_MAX > 1)
     build_mbclen_cache ();
 

-----------------------------------------------------------------------

Summary of changes:
 bootstrap.conf  |    1 -
 src/dfa.c       |   11 +++--
 src/dfa.h       |    8 ----
 src/dfasearch.c |    6 ---
 src/grep.c      |  119 +------------------------------------------------------
 5 files changed, 8 insertions(+), 137 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]