grep-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

grep branch, master, updated. v2.18-91-g14892aa


From: Paul Eggert
Subject: grep branch, master, updated. v2.18-91-g14892aa
Date: Wed, 23 Apr 2014 06:55:27 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  14892aa6e0c21f49e5ec6d203253074ef15fedb0 (commit)
       via  73893ffbada36599fb6ec2eb489b6a7decf0c248 (commit)
      from  c7ea5aea911b950b2398454ca89cce23cabd3a40 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=14892aa6e0c21f49e5ec6d203253074ef15fedb0


commit 14892aa6e0c21f49e5ec6d203253074ef15fedb0
Author: Paul Eggert <address@hidden>
Date:   Tue Apr 22 23:34:22 2014 -0700

    kwset: simplify and speed up Boyer-Moore unibyte -i in some cases
    
    This improves the performance of, for example,
    yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk
    in a unibyte locale.
    * src/kwset.c (memchr_trans): New function.
    (bmexec): Use it.  Simplify the code and remove some of the
    confusing gotos and breaks and labels.  Do not treat glibc memchr
    as a special case; if non-glibc memchr is slow, that is lower
    priority and I suppose we can try to work around the problem in
    gnulib.

diff --git a/src/kwset.c b/src/kwset.c
index 78fb0b2..f86ee03 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -524,6 +524,20 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
   return false;
 }
 
+/* Return the address of the first byte in the buffer S that equals C.
+   S contains N bytes.  If TRANS is nonnull, use it to transliterate
+   S's bytes before comparing them.  */
+static char const *
+memchr_trans (char const *s, char c, size_t n, char const *trans)
+{
+  if (! trans)
+    return memchr (s, c, n);
+  char const *slim = s + n;
+  for (; s < slim; s++)
+    if (trans[U(*s)] == c)
+      return s;
+  return NULL;
+}
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -541,18 +555,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
     return -1;
   if (len == 1)
     {
-      if (trans)
-        {
-          for (tp = text; tp < text + size; tp++)
-            if (trans[U(*tp)] == kwset->target[0])
-              return tp - text;
-          return -1;
-        }
-      else
-        {
-          tp = memchr (text, kwset->target[0], size);
-          return tp ? tp - text : -1;
-        }
+      tp = memchr_trans (text, kwset->target[0], size, trans);
+      return tp ? tp - text : -1;
     }
 
   d1 = kwset->delta;
@@ -564,48 +568,33 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
     /* 11 is not a bug, the initial offset happens only once. */
-    for (ep = text + size - 11 * len;;)
+    for (ep = text + size - 11 * len; tp <= ep; )
       {
-        while (tp <= ep)
+        d = d1[U(tp[-1])], tp += d;
+        d = d1[U(tp[-1])], tp += d;
+        if (d != 0)
           {
             d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            /* memchar() of glibc is faster than seeking by delta1 on
-               some platforms.  When there is no chance to match for a
-               while, use it on them.  */
-#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
-            if (!trans)
-              {
-                tp = memchr (tp - 1, gc1, size + text - tp + 1);
-                if (tp)
-                  {
-                    ++tp;
-                    goto found;
-                  }
-                else
-                  return -1;
-              }
-            else
-#endif
+            if (d != 0)
               {
                 d = d1[U(tp[-1])], tp += d;
                 d = d1[U(tp[-1])], tp += d;
+                d = d1[U(tp[-1])], tp += d;
+                if (d != 0)
+                  {
+                    /* Typically memchr is faster than seeking by
+                       delta1 when there is no chance to match for
+                       a while.  */
+                    tp--;
+                    tp = memchr_trans (tp, gc1, text + size - tp, trans);
+                    if (! tp)
+                      return -1;
+                    tp++;
+                  }
               }
           }
-        break;
-      found:
         if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
           return tp - text;
       }

http://git.savannah.gnu.org/cgit/grep.git/commit/?id=73893ffbada36599fb6ec2eb489b6a7decf0c248


commit 14892aa6e0c21f49e5ec6d203253074ef15fedb0
Author: Paul Eggert <address@hidden>
Date:   Tue Apr 22 23:34:22 2014 -0700

    kwset: simplify and speed up Boyer-Moore unibyte -i in some cases
    
    This improves the performance of, for example,
    yes jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj | head -10000000 | grep -i jk
    in a unibyte locale.
    * src/kwset.c (memchr_trans): New function.
    (bmexec): Use it.  Simplify the code and remove some of the
    confusing gotos and breaks and labels.  Do not treat glibc memchr
    as a special case; if non-glibc memchr is slow, that is lower
    priority and I suppose we can try to work around the problem in
    gnulib.

diff --git a/src/kwset.c b/src/kwset.c
index 78fb0b2..f86ee03 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -524,6 +524,20 @@ bm_delta2_search (char const **tpp, char const *ep, char const *sp, int len,
   return false;
 }
 
+/* Return the address of the first byte in the buffer S that equals C.
+   S contains N bytes.  If TRANS is nonnull, use it to transliterate
+   S's bytes before comparing them.  */
+static char const *
+memchr_trans (char const *s, char c, size_t n, char const *trans)
+{
+  if (! trans)
+    return memchr (s, c, n);
+  char const *slim = s + n;
+  for (; s < slim; s++)
+    if (trans[U(*s)] == c)
+      return s;
+  return NULL;
+}
 
 /* Fast boyer-moore search. */
 static size_t _GL_ATTRIBUTE_PURE
@@ -541,18 +555,8 @@ bmexec (kwset_t kwset, char const *text, size_t size)
     return -1;
   if (len == 1)
     {
-      if (trans)
-        {
-          for (tp = text; tp < text + size; tp++)
-            if (trans[U(*tp)] == kwset->target[0])
-              return tp - text;
-          return -1;
-        }
-      else
-        {
-          tp = memchr (text, kwset->target[0], size);
-          return tp ? tp - text : -1;
-        }
+      tp = memchr_trans (text, kwset->target[0], size, trans);
+      return tp ? tp - text : -1;
     }
 
   d1 = kwset->delta;
@@ -564,48 +568,33 @@ bmexec (kwset_t kwset, char const *text, size_t size)
   /* Significance of 12: 1 (initial offset) + 10 (skip loop) + 1 (md2). */
   if (size > 12 * len)
     /* 11 is not a bug, the initial offset happens only once. */
-    for (ep = text + size - 11 * len;;)
+    for (ep = text + size - 11 * len; tp <= ep; )
       {
-        while (tp <= ep)
+        d = d1[U(tp[-1])], tp += d;
+        d = d1[U(tp[-1])], tp += d;
+        if (d != 0)
           {
             d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            d = d1[U(tp[-1])], tp += d;
             d = d1[U(tp[-1])], tp += d;
-            d = d1[U(tp[-1])], tp += d;
-            if (d == 0)
-              goto found;
-            /* memchar() of glibc is faster than seeking by delta1 on
-               some platforms.  When there is no chance to match for a
-               while, use it on them.  */
-#if defined(__GLIBC__) && (defined(__i386__) || defined(__x86_64__))
-            if (!trans)
-              {
-                tp = memchr (tp - 1, gc1, size + text - tp + 1);
-                if (tp)
-                  {
-                    ++tp;
-                    goto found;
-                  }
-                else
-                  return -1;
-              }
-            else
-#endif
+            if (d != 0)
               {
                 d = d1[U(tp[-1])], tp += d;
                 d = d1[U(tp[-1])], tp += d;
+                d = d1[U(tp[-1])], tp += d;
+                if (d != 0)
+                  {
+                    /* Typically memchr is faster than seeking by
+                       delta1 when there is no chance to match for
+                       a while.  */
+                    tp--;
+                    tp = memchr_trans (tp, gc1, text + size - tp, trans);
+                    if (! tp)
+                      return -1;
+                    tp++;
+                  }
               }
           }
-        break;
-      found:
         if (bm_delta2_search (&tp, ep, sp, len, trans, gc1, gc2, d1, kwset))
           return tp - text;
       }

-----------------------------------------------------------------------

Summary of changes:
 src/kwset.c |   66 +++++++++++++++++++++++++++++++++-------------------------
 1 files changed, 37 insertions(+), 29 deletions(-)


hooks/post-receive
-- 
grep



reply via email to

[Prev in Thread] Current Thread [Next in Thread]