bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

EGexecute: don't assume buffer ends in a newline


From: Bruno Haible
Subject: EGexecute: don't assume buffer ends in a newline
Date: Mon, 24 May 2010 02:59:15 +0200
User-agent: KMail/1.9.9

Hi,

There was some fix in
<http://git.savannah.gnu.org/gitweb/?p=grep.git;a=commitdiff;h=246dcbdb86292eb65d20ece05ecab823653e8411>
to make Fexecute and EGexecute not assume that the buffer ends in a newline.
As I understand it, the 'grep' program itself does not need this, because it
always ensures that the buffer ends in a newline, but the librarified version
may need it.

That fix is incomplete: the code still uses 'end - 1' even when end[-1] is
not a newline.

I noticed this because in GNU gettext, I've now disabled the dfa and kwset
optimization, and the regular expression "error" is now reported as _not_
matching the buffer "Unknown system error" (whereas the regular expression
"erro" is reported as matching!). The reason is that 'end' gets positioned
at the end of that string, and the code uses 'end - 1' in an attempt to
strip a newline that is not present, thereby cutting off the final 'r'.

This fixes it for me, and causes no regressions in grep's testsuite.

For your review. I don't have write access to the 'grep' repository, but
have a copyright assignment for 'grep' in place.


2010-05-23  Bruno Haible  <address@hidden>

        EGexecute: Don't assume the buffer ends in a newline.
        * src/dfasearch.c (EGexecute): Don't ignore the last byte of the buffer
        if it is not a newline.

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 6178b33..558f036 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -241,9 +241,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
               beg += offset;
               /* Narrow down to the line containing the candidate, and
                  run it through DFA. */
-              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
-                end++;
-              else
+              if ((end = memchr (beg, eol, buflim - beg)) == NULL)
                 end = buflim;
               match = beg;
               while (beg > buf && beg[-1] != eol)
@@ -259,7 +257,8 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
 #endif
                     goto success;
                 }
-              if (dfaexec (dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
+              if (dfaexec (dfa, beg, (char *) end + (end < buflim), 0, NULL,
+                           &backref) == NULL)
                 continue;
             }
           else
@@ -271,9 +270,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
                 break;
               /* Narrow down to the line we've found. */
               beg = next_beg;
-              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
-                end++;
-              else
+              if ((end = memchr (beg, eol, buflim - beg)) == NULL)
                 end = buflim;
               while (beg > buf && beg[-1] != eol)
                 --beg;
@@ -288,18 +285,21 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
              We will go through the outer loop only once.  */
           beg = start_ptr;
           end = buflim;
+          if (end > beg && end[-1] == eol)
+            --end;
         }
+      /* Here, either end < buflim && *end == eol, or end == buflim.  */
 
       /* If we've made it to this point, this means DFA has seen
          a probable match, and we need to run it through Regex. */
-      best_match = end;
+      best_match = end + 1;
       best_len = 0;
       for (i = 0; i < pcount; i++)
         {
           patterns[i].regexbuf.not_eol = 0;
           if (0 <= (start = re_search (&(patterns[i].regexbuf),
-                                       buf, end - buf - 1,
-                                       beg - buf, end - beg - 1,
+                                       buf, end - buf,
+                                       beg - buf, end - beg,
                                        &(patterns[i].regs))))
             {
               len = patterns[i].regs.end[0] - start;
@@ -309,7 +309,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
               if (start_ptr && !match_words)
                 goto assess_pattern_match;
               if ((!match_lines && !match_words)
-                  || (match_lines && len == end - beg - 1))
+                  || (match_lines && len == end - beg))
                 {
                   match = beg;
                   len = end - beg;
@@ -326,7 +326,7 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
                 while (match <= best_match)
                   {
                     if ((match == buf || !WCHAR ((unsigned char) match[-1]))
-                        && (len == end - beg - 1
+                        && (len == end - beg
                             || !WCHAR ((unsigned char) match[len])))
                       goto assess_pattern_match;
                     if (len > 0)
@@ -341,13 +341,13 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
                     if (len <= 0)
                       {
                         /* Try looking further on. */
-                        if (match == end - 1)
+                        if (match == end)
                           break;
                         match++;
                         patterns[i].regexbuf.not_eol = 0;
                         start = re_search (&(patterns[i].regexbuf),
-                                           buf, end - buf - 1,
-                                           match - buf, end - match - 1,
+                                           buf, end - buf,
+                                           match - buf, end - match,
                                            &(patterns[i].regs));
                         if (start < 0)
                           break;
@@ -371,14 +371,16 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
                 }
             } /* if re_search >= 0 */
         } /* for Regex patterns.  */
-        if (best_match < end)
-          {
-            /* We have found an exact match.  We were just
-               waiting for the best one (leftmost then longest).  */
-            beg = best_match;
-            len = best_len;
-            goto success_in_len;
-          }
+      if (best_match <= end)
+        {
+          /* We have found an exact match.  We were just
+             waiting for the best one (leftmost then longest).  */
+          beg = best_match;
+          len = best_len;
+          goto success_in_len;
+        }
+      if (end < buflim)
+        end++; /* skip past newline */
     } /* for (beg = end ..) */
 
  failure:
@@ -386,6 +388,8 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
   goto out;
 
  success:
+  if (end < buflim)
+    end++; /* include newline */
   len = end - beg;
  success_in_len:
   *match_size = len;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]