bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: The bug in the Grep Command


From: Jim Meyering
Subject: Re: The bug in the Grep Command
Date: Tue, 07 Aug 2012 21:26:15 +0200

Jim Meyering wrote:
> Eric Blake wrote:
>> On 08/06/2012 05:34 AM, Jim Meyering wrote:
> ...
>>> +  even though there was no match, and the command generated not output.
>>
>> s/not/no/
>>
>>> +              /* If there's no match, of if we've matched the sentinel,
>>
>> s/of/or/
>
> Thanks!

Here are a complete patch, a test suite addition and a gnulib update:

From c96b0f2c8220f171f8fabd62aa1457ebff4d9277 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Mon, 6 Aug 2012 13:29:51 +0200
Subject: [PATCH 1/3] grep -i '^$' in a multi-byte locale could report a false
 match

* src/dfasearch.c (EGexecute): Do not match the sentinel "newline"
that is appended to each buffer.
This bug may sound like a big deal (it certainly surprised me), but
realize that only the empty-line-matching regular expression '^$'
can trigger it, and then only when you add the unnecessary (and
arguably superfluous) -i, *and* run the command in a multi-byte
locale.  Using a multi-byte locale for such a regular expression
is also pointless, and hurts performance.
* NEWS (Bug fixes): Mention it.
Reported by Alexander Katassonov <address@hidden>
---
 NEWS            | 7 +++++++
 src/dfasearch.c | 4 +++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index fdba25e..8f8d886 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,13 @@ GNU grep NEWS                                    -*- outline -*-

 ** Bug fixes

+  grep -i '^$' could exit 0 (i.e., report a match) in a multi-byte locale,
+  even though there was no match, and the command generated no output.
+  E.g., seq 2 | LC_ALL=en_US.utf8 grep -il '^$' would mistakenly print
+  "(standard input)".  Related, seq 9 | LC_ALL=en_US.utf8 grep -in '^$'
+  would print "2:4:6:8:10:12:14:16" and exit 0.  Now it prints nothing
+  and exits with status of 1.  [bug introduced in grep-2.6]
+
   'grep' no longer falsely reports text files as being binary on file
   systems that compress contents or that store tiny contents in metadata.

diff --git a/src/dfasearch.c b/src/dfasearch.c
index 1121176..eaf783e 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -277,7 +277,9 @@ EGexecute (char const *buf, size_t size, size_t *match_size,
               /* No good fixed strings; start with DFA. */
               char const *next_beg = dfaexec (dfa, beg, (char *) buflim,
                                               0, NULL, &backref);
-              if (next_beg == NULL)
+              /* If there's no match, or if we've matched the sentinel,
+                 we're done.  */
+              if (next_beg == NULL || next_beg == buflim)
                 break;
               /* Narrow down to the line we've found. */
               beg = next_beg;
--
1.7.12.rc1.22.gbfbf4d4


From 6dda5eec9f5ec36d6b58bd9f1fd2bc9c130022af Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Sun, 5 Aug 2012 23:22:28 +0200
Subject: [PATCH 2/3] tests: test for bug with -i and ^$ in a multi-byte
 locale

* tests/empty-line-mb: New file.
* tests/Makefile.am (TESTS): Add it.
---
 tests/Makefile.am   |  1 +
 tests/empty-line-mb | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100755 tests/empty-line-mb

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 7d95862..58deaa2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -69,6 +69,7 @@ TESTS =                                               \
   inconsistent-range                            \
   khadafy                                      \
   max-count-vs-context                         \
+  empty-line-mb                                        \
   unibyte-bracket-expr                         \
   high-bit-range                               \
   options                                      \
diff --git a/tests/empty-line-mb b/tests/empty-line-mb
new file mode 100755
index 0000000..9391000
--- /dev/null
+++ b/tests/empty-line-mb
@@ -0,0 +1,29 @@
+#! /bin/sh
+# Exercise bugs in grep-2.13 with -i, -n and an RE of ^$ in a multi-byte locale.
+#
+# Copyright (C) 2012 Free Software Foundation, Inc.
+#
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+require_en_utf8_locale_
+
+LC_ALL=en_US.UTF-8
+export LC_ALL
+
+printf 'a\n\nb\n' > in || framework_failure_
+printf '2:\n' > exp || framework_failure_
+
+printf 'a\nb\n' > in2 || framework_failure_
+
+grep -n -i '^$' in > out || fail=1
+compare exp out || fail=1
+
+# Expect no match: with grep-2.13 this would mistakenly exit 0
+grep -i '^$' in2 > out && fail=1
+compare /dev/null out || fail=1
+
+Exit $fail
--
1.7.12.rc1.22.gbfbf4d4


From 86a709a5d48b25d6c0d9ba618e050548494b790a Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Mon, 6 Aug 2012 17:19:06 +0200
Subject: [PATCH 3/3] build: update gnulib and bootstrap

---
 bootstrap | 4 +++-
 gnulib    | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/bootstrap b/bootstrap
index e00c8bb..e3e270b 100755
--- a/bootstrap
+++ b/bootstrap
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Print a version string.
-scriptversion=2012-07-10.09; # UTC
+scriptversion=2012-07-19.14; # UTC

 # Bootstrap this package from checked-out sources.

@@ -214,6 +214,8 @@ use_git=true
 # otherwise find the first of the NAMES that can be run (i.e.,
 # supports --version).  If found, set ENVVAR to the program name,
 # die otherwise.
+#
+# FIXME: code duplication, see also gnu-web-doc-update.
 find_tool ()
 {
   find_tool_envvar=$1
diff --git a/gnulib b/gnulib
index 6c37e0a..a451aa0 160000
--- a/gnulib
+++ b/gnulib
@@ -1 +1 @@
-Subproject commit 6c37e0a73c7c1b6fe6eac4d794e2e65791a2700d
+Subproject commit a451aa0d0cbbec1efae7d07eb002fd4220511a27
--
1.7.12.rc1.22.gbfbf4d4



reply via email to

[Prev in Thread] Current Thread [Next in Thread]