From 0e8fda0d880cccd0e1997a905eb9a7910f957245 Mon Sep 17 00:00:00 2001 From: Paul Eggert <eggert@cs.ucla.edu> Date: Fri, 3 Jul 2015 18:23:59 -0700 Subject: [PATCH 1/6] grep: -z '.' now consistently matches newline Problem reported by Balazs Kezes in: http://bugs.gnu.org/20974 * NEWS: Document this. * tests/utf8-bracket: New file, to test for this bug. * src/grep.c (Gcompile, Ecompile): Also specify RE_DOT_NEWLINE. * tests/Makefile.am (TESTS): Add it. --- NEWS | 4 ++++ src/grep.c | 7 ++++--- tests/Makefile.am | 1 + tests/utf8-bracket | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) create mode 100755 tests/utf8-bracket diff --git a/NEWS b/NEWS index bbbe893..88ed0f4 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,10 @@ GNU grep NEWS -*- outline -*- grep no longer reads from uninitialized memory or from beyond the end of the heap-allocated input buffer. This fix addressed CVE-2015-1345. + With -z, '.' in a pattern now consistently matches newline. + Previously, it sometimes matched newline, and sometimes did not. + [bug introduced in grep-2.4] + When the JIT stack is exhausted, grep -P now grows the stack rather than reporting an internal PCRE error. 
diff --git a/src/grep.c b/src/grep.c index 778dbcb..ed54dc2 100644 --- a/src/grep.c +++ b/src/grep.c @@ -1861,15 +1861,16 @@ if any error occurs and -q is not given, the exit status is 2.\n")); static void Gcompile (char const *pattern, size_t size) { - GEAcompile (pattern, size, RE_SYNTAX_GREP | RE_NO_EMPTY_RANGES); + GEAcompile (pattern, size, + RE_SYNTAX_GREP | RE_DOT_NEWLINE | RE_NO_EMPTY_RANGES); } static void Ecompile (char const *pattern, size_t size) { GEAcompile (pattern, size, - (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES - | RE_UNMATCHED_RIGHT_PAREN_ORD)); + (RE_SYNTAX_POSIX_EGREP | RE_DOT_NEWLINE + | RE_NO_EMPTY_RANGES | RE_UNMATCHED_RIGHT_PAREN_ORD)); } static void diff --git a/tests/Makefile.am b/tests/Makefile.am index 7bceac7..629d322 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -120,6 +120,7 @@ TESTS = \ two-files \ unibyte-bracket-expr \ unibyte-negated-circumflex \ + utf8-bracket \ warn-char-classes \ word-delim-multibyte \ word-multi-file \ diff --git a/tests/utf8-bracket b/tests/utf8-bracket new file mode 100755 index 0000000..f5c4a60 --- /dev/null +++ b/tests/utf8-bracket @@ -0,0 +1,34 @@ +#!/bin/sh +# Check bracket expressions in a UTF-8 locale. + +# Copyright 2015 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/init.sh"; path_prepend_ ../src +require_en_utf8_locale_ + +printf '1\n2\n' >in || framework_failure_ + +fail=0 + +for locale in C en_US.UTF-8; do + for pattern in '1.2' '[12].2' '[1-2].2'; do + for suffix in '' '\(\)\1'; do + LC_ALL=$locale grep --null-data --quiet "$pattern$suffix" in || fail=1 + done + done +done + +Exit $fail -- 2.1.0