From 2e4192963c6ffc34756bfa603fcd03ff7aa3a297 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 11 Apr 2014 10:57:08 -0700 Subject: [PATCH 1/2] grep: cleanup for empty-string fix * NEWS: Document it. * src/dfasearch.c (GEAcompile): * src/kwsearch.c (Fcompile): Use C99-style decls to simplify. Avoid duplicate code. * tests/empty-line: Add some more tests like this. --- NEWS | 3 +++ src/dfasearch.c | 32 ++++++++++---------------------- src/kwsearch.c | 12 ++++-------- tests/empty-line | 38 +++++++++++++++++++++++++++++++------- 4 files changed, 48 insertions(+), 37 deletions(-) diff --git a/NEWS b/NEWS index 2a62e7b..92ce95e 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,9 @@ GNU grep NEWS -*- outline -*- mishandles patterns like [^a] in locales that have multicharacter collating sequences so that [^a] can match a string of two characters. + grep no longer mishandles an empty pattern at the end of a pattern list. + [bug introduced in grep-2.5] + grep -P now works with -w and -x and backreferences. Before, echo aa|grep -Pw '(.)\1' would fail to match, yet echo aa|grep -Pw '(.)\2' would match. diff --git a/src/dfasearch.c b/src/dfasearch.c index 39ea442..1266c80 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -110,8 +110,6 @@ kwsmusts (void) void GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) { - const char *err; - const char *p, *sep; size_t total = size; char *motif; @@ -120,15 +118,15 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) re_set_syntax (syntax_bits); dfasyntax (syntax_bits, match_icase, eolbyte); - /* For GNU regex compiler we have to pass the patterns separately to detect - errors like "[\nallo\n]\n". The patterns here are "[", "allo" and "]" - GNU regex should have raise a syntax error. The same for backref, where - the backref should have been local to each pattern. */ - p = pattern; + /* For GNU regex, pass the patterns separately to detect errors like + "[\nallo\n]\n", where the patterns are "[", "allo" and "]", and + this should be a syntax error. The same for backref, where the + backref should be local to each pattern. */ + char const *p = pattern; do { size_t len; - sep = memchr (p, '\n', total); + char const *sep = memchr (p, '\n', total); if (sep) { len = sep - p; @@ -144,24 +142,14 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits) patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns); patterns[pcount] = patterns0; - if ((err = re_compile_pattern (p, len, - &(patterns[pcount].regexbuf))) != NULL) + char const *err = re_compile_pattern (p, len, + &(patterns[pcount].regexbuf)); + if (err) error (EXIT_TROUBLE, 0, "%s", err); pcount++; - p = sep; - } while (sep && total != 0); - - if (sep) - { - patterns = xnrealloc (patterns, pcount + 1, sizeof *patterns); - patterns[pcount] = patterns0; - - if ((err = re_compile_pattern ("", 0, - &(patterns[pcount].regexbuf))) != NULL) - error (EXIT_TROUBLE, 0, "%s", err); - pcount++; } + while (p); /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. diff --git a/src/kwsearch.c b/src/kwsearch.c index 7fe8e48..cf8df3c 100644 --- a/src/kwsearch.c +++ b/src/kwsearch.c @@ -32,7 +32,6 @@ static kwset_t kwset; void Fcompile (char const *pattern, size_t size) { - char const *p, *sep; size_t total = size; mb_len_map_t *map = NULL; char const *pat = (match_icase && MB_CUR_MAX > 1 @@ -41,11 +40,11 @@ Fcompile (char const *pattern, size_t size) kwsinit (&kwset); - p = pat; + char const *p = pat; do { size_t len; - sep = memchr (p, '\n', total); + char const *sep = memchr (p, '\n', total); if (sep) { len = sep - p; @@ -63,12 +62,9 @@ Fcompile (char const *pattern, size_t size) } kwsincr (kwset, p, len); - p = sep; - } while (sep && total != 0); - - if (sep) - kwsincr (kwset, "", 0); + } + while (p); kwsprep (kwset); } diff --git a/tests/empty-line b/tests/empty-line index aeaa6ca..25e9509 100755 --- a/tests/empty-line +++ b/tests/empty-line @@ -1,17 +1,41 @@ #! /bin/sh -# This would fail for grep-2.18 +# Test that the empty pattern matches everything. +# Some of these tests failed in grep 2.18. . "${srcdir=.}/init.sh"; path_prepend_ ../src fail=0 printf 'abc\n' >in || framework_failure_ +nl=' +' -printf 'foo\n\n' >pat || framework_failure_ -grep -F -f pat in >out || fail=1 -compare in out || fail=1 +for opt in '' -E -F; do + case $opt in + '') prefix='\(\)\1';; + -E) prefix='()\1';; + -F) prefix="foo$nl";; + esac -printf '\(\)\\1foo\n\n' >pat || framework_failure_ -grep -f pat in >out || fail=1 -compare in out || fail=1 + for pattern in "" "$nl" "---$nl" "${nl}foo"; do + for pat in "$pattern" "$prefix$pattern"; do + grep $opt -e "$pat" in >out || fail=1 + compare in out || fail=1 + + printf -- '%s\n' "$pat" >pat || framework_failure_ + grep $opt -f pat in >out || fail=1 + compare in out || fail=1 + + # Check that pattern files that end in non-newlines + # are treated as if a newline were appended. + case $pattern in + '' | *"$nl") ;; + *) + printf -- '%s' "$pat" >pat || framework_failure_ + grep $opt -f pat in >out || fail=1 + compare in out || fail=1;; + esac + done + done +done Exit $fail -- 1.9.0