From abc6e54e759f7bd224e687c796aa8c89a6d091fa Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Fri, 18 Dec 2015 15:21:32 -0800 Subject: [PATCH] grep -oP: don't infloop when processing invalid UTF8 preceding a match * src/pcresearch.c (Pexecute): When advancing SUBJECT past an encoding error, don't blindly set P to that new value, since we will soon compute SEARCH_OFFSET = P - SUBJECT, and mistakenly making that difference too small would allow us to match some previously-processed text, resulting in an infinite loop. * NEWS (Bug fixes): Mention it. * THANKS.in: Add Christian's name and email address. * tests/pcre-invalid-utf8-infloop: New file. * tests/Makefile.am (TESTS): Add it. Reported by Christian Boltz in http://debbugs.gnu.org/22181 Introduced by commit v2.21-37-g14f8e48. --- NEWS | 6 ++++++ THANKS.in | 1 + src/pcresearch.c | 4 +++- tests/Makefile.am | 1 + tests/pcre-invalid-utf8-infloop | 26 ++++++++++++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) create mode 100755 tests/pcre-invalid-utf8-infloop diff --git a/NEWS b/NEWS index ac632d7..fc5fd3a 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,12 @@ GNU grep NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + + grep -oP is no longer susceptible to an infinite loop when processing + invalid UTF8 just before a match. 
+ [bug introduced in grep-2.22] + * Noteworthy changes in release 2.22 (2015-11-01) [stable] diff --git a/THANKS.in b/THANKS.in index b86f772..abd64fd 100644 --- a/THANKS.in +++ b/THANKS.in @@ -18,6 +18,7 @@ Ben Elliston address@hidden Bernd Strieder address@hidden Bob Proulx address@hidden Brian Youmans address@hidden +Christian Boltz address@hidden Christian Groessler address@hidden Dagobert Michelsen address@hidden Daisuke GOTO address@hidden diff --git a/src/pcresearch.c b/src/pcresearch.c index b1f8310..dc68345 100644 --- a/src/pcresearch.c +++ b/src/pcresearch.c @@ -289,7 +289,9 @@ Pexecute (char const *buf, size_t size, size_t *match_size, break; /* Treat the encoding error as data that cannot match. */ - p = subject += valid_bytes + 1; + subject += valid_bytes + 1; + if (p < subject) + p = subject; bol = false; } diff --git a/tests/Makefile.am b/tests/Makefile.am index d379821..6ced67c 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -107,6 +107,7 @@ TESTS = \ pcre-context \ pcre-infloop \ pcre-invalid-utf8-input \ + pcre-invalid-utf8-infloop \ pcre-jitstack \ pcre-o \ pcre-utf8 \ diff --git a/tests/pcre-invalid-utf8-infloop b/tests/pcre-invalid-utf8-infloop new file mode 100755 index 0000000..2b696b4 --- /dev/null +++ b/tests/pcre-invalid-utf8-infloop @@ -0,0 +1,26 @@ +#! /bin/sh +# Ensure that grep -oaP doesn't infloop for invalid multi-byte input +# +# Copyright (C) 2015 Free Software Foundation, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. + +. 
"${srcdir=.}/init.sh"; path_prepend_ ../src +require_timeout_ +require_en_utf8_locale_ +require_compiled_in_MB_support +LC_ALL=en_US.UTF-8 require_pcre_ + +fail=0 + +printf '\201_\0' > in || framework_failure_ +printf '_\n' > exp || framework_failure_ + +LC_ALL=en_US.UTF-8 timeout 3 grep -aoP _ in > out 2> err || fail=1 + +compare exp out || fail=1 +compare /dev/null err || fail=1 + +Exit $fail -- 2.6.2