From ed6228198180fedc728a4e2981939fa0c902bbf3 Mon Sep 17 00:00:00 2001
From: Paul Eggert
Date: Sun, 20 Nov 2016 20:31:01 -0800
Subject: [PATCH 2/2] tests: check for unibyte French range bug

Problem reported by Stephane Chazelas (Bug#24973).
This bug was fixed in Gnulib.
* NEWS: Document the fix.
* tests/init.cfg (require_ru_RU_koi8_r): Remove.
* tests/unibyte-bracket-expr: Add a test for the bug.
Call get-mb-cur-max directly instead of bothering with
require_ru_RU_koi8_r.
---
Reviewer notes (text in this section is not part of the commit message):
* Line structure was restored from a whitespace-collapsed archive copy.
  Indentation and double spaces inside context lines are reconstructed;
  verify them against the target tree before applying.
* "address@hidden" in the French locale list was archive obfuscation of
  a locale name containing '@'; restored as fr_FR.iso885915@euro
  (presumed -- confirm against the upstream test).
* fail=0 now precedes the KOI8-R 'case' so that 'Exit $fail' sees an
  initialized value even when that locale is unavailable.
* The From: header lacks the author address that the archive stripped.

 NEWS                       |  3 +++
 tests/init.cfg             |  9 -------
 tests/unibyte-bracket-expr | 58 ++++++++++++++++++++++++++++------------------
 3 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/NEWS b/NEWS
index 6138b48..bd1a201 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,9 @@ GNU grep NEWS -*- outline -*-
   >/dev/null" where PROGRAM dies when writing into a broken pipe.
   [bug introduced in grep-2.26]
 
+  grep no longer mishandles ranges in nontrivial unibyte locales.
+  [bug introduced in grep-2.26]
+
   grep -P no longer attempts multiline matches.  This works more
   intuitively with unusual patterns, and means that grep -Pz no longer
   rejects patterns containing ^ and $ and works when combined with -x.
diff --git a/tests/init.cfg b/tests/init.cfg
index 1677ec5..6c7abd2 100644
--- a/tests/init.cfg
+++ b/tests/init.cfg
@@ -74,15 +74,6 @@ require_tr_utf8_locale_()
   esac
 }
 
-require_ru_RU_koi8_r()
-{
-  path_prepend_ .
-  case $(get-mb-cur-max ru_RU.KOI8-R) in
-    1) ;;
-    *) skip_ 'ru_RU.KOI8-R locale not found' ;;
-  esac
-}
-
 require_compiled_in_MB_support()
 {
   require_en_utf8_locale_
diff --git a/tests/unibyte-bracket-expr b/tests/unibyte-bracket-expr
index 68c475c..85aff1c 100755
--- a/tests/unibyte-bracket-expr
+++ b/tests/unibyte-bracket-expr
@@ -1,9 +1,4 @@
 #!/bin/sh
-# Exercise a DFA range bug that arises only with a unibyte encoding
-# for which the wide-char-to-single-byte mapping is nontrivial.
-# E.g., the regexp, [C] would fail to match C in a unibyte locale like
-# ru_RU.KOI8-R for any C whose wide-char representation differed from
-# its single-byte equivalent.
 
 # Copyright (C) 2011-2016 Free Software Foundation, Inc.
 
@@ -21,23 +16,42 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 . "${srcdir=.}/init.sh"; path_prepend_ ../src
-require_ru_RU_koi8_r
-LC_ALL=ru_RU.KOI8-R
-export LC_ALL
-
-fail=0
-
-i=128
-while :; do
-  in=in-$i
-  octal=$(printf '%03o' $i)
-  b=$(printf "\\$octal")
-  echo "$b" > $in || framework_failure_
-  grep "[$b]" $in > out || fail=1
-  compare out $in || fail=1
-
-  test $i = 255 && break
-  i=$(expr $i + 1)
+
+# Add "." to PATH for the use of get-mb-cur-max.
+path_prepend_ .
+
+# Exercise a DFA range bug that arises only with a unibyte encoding
+# for which the wide-char-to-single-byte mapping is nontrivial.
+# E.g., the regexp, [C] would fail to match C in a unibyte locale like
+# ru_RU.KOI8-R for any C whose wide-char representation differed from
+# its single-byte equivalent.
+
+fail=0
+
+case $(get-mb-cur-max ru_RU.KOI8-R) in
+  1)
+    i=128
+    while :; do
+      in=in-$i
+      octal=$(printf '%03o' $i)
+      b=$(printf "\\$octal")
+      echo "$b" > $in || framework_failure_
+      LC_ALL=ru_RU.KOI8-R grep "[$b]" $in > out || fail=1
+      compare out $in || fail=1
+
+      test $i = 255 && break
+      i=$(expr $i + 1)
+    done;;
+esac
+
+# Exercise a DFA range bug where '[d-f]' did not match accented 'e' in a
+# unibyte French locale.
+
+for locale in fr_FR.iso88591 fr_FR.iso885915@euro fr_FR.ISO8859-1; do
+  case $(get-mb-cur-max $locale) in
+    1)
+      printf '\351\n' | LC_ALL=$locale grep '[d-f]' || fail=1;;
+  esac
 done
 
 Exit $fail
-- 
2.7.4