From 0876365223a52854be2d3b37b3f6bb3d3f9c8403 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sun, 25 Sep 2016 11:05:53 +0900 Subject: [PATCH] sed: fix incorrect match for title case * sed/regexp.c (compile_regex_1): Avoid building fastmap for case insensitive matching. * testsuite/mb-title-case.sh: Add new test for this fix. * testsuite/local.mk (T): Call the test and sort alphabetically. * init.cfg: Add require_el_iso88597_locale_. --- init.cfg | 9 +++++++++ sed/regexp.c | 7 ++++--- testsuite/local.mk | 3 ++- testsuite/mb-title-case.sh | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 4 deletions(-) create mode 100755 testsuite/mb-title-case.sh diff --git a/init.cfg b/init.cfg index 39f009c..55aeed6 100644 --- a/init.cfg +++ b/init.cfg @@ -52,6 +52,15 @@ require_en_utf8_locale_() esac } +require_el_iso88597_locale_() +{ + path_prepend_ ./testsuite + case $(get-mb-cur-max el_GR.iso88597) in + 1) ;; # helper printed exactly 1: fall through and let the test run + *) skip_ 'el_GR.iso88597 locale not found' ;; # any other output (or none): hand off to skip_ + esac +} + # Some tests would fail without this particular locale. # If the locale is not available, just skip the test. # The exact spelling differs between operating systems diff --git a/sed/regexp.c b/sed/regexp.c index 1eecd73..cf4f8a0 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -93,14 +93,15 @@ compile_regex_1 (struct regex *new_regex, int needed_sub) } #ifdef RE_ICASE - syntax |= (new_regex->flags & REG_ICASE) ? RE_ICASE : 0; + if (new_regex->flags & REG_ICASE) + syntax |= RE_ICASE; + else #endif + new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); #ifdef RE_NO_SUB syntax |= needed_sub ? 0 : RE_NO_SUB; #endif - new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); - /* If REG_NEWLINE is set, newlines are treated differently. 
*/ if (new_regex->flags & REG_NEWLINE) { diff --git a/testsuite/local.mk b/testsuite/local.mk index 6ae11e4..b30d44a 100644 --- a/testsuite/local.mk +++ b/testsuite/local.mk @@ -34,9 +34,10 @@ T = \ testsuite/in-place-hyphen.sh \ testsuite/in-place-suffix-backup.sh \ testsuite/invalid-mb-seq-UMR.sh \ + testsuite/mb-bad-delim.sh \ testsuite/mb-charclass-non-utf8.sh \ testsuite/mb-match-slash.sh \ - testsuite/mb-bad-delim.sh \ + testsuite/mb-title-case.sh \ testsuite/mb-y-translate.sh \ testsuite/normalize-text.sh \ testsuite/nulldata.sh \ diff --git a/testsuite/mb-title-case.sh b/testsuite/mb-title-case.sh new file mode 100755 index 0000000..a45434f --- /dev/null +++ b/testsuite/mb-title-case.sh @@ -0,0 +1,40 @@ +#!/bin/sh +# Test case insensitive matching for titlecase and similarly odd chars. + +# Copyright (C) 2016 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +. "${srcdir=.}/testsuite/init.sh"; path_prepend_ ./sed +print_ver_ sed + +fail=0 + +# Try a unibyte test with ISO 8859-7, if available. 
+if test "$(get-mb-cur-max el_GR.iso88597)" -eq 1; then + LC_ALL=el_GR.iso88597 + export LC_ALL + + a='\323' # SIGMA + b='\362' # final sigma + c='\363' # sigma + + printf "a$a\\na$b\\na$c\\n" >in || framework_failure_ + for chr in "$a" "$b" "$c"; do + printf "/\\(\\)\\\\1$chr/Ip" >prog || fail=1 + sed -n -f prog in >out || fail=1 # -n plus /.../Ip: output holds only the matching lines, so it must equal the input + compare_ in out || fail=1 + done +fi + +Exit $fail -- 1.7.1