From ba5017b65a45bd73ec156629e3796b6f3c33f95c Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Mon, 30 Oct 2023 01:24:28 -0700 Subject: [PATCH 10/11] maint: copy join, uniq tests from Fedora * tests/misc/join.pl, tests/uniq/uniq.pl: Copy from Fedora 39. This adds more multi-byte tests. --- tests/misc/join.pl | 50 +++++++++++++++++++++++++++++++++++++++++ tests/uniq/uniq.pl | 55 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/tests/misc/join.pl b/tests/misc/join.pl index 2ca8567ba..1d01a3d3d 100755 --- a/tests/misc/join.pl +++ b/tests/misc/join.pl @@ -25,6 +25,15 @@ my $limits = getlimits (); my $prog = 'join'; +my $try = "Try \`$prog --help' for more information.\n"; +my $inval = "$prog: invalid byte, character or field list\n$try"; + +my $mb_locale; +#Comment out next line to disable multibyte tests +$mb_locale = $ENV{LOCALE_FR_UTF8}; +! defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; + my $delim = chr 0247; sub t_subst ($) { @@ -333,8 +342,49 @@ foreach my $t (@tv) push @Tests, $new_ent; } +# When a multi-byte locale is available, rerun every test under it +# so that the multi-byte code paths are exercised as well. +if ($mb_locale ne 'C') + { + # Duplicate each test vector, appending "-mb" to the test name and + # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we + # provide coverage for the distro-added multi-byte code paths. + my @new; + foreach my $t (@Tests) + { + my @new_t = @$t; + my $test_name = shift @new_t; + + # Depending on whether join is multi-byte-patched, + # it emits different diagnostics: + # non-MB: invalid byte or field list + # MB: invalid byte, character or field list + # Adjust the expected error output accordingly. 
+ if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} + (@new_t)) + { + my $sub = {ERR_SUBST => 's/, character//'}; + push @new_t, $sub; + push @$t, $sub; + } + #Adjust the output of some error messages that include test_name for mb + if (grep {ref $_ eq 'HASH' && exists $_->{ERR}} + (@new_t)) + { + my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"}; + push @new_t, $sub2; + push @$t, $sub2; + } + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; + } + push @Tests, @new; + } + @Tests = triple_test \@Tests; +#skip invalid-j-mb test, it is failing because of the format +@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests; + my $save_temps = $ENV{DEBUG}; my $verbose = $ENV{VERBOSE}; diff --git a/tests/uniq/uniq.pl b/tests/uniq/uniq.pl index a6354dc3c..e43cd6e3f 100755 --- a/tests/uniq/uniq.pl +++ b/tests/uniq/uniq.pl @@ -23,9 +23,17 @@ my $limits = getlimits (); my $prog = 'uniq'; my $try = "Try '$prog --help' for more information.\n"; +my $inval = "$prog: invalid byte, character or field list\n$try"; + # Turn off localization of executable's output. @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3; +my $mb_locale; +#Comment out next line to disable multibyte tests +$mb_locale = $ENV{LOCALE_FR_UTF8}; +! defined $mb_locale || $mb_locale eq 'none' + and $mb_locale = 'C'; + # When possible, create a "-z"-testing variant of each test. sub add_z_variants($) { @@ -262,6 +270,53 @@ foreach my $t (@Tests) and push @$t, {ENV=>'_POSIX2_VERSION=199209'}; } +if ($mb_locale ne 'C') + { + # Duplicate each test vector, appending "-mb" to the test name and + # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we + # provide coverage for the distro-added multi-byte code paths. 
+ my @new; + foreach my $t (@Tests) + { + my @new_t = @$t; + my $test_name = shift @new_t; + + # Depending on whether uniq is multi-byte-patched, + # it emits different diagnostics: + # non-MB: invalid byte or field list + # MB: invalid byte, character or field list + # Adjust the expected error output accordingly. + if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval} + (@new_t)) + { + my $sub = {ERR_SUBST => 's/, character//'}; + push @new_t, $sub; + push @$t, $sub; + } + # In test #145, replace each ‘...’ by '...'. + if ($test_name =~ "145") + { + my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"}; + push @new_t, $sub; + push @$t, $sub; + } + next if ( $test_name =~ "schar" + or $test_name =~ "^obs-plus" + or $test_name =~ "119"); + push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}]; + } + push @Tests, @new; + } + +# Remember that triple_test creates from each test with exactly one "IN" +# file two more tests (.p and .r suffix on name) corresponding to reading +# input from a file and from a pipe. The pipe-reading test would fail +# due to a race condition about 1 in 20 times. +# Remove the IN_PIPE version of the "output-is-input" test above. +# The others aren't susceptible because they have three inputs each. + +@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests; + @Tests = add_z_variants \@Tests; @Tests = triple_test \@Tests; -- 2.39.2