From 5f0a97fc5090aaf8a5e19224c87cfaeb9abec97e Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 25 Sep 2020 16:02:36 -0700 Subject: [PATCH] regex-tests: fix test and add debug output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Perhaps this will fix the recent grep test failure reported at: https://buildfarm.opencsw.org/buildbot/builders/ggrep-solaris10-sparc/builds/199 At least, the debug output should help narrow down the failure. * tests/test-regex.c: Include stdarg.h, stdio.h. (exit_status): New var. (report_error): New function. (main): Use it to report failures to stderr instead of merely exiting with some nonzero status. The status info alone isn’t enough to do remote debugging. In the new tr_TR.UTF-8 test, clear regex before calling re_compile_pattern, fixing a portability bug. --- ChangeLog | 12 +++++ tests/test-regex.c | 132 +++++++++++++++++++++++++++++---------------- 2 files changed, 98 insertions(+), 46 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2ed322e68..3ef9570e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2020-09-25 Paul Eggert + regex-tests: fix test and add debug output + Perhaps this will fix the recent grep test failure reported at: + https://buildfarm.opencsw.org/buildbot/builders/ggrep-solaris10-sparc/builds/199 + At least, the debug output should help narrow down the failure. + * tests/test-regex.c: Include stdarg.h, stdio.h. + (exit_status): New var. + (report_error): New function. + (main): Use it to report failures to stderr instead of merely + exiting with some nonzero status. The status info alone isn’t + enough to do remote debugging. In the new tr_TR.UTF-8 test, clear + regex before calling re_compile_pattern, fixing a portability bug. + regex: no longer match glibc * config/srclist.txt: Comment out regex_internal.c for now. 
diff --git a/tests/test-regex.c b/tests/test-regex.c index b4e23c8c8..96def52f5 100644 --- a/tests/test-regex.c +++ b/tests/test-regex.c @@ -20,6 +20,8 @@ #include #include +#include <stdarg.h> +#include <stdio.h> #include #include #if HAVE_DECL_ALARM @@ -29,6 +31,20 @@ #include "localcharset.h" +static int exit_status; + +static void +report_error (char const *format, ...) +{ + va_list args; + va_start (args, format); + fprintf (stderr, "test-regex: "); + vfprintf (stderr, format, args); + fprintf (stderr, "\n"); + va_end (args); + exit_status = 1; +} + /* Check whether it's really a UTF-8 locale. On mingw, setlocale (LC_ALL, "en_US.UTF-8") succeeds but returns "English_United States.1252", with locale_charset () returning "CP1252". */ @@ -41,7 +57,6 @@ really_utf8 (void) int main (void) { - int result = 0; static struct re_pattern_buffer regex; unsigned char folded_chars[UCHAR_MAX + 1]; int i; @@ -70,14 +85,15 @@ main (void) memset (&regex, 0, sizeof regex); s = re_compile_pattern (pat, sizeof pat - 1, &regex); if (s) - result |= 1; + report_error ("%s: %s", pat, s); else { memset (&regs, 0, sizeof regs); - if (re_search (&regex, data, sizeof data - 1, - 0, sizeof data - 1, &regs) - != -1) - result |= 1; + i = re_search (&regex, data, sizeof data - 1, + 0, sizeof data - 1, &regs); + if (i != -1) + report_error ("re_search '%s' on '%s' returned %d", + pat, data, i); regfree (&regex); free (regs.start); free (regs.end); @@ -106,14 +122,15 @@ main (void) memset (&regex, 0, sizeof regex); s = re_compile_pattern (pat, sizeof pat - 1, &regex); if (s) - result |= 1; + report_error ("%s: %s", pat, s); else { memset (&regs, 0, sizeof regs); i = re_search (&regex, data, sizeof data - 1, 0, sizeof data - 1, 0); if (i != 0 && i != 21) - result |= 1; + report_error ("re_search '%s' on '%s' returned %d", + pat, data, i); regfree (&regex); free (regs.start); free (regs.end); @@ -127,8 +144,11 @@ main (void) if (setlocale (LC_ALL, "tr_TR.UTF-8") && really_utf8 ()) { re_set_syntax (RE_SYNTAX_GREP | RE_ICASE); - if (re_compile_pattern 
("i", 1, &regex)) - result |= 1; + memset (&regex, 0, sizeof regex); + static char const pat[] = "i"; + s = re_compile_pattern (pat, sizeof pat - 1, &regex); + if (s) + report_error ("%s: %s", pat, s); else { /* UTF-8 encoding of U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE. @@ -138,30 +158,36 @@ main (void) static char const data[] = "\xc4\xb0"; memset (&regs, 0, sizeof regs); - if (re_search (&regex, data, sizeof data - 1, 0, sizeof data - 1, - &regs)) - result |= 1; + i = re_search (&regex, data, sizeof data - 1, 0, sizeof data - 1, + &regs); + if (i != 0) + report_error ("re_search '%s' on '%s' returned %d", + pat, data, i); regfree (&regex); free (regs.start); free (regs.end); if (! setlocale (LC_ALL, "C")) - return 1; + report_error ("setlocale \"C\" failed"); } } /* This test is from glibc bug 3957, reported by Andrew Mackey. */ re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE); memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("a[^x]b", 6, &regex); + static char const pat_3957[] = "a[^x]b"; + s = re_compile_pattern (pat_3957, sizeof pat_3957 - 1, &regex); if (s) - result |= 2; - /* This should fail, but succeeds for glibc-2.5. */ + report_error ("%s: %s", pat_3957, s); else { + /* This should fail, but succeeds for glibc-2.5. */ memset (&regs, 0, sizeof regs); - if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1) - result |= 2; + static char const data[] = "a\nb"; + i = re_search (&regex, data, sizeof data - 1, 0, sizeof data - 1, &regs); + if (i != -1) + report_error ("re_search '%s' on '%s' returned %d", + pat_3957, data, i); regfree (&regex); free (regs.start); free (regs.end); @@ -174,11 +200,12 @@ main (void) for (i = 0; i <= UCHAR_MAX; i++) folded_chars[i] = i; regex.translate = folded_chars; - s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex); + static char const pat75[] = "a[[:@:>@:]]b\n"; + s = re_compile_pattern (pat75, sizeof pat75 - 1, &regex); /* This should fail with _Invalid character class name_ error. 
*/ if (!s) { - result |= 4; + report_error ("re_compile_pattern: failed to reject '%s'", pat75); regfree (&regex); } @@ -186,48 +213,57 @@ main (void) using RE_NO_EMPTY_RANGES. */ re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES); memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("a[b-a]", 6, &regex); + static char const pat_b_a[] = "a[b-a]"; + s = re_compile_pattern (pat_b_a, sizeof pat_b_a - 1, &regex); if (s == 0) { - result |= 8; + report_error ("re_compile_pattern: failed to reject '%s'", pat_b_a); regfree (&regex); } /* This should succeed, but does not for glibc-2.1.3. */ memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("{1", 2, &regex); + static char const pat_213[] = "{1"; + s = re_compile_pattern (pat_213, sizeof pat_213 - 1, &regex); if (s) - result |= 8; + report_error ("%s: %s", pat_213, s); else regfree (&regex); /* The following example is derived from a problem report against gawk from Jorge Stolfi . */ memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("[an\371]*n", 7, &regex); + static char const pat_stolfi[] = "[an\371]*n"; + s = re_compile_pattern (pat_stolfi, sizeof pat_stolfi - 1, &regex); if (s) - result |= 8; + report_error ("%s: %s", pat_stolfi, s); /* This should match, but does not for glibc-2.2.1. */ else { memset (&regs, 0, sizeof regs); - if (re_match (&regex, "an", 2, 0, &regs) != 2) - result |= 8; + static char const data[] = "an"; + i = re_match (&regex, data, sizeof data - 1, 0, &regs); + if (i != 2) + report_error ("re_match '%s' on '%s' at 2 returned %d", + pat_stolfi, data, i); regfree (&regex); free (regs.start); free (regs.end); } memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("x", 1, &regex); + static char const pat_x[] = "x"; + s = re_compile_pattern (pat_x, sizeof pat_x - 1, &regex); if (s) - result |= 8; + report_error ("%s: %s", pat_x, s); /* glibc-2.2.93 does not work with a negative RANGE argument. 
*/ else { memset (&regs, 0, sizeof regs); - if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1) - result |= 8; + static char const data[] = "wxy"; + i = re_search (&regex, data, sizeof data - 1, 2, -2, &regs); + if (i != 1) + report_error ("re_search '%s' on '%s' returned %d", pat_x, data, i); regfree (&regex); free (regs.start); free (regs.end); @@ -237,14 +273,15 @@ main (void) ignored RE_ICASE. Detect that problem too. */ re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE); memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("x", 1, &regex); + s = re_compile_pattern (pat_x, 1, &regex); if (s) - result |= 16; + report_error ("%s: %s", pat_x, s); else { memset (&regs, 0, sizeof regs); - if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0) - result |= 16; + static char const data[] = "WXY"; + if (re_search (&regex, data, sizeof data - 1, 0, 3, &regs) < 0) + report_error ("re_search '%s' on '%s' returned %d", pat_x, data, i); regfree (&regex); free (regs.start); free (regs.end); @@ -256,17 +293,17 @@ main (void) re_set_syntax (RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES); - memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex); + static char const pat_shelton[] = "[[:alnum:]_-]\\\\+$"; + s = re_compile_pattern (pat_shelton, sizeof pat_shelton - 1, &regex); if (s) - result |= 32; + report_error ("%s: %s", pat_shelton, s); else regfree (&regex); /* REG_STARTEND was added to glibc on 2004-01-15. Reject older versions. */ if (! REG_STARTEND) - result |= 64; + report_error ("REG_STARTEND is zero"); /* Matching with the compiled form of this regexp would provoke an assertion failure prior to glibc-2.28: @@ -275,9 +312,12 @@ main (void) back reference. 
*/ re_set_syntax (RE_SYNTAX_POSIX_EGREP); memset (&regex, 0, sizeof regex); - s = re_compile_pattern ("0|()0|\\1|0", 10, &regex); - if (!s || strcmp (s, "Invalid back reference")) - result |= 64; + static char const pat_badback[] = "0|()0|\\1|0"; + s = re_compile_pattern (pat_badback, sizeof pat_badback, &regex); + if (!s) + s = "failed to report invalid back reference"; + if (strcmp (s, "Invalid back reference") != 0) + report_error ("%s: %s", pat_badback, s); #if 0 /* It would be nice to reject hosts whose regoff_t values are too @@ -288,8 +328,8 @@ main (void) when compiling --without-included-regex. */ if (sizeof (regoff_t) < sizeof (ptrdiff_t) || sizeof (regoff_t) < sizeof (ssize_t)) - result |= 64; + report_error ("regoff_t values are too narrow"); #endif - return result; + return exit_status; } -- 2.17.1