2007-05-22 James Youngman * src/wc.c (wc): Limit the number of messages about invalid multibyte sequences to error_limit (currently 5). If the actual number of conversion errors exceeds this, indicate the number of suppressed messages. The idea is that most users typing "wc /bin/sh" actually know there are no words there, and other users will be clued in by the error messages that remain (and the impressive count of suppressed messages). Index: NEWS =================================================================== RCS file: /sources/coreutils/coreutils/NEWS,v retrieving revision 1.493 diff -u -p -r1.493 NEWS --- NEWS 15 May 2007 05:15:28 -0000 1.493 +++ NEWS 22 May 2007 01:15:17 -0000 @@ -10,6 +10,11 @@ GNU coreutils NEWS option of the same name, this makes uniq consume and produce NUL-terminated lines rather than newline-terminated lines. + In multibyte locales, if you run wc on a binary file, only a small + number of multibyte character conversion error messages will be + produced, along with a count of the total number of errors. This + makes running wc on binaries still irritating but not disastrous. 
+ ** Bug fixes ls -x DIR would sometimes output the wrong string in place of the Index: src/wc.c =================================================================== RCS file: /sources/coreutils/coreutils/src/wc.c,v retrieving revision 1.114 diff -u -p -r1.114 wc.c --- src/wc.c 28 Mar 2007 06:57:40 -0000 1.114 +++ src/wc.c 22 May 2007 01:15:17 -0000 @@ -275,6 +275,8 @@ wc (int fd, char const *file_x, struct f uintmax_t linepos = 0; mbstate_t state = { 0, }; uintmax_t last_error_line = 0; + uintmax_t error_limit = 5; + uintmax_t error_count = 0; int last_error_errno = 0; # if SUPPORT_OLD_MBRTOWC /* Back-up the state before each multibyte character conversion and @@ -327,12 +329,16 @@ wc (int fd, char const *file_x, struct f if (!(lines + 1 == last_error_line && errno == last_error_errno)) { - char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)]; - last_error_line = lines + 1; - last_error_errno = errno; - error (0, errno, "%s:%s", file, - umaxtostr (last_error_line, line_number_buf)); - ok = false; + if (error_limit == 0 || + (error_count++ < error_limit)) + { + char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)]; + last_error_line = lines + 1; + last_error_errno = errno; + error (0, errno, "%s:%s", file, + umaxtostr (last_error_line, line_number_buf)); + ok = false; + } } p++; bytes_read--; @@ -402,6 +408,16 @@ wc (int fd, char const *file_x, struct f if (linepos > linelength) linelength = linepos; words += in_word; + + if (error_limit && (error_count > error_limit)) + { + char suppression_buf[INT_BUFSIZE_BOUND (uintmax_t)]; + const uintmax_t suppressed = error_count - error_limit; + error (0, 0, + _("%s more multibyte conversion error messages " + "were suppressed"), + umaxtostr (suppressed, suppression_buf)); + } } #endif else