From 17a1a37549344cdfd95cc84b1848dafa256be5a0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sun, 22 Jul 2018 08:09:01 -0700
Subject: [PATCH] df: avoid multibyte character corruption on macOS

Problem reported by Chih-Hsuan Yen (Bug#32236).
* NEWS: Mention the bug fix.
* src/df.c: Include wchar.h and wctype.h instead of mbswidth.h.
(hide_problematic_chars): Return number of screen columns.
All callers changed.  Use iswcntrl, not iscntrl.
Count columns with wcwidth, so that wide and zero-width characters
are measured correctly.  Use memmove, since source and destination
overlap.  Advance by one byte when mbrtowc fails, instead of adding
(size_t) -1 or (size_t) -2 to the source pointer.
(get_header, get_dev): Rely on hide_problematic_chars width,
not mbswidth.  Scan the cell once, instead of two or three times.
---
 NEWS     |  4 ++++
 src/df.c | 55 ++++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/NEWS b/NEWS
index af1a990..aa3b4f9 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Bug fixes
+
+  df no longer corrupts displayed multibyte characters on macOS.
+
 
 * Noteworthy changes in release 8.30 (2018-07-01) [stable]
 
diff --git a/src/df.c b/src/df.c
index 1178865..664b88b 100644
--- a/src/df.c
+++ b/src/df.c
@@ -23,6 +23,8 @@
 #include <sys/types.h>
 #include <getopt.h>
 #include <assert.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "system.h"
 #include "canonicalize.h"
@@ -31,7 +33,6 @@
 #include "fsusage.h"
 #include "human.h"
 #include "mbsalign.h"
-#include "mbswidth.h"
 #include "mountlist.h"
 #include "quote.h"
 #include "find-mount-point.h"
@@ -272,20 +273,45 @@ static struct option const long_options[] =
 };
 
 /* Replace problematic chars with '?'.
-   Since only control characters are currently considered,
-   this should work in all encodings.  */
+   Return the number of screen columns.  */
 
-static char*
+static size_t
 hide_problematic_chars (char *cell)
 {
-  char *p = cell;
-  while (*p)
+  char *srcend = cell + strlen (cell);
+  char *dst = cell;
+  mbstate_t mbstate = { 0, };
+  size_t n;
+  size_t width = 0;
+
+  for (char *src = cell; src != srcend; src += n)
     {
-      if (iscntrl (to_uchar (*p)))
-        *p = '?';
-      p++;
+      wchar_t wc;
+      size_t srcbytes = srcend - src;
+      n = mbrtowc (&wc, src, srcbytes, &mbstate);
+
+      if (n <= srcbytes && !iswcntrl (wc))
+        {
+          /* DST never passes SRC, so the two regions may overlap.  */
+          memmove (dst, src, n);
+          dst += n;
+          int w = wcwidth (wc);
+          width += w < 0 ? 1 : w;
+        }
+      else
+        {
+          /* On a decoding error N is (size_t) -1 or -2; adding that
+             to SRC would step backwards.  Skip one byte instead.  */
+          if (srcbytes < n)
+            n = 1;
+          *dst++ = '?';
+          memset (&mbstate, 0, sizeof mbstate);
+          width++;
+        }
     }
-  return cell;
+
+  *dst = '\0';
+  return width;
 }
 
 /* Dynamically allocate a row of pointers in TABLE, which
@@ -569,11 +595,10 @@ get_header (void)
       if (!cell)
         xalloc_die ();
 
-      hide_problematic_chars (cell);
-
       table[nrows - 1][col] = cell;
 
-      columns[col]->width = MAX (columns[col]->width, mbswidth (cell, 0));
+      size_t cell_width = hide_problematic_chars (cell);
+      columns[col]->width = MAX (columns[col]->width, cell_width);
     }
 }
 
@@ -1182,8 +1207,8 @@ get_dev (char const *disk, char const *mount_point, char const* file,
       if (!cell)
         assert (!"empty cell");
 
-      hide_problematic_chars (cell);
-      columns[col]->width = MAX (columns[col]->width, mbswidth (cell, 0));
+      size_t cell_width = hide_problematic_chars (cell);
+      columns[col]->width = MAX (columns[col]->width, cell_width);
       table[nrows - 1][col] = cell;
     }
   free (dev_name);
-- 
2.7.4