>From c985544e68d1a1c9d231d2f2db03126f9af51ad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Sun, 11 Apr 2021 16:24:07 +0100 Subject: [PATCH] doc: clarify what's counted by wc * src/wc.c (usage): State that only printable characters are considered when counting words. This also disambiguates whether we're talking about bytes or characters in this context. * doc/coreutils.texi (wc invocation): Likewise. Also clarify that --chars counts valid locale-aware characters, and that --lines does not count a trailing "line" unless it ends with a newline character. Fixes https://bugs.gnu.org/47702 --- doc/coreutils.texi | 17 +++++++++++------ src/wc.c | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/doc/coreutils.texi b/doc/coreutils.texi index e53c0de6e..cd10b0d4d 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3754,9 +3754,10 @@ contents of files. @cindex word count @cindex line count -@command{wc} counts the number of bytes, characters, whitespace-separated -words, and newlines in each given @var{file}, or standard input if none -are given or for a @var{file} of @samp{-}. Synopsis: +@command{wc} counts the number of bytes, characters, words, and newlines +in each given @var{file}, or standard input if none are given +or for a @var{file} of @samp{-}. A word is a nonzero length +sequence of printable characters delimited by white space. Synopsis: @example wc [@var{option}]@dots{} [@var{file}]@dots{} @@ -3807,19 +3808,23 @@ Print only the byte counts. @itemx --chars @opindex -m @opindex --chars -Print only the character counts. +Print only the character counts, as per the current locale. +Invalid characters are not counted. @item -w @itemx --words @opindex -w @opindex --words -Print only the word counts. +Print only the word counts. A word is a nonzero length +sequence of printable characters separated by white space. @item -l @itemx --lines @opindex -l @opindex --lines -Print only the newline counts. 
+Print only the newline character counts. +Note that a file without a trailing newline character +will not have that last portion included in the line count. @item -L @itemx --max-line-length diff --git a/src/wc.c b/src/wc.c index 5216db189..263ba30e8 100644 --- a/src/wc.c +++ b/src/wc.c @@ -123,7 +123,7 @@ Usage: %s [OPTION]... [FILE]...\n\ fputs (_("\ Print newline, word, and byte counts for each FILE, and a total line if\n\ more than one FILE is specified. A word is a non-zero-length sequence of\n\ -characters delimited by white space.\n\ +printable characters delimited by white space.\n\ "), stdout); emit_stdin_note (); -- 2.26.2