>From 2952343144654eaac158ef65292d885a61759e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Sat, 7 Aug 2021 18:47:57 +0100 Subject: [PATCH] cat: with -E fix handling of \r\n spanning buffers We must delay handling when \r is the last character of the buffer being processed, as the next character may or may not be \n. * src/cat.c (pending_cr): A new global to record whether the last character processed (in -E mode) is '\r'. (cat): Honor pending_cr when processing the start of the buffer. (main): Honor pending_cr if no more files to process. * tests/misc/cat-E.sh: Add test cases. Fixes https://bugs.gnu.org/49925 --- src/cat.c | 39 +++++++++++++++++++++++++++++++++------ tests/misc/cat-E.sh | 16 ++++++++++++++-- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/cat.c b/src/cat.c index 28f6e8e4e..17bc4fab9 100644 --- a/src/cat.c +++ b/src/cat.c @@ -78,6 +78,9 @@ static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3; /* Preserves the 'cat' function's local 'newlines' between invocations. */ static int newlines2 = 0; +/* Whether there is a pending CR to process. */ +static bool pending_cr = false; + void usage (int status) { @@ -397,9 +400,16 @@ cat ( } /* Output a currency symbol if requested (-e). */ - if (show_ends) - *bpout++ = '$'; + { + if (pending_cr) + { + *bpout++ = '^'; + *bpout++ = 'M'; + pending_cr = false; + } + *bpout++ = '$'; + } /* Output the newline. */ @@ -409,6 +419,14 @@ cat ( } while (ch == '\n'); + /* Here CH cannot contain a newline character. */ + + if (pending_cr) + { + *bpout++ = '\r'; + pending_cr = false; + } + /* Are we at the beginning of a line, and line numbers are requested? */ if (newlines >= 0 && number) @@ -417,8 +435,6 @@ cat ( bpout = stpcpy (bpout, line_num_print); } - /* Here CH cannot contain a newline character. */ - /* The loops below continue until a newline character is found, which means that the buffer is empty or that a proper newline has been found. */ @@ -489,8 +505,13 @@ cat ( { if (ch == '\r' && *bpin == '\n' && show_ends) { - *bpout++ = '^'; - *bpout++ = 'M'; + if (bpin == eob) + pending_cr = true; + else + { + *bpout++ = '^'; + *bpout++ = 'M'; + } } else *bpout++ = ch; @@ -768,6 +789,12 @@ main (int argc, char **argv) } while (++argind < argc); + if (pending_cr) + { + if (full_write (STDOUT_FILENO, "\r", 1) != 1) + die (EXIT_FAILURE, errno, _("write error")); + } + if (have_read_stdin && close (STDIN_FILENO) < 0) die (EXIT_FAILURE, errno, _("closing standard input")); diff --git a/tests/misc/cat-E.sh b/tests/misc/cat-E.sh index 401b6d591..1131eb3a5 100755 --- a/tests/misc/cat-E.sh +++ b/tests/misc/cat-E.sh @@ -21,10 +21,22 @@ print_ver_ cat # Up to and including 8.32 the $ would have displayed at the start of the line # overwriting the first character printf 'a\rb\r\nc\n\r\nd\r' > 'in' || framework_failure_ -printf 'a\rb^M$\nc$\n^M$\nd^M' > 'exp' || framework_failure_ - +printf 'a\rb^M$\nc$\n^M$\nd\r' > 'exp' || framework_failure_ cat -E 'in' > out || fail=1 +compare exp out || fail=1 + +# Ensure \r\n spanning files (or buffers) is handled +printf '1\r' > in2 || framework_failure_ +printf '\n2\r\n' > in2b || framework_failure_ +printf '1^M$\n2^M$\n' > 'exp' || framework_failure_ +cat -E 'in2' 'in2b' > out || fail=1 +compare exp out || fail=1 +# Ensure \r at end of buffer is handled +printf '1\r' > in2 || framework_failure_ +printf '2\r\n' > in2b || framework_failure_ +printf '1\r2^M$\n' > 'exp' || framework_failure_ +cat -E 'in2' 'in2b' > out || fail=1 compare exp out || fail=1 Exit $fail -- 2.26.2