[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master faf996d 1/2: Fix decoding ASCII strings with embedded CR characters
From: |
Eli Zaretskii |
Subject: |
master faf996d 1/2: Fix decoding ASCII strings with embedded CR characters |
Date: |
Thu, 9 Apr 2020 05:22:01 -0400 (EDT) |
branch: master
commit faf996dc6e963a8dd74e9e794ded0467dd78ea18
Author: Eli Zaretskii <address@hidden>
Commit: Eli Zaretskii <address@hidden>
Fix decoding ASCII strings with embedded CR characters
* src/coding.c (string_ascii_p): Return a negative value if an
all-ASCII string STR includes the CR character, otherwise a
positive value.
(code_convert_string): If the string is ASCII, but includes CR
characters, use the fast path only if EOL doesn't need to be
decoded. (Bug#40519)
* test/src/coding-tests.el (coding-nocopy-ascii): Add tests for
bug#40519.
---
src/coding.c | 37 ++++++++++++++++++++++++++-----------
test/src/coding-tests.el | 17 +++++++++++++++++
2 files changed, 43 insertions(+), 11 deletions(-)
diff --git a/src/coding.c b/src/coding.c
index 49c1e62..24a832f 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9471,15 +9471,22 @@ not fully specified.) */)
return code_convert_region (start, end, coding_system, destination, 1, 0);
}
-/* Whether a string only contains chars in the 0..127 range. */
-static bool
+/* Non-zero if STR contains only characters in the 0..127 range.
+ Positive if STR contains no CR character, so it needs no EOL
+ conversion on decoding; negative if it contains at least one CR. */
+static int
string_ascii_p (Lisp_Object str)
{
ptrdiff_t nbytes = SBYTES (str);
+ bool CR_Seen = false;
for (ptrdiff_t i = 0; i < nbytes; i++)
- if (SREF (str, i) > 127)
- return false;
- return true;
+ {
+ if (SREF (str, i) > 127)
+ return 0;
+ if (SREF (str, i) == '\r')
+ CR_Seen = true;
+ }
+ return CR_Seen ? -1 : 1;
}
Lisp_Object
@@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object
coding_system,
{
/* Fast path for ASCII-only input and an ASCII-compatible coding:
act as identity. */
+ int ascii_p;
Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
&& (STRING_MULTIBYTE (string)
- ? (chars == bytes) : string_ascii_p (string)))
- return (nocopy
- ? string
- : (encodep
- ? make_unibyte_string (SSDATA (string), bytes)
- : make_multibyte_string (SSDATA (string), bytes, bytes)));
+ ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
+ {
+ if (ascii_p > 0
+ || (ascii_p < 0
+ && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+ || inhibit_eol_conversion)))
+ return (nocopy
+ ? string
+ : (encodep
+ ? make_unibyte_string (SSDATA (string), bytes)
+ : make_multibyte_string (SSDATA (string),
+ bytes, bytes)));
+ }
}
else if (BUFFERP (dst_object))
{
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 93e6709..83a06b8 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -388,6 +388,23 @@
(let* ((uni (apply #'string (number-sequence 0 127)))
(multi (string-to-multibyte uni)))
(dolist (s (list uni multi))
+ (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
+ (should-not (eq (decode-coding-string s coding nil) s))
+ (should-not (eq (encode-coding-string s coding nil) s))
+ (should (eq (decode-coding-string s coding t) s))
+ (should (eq (encode-coding-string s coding t) s)))))
+ (let* ((uni (apply #'string (number-sequence 15 127)))
+ (multi (string-to-multibyte uni)))
+ (dolist (s (list uni multi))
+ (dolist (coding '(us-ascii iso-latin-1 utf-8))
+ (should-not (eq (decode-coding-string s coding nil) s))
+ (should-not (eq (encode-coding-string s coding nil) s))
+ (should (eq (decode-coding-string s coding t) s))
+ (should (eq (encode-coding-string s coding t) s)))))
+ (let* ((uni (apply #'string (number-sequence 0 127)))
+ (multi (string-to-multibyte uni))
+ (inhibit-eol-conversion t))
+ (dolist (s (list uni multi))
(dolist (coding '(us-ascii iso-latin-1 utf-8))
(should-not (eq (decode-coding-string s coding nil) s))
(should-not (eq (encode-coding-string s coding nil) s))