emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Improve error reporting when serializing non-Unicode strings to JSON


From: Philipp Stephani
Subject: [PATCH] Improve error reporting when serializing non-Unicode strings to JSON
Date: Sat, 23 Dec 2017 17:58:57 +0100

* src/coding.h (EOL_SEEN_NONE, EOL_SEEN_LF, EOL_SEEN_CR)
(EOL_SEEN_CRLF): Move from coding.c.

* src/coding.c (check_utf_8): Make extern.

* src/json.c (json_check_utf8): New helper function.
(lisp_to_json_toplevel_1, lisp_to_json): Use it.  To save a bit of
time, check for invalid UTF-8 strings only after encountering an
error, since Jansson already rejects them.

* test/src/json-tests.el (json-serialize/invalid-unicode): Adapt
expected error symbol.
---
 src/coding.c           | 10 +---------
 src/coding.h           |  8 ++++++++
 src/json.c             | 42 ++++++++++++++++++++++++++++++++++++------
 test/src/json-tests.el | 10 ++++------
 4 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/src/coding.c b/src/coding.c
index 1705838ffa..b5cdafee4b 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1114,14 +1114,6 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
     *buf++ = id;                                                       \
   } while (0)
 
-
-/* Bitmasks for coding->eol_seen.  */
-
-#define EOL_SEEN_NONE  0
-#define EOL_SEEN_LF    1
-#define EOL_SEEN_CR    2
-#define EOL_SEEN_CRLF  4
-
 
 /*** 2. Emacs' internal format (emacs-utf-8) ***/
 
@@ -6266,7 +6258,7 @@ check_ascii (struct coding_system *coding)
    the value is reliable only when all the source bytes are valid
    UTF-8.  */
 
-static ptrdiff_t
+ptrdiff_t
 check_utf_8 (struct coding_system *coding)
 {
   const unsigned char *src, *end;
diff --git a/src/coding.h b/src/coding.h
index 66d125b07e..314d044def 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -662,9 +662,17 @@ struct coding_system
 /* Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
 #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, true)
 
+/* Bitmasks for coding->eol_seen.  */
+
+#define EOL_SEEN_NONE  0
+#define EOL_SEEN_LF    1
+#define EOL_SEEN_CR    2
+#define EOL_SEEN_CRLF  4
+
 /* Extern declarations.  */
 extern Lisp_Object code_conversion_save (bool, bool);
 extern bool encode_coding_utf_8 (struct coding_system *);
+extern ptrdiff_t check_utf_8 (struct coding_system *);
 extern void setup_coding_system (Lisp_Object, struct coding_system *);
 extern Lisp_Object coding_charset_list (struct coding_system *);
 extern Lisp_Object coding_system_charset_list (Lisp_Object);
diff --git a/src/json.c b/src/json.c
index 689f6ac510..fc2265a793 100644
--- a/src/json.c
+++ b/src/json.c
@@ -313,6 +313,26 @@ json_check (json_t *object)
   return object;
 }
 
+/* If STRING is not a valid UTF-8 string, signal an error of type
+   `wrong-type-argument'.  STRING must be a unibyte string.  */
+
+static void
+json_check_utf8 (Lisp_Object string)
+{
+  eassert (!STRING_MULTIBYTE (string));
+  struct coding_system coding;
+  setup_coding_system (Qutf_8_unix, &coding);
+  /* We initialize only the fields that check_utf_8 accesses.  */
+  coding.head_ascii = -1;
+  coding.src_pos = 0;
+  coding.src_pos_byte = 0;
+  coding.src_chars = SCHARS (string);
+  coding.src_bytes = SBYTES (string);
+  coding.src_object = string;
+  coding.eol_seen = EOL_SEEN_NONE;
+  CHECK_TYPE (check_utf_8 (&coding) != -1, Qutf_8_string_p, string);
+}
+
 static json_t *lisp_to_json (Lisp_Object);
 
 /* Convert a Lisp object to a toplevel JSON object (array or object).
@@ -355,9 +375,12 @@ lisp_to_json_toplevel_1 (Lisp_Object lisp, json_t **json)
             int status = json_object_set_new (*json, SSDATA (key),
                                               lisp_to_json (HASH_VALUE (h, i)));
             if (status == -1)
-              /* FIXME: A failure here might also indicate that the
-                 key is not a valid Unicode string.  */
-              json_out_of_memory ();
+              {
+                /* A failure can be caused either by an invalid key or
+                   by low memory.  */
+                json_check_utf8 (key);
+                json_out_of_memory ();
+              }
           }
       clear_unwind_protect (count);
       return unbind_to (count, Qnil);
@@ -403,9 +426,15 @@ lisp_to_json (Lisp_Object lisp)
   else if (STRINGP (lisp))
     {
       Lisp_Object encoded = json_encode (lisp);
-      /* FIXME: We might throw an out-of-memory error here if the
-         string is not valid Unicode.  */
-      return json_check (json_stringn (SSDATA (encoded), SBYTES (encoded)));
+      json_t *json = json_stringn (SSDATA (encoded), SBYTES (encoded));
+      if (json == NULL)
+        {
+          /* A failure can be caused either by an invalid string or by
+             low memory.  */
+          json_check_utf8 (encoded);
+          json_out_of_memory ();
+        }
+      return json;
     }
 
   /* LISP now must be a vector or hashtable.  */
@@ -818,6 +847,7 @@ syms_of_json (void)
 
   DEFSYM (Qstring_without_embedded_nulls_p, "string-without-embedded-nulls-p");
   DEFSYM (Qjson_value_p, "json-value-p");
+  DEFSYM (Qutf_8_string_p, "utf-8-string-p");
 
   DEFSYM (Qutf_8_unix, "utf-8-unix");
 
diff --git a/test/src/json-tests.el b/test/src/json-tests.el
index 9884e9a2d5..9bdb639423 100644
--- a/test/src/json-tests.el
+++ b/test/src/json-tests.el
@@ -84,12 +84,10 @@
 
 (ert-deftest json-serialize/invalid-unicode ()
   (skip-unless (fboundp 'json-serialize))
-  ;; FIXME: "out of memory" is the wrong error signal, but we don't
-  ;; currently distinguish between error types when serializing.
-  (should-error (json-serialize ["a\uDBBBb"]) :type 'json-out-of-memory)
-  (should-error (json-serialize ["u\x110000v"]) :type 'json-out-of-memory)
-  (should-error (json-serialize ["u\x3FFFFFv"]) :type 'json-out-of-memory)
-  (should-error (json-serialize ["u\xCCv"]) :type 'json-out-of-memory))
+  (should-error (json-serialize ["a\uDBBBb"]) :type 'wrong-type-argument)
+  (should-error (json-serialize ["u\x110000v"]) :type 'wrong-type-argument)
+  (should-error (json-serialize ["u\x3FFFFFv"]) :type 'wrong-type-argument)
+  (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument))
 
 (ert-deftest json-parse-string/null ()
   (skip-unless (fboundp 'json-parse-string))
-- 
2.15.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]