qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 28/56] json: Fix \uXXXX for surrogate pairs


From: Markus Armbruster
Subject: [Qemu-devel] [PATCH 28/56] json: Fix \uXXXX for surrogate pairs
Date: Wed, 8 Aug 2018 14:03:06 +0200

The JSON parser treats each half of a surrogate pair as an unpaired
surrogate.  Fix it to recognize surrogate pairs.

Signed-off-by: Markus Armbruster <address@hidden>
---
 qobject/json-parser.c | 16 +++++++++++++++-
 tests/check-qjson.c   |  3 +--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/qobject/json-parser.c b/qobject/json-parser.c
index bb54886809..703065fa2b 100644
--- a/qobject/json-parser.c
+++ b/qobject/json-parser.c
@@ -115,7 +115,7 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
     const char *ptr = token->str;
     QString *str;
     char quote;
-    int cp, i;
+    int cp, i, leading_surrogate;
     char *end;
     ssize_t len;
     char utf8_buf[5];
@@ -156,6 +156,8 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                 qstring_append_chr(str, '\t');
                 break;
             case 'u':
+                leading_surrogate = 0;
+            hex:
                 cp = 0;
                 for (i = 0; i < 4; i++) {
                     ptr++;
@@ -168,6 +170,18 @@ static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
                     cp |= hex2decimal(*ptr);
                 }
 
+                if (cp >= 0xD800 && cp <= 0xDBFF && !leading_surrogate
+                    && ptr[1] == '\\' && ptr[2] == 'u') {
+                    ptr += 2;
+                    leading_surrogate = cp;
+                    goto hex;
+                }
+                if (cp >= 0xDC00 && cp <= 0xDFFF && leading_surrogate) {
+                    cp &= 0x3FF;
+                    cp |= (leading_surrogate & 0x3FF) << 10;
+                    cp += 0x010000;
+                }
+
                 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
                     parse_error(ctxt, token,
                                 "\\u%.4s is not a valid Unicode character",
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 422697459f..3d3a3f105f 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -61,8 +61,7 @@ static void escaped_string(void)
         { "double byte utf-8 \\u00A2", "double byte utf-8 \xc2\xa2" },
         { "triple byte utf-8 \\u20AC", "triple byte utf-8 \xe2\x82\xac" },
         { "quadruple byte utf-8 \\uD834\\uDD1E", /* U+1D11E */
-          /* bug: want \xF0\x9D\x84\x9E */
-          NULL },
+          "quadruple byte utf-8 \xF0\x9D\x84\x9E" },
         { "\\z", NULL },
         { "\\ux", NULL },
         { "\\u1x", NULL },
-- 
2.17.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]