]> Git Repo - qemu.git/blobdiff - qobject/json-parser.c
json: Simplify parse_string()
[qemu.git] / qobject / json-parser.c
index b77931614bf2743292e2790b77281d40f45d0e70..9cb363f7e1df2433c0d22894253eb7646865a2ec 100644 (file)
@@ -101,40 +101,49 @@ static int hex2decimal(char ch)
     } else if (ch >= 'A' && ch <= 'F') {
         return 10 + (ch - 'A');
     }
-
-    return -1;
+    abort();
 }
 
 /**
- * parse_string(): Parse a json string and return a QObject
+ * parse_string(): Parse a JSON string
+ *
+ * From RFC 8259 "The JavaScript Object Notation (JSON) Data
+ * Interchange Format":
+ *
+ *    char = unescaped /
+ *        escape (
+ *            %x22 /          ; "    quotation mark  U+0022
+ *            %x5C /          ; \    reverse solidus U+005C
+ *            %x2F /          ; /    solidus         U+002F
+ *            %x62 /          ; b    backspace       U+0008
+ *            %x66 /          ; f    form feed       U+000C
+ *            %x6E /          ; n    line feed       U+000A
+ *            %x72 /          ; r    carriage return U+000D
+ *            %x74 /          ; t    tab             U+0009
+ *            %x75 4HEXDIG )  ; uXXXX                U+XXXX
+ *    escape = %x5C              ; \
+ *    quotation-mark = %x22      ; "
+ *    unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
  *
- *  string
- *      ""
- *      " chars "
- *  chars
- *      char
- *      char chars
- *  char
- *      any-Unicode-character-
- *          except-"-or-\-or-
- *          control-character
- *      \"
- *      \\
- *      \/
- *      \b
- *      \f
- *      \n
- *      \r
- *      \t
- *      \u four-hex-digits 
+ * Extensions over RFC 8259:
+ * - Extra escape sequence in strings:
+ *   0x27 (apostrophe) is recognized after escape, too
+ * - Single-quoted strings:
+ *   Like double-quoted strings, except they're delimited by %x27
+ *   (apostrophe) instead of %x22 (quotation mark), and can't contain
+ *   unescaped apostrophe, but can contain unescaped quotation mark.
+ *
+ * Note:
+ * - Encoding is modified UTF-8.
+ * - Invalid Unicode characters are rejected.
+ * - Control characters \x00..\x1F are rejected by the lexer.
  */
-static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
-                                         JSONToken *token)
+static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
 {
     const char *ptr = token->str;
     QString *str;
     char quote;
-    int cp;
+    int cp, i;
     char *end;
     ssize_t len;
     char utf8_buf[5];
@@ -149,58 +158,55 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
             ptr++;
             switch (*ptr++) {
             case '"':
-                qstring_append(str, "\"");
+                qstring_append_chr(str, '"');
                 break;
             case '\'':
-                qstring_append(str, "'");
+                qstring_append_chr(str, '\'');
                 break;
             case '\\':
-                qstring_append(str, "\\");
+                qstring_append_chr(str, '\\');
                 break;
             case '/':
-                qstring_append(str, "/");
+                qstring_append_chr(str, '/');
                 break;
             case 'b':
-                qstring_append(str, "\b");
+                qstring_append_chr(str, '\b');
                 break;
             case 'f':
-                qstring_append(str, "\f");
+                qstring_append_chr(str, '\f');
                 break;
             case 'n':
-                qstring_append(str, "\n");
+                qstring_append_chr(str, '\n');
                 break;
             case 'r':
-                qstring_append(str, "\r");
+                qstring_append_chr(str, '\r');
                 break;
             case 't':
-                qstring_append(str, "\t");
+                qstring_append_chr(str, '\t');
                 break;
-            case 'u': {
-                uint16_t unicode_char = 0;
-                char utf8_char[4];
-                int i = 0;
-
+            case 'u':
+                cp = 0;
                 for (i = 0; i < 4; i++) {
-                    if (qemu_isxdigit(*ptr)) {
-                        unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
-                    } else {
+                    if (!qemu_isxdigit(*ptr)) {
                         parse_error(ctxt, token,
                                     "invalid hex escape sequence in string");
                         goto out;
                     }
+                    cp <<= 4;
+                    cp |= hex2decimal(*ptr);
                     ptr++;
                 }
 
-                wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
-                qstring_append(str, utf8_char);
-            }   break;
+                wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf));
+                qstring_append(str, utf8_buf);
+                break;
             default:
                 parse_error(ctxt, token, "invalid escape sequence in string");
                 goto out;
             }
         } else {
             cp = mod_utf8_codepoint(ptr, 6, &end);
-            if (cp <= 0) {
+            if (cp < 0) {
                 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
                 goto out;
             }
@@ -495,7 +501,7 @@ static QObject *parse_literal(JSONParserContext *ctxt)
 
     switch (token->type) {
     case JSON_STRING:
-        return QOBJECT(qstring_from_escaped_str(ctxt, token));
+        return QOBJECT(parse_string(ctxt, token));
     case JSON_INTEGER: {
         /*
          * Represent JSON_INTEGER as QNUM_I64 if possible, else as
This page took 0.028138 seconds and 4 git commands to generate.