} else if (ch >= 'A' && ch <= 'F') {
return 10 + (ch - 'A');
}
-
- return -1;
+ abort();
}
/**
- * parse_string(): Parse a json string and return a QObject
+ * parse_string(): Parse a JSON string
+ *
+ * From RFC 8259 "The JavaScript Object Notation (JSON) Data
+ * Interchange Format":
+ *
+ * char = unescaped /
+ * escape (
+ * %x22 / ; " quotation mark U+0022
+ * %x5C / ; \ reverse solidus U+005C
+ * %x2F / ; / solidus U+002F
+ * %x62 / ; b backspace U+0008
+ * %x66 / ; f form feed U+000C
+ * %x6E / ; n line feed U+000A
+ * %x72 / ; r carriage return U+000D
+ * %x74 / ; t tab U+0009
+ * %x75 4HEXDIG ) ; uXXXX U+XXXX
+ * escape = %x5C ; \
+ * quotation-mark = %x22 ; "
+ * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
*
- * string
- * ""
- * " chars "
- * chars
- * char
- * char chars
- * char
- * any-Unicode-character-
- * except-"-or-\-or-
- * control-character
- * \"
- * \\
- * \/
- * \b
- * \f
- * \n
- * \r
- * \t
- * \u four-hex-digits
+ * Extensions over RFC 8259:
+ * - Extra escape sequence in strings:
+ * %x27 (apostrophe) is recognized after escape, too
+ * - Single-quoted strings:
+ * Like double-quoted strings, except they're delimited by %x27
+ * (apostrophe) instead of %x22 (quotation mark), and can't contain
+ * unescaped apostrophe, but can contain unescaped quotation mark.
+ *
+ * Note:
+ * - Encoding is modified UTF-8.
+ * - Invalid Unicode characters are rejected.
+ * - Control characters \x00..\x1F are rejected by the lexer.
*/
-static QString *qstring_from_escaped_str(JSONParserContext *ctxt,
- JSONToken *token)
+static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
{
const char *ptr = token->str;
QString *str;
char quote;
- int cp;
+ int cp, i;
char *end;
ssize_t len;
char utf8_buf[5];
ptr++;
switch (*ptr++) {
case '"':
- qstring_append(str, "\"");
+ qstring_append_chr(str, '"');
break;
case '\'':
- qstring_append(str, "'");
+ qstring_append_chr(str, '\'');
break;
case '\\':
- qstring_append(str, "\\");
+ qstring_append_chr(str, '\\');
break;
case '/':
- qstring_append(str, "/");
+ qstring_append_chr(str, '/');
break;
case 'b':
- qstring_append(str, "\b");
+ qstring_append_chr(str, '\b');
break;
case 'f':
- qstring_append(str, "\f");
+ qstring_append_chr(str, '\f');
break;
case 'n':
- qstring_append(str, "\n");
+ qstring_append_chr(str, '\n');
break;
case 'r':
- qstring_append(str, "\r");
+ qstring_append_chr(str, '\r');
break;
case 't':
- qstring_append(str, "\t");
+ qstring_append_chr(str, '\t');
break;
- case 'u': {
- uint16_t unicode_char = 0;
- char utf8_char[4];
- int i = 0;
-
+ case 'u':
+ cp = 0;
for (i = 0; i < 4; i++) {
- if (qemu_isxdigit(*ptr)) {
- unicode_char |= hex2decimal(*ptr) << ((3 - i) * 4);
- } else {
+ if (!qemu_isxdigit(*ptr)) {
parse_error(ctxt, token,
"invalid hex escape sequence in string");
goto out;
}
+ cp <<= 4;
+ cp |= hex2decimal(*ptr);
ptr++;
}
- wchar_to_utf8(unicode_char, utf8_char, sizeof(utf8_char));
- qstring_append(str, utf8_char);
- } break;
+ wchar_to_utf8(cp, utf8_buf, sizeof(utf8_buf));
+ qstring_append(str, utf8_buf);
+ break;
default:
parse_error(ctxt, token, "invalid escape sequence in string");
goto out;
}
} else {
cp = mod_utf8_codepoint(ptr, 6, &end);
- if (cp <= 0) {
+ if (cp < 0) {
parse_error(ctxt, token, "invalid UTF-8 sequence in string");
goto out;
}
switch (token->type) {
case JSON_STRING:
- return QOBJECT(qstring_from_escaped_str(ctxt, token));
+ return QOBJECT(parse_string(ctxt, token));
case JSON_INTEGER: {
/*
* Represent JSON_INTEGER as QNUM_I64 if possible, else as