*
* Authors:
*
* This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
* See the COPYING.LIB file in the top-level directory.
{ "\"single byte utf-8 \\u0020\"", "single byte utf-8 ", .skip = 1 },
{ "\"double byte utf-8 \\u00A2\"", "double byte utf-8 \xc2\xa2" },
{ "\"triple byte utf-8 \\u20AC\"", "triple byte utf-8 \xe2\x82\xac" },
+ { "'\\b'", "\b", .skip = 1 },
+ { "'\\f'", "\f", .skip = 1 },
+ { "'\\n'", "\n", .skip = 1 },
+ { "'\\r'", "\r", .skip = 1 },
+ { "'\\t'", "\t", .skip = 1 },
+ { "'\\/'", "/", .skip = 1 },
+ { "'\\\\'", "\\", .skip = 1 },
{}
};
* The JSON parser rejects some invalid sequences, but accepts
* others without correcting the problem.
*
- * The JSON formatter replaces some invalid sequences by U+FFFF (a
- * noncharacter), and goes wonky for others.
- *
- * For both directions, we should either reject all invalid
- * sequences, or minimize overlong sequences and replace all other
- * invalid sequences by a suitable replacement character. A
- * common choice for replacement is U+FFFD.
+ * We should either reject all invalid sequences, or minimize
+ * overlong sequences and replace all other invalid sequences by a
+ * suitable replacement character. A common choice for
+ * replacement is U+FFFD.
*
* Problem: we can't easily deal with embedded U+0000. Parsing the
* JSON string "this \\u0000 is fun" yields "this \0 is fun", which C
* code misreads as "this ". We should consider using overlong
* encoding \xC0\x80 for U+0000 ("modified UTF-8").
*
- * Test cases are scraped from Markus Kuhn's UTF-8 decoder
+ * Most test cases are scraped from Markus Kuhn's UTF-8 decoder
* capability and stress test at
* http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
*/
* - bug: rejected
* JSON parser rejects invalid sequence(s)
* We may choose to define this as a feature
- * - bug: want "\"...\""
- * JSON formatter produces incorrect result, this is the
- * correct one, assuming replacement character U+FFFF
- * - bug: want "..." (no \")
+ * - bug: want "..."
* JSON parser produces incorrect result, this is the
* correct one, assuming replacement character U+FFFF
* We may choose to reject instead of replace
- * Not marked explicitly, but trivial to find:
- * - JSON formatter replacing invalid sequence by \\uFFFF is a
- * bug if we want it to fail for invalid sequences.
*/
/* 1 Some correct UTF-8 text */
{
"\"\\u0000\"",
"", /* bug: want overlong "\xC0\x80" */
- "\"\"", /* bug: want "\"\\u0000\"" */
+ "\"\\u0000\"",
+ "\xC0\x80",
},
/* 2.1.2 2 bytes U+0080 */
{
{
"\"\xF0\x90\x80\x80\"",
"\xF0\x90\x80\x80",
- "\"\\u0400\\uFFFF\"", /* bug: want "\"\\uD800\\uDC00\"" */
+ "\"\\uD800\\uDC00\"",
},
/* 2.1.5 5 bytes U+200000 */
{
"\"\xF8\x88\x80\x80\x80\"",
- NULL, /* bug: rejected */
- "\"\\u8200\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ NULL, /* bug: rejected */
+ "\"\\uFFFD\"",
"\xF8\x88\x80\x80\x80",
},
/* 2.1.6 6 bytes U+4000000 */
{
"\"\xFC\x84\x80\x80\x80\x80\"",
- NULL, /* bug: rejected */
- "\"\\uC100\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ NULL, /* bug: rejected */
+ "\"\\uFFFD\"",
"\xFC\x84\x80\x80\x80\x80",
},
/* 2.2 Last possible sequence of a certain length */
{
"\"\x7F\"",
"\x7F",
- "\"\177\"",
+ "\"\\u007F\"",
},
/* 2.2.2 2 bytes U+07FF */
{
"\xDF\xBF",
"\"\\u07FF\"",
},
- /* 2.2.3 3 bytes U+FFFF */
+ /*
+ * 2.2.3 3 bytes U+FFFC
+ * The last possible sequence is actually U+FFFF. But that's
+ * a noncharacter, and already covered by its own test case
+ * under 5.3. Same for U+FFFE. U+FFFD is the last character
+ * in the BMP, and covered under 2.3. Because of U+FFFD's
+ * special role as replacement character, it's worth testing
+ * U+FFFC here.
+ */
{
- "\"\xEF\xBF\xBF\"",
- "\xEF\xBF\xBF",
- "\"\\uFFFF\"",
+ "\"\xEF\xBF\xBC\"",
+ "\xEF\xBF\xBC",
+ "\"\\uFFFC\"",
},
/* 2.2.4 4 bytes U+1FFFFF */
{
"\"\xF7\xBF\xBF\xBF\"",
- NULL, /* bug: rejected */
- "\"\\u7FFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ NULL, /* bug: rejected */
+ "\"\\uFFFD\"",
"\xF7\xBF\xBF\xBF",
},
/* 2.2.5 5 bytes U+3FFFFFF */
{
"\"\xFB\xBF\xBF\xBF\xBF\"",
- NULL, /* bug: rejected */
- "\"\\uBFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ NULL, /* bug: rejected */
+ "\"\\uFFFD\"",
"\xFB\xBF\xBF\xBF\xBF",
},
/* 2.2.6 6 bytes U+7FFFFFFF */
{
"\"\xFD\xBF\xBF\xBF\xBF\xBF\"",
- NULL, /* bug: rejected */
- "\"\\uDFFF\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ NULL, /* bug: rejected */
+ "\"\\uFFFD\"",
"\xFD\xBF\xBF\xBF\xBF\xBF",
},
/* 2.3 Other boundary conditions */
{
- /* U+D7FF */
+ /* last one before surrogate range: U+D7FF */
"\"\xED\x9F\xBF\"",
"\xED\x9F\xBF",
"\"\\uD7FF\"",
},
{
- /* U+E000 */
+ /* first one after surrogate range: U+E000 */
"\"\xEE\x80\x80\"",
"\xEE\x80\x80",
"\"\\uE000\"",
},
{
- /* U+FFFD */
+ /* last one in BMP: U+FFFD */
"\"\xEF\xBF\xBD\"",
"\xEF\xBF\xBD",
"\"\\uFFFD\"",
},
{
- /* U+10FFFF */
- "\"\xF4\x8F\xBF\xBF\"",
- "\xF4\x8F\xBF\xBF",
- "\"\\u43FF\\uFFFF\"", /* bug: want "\"\\uDBFF\\uDFFF\"" */
+ /* last one in last plane: U+10FFFD */
+ "\"\xF4\x8F\xBF\xBD\"",
+ "\xF4\x8F\xBF\xBD",
+ "\"\\uDBFF\\uDFFD\""
},
{
- /* U+110000 */
+ /* first one beyond Unicode range: U+110000 */
"\"\xF4\x90\x80\x80\"",
"\xF4\x90\x80\x80",
- "\"\\u4400\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
/* 3 Malformed sequences */
/* 3.1 Unexpected continuation bytes */
{
"\"\x80\"",
"\x80", /* bug: not corrected */
- "\"\\uFFFF\"",
+ "\"\\uFFFD\"",
},
/* 3.1.2 Last continuation byte */
{
"\"\xBF\"",
"\xBF", /* bug: not corrected */
- "\"\\uFFFF\"",
+ "\"\\uFFFD\"",
},
/* 3.1.3 2 continuation bytes */
{
"\"\x80\xBF\"",
"\x80\xBF", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\"",
},
/* 3.1.4 3 continuation bytes */
{
"\"\x80\xBF\x80\"",
"\x80\xBF\x80", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\"",
},
/* 3.1.5 4 continuation bytes */
{
"\"\x80\xBF\x80\xBF\"",
"\x80\xBF\x80\xBF", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
},
/* 3.1.6 5 continuation bytes */
{
"\"\x80\xBF\x80\xBF\x80\"",
"\x80\xBF\x80\xBF\x80", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
},
/* 3.1.7 6 continuation bytes */
{
"\"\x80\xBF\x80\xBF\x80\xBF\"",
"\x80\xBF\x80\xBF\x80\xBF", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
},
/* 3.1.8 7 continuation bytes */
{
"\"\x80\xBF\x80\xBF\x80\xBF\x80\"",
"\x80\xBF\x80\xBF\x80\xBF\x80", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
},
/* 3.1.9 Sequence of all 64 possible continuation bytes */
{
"\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
"\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7"
"\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF",
- "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\""
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\""
},
/* 3.2 Lonely start characters */
/* 3.2.1 All 32 first bytes of 2-byte sequences, followed by space */
"\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
"\xD8 \xD9 \xDA \xDB \xDC \xDD \xDE \xDF \"",
NULL, /* bug: rejected */
- "\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
- "\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
- "\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
- "\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
+ "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
+ "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
+ "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
+ "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
"\xC0 \xC1 \xC2 \xC3 \xC4 \xC5 \xC6 \xC7 "
"\xC8 \xC9 \xCA \xCB \xCC \xCD \xCE \xCF "
"\xD0 \xD1 \xD2 \xD3 \xD4 \xD5 \xD6 \xD7 "
/* bug: not corrected */
"\xE0 \xE1 \xE2 \xE3 \xE4 \xE5 \xE6 \xE7 "
"\xE8 \xE9 \xEA \xEB \xEC \xED \xEE \xEF ",
- "\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF "
- "\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
+ "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD "
+ "\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
},
/* 3.2.3 All 8 first bytes of 4-byte sequences, followed by space */
{
"\"\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 \"",
NULL, /* bug: rejected */
- "\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
+ "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
"\xF0 \xF1 \xF2 \xF3 \xF4 \xF5 \xF6 \xF7 ",
},
/* 3.2.4 All 4 first bytes of 5-byte sequences, followed by space */
{
"\"\xF8 \xF9 \xFA \xFB \"",
NULL, /* bug: rejected */
- "\"\\uFFFF \\uFFFF \\uFFFF \\uFFFF \"",
+ "\"\\uFFFD \\uFFFD \\uFFFD \\uFFFD \"",
"\xF8 \xF9 \xFA \xFB ",
},
/* 3.2.5 All 2 first bytes of 6-byte sequences, followed by space */
{
"\"\xFC \xFD \"",
NULL, /* bug: rejected */
- "\"\\uFFFF \\uFFFF \"",
+ "\"\\uFFFD \\uFFFD \"",
"\xFC \xFD ",
},
/* 3.3 Sequences with last continuation byte missing */
{
"\"\xC0\"",
NULL, /* bug: rejected */
- "\"\\uFFFF\"",
+ "\"\\uFFFD\"",
"\xC0",
},
/* 3.3.2 3-byte sequence with last byte missing (U+0000) */
{
"\"\xE0\x80\"",
"\xE0\x80", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
/* 3.3.3 4-byte sequence with last byte missing (U+0000) */
{
"\"\xF0\x80\x80\"",
"\xF0\x80\x80", /* bug: not corrected */
- "\"\\u0000\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
/* 3.3.4 5-byte sequence with last byte missing (U+0000) */
{
- /* invalid */
- "\"\xF8\x80\x80\x80\"", /* bug: not corrected */
+ "\"\xF8\x80\x80\x80\"",
NULL, /* bug: rejected */
- "\"\\u8000\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
"\xF8\x80\x80\x80",
},
/* 3.3.5 6-byte sequence with last byte missing (U+0000) */
{
"\"\xFC\x80\x80\x80\x80\"",
NULL, /* bug: rejected */
- "\"\\uC000\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
"\xFC\x80\x80\x80\x80",
},
/* 3.3.6 2-byte sequence with last byte missing (U+07FF) */
{
"\"\xDF\"",
"\xDF", /* bug: not corrected */
- "\"\\uFFFF\"",
+ "\"\\uFFFD\"",
},
/* 3.3.7 3-byte sequence with last byte missing (U+FFFF) */
{
"\"\xEF\xBF\"",
"\xEF\xBF", /* bug: not corrected */
- "\"\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
/* 3.3.8 4-byte sequence with last byte missing (U+1FFFFF) */
{
"\"\xF7\xBF\xBF\"",
NULL, /* bug: rejected */
- "\"\\u7FFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
"\xF7\xBF\xBF",
},
/* 3.3.9 5-byte sequence with last byte missing (U+3FFFFFF) */
{
"\"\xFB\xBF\xBF\xBF\"",
NULL, /* bug: rejected */
- "\"\\uBFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
"\xFB\xBF\xBF\xBF",
},
/* 3.3.10 6-byte sequence with last byte missing (U+7FFFFFFF) */
{
"\"\xFD\xBF\xBF\xBF\xBF\"",
NULL, /* bug: rejected */
- "\"\\uDFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"", */
+ "\"\\uFFFD\"",
"\xFD\xBF\xBF\xBF\xBF",
},
/* 3.4 Concatenation of incomplete sequences */
"\"\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
"\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF\"",
NULL, /* bug: rejected */
- /* bug: want "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF"
- "\\uFFFF\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"" */
- "\"\\u0020\\uFFFF\\u0000\\u8000\\uFFFF\\uC000\\uFFFF\\uFFFF"
- "\\u07EF\\uFFFF\\u7FFF\\uBFFF\\uFFFF\\uDFFF\\uFFFF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
"\xC0\xE0\x80\xF0\x80\x80\xF8\x80\x80\x80\xFC\x80\x80\x80\x80"
"\xDF\xEF\xBF\xF7\xBF\xBF\xFB\xBF\xBF\xBF\xFD\xBF\xBF\xBF\xBF",
},
{
"\"\xFE\"",
NULL, /* bug: rejected */
- "\"\\uFFFF\"",
+ "\"\\uFFFD\"",
"\xFE",
},
{
"\"\xFF\"",
NULL, /* bug: rejected */
- "\"\\uFFFF\"",
+ "\"\\uFFFD\"",
"\xFF",
},
{
"\"\xFE\xFE\xFF\xFF\"",
NULL, /* bug: rejected */
- /* bug: want "\"\\uFFFF\\uFFFF\\uFFFF\\uFFFF\"" */
- "\"\\uEFBF\\uFFFF\"",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
"\xFE\xFE\xFF\xFF",
},
/* 4 Overlong sequences */
{
"\"\xC0\xAF\"",
NULL, /* bug: rejected */
- "\"\\u002F\"", /* bug: want "\"/\"" */
+ "\"\\uFFFD\"",
"\xC0\xAF",
},
{
"\"\xE0\x80\xAF\"",
"\xE0\x80\xAF", /* bug: not corrected */
- "\"\\u002F\"", /* bug: want "\"/\"" */
+ "\"\\uFFFD\"",
},
{
"\"\xF0\x80\x80\xAF\"",
"\xF0\x80\x80\xAF", /* bug: not corrected */
- "\"\\u0000\\uFFFF\"" /* bug: want "\"/\"" */
+ "\"\\uFFFD\"",
},
{
"\"\xF8\x80\x80\x80\xAF\"",
NULL, /* bug: rejected */
- "\"\\u8000\\uFFFF\\uFFFF\"", /* bug: want "\"/\"" */
+ "\"\\uFFFD\"",
"\xF8\x80\x80\x80\xAF",
},
{
"\"\xFC\x80\x80\x80\x80\xAF\"",
NULL, /* bug: rejected */
- "\"\\uC000\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"/\"" */
+ "\"\\uFFFD\"",
"\xFC\x80\x80\x80\x80\xAF",
},
- /* 4.2 Maximum overlong sequences */
+ /*
+ * 4.2 Maximum overlong sequences
+ * Highest Unicode value that still results in an overlong
+ * sequence when represented with the given number of bytes.
+ * This is a boundary test for safe UTF-8 decoders.
+ */
{
/* \U+007F */
"\"\xC1\xBF\"",
NULL, /* bug: rejected */
- "\"\\u007F\"", /* bug: want "\"\177\"" */
+ "\"\\uFFFD\"",
"\xC1\xBF",
},
{
/* \U+07FF */
"\"\xE0\x9F\xBF\"",
"\xE0\x9F\xBF", /* bug: not corrected */
- "\"\\u07FF\"",
+ "\"\\uFFFD\"",
},
{
- /* \U+FFFF */
- "\"\xF0\x8F\xBF\xBF\"",
- "\xF0\x8F\xBF\xBF", /* bug: not corrected */
- "\"\\u03FF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ /*
+ * \U+FFFC
+ * The actual maximum would be U+FFFF, but that's a
+ * noncharacter. Testing U+FFFC seems more useful. See
+ * also 2.2.3
+ */
+ "\"\xF0\x8F\xBF\xBC\"",
+ "\xF0\x8F\xBF\xBC", /* bug: not corrected */
+ "\"\\uFFFD\"",
},
{
/* \U+1FFFFF */
"\"\xF8\x87\xBF\xBF\xBF\"",
NULL, /* bug: rejected */
- "\"\\u81FF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
"\xF8\x87\xBF\xBF\xBF",
},
{
/* \U+3FFFFFF */
"\"\xFC\x83\xBF\xBF\xBF\xBF\"",
NULL, /* bug: rejected */
- "\"\\uC0FF\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
"\xFC\x83\xBF\xBF\xBF\xBF",
},
/* 4.3 Overlong representation of the NUL character */
/* \U+0000 */
"\"\xE0\x80\x80\"",
"\xE0\x80\x80", /* bug: not corrected */
- "\"\\u0000\"",
+ "\"\\uFFFD\"",
},
{
/* \U+0000 */
"\"\xF0\x80\x80\x80\"",
"\xF0\x80\x80\x80", /* bug: not corrected */
- "\"\\u0000\\uFFFF\"", /* bug: want "\"\\u0000\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+0000 */
"\"\xF8\x80\x80\x80\x80\"",
NULL, /* bug: rejected */
- "\"\\u8000\\uFFFF\\uFFFF\"", /* bug: want "\"\\u0000\"" */
+ "\"\\uFFFD\"",
"\xF8\x80\x80\x80\x80",
},
{
/* \U+0000 */
"\"\xFC\x80\x80\x80\x80\x80\"",
NULL, /* bug: rejected */
- "\"\\uC000\\uFFFF\\uFFFF\\uFFFF\"", /* bug: want "\"\\u0000\"" */
+ "\"\\uFFFD\"",
"\xFC\x80\x80\x80\x80\x80",
},
/* 5 Illegal code positions */
/* \U+D800 */
"\"\xED\xA0\x80\"",
"\xED\xA0\x80", /* bug: not corrected */
- "\"\\uD800\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+DB7F */
"\"\xED\xAD\xBF\"",
"\xED\xAD\xBF", /* bug: not corrected */
- "\"\\uDB7F\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+DB80 */
"\"\xED\xAE\x80\"",
"\xED\xAE\x80", /* bug: not corrected */
- "\"\\uDB80\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+DBFF */
"\"\xED\xAF\xBF\"",
"\xED\xAF\xBF", /* bug: not corrected */
- "\"\\uDBFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+DC00 */
"\"\xED\xB0\x80\"",
"\xED\xB0\x80", /* bug: not corrected */
- "\"\\uDC00\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+DF80 */
"\"\xED\xBE\x80\"",
"\xED\xBE\x80", /* bug: not corrected */
- "\"\\uDF80\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
{
/* \U+DFFF */
"\"\xED\xBF\xBF\"",
"\xED\xBF\xBF", /* bug: not corrected */
- "\"\\uDFFF\"", /* bug: want "\"\\uFFFF\"" */
+ "\"\\uFFFD\"",
},
/* 5.2 Paired UTF-16 surrogates */
{
/* \U+D800\U+DC00 */
"\"\xED\xA0\x80\xED\xB0\x80\"",
"\xED\xA0\x80\xED\xB0\x80", /* bug: not corrected */
- "\"\\uD800\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+D800\U+DFFF */
"\"\xED\xA0\x80\xED\xBF\xBF\"",
"\xED\xA0\x80\xED\xBF\xBF", /* bug: not corrected */
- "\"\\uD800\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+DB7F\U+DC00 */
"\"\xED\xAD\xBF\xED\xB0\x80\"",
"\xED\xAD\xBF\xED\xB0\x80", /* bug: not corrected */
- "\"\\uDB7F\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+DB7F\U+DFFF */
"\"\xED\xAD\xBF\xED\xBF\xBF\"",
"\xED\xAD\xBF\xED\xBF\xBF", /* bug: not corrected */
- "\"\\uDB7F\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+DB80\U+DC00 */
"\"\xED\xAE\x80\xED\xB0\x80\"",
"\xED\xAE\x80\xED\xB0\x80", /* bug: not corrected */
- "\"\\uDB80\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+DB80\U+DFFF */
"\"\xED\xAE\x80\xED\xBF\xBF\"",
"\xED\xAE\x80\xED\xBF\xBF", /* bug: not corrected */
- "\"\\uDB80\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+DBFF\U+DC00 */
"\"\xED\xAF\xBF\xED\xB0\x80\"",
"\xED\xAF\xBF\xED\xB0\x80", /* bug: not corrected */
- "\"\\uDBFF\\uDC00\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
{
/* \U+DBFF\U+DFFF */
"\"\xED\xAF\xBF\xED\xBF\xBF\"",
"\xED\xAF\xBF\xED\xBF\xBF", /* bug: not corrected */
- "\"\\uDBFF\\uDFFF\"", /* bug: want "\"\\uFFFF\\uFFFF\"" */
+ "\"\\uFFFD\\uFFFD\"",
},
/* 5.3 Other illegal code positions */
+ /* BMP noncharacters */
{
/* \U+FFFE */
"\"\xEF\xBF\xBE\"",
"\xEF\xBF\xBE", /* bug: not corrected */
- "\"\\uFFFE\"", /* bug: not corrected */
+ "\"\\uFFFD\"",
},
{
/* \U+FFFF */
"\"\xEF\xBF\xBF\"",
"\xEF\xBF\xBF", /* bug: not corrected */
- "\"\\uFFFF\"", /* bug: not corrected */
+ "\"\\uFFFD\"",
+ },
+ {
+ /* U+FDD0 */
+ "\"\xEF\xB7\x90\"",
+ "\xEF\xB7\x90", /* bug: not corrected */
+ "\"\\uFFFD\"",
+ },
+ {
+ /* U+FDEF */
+ "\"\xEF\xB7\xAF\"",
+ "\xEF\xB7\xAF", /* bug: not corrected */
+ "\"\\uFFFD\"",
+ },
+ /* Plane 1 .. 16 noncharacters */
+ {
+ /* U+1FFFE U+1FFFF U+2FFFE U+2FFFF ... U+10FFFE U+10FFFF */
+ "\"\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
+ "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF"
+ "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF"
+ "\xF1\x8F\xBF\xBE\xF1\x8F\xBF\xBF"
+ "\xF1\x9F\xBF\xBE\xF1\x9F\xBF\xBF"
+ "\xF1\xAF\xBF\xBE\xF1\xAF\xBF\xBF"
+ "\xF1\xBF\xBF\xBE\xF1\xBF\xBF\xBF"
+ "\xF2\x8F\xBF\xBE\xF2\x8F\xBF\xBF"
+ "\xF2\x9F\xBF\xBE\xF2\x9F\xBF\xBF"
+ "\xF2\xAF\xBF\xBE\xF2\xAF\xBF\xBF"
+ "\xF2\xBF\xBF\xBE\xF2\xBF\xBF\xBF"
+ "\xF3\x8F\xBF\xBE\xF3\x8F\xBF\xBF"
+ "\xF3\x9F\xBF\xBE\xF3\x9F\xBF\xBF"
+ "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
+ "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
+ "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF\"",
+ /* bug: not corrected */
+ "\xF0\x9F\xBF\xBE\xF0\x9F\xBF\xBF"
+ "\xF0\xAF\xBF\xBE\xF0\xAF\xBF\xBF"
+ "\xF0\xBF\xBF\xBE\xF0\xBF\xBF\xBF"
+ "\xF1\x8F\xBF\xBE\xF1\x8F\xBF\xBF"
+ "\xF1\x9F\xBF\xBE\xF1\x9F\xBF\xBF"
+ "\xF1\xAF\xBF\xBE\xF1\xAF\xBF\xBF"
+ "\xF1\xBF\xBF\xBE\xF1\xBF\xBF\xBF"
+ "\xF2\x8F\xBF\xBE\xF2\x8F\xBF\xBF"
+ "\xF2\x9F\xBF\xBE\xF2\x9F\xBF\xBF"
+ "\xF2\xAF\xBF\xBE\xF2\xAF\xBF\xBF"
+ "\xF2\xBF\xBF\xBE\xF2\xBF\xBF\xBF"
+ "\xF3\x8F\xBF\xBE\xF3\x8F\xBF\xBF"
+ "\xF3\x9F\xBF\xBE\xF3\x9F\xBF\xBF"
+ "\xF3\xAF\xBF\xBE\xF3\xAF\xBF\xBF"
+ "\xF3\xBF\xBF\xBE\xF3\xBF\xBF\xBF"
+ "\xF4\x8F\xBF\xBE\xF4\x8F\xBF\xBF",
+ "\"\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD"
+ "\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\\uFFFD\"",
},
{}
};
qobject_decref(obj);
/*
- * Disabled, because json_out currently contains the crap
- * qobject_to_json() produces.
+ * Disabled, because qobject_from_json() is buggy, and I can't
+ * be bothered to add the expected incorrect results.
* FIXME Enable once these bugs have been fixed.
*/
if (0 && json_out != json_in) {