]> Git Repo - qemu.git/blame_incremental - qobject/json-parser.c
json: Reject invalid \uXXXX, fix \u0000
[qemu.git] / qobject / json-parser.c
... / ...
CommitLineData
1/*
2 * JSON Parser
3 *
4 * Copyright IBM, Corp. 2009
5 *
6 * Authors:
7 * Anthony Liguori <[email protected]>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14#include "qemu/osdep.h"
15#include "qemu/cutils.h"
16#include "qemu/unicode.h"
17#include "qapi/error.h"
18#include "qemu-common.h"
19#include "qapi/qmp/qbool.h"
20#include "qapi/qmp/qdict.h"
21#include "qapi/qmp/qlist.h"
22#include "qapi/qmp/qnull.h"
23#include "qapi/qmp/qnum.h"
24#include "qapi/qmp/qstring.h"
25#include "qapi/qmp/json-parser.h"
26#include "qapi/qmp/json-lexer.h"
27#include "qapi/qmp/json-streamer.h"
28
29typedef struct JSONParserContext
30{
31 Error *err;
32 JSONToken *current;
33 GQueue *buf;
34} JSONParserContext;
35
36#define BUG_ON(cond) assert(!(cond))
37
38/**
39 * TODO
40 *
41 * 0) make errors meaningful again
42 * 1) add geometry information to tokens
43 * 3) should we return a parsed size?
44 * 4) deal with premature EOI
45 */
46
47static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
48
49/**
50 * Error handler
51 */
52static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
53 JSONToken *token, const char *msg, ...)
54{
55 va_list ap;
56 char message[1024];
57
58 if (ctxt->err) {
59 return;
60 }
61 va_start(ap, msg);
62 vsnprintf(message, sizeof(message), msg, ap);
63 va_end(ap);
64 error_setg(&ctxt->err, "JSON parse error, %s", message);
65}
66
/*
 * Convert the hexadecimal digit @ch (upper or lower case) to its
 * numeric value 0..15.
 *
 * The caller must pass a valid hexadecimal digit; any other character
 * aborts the program.
 */
static int hex2decimal(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return (ch - 'a') + 10;
    }
    if ('A' <= ch && ch <= 'F') {
        return (ch - 'A') + 10;
    }
    abort();
}
78
79/**
80 * parse_string(): Parse a JSON string
81 *
82 * From RFC 8259 "The JavaScript Object Notation (JSON) Data
83 * Interchange Format":
84 *
85 * char = unescaped /
86 * escape (
87 * %x22 / ; " quotation mark U+0022
88 * %x5C / ; \ reverse solidus U+005C
89 * %x2F / ; / solidus U+002F
90 * %x62 / ; b backspace U+0008
91 * %x66 / ; f form feed U+000C
92 * %x6E / ; n line feed U+000A
93 * %x72 / ; r carriage return U+000D
94 * %x74 / ; t tab U+0009
95 * %x75 4HEXDIG ) ; uXXXX U+XXXX
96 * escape = %x5C ; \
97 * quotation-mark = %x22 ; "
98 * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
99 *
100 * Extensions over RFC 8259:
101 * - Extra escape sequence in strings:
102 * 0x27 (apostrophe) is recognized after escape, too
103 * - Single-quoted strings:
104 * Like double-quoted strings, except they're delimited by %x27
105 * (apostrophe) instead of %x22 (quotation mark), and can't contain
106 * unescaped apostrophe, but can contain unescaped quotation mark.
107 *
108 * Note:
109 * - Encoding is modified UTF-8.
110 * - Invalid Unicode characters are rejected.
111 * - Control characters \x00..\x1F are rejected by the lexer.
112 */
113static QString *parse_string(JSONParserContext *ctxt, JSONToken *token)
114{
115 const char *ptr = token->str;
116 QString *str;
117 char quote;
118 int cp, i;
119 char *end;
120 ssize_t len;
121 char utf8_buf[5];
122
123 assert(*ptr == '"' || *ptr == '\'');
124 quote = *ptr++;
125 str = qstring_new();
126
127 while (*ptr != quote) {
128 assert(*ptr);
129 if (*ptr == '\\') {
130 ptr++;
131 switch (*ptr++) {
132 case '"':
133 qstring_append_chr(str, '"');
134 break;
135 case '\'':
136 qstring_append_chr(str, '\'');
137 break;
138 case '\\':
139 qstring_append_chr(str, '\\');
140 break;
141 case '/':
142 qstring_append_chr(str, '/');
143 break;
144 case 'b':
145 qstring_append_chr(str, '\b');
146 break;
147 case 'f':
148 qstring_append_chr(str, '\f');
149 break;
150 case 'n':
151 qstring_append_chr(str, '\n');
152 break;
153 case 'r':
154 qstring_append_chr(str, '\r');
155 break;
156 case 't':
157 qstring_append_chr(str, '\t');
158 break;
159 case 'u':
160 cp = 0;
161 for (i = 0; i < 4; i++) {
162 if (!qemu_isxdigit(*ptr)) {
163 parse_error(ctxt, token,
164 "invalid hex escape sequence in string");
165 goto out;
166 }
167 cp <<= 4;
168 cp |= hex2decimal(*ptr);
169 ptr++;
170 }
171
172 if (mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp) < 0) {
173 parse_error(ctxt, token,
174 "\\u%.4s is not a valid Unicode character",
175 ptr - 3);
176 goto out;
177 }
178 qstring_append(str, utf8_buf);
179 break;
180 default:
181 parse_error(ctxt, token, "invalid escape sequence in string");
182 goto out;
183 }
184 } else {
185 cp = mod_utf8_codepoint(ptr, 6, &end);
186 if (cp < 0) {
187 parse_error(ctxt, token, "invalid UTF-8 sequence in string");
188 goto out;
189 }
190 ptr = end;
191 len = mod_utf8_encode(utf8_buf, sizeof(utf8_buf), cp);
192 assert(len >= 0);
193 qstring_append(str, utf8_buf);
194 }
195 }
196
197 return str;
198
199out:
200 qobject_unref(str);
201 return NULL;
202}
203
204/* Note: the token object returned by parser_context_peek_token or
205 * parser_context_pop_token is deleted as soon as parser_context_pop_token
206 * is called again.
207 */
208static JSONToken *parser_context_pop_token(JSONParserContext *ctxt)
209{
210 g_free(ctxt->current);
211 assert(!g_queue_is_empty(ctxt->buf));
212 ctxt->current = g_queue_pop_head(ctxt->buf);
213 return ctxt->current;
214}
215
216static JSONToken *parser_context_peek_token(JSONParserContext *ctxt)
217{
218 assert(!g_queue_is_empty(ctxt->buf));
219 return g_queue_peek_head(ctxt->buf);
220}
221
222static JSONParserContext *parser_context_new(GQueue *tokens)
223{
224 JSONParserContext *ctxt;
225
226 if (!tokens) {
227 return NULL;
228 }
229
230 ctxt = g_malloc0(sizeof(JSONParserContext));
231 ctxt->buf = tokens;
232
233 return ctxt;
234}
235
236/* to support error propagation, ctxt->err must be freed separately */
237static void parser_context_free(JSONParserContext *ctxt)
238{
239 if (ctxt) {
240 while (!g_queue_is_empty(ctxt->buf)) {
241 parser_context_pop_token(ctxt);
242 }
243 g_free(ctxt->current);
244 g_queue_free(ctxt->buf);
245 g_free(ctxt);
246 }
247}
248
249/**
250 * Parsing rules
251 */
252static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
253{
254 QObject *value;
255 QString *key = NULL;
256 JSONToken *peek, *token;
257
258 peek = parser_context_peek_token(ctxt);
259 if (peek == NULL) {
260 parse_error(ctxt, NULL, "premature EOI");
261 goto out;
262 }
263
264 key = qobject_to(QString, parse_value(ctxt, ap));
265 if (!key) {
266 parse_error(ctxt, peek, "key is not a string in object");
267 goto out;
268 }
269
270 token = parser_context_pop_token(ctxt);
271 if (token == NULL) {
272 parse_error(ctxt, NULL, "premature EOI");
273 goto out;
274 }
275
276 if (token->type != JSON_COLON) {
277 parse_error(ctxt, token, "missing : in object pair");
278 goto out;
279 }
280
281 value = parse_value(ctxt, ap);
282 if (value == NULL) {
283 parse_error(ctxt, token, "Missing value in dict");
284 goto out;
285 }
286
287 qdict_put_obj(dict, qstring_get_str(key), value);
288
289 qobject_unref(key);
290
291 return 0;
292
293out:
294 qobject_unref(key);
295
296 return -1;
297}
298
299static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
300{
301 QDict *dict = NULL;
302 JSONToken *token, *peek;
303
304 token = parser_context_pop_token(ctxt);
305 assert(token && token->type == JSON_LCURLY);
306
307 dict = qdict_new();
308
309 peek = parser_context_peek_token(ctxt);
310 if (peek == NULL) {
311 parse_error(ctxt, NULL, "premature EOI");
312 goto out;
313 }
314
315 if (peek->type != JSON_RCURLY) {
316 if (parse_pair(ctxt, dict, ap) == -1) {
317 goto out;
318 }
319
320 token = parser_context_pop_token(ctxt);
321 if (token == NULL) {
322 parse_error(ctxt, NULL, "premature EOI");
323 goto out;
324 }
325
326 while (token->type != JSON_RCURLY) {
327 if (token->type != JSON_COMMA) {
328 parse_error(ctxt, token, "expected separator in dict");
329 goto out;
330 }
331
332 if (parse_pair(ctxt, dict, ap) == -1) {
333 goto out;
334 }
335
336 token = parser_context_pop_token(ctxt);
337 if (token == NULL) {
338 parse_error(ctxt, NULL, "premature EOI");
339 goto out;
340 }
341 }
342 } else {
343 (void)parser_context_pop_token(ctxt);
344 }
345
346 return QOBJECT(dict);
347
348out:
349 qobject_unref(dict);
350 return NULL;
351}
352
353static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
354{
355 QList *list = NULL;
356 JSONToken *token, *peek;
357
358 token = parser_context_pop_token(ctxt);
359 assert(token && token->type == JSON_LSQUARE);
360
361 list = qlist_new();
362
363 peek = parser_context_peek_token(ctxt);
364 if (peek == NULL) {
365 parse_error(ctxt, NULL, "premature EOI");
366 goto out;
367 }
368
369 if (peek->type != JSON_RSQUARE) {
370 QObject *obj;
371
372 obj = parse_value(ctxt, ap);
373 if (obj == NULL) {
374 parse_error(ctxt, token, "expecting value");
375 goto out;
376 }
377
378 qlist_append_obj(list, obj);
379
380 token = parser_context_pop_token(ctxt);
381 if (token == NULL) {
382 parse_error(ctxt, NULL, "premature EOI");
383 goto out;
384 }
385
386 while (token->type != JSON_RSQUARE) {
387 if (token->type != JSON_COMMA) {
388 parse_error(ctxt, token, "expected separator in list");
389 goto out;
390 }
391
392 obj = parse_value(ctxt, ap);
393 if (obj == NULL) {
394 parse_error(ctxt, token, "expecting value");
395 goto out;
396 }
397
398 qlist_append_obj(list, obj);
399
400 token = parser_context_pop_token(ctxt);
401 if (token == NULL) {
402 parse_error(ctxt, NULL, "premature EOI");
403 goto out;
404 }
405 }
406 } else {
407 (void)parser_context_pop_token(ctxt);
408 }
409
410 return QOBJECT(list);
411
412out:
413 qobject_unref(list);
414 return NULL;
415}
416
417static QObject *parse_keyword(JSONParserContext *ctxt)
418{
419 JSONToken *token;
420
421 token = parser_context_pop_token(ctxt);
422 assert(token && token->type == JSON_KEYWORD);
423
424 if (!strcmp(token->str, "true")) {
425 return QOBJECT(qbool_from_bool(true));
426 } else if (!strcmp(token->str, "false")) {
427 return QOBJECT(qbool_from_bool(false));
428 } else if (!strcmp(token->str, "null")) {
429 return QOBJECT(qnull());
430 }
431 parse_error(ctxt, token, "invalid keyword '%s'", token->str);
432 return NULL;
433}
434
435static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
436{
437 JSONToken *token;
438
439 if (ap == NULL) {
440 return NULL;
441 }
442
443 token = parser_context_pop_token(ctxt);
444 assert(token && token->type == JSON_ESCAPE);
445
446 if (!strcmp(token->str, "%p")) {
447 return va_arg(*ap, QObject *);
448 } else if (!strcmp(token->str, "%i")) {
449 return QOBJECT(qbool_from_bool(va_arg(*ap, int)));
450 } else if (!strcmp(token->str, "%d")) {
451 return QOBJECT(qnum_from_int(va_arg(*ap, int)));
452 } else if (!strcmp(token->str, "%ld")) {
453 return QOBJECT(qnum_from_int(va_arg(*ap, long)));
454 } else if (!strcmp(token->str, "%lld") ||
455 !strcmp(token->str, "%I64d")) {
456 return QOBJECT(qnum_from_int(va_arg(*ap, long long)));
457 } else if (!strcmp(token->str, "%u")) {
458 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned int)));
459 } else if (!strcmp(token->str, "%lu")) {
460 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long)));
461 } else if (!strcmp(token->str, "%llu") ||
462 !strcmp(token->str, "%I64u")) {
463 return QOBJECT(qnum_from_uint(va_arg(*ap, unsigned long long)));
464 } else if (!strcmp(token->str, "%s")) {
465 return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
466 } else if (!strcmp(token->str, "%f")) {
467 return QOBJECT(qnum_from_double(va_arg(*ap, double)));
468 }
469 return NULL;
470}
471
472static QObject *parse_literal(JSONParserContext *ctxt)
473{
474 JSONToken *token;
475
476 token = parser_context_pop_token(ctxt);
477 assert(token);
478
479 switch (token->type) {
480 case JSON_STRING:
481 return QOBJECT(parse_string(ctxt, token));
482 case JSON_INTEGER: {
483 /*
484 * Represent JSON_INTEGER as QNUM_I64 if possible, else as
485 * QNUM_U64, else as QNUM_DOUBLE. Note that qemu_strtoi64()
486 * and qemu_strtou64() fail with ERANGE when it's not
487 * possible.
488 *
489 * qnum_get_int() will then work for any signed 64-bit
490 * JSON_INTEGER, qnum_get_uint() for any unsigned 64-bit
491 * integer, and qnum_get_double() both for any JSON_INTEGER
492 * and any JSON_FLOAT (with precision loss for integers beyond
493 * 53 bits)
494 */
495 int ret;
496 int64_t value;
497 uint64_t uvalue;
498
499 ret = qemu_strtoi64(token->str, NULL, 10, &value);
500 if (!ret) {
501 return QOBJECT(qnum_from_int(value));
502 }
503 assert(ret == -ERANGE);
504
505 if (token->str[0] != '-') {
506 ret = qemu_strtou64(token->str, NULL, 10, &uvalue);
507 if (!ret) {
508 return QOBJECT(qnum_from_uint(uvalue));
509 }
510 assert(ret == -ERANGE);
511 }
512 /* fall through to JSON_FLOAT */
513 }
514 case JSON_FLOAT:
515 /* FIXME dependent on locale; a pervasive issue in QEMU */
516 /* FIXME our lexer matches RFC 7159 in forbidding Inf or NaN,
517 * but those might be useful extensions beyond JSON */
518 return QOBJECT(qnum_from_double(strtod(token->str, NULL)));
519 default:
520 abort();
521 }
522}
523
524static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
525{
526 JSONToken *token;
527
528 token = parser_context_peek_token(ctxt);
529 if (token == NULL) {
530 parse_error(ctxt, NULL, "premature EOI");
531 return NULL;
532 }
533
534 switch (token->type) {
535 case JSON_LCURLY:
536 return parse_object(ctxt, ap);
537 case JSON_LSQUARE:
538 return parse_array(ctxt, ap);
539 case JSON_ESCAPE:
540 return parse_escape(ctxt, ap);
541 case JSON_INTEGER:
542 case JSON_FLOAT:
543 case JSON_STRING:
544 return parse_literal(ctxt);
545 case JSON_KEYWORD:
546 return parse_keyword(ctxt);
547 default:
548 parse_error(ctxt, token, "expecting value");
549 return NULL;
550 }
551}
552
553QObject *json_parser_parse(GQueue *tokens, va_list *ap)
554{
555 return json_parser_parse_err(tokens, ap, NULL);
556}
557
558QObject *json_parser_parse_err(GQueue *tokens, va_list *ap, Error **errp)
559{
560 JSONParserContext *ctxt = parser_context_new(tokens);
561 QObject *result;
562
563 if (!ctxt) {
564 return NULL;
565 }
566
567 result = parse_value(ctxt, ap);
568
569 error_propagate(errp, ctxt->err);
570
571 parser_context_free(ctxt);
572
573 return result;
574}
This page took 0.02441 seconds and 4 git commands to generate.