4 * Copyright IBM, Corp. 2009
9 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10 * See the COPYING.LIB file in the top-level directory.
14 #include "qemu-common.h"
15 #include "qapi/qmp/json-lexer.h"
/* Upper bound on the length of a single token: 64 MiB (64 << 20 bytes).
 * json_lexer_feed_char() compares the token buffer's length against this
 * and force-emits/resets the token once the limit is exceeded. */
18 #define MAX_TOKEN_SIZE (64ULL << 20)
21 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
22 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
23 * -?(0|([1-9][0-9]*(\.[0-9]+)?([eE]([-+])?[0-9]+)?))
/* States of the lexer's state machine.  A state value selects a row of
 * json_lexer[].  IN_ERROR has to be 0 because the table relies on
 * zero-initialization of unlisted entries to mean "error".
 * NOTE(review): only part of the enumerator list is visible here. */
29 enum json_lexer_state {
30 IN_ERROR = 0, /* must really be 0, see json_lexer[] */
50 IN_NEG_NONZERO_NUMBER,
/* Build-time check on the relation between JSON_MIN and IN_START: the
 * condition tested is JSON_MIN <= IN_START.
 * NOTE(review): QEMU_BUILD_BUG_ON presumably breaks the build when its
 * condition is true, which would keep token type values above every lexer
 * state value - confirm against its definition and json-lexer.h. */
62 QEMU_BUILD_BUG_ON((int)JSON_MIN <= (int)IN_START);
/* Row-initializer helper: sets columns 0..0x7F of a table row to (state)
 * using a designated-initializer index range.  Designators written after
 * it in the same row replace individual columns. */
64 #define TERMINAL(state) [0 ... 0x7F] = (state)
66 /* Return whether TERMINAL is a terminal state and the transition to it
67 from OLD_STATE required lookahead. This happens whenever the table
68 below uses the TERMINAL macro. */
/* How the test works: TERMINAL(x) fills columns 0..0x7F of a row, so
 * column 0 of OLD_STATE's row holds x when that row was built with
 * TERMINAL(x).  Comparing column 0 with the new state detects that case. */
69 #define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
70 (json_lexer[(old_state)][0] == (terminal))
/* Transition table of the lexer: json_lexer[state][byte] gives the next
 * lexer state or, when a token is complete, its token type (for example
 * JSON_STRING).  Entries without an explicit initializer are 0, i.e.
 * IN_ERROR.  A row that starts with TERMINAL(x) maps every byte 0..0x7F to
 * x unless a later designator in the same row overrides that column.
 * NOTE(review): only part of the table is visible in this listing; several
 * row headers and entries are not shown. */
72 static const uint8_t json_lexer[][256] = {
73 /* Relies on default initialization to IN_ERROR! */
75 /* double quote string */
77 ['0' ... '9'] = IN_DQ_STRING,
78 ['a' ... 'f'] = IN_DQ_STRING,
79 ['A' ... 'F'] = IN_DQ_STRING,
82 ['0' ... '9'] = IN_DQ_UCODE3,
83 ['a' ... 'f'] = IN_DQ_UCODE3,
84 ['A' ... 'F'] = IN_DQ_UCODE3,
87 ['0' ... '9'] = IN_DQ_UCODE2,
88 ['a' ... 'f'] = IN_DQ_UCODE2,
89 ['A' ... 'F'] = IN_DQ_UCODE2,
92 ['0' ... '9'] = IN_DQ_UCODE1,
93 ['a' ... 'f'] = IN_DQ_UCODE1,
94 ['A' ... 'F'] = IN_DQ_UCODE1,
96 [IN_DQ_STRING_ESCAPE] = {
100 ['r'] = IN_DQ_STRING,
101 ['t'] = IN_DQ_STRING,
102 ['/'] = IN_DQ_STRING,
103 ['\\'] = IN_DQ_STRING,
104 ['\''] = IN_DQ_STRING,
105 ['\"'] = IN_DQ_STRING,
106 ['u'] = IN_DQ_UCODE0,
/* The two ranges below leave bytes 0, 0xC0, 0xC1 and 0xF5..0xFF without an
 * entry, so those bytes stay IN_ERROR unless another line of the row sets
 * them.  NOTE(review): presumably chosen because 0xC0, 0xC1 and 0xF5..0xFF
 * cannot occur in UTF-8 - confirm. */
109 [1 ... 0xBF] = IN_DQ_STRING,
110 [0xC2 ... 0xF4] = IN_DQ_STRING,
111 ['\\'] = IN_DQ_STRING_ESCAPE,
115 /* single quote string */
117 ['0' ... '9'] = IN_SQ_STRING,
118 ['a' ... 'f'] = IN_SQ_STRING,
119 ['A' ... 'F'] = IN_SQ_STRING,
122 ['0' ... '9'] = IN_SQ_UCODE3,
123 ['a' ... 'f'] = IN_SQ_UCODE3,
124 ['A' ... 'F'] = IN_SQ_UCODE3,
127 ['0' ... '9'] = IN_SQ_UCODE2,
128 ['a' ... 'f'] = IN_SQ_UCODE2,
129 ['A' ... 'F'] = IN_SQ_UCODE2,
132 ['0' ... '9'] = IN_SQ_UCODE1,
133 ['a' ... 'f'] = IN_SQ_UCODE1,
134 ['A' ... 'F'] = IN_SQ_UCODE1,
136 [IN_SQ_STRING_ESCAPE] = {
137 ['b'] = IN_SQ_STRING,
138 ['f'] = IN_SQ_STRING,
139 ['n'] = IN_SQ_STRING,
140 ['r'] = IN_SQ_STRING,
141 ['t'] = IN_SQ_STRING,
142 ['/'] = IN_SQ_STRING,
143 ['\\'] = IN_SQ_STRING,
144 ['\''] = IN_SQ_STRING,
145 ['\"'] = IN_SQ_STRING,
146 ['u'] = IN_SQ_UCODE0,
149 [1 ... 0xBF] = IN_SQ_STRING,
150 [0xC2 ... 0xF4] = IN_SQ_STRING,
151 ['\\'] = IN_SQ_STRING_ESCAPE,
152 ['\''] = JSON_STRING,
157 TERMINAL(JSON_INTEGER),
/* Overrides the TERMINAL() fill above: a digit in this state is an error
 * instead of ending an integer token. */
158 ['0' ... '9'] = IN_ERROR,
164 TERMINAL(JSON_FLOAT),
165 ['0' ... '9'] = IN_DIGITS,
169 ['0' ... '9'] = IN_DIGITS,
175 ['0' ... '9'] = IN_DIGITS,
178 [IN_MANTISSA_DIGITS] = {
179 TERMINAL(JSON_FLOAT),
180 ['0' ... '9'] = IN_MANTISSA_DIGITS,
186 ['0' ... '9'] = IN_MANTISSA_DIGITS,
190 [IN_NONZERO_NUMBER] = {
191 TERMINAL(JSON_INTEGER),
192 ['0' ... '9'] = IN_NONZERO_NUMBER,
198 [IN_NEG_NONZERO_NUMBER] = {
200 ['1' ... '9'] = IN_NONZERO_NUMBER,
205 TERMINAL(JSON_KEYWORD),
206 ['a' ... 'z'] = IN_KEYWORD,
212 [' '] = IN_WHITESPACE,
213 ['\t'] = IN_WHITESPACE,
214 ['\r'] = IN_WHITESPACE,
215 ['\n'] = IN_WHITESPACE,
225 ['l'] = IN_ESCAPE_LL,
233 ['4'] = IN_ESCAPE_I64,
237 ['6'] = IN_ESCAPE_I6,
252 ['"'] = IN_DQ_STRING,
253 ['\''] = IN_SQ_STRING,
255 ['1' ... '9'] = IN_NONZERO_NUMBER,
256 ['-'] = IN_NEG_NONZERO_NUMBER,
259 ['['] = JSON_LSQUARE,
260 [']'] = JSON_RSQUARE,
263 ['a' ... 'z'] = IN_KEYWORD,
265 [' '] = IN_WHITESPACE,
266 ['\t'] = IN_WHITESPACE,
267 ['\r'] = IN_WHITESPACE,
268 ['\n'] = IN_WHITESPACE,
/* Initialize a lexer: the state machine starts in IN_START, the token
 * buffer is a freshly allocated GString, and the x/y position counters
 * are reset to 0.
 * NOTE(review): the statement that stores `func` is not visible in this
 * listing; json_lexer_feed_char() calls lexer->emit, so presumably `func`
 * is saved there - confirm. */
272 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
275 lexer->state = IN_START;
276 lexer->token = g_string_sized_new(3);
277 lexer->x = lexer->y = 0;
/* Feed one byte to the lexer state machine.
 *
 * lexer: lexer whose state and token buffer are updated
 * ch:    input byte; it is converted to uint8_t to index the table
 * flush: when true, the transition loop stops after one pass even if the
 *        byte was not consumed
 *
 * Completed tokens and errors are reported through lexer->emit().
 * NOTE(review): only part of this function is visible in this listing; the
 * return-value convention is not shown here.
 */
280 static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
282 int char_consumed, new_state;
/* The state selects a row of json_lexer[], so it has to be strictly below
 * the row count; the former '<=' accepted an index one past the last row. */
291 assert(lexer->state < ARRAY_SIZE(json_lexer));
292 new_state = json_lexer[lexer->state][(uint8_t)ch];
293 char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
295 g_string_append_c(lexer->token, ch);
310 lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
313 g_string_truncate(lexer->token, 0);
314 new_state = IN_START;
317 /* XXX: To avoid having previous bad input leaving the parser in an
318 * unresponsive state where we consume unpredictable amounts of
319 * subsequent "good" input, percolate this error state up to the
320 * tokenizer/parser by forcing a NULL object to be emitted, then
323 * Also note that this handling is required for reliable channel
324 * negotiation between QMP and the guest agent, since chr(0xFF)
325 * is placed at the beginning of certain events to ensure proper
326 * delivery when the channel is in an unknown state. chr(0xFF) is
327 * never a valid ASCII/UTF-8 sequence, so this should reliably
328 * induce an error/flush state.
330 lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
331 g_string_truncate(lexer->token, 0);
332 new_state = IN_START;
333 lexer->state = new_state;
338 lexer->state = new_state;
339 } while (!char_consumed && !flush);
341 /* Do not let a single token grow to an arbitrarily large size,
342 * this is a security consideration.
344 if (lexer->token->len > MAX_TOKEN_SIZE) {
345 lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
346 g_string_truncate(lexer->token, 0);
347 lexer->state = IN_START;
/* Feed `size` bytes from `buffer` to the lexer, one byte at a time, by
 * calling json_lexer_feed_char() with flush set to false.
 * NOTE(review): what happens with the per-byte result (`err`) and what the
 * function returns are not visible in this listing. */
353 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
357 for (i = 0; i < size; i++) {
360 err = json_lexer_feed_char(lexer, buffer[i], false);
/* Flush pending lexer state.  Returns 0 right away when the lexer is
 * already in IN_START; otherwise feeds a 0 byte with flush set to true and
 * returns the result of that json_lexer_feed_char() call. */
369 int json_lexer_flush(JSONLexer *lexer)
371 return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
/* Release the lexer's token buffer; passing true to g_string_free() also
 * frees the buffer's character data.  The visible code does not free the
 * JSONLexer structure itself. */
374 void json_lexer_destroy(JSONLexer *lexer)
376 g_string_free(lexer->token, true);