]> Git Repo - qemu.git/blob - json-lexer.c
multiboot: Use signed type for negative error numbers
[qemu.git] / json-lexer.c
1 /*
2  * JSON lexer
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Anthony Liguori   <[email protected]>
8  *
9  * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
10  * See the COPYING.LIB file in the top-level directory.
11  *
12  */
13
14 #include "qstring.h"
15 #include "qlist.h"
16 #include "qdict.h"
17 #include "qint.h"
18 #include "qemu-common.h"
19 #include "json-lexer.h"
20
/*
 * Tokens recognized by the lexer, as regular expressions:
 *
 * \"([^\\\"]|(\\[\"'\\/bfnrt])|(\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
 * '([^\\']|(\\[\"'\\/bfnrt])|(\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
 * -?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?
 * [{}\[\],:]
 * [a-z]+
 * %(d|i|p|s|f|ld|lld)
 *
 */
29
/*
 * States of the lexer's state machine.
 *
 * ERROR has to stay 0: every transition that is not spelled out in the
 * json_lexer[] table is zero-initialized and therefore lands in ERROR.
 * The enumerators are listed in a fixed order; their numeric values are
 * used as row indexes of json_lexer[].
 */
enum json_lexer_state {
    ERROR = 0,

    /* closing quote of either string flavour has been consumed */
    IN_DONE_STRING,

    /* double-quoted string; UCODEn = n hex digits of a \u escape consumed */
    IN_DQ_UCODE3, IN_DQ_UCODE2, IN_DQ_UCODE1, IN_DQ_UCODE0,
    IN_DQ_STRING_ESCAPE,
    IN_DQ_STRING,

    /* single-quoted string, same layout as the double-quoted one */
    IN_SQ_UCODE3, IN_SQ_UCODE2, IN_SQ_UCODE1, IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers: a lone zero, exponent part, fraction part, integer part */
    IN_ZERO,
    IN_DIGITS, IN_DIGIT, IN_EXP_E,
    IN_MANTISSA, IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER, IN_NEG_NONZERO_NUMBER,

    /* lower-case keywords */
    IN_KEYWORD,

    /* '%' escapes */
    IN_ESCAPE, IN_ESCAPE_L, IN_ESCAPE_LL, IN_ESCAPE_DONE,

    /* whitespace between tokens */
    IN_WHITESPACE,

    /* single-character operator has been consumed */
    IN_OPERATOR_DONE,

    /* state used to begin every new token */
    IN_START,
};
62
63 #define TERMINAL(state) [0 ... 0x7F] = (state)
64
65 static const uint8_t json_lexer[][256] =  {
66     [IN_DONE_STRING] = {
67         TERMINAL(JSON_STRING),
68     },
69
70     /* double quote string */
71     [IN_DQ_UCODE3] = {
72         ['0' ... '9'] = IN_DQ_STRING,
73         ['a' ... 'f'] = IN_DQ_STRING,
74         ['A' ... 'F'] = IN_DQ_STRING,
75     },
76     [IN_DQ_UCODE2] = {
77         ['0' ... '9'] = IN_DQ_UCODE3,
78         ['a' ... 'f'] = IN_DQ_UCODE3,
79         ['A' ... 'F'] = IN_DQ_UCODE3,
80     },
81     [IN_DQ_UCODE1] = {
82         ['0' ... '9'] = IN_DQ_UCODE2,
83         ['a' ... 'f'] = IN_DQ_UCODE2,
84         ['A' ... 'F'] = IN_DQ_UCODE2,
85     },
86     [IN_DQ_UCODE0] = {
87         ['0' ... '9'] = IN_DQ_UCODE1,
88         ['a' ... 'f'] = IN_DQ_UCODE1,
89         ['A' ... 'F'] = IN_DQ_UCODE1,
90     },
91     [IN_DQ_STRING_ESCAPE] = {
92         ['b'] = IN_DQ_STRING,
93         ['f'] =  IN_DQ_STRING,
94         ['n'] =  IN_DQ_STRING,
95         ['r'] =  IN_DQ_STRING,
96         ['t'] =  IN_DQ_STRING,
97         ['\''] = IN_DQ_STRING,
98         ['\"'] = IN_DQ_STRING,
99         ['u'] = IN_DQ_UCODE0,
100     },
101     [IN_DQ_STRING] = {
102         [1 ... 0xFF] = IN_DQ_STRING,
103         ['\\'] = IN_DQ_STRING_ESCAPE,
104         ['"'] = IN_DONE_STRING,
105     },
106
107     /* single quote string */
108     [IN_SQ_UCODE3] = {
109         ['0' ... '9'] = IN_SQ_STRING,
110         ['a' ... 'f'] = IN_SQ_STRING,
111         ['A' ... 'F'] = IN_SQ_STRING,
112     },
113     [IN_SQ_UCODE2] = {
114         ['0' ... '9'] = IN_SQ_UCODE3,
115         ['a' ... 'f'] = IN_SQ_UCODE3,
116         ['A' ... 'F'] = IN_SQ_UCODE3,
117     },
118     [IN_SQ_UCODE1] = {
119         ['0' ... '9'] = IN_SQ_UCODE2,
120         ['a' ... 'f'] = IN_SQ_UCODE2,
121         ['A' ... 'F'] = IN_SQ_UCODE2,
122     },
123     [IN_SQ_UCODE0] = {
124         ['0' ... '9'] = IN_SQ_UCODE1,
125         ['a' ... 'f'] = IN_SQ_UCODE1,
126         ['A' ... 'F'] = IN_SQ_UCODE1,
127     },
128     [IN_SQ_STRING_ESCAPE] = {
129         ['b'] = IN_SQ_STRING,
130         ['f'] =  IN_SQ_STRING,
131         ['n'] =  IN_SQ_STRING,
132         ['r'] =  IN_SQ_STRING,
133         ['t'] =  IN_SQ_STRING,
134         ['\''] = IN_SQ_STRING,
135         ['\"'] = IN_SQ_STRING,
136         ['u'] = IN_SQ_UCODE0,
137     },
138     [IN_SQ_STRING] = {
139         [1 ... 0xFF] = IN_SQ_STRING,
140         ['\\'] = IN_SQ_STRING_ESCAPE,
141         ['\''] = IN_DONE_STRING,
142     },
143
144     /* Zero */
145     [IN_ZERO] = {
146         TERMINAL(JSON_INTEGER),
147         ['0' ... '9'] = ERROR,
148         ['.'] = IN_MANTISSA,
149     },
150
151     /* Float */
152     [IN_DIGITS] = {
153         TERMINAL(JSON_FLOAT),
154         ['0' ... '9'] = IN_DIGITS,
155     },
156
157     [IN_DIGIT] = {
158         ['0' ... '9'] = IN_DIGITS,
159     },
160
161     [IN_EXP_E] = {
162         ['-'] = IN_DIGIT,
163         ['+'] = IN_DIGIT,
164         ['0' ... '9'] = IN_DIGITS,
165     },
166
167     [IN_MANTISSA_DIGITS] = {
168         TERMINAL(JSON_FLOAT),
169         ['0' ... '9'] = IN_MANTISSA_DIGITS,
170         ['e'] = IN_EXP_E,
171         ['E'] = IN_EXP_E,
172     },
173
174     [IN_MANTISSA] = {
175         ['0' ... '9'] = IN_MANTISSA_DIGITS,
176     },
177
178     /* Number */
179     [IN_NONZERO_NUMBER] = {
180         TERMINAL(JSON_INTEGER),
181         ['0' ... '9'] = IN_NONZERO_NUMBER,
182         ['e'] = IN_EXP_E,
183         ['E'] = IN_EXP_E,
184         ['.'] = IN_MANTISSA,
185     },
186
187     [IN_NEG_NONZERO_NUMBER] = {
188         ['0'] = IN_ZERO,
189         ['1' ... '9'] = IN_NONZERO_NUMBER,
190     },
191
192     /* keywords */
193     [IN_KEYWORD] = {
194         TERMINAL(JSON_KEYWORD),
195         ['a' ... 'z'] = IN_KEYWORD,
196     },
197
198     /* whitespace */
199     [IN_WHITESPACE] = {
200         TERMINAL(JSON_SKIP),
201         [' '] = IN_WHITESPACE,
202         ['\t'] = IN_WHITESPACE,
203         ['\r'] = IN_WHITESPACE,
204         ['\n'] = IN_WHITESPACE,
205     },        
206
207     /* operator */
208     [IN_OPERATOR_DONE] = {
209         TERMINAL(JSON_OPERATOR),
210     },
211
212     /* escape */
213     [IN_ESCAPE_DONE] = {
214         TERMINAL(JSON_ESCAPE),
215     },
216
217     [IN_ESCAPE_LL] = {
218         ['d'] = IN_ESCAPE_DONE,
219     },
220
221     [IN_ESCAPE_L] = {
222         ['d'] = IN_ESCAPE_DONE,
223         ['l'] = IN_ESCAPE_LL,
224     },
225
226     [IN_ESCAPE] = {
227         ['d'] = IN_ESCAPE_DONE,
228         ['i'] = IN_ESCAPE_DONE,
229         ['p'] = IN_ESCAPE_DONE,
230         ['s'] = IN_ESCAPE_DONE,
231         ['f'] = IN_ESCAPE_DONE,
232         ['l'] = IN_ESCAPE_L,
233     },
234
235     /* top level rule */
236     [IN_START] = {
237         ['"'] = IN_DQ_STRING,
238         ['\''] = IN_SQ_STRING,
239         ['0'] = IN_ZERO,
240         ['1' ... '9'] = IN_NONZERO_NUMBER,
241         ['-'] = IN_NEG_NONZERO_NUMBER,
242         ['{'] = IN_OPERATOR_DONE,
243         ['}'] = IN_OPERATOR_DONE,
244         ['['] = IN_OPERATOR_DONE,
245         [']'] = IN_OPERATOR_DONE,
246         [','] = IN_OPERATOR_DONE,
247         [':'] = IN_OPERATOR_DONE,
248         ['a' ... 'z'] = IN_KEYWORD,
249         ['%'] = IN_ESCAPE,
250         [' '] = IN_WHITESPACE,
251         ['\t'] = IN_WHITESPACE,
252         ['\r'] = IN_WHITESPACE,
253         ['\n'] = IN_WHITESPACE,
254     },
255 };
256
257 void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
258 {
259     lexer->emit = func;
260     lexer->state = IN_START;
261     lexer->token = qstring_new();
262 }
263
264 static int json_lexer_feed_char(JSONLexer *lexer, char ch)
265 {
266     char buf[2];
267
268     lexer->x++;
269     if (ch == '\n') {
270         lexer->x = 0;
271         lexer->y++;
272     }
273
274     lexer->state = json_lexer[lexer->state][(uint8_t)ch];
275
276     switch (lexer->state) {
277     case JSON_OPERATOR:
278     case JSON_ESCAPE:
279     case JSON_INTEGER:
280     case JSON_FLOAT:
281     case JSON_KEYWORD:
282     case JSON_STRING:
283         lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
284     case JSON_SKIP:
285         lexer->state = json_lexer[IN_START][(uint8_t)ch];
286         QDECREF(lexer->token);
287         lexer->token = qstring_new();
288         break;
289     case ERROR:
290         return -EINVAL;
291     default:
292         break;
293     }
294
295     buf[0] = ch;
296     buf[1] = 0;
297
298     qstring_append(lexer->token, buf);
299
300     return 0;
301 }
302
303 int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
304 {
305     size_t i;
306
307     for (i = 0; i < size; i++) {
308         int err;
309
310         err = json_lexer_feed_char(lexer, buffer[i]);
311         if (err < 0) {
312             return err;
313         }
314     }
315
316     return 0;
317 }
318
319 int json_lexer_flush(JSONLexer *lexer)
320 {
321     return json_lexer_feed_char(lexer, 0);
322 }
323
324 void json_lexer_destroy(JSONLexer *lexer)
325 {
326     QDECREF(lexer->token);
327 }
This page took 0.043343 seconds and 4 git commands to generate.