]>
Commit | Line | Data |
---|---|---|
568ae7ef RH |
1 | #!/usr/bin/env python |
2 | # Copyright (c) 2018 Linaro Limited | |
3 | # | |
4 | # This library is free software; you can redistribute it and/or | |
5 | # modify it under the terms of the GNU Lesser General Public | |
6 | # License as published by the Free Software Foundation; either | |
7 | # version 2 of the License, or (at your option) any later version. | |
8 | # | |
9 | # This library is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | # Lesser General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Lesser General Public | |
15 | # License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
16 | # | |
17 | ||
18 | # | |
19 | # Generate a decoding tree from a specification file. | |
20 | # | |
21 | # The tree is built from instruction "patterns". A pattern may represent | |
22 | # a single architectural instruction or a group of same, depending on what | |
23 | # is convenient for further processing. | |
24 | # | |
25 | # Each pattern has "fixedbits" & "fixedmask", the combination of which | |
26 | # describes the condition under which the pattern is matched: | |
27 | # | |
28 | # (insn & fixedmask) == fixedbits | |
29 | # | |
30 | # Each pattern may have "fields", which are extracted from the insn and | |
31 | # passed along to the translator. Examples of such are registers, | |
32 | # immediates, and sub-opcodes. | |
33 | # | |
34 | # In support of patterns, one may declare fields, argument sets, and | |
35 | # formats, each of which may be re-used to simplify further definitions. | |
36 | # | |
37 | # *** Field syntax: | |
38 | # | |
39 | # field_def := '%' identifier ( unnamed_field )+ ( !function=identifier )? | |
40 | # unnamed_field := number ':' ( 's' ) number | |
41 | # | |
42 | # For unnamed_field, the first number is the least-significant bit position of | |
43 | # the field and the second number is the length of the field. If the 's' is | |
44 | # present, the field is considered signed. If multiple unnamed_fields are | |
45 | # present, they are concatenated. In this way one can define disjoint fields. | |
46 | # | |
47 | # If !function is specified, the concatenated result is passed through the | |
48 | # named function, taking and returning an integral value. | |
49 | # | |
50 | # FIXME: the fields of the structure into which this result will be stored | |
51 | # is restricted to "int". Which means that we cannot expand 64-bit items. | |
52 | # | |
53 | # Field examples: | |
54 | # | |
55 | # %disp 0:s16 -- sextract(i, 0, 16) | |
56 | # %imm9 16:6 10:3 -- extract(i, 16, 6) << 3 | extract(i, 10, 3) | |
57 | # %disp12 0:s1 1:1 2:10 -- sextract(i, 0, 1) << 11 | |
58 | # | extract(i, 1, 1) << 10 | |
59 | # | extract(i, 2, 10) | |
60 | # %shimm8 5:s8 13:1 !function=expand_shimm8 | |
61 | # -- expand_shimm8(sextract(i, 5, 8) << 1 | |
62 | # | extract(i, 13, 1)) | |
63 | # | |
64 | # *** Argument set syntax: | |
65 | # | |
66 | # args_def := '&' identifier ( args_elt )+ | |
67 | # args_elt := identifier | |
68 | # | |
69 | # Each args_elt defines an argument within the argument set. | |
70 | # Each argument set will be rendered as a C structure "arg_$name" | |
71 | # with each of the fields being one of the member arguments. | |
72 | # | |
73 | # Argument set examples: | |
74 | # | |
75 | # ®3 ra rb rc | |
76 | # &loadstore reg base offset | |
77 | # | |
78 | # *** Format syntax: | |
79 | # | |
80 | # fmt_def := '@' identifier ( fmt_elt )+ | |
81 | # fmt_elt := fixedbit_elt | field_elt | field_ref | args_ref | |
82 | # fixedbit_elt := [01.-]+ | |
83 | # field_elt := identifier ':' 's'? number | |
84 | # field_ref := '%' identifier | identifier '=' '%' identifier | |
85 | # args_ref := '&' identifier | |
86 | # | |
87 | # Defining a format is a handy way to avoid replicating groups of fields | |
88 | # across many instruction patterns. | |
89 | # | |
90 | # A fixedbit_elt describes a contiguous sequence of bits that must | |
91 | # be 1, 0, [.-] for don't care. The difference between '.' and '-' | |
92 | # is that '.' means that the bit will be covered with a field or a | |
93 | # final [01] from the pattern, and '-' means that the bit is really | |
94 | # ignored by the cpu and will not be specified. | |
95 | # | |
96 | # A field_elt describes a simple field only given a width; the position of | |
97 | # the field is implied by its position with respect to other fixedbit_elt | |
98 | # and field_elt. | |
99 | # | |
100 | # If any fixedbit_elt or field_elt appear then all bits must be defined. | |
101 | # Padding with a fixedbit_elt of all '.' is an easy way to accomplish that. | |
102 | # | |
103 | # A field_ref incorporates a field by reference. This is the only way to | |
104 | # add a complex field to a format. A field may be renamed in the process | |
105 | # via assignment to another identifier. This is intended to allow the | |
106 | # same argument set be used with disjoint named fields. | |
107 | # | |
108 | # A single args_ref may specify an argument set to use for the format. | |
109 | # The set of fields in the format must be a subset of the arguments in | |
110 | # the argument set. If an argument set is not specified, one will be | |
111 | # inferred from the set of fields. | |
112 | # | |
113 | # It is recommended, but not required, that all field_ref and args_ref | |
114 | # appear at the end of the line, not interleaving with fixedbit_elf or | |
115 | # field_elt. | |
116 | # | |
117 | # Format examples: | |
118 | # | |
119 | # @opr ...... ra:5 rb:5 ... 0 ....... rc:5 | |
120 | # @opi ...... ra:5 lit:8 1 ....... rc:5 | |
121 | # | |
122 | # *** Pattern syntax: | |
123 | # | |
124 | # pat_def := identifier ( pat_elt )+ | |
125 | # pat_elt := fixedbit_elt | field_elt | field_ref | |
126 | # | args_ref | fmt_ref | const_elt | |
127 | # fmt_ref := '@' identifier | |
128 | # const_elt := identifier '=' number | |
129 | # | |
130 | # The fixedbit_elt and field_elt specifiers are unchanged from formats. | |
131 | # A pattern that does not specify a named format will have one inferred | |
132 | # from a referenced argument set (if present) and the set of fields. | |
133 | # | |
134 | # A const_elt allows a argument to be set to a constant value. This may | |
135 | # come in handy when fields overlap between patterns and one has to | |
136 | # include the values in the fixedbit_elt instead. | |
137 | # | |
138 | # The decoder will call a translator function for each pattern matched. | |
139 | # | |
140 | # Pattern examples: | |
141 | # | |
142 | # addl_r 010000 ..... ..... .... 0000000 ..... @opr | |
143 | # addl_i 010000 ..... ..... .... 0000000 ..... @opi | |
144 | # | |
145 | # which will, in part, invoke | |
146 | # | |
147 | # trans_addl_r(ctx, &arg_opr, insn) | |
148 | # and | |
149 | # trans_addl_i(ctx, &arg_opi, insn) | |
150 | # | |
151 | ||
152 | import io | |
153 | import os | |
154 | import re | |
155 | import sys | |
156 | import getopt | |
157 | import pdb | |
158 | ||
159 | insnwidth = 32 | |
160 | insnmask = 0xffffffff | |
161 | fields = {} | |
162 | arguments = {} | |
163 | formats = {} | |
164 | patterns = [] | |
165 | ||
166 | translate_prefix = 'trans' | |
167 | translate_scope = 'static ' | |
168 | input_file = '' | |
169 | output_file = None | |
170 | output_fd = None | |
171 | insntype = 'uint32_t' | |
172 | ||
173 | re_ident = '[a-zA-Z][a-zA-Z0-9_]*' | |
174 | ||
175 | ||
176 | def error(lineno, *args): | |
177 | """Print an error message from file:line and args and exit.""" | |
178 | global output_file | |
179 | global output_fd | |
180 | ||
181 | if lineno: | |
182 | r = '{0}:{1}: error:'.format(input_file, lineno) | |
183 | elif input_file: | |
184 | r = '{0}: error:'.format(input_file) | |
185 | else: | |
186 | r = 'error:' | |
187 | for a in args: | |
188 | r += ' ' + str(a) | |
189 | r += '\n' | |
190 | sys.stderr.write(r) | |
191 | if output_file and output_fd: | |
192 | output_fd.close() | |
193 | os.remove(output_file) | |
194 | exit(1) | |
195 | ||
196 | ||
197 | def output(*args): | |
198 | global output_fd | |
199 | for a in args: | |
200 | output_fd.write(a) | |
201 | ||
202 | ||
203 | if sys.version_info >= (3, 0): | |
204 | re_fullmatch = re.fullmatch | |
205 | else: | |
206 | def re_fullmatch(pat, str): | |
207 | return re.match('^' + pat + '$', str) | |
208 | ||
209 | ||
210 | def output_autogen(): | |
211 | output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') | |
212 | ||
213 | ||
214 | def str_indent(c): | |
215 | """Return a string with C spaces""" | |
216 | return ' ' * c | |
217 | ||
218 | ||
219 | def str_fields(fields): | |
220 | """Return a string uniquely identifing FIELDS""" | |
221 | r = '' | |
222 | for n in sorted(fields.keys()): | |
223 | r += '_' + n | |
224 | return r[1:] | |
225 | ||
226 | ||
227 | def str_match_bits(bits, mask): | |
228 | """Return a string pretty-printing BITS/MASK""" | |
229 | global insnwidth | |
230 | ||
231 | i = 1 << (insnwidth - 1) | |
232 | space = 0x01010100 | |
233 | r = '' | |
234 | while i != 0: | |
235 | if i & mask: | |
236 | if i & bits: | |
237 | r += '1' | |
238 | else: | |
239 | r += '0' | |
240 | else: | |
241 | r += '.' | |
242 | if i & space: | |
243 | r += ' ' | |
244 | i >>= 1 | |
245 | return r | |
246 | ||
247 | ||
248 | def is_pow2(x): | |
249 | """Return true iff X is equal to a power of 2.""" | |
250 | return (x & (x - 1)) == 0 | |
251 | ||
252 | ||
253 | def ctz(x): | |
254 | """Return the number of times 2 factors into X.""" | |
255 | r = 0 | |
256 | while ((x >> r) & 1) == 0: | |
257 | r += 1 | |
258 | return r | |
259 | ||
260 | ||
261 | def is_contiguous(bits): | |
262 | shift = ctz(bits) | |
263 | if is_pow2((bits >> shift) + 1): | |
264 | return shift | |
265 | else: | |
266 | return -1 | |
267 | ||
268 | ||
269 | def eq_fields_for_args(flds_a, flds_b): | |
270 | if len(flds_a) != len(flds_b): | |
271 | return False | |
272 | for k, a in flds_a.items(): | |
273 | if k not in flds_b: | |
274 | return False | |
275 | return True | |
276 | ||
277 | ||
278 | def eq_fields_for_fmts(flds_a, flds_b): | |
279 | if len(flds_a) != len(flds_b): | |
280 | return False | |
281 | for k, a in flds_a.items(): | |
282 | if k not in flds_b: | |
283 | return False | |
284 | b = flds_b[k] | |
285 | if a.__class__ != b.__class__ or a != b: | |
286 | return False | |
287 | return True | |
288 | ||
289 | ||
290 | class Field: | |
291 | """Class representing a simple instruction field""" | |
292 | def __init__(self, sign, pos, len): | |
293 | self.sign = sign | |
294 | self.pos = pos | |
295 | self.len = len | |
296 | self.mask = ((1 << len) - 1) << pos | |
297 | ||
298 | def __str__(self): | |
299 | if self.sign: | |
300 | s = 's' | |
301 | else: | |
302 | s = '' | |
303 | return str(pos) + ':' + s + str(len) | |
304 | ||
305 | def str_extract(self): | |
306 | if self.sign: | |
307 | extr = 'sextract32' | |
308 | else: | |
309 | extr = 'extract32' | |
310 | return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len) | |
311 | ||
312 | def __eq__(self, other): | |
313 | return self.sign == other.sign and self.sign == other.sign | |
314 | ||
315 | def __ne__(self, other): | |
316 | return not self.__eq__(other) | |
317 | # end Field | |
318 | ||
319 | ||
320 | class MultiField: | |
321 | """Class representing a compound instruction field""" | |
322 | def __init__(self, subs, mask): | |
323 | self.subs = subs | |
324 | self.sign = subs[0].sign | |
325 | self.mask = mask | |
326 | ||
327 | def __str__(self): | |
328 | return str(self.subs) | |
329 | ||
330 | def str_extract(self): | |
331 | ret = '0' | |
332 | pos = 0 | |
333 | for f in reversed(self.subs): | |
334 | if pos == 0: | |
335 | ret = f.str_extract() | |
336 | else: | |
337 | ret = 'deposit32({0}, {1}, {2}, {3})' \ | |
338 | .format(ret, pos, 32 - pos, f.str_extract()) | |
339 | pos += f.len | |
340 | return ret | |
341 | ||
342 | def __ne__(self, other): | |
343 | if len(self.subs) != len(other.subs): | |
344 | return True | |
345 | for a, b in zip(self.subs, other.subs): | |
346 | if a.__class__ != b.__class__ or a != b: | |
347 | return True | |
348 | return False | |
349 | ||
350 | def __eq__(self, other): | |
351 | return not self.__ne__(other) | |
352 | # end MultiField | |
353 | ||
354 | ||
355 | class ConstField: | |
356 | """Class representing an argument field with constant value""" | |
357 | def __init__(self, value): | |
358 | self.value = value | |
359 | self.mask = 0 | |
360 | self.sign = value < 0 | |
361 | ||
362 | def __str__(self): | |
363 | return str(self.value) | |
364 | ||
365 | def str_extract(self): | |
366 | return str(self.value) | |
367 | ||
368 | def __cmp__(self, other): | |
369 | return self.value - other.value | |
370 | # end ConstField | |
371 | ||
372 | ||
373 | class FunctionField: | |
374 | """Class representing a field passed through an expander""" | |
375 | def __init__(self, func, base): | |
376 | self.mask = base.mask | |
377 | self.sign = base.sign | |
378 | self.base = base | |
379 | self.func = func | |
380 | ||
381 | def __str__(self): | |
382 | return self.func + '(' + str(self.base) + ')' | |
383 | ||
384 | def str_extract(self): | |
385 | return self.func + '(' + self.base.str_extract() + ')' | |
386 | ||
387 | def __eq__(self, other): | |
388 | return self.func == other.func and self.base == other.base | |
389 | ||
390 | def __ne__(self, other): | |
391 | return not self.__eq__(other) | |
392 | # end FunctionField | |
393 | ||
394 | ||
395 | class Arguments: | |
396 | """Class representing the extracted fields of a format""" | |
397 | def __init__(self, nm, flds): | |
398 | self.name = nm | |
399 | self.fields = sorted(flds) | |
400 | ||
401 | def __str__(self): | |
402 | return self.name + ' ' + str(self.fields) | |
403 | ||
404 | def struct_name(self): | |
405 | return 'arg_' + self.name | |
406 | ||
407 | def output_def(self): | |
408 | output('typedef struct {\n') | |
409 | for n in self.fields: | |
410 | output(' int ', n, ';\n') | |
411 | output('} ', self.struct_name(), ';\n\n') | |
412 | # end Arguments | |
413 | ||
414 | ||
415 | class General: | |
416 | """Common code between instruction formats and instruction patterns""" | |
417 | def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds): | |
418 | self.name = name | |
419 | self.lineno = lineno | |
420 | self.base = base | |
421 | self.fixedbits = fixb | |
422 | self.fixedmask = fixm | |
423 | self.undefmask = udfm | |
424 | self.fieldmask = fldm | |
425 | self.fields = flds | |
426 | ||
427 | def __str__(self): | |
428 | r = self.name | |
429 | if self.base: | |
430 | r = r + ' ' + self.base.name | |
431 | else: | |
432 | r = r + ' ' + str(self.fields) | |
433 | r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask) | |
434 | return r | |
435 | ||
436 | def str1(self, i): | |
437 | return str_indent(i) + self.__str__() | |
438 | # end General | |
439 | ||
440 | ||
441 | class Format(General): | |
442 | """Class representing an instruction format""" | |
443 | ||
444 | def extract_name(self): | |
445 | return 'extract_' + self.name | |
446 | ||
447 | def output_extract(self): | |
448 | output('static void ', self.extract_name(), '(', | |
449 | self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') | |
450 | for n, f in self.fields.items(): | |
451 | output(' a->', n, ' = ', f.str_extract(), ';\n') | |
452 | output('}\n\n') | |
453 | # end Format | |
454 | ||
455 | ||
456 | class Pattern(General): | |
457 | """Class representing an instruction pattern""" | |
458 | ||
459 | def output_decl(self): | |
460 | global translate_scope | |
461 | global translate_prefix | |
462 | output('typedef ', self.base.base.struct_name(), | |
463 | ' arg_', self.name, ';\n') | |
76805598 | 464 | output(translate_scope, 'bool ', translate_prefix, '_', self.name, |
568ae7ef RH |
465 | '(DisasContext *ctx, arg_', self.name, |
466 | ' *a, ', insntype, ' insn);\n') | |
467 | ||
468 | def output_code(self, i, extracted, outerbits, outermask): | |
469 | global translate_prefix | |
470 | ind = str_indent(i) | |
471 | arg = self.base.base.name | |
472 | output(ind, '/* line ', str(self.lineno), ' */\n') | |
473 | if not extracted: | |
474 | output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n') | |
475 | for n, f in self.fields.items(): | |
476 | output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') | |
76805598 | 477 | output(ind, 'return ', translate_prefix, '_', self.name, |
568ae7ef | 478 | '(ctx, &u.f_', arg, ', insn);\n') |
568ae7ef RH |
479 | # end Pattern |
480 | ||
481 | ||
482 | def parse_field(lineno, name, toks): | |
483 | """Parse one instruction field from TOKS at LINENO""" | |
484 | global fields | |
485 | global re_ident | |
486 | global insnwidth | |
487 | ||
488 | # A "simple" field will have only one entry; | |
489 | # a "multifield" will have several. | |
490 | subs = [] | |
491 | width = 0 | |
492 | func = None | |
493 | for t in toks: | |
494 | if re_fullmatch('!function=' + re_ident, t): | |
495 | if func: | |
496 | error(lineno, 'duplicate function') | |
497 | func = t.split('=') | |
498 | func = func[1] | |
499 | continue | |
500 | ||
501 | if re_fullmatch('[0-9]+:s[0-9]+', t): | |
502 | # Signed field extract | |
503 | subtoks = t.split(':s') | |
504 | sign = True | |
505 | elif re_fullmatch('[0-9]+:[0-9]+', t): | |
506 | # Unsigned field extract | |
507 | subtoks = t.split(':') | |
508 | sign = False | |
509 | else: | |
510 | error(lineno, 'invalid field token "{0}"'.format(t)) | |
511 | po = int(subtoks[0]) | |
512 | le = int(subtoks[1]) | |
513 | if po + le > insnwidth: | |
514 | error(lineno, 'field {0} too large'.format(t)) | |
515 | f = Field(sign, po, le) | |
516 | subs.append(f) | |
517 | width += le | |
518 | ||
519 | if width > insnwidth: | |
520 | error(lineno, 'field too large') | |
521 | if len(subs) == 1: | |
522 | f = subs[0] | |
523 | else: | |
524 | mask = 0 | |
525 | for s in subs: | |
526 | if mask & s.mask: | |
527 | error(lineno, 'field components overlap') | |
528 | mask |= s.mask | |
529 | f = MultiField(subs, mask) | |
530 | if func: | |
531 | f = FunctionField(func, f) | |
532 | ||
533 | if name in fields: | |
534 | error(lineno, 'duplicate field', name) | |
535 | fields[name] = f | |
536 | # end parse_field | |
537 | ||
538 | ||
539 | def parse_arguments(lineno, name, toks): | |
540 | """Parse one argument set from TOKS at LINENO""" | |
541 | global arguments | |
542 | global re_ident | |
543 | ||
544 | flds = [] | |
545 | for t in toks: | |
546 | if not re_fullmatch(re_ident, t): | |
547 | error(lineno, 'invalid argument set token "{0}"'.format(t)) | |
548 | if t in flds: | |
549 | error(lineno, 'duplicate argument "{0}"'.format(t)) | |
550 | flds.append(t) | |
551 | ||
552 | if name in arguments: | |
553 | error(lineno, 'duplicate argument set', name) | |
554 | arguments[name] = Arguments(name, flds) | |
555 | # end parse_arguments | |
556 | ||
557 | ||
558 | def lookup_field(lineno, name): | |
559 | global fields | |
560 | if name in fields: | |
561 | return fields[name] | |
562 | error(lineno, 'undefined field', name) | |
563 | ||
564 | ||
565 | def add_field(lineno, flds, new_name, f): | |
566 | if new_name in flds: | |
567 | error(lineno, 'duplicate field', new_name) | |
568 | flds[new_name] = f | |
569 | return flds | |
570 | ||
571 | ||
572 | def add_field_byname(lineno, flds, new_name, old_name): | |
573 | return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) | |
574 | ||
575 | ||
576 | def infer_argument_set(flds): | |
577 | global arguments | |
578 | ||
579 | for arg in arguments.values(): | |
580 | if eq_fields_for_args(flds, arg.fields): | |
581 | return arg | |
582 | ||
583 | name = str(len(arguments)) | |
584 | arg = Arguments(name, flds.keys()) | |
585 | arguments[name] = arg | |
586 | return arg | |
587 | ||
588 | ||
589 | def infer_format(arg, fieldmask, flds): | |
590 | global arguments | |
591 | global formats | |
592 | ||
593 | const_flds = {} | |
594 | var_flds = {} | |
595 | for n, c in flds.items(): | |
596 | if c is ConstField: | |
597 | const_flds[n] = c | |
598 | else: | |
599 | var_flds[n] = c | |
600 | ||
601 | # Look for an existing format with the same argument set and fields | |
602 | for fmt in formats.values(): | |
603 | if arg and fmt.base != arg: | |
604 | continue | |
605 | if fieldmask != fmt.fieldmask: | |
606 | continue | |
607 | if not eq_fields_for_fmts(flds, fmt.fields): | |
608 | continue | |
609 | return (fmt, const_flds) | |
610 | ||
611 | name = 'Fmt_' + str(len(formats)) | |
612 | if not arg: | |
613 | arg = infer_argument_set(flds) | |
614 | ||
615 | fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds) | |
616 | formats[name] = fmt | |
617 | ||
618 | return (fmt, const_flds) | |
619 | # end infer_format | |
620 | ||
621 | ||
622 | def parse_generic(lineno, is_format, name, toks): | |
623 | """Parse one instruction format from TOKS at LINENO""" | |
624 | global fields | |
625 | global arguments | |
626 | global formats | |
627 | global patterns | |
628 | global re_ident | |
629 | global insnwidth | |
630 | global insnmask | |
631 | ||
632 | fixedmask = 0 | |
633 | fixedbits = 0 | |
634 | undefmask = 0 | |
635 | width = 0 | |
636 | flds = {} | |
637 | arg = None | |
638 | fmt = None | |
639 | for t in toks: | |
640 | # '&Foo' gives a format an explcit argument set. | |
641 | if t[0] == '&': | |
642 | tt = t[1:] | |
643 | if arg: | |
644 | error(lineno, 'multiple argument sets') | |
645 | if tt in arguments: | |
646 | arg = arguments[tt] | |
647 | else: | |
648 | error(lineno, 'undefined argument set', t) | |
649 | continue | |
650 | ||
651 | # '@Foo' gives a pattern an explicit format. | |
652 | if t[0] == '@': | |
653 | tt = t[1:] | |
654 | if fmt: | |
655 | error(lineno, 'multiple formats') | |
656 | if tt in formats: | |
657 | fmt = formats[tt] | |
658 | else: | |
659 | error(lineno, 'undefined format', t) | |
660 | continue | |
661 | ||
662 | # '%Foo' imports a field. | |
663 | if t[0] == '%': | |
664 | tt = t[1:] | |
665 | flds = add_field_byname(lineno, flds, tt, tt) | |
666 | continue | |
667 | ||
668 | # 'Foo=%Bar' imports a field with a different name. | |
669 | if re_fullmatch(re_ident + '=%' + re_ident, t): | |
670 | (fname, iname) = t.split('=%') | |
671 | flds = add_field_byname(lineno, flds, fname, iname) | |
672 | continue | |
673 | ||
674 | # 'Foo=number' sets an argument field to a constant value | |
675 | if re_fullmatch(re_ident + '=[0-9]+', t): | |
676 | (fname, value) = t.split('=') | |
677 | value = int(value) | |
678 | flds = add_field(lineno, flds, fname, ConstField(value)) | |
679 | continue | |
680 | ||
681 | # Pattern of 0s, 1s, dots and dashes indicate required zeros, | |
682 | # required ones, or dont-cares. | |
683 | if re_fullmatch('[01.-]+', t): | |
684 | shift = len(t) | |
685 | fms = t.replace('0', '1') | |
686 | fms = fms.replace('.', '0') | |
687 | fms = fms.replace('-', '0') | |
688 | fbs = t.replace('.', '0') | |
689 | fbs = fbs.replace('-', '0') | |
690 | ubm = t.replace('1', '0') | |
691 | ubm = ubm.replace('.', '0') | |
692 | ubm = ubm.replace('-', '1') | |
693 | fms = int(fms, 2) | |
694 | fbs = int(fbs, 2) | |
695 | ubm = int(ubm, 2) | |
696 | fixedbits = (fixedbits << shift) | fbs | |
697 | fixedmask = (fixedmask << shift) | fms | |
698 | undefmask = (undefmask << shift) | ubm | |
699 | # Otherwise, fieldname:fieldwidth | |
700 | elif re_fullmatch(re_ident + ':s?[0-9]+', t): | |
701 | (fname, flen) = t.split(':') | |
702 | sign = False | |
703 | if flen[0] == 's': | |
704 | sign = True | |
705 | flen = flen[1:] | |
706 | shift = int(flen, 10) | |
707 | f = Field(sign, insnwidth - width - shift, shift) | |
708 | flds = add_field(lineno, flds, fname, f) | |
709 | fixedbits <<= shift | |
710 | fixedmask <<= shift | |
711 | undefmask <<= shift | |
712 | else: | |
713 | error(lineno, 'invalid token "{0}"'.format(t)) | |
714 | width += shift | |
715 | ||
716 | # We should have filled in all of the bits of the instruction. | |
717 | if not (is_format and width == 0) and width != insnwidth: | |
718 | error(lineno, 'definition has {0} bits'.format(width)) | |
719 | ||
720 | # Do not check for fields overlaping fields; one valid usage | |
721 | # is to be able to duplicate fields via import. | |
722 | fieldmask = 0 | |
723 | for f in flds.values(): | |
724 | fieldmask |= f.mask | |
725 | ||
726 | # Fix up what we've parsed to match either a format or a pattern. | |
727 | if is_format: | |
728 | # Formats cannot reference formats. | |
729 | if fmt: | |
730 | error(lineno, 'format referencing format') | |
731 | # If an argument set is given, then there should be no fields | |
732 | # without a place to store it. | |
733 | if arg: | |
734 | for f in flds.keys(): | |
735 | if f not in arg.fields: | |
736 | error(lineno, 'field {0} not in argument set {1}' | |
737 | .format(f, arg.name)) | |
738 | else: | |
739 | arg = infer_argument_set(flds) | |
740 | if name in formats: | |
741 | error(lineno, 'duplicate format name', name) | |
742 | fmt = Format(name, lineno, arg, fixedbits, fixedmask, | |
743 | undefmask, fieldmask, flds) | |
744 | formats[name] = fmt | |
745 | else: | |
746 | # Patterns can reference a format ... | |
747 | if fmt: | |
748 | # ... but not an argument simultaneously | |
749 | if arg: | |
750 | error(lineno, 'pattern specifies both format and argument set') | |
751 | if fixedmask & fmt.fixedmask: | |
752 | error(lineno, 'pattern fixed bits overlap format fixed bits') | |
753 | fieldmask |= fmt.fieldmask | |
754 | fixedbits |= fmt.fixedbits | |
755 | fixedmask |= fmt.fixedmask | |
756 | undefmask |= fmt.undefmask | |
757 | else: | |
758 | (fmt, flds) = infer_format(arg, fieldmask, flds) | |
759 | arg = fmt.base | |
760 | for f in flds.keys(): | |
761 | if f not in arg.fields: | |
762 | error(lineno, 'field {0} not in argument set {1}' | |
763 | .format(f, arg.name)) | |
764 | if f in fmt.fields.keys(): | |
765 | error(lineno, 'field {0} set by format and pattern'.format(f)) | |
766 | for f in arg.fields: | |
767 | if f not in flds.keys() and f not in fmt.fields.keys(): | |
768 | error(lineno, 'field {0} not initialized'.format(f)) | |
769 | pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, | |
770 | undefmask, fieldmask, flds) | |
771 | patterns.append(pat) | |
772 | ||
773 | # Validate the masks that we have assembled. | |
774 | if fieldmask & fixedmask: | |
775 | error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})' | |
776 | .format(fieldmask, fixedmask)) | |
777 | if fieldmask & undefmask: | |
778 | error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' | |
779 | .format(fieldmask, undefmask)) | |
780 | if fixedmask & undefmask: | |
781 | error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' | |
782 | .format(fixedmask, undefmask)) | |
783 | if not is_format: | |
784 | allbits = fieldmask | fixedmask | undefmask | |
785 | if allbits != insnmask: | |
786 | error(lineno, 'bits left unspecified (0x{0:08x})' | |
787 | .format(allbits ^ insnmask)) | |
788 | # end parse_general | |
789 | ||
790 | ||
791 | def parse_file(f): | |
792 | """Parse all of the patterns within a file""" | |
793 | ||
794 | # Read all of the lines of the file. Concatenate lines | |
795 | # ending in backslash; discard empty lines and comments. | |
796 | toks = [] | |
797 | lineno = 0 | |
798 | for line in f: | |
799 | lineno += 1 | |
800 | ||
801 | # Discard comments | |
802 | end = line.find('#') | |
803 | if end >= 0: | |
804 | line = line[:end] | |
805 | ||
806 | t = line.split() | |
807 | if len(toks) != 0: | |
808 | # Next line after continuation | |
809 | toks.extend(t) | |
810 | elif len(t) == 0: | |
811 | # Empty line | |
812 | continue | |
813 | else: | |
814 | toks = t | |
815 | ||
816 | # Continuation? | |
817 | if toks[-1] == '\\': | |
818 | toks.pop() | |
819 | continue | |
820 | ||
821 | if len(toks) < 2: | |
822 | error(lineno, 'short line') | |
823 | ||
824 | name = toks[0] | |
825 | del toks[0] | |
826 | ||
827 | # Determine the type of object needing to be parsed. | |
828 | if name[0] == '%': | |
829 | parse_field(lineno, name[1:], toks) | |
830 | elif name[0] == '&': | |
831 | parse_arguments(lineno, name[1:], toks) | |
832 | elif name[0] == '@': | |
833 | parse_generic(lineno, True, name[1:], toks) | |
834 | else: | |
835 | parse_generic(lineno, False, name, toks) | |
836 | toks = [] | |
837 | # end parse_file | |
838 | ||
839 | ||
840 | class Tree: | |
841 | """Class representing a node in a decode tree""" | |
842 | ||
843 | def __init__(self, fm, tm): | |
844 | self.fixedmask = fm | |
845 | self.thismask = tm | |
846 | self.subs = [] | |
847 | self.base = None | |
848 | ||
849 | def str1(self, i): | |
850 | ind = str_indent(i) | |
851 | r = '{0}{1:08x}'.format(ind, self.fixedmask) | |
852 | if self.format: | |
853 | r += ' ' + self.format.name | |
854 | r += ' [\n' | |
855 | for (b, s) in self.subs: | |
856 | r += '{0} {1:08x}:\n'.format(ind, b) | |
857 | r += s.str1(i + 4) + '\n' | |
858 | r += ind + ']' | |
859 | return r | |
860 | ||
861 | def __str__(self): | |
862 | return self.str1(0) | |
863 | ||
864 | def output_code(self, i, extracted, outerbits, outermask): | |
865 | ind = str_indent(i) | |
866 | ||
867 | # If we identified all nodes below have the same format, | |
868 | # extract the fields now. | |
869 | if not extracted and self.base: | |
870 | output(ind, self.base.extract_name(), | |
871 | '(&u.f_', self.base.base.name, ', insn);\n') | |
872 | extracted = True | |
873 | ||
874 | # Attempt to aid the compiler in producing compact switch statements. | |
875 | # If the bits in the mask are contiguous, extract them. | |
876 | sh = is_contiguous(self.thismask) | |
877 | if sh > 0: | |
878 | # Propagate SH down into the local functions. | |
879 | def str_switch(b, sh=sh): | |
880 | return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) | |
881 | ||
882 | def str_case(b, sh=sh): | |
883 | return '0x{0:x}'.format(b >> sh) | |
884 | else: | |
885 | def str_switch(b): | |
886 | return 'insn & 0x{0:08x}'.format(b) | |
887 | ||
888 | def str_case(b): | |
889 | return '0x{0:08x}'.format(b) | |
890 | ||
891 | output(ind, 'switch (', str_switch(self.thismask), ') {\n') | |
892 | for b, s in sorted(self.subs): | |
893 | assert (self.thismask & ~s.fixedmask) == 0 | |
894 | innermask = outermask | self.thismask | |
895 | innerbits = outerbits | b | |
896 | output(ind, 'case ', str_case(b), ':\n') | |
897 | output(ind, ' /* ', | |
898 | str_match_bits(innerbits, innermask), ' */\n') | |
899 | s.output_code(i + 4, extracted, innerbits, innermask) | |
900 | output(ind, '}\n') | |
901 | output(ind, 'return false;\n') | |
902 | # end Tree | |
903 | ||
904 | ||
905 | def build_tree(pats, outerbits, outermask): | |
906 | # Find the intersection of all remaining fixedmask. | |
907 | innermask = ~outermask | |
908 | for i in pats: | |
909 | innermask &= i.fixedmask | |
910 | ||
911 | if innermask == 0: | |
912 | pnames = [] | |
913 | for p in pats: | |
914 | pnames.append(p.name + ':' + str(p.lineno)) | |
915 | error(pats[0].lineno, 'overlapping patterns:', pnames) | |
916 | ||
917 | fullmask = outermask | innermask | |
918 | ||
919 | # Sort each element of pats into the bin selected by the mask. | |
920 | bins = {} | |
921 | for i in pats: | |
922 | fb = i.fixedbits & innermask | |
923 | if fb in bins: | |
924 | bins[fb].append(i) | |
925 | else: | |
926 | bins[fb] = [i] | |
927 | ||
928 | # We must recurse if any bin has more than one element or if | |
929 | # the single element in the bin has not been fully matched. | |
930 | t = Tree(fullmask, innermask) | |
931 | ||
932 | for b, l in bins.items(): | |
933 | s = l[0] | |
934 | if len(l) > 1 or s.fixedmask & ~fullmask != 0: | |
935 | s = build_tree(l, b | outerbits, fullmask) | |
936 | t.subs.append((b, s)) | |
937 | ||
938 | return t | |
939 | # end build_tree | |
940 | ||
941 | ||
942 | def prop_format(tree): | |
943 | """Propagate Format objects into the decode tree""" | |
944 | ||
945 | # Depth first search. | |
946 | for (b, s) in tree.subs: | |
947 | if isinstance(s, Tree): | |
948 | prop_format(s) | |
949 | ||
950 | # If all entries in SUBS have the same format, then | |
951 | # propagate that into the tree. | |
952 | f = None | |
953 | for (b, s) in tree.subs: | |
954 | if f is None: | |
955 | f = s.base | |
956 | if f is None: | |
957 | return | |
958 | if f is not s.base: | |
959 | return | |
960 | tree.base = f | |
961 | # end prop_format | |
962 | ||
963 | ||
964 | def main(): | |
965 | global arguments | |
966 | global formats | |
967 | global patterns | |
968 | global translate_scope | |
969 | global translate_prefix | |
970 | global output_fd | |
971 | global output_file | |
972 | global input_file | |
973 | global insnwidth | |
974 | global insntype | |
83d7c40c | 975 | global insnmask |
568ae7ef RH |
976 | |
977 | decode_function = 'decode' | |
978 | decode_scope = 'static ' | |
979 | ||
980 | long_opts = ['decode=', 'translate=', 'output=', 'insnwidth='] | |
981 | try: | |
982 | (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts) | |
983 | except getopt.GetoptError as err: | |
984 | error(0, err) | |
985 | for o, a in opts: | |
986 | if o in ('-o', '--output'): | |
987 | output_file = a | |
988 | elif o == '--decode': | |
989 | decode_function = a | |
990 | decode_scope = '' | |
991 | elif o == '--translate': | |
992 | translate_prefix = a | |
993 | translate_scope = '' | |
994 | elif o in ('-w', '--insnwidth'): | |
995 | insnwidth = int(a) | |
996 | if insnwidth == 16: | |
997 | insntype = 'uint16_t' | |
998 | insnmask = 0xffff | |
999 | elif insnwidth != 32: | |
1000 | error(0, 'cannot handle insns of width', insnwidth) | |
1001 | else: | |
1002 | assert False, 'unhandled option' | |
1003 | ||
1004 | if len(args) < 1: | |
1005 | error(0, 'missing input file') | |
1006 | input_file = args[0] | |
1007 | f = open(input_file, 'r') | |
1008 | parse_file(f) | |
1009 | f.close() | |
1010 | ||
1011 | t = build_tree(patterns, 0, 0) | |
1012 | prop_format(t) | |
1013 | ||
1014 | if output_file: | |
1015 | output_fd = open(output_file, 'w') | |
1016 | else: | |
1017 | output_fd = sys.stdout | |
1018 | ||
1019 | output_autogen() | |
1020 | for n in sorted(arguments.keys()): | |
1021 | f = arguments[n] | |
1022 | f.output_def() | |
1023 | ||
1024 | # A single translate function can be invoked for different patterns. | |
1025 | # Make sure that the argument sets are the same, and declare the | |
1026 | # function only once. | |
1027 | out_pats = {} | |
1028 | for i in patterns: | |
1029 | if i.name in out_pats: | |
1030 | p = out_pats[i.name] | |
1031 | if i.base.base != p.base.base: | |
1032 | error(0, i.name, ' has conflicting argument sets') | |
1033 | else: | |
1034 | i.output_decl() | |
1035 | out_pats[i.name] = i | |
1036 | output('\n') | |
1037 | ||
1038 | for n in sorted(formats.keys()): | |
1039 | f = formats[n] | |
1040 | f.output_extract() | |
1041 | ||
1042 | output(decode_scope, 'bool ', decode_function, | |
1043 | '(DisasContext *ctx, ', insntype, ' insn)\n{\n') | |
1044 | ||
1045 | i4 = str_indent(4) | |
1046 | output(i4, 'union {\n') | |
1047 | for n in sorted(arguments.keys()): | |
1048 | f = arguments[n] | |
1049 | output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') | |
1050 | output(i4, '} u;\n\n') | |
1051 | ||
1052 | t.output_code(4, False, 0, 0) | |
1053 | ||
1054 | output('}\n') | |
1055 | ||
1056 | if output_file: | |
1057 | output_fd.close() | |
1058 | # end main | |
1059 | ||
1060 | ||
1061 | if __name__ == '__main__': | |
1062 | main() |