2 # Copyright (c) 2018 Linaro Limited
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
35 translate_prefix = 'trans'
36 translate_scope = 'static '
41 decode_function = 'decode'
43 re_ident = '[a-zA-Z][a-zA-Z0-9_]*'
46 def error_with_file(file, lineno, *args):
47 """Print an error message from file:line and args and exit."""
52 r = '{0}:{1}: error:'.format(file, lineno)
54 r = '{0}: error:'.format(file)
61 if output_file and output_fd:
63 os.remove(output_file)
def error(lineno, *args):
    """Report an error at LINENO of the current input file.

    ARGS are the message fragments.  They are unpacked when forwarded
    to error_with_file so that it receives each fragment as its own
    argument instead of one nested tuple.
    """
    error_with_file(input_file, lineno, *args)
75 if sys.version_info >= (3, 4):
76 re_fullmatch = re.fullmatch
def re_fullmatch(pat, str):
    """Fallback for re.fullmatch: match PAT against the whole of STR.

    PAT is wrapped in a non-capturing group so that a top-level
    alternation is anchored as a unit, and the absolute end-of-string
    anchor is used so that a trailing newline in STR is not accepted.
    Returns the match object, or None when STR does not fully match.
    """
    return re.match('(?:' + pat + r')\Z', str)
83 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
87 """Return a string with C spaces"""
91 def str_fields(fields):
92 """Return a string uniquely identifying FIELDS"""
94 for n in sorted(fields.keys()):
99 def str_match_bits(bits, mask):
100 """Return a string pretty-printing BITS/MASK"""
103 i = 1 << (insnwidth - 1)
121 """Return true iff X is equal to a power of 2."""
122 return (x & (x - 1)) == 0
126 """Return the number of times 2 factors into X."""
128 while ((x >> r) & 1) == 0:
133 def is_contiguous(bits):
135 if is_pow2((bits >> shift) + 1):
141 def eq_fields_for_args(flds_a, flds_b):
142 if len(flds_a) != len(flds_b):
144 for k, a in flds_a.items():
150 def eq_fields_for_fmts(flds_a, flds_b):
151 if len(flds_a) != len(flds_b):
153 for k, a in flds_a.items():
157 if a.__class__ != b.__class__ or a != b:
163 """Class representing a simple instruction field"""
164 def __init__(self, sign, pos, len):
168 self.mask = ((1 << len) - 1) << pos
175 return str(self.pos) + ':' + s + str(self.len)
177 def str_extract(self):
182 return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)
184 def __eq__(self, other):
185 return self.sign == other.sign and self.sign == other.sign
187 def __ne__(self, other):
188 return not self.__eq__(other)
193 """Class representing a compound instruction field"""
194 def __init__(self, subs, mask):
196 self.sign = subs[0].sign
200 return str(self.subs)
202 def str_extract(self):
205 for f in reversed(self.subs):
207 ret = f.str_extract()
209 ret = 'deposit32({0}, {1}, {2}, {3})' \
210 .format(ret, pos, 32 - pos, f.str_extract())
214 def __ne__(self, other):
215 if len(self.subs) != len(other.subs):
217 for a, b in zip(self.subs, other.subs):
218 if a.__class__ != b.__class__ or a != b:
222 def __eq__(self, other):
223 return not self.__ne__(other)
228 """Class representing an argument field with constant value"""
229 def __init__(self, value):
232 self.sign = value < 0
235 return str(self.value)
237 def str_extract(self):
238 return str(self.value)
240 def __cmp__(self, other):
241 return self.value - other.value
246 """Class representing a field passed through an expander"""
247 def __init__(self, func, base):
248 self.mask = base.mask
249 self.sign = base.sign
254 return self.func + '(' + str(self.base) + ')'
256 def str_extract(self):
257 return self.func + '(' + self.base.str_extract() + ')'
259 def __eq__(self, other):
260 return self.func == other.func and self.base == other.base
262 def __ne__(self, other):
263 return not self.__eq__(other)
268 """Class representing the extracted fields of a format"""
269 def __init__(self, nm, flds, extern):
272 self.fields = sorted(flds)
275 return self.name + ' ' + str(self.fields)
277 def struct_name(self):
278 return 'arg_' + self.name
280 def output_def(self):
282 output('typedef struct {\n')
283 for n in self.fields:
284 output(' int ', n, ';\n')
285 output('} ', self.struct_name(), ';\n\n')
290 """Common code between instruction formats and instruction patterns"""
291 def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds):
293 self.file = input_file
296 self.fixedbits = fixb
297 self.fixedmask = fixm
298 self.undefmask = udfm
299 self.fieldmask = fldm
305 r = r + ' ' + self.base.name
307 r = r + ' ' + str(self.fields)
308 r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask)
312 return str_indent(i) + self.__str__()
316 class Format(General):
317 """Class representing an instruction format"""
319 def extract_name(self):
320 return 'extract_' + self.name
322 def output_extract(self):
323 output('static void ', self.extract_name(), '(',
324 self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
325 for n, f in self.fields.items():
326 output(' a->', n, ' = ', f.str_extract(), ';\n')
331 class Pattern(General):
332 """Class representing an instruction pattern"""
def output_decl(self):
    """Emit the declarations for this pattern.

    First a typedef naming the base's argument struct as arg_<name>,
    then the prototype of the <translate_prefix>_<name> function,
    which returns bool and takes a DisasContext and that struct.
    """
    # The struct name comes from the base of this pattern's base.
    struct = self.base.base.struct_name()
    typedef_parts = ('typedef ', struct, ' arg_', self.name, ';\n')
    output(*typedef_parts)

    # The module-level scope and prefix are only read here.
    proto_parts = (translate_scope, 'bool ', translate_prefix, '_',
                   self.name, '(DisasContext *ctx, arg_', self.name,
                   ' *a);\n')
    output(*proto_parts)
342 def output_code(self, i, extracted, outerbits, outermask):
343 global translate_prefix
345 arg = self.base.base.name
346 output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
348 output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n')
349 for n, f in self.fields.items():
350 output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n')
351 output(ind, 'return ', translate_prefix, '_', self.name,
352 '(ctx, &u.f_', arg, ');\n')
356 def parse_field(lineno, name, toks):
357 """Parse one instruction field from TOKS at LINENO"""
362 # A "simple" field will have only one entry;
363 # a "multifield" will have several.
368 if re_fullmatch('!function=' + re_ident, t):
370 error(lineno, 'duplicate function')
375 if re_fullmatch('[0-9]+:s[0-9]+', t):
376 # Signed field extract
377 subtoks = t.split(':s')
379 elif re_fullmatch('[0-9]+:[0-9]+', t):
380 # Unsigned field extract
381 subtoks = t.split(':')
384 error(lineno, 'invalid field token "{0}"'.format(t))
387 if po + le > insnwidth:
388 error(lineno, 'field {0} too large'.format(t))
389 f = Field(sign, po, le)
393 if width > insnwidth:
394 error(lineno, 'field too large')
401 error(lineno, 'field components overlap')
403 f = MultiField(subs, mask)
405 f = FunctionField(func, f)
408 error(lineno, 'duplicate field', name)
413 def parse_arguments(lineno, name, toks):
414 """Parse one argument set from TOKS at LINENO"""
421 if re_fullmatch('!extern', t):
424 if not re_fullmatch(re_ident, t):
425 error(lineno, 'invalid argument set token "{0}"'.format(t))
427 error(lineno, 'duplicate argument "{0}"'.format(t))
430 if name in arguments:
431 error(lineno, 'duplicate argument set', name)
432 arguments[name] = Arguments(name, flds, extern)
433 # end parse_arguments
436 def lookup_field(lineno, name):
440 error(lineno, 'undefined field', name)
443 def add_field(lineno, flds, new_name, f):
445 error(lineno, 'duplicate field', new_name)
def add_field_byname(lineno, flds, new_name, old_name):
    """Look up OLD_NAME via lookup_field and add it to FLDS as NEW_NAME.

    LINENO is passed to both helpers.  Returns whatever add_field
    returns.
    """
    found = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, found)
454 def infer_argument_set(flds):
456 global decode_function
458 for arg in arguments.values():
459 if eq_fields_for_args(flds, arg.fields):
462 name = decode_function + str(len(arguments))
463 arg = Arguments(name, flds.keys(), False)
464 arguments[name] = arg
468 def infer_format(arg, fieldmask, flds):
471 global decode_function
475 for n, c in flds.items():
481 # Look for an existing format with the same argument set and fields
482 for fmt in formats.values():
483 if arg and fmt.base != arg:
485 if fieldmask != fmt.fieldmask:
487 if not eq_fields_for_fmts(flds, fmt.fields):
489 return (fmt, const_flds)
491 name = decode_function + '_Fmt_' + str(len(formats))
493 arg = infer_argument_set(flds)
495 fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds)
498 return (fmt, const_flds)
502 def parse_generic(lineno, is_format, name, toks):
503 """Parse one instruction format from TOKS at LINENO"""
520 # '&Foo' gives a format an explicit argument set.
524 error(lineno, 'multiple argument sets')
528 error(lineno, 'undefined argument set', t)
531 # '@Foo' gives a pattern an explicit format.
535 error(lineno, 'multiple formats')
539 error(lineno, 'undefined format', t)
542 # '%Foo' imports a field.
545 flds = add_field_byname(lineno, flds, tt, tt)
548 # 'Foo=%Bar' imports a field with a different name.
549 if re_fullmatch(re_ident + '=%' + re_ident, t):
550 (fname, iname) = t.split('=%')
551 flds = add_field_byname(lineno, flds, fname, iname)
554 # 'Foo=number' sets an argument field to a constant value
555 if re_fullmatch(re_ident + '=[0-9]+', t):
556 (fname, value) = t.split('=')
558 flds = add_field(lineno, flds, fname, ConstField(value))
561 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
562 # required ones, or dont-cares.
563 if re_fullmatch('[01.-]+', t):
565 fms = t.replace('0', '1')
566 fms = fms.replace('.', '0')
567 fms = fms.replace('-', '0')
568 fbs = t.replace('.', '0')
569 fbs = fbs.replace('-', '0')
570 ubm = t.replace('1', '0')
571 ubm = ubm.replace('.', '0')
572 ubm = ubm.replace('-', '1')
576 fixedbits = (fixedbits << shift) | fbs
577 fixedmask = (fixedmask << shift) | fms
578 undefmask = (undefmask << shift) | ubm
579 # Otherwise, fieldname:fieldwidth
580 elif re_fullmatch(re_ident + ':s?[0-9]+', t):
581 (fname, flen) = t.split(':')
586 shift = int(flen, 10)
587 f = Field(sign, insnwidth - width - shift, shift)
588 flds = add_field(lineno, flds, fname, f)
593 error(lineno, 'invalid token "{0}"'.format(t))
596 # We should have filled in all of the bits of the instruction.
597 if not (is_format and width == 0) and width != insnwidth:
598 error(lineno, 'definition has {0} bits'.format(width))
600 # Do not check for fields overlapping fields; one valid usage
601 # is to be able to duplicate fields via import.
603 for f in flds.values():
606 # Fix up what we've parsed to match either a format or a pattern.
608 # Formats cannot reference formats.
610 error(lineno, 'format referencing format')
611 # If an argument set is given, then there should be no fields
612 # without a place to store it.
614 for f in flds.keys():
615 if f not in arg.fields:
616 error(lineno, 'field {0} not in argument set {1}'
617 .format(f, arg.name))
619 arg = infer_argument_set(flds)
621 error(lineno, 'duplicate format name', name)
622 fmt = Format(name, lineno, arg, fixedbits, fixedmask,
623 undefmask, fieldmask, flds)
626 # Patterns can reference a format ...
628 # ... but not an argument simultaneously
630 error(lineno, 'pattern specifies both format and argument set')
631 if fixedmask & fmt.fixedmask:
632 error(lineno, 'pattern fixed bits overlap format fixed bits')
633 fieldmask |= fmt.fieldmask
634 fixedbits |= fmt.fixedbits
635 fixedmask |= fmt.fixedmask
636 undefmask |= fmt.undefmask
638 (fmt, flds) = infer_format(arg, fieldmask, flds)
640 for f in flds.keys():
641 if f not in arg.fields:
642 error(lineno, 'field {0} not in argument set {1}'
643 .format(f, arg.name))
644 if f in fmt.fields.keys():
645 error(lineno, 'field {0} set by format and pattern'.format(f))
647 if f not in flds.keys() and f not in fmt.fields.keys():
648 error(lineno, 'field {0} not initialized'.format(f))
649 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
650 undefmask, fieldmask, flds)
653 # Validate the masks that we have assembled.
654 if fieldmask & fixedmask:
655 error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
656 .format(fieldmask, fixedmask))
657 if fieldmask & undefmask:
658 error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
659 .format(fieldmask, undefmask))
660 if fixedmask & undefmask:
661 error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
662 .format(fixedmask, undefmask))
664 allbits = fieldmask | fixedmask | undefmask
665 if allbits != insnmask:
666 error(lineno, 'bits left unspecified (0x{0:08x})'
667 .format(allbits ^ insnmask))
672 """Parse all of the patterns within a file"""
674 # Read all of the lines of the file. Concatenate lines
675 # ending in backslash; discard empty lines and comments.
688 # Next line after continuation
702 error(lineno, 'short line')
707 # Determine the type of object needing to be parsed.
709 parse_field(lineno, name[1:], toks)
711 parse_arguments(lineno, name[1:], toks)
713 parse_generic(lineno, True, name[1:], toks)
715 parse_generic(lineno, False, name, toks)
721 """Class representing a node in a decode tree"""
723 def __init__(self, fm, tm):
731 r = '{0}{1:08x}'.format(ind, self.fixedmask)
733 r += ' ' + self.format.name
735 for (b, s) in self.subs:
736 r += '{0} {1:08x}:\n'.format(ind, b)
737 r += s.str1(i + 4) + '\n'
744 def output_code(self, i, extracted, outerbits, outermask):
747 # If we identified all nodes below have the same format,
748 # extract the fields now.
749 if not extracted and self.base:
750 output(ind, self.base.extract_name(),
751 '(&u.f_', self.base.base.name, ', insn);\n')
754 # Attempt to aid the compiler in producing compact switch statements.
755 # If the bits in the mask are contiguous, extract them.
756 sh = is_contiguous(self.thismask)
758 # Propagate SH down into the local functions.
759 def str_switch(b, sh=sh):
760 return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)
762 def str_case(b, sh=sh):
763 return '0x{0:x}'.format(b >> sh)
766 return 'insn & 0x{0:08x}'.format(b)
769 return '0x{0:08x}'.format(b)
771 output(ind, 'switch (', str_switch(self.thismask), ') {\n')
772 for b, s in sorted(self.subs):
773 assert (self.thismask & ~s.fixedmask) == 0
774 innermask = outermask | self.thismask
775 innerbits = outerbits | b
776 output(ind, 'case ', str_case(b), ':\n')
778 str_match_bits(innerbits, innermask), ' */\n')
779 s.output_code(i + 4, extracted, innerbits, innermask)
781 output(ind, 'return false;\n')
785 def build_tree(pats, outerbits, outermask):
786 # Find the intersection of all remaining fixedmask.
787 innermask = ~outermask & insnmask
789 innermask &= i.fixedmask
794 pnames.append(p.name + ':' + p.file + ':' + str(p.lineno))
795 error_with_file(pats[0].file, pats[0].lineno,
796 'overlapping patterns:', pnames)
798 fullmask = outermask | innermask
800 # Sort each element of pats into the bin selected by the mask.
803 fb = i.fixedbits & innermask
809 # We must recurse if any bin has more than one element or if
810 # the single element in the bin has not been fully matched.
811 t = Tree(fullmask, innermask)
813 for b, l in bins.items():
815 if len(l) > 1 or s.fixedmask & ~fullmask != 0:
816 s = build_tree(l, b | outerbits, fullmask)
817 t.subs.append((b, s))
823 def prop_format(tree):
824 """Propagate Format objects into the decode tree"""
826 # Depth first search.
827 for (b, s) in tree.subs:
828 if isinstance(s, Tree):
831 # If all entries in SUBS have the same format, then
832 # propagate that into the tree.
834 for (b, s) in tree.subs:
849 global translate_scope
850 global translate_prefix
857 global decode_function
859 decode_scope = 'static '
861 long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=']
863 (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts)
864 except getopt.GetoptError as err:
867 if o in ('-o', '--output'):
869 elif o == '--decode':
872 elif o == '--translate':
875 elif o in ('-w', '--insnwidth'):
878 insntype = 'uint16_t'
880 elif insnwidth != 32:
881 error(0, 'cannot handle insns of width', insnwidth)
883 assert False, 'unhandled option'
886 error(0, 'missing input file')
887 for filename in args:
888 input_file = filename
889 f = open(filename, 'r')
893 t = build_tree(patterns, 0, 0)
897 output_fd = open(output_file, 'w')
899 output_fd = sys.stdout
902 for n in sorted(arguments.keys()):
906 # A single translate function can be invoked for different patterns.
907 # Make sure that the argument sets are the same, and declare the
908 # function only once.
911 if i.name in out_pats:
913 if i.base.base != p.base.base:
914 error(0, i.name, ' has conflicting argument sets')
920 for n in sorted(formats.keys()):
924 output(decode_scope, 'bool ', decode_function,
925 '(DisasContext *ctx, ', insntype, ' insn)\n{\n')
928 output(i4, 'union {\n')
929 for n in sorted(arguments.keys()):
931 output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
932 output(i4, '} u;\n\n')
934 t.output_code(4, False, 0, 0)
943 if __name__ == '__main__':