]> Git Repo - qemu.git/blob - scripts/oss-fuzz/minimize_qtest_trace.py
works with less than base ISA qemu-system-riscv32 -M virt -bios none -kernel output...
[qemu.git] / scripts / oss-fuzz / minimize_qtest_trace.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3
4 """
5 This takes a crashing qtest trace and tries to remove superflous operations
6 """
7
8 import sys
9 import os
10 import subprocess
11 import time
12 import struct
13
14 QEMU_ARGS = None
15 QEMU_PATH = None
16 TIMEOUT = 5
17 CRASH_TOKEN = None
18
19 # Minimization levels
20 M1 = False # try removing IO commands iteratively
21 M2 = False # try setting bits in operand of write/out to zero
22
23 write_suffix_lookup = {"b": (1, "B"),
24                        "w": (2, "H"),
25                        "l": (4, "L"),
26                        "q": (8, "Q")}
27
28 def usage():
29     sys.exit("""\
30 Usage:
31
32 QEMU_PATH="/path/to/qemu" QEMU_ARGS="args" {} [Options] input_trace output_trace
33
34 By default, will try to use the second-to-last line in the output to identify
35 whether the crash occred. Optionally, manually set a string that idenitifes the
36 crash by setting CRASH_TOKEN=
37
38 Options:
39
40 -M1: enable a loop around the remove minimizer, which may help decrease some
41      timing dependant instructions. Off by default.
42 -M2: try setting bits in operand of write/out to zero. Off by default.
43
44 """.format((sys.argv[0])))
45
46 deduplication_note = """\n\
47 Note: While trimming the input, sometimes the mutated trace triggers a different
48 type crash but indicates the same bug. Under this situation, our minimizer is
49 incapable of recognizing and stopped from removing it. In the future, we may
50 use a more sophisticated crash case deduplication method.
51 \n"""
52
53 def check_if_trace_crashes(trace, path):
54     with open(path, "w") as tracefile:
55         tracefile.write("".join(trace))
56
57     rc = subprocess.Popen("timeout -s 9 {timeout}s {qemu_path} {qemu_args} 2>&1\
58     < {trace_path}".format(timeout=TIMEOUT,
59                            qemu_path=QEMU_PATH,
60                            qemu_args=QEMU_ARGS,
61                            trace_path=path),
62                           shell=True,
63                           stdin=subprocess.PIPE,
64                           stdout=subprocess.PIPE,
65                           encoding="utf-8")
66     global CRASH_TOKEN
67     if CRASH_TOKEN is None:
68         try:
69             outs, _ = rc.communicate(timeout=5)
70             CRASH_TOKEN = " ".join(outs.splitlines()[-2].split()[0:3])
71         except subprocess.TimeoutExpired:
72             print("subprocess.TimeoutExpired")
73             return False
74         print("Identifying Crashes by this string: {}".format(CRASH_TOKEN))
75         global deduplication_note
76         print(deduplication_note)
77         return True
78
79     for line in iter(rc.stdout.readline, ""):
80         if "CLOSED" in line:
81             return False
82         if CRASH_TOKEN in line:
83             return True
84
85     print("\nWarning:")
86     print("  There is no 'CLOSED'or CRASH_TOKEN in the stdout of subprocess.")
87     print("  Usually this indicates a different type of crash.\n")
88     return False
89
90
91 # If previous write commands write the same length of data at the same
92 # interval, we view it as a hint.
93 def split_write_hint(newtrace, i):
94     HINT_LEN = 3 # > 2
95     if i <=(HINT_LEN-1):
96         return None
97
98     #find previous continuous write traces
99     k = 0
100     l = i-1
101     writes = []
102     while (k != HINT_LEN and l >= 0):
103         if newtrace[l].startswith("write "):
104             writes.append(newtrace[l])
105             k += 1
106             l -= 1
107         elif newtrace[l] == "":
108             l -= 1
109         else:
110             return None
111     if k != HINT_LEN:
112         return None
113
114     length = int(writes[0].split()[2], 16)
115     for j in range(1, HINT_LEN):
116         if length != int(writes[j].split()[2], 16):
117             return None
118
119     step = int(writes[0].split()[1], 16) - int(writes[1].split()[1], 16)
120     for j in range(1, HINT_LEN-1):
121         if step != int(writes[j].split()[1], 16) - \
122             int(writes[j+1].split()[1], 16):
123             return None
124
125     return (int(writes[0].split()[1], 16)+step, length)
126
127
128 def remove_lines(newtrace, outpath):
129     remove_step = 1
130     i = 0
131     while i < len(newtrace):
132         # 1.) Try to remove lines completely and reproduce the crash.
133         # If it works, we're done.
134         if (i+remove_step) >= len(newtrace):
135             remove_step = 1
136         prior = newtrace[i:i+remove_step]
137         for j in range(i, i+remove_step):
138             newtrace[j] = ""
139         print("Removing {lines} ...\n".format(lines=prior))
140         if check_if_trace_crashes(newtrace, outpath):
141             i += remove_step
142             # Double the number of lines to remove for next round
143             remove_step *= 2
144             continue
145         # Failed to remove multiple IOs, fast recovery
146         if remove_step > 1:
147             for j in range(i, i+remove_step):
148                 newtrace[j] = prior[j-i]
149             remove_step = 1
150             continue
151         newtrace[i] = prior[0] # remove_step = 1
152
153         # 2.) Try to replace write{bwlq} commands with a write addr, len
154         # command. Since this can require swapping endianness, try both LE and
155         # BE options. We do this, so we can "trim" the writes in (3)
156
157         if (newtrace[i].startswith("write") and not
158             newtrace[i].startswith("write ")):
159             suffix = newtrace[i].split()[0][-1]
160             assert(suffix in write_suffix_lookup)
161             addr = int(newtrace[i].split()[1], 16)
162             value = int(newtrace[i].split()[2], 16)
163             for endianness in ['<', '>']:
164                 data = struct.pack("{end}{size}".format(end=endianness,
165                                    size=write_suffix_lookup[suffix][1]),
166                                    value)
167                 newtrace[i] = "write {addr} {size} 0x{data}\n".format(
168                     addr=hex(addr),
169                     size=hex(write_suffix_lookup[suffix][0]),
170                     data=data.hex())
171                 if(check_if_trace_crashes(newtrace, outpath)):
172                     break
173             else:
174                 newtrace[i] = prior[0]
175
176         # 3.) If it is a qtest write command: write addr len data, try to split
177         # it into two separate write commands. If splitting the data operand
178         # from length/2^n bytes to the left does not work, try to move the pivot
179         # to the right side, then add one to n, until length/2^n == 0. The idea
180         # is to prune unneccessary bytes from long writes, while accommodating
181         # arbitrary MemoryRegion access sizes and alignments.
182
183         # This algorithm will fail under some rare situations.
184         # e.g., xxxxxxxxxuxxxxxx (u is the unnecessary byte)
185
186         if newtrace[i].startswith("write "):
187             addr = int(newtrace[i].split()[1], 16)
188             length = int(newtrace[i].split()[2], 16)
189             data = newtrace[i].split()[3][2:]
190             if length > 1:
191
192                 # Can we get a hint from previous writes?
193                 hint = split_write_hint(newtrace, i)
194                 if hint is not None:
195                     hint_addr = hint[0]
196                     hint_len = hint[1]
197                     if hint_addr >= addr and hint_addr+hint_len <= addr+length:
198                         newtrace[i] = "write {addr} {size} 0x{data}\n".format(
199                             addr=hex(hint_addr),
200                             size=hex(hint_len),
201                             data=data[(hint_addr-addr)*2:\
202                                 (hint_addr-addr)*2+hint_len*2])
203                         if check_if_trace_crashes(newtrace, outpath):
204                             # next round
205                             i += 1
206                             continue
207                         newtrace[i] = prior[0]
208
209                 # Try splitting it using a binary approach
210                 leftlength = int(length/2)
211                 rightlength = length - leftlength
212                 newtrace.insert(i+1, "")
213                 power = 1
214                 while leftlength > 0:
215                     newtrace[i] = "write {addr} {size} 0x{data}\n".format(
216                             addr=hex(addr),
217                             size=hex(leftlength),
218                             data=data[:leftlength*2])
219                     newtrace[i+1] = "write {addr} {size} 0x{data}\n".format(
220                             addr=hex(addr+leftlength),
221                             size=hex(rightlength),
222                             data=data[leftlength*2:])
223                     if check_if_trace_crashes(newtrace, outpath):
224                         break
225                     # move the pivot to right side
226                     if leftlength < rightlength:
227                         rightlength, leftlength = leftlength, rightlength
228                         continue
229                     power += 1
230                     leftlength = int(length/pow(2, power))
231                     rightlength = length - leftlength
232                 if check_if_trace_crashes(newtrace, outpath):
233                     i -= 1
234                 else:
235                     newtrace[i] = prior[0]
236                     del newtrace[i+1]
237         i += 1
238
239
240 def clear_bits(newtrace, outpath):
241     # try setting bits in operands of out/write to zero
242     i = 0
243     while i < len(newtrace):
244         if (not newtrace[i].startswith("write ") and not
245            newtrace[i].startswith("out")):
246            i += 1
247            continue
248         # write ADDR SIZE DATA
249         # outx ADDR VALUE
250         print("\nzero setting bits: {}".format(newtrace[i]))
251
252         prefix = " ".join(newtrace[i].split()[:-1])
253         data = newtrace[i].split()[-1]
254         data_bin = bin(int(data, 16))
255         data_bin_list = list(data_bin)
256
257         for j in range(2, len(data_bin_list)):
258             prior = newtrace[i]
259             if (data_bin_list[j] == '1'):
260                 data_bin_list[j] = '0'
261                 data_try = hex(int("".join(data_bin_list), 2))
262                 # It seems qtest only accepts padded hex-values.
263                 if len(data_try) % 2 == 1:
264                     data_try = data_try[:2] + "0" + data_try[2:]
265
266                 newtrace[i] = "{prefix} {data_try}\n".format(
267                         prefix=prefix,
268                         data_try=data_try)
269
270                 if not check_if_trace_crashes(newtrace, outpath):
271                     data_bin_list[j] = '1'
272                     newtrace[i] = prior
273         i += 1
274
275
276 def minimize_trace(inpath, outpath):
277     global TIMEOUT
278     with open(inpath) as f:
279         trace = f.readlines()
280     start = time.time()
281     if not check_if_trace_crashes(trace, outpath):
282         sys.exit("The input qtest trace didn't cause a crash...")
283     end = time.time()
284     print("Crashed in {} seconds".format(end-start))
285     TIMEOUT = (end-start)*5
286     print("Setting the timeout for {} seconds".format(TIMEOUT))
287
288     newtrace = trace[:]
289     global M1, M2
290
291     # remove lines
292     old_len = len(newtrace) + 1
293     while(old_len > len(newtrace)):
294         old_len = len(newtrace)
295         print("trace lenth = ", old_len)
296         remove_lines(newtrace, outpath)
297         if not M1 and not M2:
298             break
299         newtrace = list(filter(lambda s: s != "", newtrace))
300     assert(check_if_trace_crashes(newtrace, outpath))
301
302     # set bits to zero
303     if M2:
304         clear_bits(newtrace, outpath)
305     assert(check_if_trace_crashes(newtrace, outpath))
306
307
308 if __name__ == '__main__':
309     if len(sys.argv) < 3:
310         usage()
311     if "-M1" in sys.argv:
312         M1 = True
313     if "-M2" in sys.argv:
314         M2 = True
315     QEMU_PATH = os.getenv("QEMU_PATH")
316     QEMU_ARGS = os.getenv("QEMU_ARGS")
317     if QEMU_PATH is None or QEMU_ARGS is None:
318         usage()
319     # if "accel" not in QEMU_ARGS:
320     #     QEMU_ARGS += " -accel qtest"
321     CRASH_TOKEN = os.getenv("CRASH_TOKEN")
322     QEMU_ARGS += " -qtest stdio -monitor none -serial none "
323     minimize_trace(sys.argv[-2], sys.argv[-1])
This page took 0.050781 seconds and 4 git commands to generate.