]> Git Repo - qemu.git/blob - scripts/dump-guest-memory.py
scripts/dump-guest-memory.py: add vmcoreinfo
[qemu.git] / scripts / dump-guest-memory.py
1 """
2 This python script adds a new gdb command, "dump-guest-memory". It
3 should be loaded with "source dump-guest-memory.py" at the (gdb)
4 prompt.
5
6 Copyright (C) 2013, Red Hat, Inc.
7
8 Authors:
9    Laszlo Ersek <[email protected]>
10    Janosch Frank <[email protected]>
11
12 This work is licensed under the terms of the GNU GPL, version 2 or later. See
13 the COPYING file in the top-level directory.
14 """
15
16 import ctypes
17 import struct
18
# gdb type handle for uintptr_t; host pointers are cast to it before
# being formatted with "%x" in the progress messages below.
UINTPTR_T = gdb.lookup_type("uintptr_t")
20
# Guest memory is copied in chunks of this many bytes.
TARGET_PAGE_SIZE = 1 << 12
# 64-bit mask that clears the in-page offset bits of an address.
TARGET_PAGE_MASK = 0xFFFFFFFFFFFFFFFF & ~(TARGET_PAGE_SIZE - 1)

# Special value for e_phnum. This indicates that the real number of
# program headers is too large to fit into e_phnum. Instead the real
# value is in the field sh_info of section 0.
PN_XNUM = 0xFFFF

# ELF version.
EV_CURRENT = 1

# e_ident[EI_CLASS]: word size of the ELF file.
ELFCLASS32, ELFCLASS64 = 1, 2

# e_ident[EI_DATA]: byte order of the ELF file.
ELFDATA2LSB, ELFDATA2MSB = 1, 2

# e_type: core file.
ET_CORE = 4

# p_type: program header kinds used by this script.
PT_LOAD = 1
PT_NOTE = 4

# e_machine: target architectures.
EM_386 = 3
EM_PPC = 20
EM_PPC64 = 21
EM_S390 = 22
EM_X86_64 = 62
EM_AARCH = 183

# Value of the vmcoreinfo guest_format field that marks an ELF note.
VMCOREINFO_FORMAT_ELF = 1
50
def le16_to_cpu(val):
    """Convert a 16-bit little-endian value to host byte order.

    The value is serialized in the host's native byte order and the
    resulting bytes are re-read as little-endian, which is the identity
    on little-endian hosts and a byte swap on big-endian ones.
    """
    native_bytes = struct.pack("=H", val)
    (host_val,) = struct.unpack("<H", native_bytes)
    return host_val
53
def le32_to_cpu(val):
    """Convert a 32-bit little-endian value to host byte order.

    The value is serialized in the host's native byte order and the
    resulting bytes are re-read as little-endian, which is the identity
    on little-endian hosts and a byte swap on big-endian ones.
    """
    native_bytes = struct.pack("=I", val)
    (host_val,) = struct.unpack("<I", native_bytes)
    return host_val
56
def le64_to_cpu(val):
    """Convert a 64-bit little-endian value to host byte order.

    The value is serialized in the host's native byte order and the
    resulting bytes are re-read as little-endian, which is the identity
    on little-endian hosts and a byte swap on big-endian ones.
    """
    native_bytes = struct.pack("=Q", val)
    (host_val,) = struct.unpack("<Q", native_bytes)
    return host_val
59
class ELF(object):
    """In-memory representation of the ELF core file being generated.

    Holds the ELF header, the program headers ("segments"; the first one
    is always the PT_NOTE segment) and the notes, and serializes them
    with to_file().
    """

    def __init__(self, arch):
        """Creates the ELF header for ARCH and adds the PT_NOTE segment.

        Raises gdb.GdbError if ARCH is not one of the supported names.
        """

        self.ehdr = None
        self.notes = []
        self.segments = []
        self.notes_size = 0
        self.endianness = None
        self.elfclass = ELFCLASS64

        # arch name -> (data encoding, ELF class, e_machine)
        arch_table = {
            'aarch64-le': (ELFDATA2LSB, ELFCLASS64, EM_AARCH),
            'aarch64-be': (ELFDATA2MSB, ELFCLASS64, EM_AARCH),
            'X86_64': (ELFDATA2LSB, ELFCLASS64, EM_X86_64),
            '386': (ELFDATA2LSB, ELFCLASS32, EM_386),
            's390': (ELFDATA2MSB, ELFCLASS64, EM_S390),
            'ppc64-le': (ELFDATA2LSB, ELFCLASS64, EM_PPC64),
            'ppc64-be': (ELFDATA2MSB, ELFCLASS64, EM_PPC64),
        }

        params = arch_table.get(arch)
        if params is None:
            raise gdb.GdbError("No valid arch type specified.\n"
                               "Currently supported types:\n"
                               "aarch64-be, aarch64-le, X86_64, 386, s390, "
                               "ppc64-be, ppc64-le")

        self.endianness, self.elfclass, machine = params
        self.ehdr = get_arch_ehdr(self.endianness, self.elfclass)
        self.ehdr.e_machine = machine

        # Segment 0 describes the notes; its sizes grow as notes are added.
        self.add_segment(PT_NOTE, 0, 0)

    def _append_note(self, note):
        """Records NOTE and grows the PT_NOTE segment by its size."""

        self.notes.append(note)
        self.segments[0].p_filesz += ctypes.sizeof(note)
        self.segments[0].p_memsz += ctypes.sizeof(note)

    def add_note(self, n_name, n_desc, n_type):
        """Adds a note to the ELF.

        N_NAME and N_DESC are strings; they are encoded once and all
        sizes are derived from the encoded bytes, so the structure is
        always large enough for what is copied into it.
        """

        name_bytes = n_name.encode()
        desc_bytes = n_desc.encode()

        note = get_arch_note(self.endianness,
                             len(name_bytes), len(desc_bytes))
        # n_namesz counts one byte more than the name itself.
        note.n_namesz = len(name_bytes) + 1
        note.n_descsz = len(desc_bytes)
        note.n_name = name_bytes
        note.n_type = n_type

        # Desc needs to be 4 byte aligned (although the 64bit spec
        # specifies 8 byte). When defining n_desc as uint32 it will be
        # automatically aligned but we need the memmove to copy the
        # string into it.
        ctypes.memmove(note.n_desc, desc_bytes, len(desc_bytes))

        self._append_note(note)

    def add_vmcoreinfo_note(self, vmcoreinfo):
        """Adds a vmcoreinfo note to the ELF dump.

        VMCOREINFO is the raw note as read from guest memory (a bytes
        object). Its length fields come from the guest, so they are
        validated against the buffer before anything is copied; an
        implausible or truncated note is skipped with a warning.
        """

        max_field_size = 1 << 20
        avail = len(vmcoreinfo)

        # n_namesz, n_descsz and n_type must all be present.
        if avail < 3 * ctypes.sizeof(ctypes.c_uint32):
            print('warning: vmcoreinfo note is truncated')
            return

        # Copy the fixed part of the note, never reading past the end
        # of the source buffer.
        header = get_arch_note(self.endianness, 0, 0)
        ctypes.memmove(ctypes.pointer(header), vmcoreinfo,
                       min(avail, ctypes.sizeof(header)))
        if (header.n_descsz > max_field_size or
                header.n_namesz > max_field_size):
            print('warning: invalid vmcoreinfo size')
            return

        # now get the full note
        note = get_arch_note(self.endianness,
                             header.n_namesz - 1, header.n_descsz)
        if ctypes.sizeof(note) > avail:
            print('warning: vmcoreinfo note is truncated')
            return
        ctypes.memmove(ctypes.pointer(note), vmcoreinfo, ctypes.sizeof(note))

        self._append_note(note)

    def add_segment(self, p_type, p_paddr, p_size):
        """Adds a segment to the elf.

        The program header gets P_SIZE as both file and memory size, and
        the header's e_phnum is incremented.
        """

        phdr = get_arch_phdr(self.endianness, self.elfclass)
        phdr.p_type = p_type
        phdr.p_paddr = p_paddr
        phdr.p_filesz = p_size
        phdr.p_memsz = p_size
        self.segments.append(phdr)
        self.ehdr.e_phnum += 1

    def to_file(self, elf_file):
        """Writes all ELF structures to the passed file.

        Structure:
        Ehdr
        Segment 0:PT_NOTE
        Segment 1:PT_LOAD
        Segment N:PT_LOAD
        Note    0..N
        Dump contents
        """
        elf_file.write(self.ehdr)

        # File offset of the first byte after the program header table.
        off = ctypes.sizeof(self.ehdr) + \
              len(self.segments) * ctypes.sizeof(self.segments[0])

        # Segment contents follow each other in program header order.
        for phdr in self.segments:
            phdr.p_offset = off
            elf_file.write(phdr)
            off += phdr.p_filesz

        for note in self.notes:
            elf_file.write(note)
187
188
def get_arch_note(endianness, len_name, len_desc):
    """Returns a zero-initialized Note instance with the given byte order.

    The name field holds len_name + 1 bytes and the descriptor field is
    an array of 32 bit words large enough for len_desc bytes.
    """

    byte_order_base = (ctypes.LittleEndianStructure
                       if endianness == ELFDATA2LSB
                       else ctypes.BigEndianStructure)

    # One byte more than the name itself, matching the n_namesz
    # convention of the callers.
    name_field = ctypes.c_char * (len_name + 1)
    # Descriptor length rounded up to whole 32 bit words.
    desc_field = ctypes.c_uint32 * ((len_desc + 3) // 4)

    class Note(byte_order_base):
        """Represents an ELF note, includes the content."""

        _fields_ = [("n_namesz", ctypes.c_uint32),
                    ("n_descsz", ctypes.c_uint32),
                    ("n_type", ctypes.c_uint32),
                    ("n_name", name_field),
                    ("n_desc", desc_field)]

    return Note()
208
209
class Ident(ctypes.Structure):
    """The 16 byte e_ident array at the start of the ELF header."""

    # Nine single-byte fields followed by seven bytes of padding.
    _fields_ = [(field_name, ctypes.c_ubyte)
                for field_name in ('ei_mag0', 'ei_mag1', 'ei_mag2',
                                   'ei_mag3', 'ei_class', 'ei_data',
                                   'ei_version', 'ei_osabi',
                                   'ei_abiversion')]
    _fields_.append(('ei_pad', ctypes.c_ubyte * 7))

    def __init__(self, endianness, elfclass):
        # Magic number: 0x7F followed by the ASCII codes of "ELF".
        (self.ei_mag0, self.ei_mag1,
         self.ei_mag2, self.ei_mag3) = bytearray(b"\x7fELF")
        self.ei_version = EV_CURRENT
        self.ei_data = endianness
        self.ei_class = elfclass
232
233
def get_arch_ehdr(endianness, elfclass):
    """Returns a 32 or 64 bit EHDR instance with the specified endianness.

    The returned header is initialized as an ET_CORE file of version
    EV_CURRENT whose program header table directly follows the header
    and is still empty (e_phnum == 0).
    """

    if endianness == ELFDATA2LSB:
        superclass = ctypes.LittleEndianStructure
    else:
        superclass = ctypes.BigEndianStructure

    def ehdr_fields(addr_type):
        """Returns the ELF header layout; only the address width varies."""
        return [('e_ident', Ident),
                ('e_type', ctypes.c_uint16),
                ('e_machine', ctypes.c_uint16),
                ('e_version', ctypes.c_uint32),
                ('e_entry', addr_type),
                ('e_phoff', addr_type),
                ('e_shoff', addr_type),
                ('e_flags', ctypes.c_uint32),
                ('e_ehsize', ctypes.c_uint16),
                ('e_phentsize', ctypes.c_uint16),
                ('e_phnum', ctypes.c_uint16),
                ('e_shentsize', ctypes.c_uint16),
                ('e_shnum', ctypes.c_uint16),
                ('e_shstrndx', ctypes.c_uint16)]

    def fill_common(ehdr):
        """Sets the header values shared by both ELF classes."""
        ehdr.e_ident = Ident(endianness, elfclass)
        ehdr.e_type = ET_CORE
        ehdr.e_version = EV_CURRENT
        ehdr.e_ehsize = ctypes.sizeof(ehdr)
        # The program header table starts right after the ELF header.
        ehdr.e_phoff = ctypes.sizeof(ehdr)
        ehdr.e_phentsize = ctypes.sizeof(get_arch_phdr(endianness, elfclass))
        ehdr.e_phnum = 0

    class EHDR64(superclass):
        """Represents the 64 bit ELF header struct."""

        _fields_ = ehdr_fields(ctypes.c_uint64)

        def __init__(self):
            # Name the defining class so that the MRO lookup starts at
            # the direct base class.
            super(EHDR64, self).__init__()
            fill_common(self)

    class EHDR32(superclass):
        """Represents the 32 bit ELF header struct."""

        _fields_ = ehdr_fields(ctypes.c_uint32)

        def __init__(self):
            super(EHDR32, self).__init__()
            fill_common(self)

    # End get_arch_ehdr
    if elfclass == ELFCLASS64:
        return EHDR64()
    else:
        return EHDR32()
304
305
def get_arch_phdr(endianness, elfclass):
    """Returns a zeroed 32 or 64 bit PHDR instance with the given endianness.

    Besides the field widths the two layouts also differ in where
    p_flags sits, so they are spelled out separately.
    """

    byte_order_base = (ctypes.LittleEndianStructure
                       if endianness == ELFDATA2LSB
                       else ctypes.BigEndianStructure)

    if elfclass == ELFCLASS64:
        class PHDR64(byte_order_base):
            """Represents the 64 bit ELF program header struct."""

            _fields_ = [('p_type', ctypes.c_uint32),
                        ('p_flags', ctypes.c_uint32),
                        ('p_offset', ctypes.c_uint64),
                        ('p_vaddr', ctypes.c_uint64),
                        ('p_paddr', ctypes.c_uint64),
                        ('p_filesz', ctypes.c_uint64),
                        ('p_memsz', ctypes.c_uint64),
                        ('p_align', ctypes.c_uint64)]

        return PHDR64()

    class PHDR32(byte_order_base):
        """Represents the 32 bit ELF program header struct."""

        _fields_ = [('p_type', ctypes.c_uint32),
                    ('p_offset', ctypes.c_uint32),
                    ('p_vaddr', ctypes.c_uint32),
                    ('p_paddr', ctypes.c_uint32),
                    ('p_filesz', ctypes.c_uint32),
                    ('p_memsz', ctypes.c_uint32),
                    ('p_flags', ctypes.c_uint32),
                    ('p_align', ctypes.c_uint32)]

    return PHDR32()
343
344
def int128_get64(val):
    """Returns low 64bit part of Int128 struct.

    VAL is first treated as a struct with "hi" and "lo" members. If gdb
    rejects that access (presumably because Int128 is a native 128 bit
    integer in this build), VAL is reinterpreted as two uint64_t words
    whose order depends on the byte order of the machine running gdb.

    The high half is asserted to be zero in either case.
    """

    # Imported here because the fallback needs it and the module itself
    # does not import sys.
    import sys

    try:
        assert val["hi"] == 0
        return val["lo"]
    except gdb.error:
        u64t = gdb.lookup_type('uint64_t').array(2)
        u64 = val.cast(u64t)
        if sys.byteorder == 'little':
            assert u64[1] == 0
            return u64[0]
        else:
            assert u64[0] == 0
            return u64[1]
360
361
def qlist_foreach(head, field_str):
    """Yields every element of the list starting at head["lh_first"].

    FIELD_STR names the member of each element that holds the "le_next"
    link. The link is read before the element is handed out.
    """

    cursor = head["lh_first"]
    while cursor != 0:
        element = cursor.dereference()
        cursor = element[field_str]["le_next"]
        yield element
370
371
def qemu_map_ram_ptr(block, offset):
    """Returns the block's "host" address advanced by OFFSET."""

    host_base = block["host"]
    return host_base + offset
376
377
def memory_region_get_ram_ptr(memory_region):
    """Returns the host address backing MEMORY_REGION.

    A region without an alias maps to the start of its ram_block. An
    aliased region resolves to the address of the region it aliases,
    advanced by its alias_offset.
    """

    if memory_region["alias"] == 0:
        return qemu_map_ram_ptr(memory_region["ram_block"], 0)

    aliased_region = memory_region["alias"].dereference()
    return (memory_region_get_ram_ptr(aliased_region)
            + memory_region["alias_offset"])
384
385
def get_guest_phys_blocks():
    """Collects the guest RAM ranges from address_space_memory.current_map.

    Returns a list of dicts, each with the keys:
    'target_start': guest block phys start address
    'target_end':   guest block phys end address
    'host_addr':    qemu vaddr of the block's start

    A range that directly continues its predecessor in both guest
    physical and host virtual memory is merged into it. One table row is
    printed for every RAM range visited.
    """

    blocks = []

    print("guest RAM blocks:")
    print("target_start     target_end       host_addr        message "
          "count")
    print("---------------- ---------------- ---------------- ------- "
          "-----")

    flat_view = gdb.parse_and_eval(
        "address_space_memory.current_map").dereference()

    # range() does not convert a gdb value by itself under python 3,
    # hence the explicit int().
    for index in range(int(flat_view["nr"])):
        flat_range = (flat_view["ranges"] + index).dereference()
        region = flat_range["mr"].dereference()

        # we only care about RAM
        if not region["ram"]:
            continue

        size = int128_get64(flat_range["addr"]["size"])
        start = int128_get64(flat_range["addr"]["start"])
        end = start + size
        host = (memory_region_get_ram_ptr(region)
                + flat_range["offset_in_region"])

        # Decide whether this range simply extends the last block.
        extends_last = False
        if blocks:
            last = blocks[-1]
            last_size = last["target_end"] - last["target_start"]

            # the memory API guarantees monotonically increasing
            # traversal
            assert last["target_end"] <= start

            # continuity is required in both guest-physical and
            # host-virtual memory
            extends_last = not (last["target_end"] < start or
                                last["host_addr"] + last_size != host)

        if extends_last:
            # grow the last block up to @end; its start doesn't change
            blocks[-1]["target_end"] = end
            message = "joined"
        else:
            # isolated mapping, add it to the list
            blocks.append({"target_start": start,
                           "target_end":   end,
                           "host_addr":    host})
            message = "added"

        print("%016x %016x %016x %-7s %5u" %
              (start, end, host.cast(UINTPTR_T), message, len(blocks)))

    return blocks
457
458
459 # The leading docstring doesn't have idiomatic Python formatting. It is
460 # printed by gdb's "help" command (the first line is printed in the
461 # "help data" summary), and it should match how other help texts look in
462 # gdb.
class DumpGuestMemory(gdb.Command):
    """Extract guest vmcore from qemu process coredump.

The two required arguments are FILE and ARCH:
FILE identifies the target file to write the guest vmcore to.
ARCH specifies the architecture for which the core will be generated.

This GDB command reimplements the dump-guest-memory QMP command in
python, using the representation of guest memory as captured in the qemu
coredump. The qemu process that has been dumped must have had the
command line option "-machine dump-guest-core=on" which is the default.

For simplicity, the "paging", "begin" and "end" parameters of the QMP
command are not supported -- no attempt is made to get the guest's
internal paging structures (ie. paging=false is hard-wired), and guest
memory is always fully dumped.

Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be,
ppc64-le guests are supported.

The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are
not written to the vmcore. Preparing these would require context that is
only present in the KVM host kernel module when the guest is alive. A
fake ELF note is written instead, only to keep the ELF parser of "crash"
happy.

Dependent on how busted the qemu process was at the time of the
coredump, this command might produce unpredictable results. If qemu
deliberately called abort(), or it was dumped in response to a signal at
a halfway fortunate point, then its coredump should be in reasonable
shape and this command should mostly work."""

    def __init__(self):
        super(DumpGuestMemory, self).__init__("dump-guest-memory",
                                              gdb.COMMAND_DATA,
                                              gdb.COMPLETE_FILENAME)
        # Both are (re)created on every invocation of the command.
        self.elf = None
        self.guest_phys_blocks = None

    def dump_init(self, vmcore):
        """Prepares and writes ELF structures to core file."""

        # Needed to make crash happy, data for more useful notes is
        # not available in a qemu core.
        self.elf.add_note("NONE", "EMPTY", 0)

        # We should never reach PN_XNUM for paging=false dumps,
        # there's just a handful of discontiguous ranges after
        # merging.
        # The constant is needed to account for the PT_NOTE segment.
        phdr_num = len(self.guest_phys_blocks) + 1
        assert phdr_num < PN_XNUM

        for block in self.guest_phys_blocks:
            block_size = block["target_end"] - block["target_start"]
            self.elf.add_segment(PT_LOAD, block["target_start"], block_size)

        self.elf.to_file(vmcore)

    def dump_iterate(self, vmcore):
        """Writes guest core to file.

        Every guest block is copied from the first inferior in chunks of
        at most TARGET_PAGE_SIZE bytes.
        """

        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            cur = block["host_addr"]
            left = block["target_end"] - block["target_start"]
            print("dumping range at %016x for length %016x" %
                  (cur.cast(UINTPTR_T), left))

            while left > 0:
                chunk_size = min(TARGET_PAGE_SIZE, left)
                chunk = qemu_core.read_memory(cur, chunk_size)
                vmcore.write(chunk)
                cur += chunk_size
                left -= chunk_size

    def phys_memory_read(self, addr, size):
        """Reads SIZE bytes of guest physical memory starting at ADDR.

        Returns None unless the whole range lies inside a single guest
        block.
        """

        qemu_core = gdb.inferiors()[0]
        for block in self.guest_phys_blocks:
            if block["target_start"] <= addr \
               and addr + size <= block["target_end"]:
                haddr = block["host_addr"] + (addr - block["target_start"])
                return qemu_core.read_memory(haddr, size)
        return None

    def add_vmcoreinfo(self):
        """Adds the guest's vmcoreinfo note to the ELF, if there is one.

        The note is optional, so every failure to locate or read it is
        reported as a warning and the dump continues without it. That
        includes gdb refusing to evaluate vmcoreinfo_find(), e.g. when
        the function cannot be called in the debugged target or the
        symbol is absent from the binary.
        """

        try:
            # Evaluate the expression once and walk the result.
            state_p = gdb.parse_and_eval("vmcoreinfo_find()")
            if not state_p:
                return
            state = state_p.dereference()
            if not state["has_vmcoreinfo"]:
                return

            info = state["vmcoreinfo"]
            # The fields are stored little-endian; int() turns the gdb
            # values into plain integers for struct.
            fmt = le16_to_cpu(int(info["guest_format"]))
            addr = le64_to_cpu(int(info["paddr"]))
            size = le32_to_cpu(int(info["size"]))
        except gdb.error as err:
            print("warning: vmcoreinfo not available: %s" % err)
            return

        if fmt != VMCOREINFO_FORMAT_ELF:
            return

        vmcoreinfo = self.phys_memory_read(addr, size)
        if vmcoreinfo:
            self.elf.add_vmcoreinfo_note(vmcoreinfo.tobytes())

    def invoke(self, args, from_tty):
        """Handles command invocation from gdb.

        Raises gdb.GdbError unless exactly two arguments (FILE and ARCH)
        are given.
        """

        # Unwittingly pressing the Enter key after the command should
        # not dump the same multi-gig coredump to the same file.
        self.dont_repeat()

        argv = gdb.string_to_argv(args)
        if len(argv) != 2:
            raise gdb.GdbError("usage: dump-guest-memory FILE ARCH")

        self.elf = ELF(argv[1])
        self.guest_phys_blocks = get_guest_phys_blocks()
        self.add_vmcoreinfo()

        with open(argv[0], "wb") as vmcore:
            self.dump_init(vmcore)
            self.dump_iterate(vmcore)


# Instantiating the command registers it with gdb.
DumpGuestMemory()
This page took 0.058673 seconds and 4 git commands to generate.