/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <[email protected]>
 *  Glauber Costa     <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
#ifdef KVM_CAP_COALESCED_MMIO
    struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
    int broken_set_mem_region;
    int migration_log;
    int vcpu_events;
    int robust_singlestep;
    int debugregs;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
    int irqchip_in_kernel;
    int pit_in_kernel;
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

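/* Push one slot's geometry down to the kernel: the kvm_userspace_memory_region
 * below mirrors the KVMSlot, translating the slot's phys_offset into the host
 * userspace address that backs it. */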
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}

static void kvm_reset_vcpu(void *opaque)
{
    CPUState *env = opaque;

    kvm_arch_reset_vcpu(env);
}

int kvm_irqchip_in_kernel(void)
{
    return kvm_state->irqchip_in_kernel;
}

int kvm_pit_in_kernel(void)
{
    return kvm_state->pit_in_kernel;
}


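/* Create and map one vcpu: KVM_CREATE_VCPU returns a per-vcpu fd, and the
 * shared kvm_run communication area (whose size KVM_GET_VCPU_MMAP_SIZE
 * reports) is then mmap'ed from that fd. */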
int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    if (s->coalesced_mmio && !s->coalesced_mmio_ring)
        s->coalesced_mmio_ring = (void *) env->kvm_run +
                s->coalesced_mmio * PAGE_SIZE;
#endif

    ret = kvm_arch_init_vcpu(env);
    if (ret == 0) {
        qemu_register_reset(kvm_reset_vcpu, env);
        kvm_arch_reset_vcpu(env);
    }
err:
    return ret;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                (target_phys_addr_t)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

static int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

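/* The kernel fills the dirty log as a little-endian bit array; test one
 * bit (one page) in it without depending on host bitops semantics. */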
static int test_le_bit(unsigned long nr, unsigned char *addr)
{
    return (addr[nr >> 3] >> (nr & 7)) & 1;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using
 * cpu_physical_memory_set_dirty(): every page the kernel has marked dirty
 * since the last sync is set dirty in qemu's bitmap as well.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                          target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
            DPRINTF("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned char *bitmap = (unsigned char *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;

            if (test_le_bit(nr, bitmap)) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

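/* Coalesced MMIO lets the kernel queue writes to registered zones in a ring
 * buffer instead of exiting to userspace for each access; the queued entries
 * are drained later by kvm_flush_coalesced_mmio_buffer(). */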
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

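/* Keep the KVM slot table in sync with qemu's physical memory map: drop or
 * split any slots overlapping the new range (re-registering prefix/suffix
 * remainders), then register the new range itself if KVM needs to know
 * about it. */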
static void kvm_set_phys_mem(target_phys_addr_t start_addr,
                             ram_addr_t size,
                             ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
                                  target_phys_addr_t start_addr,
                                  ram_addr_t size,
                                  ram_addr_t phys_offset)
{
    kvm_set_phys_mem(start_addr, size, phys_offset);
}

static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
                                        target_phys_addr_t start_addr,
                                        target_phys_addr_t end_addr)
{
    return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
}

static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
                                    int enable)
{
    return kvm_set_migration_log(enable);
}

static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
    .set_memory = kvm_client_set_memory,
    .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
    .migration_log = kvm_client_migration_log,
};

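/* One-time accelerator setup: open /dev/kvm, sanity-check the API version,
 * create the VM fd, probe the optional capabilities cached in KVMState, and
 * hook KVM into qemu's physical memory client list. */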
int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = qemu_open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
#ifdef TARGET_S390X
        fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                        "your host kernel command line\n");
#endif
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this. Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified. Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly. Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

    s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
    s->coalesced_mmio_ring = NULL;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    s->vcpu_events = 0;
#ifdef KVM_CAP_VCPU_EVENTS
    s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
#endif

    s->robust_singlestep = 0;
#ifdef KVM_CAP_X86_ROBUST_SINGLESTEP
    s->robust_singlestep =
        kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
#endif

    s->debugregs = 0;
#ifdef KVM_CAP_DEBUGREGS
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    kvm_state = s;
    cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

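/* Emulate a KVM_EXIT_IO request: for string PIO the kernel hands us a batch
 * of 'count' items in the shared buffer, each 'size' bytes wide, to feed
 * through qemu's port I/O handlers one by one. */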
static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
                         uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio_ring) {
        struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            smp_wmb();
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

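/* Register synchronization: kvm_vcpu_dirty tracks whether qemu's CPUState
 * or the in-kernel copy is authoritative, so registers are only fetched or
 * written back when one side has actually changed them. */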
void kvm_cpu_synchronize_state(CPUState *env)
{
    if (!env->kvm_vcpu_dirty) {
        kvm_arch_get_registers(env);
        env->kvm_vcpu_dirty = 1;
    }
}

void kvm_cpu_synchronize_post_reset(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_RESET_STATE);
    env->kvm_vcpu_dirty = 0;
}

void kvm_cpu_synchronize_post_init(CPUState *env)
{
    kvm_arch_put_registers(env, KVM_PUT_FULL_STATE);
    env->kvm_vcpu_dirty = 0;
}

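/* Main vcpu execution loop: flush dirty registers, enter the guest via
 * KVM_RUN (dropping the iothread lock while inside the kernel), then
 * dispatch on run->exit_reason until an exit that userspace must handle. */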
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    DPRINTF("kvm_cpu_exec()\n");

    do {
#ifndef CONFIG_IOTHREAD
        if (env->exit_request) {
            DPRINTF("interrupt exit requested\n");
            ret = 0;
            break;
        }
#endif

        if (env->kvm_vcpu_dirty) {
            kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
            env->kvm_vcpu_dirty = 0;
        }

        kvm_arch_pre_run(env, run);
        qemu_mutex_unlock_iothread();
        ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
        qemu_mutex_lock_iothread();
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            cpu_exit(env);
            DPRINTF("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            DPRINTF("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_flush_coalesced_mmio_buffer();

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            ret = kvm_handle_io(run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            DPRINTF("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            DPRINTF("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            DPRINTF("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            DPRINTF("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

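/* Thin varargs wrappers around ioctl() for the three KVM fd levels
 * (system, VM, vcpu); each folds errno into the negative return value. */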
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

int kvm_has_vcpu_events(void)
{
    return kvm_state->vcpu_events;
}

int kvm_has_robust_singlestep(void)
{
    return kvm_state->robust_singlestep;
}

int kvm_has_debugregs(void)
{
    return kvm_state->debugregs;
}

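/* Without a synchronous MMU the kernel cannot follow changes to qemu's
 * mappings of guest RAM, so that memory must be excluded from fork()'d
 * children; MADV_DONTFORK is therefore mandatory in that case. */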
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
#ifdef CONFIG_IOTHREAD
    if (env != cpu_single_env) {
        abort();
    }
#endif
    func(data);
}

struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

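/* KVM_SET_GUEST_DEBUG must be issued from the vcpu's own thread; the data
 * struct below carries the arguments and result through on_vcpu(). */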
struct kvm_set_guest_debug_data {
    struct kvm_guest_debug dbg;
    CPUState *env;
    int err;
};

static void kvm_invoke_set_guest_debug(void *data)
{
    struct kvm_set_guest_debug_data *dbg_data = data;
    CPUState *env = dbg_data->env;

    dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_set_guest_debug_data data;

    data.dbg.control = reinject_trap;

    if (env->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(env, &data.dbg);
    data.env = env;

    on_vcpu(env, kvm_invoke_set_guest_debug, &data);
    return data.err;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
{
    struct kvm_signal_mask *sigmask;
    int r;

    if (!sigset)
        return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);

    sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));

    sigmask->len = 8;
    memcpy(sigmask->sigset, sigset, sizeof(*sigset));
    r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
    free(sigmask);

    return r;
}

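/* Bind an eventfd to a 2-byte PIO write of 'val' at 'addr': once assigned,
 * the kernel signals the fd on a matching write instead of taking a full
 * exit to userspace (typically used for virtio queue notification). */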
int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
{
#ifdef KVM_IOEVENTFD
    struct kvm_ioeventfd kick = {
        .datamatch = val,
        .addr = addr,
        .len = 2,
        .flags = KVM_IOEVENTFD_FLAG_DATAMATCH | KVM_IOEVENTFD_FLAG_PIO,
        .fd = fd,
    };
    int r;
    if (!kvm_enabled())
        return -ENOSYS;
    if (!assign)
        kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
    if (r < 0)
        return r;
    return 0;
#else
    return -ENOSYS;
#endif
}