#include <linux/kvm.h>
#include "qemu-common.h"
+#include "qemu-barrier.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
KVMSlot slots[32];
int fd;
int vmfd;
- int regs_modified;
int coalesced_mmio;
+#ifdef KVM_CAP_COALESCED_MMIO
+ struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
+#endif
int broken_set_mem_region;
int migration_log;
+ int vcpu_events;
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
{
CPUState *env = opaque;
+ kvm_arch_reset_vcpu(env);
if (kvm_arch_put_registers(env)) {
fprintf(stderr, "Fatal: kvm vcpu reset failed\n");
abort();
goto err;
}
+#ifdef KVM_CAP_COALESCED_MMIO
+ if (s->coalesced_mmio && !s->coalesced_mmio_ring)
+ s->coalesced_mmio_ring = (void *) env->kvm_run +
+ s->coalesced_mmio * PAGE_SIZE;
+#endif
+
ret = kvm_arch_init_vcpu(env);
if (ret == 0) {
qemu_register_reset(kvm_reset_vcpu, env);
+ kvm_arch_reset_vcpu(env);
ret = kvm_arch_put_registers(env);
}
err:
return ret;
}
-int kvm_put_mp_state(CPUState *env)
-{
- struct kvm_mp_state mp_state = { .mp_state = env->mp_state };
-
- return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
-}
-
-int kvm_get_mp_state(CPUState *env)
-{
- struct kvm_mp_state mp_state;
- int ret;
-
- ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
- if (ret < 0) {
- return ret;
- }
- env->mp_state = mp_state.mp_state;
- return 0;
-}
-
/*
* dirty pages logging control
*/
KVM_MEM_LOG_DIRTY_PAGES);
}
-int kvm_set_migration_log(int enable)
+static int kvm_set_migration_log(int enable)
{
KVMState *s = kvm_state;
KVMSlot *mem;
* @start_add: start of logged region.
* @end_addr: end of logged region.
*/
-int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
- target_phys_addr_t end_addr)
+static int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr)
{
KVMState *s = kvm_state;
unsigned long size, allocated_size = 0;
return ret;
}
+static void kvm_set_phys_mem(target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset)
+{
+ KVMState *s = kvm_state;
+ ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
+ KVMSlot *mem, old;
+ int err;
+
+ if (start_addr & ~TARGET_PAGE_MASK) {
+ if (flags >= IO_MEM_UNASSIGNED) {
+ if (!kvm_lookup_overlapping_slot(s, start_addr,
+ start_addr + size)) {
+ return;
+ }
+ fprintf(stderr, "Unaligned split of a KVM memory slot\n");
+ } else {
+ fprintf(stderr, "Only page-aligned memory slots supported\n");
+ }
+ abort();
+ }
+
+ /* KVM does not support read-only slots */
+ phys_offset &= ~IO_MEM_ROM;
+
+ while (1) {
+ mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
+ if (!mem) {
+ break;
+ }
+
+ if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
+ (start_addr + size <= mem->start_addr + mem->memory_size) &&
+ (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
+ /* The new slot fits into the existing one and comes with
+ * identical parameters - nothing to be done. */
+ return;
+ }
+
+ old = *mem;
+
+ /* unregister the overlapping slot */
+ mem->memory_size = 0;
+ err = kvm_set_user_memory_region(s, mem);
+ if (err) {
+ fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
+ __func__, strerror(-err));
+ abort();
+ }
+
+ /* Workaround for older KVM versions: we can't join slots, even not by
+ * unregistering the previous ones and then registering the larger
+ * slot. We have to maintain the existing fragmentation. Sigh.
+ *
+ * This workaround assumes that the new slot starts at the same
+ * address as the first existing one. If not or if some overlapping
+ * slot comes around later, we will fail (not seen in practice so far)
+ * - and actually require a recent KVM version. */
+ if (s->broken_set_mem_region &&
+ old.start_addr == start_addr && old.memory_size < size &&
+ flags < IO_MEM_UNASSIGNED) {
+ mem = kvm_alloc_slot(s);
+ mem->memory_size = old.memory_size;
+ mem->start_addr = old.start_addr;
+ mem->phys_offset = old.phys_offset;
+ mem->flags = 0;
+
+ err = kvm_set_user_memory_region(s, mem);
+ if (err) {
+ fprintf(stderr, "%s: error updating slot: %s\n", __func__,
+ strerror(-err));
+ abort();
+ }
+
+ start_addr += old.memory_size;
+ phys_offset += old.memory_size;
+ size -= old.memory_size;
+ continue;
+ }
+
+ /* register prefix slot */
+ if (old.start_addr < start_addr) {
+ mem = kvm_alloc_slot(s);
+ mem->memory_size = start_addr - old.start_addr;
+ mem->start_addr = old.start_addr;
+ mem->phys_offset = old.phys_offset;
+ mem->flags = 0;
+
+ err = kvm_set_user_memory_region(s, mem);
+ if (err) {
+ fprintf(stderr, "%s: error registering prefix slot: %s\n",
+ __func__, strerror(-err));
+ abort();
+ }
+ }
+
+ /* register suffix slot */
+ if (old.start_addr + old.memory_size > start_addr + size) {
+ ram_addr_t size_delta;
+
+ mem = kvm_alloc_slot(s);
+ mem->start_addr = start_addr + size;
+ size_delta = mem->start_addr - old.start_addr;
+ mem->memory_size = old.memory_size - size_delta;
+ mem->phys_offset = old.phys_offset + size_delta;
+ mem->flags = 0;
+
+ err = kvm_set_user_memory_region(s, mem);
+ if (err) {
+ fprintf(stderr, "%s: error registering suffix slot: %s\n",
+ __func__, strerror(-err));
+ abort();
+ }
+ }
+ }
+
+ /* in case the KVM bug workaround already "consumed" the new slot */
+ if (!size)
+ return;
+
+ /* KVM does not need to know about this memory */
+ if (flags >= IO_MEM_UNASSIGNED)
+ return;
+
+ mem = kvm_alloc_slot(s);
+ mem->memory_size = size;
+ mem->start_addr = start_addr;
+ mem->phys_offset = phys_offset;
+ mem->flags = 0;
+
+ err = kvm_set_user_memory_region(s, mem);
+ if (err) {
+ fprintf(stderr, "%s: error registering slot: %s\n", __func__,
+ strerror(-err));
+ abort();
+ }
+}
+
+static void kvm_client_set_memory(struct CPUPhysMemoryClient *client,
+ target_phys_addr_t start_addr,
+ ram_addr_t size,
+ ram_addr_t phys_offset)
+{
+ kvm_set_phys_mem(start_addr, size, phys_offset);
+}
+
+static int kvm_client_sync_dirty_bitmap(struct CPUPhysMemoryClient *client,
+ target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr)
+{
+ return kvm_physical_sync_dirty_bitmap(start_addr, end_addr);
+}
+
+static int kvm_client_migration_log(struct CPUPhysMemoryClient *client,
+ int enable)
+{
+ return kvm_set_migration_log(enable);
+}
+
+static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
+ .set_memory = kvm_client_set_memory,
+ .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
+ .migration_log = kvm_client_migration_log,
+};
+
int kvm_init(int smp_cpus)
{
static const char upgrade_note[] =
s = qemu_mallocz(sizeof(KVMState));
#ifdef KVM_CAP_SET_GUEST_DEBUG
- TAILQ_INIT(&s->kvm_sw_breakpoints);
+ QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
for (i = 0; i < ARRAY_SIZE(s->slots); i++)
s->slots[i].slot = i;
s->vmfd = -1;
- s->fd = open("/dev/kvm", O_RDWR);
+ s->fd = qemu_open("/dev/kvm", O_RDWR);
if (s->fd == -1) {
fprintf(stderr, "Could not access KVM kernel module: %m\n");
ret = -errno;
goto err;
}
+ s->coalesced_mmio = 0;
#ifdef KVM_CAP_COALESCED_MMIO
s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
-#else
- s->coalesced_mmio = 0;
+ s->coalesced_mmio_ring = NULL;
#endif
s->broken_set_mem_region = 1;
}
#endif
+ s->vcpu_events = 0;
+#ifdef KVM_CAP_VCPU_EVENTS
+ s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
+#endif
+
ret = kvm_arch_init(s, smp_cpus);
if (ret < 0)
goto err;
kvm_state = s;
+ cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client);
return 0;
return ret;
}
-static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
- int direction, int size, uint32_t count)
+static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
+ uint32_t count)
{
int i;
uint8_t *ptr = data;
if (direction == KVM_EXIT_IO_IN) {
switch (size) {
case 1:
- stb_p(ptr, cpu_inb(env, port));
+ stb_p(ptr, cpu_inb(port));
break;
case 2:
- stw_p(ptr, cpu_inw(env, port));
+ stw_p(ptr, cpu_inw(port));
break;
case 4:
- stl_p(ptr, cpu_inl(env, port));
+ stl_p(ptr, cpu_inl(port));
break;
}
} else {
switch (size) {
case 1:
- cpu_outb(env, port, ldub_p(ptr));
+ cpu_outb(port, ldub_p(ptr));
break;
case 2:
- cpu_outw(env, port, lduw_p(ptr));
+ cpu_outw(port, lduw_p(ptr));
break;
case 4:
- cpu_outl(env, port, ldl_p(ptr));
+ cpu_outl(port, ldl_p(ptr));
break;
}
}
return 1;
}
-static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
+void kvm_flush_coalesced_mmio_buffer(void)
{
#ifdef KVM_CAP_COALESCED_MMIO
KVMState *s = kvm_state;
- if (s->coalesced_mmio) {
- struct kvm_coalesced_mmio_ring *ring;
-
- ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
+ if (s->coalesced_mmio_ring) {
+ struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
while (ring->first != ring->last) {
struct kvm_coalesced_mmio *ent;
ent = &ring->coalesced_mmio[ring->first];
cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
- /* FIXME smp_wmb() */
+ smp_wmb();
ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
}
}
void kvm_cpu_synchronize_state(CPUState *env)
{
- if (!env->kvm_state->regs_modified) {
+ if (!env->kvm_vcpu_dirty) {
kvm_arch_get_registers(env);
- env->kvm_state->regs_modified = 1;
+ env->kvm_vcpu_dirty = 1;
}
}
dprintf("kvm_cpu_exec()\n");
do {
+#ifndef CONFIG_IOTHREAD
if (env->exit_request) {
dprintf("interrupt exit requested\n");
ret = 0;
break;
}
+#endif
- if (env->kvm_state->regs_modified) {
+ if (env->kvm_vcpu_dirty) {
kvm_arch_put_registers(env);
- env->kvm_state->regs_modified = 0;
+ env->kvm_vcpu_dirty = 0;
}
kvm_arch_pre_run(env, run);
+ qemu_mutex_unlock_iothread();
ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
+ qemu_mutex_lock_iothread();
kvm_arch_post_run(env, run);
if (ret == -EINTR || ret == -EAGAIN) {
+ cpu_exit(env);
dprintf("io window exit\n");
ret = 0;
break;
abort();
}
- kvm_run_coalesced_mmio(env, run);
+ kvm_flush_coalesced_mmio_buffer();
ret = 0; /* exit loop */
switch (run->exit_reason) {
case KVM_EXIT_IO:
dprintf("handle_io\n");
- ret = kvm_handle_io(env, run->io.port,
+ ret = kvm_handle_io(run->io.port,
(uint8_t *)run + run->io.data_offset,
run->io.direction,
run->io.size,
return ret;
}
-void kvm_set_phys_mem(target_phys_addr_t start_addr,
- ram_addr_t size,
- ram_addr_t phys_offset)
-{
- KVMState *s = kvm_state;
- ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
- KVMSlot *mem, old;
- int err;
-
- if (start_addr & ~TARGET_PAGE_MASK) {
- if (flags >= IO_MEM_UNASSIGNED) {
- if (!kvm_lookup_overlapping_slot(s, start_addr,
- start_addr + size)) {
- return;
- }
- fprintf(stderr, "Unaligned split of a KVM memory slot\n");
- } else {
- fprintf(stderr, "Only page-aligned memory slots supported\n");
- }
- abort();
- }
-
- /* KVM does not support read-only slots */
- phys_offset &= ~IO_MEM_ROM;
-
- while (1) {
- mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
- if (!mem) {
- break;
- }
-
- if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
- (start_addr + size <= mem->start_addr + mem->memory_size) &&
- (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
- /* The new slot fits into the existing one and comes with
- * identical parameters - nothing to be done. */
- return;
- }
-
- old = *mem;
-
- /* unregister the overlapping slot */
- mem->memory_size = 0;
- err = kvm_set_user_memory_region(s, mem);
- if (err) {
- fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
- __func__, strerror(-err));
- abort();
- }
-
- /* Workaround for older KVM versions: we can't join slots, even not by
- * unregistering the previous ones and then registering the larger
- * slot. We have to maintain the existing fragmentation. Sigh.
- *
- * This workaround assumes that the new slot starts at the same
- * address as the first existing one. If not or if some overlapping
- * slot comes around later, we will fail (not seen in practice so far)
- * - and actually require a recent KVM version. */
- if (s->broken_set_mem_region &&
- old.start_addr == start_addr && old.memory_size < size &&
- flags < IO_MEM_UNASSIGNED) {
- mem = kvm_alloc_slot(s);
- mem->memory_size = old.memory_size;
- mem->start_addr = old.start_addr;
- mem->phys_offset = old.phys_offset;
- mem->flags = 0;
-
- err = kvm_set_user_memory_region(s, mem);
- if (err) {
- fprintf(stderr, "%s: error updating slot: %s\n", __func__,
- strerror(-err));
- abort();
- }
-
- start_addr += old.memory_size;
- phys_offset += old.memory_size;
- size -= old.memory_size;
- continue;
- }
-
- /* register prefix slot */
- if (old.start_addr < start_addr) {
- mem = kvm_alloc_slot(s);
- mem->memory_size = start_addr - old.start_addr;
- mem->start_addr = old.start_addr;
- mem->phys_offset = old.phys_offset;
- mem->flags = 0;
-
- err = kvm_set_user_memory_region(s, mem);
- if (err) {
- fprintf(stderr, "%s: error registering prefix slot: %s\n",
- __func__, strerror(-err));
- abort();
- }
- }
-
- /* register suffix slot */
- if (old.start_addr + old.memory_size > start_addr + size) {
- ram_addr_t size_delta;
-
- mem = kvm_alloc_slot(s);
- mem->start_addr = start_addr + size;
- size_delta = mem->start_addr - old.start_addr;
- mem->memory_size = old.memory_size - size_delta;
- mem->phys_offset = old.phys_offset + size_delta;
- mem->flags = 0;
-
- err = kvm_set_user_memory_region(s, mem);
- if (err) {
- fprintf(stderr, "%s: error registering suffix slot: %s\n",
- __func__, strerror(-err));
- abort();
- }
- }
- }
-
- /* in case the KVM bug workaround already "consumed" the new slot */
- if (!size)
- return;
-
- /* KVM does not need to know about this memory */
- if (flags >= IO_MEM_UNASSIGNED)
- return;
-
- mem = kvm_alloc_slot(s);
- mem->memory_size = size;
- mem->start_addr = start_addr;
- mem->phys_offset = phys_offset;
- mem->flags = 0;
-
- err = kvm_set_user_memory_region(s, mem);
- if (err) {
- fprintf(stderr, "%s: error registering slot: %s\n", __func__,
- strerror(-err));
- abort();
- }
-}
-
int kvm_ioctl(KVMState *s, int type, ...)
{
int ret;
#endif
}
+int kvm_has_vcpu_events(void)
+{
+ return kvm_state->vcpu_events;
+}
+
void kvm_setup_guest_memory(void *start, size_t size)
{
if (!kvm_has_sync_mmu()) {
#ifdef KVM_CAP_SET_GUEST_DEBUG
static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
{
- if (env == cpu_single_env) {
- func(data);
- return;
+#ifdef CONFIG_IOTHREAD
+ if (env != cpu_single_env) {
+ abort();
}
- abort();
+#endif
+ func(data);
}
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
{
struct kvm_sw_breakpoint *bp;
- TAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
+ QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
if (bp->pc == pc)
return bp;
}
int kvm_sw_breakpoints_active(CPUState *env)
{
- return !TAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
+ return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}
struct kvm_set_guest_debug_data {
static void kvm_invoke_set_guest_debug(void *data)
{
struct kvm_set_guest_debug_data *dbg_data = data;
- dbg_data->err = kvm_vcpu_ioctl(dbg_data->env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
+ CPUState *env = dbg_data->env;
+
+ if (env->kvm_vcpu_dirty) {
+ kvm_arch_put_registers(env);
+ env->kvm_vcpu_dirty = 0;
+ }
+ dbg_data->err = kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg_data->dbg);
}
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
return err;
}
- TAILQ_INSERT_HEAD(¤t_env->kvm_state->kvm_sw_breakpoints,
+ QTAILQ_INSERT_HEAD(¤t_env->kvm_state->kvm_sw_breakpoints,
bp, entry);
} else {
err = kvm_arch_insert_hw_breakpoint(addr, len, type);
if (err)
return err;
- TAILQ_REMOVE(¤t_env->kvm_state->kvm_sw_breakpoints, bp, entry);
+ QTAILQ_REMOVE(¤t_env->kvm_state->kvm_sw_breakpoints, bp, entry);
qemu_free(bp);
} else {
err = kvm_arch_remove_hw_breakpoint(addr, len, type);
KVMState *s = current_env->kvm_state;
CPUState *env;
- TAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
+ QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
/* Try harder to find a CPU that currently sees the breakpoint. */
for (env = first_cpu; env != NULL; env = env->next_cpu) {
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
+
+int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset)
+{
+ struct kvm_signal_mask *sigmask;
+ int r;
+
+ if (!sigset)
+ return kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, NULL);
+
+ sigmask = qemu_malloc(sizeof(*sigmask) + sizeof(*sigset));
+
+ sigmask->len = 8;
+ memcpy(sigmask->sigset, sigset, sizeof(*sigset));
+ r = kvm_vcpu_ioctl(env, KVM_SET_SIGNAL_MASK, sigmask);
+ free(sigmask);
+
+ return r;
+}