#include "qemu/atomic.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
+#include "qemu/error-report.h"
#include "hw/hw.h"
#include "hw/pci/msi.h"
#include "hw/s390x/adapter.h"
#include "exec/address-spaces.h"
#include "qemu/event_notifier.h"
#include "trace.h"
+#include "hw/irq.h"
#include "hw/boards.h"
#include <sys/eventfd.h>
#endif
-/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
-#define PAGE_SIZE TARGET_PAGE_SIZE
+/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We
+ * need to use the real host PAGE_SIZE, as that's what KVM will use.
+ */
+#define PAGE_SIZE getpagesize()
//#define DEBUG_KVM
#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
- int pit_state2;
- int xsave, xcrs;
int many_ioeventfds;
int intx_set_mask;
/* The man page (and posix) say ioctl numbers are signed int, but
* in practice they are handled as unsigned, and treating them as
* signed here can break things */
unsigned irq_set_ioctl;
unsigned int sigmask_len;
+ GHashTable *gsimap;
#ifdef KVM_CAP_IRQ_ROUTING
struct kvm_irq_routing *irq_routes;
int nr_allocated_irq_routes;
uint32_t *used_gsi_bitmap;
unsigned int gsi_count;
QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
- bool direct_msi;
#endif
KVMMemoryListener memory_listener;
};
KVMState *kvm_state; /* global KVM state; assigned the initialised KVMState during setup */
bool kvm_kernel_irqchip;
+bool kvm_split_irqchip; /* NOTE(review): no writer is visible in this excerpt; presumably set by arch code - confirm */
bool kvm_async_interrupts_allowed;
bool kvm_halt_in_kernel_allowed; /* set to true by kvm_irqchip_create() */
bool kvm_eventfds_allowed; /* gates registration of the ioeventfd add/del memory-listener hooks */
bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed; /* true when the KVM_CAP_VM_ATTRIBUTES check returns > 0 */
+bool kvm_direct_msi_allowed; /* true when the KVM_CAP_SIGNAL_MSI check returns > 0; replaces KVMState.direct_msi */
+bool kvm_ioeventfd_any_length_allowed; /* true when the KVM_CAP_IOEVENTFD_ANY_LENGTH check returns > 0 */
static const KVMCapabilityInfo kvm_required_capabilites[] = {
KVM_CAP_INFO(USER_MEMORY),
/* kvm works in page size chunks, but the function may be called
with sub-page size and unaligned start address. Pad the start
address to next and truncate size to previous page boundary. */
- delta = (TARGET_PAGE_SIZE - (start_addr & ~TARGET_PAGE_MASK));
- delta &= ~TARGET_PAGE_MASK;
+ delta = qemu_real_host_page_size - (start_addr & ~qemu_real_host_page_mask);
+ delta &= ~qemu_real_host_page_mask;
if (delta > size) {
return;
}
start_addr += delta;
size -= delta;
- size &= TARGET_PAGE_MASK;
- if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
+ size &= qemu_real_host_page_mask;
+ if (!size || (start_addr & ~qemu_real_host_page_mask)) {
return;
}
s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
s->nr_allocated_irq_routes = 0;
- if (!s->direct_msi) {
+ if (!kvm_direct_msi_allowed) {
for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
QTAILQ_INIT(&s->msi_hashtab[i]);
}
* number can succeed even though a new route entry cannot be added.
* When this happens, flush dynamic MSI entries to free IRQ route entries.
*/
- if (!s->direct_msi && s->irq_routes->nr == s->gsi_count) {
+ if (!kvm_direct_msi_allowed && s->irq_routes->nr == s->gsi_count) {
kvm_flush_dynamic_msi_routes(s);
}
struct kvm_msi msi;
KVMMSIRoute *route;
- if (s->direct_msi) {
+ if (kvm_direct_msi_allowed) {
msi.address_lo = (uint32_t)msg.address;
msi.address_hi = msg.address >> 32;
msi.data = le32_to_cpu(msg.data);
return kvm_set_irq(s, route->kroute.gsi, 1);
}
-int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
+int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev)
{
struct kvm_irq_routing_entry kroute = {};
int virq;
kroute.u.msi.address_lo = (uint32_t)msg.address;
kroute.u.msi.address_hi = msg.address >> 32;
kroute.u.msi.data = le32_to_cpu(msg.data);
- if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) {
+ if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
kvm_irqchip_release_virq(s, virq);
return -EINVAL;
}
return virq;
}
-int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
+int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg,
+ PCIDevice *dev)
{
struct kvm_irq_routing_entry kroute = {};
kroute.u.msi.address_lo = (uint32_t)msg.address;
kroute.u.msi.address_hi = msg.address >> 32;
kroute.u.msi.data = le32_to_cpu(msg.data);
- if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) {
+ if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) {
return -EINVAL;
}
kroute.u.adapter.ind_offset = adapter->ind_offset;
kroute.u.adapter.adapter_id = adapter->adapter_id;
+ kvm_add_routing_entry(s, &kroute);
+
+ return virq;
+}
+
+int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
+{
+ struct kvm_irq_routing_entry kroute = {};
+ int virq;
+
+ if (!kvm_gsi_routing_enabled()) {
+ return -ENOSYS;
+ }
+ if (!kvm_check_extension(s, KVM_CAP_HYPERV_SYNIC)) {
+ return -ENOSYS;
+ }
+ virq = kvm_irqchip_get_virq(s);
+ if (virq < 0) {
+ return virq;
+ }
+
+ kroute.gsi = virq;
+ kroute.type = KVM_IRQ_ROUTING_HV_SINT;
+ kroute.flags = 0;
+ kroute.u.hv_sint.vcpu = vcpu;
+ kroute.u.hv_sint.sint = sint;
+
kvm_add_routing_entry(s, &kroute);
kvm_irqchip_commit_routes(s);
return -ENOSYS;
}
+int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint)
+{ /* Stub variant placed before the "!KVM_CAP_IRQ_ROUTING" #endif: it adds
+ * no route and ignores all arguments, always returning -ENOSYS.
+ */
+ return -ENOSYS;
+}
+
/*
 * Stub variant placed before the "!KVM_CAP_IRQ_ROUTING" #endif.
 *
 * The parameter list mirrors what the irqfd notifier helpers pass:
 * (s, fd, rfd, virq, assign), where rfd is the resample notifier's fd
 * or -1.  The previous four-parameter form did not match those
 * five-argument calls, so this configuration could not compile.
 *
 * All arguments are ignored; reaching this function aborts.
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
                                    bool assign)
{
    abort();
}
#endif /* !KVM_CAP_IRQ_ROUTING */
-int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
- EventNotifier *rn, int virq)
+int kvm_irqchip_add_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, int virq)
{ /* Calls kvm_irqchip_assign_irqfd() with assign == true, passing @n's fd,
   * @rn's fd (or -1 when @rn is NULL) and @virq; returns that call's result.
   */
return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n),
rn ? event_notifier_get_fd(rn) : -1, virq, true);
}
-int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
+int kvm_irqchip_remove_irqfd_notifier_gsi(KVMState *s, EventNotifier *n,
+ int virq)
{ /* Calls kvm_irqchip_assign_irqfd() with assign == false for @n's fd and
   * @virq; -1 is passed in the resample-fd position. Returns that call's result.
   */
return kvm_irqchip_assign_irqfd(s, event_notifier_get_fd(n), -1, virq,
false);
}
+int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
+ EventNotifier *rn, qemu_irq irq)
+{ /* qemu_irq front end for kvm_irqchip_add_irqfd_notifier_gsi(): looks up
+ * @irq in s->gsimap and forwards with the GSI stored there.
+ * Returns -ENXIO when @irq has no entry in the map, otherwise the
+ * result of the _gsi variant.
+ */
+ gpointer key, gsi;
+ gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
+
+ if (!found) { /* the extended lookup is used so a stored GSI of 0 is still "found" */
+ return -ENXIO;
+ }
+ return kvm_irqchip_add_irqfd_notifier_gsi(s, n, rn, GPOINTER_TO_INT(gsi));
+}
+
+int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n,
+ qemu_irq irq)
+{ /* qemu_irq front end for kvm_irqchip_remove_irqfd_notifier_gsi(): looks
+ * up @irq in s->gsimap and forwards with the GSI stored there.
+ * Returns -ENXIO when @irq has no entry in the map, otherwise the
+ * result of the _gsi variant.
+ */
+ gpointer key, gsi;
+ gboolean found = g_hash_table_lookup_extended(s->gsimap, irq, &key, &gsi);
+
+ if (!found) { /* the extended lookup is used so a stored GSI of 0 is still "found" */
+ return -ENXIO;
+ }
+ return kvm_irqchip_remove_irqfd_notifier_gsi(s, n, GPOINTER_TO_INT(gsi));
+}
+
+void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi)
+{ /* Records @gsi for @irq in s->gsimap. The key is the @irq pointer
+ * itself and the value is @gsi packed into a pointer with
+ * GINT_TO_POINTER; the irqfd notifier add/remove helpers read it back.
+ */
+ g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
+}
+
static void kvm_irqchip_create(MachineState *machine, KVMState *s)
{
int ret;
/* First probe and see if there's an arch-specific hook to create the
* in-kernel irqchip for us */
- ret = kvm_arch_irqchip_create(s);
+ ret = kvm_arch_irqchip_create(machine, s);
if (ret == 0) {
- ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+ if (machine_kernel_irqchip_split(machine)) {
+ perror("Split IRQ chip mode not supported.");
+ exit(1);
+ } else {
+ ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
+ }
}
if (ret < 0) {
fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
kvm_halt_in_kernel_allowed = true;
kvm_init_irq_routing(s);
+
+ s->gsimap = g_hash_table_new(g_direct_hash, g_direct_equal);
}
/* Find number of supported CPUs using the recommended
* page size for the system though.
*/
assert(TARGET_PAGE_SIZE <= getpagesize());
- page_size_init();
s->sigmask_len = 8;
s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif
-#ifdef KVM_CAP_XSAVE
- s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
-#endif
-
-#ifdef KVM_CAP_XCRS
- s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
-#endif
-
-#ifdef KVM_CAP_PIT_STATE2
- s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
-#endif
-
#ifdef KVM_CAP_IRQ_ROUTING
- s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
+ kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif
s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
kvm_vm_attributes_allowed =
(kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0);
+ kvm_ioeventfd_any_length_allowed =
+ (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0);
+
ret = kvm_arch_init(ms, s);
if (ret < 0) {
goto err;
kvm_state = s;
- s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
- s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+ if (kvm_eventfds_allowed) {
+ s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add;
+ s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del;
+ }
s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region;
s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region;
run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu);
}
-void kvm_cpu_clean_state(CPUState *cpu)
-{
- cpu->kvm_vcpu_dirty = false;
-}
-
int kvm_cpu_exec(CPUState *cpu)
{
struct kvm_run *run = cpu->kvm_run;
qemu_system_reset_request();
ret = EXCP_INTERRUPT;
break;
+ case KVM_SYSTEM_EVENT_CRASH:
+ qemu_mutex_lock_iothread();
+ qemu_system_guest_panicked();
+ qemu_mutex_unlock_iothread();
+ ret = 0;
+ break;
default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(cpu, run);
return ret ? 0 : 1;
}
+int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
+{ /* Issues KVM_HAS_DEVICE_ATTR on @dev_fd for (@group, @attr).
+ * Returns 1 when the ioctl returns 0, and 0 for any non-zero result;
+ * the underlying error code is not propagated.
+ */
+ struct kvm_device_attr attribute = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ };
+
+ return kvm_device_ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute) ? 0 : 1;
+}
+
+/*
+ * Read or write one device attribute via KVM_GET/SET_DEVICE_ATTR.
+ *
+ * @fd:    device file descriptor handed to kvm_device_ioctl()
+ * @group: attribute group
+ * @attr:  attribute within @group
+ * @val:   buffer whose address is passed to the kernel in
+ *         kvm_device_attr.addr
+ * @write: true selects KVM_SET_DEVICE_ATTR, false KVM_GET_DEVICE_ATTR
+ *
+ * There is no error return: a negative ioctl result is reported and the
+ * process aborts.
+ */
+void kvm_device_access(int fd, int group, uint64_t attr,
+                       void *val, bool write)
+{
+    struct kvm_device_attr kvmattr;
+    int err;
+
+    kvmattr.flags = 0;
+    kvmattr.group = group;
+    kvmattr.attr = attr;
+    kvmattr.addr = (uintptr_t)val;
+
+    err = kvm_device_ioctl(fd,
+                           write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR,
+                           &kvmattr);
+    if (err < 0) {
+        error_report("KVM_%s_DEVICE_ATTR failed: %s",
+                     write ? "SET" : "GET", strerror(-err));
+        /* error_printf() does not append a newline, so terminate the
+         * line explicitly before aborting. */
+        error_printf("Group %d attr 0x%016" PRIx64 "\n", group, attr);
+        abort();
+    }
+}
+
int kvm_has_sync_mmu(void)
{
return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
return kvm_state->debugregs;
}
-int kvm_has_xsave(void)
-{
- return kvm_state->xsave;
-}
-
-int kvm_has_xcrs(void)
-{
- return kvm_state->xcrs;
-}
-
-int kvm_has_pit_state2(void)
-{
- return kvm_state->pit_state2;
-}
-
int kvm_has_many_ioeventfds(void)
{
if (!kvm_enabled()) {