X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/c8232b39bb18a91cde39b8e0b60e731a4ce782b1..e6c8fc07b4fce0729bb747770756835f4b0ca7f4:/kvm-all.c diff --git a/kvm-all.c b/kvm-all.c index 06e06f2b3f..a65e73fb1d 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -13,10 +13,9 @@ * */ -#include +#include "qemu/osdep.h" #include #include -#include #include @@ -24,6 +23,7 @@ #include "qemu/atomic.h" #include "qemu/option.h" #include "qemu/config-file.h" +#include "qemu/error-report.h" #include "hw/hw.h" #include "hw/pci/msi.h" #include "hw/s390x/adapter.h" @@ -44,8 +44,10 @@ #include #endif -/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */ -#define PAGE_SIZE TARGET_PAGE_SIZE +/* KVM uses PAGE_SIZE in its definition of KVM_COALESCED_MMIO_MAX. We + * need to use the real host PAGE_SIZE, as that's what KVM will use. + */ +#define PAGE_SIZE getpagesize() //#define DEBUG_KVM @@ -76,8 +78,6 @@ struct KVMState #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint_head kvm_sw_breakpoints; #endif - int pit_state2; - int xsave, xcrs; int many_ioeventfds; int intx_set_mask; /* The man page (and posix) say ioctl numbers are signed int, but @@ -92,13 +92,13 @@ struct KVMState uint32_t *used_gsi_bitmap; unsigned int gsi_count; QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE]; - bool direct_msi; #endif KVMMemoryListener memory_listener; }; KVMState *kvm_state; bool kvm_kernel_irqchip; +bool kvm_split_irqchip; bool kvm_async_interrupts_allowed; bool kvm_halt_in_kernel_allowed; bool kvm_eventfds_allowed; @@ -110,6 +110,8 @@ bool kvm_gsi_direct_mapping; bool kvm_allowed; bool kvm_readonly_mem_allowed; bool kvm_vm_attributes_allowed; +bool kvm_direct_msi_allowed; +bool kvm_ioeventfd_any_length_allowed; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), @@ -641,15 +643,15 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, /* kvm works in page size chunks, but the function may be called with sub-page size and unaligned start address. Pad the start address to next and truncate size to previous page boundary. */ - delta = (TARGET_PAGE_SIZE - (start_addr & ~TARGET_PAGE_MASK)); - delta &= ~TARGET_PAGE_MASK; + delta = qemu_real_host_page_size - (start_addr & ~qemu_real_host_page_mask); + delta &= ~qemu_real_host_page_mask; if (delta > size) { return; } start_addr += delta; size -= delta; - size &= TARGET_PAGE_MASK; - if (!size || (start_addr & ~TARGET_PAGE_MASK)) { + size &= qemu_real_host_page_mask; + if (!size || (start_addr & ~qemu_real_host_page_mask)) { return; } @@ -978,7 +980,7 @@ void kvm_init_irq_routing(KVMState *s) s->irq_routes = g_malloc0(sizeof(*s->irq_routes)); s->nr_allocated_irq_routes = 0; - if (!s->direct_msi) { + if (!kvm_direct_msi_allowed) { for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) { QTAILQ_INIT(&s->msi_hashtab[i]); } @@ -1112,7 +1114,7 @@ static int kvm_irqchip_get_virq(KVMState *s) * number can succeed even though a new route entry cannot be added. * When this happens, flush dynamic MSI entries to free IRQ route entries. */ - if (!s->direct_msi && s->irq_routes->nr == s->gsi_count) { + if (!kvm_direct_msi_allowed && s->irq_routes->nr == s->gsi_count) { kvm_flush_dynamic_msi_routes(s); } @@ -1149,7 +1151,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) struct kvm_msi msi; KVMMSIRoute *route; - if (s->direct_msi) { + if (kvm_direct_msi_allowed) { msi.address_lo = (uint32_t)msg.address; msi.address_hi = msg.address >> 32; msi.data = le32_to_cpu(msg.data); @@ -1188,7 +1190,7 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg) return kvm_set_irq(s, route->kroute.gsi, 1); } -int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) +int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg, PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; int virq; @@ -1212,7 +1214,7 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) kroute.u.msi.address_lo = (uint32_t)msg.address; kroute.u.msi.address_hi = msg.address >> 32; kroute.u.msi.data = le32_to_cpu(msg.data); - if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) { + if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) { kvm_irqchip_release_virq(s, virq); return -EINVAL; } @@ -1223,7 +1225,8 @@ int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg) return virq; } -int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) +int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg, + PCIDevice *dev) { struct kvm_irq_routing_entry kroute = {}; @@ -1241,7 +1244,7 @@ int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg) kroute.u.msi.address_lo = (uint32_t)msg.address; kroute.u.msi.address_hi = msg.address >> 32; kroute.u.msi.data = le32_to_cpu(msg.data); - if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data)) { + if (kvm_arch_fixup_msi_route(&kroute, msg.address, msg.data, dev)) { return -EINVAL; } @@ -1292,6 +1295,33 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) kroute.u.adapter.ind_offset = adapter->ind_offset; kroute.u.adapter.adapter_id = adapter->adapter_id; + kvm_add_routing_entry(s, &kroute); + + return virq; +} + +int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint) +{ + struct kvm_irq_routing_entry kroute = {}; + int virq; + + if (!kvm_gsi_routing_enabled()) { + return -ENOSYS; + } + if (!kvm_check_extension(s, KVM_CAP_HYPERV_SYNIC)) { + return -ENOSYS; + } + virq = kvm_irqchip_get_virq(s); + if (virq < 0) { + return virq; + } + + kroute.gsi = virq; + kroute.type = KVM_IRQ_ROUTING_HV_SINT; + kroute.flags = 0; + kroute.u.hv_sint.vcpu = vcpu; + kroute.u.hv_sint.sint = sint; + kvm_add_routing_entry(s, &kroute); kvm_irqchip_commit_routes(s); @@ -1323,6 +1353,11 @@ int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter) return -ENOSYS; } +int kvm_irqchip_add_hv_sint_route(KVMState *s, uint32_t vcpu, uint32_t sint) +{ + return -ENOSYS; +} + static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int virq, bool assign) { abort(); @@ -1395,9 +1430,14 @@ static void kvm_irqchip_create(MachineState *machine, KVMState *s) /* First probe and see if there's a arch-specific hook to create the * in-kernel irqchip for us */ - ret = kvm_arch_irqchip_create(s); + ret = kvm_arch_irqchip_create(machine, s); if (ret == 0) { - ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); + if (machine_kernel_irqchip_split(machine)) { + perror("Split IRQ chip mode not supported."); + exit(1); + } else { + ret = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP); + } } if (ret < 0) { fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret)); @@ -1462,7 +1502,6 @@ static int kvm_init(MachineState *ms) * page size for the system though. */ assert(TARGET_PAGE_SIZE <= getpagesize()); - page_size_init(); s->sigmask_len = 8; @@ -1585,20 +1624,8 @@ static int kvm_init(MachineState *ms) s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS); #endif -#ifdef KVM_CAP_XSAVE - s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE); -#endif - -#ifdef KVM_CAP_XCRS - s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS); -#endif - -#ifdef KVM_CAP_PIT_STATE2 - s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); -#endif - #ifdef KVM_CAP_IRQ_ROUTING - s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0); + kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0); #endif s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3); @@ -1625,6 +1652,9 @@ static int kvm_init(MachineState *ms) kvm_vm_attributes_allowed = (kvm_check_extension(s, KVM_CAP_VM_ATTRIBUTES) > 0); + kvm_ioeventfd_any_length_allowed = + (kvm_check_extension(s, KVM_CAP_IOEVENTFD_ANY_LENGTH) > 0); + ret = kvm_arch_init(ms, s); if (ret < 0) { goto err; @@ -1636,8 +1666,10 @@ static int kvm_init(MachineState *ms) kvm_state = s; - s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add; - s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; + if (kvm_eventfds_allowed) { + s->memory_listener.listener.eventfd_add = kvm_mem_ioeventfd_add; + s->memory_listener.listener.eventfd_del = kvm_mem_ioeventfd_del; + } s->memory_listener.listener.coalesced_mmio_add = kvm_coalesce_mmio_region; s->memory_listener.listener.coalesced_mmio_del = kvm_uncoalesce_mmio_region; @@ -1779,11 +1811,6 @@ void kvm_cpu_synchronize_post_init(CPUState *cpu) run_on_cpu(cpu, do_kvm_cpu_synchronize_post_init, cpu); } -void kvm_cpu_clean_state(CPUState *cpu) -{ - cpu->kvm_vcpu_dirty = false; -} - int kvm_cpu_exec(CPUState *cpu) { struct kvm_run *run = cpu->kvm_run; @@ -1890,6 +1917,12 @@ int kvm_cpu_exec(CPUState *cpu) qemu_system_reset_request(); ret = EXCP_INTERRUPT; break; + case KVM_SYSTEM_EVENT_CRASH: + qemu_mutex_lock_iothread(); + qemu_system_guest_panicked(); + qemu_mutex_unlock_iothread(); + ret = 0; + break; default: DPRINTF("kvm_arch_handle_exit\n"); ret = kvm_arch_handle_exit(cpu, run); @@ -2003,6 +2036,39 @@ int kvm_vm_check_attr(KVMState *s, uint32_t group, uint64_t attr) return ret ? 0 : 1; } +int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + struct kvm_device_attr attribute = { + .group = group, + .attr = attr, + .flags = 0, + }; + + return kvm_device_ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute) ? 0 : 1; +} + +void kvm_device_access(int fd, int group, uint64_t attr, + void *val, bool write) +{ + struct kvm_device_attr kvmattr; + int err; + + kvmattr.flags = 0; + kvmattr.group = group; + kvmattr.attr = attr; + kvmattr.addr = (uintptr_t)val; + + err = kvm_device_ioctl(fd, + write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR, + &kvmattr); + if (err < 0) { + error_report("KVM_%s_DEVICE_ATTR failed: %s", + write ? "SET" : "GET", strerror(-err)); + error_printf("Group %d attr 0x%016" PRIx64, group, attr); + abort(); + } +} + int kvm_has_sync_mmu(void) { return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU); @@ -2023,21 +2089,6 @@ int kvm_has_debugregs(void) return kvm_state->debugregs; } -int kvm_has_xsave(void) -{ - return kvm_state->xsave; -} - -int kvm_has_xcrs(void) -{ - return kvm_state->xcrs; -} - -int kvm_has_pit_state2(void) -{ - return kvm_state->pit_state2; -} - int kvm_has_many_ioeventfds(void) { if (!kvm_enabled()) { @@ -2310,7 +2361,7 @@ int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source) reg.addr = (uintptr_t) source; r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); if (r) { - trace_kvm_failed_reg_set(id, strerror(r)); + trace_kvm_failed_reg_set(id, strerror(-r)); } return r; } @@ -2324,7 +2375,7 @@ int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) reg.addr = (uintptr_t) target; r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); if (r) { - trace_kvm_failed_reg_get(id, strerror(r)); + trace_kvm_failed_reg_get(id, strerror(-r)); } return r; }