static bool has_msr_async_pf_en;
static bool has_msr_pv_eoi_en;
static bool has_msr_misc_enable;
+static bool has_msr_bndcfgs;
static bool has_msr_kvm_steal_time;
static int lm_capable_kernel;
+static bool has_msr_hv_hypercall;
+static bool has_msr_hv_vapic;
+static bool has_msr_hv_tsc;
static bool has_msr_architectural_pmu;
static uint32_t num_architectural_pmu_counters;
return cpuid;
}
-struct kvm_para_features {
+static const struct kvm_para_features {
int cap;
int feature;
} para_features[] = {
{ KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
{ KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
{ KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
- { -1, -1 }
};
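+/*
+ * Probe each optional KVM capability on the host and fold the corresponding
+ * paravirt feature bit into the bitmap returned to the guest.
+ */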
static int get_para_features(KVMState *s)
{
int i, features = 0;
- for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
+ for (i = 0; i < ARRAY_SIZE(para_features); i++) {
if (kvm_check_extension(s, para_features[i].cap)) {
features |= (1 << para_features[i].feature);
}
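+/*
+ * Hyper-V enlightenments require host-side support (KVM_CAP_HYPERV) in
+ * addition to at least one Hyper-V feature being requested for the CPU.
+ */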
static bool hyperv_enabled(X86CPU *cpu)
{
- return hyperv_hypercall_available(cpu) ||
- cpu->hyperv_relaxed_timing;
+ CPUState *cs = CPU(cpu);
+ return kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 &&
+ (hyperv_hypercall_available(cpu) ||
+ cpu->hyperv_time ||
+ cpu->hyperv_relaxed_timing);
}
#define KVM_MAX_CPUID_ENTRIES 100
uint32_t unused;
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
+ int kvm_base = KVM_CPUID_SIGNATURE;
int r;
memset(&cpuid_data, 0, sizeof(cpuid_data));
cpuid_i = 0;
/* Paravirtualization CPUIDs */
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_SIGNATURE;
- if (!hyperv_enabled(cpu)) {
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
- c->eax = 0;
- } else {
+ if (hyperv_enabled(cpu)) {
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
memcpy(signature, "Microsoft Hv", 12);
c->eax = HYPERV_CPUID_MIN;
- }
- c->ebx = signature[0];
- c->ecx = signature[1];
- c->edx = signature[2];
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_FEATURES;
- c->eax = env->features[FEAT_KVM];
+ c->ebx = signature[0];
+ c->ecx = signature[1];
+ c->edx = signature[2];
- if (hyperv_enabled(cpu)) {
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = HYPERV_CPUID_INTERFACE;
memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
c->eax = signature[0];
+ c->ebx = 0;
+ c->ecx = 0;
+ c->edx = 0;
c = &cpuid_data.entries[cpuid_i++];
c->function = HYPERV_CPUID_VERSION;
if (cpu->hyperv_vapic) {
c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
+ has_msr_hv_vapic = true;
+ }
+ if (cpu->hyperv_time &&
+ kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) {
+ c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
+ c->eax |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE;
+ /* bit 9 of CPUID 0x40000003.EAX: HV_X64_MSR_REFERENCE_TSC is available */
+ c->eax |= 0x200;
+ has_msr_hv_tsc = true;
}
-
c = &cpuid_data.entries[cpuid_i++];
c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
if (cpu->hyperv_relaxed_timing) {
c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
}
- if (cpu->hyperv_vapic) {
+ if (has_msr_hv_vapic) {
c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
}
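+ /* EBX: recommended number of spinlock acquisition retries before the
+ * guest notifies the hypervisor */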
c->ebx = cpu->hyperv_spinlock_attempts;
c->eax = 0x40;
c->ebx = 0x40;
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_SIGNATURE_NEXT;
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
- c->eax = 0;
- c->ebx = signature[0];
- c->ecx = signature[1];
- c->edx = signature[2];
+ kvm_base = KVM_CPUID_SIGNATURE_NEXT;
+ has_msr_hv_hypercall = true;
}
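+ /* The KVM signature leaves start at kvm_base: 0x40000000 by default, or
+ * KVM_CPUID_SIGNATURE_NEXT (0x40000100) when the Hyper-V leaves occupy
+ * the default range. */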
+ memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = KVM_CPUID_SIGNATURE | kvm_base;
+ c->eax = 0;
+ c->ebx = signature[0];
+ c->ecx = signature[1];
+ c->edx = signature[2];
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = KVM_CPUID_FEATURES | kvm_base;
+ c->eax = env->features[FEAT_KVM];
+
has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
has_msr_misc_enable = true;
continue;
}
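+ /* The kernel lists MSR_IA32_BNDCFGS in KVM_GET_MSR_INDEX_LIST only when
+ * it can save and restore MPX state. */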
+ if (kvm_msr_list->indices[i] == MSR_IA32_BNDCFGS) {
+ has_msr_bndcfgs = true;
+ continue;
+ }
}
}
#define XSAVE_XMM_SPACE 40
#define XSAVE_XSTATE_BV 128
#define XSAVE_YMMH_SPACE 144
+#define XSAVE_BNDREGS 240
+#define XSAVE_BNDCSR 256
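+/* region[] in struct kvm_xsave is u32-indexed: the MPX components sit at
+ * byte offsets 960 (BNDREGS) and 1024 (BNDCSR), i.e. indices 240 and 256. */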
static int kvm_put_xsave(X86CPU *cpu)
{
*(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
sizeof env->ymmh_regs);
+ memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
+ sizeof env->bnd_regs);
+ memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
+ sizeof env->bndcs_regs);
r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
return r;
}
sregs.cr3 = env->cr[3];
sregs.cr4 = env->cr[4];
- sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
- sregs.apic_base = cpu_get_apic_base(env->apic_state);
+ sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
+ sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
sregs.efer = env->efer;
return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
}
+/*
+ * Provide a separate write service for the feature control MSR in order to
+ * kick the VCPU out of VMXON or even nested guest mode on reset. This has to
+ * be done before writing any other state because forcibly leaving nested
+ * mode invalidates the VCPU state.
+ */
+static int kvm_put_msr_feature_control(X86CPU *cpu)
+{
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entry;
+ } msr_data;
+
+ kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL,
+ cpu->env.msr_ia32_feature_control);
+ msr_data.info.nmsrs = 1;
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+}
+
static int kvm_put_msrs(X86CPU *cpu, int level)
{
CPUX86State *env = &cpu->env;
kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
env->msr_ia32_misc_enable);
}
+ if (has_msr_bndcfgs) {
+ kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
+ }
#ifdef TARGET_X86_64
if (lm_capable_kernel) {
kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
}
#endif
- if (level == KVM_PUT_FULL_STATE) {
- /*
- * KVM is yet unable to synchronize TSC values of multiple VCPUs on
- * writeback. Until this is fixed, we only write the offset to SMP
- * guests after migration, desynchronizing the VCPUs, but avoiding
- * huge jump-backs that would occur without any writeback at all.
- */
- if (smp_cpus == 1 || env->tsc != 0) {
- kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
- }
- }
/*
* The following MSRs have side effects on the guest or are too heavy
* for normal writeback. Limit them to reset or full state updates.
*/
if (level >= KVM_PUT_RESET_STATE) {
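+ /* The TSC is written only on reset or full state sync so that a running
+ * guest does not see time jumps on ordinary writeback. */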
+ kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
env->system_time_msr);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL,
env->msr_global_ctrl);
}
- if (hyperv_hypercall_available(cpu)) {
- kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
- kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
+ if (has_msr_hv_hypercall) {
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID,
+ env->msr_hv_guest_os_id);
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL,
+ env->msr_hv_hypercall);
}
- if (cpu->hyperv_vapic) {
- kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
+ if (has_msr_hv_vapic) {
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE,
+ env->msr_hv_vapic);
}
- if (has_msr_feature_control) {
- kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL,
- env->msr_ia32_feature_control);
+ if (has_msr_hv_tsc) {
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
+ env->msr_hv_tsc);
}
+
+ /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
+ * kvm_put_msr_feature_control. */
}
if (env->mcg_cap) {
int i;
env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
sizeof env->ymmh_regs);
+ memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
+ sizeof env->bnd_regs);
+ memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
+ sizeof env->bndcs_regs);
return 0;
}
if (has_msr_feature_control) {
msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
}
+ if (has_msr_bndcfgs) {
+ msrs[n++].index = MSR_IA32_BNDCFGS;
+ }
if (!env->tsc_valid) {
msrs[n++].index = MSR_IA32_TSC;
}
}
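+ /* Read back the Hyper-V MSRs that may have been set up above so that
+ * migration preserves them. */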
+ if (has_msr_hv_hypercall) {
+ msrs[n++].index = HV_X64_MSR_HYPERCALL;
+ msrs[n++].index = HV_X64_MSR_GUEST_OS_ID;
+ }
+ if (has_msr_hv_vapic) {
+ msrs[n++].index = HV_X64_MSR_APIC_ASSIST_PAGE;
+ }
+ if (has_msr_hv_tsc) {
+ msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
+ }
+
msr_data.info.nmsrs = n;
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
if (ret < 0) {
case MSR_IA32_FEATURE_CONTROL:
env->msr_ia32_feature_control = msrs[i].data;
break;
+ case MSR_IA32_BNDCFGS:
+ env->msr_bndcfgs = msrs[i].data;
+ break;
default:
if (msrs[i].index >= MSR_MC0_CTL &&
msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
break;
+ case HV_X64_MSR_HYPERCALL:
+ env->msr_hv_hypercall = msrs[i].data;
+ break;
+ case HV_X64_MSR_GUEST_OS_ID:
+ env->msr_hv_guest_os_id = msrs[i].data;
+ break;
+ case HV_X64_MSR_APIC_ASSIST_PAGE:
+ env->msr_hv_vapic = msrs[i].data;
+ break;
+ case HV_X64_MSR_REFERENCE_TSC:
+ env->msr_hv_tsc = msrs[i].data;
+ break;
}
}
static int kvm_get_apic(X86CPU *cpu)
{
- CPUX86State *env = &cpu->env;
- DeviceState *apic = env->apic_state;
+ DeviceState *apic = cpu->apic_state;
struct kvm_lapic_state kapic;
int ret;
static int kvm_put_apic(X86CPU *cpu)
{
- CPUX86State *env = &cpu->env;
- DeviceState *apic = env->apic_state;
+ DeviceState *apic = cpu->apic_state;
struct kvm_lapic_state kapic;
if (apic && kvm_irqchip_in_kernel()) {
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
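+ /* Write MSR_IA32_FEATURE_CONTROL first, as it may force the VCPU out of
+ * nested mode and thereby invalidate any other pending state; see
+ * kvm_put_msr_feature_control(). */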
+ if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) {
+ ret = kvm_put_msr_feature_control(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
ret = kvm_getput_regs(x86_cpu, 1);
if (ret < 0) {
return ret;
}
DPRINTF("setting tpr\n");
- run->cr8 = cpu_get_apic_tpr(env->apic_state);
+ run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
}
}
} else {
env->eflags &= ~IF_MASK;
}
- cpu_set_apic_tpr(env->apic_state, run->cr8);
- cpu_set_apic_base(env->apic_state, run->apic_base);
+ cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
+ cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
}
int kvm_arch_process_async_events(CPUState *cs)
if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
- apic_poll_irq(env->apic_state);
+ apic_poll_irq(cpu->apic_state);
}
if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
(env->eflags & IF_MASK)) ||
if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
kvm_cpu_synchronize_state(cs);
- apic_handle_tpr_access_report(env->apic_state, env->eip,
+ apic_handle_tpr_access_report(cpu->apic_state, env->eip,
env->tpr_access_type);
}
static int kvm_handle_tpr_access(X86CPU *cpu)
{
- CPUX86State *env = &cpu->env;
CPUState *cs = CPU(cpu);
struct kvm_run *run = cs->kvm_run;
- apic_handle_tpr_access_report(env->apic_state, run->tpr_access.rip,
+ apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip,
run->tpr_access.is_write ? TPR_ACCESS_WRITE
: TPR_ACCESS_READ);
return 1;
break;
case 0x1:
ret = EXCP_DEBUG;
- env->watchpoint_hit = &hw_watchpoint;
+ cs->watchpoint_hit = &hw_watchpoint;
hw_watchpoint.vaddr = hw_breakpoint[n].addr;
hw_watchpoint.flags = BP_MEM_WRITE;
break;
case 0x3:
ret = EXCP_DEBUG;
- env->watchpoint_hit = &hw_watchpoint;
+ cs->watchpoint_hit = &hw_watchpoint;
hw_watchpoint.vaddr = hw_breakpoint[n].addr;
hw_watchpoint.flags = BP_MEM_ACCESS;
break;
}
}
}
- } else if (kvm_find_sw_breakpoint(CPU(cpu), arch_info->pc)) {
+ } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) {
ret = EXCP_DEBUG;
}
if (ret == 0) {
- cpu_synchronize_state(CPU(cpu));
+ cpu_synchronize_state(cs);
assert(env->exception_injected == -1);
/* pass to guest */