static bool has_msr_async_pf_en;
static bool has_msr_pv_eoi_en;
static bool has_msr_misc_enable;
+static bool has_msr_bndcfgs;
static bool has_msr_kvm_steal_time;
static int lm_capable_kernel;
+static bool has_msr_hv_hypercall;
+static bool has_msr_hv_vapic;
+static bool has_msr_hv_tsc;
static bool has_msr_architectural_pmu;
static uint32_t num_architectural_pmu_counters;
return cpuid;
}
-struct kvm_para_features {
+static const struct kvm_para_features {
int cap;
int feature;
} para_features[] = {
{ KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
{ KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
{ KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
- { -1, -1 }
};
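+/*
+ * Probe each optional KVM capability on the host and fold the corresponding
+ * paravirt feature bit into the bitmap returned to the guest.
+ */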
static int get_para_features(KVMState *s)
{
int i, features = 0;
- for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
+ for (i = 0; i < ARRAY_SIZE(para_features); i++) {
if (kvm_check_extension(s, para_features[i].cap)) {
features |= (1 << para_features[i].feature);
}
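+/*
+ * Hyper-V enlightenments require host-side support (KVM_CAP_HYPERV) in
+ * addition to at least one Hyper-V feature being requested for the CPU.
+ */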
static bool hyperv_enabled(X86CPU *cpu)
{
- return hyperv_hypercall_available(cpu) ||
- cpu->hyperv_relaxed_timing;
+ CPUState *cs = CPU(cpu);
+ return kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 &&
+ (hyperv_hypercall_available(cpu) ||
+ cpu->hyperv_time ||
+ cpu->hyperv_relaxed_timing);
}
#define KVM_MAX_CPUID_ENTRIES 100
uint32_t unused;
struct kvm_cpuid_entry2 *c;
uint32_t signature[3];
+ int kvm_base = KVM_CPUID_SIGNATURE;
int r;
memset(&cpuid_data, 0, sizeof(cpuid_data));
cpuid_i = 0;
/* Paravirtualization CPUIDs */
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_SIGNATURE;
- if (!hyperv_enabled(cpu)) {
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
- c->eax = 0;
- } else {
+ if (hyperv_enabled(cpu)) {
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
memcpy(signature, "Microsoft Hv", 12);
c->eax = HYPERV_CPUID_MIN;
- }
- c->ebx = signature[0];
- c->ecx = signature[1];
- c->edx = signature[2];
-
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_FEATURES;
- c->eax = env->features[FEAT_KVM];
+ c->ebx = signature[0];
+ c->ecx = signature[1];
+ c->edx = signature[2];
- if (hyperv_enabled(cpu)) {
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = HYPERV_CPUID_INTERFACE;
memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12);
c->eax = signature[0];
+ c->ebx = 0;
+ c->ecx = 0;
+ c->edx = 0;
c = &cpuid_data.entries[cpuid_i++];
c->function = HYPERV_CPUID_VERSION;
if (cpu->hyperv_vapic) {
c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
+ has_msr_hv_vapic = true;
+ }
+ if (cpu->hyperv_time &&
+ kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) {
+ c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
+ c->eax |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE;
+ /* bit 9 of CPUID 0x40000003.EAX: HV_X64_MSR_REFERENCE_TSC is available */
+ c->eax |= 0x200;
+ has_msr_hv_tsc = true;
}
-
c = &cpuid_data.entries[cpuid_i++];
c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
if (cpu->hyperv_relaxed_timing) {
c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
}
- if (cpu->hyperv_vapic) {
+ if (has_msr_hv_vapic) {
c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
}
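+ /* EBX: recommended number of spinlock acquisition retries before the
+ * guest notifies the hypervisor */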
c->ebx = cpu->hyperv_spinlock_attempts;
c->eax = 0x40;
c->ebx = 0x40;
- c = &cpuid_data.entries[cpuid_i++];
- c->function = KVM_CPUID_SIGNATURE_NEXT;
- memcpy(signature, "KVMKVMKVM\0\0\0", 12);
- c->eax = 0;
- c->ebx = signature[0];
- c->ecx = signature[1];
- c->edx = signature[2];
+ kvm_base = KVM_CPUID_SIGNATURE_NEXT;
+ has_msr_hv_hypercall = true;
}
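+ /* The KVM signature leaves start at kvm_base: 0x40000000 by default, or
+ * KVM_CPUID_SIGNATURE_NEXT (0x40000100) when the Hyper-V leaves occupy
+ * the default range. */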
+ memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = KVM_CPUID_SIGNATURE | kvm_base;
+ c->eax = 0;
+ c->ebx = signature[0];
+ c->ecx = signature[1];
+ c->edx = signature[2];
+
+ c = &cpuid_data.entries[cpuid_i++];
+ c->function = KVM_CPUID_FEATURES | kvm_base;
+ c->eax = env->features[FEAT_KVM];
+
has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
has_msr_misc_enable = true;
continue;
}
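+ /* The kernel lists MSR_IA32_BNDCFGS in KVM_GET_MSR_INDEX_LIST only when
+ * it can save and restore MPX state. */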
+ if (kvm_msr_list->indices[i] == MSR_IA32_BNDCFGS) {
+ has_msr_bndcfgs = true;
+ continue;
+ }
}
}
#define XSAVE_XMM_SPACE 40
#define XSAVE_XSTATE_BV 128
#define XSAVE_YMMH_SPACE 144
+#define XSAVE_BNDREGS 240
+#define XSAVE_BNDCSR 256
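+/* region[] in struct kvm_xsave is u32-indexed: the MPX components sit at
+ * byte offsets 960 (BNDREGS) and 1024 (BNDCSR), i.e. indices 240 and 256. */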
static int kvm_put_xsave(X86CPU *cpu)
{
*(uint64_t *)&xsave->region[XSAVE_XSTATE_BV] = env->xstate_bv;
memcpy(&xsave->region[XSAVE_YMMH_SPACE], env->ymmh_regs,
sizeof env->ymmh_regs);
+ memcpy(&xsave->region[XSAVE_BNDREGS], env->bnd_regs,
+ sizeof env->bnd_regs);
+ memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
+ sizeof env->bndcs_regs);
r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
return r;
}
sregs.cr3 = env->cr[3];
sregs.cr4 = env->cr[4];
- sregs.cr8 = cpu_get_apic_tpr(env->apic_state);
- sregs.apic_base = cpu_get_apic_base(env->apic_state);
+ sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state);
+ sregs.apic_base = cpu_get_apic_base(cpu->apic_state);
sregs.efer = env->efer;
return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
}
+/*
+ * Provide a separate write service for the feature control MSR in order to
+ * kick the VCPU out of VMXON or even nested guest mode on reset. This has to
+ * be done before writing any other state because forcibly leaving nested
+ * mode invalidates the VCPU state.
+ */
+static int kvm_put_msr_feature_control(X86CPU *cpu)
+{
+ struct {
+ struct kvm_msrs info;
+ struct kvm_msr_entry entry;
+ } msr_data;
+
+ kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL,
+ cpu->env.msr_ia32_feature_control);
+ msr_data.info.nmsrs = 1;
+ return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+}
+
static int kvm_put_msrs(X86CPU *cpu, int level)
{
CPUX86State *env = &cpu->env;
kvm_msr_entry_set(&msrs[n++], MSR_IA32_MISC_ENABLE,
env->msr_ia32_misc_enable);
}
+ if (has_msr_bndcfgs) {
+ kvm_msr_entry_set(&msrs[n++], MSR_IA32_BNDCFGS, env->msr_bndcfgs);
+ }
#ifdef TARGET_X86_64
if (lm_capable_kernel) {
kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
}
#endif
- if (level == KVM_PUT_FULL_STATE) {
- /*
- * KVM is yet unable to synchronize TSC values of multiple VCPUs on
- * writeback. Until this is fixed, we only write the offset to SMP
- * guests after migration, desynchronizing the VCPUs, but avoiding
- * huge jump-backs that would occur without any writeback at all.
- */
- if (smp_cpus == 1 || env->tsc != 0) {
- kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
- }
- }
/*
* The following MSRs have side effects on the guest or are too heavy
* for normal writeback. Limit them to reset or full state updates.
*/
if (level >= KVM_PUT_RESET_STATE) {
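+ /* The TSC is written only on reset or full state sync so that a running
+ * guest does not see time jumps on ordinary writeback. */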
+ kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
env->system_time_msr);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL,
env->msr_global_ctrl);
}
- if (hyperv_hypercall_available(cpu)) {
- kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
- kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
+ if (has_msr_hv_hypercall) {
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID,
+ env->msr_hv_guest_os_id);
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL,
+ env->msr_hv_hypercall);
}
- if (cpu->hyperv_vapic) {
- kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
+ if (has_msr_hv_vapic) {
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE,
+ env->msr_hv_vapic);
}
- if (has_msr_feature_control) {
- kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL,
- env->msr_ia32_feature_control);
+ if (has_msr_hv_tsc) {
+ kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
+ env->msr_hv_tsc);
}
+
+ /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
+ * kvm_put_msr_feature_control. */
}
if (env->mcg_cap) {
int i;
env->xstate_bv = *(uint64_t *)&xsave->region[XSAVE_XSTATE_BV];
memcpy(env->ymmh_regs, &xsave->region[XSAVE_YMMH_SPACE],
sizeof env->ymmh_regs);
+ memcpy(env->bnd_regs, &xsave->region[XSAVE_BNDREGS],
+ sizeof env->bnd_regs);
+ memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
+ sizeof env->bndcs_regs);
return 0;
}
if (has_msr_feature_control) {
msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
}
+ if (has_msr_bndcfgs) {
+ msrs[n++].index = MSR_IA32_BNDCFGS;
+ }
if (!env->tsc_valid) {
msrs[n++].index = MSR_IA32_TSC;
}
}
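+ /* Read back the Hyper-V MSRs that may have been set up above so that
+ * migration preserves them. */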
+ if (has_msr_hv_hypercall) {
+ msrs[n++].index = HV_X64_MSR_HYPERCALL;
+ msrs[n++].index = HV_X64_MSR_GUEST_OS_ID;
+ }
+ if (has_msr_hv_vapic) {
+ msrs[n++].index = HV_X64_MSR_APIC_ASSIST_PAGE;
+ }
+ if (has_msr_hv_tsc) {
+ msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
+ }
+
msr_data.info.nmsrs = n;
ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
if (ret < 0) {
case MSR_IA32_FEATURE_CONTROL:
env->msr_ia32_feature_control = msrs[i].data;
break;
+ case MSR_IA32_BNDCFGS:
+ env->msr_bndcfgs = msrs[i].data;
+ break;
default:
if (msrs[i].index >= MSR_MC0_CTL &&
msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
break;
+ case HV_X64_MSR_HYPERCALL:
+ env->msr_hv_hypercall = msrs[i].data;
+ break;
+ case HV_X64_MSR_GUEST_OS_ID:
+ env->msr_hv_guest_os_id = msrs[i].data;
+ break;
+ case HV_X64_MSR_APIC_ASSIST_PAGE:
+ env->msr_hv_vapic = msrs[i].data;
+ break;
+ case HV_X64_MSR_REFERENCE_TSC:
+ env->msr_hv_tsc = msrs[i].data;
+ break;
}
}
static int kvm_get_apic(X86CPU *cpu)
{
- CPUX86State *env = &cpu->env;
- DeviceState *apic = env->apic_state;
+ DeviceState *apic = cpu->apic_state;
struct kvm_lapic_state kapic;
int ret;
static int kvm_put_apic(X86CPU *cpu)
{
- CPUX86State *env = &cpu->env;
- DeviceState *apic = env->apic_state;
+ DeviceState *apic = cpu->apic_state;
struct kvm_lapic_state kapic;
if (apic && kvm_irqchip_in_kernel()) {
assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
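+ /* Write MSR_IA32_FEATURE_CONTROL first, as it may force the VCPU out of
+ * nested mode and thereby invalidate any other pending state; see
+ * kvm_put_msr_feature_control(). */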
+ if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) {
+ ret = kvm_put_msr_feature_control(x86_cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
ret = kvm_getput_regs(x86_cpu, 1);
if (ret < 0) {
return ret;
}
DPRINTF("setting tpr\n");
- run->cr8 = cpu_get_apic_tpr(env->apic_state);
+ run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
}
}
} else {
env->eflags &= ~IF_MASK;
}
- cpu_set_apic_tpr(env->apic_state, run->cr8);
- cpu_set_apic_base(env->apic_state, run->apic_base);
+ cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8);
+ cpu_set_apic_base(x86_cpu->apic_state, run->apic_base);
}
int kvm_arch_process_async_events(CPUState *cs)
if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
- apic_poll_irq(env->apic_state);
+ apic_poll_irq(cpu->apic_state);
}
if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
(env->eflags & IF_MASK)) ||
if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
kvm_cpu_synchronize_state(cs);
- apic_handle_tpr_access_report(env->apic_state, env->eip,
+ apic_handle_tpr_access_report(cpu->apic_state, env->eip,
env->tpr_access_type);
}
static int kvm_handle_tpr_access(X86CPU *cpu)
{
- CPUX86State *env = &cpu->env;
CPUState *cs = CPU(cpu);
struct kvm_run *run = cs->kvm_run;
- apic_handle_tpr_access_report(env->apic_state, run->tpr_access.rip,
+ apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip,
run->tpr_access.is_write ? TPR_ACCESS_WRITE
: TPR_ACCESS_READ);
return 1;
break;
case 0x1:
ret = EXCP_DEBUG;
- env->watchpoint_hit = &hw_watchpoint;
+ cs->watchpoint_hit = &hw_watchpoint;
hw_watchpoint.vaddr = hw_breakpoint[n].addr;
hw_watchpoint.flags = BP_MEM_WRITE;
break;
case 0x3:
ret = EXCP_DEBUG;
- env->watchpoint_hit = &hw_watchpoint;
+ cs->watchpoint_hit = &hw_watchpoint;
hw_watchpoint.vaddr = hw_breakpoint[n].addr;
hw_watchpoint.flags = BP_MEM_ACCESS;
break;
}
}
}
- } else if (kvm_find_sw_breakpoint(CPU(cpu), arch_info->pc)) {
+ } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) {
ret = EXCP_DEBUG;
}
if (ret == 0) {
- cpu_synchronize_state(CPU(cpu));
+ cpu_synchronize_state(cs);
assert(env->exception_injected == -1);
/* pass to guest */