spapr: Limit threads per core according to current compatibility mode

[qemu.git] / target-i386 / kvm.c
diff --git a/target-i386/kvm.c b/target-i386/kvm.c

index 57389114f6056f7b72e0b01384c37389619b8707..4bf0ac9e76a082e43a1c58c86c5431ee01e16d93 100644 (file)
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -30,6 +30,8 @@
  #include "qemu/config-file.h"
  #include "hw/i386/pc.h"
  #include "hw/i386/apic.h"
+#include "hw/i386/apic_internal.h"
+#include "hw/i386/apic-msidef.h"
  #include "exec/ioport.h"
  #include <asm/hyperv.h>
  #include "hw/pci/pci.h"
@@ -72,6 +74,9 @@ static bool has_msr_misc_enable;
  static bool has_msr_bndcfgs;
  static bool has_msr_kvm_steal_time;
  static int lm_capable_kernel;
+static bool has_msr_hv_hypercall;
+static bool has_msr_hv_vapic;
+static bool has_msr_hv_tsc;
  
  static bool has_msr_architectural_pmu;
  static uint32_t num_architectural_pmu_counters;
@@ -119,7 +124,7 @@ static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s)
      return cpuid;
  }
  
-struct kvm_para_features {
+static const struct kvm_para_features {
      int cap;
      int feature;
  } para_features[] = {
@@ -127,14 +132,13 @@ struct kvm_para_features {
      { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
      { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
      { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
-    { -1, -1 }
  };
  
  static int get_para_features(KVMState *s)
  {
      int i, features = 0;
  
-    for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
+    for (i = 0; i < ARRAY_SIZE(para_features); i++) {
          if (kvm_check_extension(s, para_features[i].cap)) {
              features |= (1 << para_features[i].feature);
          }
@@ -437,8 +441,11 @@ static bool hyperv_hypercall_available(X86CPU *cpu)
  
  static bool hyperv_enabled(X86CPU *cpu)
  {
-    return hyperv_hypercall_available(cpu) ||
-           cpu->hyperv_relaxed_timing;
+    CPUState *cs = CPU(cpu);
+    return kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 &&
+           (hyperv_hypercall_available(cpu) ||
+            cpu->hyperv_time  ||
+            cpu->hyperv_relaxed_timing);
  }
  
  #define KVM_MAX_CPUID_ENTRIES  100
@@ -493,14 +500,21 @@ int kvm_arch_init_vcpu(CPUState *cs)
          if (cpu->hyperv_vapic) {
              c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
              c->eax |= HV_X64_MSR_APIC_ACCESS_AVAILABLE;
+            has_msr_hv_vapic = true;
+        }
+        if (cpu->hyperv_time &&
+            kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) {
+            c->eax |= HV_X64_MSR_HYPERCALL_AVAILABLE;
+            c->eax |= HV_X64_MSR_TIME_REF_COUNT_AVAILABLE;
+            c->eax |= 0x200;
+            has_msr_hv_tsc = true;
          }
-
          c = &cpuid_data.entries[cpuid_i++];
          c->function = HYPERV_CPUID_ENLIGHTMENT_INFO;
          if (cpu->hyperv_relaxed_timing) {
              c->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
          }
-        if (cpu->hyperv_vapic) {
+        if (has_msr_hv_vapic) {
              c->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
          }
          c->ebx = cpu->hyperv_spinlock_attempts;
@@ -511,25 +525,28 @@ int kvm_arch_init_vcpu(CPUState *cs)
          c->ebx = 0x40;
  
          kvm_base = KVM_CPUID_SIGNATURE_NEXT;
+        has_msr_hv_hypercall = true;
      }
  
-    memcpy(signature, "KVMKVMKVM\0\0\0", 12);
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_SIGNATURE | kvm_base;
-    c->eax = 0;
-    c->ebx = signature[0];
-    c->ecx = signature[1];
-    c->edx = signature[2];
+    if (cpu->expose_kvm) {
+        memcpy(signature, "KVMKVMKVM\0\0\0", 12);
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_SIGNATURE | kvm_base;
+        c->eax = KVM_CPUID_FEATURES | kvm_base;
+        c->ebx = signature[0];
+        c->ecx = signature[1];
+        c->edx = signature[2];
  
-    c = &cpuid_data.entries[cpuid_i++];
-    c->function = KVM_CPUID_FEATURES | kvm_base;
-    c->eax = env->features[FEAT_KVM];
+        c = &cpuid_data.entries[cpuid_i++];
+        c->function = KVM_CPUID_FEATURES | kvm_base;
+        c->eax = env->features[FEAT_KVM];
  
-    has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
+        has_msr_async_pf_en = c->eax & (1 << KVM_FEATURE_ASYNC_PF);
  
-    has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
+        has_msr_pv_eoi_en = c->eax & (1 << KVM_FEATURE_PV_EOI);
  
-    has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+        has_msr_kvm_steal_time = c->eax & (1 << KVM_FEATURE_STEAL_TIME);
+    }
  
      cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
  
@@ -710,9 +727,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
      return 0;
  }
  
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arch_reset_vcpu(X86CPU *cpu)
  {
-    X86CPU *cpu = X86_CPU(cs);
      CPUX86State *env = &cpu->env;
  
      env->exception_injected = -1;
@@ -726,6 +742,16 @@ void kvm_arch_reset_vcpu(CPUState *cs)
      }
  }
  
+void kvm_arch_do_init_vcpu(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+
+    /* APs go directly into the wait-for-SIPI state.  */
+    if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) {
+        env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
+    }
+}
+
  static int kvm_get_supported_msrs(KVMState *s)
  {
      static int kvm_supported_msrs;
@@ -1223,12 +1249,19 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
              kvm_msr_entry_set(&msrs[n++], MSR_CORE_PERF_GLOBAL_CTRL,
                                env->msr_global_ctrl);
          }
-        if (hyperv_hypercall_available(cpu)) {
-            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID, 0);
-            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL, 0);
+        if (has_msr_hv_hypercall) {
+            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_GUEST_OS_ID,
+                              env->msr_hv_guest_os_id);
+            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_HYPERCALL,
+                              env->msr_hv_hypercall);
          }
-        if (cpu->hyperv_vapic) {
-            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
+        if (has_msr_hv_vapic) {
+            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE,
+                              env->msr_hv_vapic);
+        }
+        if (has_msr_hv_tsc) {
+            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
+                              env->msr_hv_tsc);
          }
  
          /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
@@ -1399,7 +1432,7 @@ static int kvm_get_sregs(X86CPU *cpu)
         HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
         HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)
  
-    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags = (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
      hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
      hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
                  (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
@@ -1514,6 +1547,17 @@ static int kvm_get_msrs(X86CPU *cpu)
          }
      }
  
+    if (has_msr_hv_hypercall) {
+        msrs[n++].index = HV_X64_MSR_HYPERCALL;
+        msrs[n++].index = HV_X64_MSR_GUEST_OS_ID;
+    }
+    if (has_msr_hv_vapic) {
+        msrs[n++].index = HV_X64_MSR_APIC_ASSIST_PAGE;
+    }
+    if (has_msr_hv_tsc) {
+        msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
+    }
+
      msr_data.info.nmsrs = n;
      ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
      if (ret < 0) {
@@ -1621,6 +1665,18 @@ static int kvm_get_msrs(X86CPU *cpu)
          case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
              env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
              break;
+        case HV_X64_MSR_HYPERCALL:
+            env->msr_hv_hypercall = msrs[i].data;
+            break;
+        case HV_X64_MSR_GUEST_OS_ID:
+            env->msr_hv_guest_os_id = msrs[i].data;
+            break;
+        case HV_X64_MSR_APIC_ASSIST_PAGE:
+            env->msr_hv_vapic = msrs[i].data;
+            break;
+        case HV_X64_MSR_REFERENCE_TSC:
+            env->msr_hv_tsc = msrs[i].data;
+            break;
          }
      }
  
@@ -1961,14 +2017,15 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
          }
      }
  
-    if (!kvm_irqchip_in_kernel()) {
-        /* Force the VCPU out of its inner loop to process any INIT requests
-         * or pending TPR access reports. */
-        if (cpu->interrupt_request &
-            (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
-            cpu->exit_request = 1;
-        }
+    /* Force the VCPU out of its inner loop to process any INIT requests
+     * or pending TPR access reports.  The TPR reports are for the userspace
+     * APIC only, but it is cheap to combine both checks here.
+     */
+    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
+        cpu->exit_request = 1;
+    }
  
+    if (!kvm_irqchip_in_kernel()) {
          /* Try to inject an interrupt if the guest can accept it */
          if (run->ready_for_interrupt_injection &&
              (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
@@ -2048,6 +2105,11 @@ int kvm_arch_process_async_events(CPUState *cs)
          }
      }
  
+    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
+        kvm_cpu_synchronize_state(cs);
+        do_cpu_init(cpu);
+    }
+
      if (kvm_irqchip_in_kernel()) {
          return 0;
      }
@@ -2061,10 +2123,6 @@ int kvm_arch_process_async_events(CPUState *cs)
          (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
          cs->halted = 0;
      }
-    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
-        kvm_cpu_synchronize_state(cs);
-        do_cpu_init(cpu);
-    }
      if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
          kvm_cpu_synchronize_state(cs);
          do_cpu_sipi(cpu);
@@ -2233,13 +2291,13 @@ static int kvm_handle_debug(X86CPU *cpu,
                          break;
                      case 0x1:
                          ret = EXCP_DEBUG;
-                        env->watchpoint_hit = &hw_watchpoint;
+                        cs->watchpoint_hit = &hw_watchpoint;
                          hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                          hw_watchpoint.flags = BP_MEM_WRITE;
                          break;
                      case 0x3:
                          ret = EXCP_DEBUG;
-                        env->watchpoint_hit = &hw_watchpoint;
+                        cs->watchpoint_hit = &hw_watchpoint;
                          hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                          hw_watchpoint.flags = BP_MEM_ACCESS;
                          break;
@@ -2247,11 +2305,11 @@ static int kvm_handle_debug(X86CPU *cpu,
                  }
              }
          }
-    } else if (kvm_find_sw_breakpoint(CPU(cpu), arch_info->pc)) {
+    } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) {
          ret = EXCP_DEBUG;
      }
      if (ret == 0) {
-        cpu_synchronize_state(CPU(cpu));
+        cpu_synchronize_state(cs);
          assert(env->exception_injected == -1);
  
          /* pass to guest */