Git Repo - J-linux.git / commitdiff
Merge tag 'kvm-x86-vmx-6.5' of https://github.com/kvm-x86/linux into HEAD
author    Paolo Bonzini <[email protected]>
Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
committer Paolo Bonzini <[email protected]>
Sat, 1 Jul 2023 11:20:04 +0000 (07:20 -0400)
KVM VMX changes for 6.5:

 - Fix missing/incorrect #GP checks on ENCLS

 - Use standard mmu_notifier hooks for handling APIC access page

 - Misc cleanups
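
The APIC access page item maps to the hunks in arch/x86/kvm/mmu/mmu.c and virt/kvm/kvm_main.c below: the dedicated ->invalidate_range() notifier is dropped and the reload request is raised from the common unmap path instead. The following is a condensed sketch of that flow, trimmed from the diff itself (the SPTE-zapping logic is elided here), not a standalone, compilable unit:

	bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
	{
		bool flush = false;

		/* ... zap SPTEs covered by the range (legacy and TDP MMU) ... */

		/*
		 * When the private memslot backing the APIC-access page is
		 * unmapped, ask all vCPUs to reload the page address into the
		 * VMCS, instead of relying on a dedicated ->invalidate_range()
		 * mmu_notifier hook.
		 */
		if (kvm_x86_ops.set_apic_access_page_addr &&
		    range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT)
			kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);

		return flush;
	}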

arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
include/linux/kvm_host.h
virt/kvm/kvm_main.c

diff --combined arch/x86/kvm/mmu/mmu.c
index 03ff06cd65b33867af1885bc32bf3ecf4137ea09,beb507d82adfd9092ba92c6741d8c7e042186071..ec169f5c7dce21d5f730638ba86ebc99f3050146
@@@ -58,8 -58,6 +58,8 @@@
  
  extern bool itlb_multihit_kvm_mitigation;
  
 +static bool nx_hugepage_mitigation_hard_disabled;
 +
  int __read_mostly nx_huge_pages = -1;
  static uint __read_mostly nx_huge_pages_recovery_period_ms;
  #ifdef CONFIG_PREEMPT_RT
@@@ -69,13 -67,12 +69,13 @@@ static uint __read_mostly nx_huge_pages
  static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
  #endif
  
 +static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp);
  static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
  static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel_param *kp);
  
  static const struct kernel_param_ops nx_huge_pages_ops = {
        .set = set_nx_huge_pages,
 -      .get = param_get_bool,
 +      .get = get_nx_huge_pages,
  };
  
  static const struct kernel_param_ops nx_huge_pages_recovery_param_ops = {
@@@ -1603,6 -1600,10 +1603,10 @@@ bool kvm_unmap_gfn_range(struct kvm *kv
        if (tdp_mmu_enabled)
                flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
  
+       if (kvm_x86_ops.set_apic_access_page_addr &&
+           range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT)
+               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
        return flush;
  }
  
@@@ -5800,14 -5801,6 +5804,14 @@@ static void __kvm_mmu_invalidate_addr(s
  
        vcpu_clear_mmio_info(vcpu, addr);
  
 +      /*
 +       * Walking and synchronizing SPTEs both assume they are operating in
 +       * the context of the current MMU, and would need to be reworked if
 +       * this is ever used to sync the guest_mmu, e.g. to emulate INVEPT.
 +       */
 +      if (WARN_ON_ONCE(mmu != vcpu->arch.mmu))
 +              return;
 +
        if (!VALID_PAGE(root_hpa))
                return;
  
@@@ -6855,14 -6848,6 +6859,14 @@@ static void mmu_destroy_caches(void
        kmem_cache_destroy(mmu_page_header_cache);
  }
  
 +static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
 +{
 +      if (nx_hugepage_mitigation_hard_disabled)
 +              return sprintf(buffer, "never\n");
 +
 +      return param_get_bool(buffer, kp);
 +}
 +
  static bool get_nx_auto_mode(void)
  {
        /* Return true when CPU has the bug, and mitigations are ON */
@@@ -6879,29 -6864,15 +6883,29 @@@ static int set_nx_huge_pages(const cha
        bool old_val = nx_huge_pages;
        bool new_val;
  
 +      if (nx_hugepage_mitigation_hard_disabled)
 +              return -EPERM;
 +
        /* In "auto" mode deploy workaround only if CPU has the bug. */
 -      if (sysfs_streq(val, "off"))
 +      if (sysfs_streq(val, "off")) {
                new_val = 0;
 -      else if (sysfs_streq(val, "force"))
 +      } else if (sysfs_streq(val, "force")) {
                new_val = 1;
 -      else if (sysfs_streq(val, "auto"))
 +      } else if (sysfs_streq(val, "auto")) {
                new_val = get_nx_auto_mode();
 -      else if (kstrtobool(val, &new_val) < 0)
 +      } else if (sysfs_streq(val, "never")) {
 +              new_val = 0;
 +
 +              mutex_lock(&kvm_lock);
 +              if (!list_empty(&vm_list)) {
 +                      mutex_unlock(&kvm_lock);
 +                      return -EBUSY;
 +              }
 +              nx_hugepage_mitigation_hard_disabled = true;
 +              mutex_unlock(&kvm_lock);
 +      } else if (kstrtobool(val, &new_val) < 0) {
                return -EINVAL;
 +      }
  
        __set_nx_huge_pages(new_val);
  
@@@ -7039,9 -7010,6 +7043,9 @@@ static int set_nx_huge_pages_recovery_p
        uint old_period, new_period;
        int err;
  
 +      if (nx_hugepage_mitigation_hard_disabled)
 +              return -EPERM;
 +
        was_recovery_enabled = calc_nx_huge_pages_recovery_period(&old_period);
  
        err = param_set_uint(val, kp);
@@@ -7127,10 -7095,7 +7131,10 @@@ static void kvm_recover_nx_huge_pages(s
                 */
                slot = NULL;
                if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
 -                      slot = gfn_to_memslot(kvm, sp->gfn);
 +                      struct kvm_memslots *slots;
 +
 +                      slots = kvm_memslots_for_spte_role(kvm, sp->role);
 +                      slot = __gfn_to_memslot(slots, sp->gfn);
                        WARN_ON_ONCE(!slot);
                }
  
@@@ -7200,9 -7165,6 +7204,9 @@@ int kvm_mmu_post_init_vm(struct kvm *kv
  {
        int err;
  
 +      if (nx_hugepage_mitigation_hard_disabled)
 +              return 0;
 +
        err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
                                          "kvm-nx-lpage-recovery",
                                          &kvm->arch.nx_huge_page_recovery_thread);
diff --combined arch/x86/kvm/vmx/nested.c
index ba2ed6d87364512ebc00c4e059695e9c8925b509,368a43e3b40e6a132a29ceb110a57d06647ca645..516391cc0d64fb9689d17fea0d04903c13abe66a
@@@ -2328,8 -2328,7 +2328,7 @@@ static void prepare_vmcs02_early(struc
                 * Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
                 * will not have to rewrite the controls just for this bit.
                 */
-               if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
-                   (vmcs12->guest_cr4 & X86_CR4_UMIP))
+               if (vmx_umip_emulated() && (vmcs12->guest_cr4 & X86_CR4_UMIP))
                        exec_control |= SECONDARY_EXEC_DESC;
  
                if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
@@@ -2649,7 -2648,7 +2648,7 @@@ static int prepare_vmcs02(struct kvm_vc
        }
  
        if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
 -          intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
 +          kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
            WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
                                     vmcs12->guest_ia32_perf_global_ctrl))) {
                *entry_failure_code = ENTRY_FAIL_DEFAULT;
@@@ -4524,7 -4523,7 +4523,7 @@@ static void load_vmcs12_host_state(stru
                vcpu->arch.pat = vmcs12->host_ia32_pat;
        }
        if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
 -          intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
 +          kvm_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
                WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
                                         vmcs12->host_ia32_perf_global_ctrl));
  
diff --combined arch/x86/kvm/vmx/pmu_intel.c
index 30ec9ccdea47252af85095631048b366c6790d32,84be32d9f365d4763796be2b7ba182556df1e476..80c769c58a876530674e823434bcbf627d19d3c6
@@@ -73,6 -73,18 +73,6 @@@ static struct kvm_pmc *intel_pmc_idx_to
        }
  }
  
 -static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 -{
 -      int bit;
 -
 -      if (!diff)
 -              return;
 -
 -      for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX)
 -              set_bit(bit, pmu->reprogram_pmi);
 -      kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu));
 -}
 -
  static bool intel_hw_event_available(struct kvm_pmc *pmc)
  {
        struct kvm_pmu *pmu = pmc_to_pmu(pmc);
        return true;
  }
  
 -/* check if a PMC is enabled by comparing it with globl_ctrl bits. */
 -static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
 -{
 -      struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 -
 -      if (!intel_pmu_has_perf_global_ctrl(pmu))
 -              return true;
 -
 -      return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
 -}
 -
  static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
  {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@@ -175,7 -198,11 +175,7 @@@ static bool intel_is_valid_msr(struct k
  
        switch (msr) {
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
 -      case MSR_CORE_PERF_GLOBAL_STATUS:
 -      case MSR_CORE_PERF_GLOBAL_CTRL:
 -      case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 -              return intel_pmu_has_perf_global_ctrl(pmu);
 -              break;
 +              return kvm_pmu_has_perf_global_ctrl(pmu);
        case MSR_IA32_PEBS_ENABLE:
                ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
                break;
@@@ -325,6 -352,15 +325,6 @@@ static int intel_pmu_get_msr(struct kvm
        case MSR_CORE_PERF_FIXED_CTR_CTRL:
                msr_info->data = pmu->fixed_ctr_ctrl;
                break;
 -      case MSR_CORE_PERF_GLOBAL_STATUS:
 -              msr_info->data = pmu->global_status;
 -              break;
 -      case MSR_CORE_PERF_GLOBAL_CTRL:
 -              msr_info->data = pmu->global_ctrl;
 -              break;
 -      case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 -              msr_info->data = 0;
 -              break;
        case MSR_IA32_PEBS_ENABLE:
                msr_info->data = pmu->pebs_enable;
                break;
@@@ -374,6 -410,29 +374,6 @@@ static int intel_pmu_set_msr(struct kvm
                if (pmu->fixed_ctr_ctrl != data)
                        reprogram_fixed_counters(pmu, data);
                break;
 -      case MSR_CORE_PERF_GLOBAL_STATUS:
 -              if (!msr_info->host_initiated)
 -                      return 1; /* RO MSR */
 -
 -              pmu->global_status = data;
 -              break;
 -      case MSR_CORE_PERF_GLOBAL_CTRL:
 -              if (!kvm_valid_perf_global_ctrl(pmu, data))
 -                      return 1;
 -
 -              if (pmu->global_ctrl != data) {
 -                      diff = pmu->global_ctrl ^ data;
 -                      pmu->global_ctrl = data;
 -                      reprogram_counters(pmu, diff);
 -              }
 -              break;
 -      case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 -              if (data & pmu->global_ovf_ctrl_mask)
 -                      return 1;
 -
 -              if (!msr_info->host_initiated)
 -                      pmu->global_status &= ~data;
 -              break;
        case MSR_IA32_PEBS_ENABLE:
                if (data & pmu->pebs_enable_mask)
                        return 1;
                }
                break;
        case MSR_IA32_DS_AREA:
-               if (msr_info->host_initiated && data && !guest_cpuid_has(vcpu, X86_FEATURE_DS))
-                       return 1;
                if (is_noncanonical_address(data, vcpu))
                        return 1;
  
@@@ -472,7 -529,7 +470,7 @@@ static void intel_pmu_refresh(struct kv
        pmu->reserved_bits = 0xffffffff00200000ull;
        pmu->raw_event_mask = X86_RAW_EVENT_MASK;
        pmu->global_ctrl_mask = ~0ull;
 -      pmu->global_ovf_ctrl_mask = ~0ull;
 +      pmu->global_status_mask = ~0ull;
        pmu->fixed_ctr_ctrl_mask = ~0ull;
        pmu->pebs_enable_mask = ~0ull;
        pmu->pebs_data_cfg_mask = ~0ull;
        counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
                (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
        pmu->global_ctrl_mask = counter_mask;
 -      pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask
 +
 +      /*
 +       * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET)
 +       * share reserved bit definitions.  The kernel just happens to use
 +       * OVF_CTRL for the names.
 +       */
 +      pmu->global_status_mask = pmu->global_ctrl_mask
                        & ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
                            MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
        if (vmx_pt_mode_is_host_guest())
 -              pmu->global_ovf_ctrl_mask &=
 +              pmu->global_status_mask &=
                                ~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;
  
        entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
@@@ -748,7 -799,7 +746,7 @@@ void intel_pmu_cross_mapped_check(struc
                pmc = intel_pmc_idx_to_pmc(pmu, bit);
  
                if (!pmc || !pmc_speculative_in_use(pmc) ||
 -                  !intel_pmc_is_enabled(pmc) || !pmc->perf_event)
 +                  !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
                        continue;
  
                /*
  
  struct kvm_pmu_ops intel_pmu_ops __initdata = {
        .hw_event_available = intel_hw_event_available,
 -      .pmc_is_enabled = intel_pmc_is_enabled,
        .pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
        .rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
        .msr_idx_to_pmc = intel_msr_idx_to_pmc,
        .cleanup = intel_pmu_cleanup,
        .EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
        .MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC,
 +      .MIN_NR_GP_COUNTERS = 1,
  };
diff --combined arch/x86/kvm/vmx/vmx.c
index 2d9d155691a7f5e20374066902bc60b7be9e13d4,9ea4a5dfe62a1e724fd44dadf2641dee1a183913..0ecf4be2c6af0e11de6b2ea993c29c3b1b00d7c0
@@@ -2287,16 -2287,19 +2287,16 @@@ static int vmx_set_msr(struct kvm_vcpu 
                        return 1;
                goto find_uret_msr;
        case MSR_IA32_CR_PAT:
 -              if (!kvm_pat_valid(data))
 -                      return 1;
 +              ret = kvm_set_msr_common(vcpu, msr_info);
 +              if (ret)
 +                      break;
  
                if (is_guest_mode(vcpu) &&
                    get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
                        get_vmcs12(vcpu)->guest_ia32_pat = data;
  
 -              if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
 +              if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
                        vmcs_write64(GUEST_IA32_PAT, data);
 -                      vcpu->arch.pat = data;
 -                      break;
 -              }
 -              ret = kvm_set_msr_common(vcpu, msr_info);
                break;
        case MSR_IA32_MCG_EXT_CTL:
                if ((!msr_info->host_initiated &&
@@@ -3384,15 -3387,15 +3384,15 @@@ static bool vmx_is_valid_cr4(struct kvm
  
  void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
-       unsigned long old_cr4 = vcpu->arch.cr4;
+       unsigned long old_cr4 = kvm_read_cr4(vcpu);
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned long hw_cr4;
        /*
         * Pass through host's Machine Check Enable value to hw_cr4, which
         * is in force while we are in guest mode.  Do not let guests control
         * this bit, even if host CR4.MCE == 0.
         */
-       unsigned long hw_cr4;
        hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
        if (is_unrestricted_guest(vcpu))
                hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
        else
                hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
  
-       if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
+       if (vmx_umip_emulated()) {
                if (cr4 & X86_CR4_UMIP) {
                        secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
                        hw_cr4 &= ~X86_CR4_UMIP;
@@@ -5399,7 -5402,13 +5399,13 @@@ static int handle_set_cr4(struct kvm_vc
  
  static int handle_desc(struct kvm_vcpu *vcpu)
  {
-       WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
+       /*
+        * UMIP emulation relies on intercepting writes to CR4.UMIP, i.e. this
+        * and other code needs to be updated if UMIP can be guest owned.
+        */
+       BUILD_BUG_ON(KVM_POSSIBLE_CR4_GUEST_BITS & X86_CR4_UMIP);
+       WARN_ON_ONCE(!kvm_is_cr4_bit_set(vcpu, X86_CR4_UMIP));
        return kvm_emulate_instruction(vcpu, 0);
  }
  
@@@ -6705,7 -6714,12 +6711,12 @@@ void vmx_set_virtual_apic_mode(struct k
  
  static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu)
  {
-       struct page *page;
+       const gfn_t gfn = APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT;
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_memslots *slots = kvm_memslots(kvm);
+       struct kvm_memory_slot *slot;
+       unsigned long mmu_seq;
+       kvm_pfn_t pfn;
  
        /* Defer reload until vmcs01 is the current VMCS. */
        if (is_guest_mode(vcpu)) {
            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
                return;
  
-       page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-       if (is_error_page(page))
+       /*
+        * Grab the memslot so that the hva lookup for the mmu_notifier retry
+        * is guaranteed to use the same memslot as the pfn lookup, i.e. rely
+        * on the pfn lookup's validation of the memslot to ensure a valid hva
+        * is used for the retry check.
+        */
+       slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT);
+       if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
                return;
  
-       vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
+       /*
+        * Ensure that the mmu_notifier sequence count is read before KVM
+        * retrieves the pfn from the primary MMU.  Note, the memslot is
+        * protected by SRCU, not the mmu_notifier.  Pairs with the smp_wmb()
+        * in kvm_mmu_invalidate_end().
+        */
+       mmu_seq = kvm->mmu_invalidate_seq;
+       smp_rmb();
+       /*
+        * No need to retry if the memslot does not exist or is invalid.  KVM
+        * controls the APIC-access page memslot, and only deletes the memslot
+        * if APICv is permanently inhibited, i.e. the memslot won't reappear.
+        */
+       pfn = gfn_to_pfn_memslot(slot, gfn);
+       if (is_error_noslot_pfn(pfn))
+               return;
+       read_lock(&vcpu->kvm->mmu_lock);
+       if (mmu_invalidate_retry_hva(kvm, mmu_seq,
+                                    gfn_to_hva_memslot(slot, gfn))) {
+               kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
+               read_unlock(&vcpu->kvm->mmu_lock);
+               goto out;
+       }
+       vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
+       read_unlock(&vcpu->kvm->mmu_lock);
        vmx_flush_tlb_current(vcpu);
  
+ out:
        /*
         * Do not pin apic access page in memory, the MMU notifier
         * will call us again if it is migrated or swapped out.
         */
-       put_page(page);
+       kvm_release_pfn_clean(pfn);
  }
  
  static void vmx_hwapic_isr_update(int max_isr)
diff --combined arch/x86/kvm/x86.c
index 7d6e044504482d9f2e136606fe4a4db13c684b57,f962b7e3487ef6d1812ae3f1affc6f2fc3ac6f54..8bca4d2405f8c06047d537dd52a7430397018ca0
@@@ -1017,11 -1017,13 +1017,11 @@@ void kvm_load_guest_xsave_state(struct 
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
  
 -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 -      if (static_cpu_has(X86_FEATURE_PKU) &&
 +      if (cpu_feature_enabled(X86_FEATURE_PKU) &&
            vcpu->arch.pkru != vcpu->arch.host_pkru &&
            ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
             kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
                write_pkru(vcpu->arch.pkru);
 -#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
  }
  EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
  
@@@ -1030,13 -1032,15 +1030,13 @@@ void kvm_load_host_xsave_state(struct k
        if (vcpu->arch.guest_state_protected)
                return;
  
 -#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
 -      if (static_cpu_has(X86_FEATURE_PKU) &&
 +      if (cpu_feature_enabled(X86_FEATURE_PKU) &&
            ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
             kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
                vcpu->arch.pkru = rdpkru();
                if (vcpu->arch.pkru != vcpu->arch.host_pkru)
                        write_pkru(vcpu->arch.host_pkru);
        }
 -#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
  
        if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
  
@@@ -1423,14 -1427,15 +1423,14 @@@ int kvm_emulate_rdpmc(struct kvm_vcpu *
  EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
  
  /*
 - * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 - * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 - *
 - * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
 - * extract the supported MSRs from the related const lists.
 - * msrs_to_save is selected from the msrs_to_save_all to reflect the
 - * capabilities of the host cpu. This capabilities test skips MSRs that are
 - * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
 - * may depend on host virtualization features rather than host cpu features.
 + * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track
 + * the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
 + * KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.  msrs_to_save holds MSRs that
 + * require host support, i.e. should be probed via RDMSR.  emulated_msrs holds
 + * MSRs that KVM emulates without strictly requiring host support.
 + * msr_based_features holds MSRs that enumerate features, i.e. are effectively
 + * CPUID leafs.  Note, msr_based_features isn't mutually exclusive with
 + * msrs_to_save and emulated_msrs.
   */
  
  static const u32 msrs_to_save_base[] = {
@@@ -1478,10 -1483,6 +1478,10 @@@ static const u32 msrs_to_save_pmu[] = 
        MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5,
        MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2,
        MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5,
 +
 +      MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
 +      MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
 +      MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
  };
  
  static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_base) +
@@@ -1530,11 -1531,11 +1530,11 @@@ static const u32 emulated_msrs_all[] = 
        MSR_IA32_UCODE_REV,
  
        /*
 -       * The following list leaves out MSRs whose values are determined
 -       * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
 -       * We always support the "true" VMX control MSRs, even if the host
 -       * processor does not, so I am putting these registers here rather
 -       * than in msrs_to_save_all.
 +       * KVM always supports the "true" VMX control MSRs, even if the host
 +       * does not.  The VMX MSRs as a whole are considered "emulated" as KVM
 +       * doesn't strictly require them to exist in the host (ignoring that
 +       * KVM would refuse to load in the first place if the core set of MSRs
 +       * aren't supported).
         */
        MSR_IA32_VMX_BASIC,
        MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@@ -1630,7 -1631,7 +1630,7 @@@ static u64 kvm_get_arch_capabilities(vo
         * If we're doing cache flushes (either "always" or "cond")
         * we will do one whenever the guest does a vmlaunch/vmresume.
         * If an outer hypervisor is doing the cache flush for us
 -       * (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
 +       * (ARCH_CAP_SKIP_VMENTRY_L1DFLUSH), we can safely pass that
         * capability to the guest too, and if EPT is disabled we're not
         * vulnerable.  Overall, only VMENTER_L1D_FLUSH_NEVER will
         * require a nested hypervisor to do a flush of its own.
@@@ -1808,7 -1809,7 +1808,7 @@@ bool kvm_msr_allowed(struct kvm_vcpu *v
                unsigned long *bitmap = ranges[i].bitmap;
  
                if ((index >= start) && (index < end) && (flags & type)) {
 -                      allowed = !!test_bit(index - start, bitmap);
 +                      allowed = test_bit(index - start, bitmap);
                        break;
                }
        }
@@@ -3701,14 -3702,8 +3701,14 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                        return 1;
                }
                break;
 -      case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
 -      case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
 +      case MSR_IA32_CR_PAT:
 +              if (!kvm_pat_valid(data))
 +                      return 1;
 +
 +              vcpu->arch.pat = data;
 +              break;
 +      case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
 +      case MSR_MTRRdefType:
                return kvm_mtrr_set_msr(vcpu, msr, data);
        case MSR_IA32_APICBASE:
                return kvm_set_apic_base(vcpu, msr_info);
@@@ -4115,12 -4110,9 +4115,12 @@@ int kvm_get_msr_common(struct kvm_vcpu 
                msr_info->data = kvm_scale_tsc(rdtsc(), ratio) + offset;
                break;
        }
 +      case MSR_IA32_CR_PAT:
 +              msr_info->data = vcpu->arch.pat;
 +              break;
        case MSR_MTRRcap:
 -      case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
 -      case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
 +      case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
 +      case MSR_MTRRdefType:
                return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
        case 0xcd: /* fsb frequency */
                msr_info->data = 3;
@@@ -7158,12 -7150,6 +7158,12 @@@ static void kvm_probe_msr_to_save(u32 m
                    kvm_pmu_cap.num_counters_fixed)
                        return;
                break;
 +      case MSR_AMD64_PERF_CNTR_GLOBAL_CTL:
 +      case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS:
 +      case MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR:
 +              if (!kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2))
 +                      return;
 +              break;
        case MSR_IA32_XFD:
        case MSR_IA32_XFD_ERR:
                if (!kvm_cpu_cap_has(X86_FEATURE_XFD))
@@@ -10449,20 -10435,6 +10449,6 @@@ static void vcpu_load_eoi_exitmap(struc
                vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
  }
  
- void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                           unsigned long start, unsigned long end)
- {
-       unsigned long apic_address;
-       /*
-        * The physical address of apic access page is stored in the VMCS.
-        * Update it when it becomes invalid.
-        */
-       apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-       if (start <= apic_address && apic_address < end)
-               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
- }
  void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
  {
        static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm);
@@@ -10772,9 -10744,6 +10758,9 @@@ static int vcpu_enter_guest(struct kvm_
                        exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
                        break;
                }
 +
 +              /* Note, VM-Exits that go down the "slow" path are accounted below. */
 +              ++vcpu->stat.exits;
        }
  
        /*
diff --combined include/linux/kvm_host.h
index 84ba21c8093f8f30f0ea76ee1c517cbd60027026,cb66f4100be7487005e43edd2cccda352e76ca16..9d3ac7720da9f46a2de764bbd0508da040355f59
@@@ -849,7 -849,7 +849,7 @@@ static inline void kvm_vm_bugged(struc
  
  #define KVM_BUG(cond, kvm, fmt...)                            \
  ({                                                            \
 -      int __ret = (cond);                                     \
 +      bool __ret = !!(cond);                                  \
                                                                \
        if (WARN_ONCE(__ret && !(kvm)->vm_bugged, fmt))         \
                kvm_vm_bugged(kvm);                             \
  
  #define KVM_BUG_ON(cond, kvm)                                 \
  ({                                                            \
 -      int __ret = (cond);                                     \
 +      bool __ret = !!(cond);                                  \
                                                                \
        if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged))           \
                kvm_vm_bugged(kvm);                             \
@@@ -991,8 -991,6 +991,8 @@@ static inline bool kvm_memslots_empty(s
        return RB_EMPTY_ROOT(&slots->gfn_tree);
  }
  
 +bool kvm_are_all_memslots_empty(struct kvm *kvm);
 +
  #define kvm_for_each_memslot(memslot, bkt, slots)                           \
        hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \
                if (WARN_ON_ONCE(!memslot->npages)) {                         \
@@@ -2239,9 -2237,6 +2239,6 @@@ static inline long kvm_arch_vcpu_async_
  }
  #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
  
- void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                           unsigned long start, unsigned long end);
  void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
  
  #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
diff --combined virt/kvm/kvm_main.c
index ab8c8eb9fd624275fdb485bf1c06c9d1fbe7b260,f3c7c3c901615e2be1b929d5610891ce39e19e8d..b838c8f71349e078986d1fda6b5a9a68c96cc415
@@@ -154,11 -154,6 +154,6 @@@ static unsigned long long kvm_active_vm
  
  static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
  
- __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-                                                  unsigned long start, unsigned long end)
- {
- }
  __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
  {
  }
@@@ -521,18 -516,6 +516,6 @@@ static inline struct kvm *mmu_notifier_
        return container_of(mn, struct kvm, mmu_notifier);
  }
  
- static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
-                                             struct mm_struct *mm,
-                                             unsigned long start, unsigned long end)
- {
-       struct kvm *kvm = mmu_notifier_to_kvm(mn);
-       int idx;
-       idx = srcu_read_lock(&kvm->srcu);
-       kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
-       srcu_read_unlock(&kvm->srcu, idx);
- }
  typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
  
  typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
@@@ -686,24 -669,6 +669,24 @@@ static __always_inline int kvm_handle_h
  
        return __kvm_handle_hva_range(kvm, &range);
  }
 +
 +static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 +{
 +      /*
 +       * Skipping invalid memslots is correct if and only change_pte() is
 +       * surrounded by invalidate_range_{start,end}(), which is currently
 +       * guaranteed by the primary MMU.  If that ever changes, KVM needs to
 +       * unmap the memslot instead of skipping the memslot to ensure that KVM
 +       * doesn't hold references to the old PFN.
 +       */
 +      WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
 +
 +      if (range->slot->flags & KVM_MEMSLOT_INVALID)
 +              return false;
 +
 +      return kvm_set_spte_gfn(kvm, range);
 +}
 +
  static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
                                        struct mm_struct *mm,
                                        unsigned long address,
        if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
                return;
  
 -      kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
 +      kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
  }
  
  void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
@@@ -910,7 -875,6 +893,6 @@@ static void kvm_mmu_notifier_release(st
  }
  
  static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
-       .invalidate_range       = kvm_mmu_notifier_invalidate_range,
        .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
        .invalidate_range_end   = kvm_mmu_notifier_invalidate_range_end,
        .clear_flush_young      = kvm_mmu_notifier_clear_flush_young,
@@@ -3888,10 -3852,7 +3870,10 @@@ static int create_vcpu_fd(struct kvm_vc
  static int vcpu_get_pid(void *data, u64 *val)
  {
        struct kvm_vcpu *vcpu = data;
 -      *val = pid_nr(rcu_access_pointer(vcpu->pid));
 +
 +      rcu_read_lock();
 +      *val = pid_nr(rcu_dereference(vcpu->pid));
 +      rcu_read_unlock();
        return 0;
  }
  
@@@ -3993,7 -3954,7 +3975,7 @@@ static int kvm_vm_ioctl_create_vcpu(str
        if (r < 0)
                goto kvm_put_xa_release;
  
 -      if (KVM_BUG_ON(!!xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
 +      if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
                r = -EINVAL;
                goto kvm_put_xa_release;
        }
@@@ -4623,7 -4584,7 +4605,7 @@@ int __attribute__((weak)) kvm_vm_ioctl_
        return -EINVAL;
  }
  
 -static bool kvm_are_all_memslots_empty(struct kvm *kvm)
 +bool kvm_are_all_memslots_empty(struct kvm *kvm)
  {
        int i;
  
  
        return true;
  }
 +EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty);
  
  static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
                                           struct kvm_enable_cap *cap)
@@@ -5316,12 -5276,6 +5298,12 @@@ static void hardware_disable_all(void
  }
  #endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
  
 +static void kvm_iodevice_destructor(struct kvm_io_device *dev)
 +{
 +      if (dev->ops->destructor)
 +              dev->ops->destructor(dev);
 +}
 +
  static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
  {
        int i;
@@@ -5545,7 -5499,7 +5527,7 @@@ int kvm_io_bus_register_dev(struct kvm 
  int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                              struct kvm_io_device *dev)
  {
 -      int i, j;
 +      int i;
        struct kvm_io_bus *new_bus, *bus;
  
        lockdep_assert_held(&kvm->slots_lock);
        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
        synchronize_srcu_expedited(&kvm->srcu);
  
 -      /* Destroy the old bus _after_ installing the (null) bus. */
 +      /*
 +       * If NULL bus is installed, destroy the old bus, including all the
 +       * attached devices. Otherwise, destroy the caller's device only.
 +       */
        if (!new_bus) {
                pr_err("kvm: failed to shrink bus, removing it completely\n");
 -              for (j = 0; j < bus->dev_count; j++) {
 -                      if (j == i)
 -                              continue;
 -                      kvm_iodevice_destructor(bus->range[j].dev);
 -              }
 +              kvm_io_bus_destroy(bus);
 +              return -ENOMEM;
        }
  
 +      kvm_iodevice_destructor(dev);
        kfree(bus);
 -      return new_bus ? 0 : -ENOMEM;
 +      return 0;
  }
  
  struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,