Merge tag 'kvm-x86-selftests-6.4' of https://github.com/kvm-x86/linux into HEAD

author Paolo Bonzini <[email protected]>

Wed, 26 Apr 2023 19:54:40 +0000 (15:54 -0400)

committer Paolo Bonzini <[email protected]>

Wed, 26 Apr 2023 19:56:01 +0000 (15:56 -0400)
author Paolo Bonzini <[email protected]>
Wed, 26 Apr 2023 19:54:40 +0000 (15:54 -0400)
committer Paolo Bonzini <[email protected]>
Wed, 26 Apr 2023 19:56:01 +0000 (15:56 -0400)
diff --combined arch/x86/kvm/cpuid.c

index b944492faefa332bf7d4ad865c3ac12da452eac0,1daf0df6a9b8a697bfd35e23865883835eb56556..123bf8b97a4b21098a8271320f711cecf2eae5ec
--- 1/arch/x86/kvm/cpuid.c
--- 2/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@@ -60,6 -60,12 +60,6 @@@ u32 xstate_required_size(u64 xstate_bv
         return ret;
   }
   
- -/*
- - * This one is tied to SSB in the user API, and not
- - * visible in /proc/cpuinfo.
- - */
- -#define KVM_X86_FEATURE_AMD_PSFD      (13*32+28) /* Predictive Store Forwarding Disable */
- -
   #define F feature_bit
   
   /* Scattered Flag - For features that are scattered by cpufeatures.h. */
@@@ -260,7 -266,7 +260,7 @@@ static void __kvm_update_cpuid_runtime(
                 /* Update OSXSAVE bit */
                 if (boot_cpu_has(X86_FEATURE_XSAVE))
                         cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
- -                                 kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE));
+ +                                         kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
   
                 cpuid_entry_change(best, X86_FEATURE_APIC,
                            vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
@@@ -269,7 -275,7 +269,7 @@@
         best = cpuid_entry2_find(entries, nent, 7, 0);
         if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7)
                 cpuid_entry_change(best, X86_FEATURE_OSPKE,
- -                                 kvm_read_cr4_bits(vcpu, X86_CR4_PKE));
+ +                                 kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
   
         best = cpuid_entry2_find(entries, nent, 0xD, 0);
         if (best)
@@@ -414,7 -420,7 +414,7 @@@ static int kvm_set_cpuid(struct kvm_vcp
          * KVM_SET_CPUID{,2} again. To support this legacy behavior, check
          * whether the supplied CPUID data is equal to what's already set.
          */
- -      if (vcpu->arch.last_vmentry_cpu != -1) {
+ +      if (kvm_vcpu_has_run(vcpu)) {
                 r = kvm_cpuid_check_equal(vcpu, e2, nent);
                 if (r)
                         return r;
@@@ -709,7 -715,7 +709,7 @@@ void kvm_set_cpu_caps(void
                 F(CLZERO) | F(XSAVEERPTR) |
                 F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
                 F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
- -              __feature_bit(KVM_X86_FEATURE_AMD_PSFD)
+ +              F(AMD_PSFD)
         );
   
         /*
@@@ -996,7 -1002,7 +996,7 @@@ static inline int __do_cpuid_func(struc
                 entry->eax = entry->ebx = entry->ecx = 0;
                 break;
         case 0xd: {
-               u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
+               u64 permitted_xcr0 = kvm_get_filtered_xcr0();
                 u64 permitted_xss = kvm_caps.supported_xss;
   
                 entry->eax &= permitted_xcr0;
diff --combined arch/x86/kvm/x86.c

index 095a41c6f346714cb4e2266feeb560211ab889aa,181f155933cfcf39f19f1401bfd2ddf6bbd025a2..6a41bdb7f599805168762336c13f37c18cda8333
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -194,7 -194,7 +194,7 @@@ bool __read_mostly eager_page_split = t
   module_param(eager_page_split, bool, 0644);
   
   /* Enable/disable SMT_RSB bug mitigation */
- -bool __read_mostly mitigate_smt_rsb;
+ +static bool __read_mostly mitigate_smt_rsb;
   module_param(mitigate_smt_rsb, bool, 0444);
   
   /*
@@@ -802,8 -802,8 +802,8 @@@ void kvm_inject_emulated_page_fault(str
          */
         if ((fault->error_code & PFERR_PRESENT_MASK) &&
             !(fault->error_code & PFERR_RSVD_MASK))
- -              kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
- -                                     fault_mmu->root.hpa);
+ +              kvm_mmu_invalidate_addr(vcpu, fault_mmu, fault->address,
+ +                                      KVM_MMU_ROOT_CURRENT);
   
         fault_mmu->inject_page_fault(vcpu, fault);
   }
@@@ -841,7 -841,7 +841,7 @@@ bool kvm_require_cpl(struct kvm_vcpu *v
   
   bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
   {
- -      if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+ +      if ((dr != 4 && dr != 5) || !kvm_is_cr4_bit_set(vcpu, X86_CR4_DE))
                 return true;
   
         kvm_queue_exception(vcpu, UD_VECTOR);
@@@ -906,24 -906,6 +906,24 @@@ EXPORT_SYMBOL_GPL(load_pdptrs)
   
   void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
   {
+ +      /*
+ +       * CR0.WP is incorporated into the MMU role, but only for non-nested,
+ +       * indirect shadow MMUs.  If paging is disabled, no updates are needed
+ +       * as there are no permission bits to emulate.  If TDP is enabled, the
+ +       * MMU's metadata needs to be updated, e.g. so that emulating guest
+ +       * translations does the right thing, but there's no need to unload the
+ +       * root as CR0.WP doesn't affect SPTEs.
+ +       */
+ +      if ((cr0 ^ old_cr0) == X86_CR0_WP) {
+ +              if (!(cr0 & X86_CR0_PG))
+ +                      return;
+ +
+ +              if (tdp_enabled) {
+ +                      kvm_init_mmu(vcpu);
+ +                      return;
+ +              }
+ +      }
+ +
         if ((cr0 ^ old_cr0) & X86_CR0_PG) {
                 kvm_clear_async_pf_completion_queue(vcpu);
                 kvm_async_pf_hash_reset(vcpu);
@@@ -983,7 -965,7 +983,7 @@@ int kvm_set_cr0(struct kvm_vcpu *vcpu, 
                 return 1;
   
         if (!(cr0 & X86_CR0_PG) &&
- -          (is_64_bit_mode(vcpu) || kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)))
+ +          (is_64_bit_mode(vcpu) || kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)))
                 return 1;
   
         static_call(kvm_x86_set_cr0)(vcpu, cr0);
@@@ -1005,7 -987,7 +1005,7 @@@ void kvm_load_guest_xsave_state(struct 
         if (vcpu->arch.guest_state_protected)
                 return;
   
- -      if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+ +      if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
   
                 if (vcpu->arch.xcr0 != host_xcr0)
                         xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
@@@ -1019,7 -1001,7 +1019,7 @@@
         if (static_cpu_has(X86_FEATURE_PKU) &&
             vcpu->arch.pkru != vcpu->arch.host_pkru &&
             ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
- -           kvm_read_cr4_bits(vcpu, X86_CR4_PKE)))
+ +           kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
                 write_pkru(vcpu->arch.pkru);
   #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
   }
@@@ -1033,14 -1015,14 +1033,14 @@@ void kvm_load_host_xsave_state(struct k
   #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
         if (static_cpu_has(X86_FEATURE_PKU) &&
             ((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
- -           kvm_read_cr4_bits(vcpu, X86_CR4_PKE))) {
+ +           kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
                 vcpu->arch.pkru = rdpkru();
                 if (vcpu->arch.pkru != vcpu->arch.host_pkru)
                         write_pkru(vcpu->arch.host_pkru);
         }
   #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
   
- -      if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
+ +      if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
   
                 if (vcpu->arch.xcr0 != host_xcr0)
                         xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
@@@ -1196,6 -1178,9 +1196,6 @@@ int kvm_set_cr4(struct kvm_vcpu *vcpu, 
                 return 1;
   
         if ((cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE)) {
- -              if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
- -                      return 1;
- -
                 /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
                 if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
                         return 1;
@@@ -1242,7 -1227,7 +1242,7 @@@ static void kvm_invalidate_pcid(struct 
          * PCIDs for them are also 0, because MOV to CR3 always flushes the TLB
          * with PCIDE=0.
          */
- -      if (!kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
+ +      if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE))
                 return;
   
         for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
@@@ -1257,7 -1242,9 +1257,7 @@@ int kvm_set_cr3(struct kvm_vcpu *vcpu, 
         bool skip_tlb_flush = false;
         unsigned long pcid = 0;
   #ifdef CONFIG_X86_64
- -      bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
- -
- -      if (pcid_enabled) {
+ +      if (kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE)) {
                 skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
                 cr3 &= ~X86_CR3_PCID_NOFLUSH;
                 pcid = cr3 & X86_CR3_PCID_MASK;
@@@ -1556,40 -1543,38 +1556,40 @@@ static u32 emulated_msrs[ARRAY_SIZE(emu
   static unsigned num_emulated_msrs;
   
   /*
- - * List of msr numbers which are used to expose MSR-based features that
- - * can be used by a hypervisor to validate requested CPU features.
+ + * List of MSRs that control the existence of MSR-based features, i.e. MSRs
+ + * that are effectively CPUID leafs.  VMX MSRs are also included in the set of
+ + * feature MSRs, but are handled separately to allow expedited lookups.
    */
- -static const u32 msr_based_features_all[] = {
- -      MSR_IA32_VMX_BASIC,
- -      MSR_IA32_VMX_TRUE_PINBASED_CTLS,
- -      MSR_IA32_VMX_PINBASED_CTLS,
- -      MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
- -      MSR_IA32_VMX_PROCBASED_CTLS,
- -      MSR_IA32_VMX_TRUE_EXIT_CTLS,
- -      MSR_IA32_VMX_EXIT_CTLS,
- -      MSR_IA32_VMX_TRUE_ENTRY_CTLS,
- -      MSR_IA32_VMX_ENTRY_CTLS,
- -      MSR_IA32_VMX_MISC,
- -      MSR_IA32_VMX_CR0_FIXED0,
- -      MSR_IA32_VMX_CR0_FIXED1,
- -      MSR_IA32_VMX_CR4_FIXED0,
- -      MSR_IA32_VMX_CR4_FIXED1,
- -      MSR_IA32_VMX_VMCS_ENUM,
- -      MSR_IA32_VMX_PROCBASED_CTLS2,
- -      MSR_IA32_VMX_EPT_VPID_CAP,
- -      MSR_IA32_VMX_VMFUNC,
- -
+ +static const u32 msr_based_features_all_except_vmx[] = {
         MSR_AMD64_DE_CFG,
         MSR_IA32_UCODE_REV,
         MSR_IA32_ARCH_CAPABILITIES,
         MSR_IA32_PERF_CAPABILITIES,
   };
   
- -static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
+ +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) +
+ +                            (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)];
   static unsigned int num_msr_based_features;
   
+ +/*
+ + * All feature MSRs except uCode revID, which tracks the currently loaded uCode
+ + * patch, are immutable once the vCPU model is defined.
+ + */
+ +static bool kvm_is_immutable_feature_msr(u32 msr)
+ +{
+ +      int i;
+ +
+ +      if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR)
+ +              return true;
+ +
+ +      for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) {
+ +              if (msr == msr_based_features_all_except_vmx[i])
+ +                      return msr != MSR_IA32_UCODE_REV;
+ +      }
+ +
+ +      return false;
+ +}
+ +
   /*
    * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM
    * does not yet virtualize. These include:
@@@ -2207,22 -2192,6 +2207,22 @@@ static int do_get_msr(struct kvm_vcpu *
   
   static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
   {
+ +      u64 val;
+ +
+ +      /*
+ +       * Disallow writes to immutable feature MSRs after KVM_RUN.  KVM does
+ +       * not support modifying the guest vCPU model on the fly, e.g. changing
+ +       * the nVMX capabilities while L2 is running is nonsensical.  Ignore
+ +       * writes of the same value, e.g. to allow userspace to blindly stuff
+ +       * all MSRs when emulating RESET.
+ +       */
+ +      if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
+ +              if (do_get_msr(vcpu, index, &val) || *data != val)
+ +                      return -EINVAL;
+ +
+ +              return 0;
+ +      }
+ +
         return kvm_set_msr_ignored_check(vcpu, index, *data, true);
   }
   
@@@ -3645,40 -3614,9 +3645,40 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                 if (data & ~kvm_caps.supported_perf_cap)
                         return 1;
   
+ +              /*
+ +               * Note, this is not just a performance optimization!  KVM
+ +               * disallows changing feature MSRs after the vCPU has run; PMU
+ +               * refresh will bug the VM if called after the vCPU has run.
+ +               */
+ +              if (vcpu->arch.perf_capabilities == data)
+ +                      break;
+ +
                 vcpu->arch.perf_capabilities = data;
                 kvm_pmu_refresh(vcpu);
- -              return 0;
+ +              break;
+ +      case MSR_IA32_PRED_CMD:
+ +              if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu))
+ +                      return 1;
+ +
+ +              if (!boot_cpu_has(X86_FEATURE_IBPB) || (data & ~PRED_CMD_IBPB))
+ +                      return 1;
+ +              if (!data)
+ +                      break;
+ +
+ +              wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ +              break;
+ +      case MSR_IA32_FLUSH_CMD:
+ +              if (!msr_info->host_initiated &&
+ +                  !guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D))
+ +                      return 1;
+ +
+ +              if (!boot_cpu_has(X86_FEATURE_FLUSH_L1D) || (data & ~L1D_FLUSH))
+ +                      return 1;
+ +              if (!data)
+ +                      break;
+ +
+ +              wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
+ +              break;
         case MSR_EFER:
                 return set_efer(vcpu, msr_info);
         case MSR_K7_HWCR:
@@@ -4593,9 -4531,7 +4593,7 @@@ int kvm_vm_ioctl_check_extension(struc
                         r = 0;
                 break;
         case KVM_CAP_XSAVE2: {
-               u64 guest_perm = xstate_get_guest_group_perm();
- 
-               r = xstate_required_size(kvm_caps.supported_xcr0 & guest_perm, false);
+               r = xstate_required_size(kvm_get_filtered_xcr0(), false);
                 if (r < sizeof(struct kvm_xsave))
                         r = sizeof(struct kvm_xsave);
                 break;
@@@ -5095,7 -5031,7 +5093,7 @@@ static int kvm_vcpu_ioctl_x86_set_mce(s
                 return 0;
         if (mce->status & MCI_STATUS_UC) {
                 if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) ||
- -                  !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) {
+ +                  !kvm_is_cr4_bit_set(vcpu, X86_CR4_MCE)) {
                         kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                         return 0;
                 }
@@@ -7071,18 -7007,6 +7069,18 @@@ out
         return r;
   }
   
+ +static void kvm_probe_feature_msr(u32 msr_index)
+ +{
+ +      struct kvm_msr_entry msr = {
+ +              .index = msr_index,
+ +      };
+ +
+ +      if (kvm_get_msr_feature(&msr))
+ +              return;
+ +
+ +      msr_based_features[num_msr_based_features++] = msr_index;
+ +}
+ +
   static void kvm_probe_msr_to_save(u32 msr_index)
   {
         u32 dummy[2];
@@@ -7158,7 -7082,7 +7156,7 @@@
         msrs_to_save[num_msrs_to_save++] = msr_index;
   }
   
- -static void kvm_init_msr_list(void)
+ +static void kvm_init_msr_lists(void)
   {
         unsigned i;
   
@@@ -7184,11 -7108,15 +7182,11 @@@
                 emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
         }
   
- -      for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
- -              struct kvm_msr_entry msr;
+ +      for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++)
+ +              kvm_probe_feature_msr(i);
   
- -              msr.index = msr_based_features_all[i];
- -              if (kvm_get_msr_feature(&msr))
- -                      continue;
- -
- -              msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
- -      }
+ +      for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++)
+ +              kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]);
   }
   
   static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@@ -9522,7 -9450,7 +9520,7 @@@ static int __kvm_x86_vendor_init(struc
                 kvm_caps.max_guest_tsc_khz = max;
         }
         kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
- -      kvm_init_msr_list();
+ +      kvm_init_msr_lists();
         return 0;
   
   out_unwind_ops:
@@@ -9853,11 -9781,7 +9851,11 @@@ int kvm_emulate_hypercall(struct kvm_vc
                 vcpu->run->hypercall.args[0]  = gpa;
                 vcpu->run->hypercall.args[1]  = npages;
                 vcpu->run->hypercall.args[2]  = attrs;
- -              vcpu->run->hypercall.longmode = op_64_bit;
+ +              vcpu->run->hypercall.flags    = 0;
+ +              if (op_64_bit)
+ +                      vcpu->run->hypercall.flags |= KVM_EXIT_HYPERCALL_LONG_MODE;
+ +
+ +              WARN_ON_ONCE(vcpu->run->hypercall.flags & KVM_EXIT_HYPERCALL_MBZ);
                 vcpu->arch.complete_userspace_io = complete_hypercall_exit;
                 return 0;
         }
@@@ -13310,7 -13234,7 +13308,7 @@@ int kvm_handle_invpcid(struct kvm_vcpu 
                 return 1;
         }
   
- -      pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
+ +      pcid_enabled = kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE);
   
         switch (type) {
         case INVPCID_TYPE_INDIV_ADDR:
diff --combined arch/x86/kvm/x86.h

index fbef05c0bdeb5dab69a18ea114da177f7c851e1d,bca706406189c10d7eefcab793b19b97c142a5f2..c544602d07a359fba5774dbd192335c50044072c
--- 1/arch/x86/kvm/x86.h
--- 2/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@@ -3,6 -3,7 +3,7 @@@
   #define ARCH_X86_KVM_X86_H
   
   #include <linux/kvm_host.h>
+ #include <asm/fpu/xstate.h>
   #include <asm/mce.h>
   #include <asm/pvclock.h>
   #include "kvm_cache_regs.h"
@@@ -40,14 -41,6 +41,14 @@@ void kvm_spurious_fault(void)
         failed;                                                         \
   })
   
+ +/*
+ + * The first...last VMX feature MSRs that are emulated by KVM.  This may or may
+ + * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an
+ + * associated feature that KVM supports for nested virtualization.
+ + */
+ +#define KVM_FIRST_EMULATED_VMX_MSR    MSR_IA32_VMX_BASIC
+ +#define KVM_LAST_EMULATED_VMX_MSR     MSR_IA32_VMX_VMFUNC
+ +
   #define KVM_DEFAULT_PLE_GAP           128
   #define KVM_VMX_DEFAULT_PLE_WINDOW    4096
   #define KVM_DEFAULT_PLE_WINDOW_GROW   2
@@@ -91,11 -84,6 +92,11 @@@ static inline unsigned int __shrink_ple
   void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu);
   int kvm_check_nested_events(struct kvm_vcpu *vcpu);
   
+ +static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu)
+ +{
+ +      return vcpu->arch.last_vmentry_cpu != -1;
+ +}
+ +
   static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu)
   {
         return vcpu->arch.exception.pending ||
@@@ -136,15 -124,15 +137,15 @@@ static inline bool kvm_exception_is_sof
   
   static inline bool is_protmode(struct kvm_vcpu *vcpu)
   {
- -      return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
+ +      return kvm_is_cr0_bit_set(vcpu, X86_CR0_PE);
   }
   
- -static inline int is_long_mode(struct kvm_vcpu *vcpu)
+ +static inline bool is_long_mode(struct kvm_vcpu *vcpu)
   {
   #ifdef CONFIG_X86_64
- -      return vcpu->arch.efer & EFER_LMA;
+ +      return !!(vcpu->arch.efer & EFER_LMA);
   #else
- -      return 0;
+ +      return false;
   #endif
   }
   
@@@ -184,19 -172,19 +185,19 @@@ static inline bool mmu_is_nested(struc
         return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
   }
   
- -static inline int is_pae(struct kvm_vcpu *vcpu)
+ +static inline bool is_pae(struct kvm_vcpu *vcpu)
   {
- -      return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
+ +      return kvm_is_cr4_bit_set(vcpu, X86_CR4_PAE);
   }
   
- -static inline int is_pse(struct kvm_vcpu *vcpu)
+ +static inline bool is_pse(struct kvm_vcpu *vcpu)
   {
- -      return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
+ +      return kvm_is_cr4_bit_set(vcpu, X86_CR4_PSE);
   }
   
- -static inline int is_paging(struct kvm_vcpu *vcpu)
+ +static inline bool is_paging(struct kvm_vcpu *vcpu)
   {
- -      return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
+ +      return likely(kvm_is_cr0_bit_set(vcpu, X86_CR0_PG));
   }
   
   static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
@@@ -206,7 -194,7 +207,7 @@@
   
   static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu)
   {
- -      return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48;
+ +      return kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 57 : 48;
   }
   
   static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu)
@@@ -328,6 -316,34 +329,34 @@@ extern struct kvm_caps kvm_caps
   
   extern bool enable_pmu;
   
+ /*
+  * Get a filtered version of KVM's supported XCR0 that strips out dynamic
+  * features for which the current process doesn't (yet) have permission to use.
+  * This is intended to be used only when enumerating support to userspace,
+  * e.g. in KVM_GET_SUPPORTED_CPUID and KVM_CAP_XSAVE2, it does NOT need to be
+  * used to check/restrict guest behavior as KVM rejects KVM_SET_CPUID{2} if
+  * userspace attempts to enable unpermitted features.
+  */
+ static inline u64 kvm_get_filtered_xcr0(void)
+ {
+       u64 permitted_xcr0 = kvm_caps.supported_xcr0;
+ 
+       BUILD_BUG_ON(XFEATURE_MASK_USER_DYNAMIC != XFEATURE_MASK_XTILE_DATA);
+ 
+       if (permitted_xcr0 & XFEATURE_MASK_USER_DYNAMIC) {
+               permitted_xcr0 &= xstate_get_guest_group_perm();
+ 
+               /*
+                * Treat XTILE_CFG as unsupported if the current process isn't
+                * allowed to use XTILE_DATA, as attempting to set XTILE_CFG in
+                * XCR0 without setting XTILE_DATA is architecturally illegal.
+                */
+               if (!(permitted_xcr0 & XFEATURE_MASK_XTILE_DATA))
+                       permitted_xcr0 &= ~XFEATURE_MASK_XTILE_CFG;
+       }
+       return permitted_xcr0;
+ }
+ 
   static inline bool kvm_mpx_supported(void)
   {
         return (kvm_caps.supported_xcr0 & (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR))
diff --combined tools/testing/selftests/kvm/Makefile

index d66a0642cffd8acf1a1c1b5cf33f3d1ff1a62b8b,18cadc6697983dd56e26283d1a1cda17aea41762..7a5ff646e7e79efa3a83fc66f54456a761bd1c42
--- 1/tools/testing/selftests/kvm/Makefile
--- 2/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@@ -105,6 -105,7 +105,7 @@@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc
   TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
   TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
   TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
+ TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
   TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
   TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
   TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
@@@ -141,7 -142,6 +142,7 @@@ TEST_GEN_PROGS_aarch64 += aarch64/get-r
   TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
   TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
   TEST_GEN_PROGS_aarch64 += aarch64/psci_test
+ +TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
   TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
   TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
   TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
diff --combined tools/testing/selftests/kvm/include/x86_64/processor.h

index e1d65d933310d5021a875fea098d3b2c485b12e0,70c5469e4023659083e1b584714074533bdbd003..aa434c8f19c563a5709bb75328c608fae7754304
--- 1/tools/testing/selftests/kvm/include/x86_64/processor.h
--- 2/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@@ -48,6 -48,35 +48,35 @@@ extern bool host_cpu_is_amd
   #define X86_CR4_SMAP          (1ul << 21)
   #define X86_CR4_PKE           (1ul << 22)
   
+ struct xstate_header {
+       u64                             xstate_bv;
+       u64                             xcomp_bv;
+       u64                             reserved[6];
+ } __attribute__((packed));
+ 
+ struct xstate {
+       u8                              i387[512];
+       struct xstate_header            header;
+       u8                              extended_state_area[0];
+ } __attribute__ ((packed, aligned (64)));
+ 
+ #define XFEATURE_MASK_FP              BIT_ULL(0)
+ #define XFEATURE_MASK_SSE             BIT_ULL(1)
+ #define XFEATURE_MASK_YMM             BIT_ULL(2)
+ #define XFEATURE_MASK_BNDREGS         BIT_ULL(3)
+ #define XFEATURE_MASK_BNDCSR          BIT_ULL(4)
+ #define XFEATURE_MASK_OPMASK          BIT_ULL(5)
+ #define XFEATURE_MASK_ZMM_Hi256               BIT_ULL(6)
+ #define XFEATURE_MASK_Hi16_ZMM                BIT_ULL(7)
+ #define XFEATURE_MASK_XTILE_CFG               BIT_ULL(17)
+ #define XFEATURE_MASK_XTILE_DATA      BIT_ULL(18)
+ 
+ #define XFEATURE_MASK_AVX512          (XFEATURE_MASK_OPMASK | \
+                                        XFEATURE_MASK_ZMM_Hi256 | \
+                                        XFEATURE_MASK_Hi16_ZMM)
+ #define XFEATURE_MASK_XTILE           (XFEATURE_MASK_XTILE_DATA | \
+                                        XFEATURE_MASK_XTILE_CFG)
+ 
   /* Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww. */
   enum cpuid_output_regs {
         KVM_CPUID_EAX,
@@@ -131,6 -160,7 +160,7 @@@ struct kvm_x86_cpu_feature 
   #define       X86_FEATURE_XTILEDATA           KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
   #define       X86_FEATURE_XSAVES              KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
   #define       X86_FEATURE_XFD                 KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+ #define X86_FEATURE_XTILEDATA_XFD     KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
   
   /*
    * Extended Leafs, a.k.a. AMD defined
@@@ -211,10 -241,14 +241,14 @@@ struct kvm_x86_cpu_property 
   #define X86_PROPERTY_PMU_NR_GP_COUNTERS               KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
   #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH        KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
   
+ #define X86_PROPERTY_SUPPORTED_XCR0_LO                KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
   #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0     KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
   #define X86_PROPERTY_XSTATE_MAX_SIZE          KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
+ #define X86_PROPERTY_SUPPORTED_XCR0_HI                KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)
+ 
   #define X86_PROPERTY_XSTATE_TILE_SIZE         KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
   #define X86_PROPERTY_XSTATE_TILE_OFFSET               KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
+ #define X86_PROPERTY_AMX_MAX_PALETTE_TABLES   KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
   #define X86_PROPERTY_AMX_TOTAL_TILE_BYTES     KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
   #define X86_PROPERTY_AMX_BYTES_PER_TILE               KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
   #define X86_PROPERTY_AMX_BYTES_PER_ROW                KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
@@@ -496,6 -530,24 +530,24 @@@ static inline void set_cr4(uint64_t val
         __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
   }
   
+ static inline u64 xgetbv(u32 index)
+ {
+       u32 eax, edx;
+ 
+       __asm__ __volatile__("xgetbv;"
+                    : "=a" (eax), "=d" (edx)
+                    : "c" (index));
+       return eax | ((u64)edx << 32);
+ }
+ 
+ static inline void xsetbv(u32 index, u64 value)
+ {
+       u32 eax = value;
+       u32 edx = value >> 32;
+ 
+       __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+ }
+ 
   static inline struct desc_ptr get_gdt(void)
   {
         struct desc_ptr gdt;
@@@ -632,6 -684,15 +684,15 @@@ static inline bool this_pmu_has(struct 
                !this_cpu_has(feature.anti_feature);
   }
   
+ static __always_inline uint64_t this_cpu_supported_xcr0(void)
+ {
+       if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+               return 0;
+ 
+       return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+              ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+ }
+ 
   typedef u32           __attribute__((vector_size(16))) sse128_t;
   #define __sse128_u    union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
   #define sse128_lo(x)  ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
@@@ -928,45 -989,14 +989,45 @@@ static inline void vcpu_clear_cpuid_fea
   uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
   int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
   
- -static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index,
- -                              uint64_t msr_value)
- -{
- -      int r = _vcpu_set_msr(vcpu, msr_index, msr_value);
- -
- -      TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
- -}
+ +/*
+ + * Assert on an MSR access(es) and pretty print the MSR name when possible.
+ + * Note, the caller provides the stringified name so that the name of macro is
+ + * printed, not the value the macro resolves to (due to macro expansion).
+ + */
+ +#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)                         \
+ +do {                                                                          \
+ +      if (__builtin_constant_p(msr)) {                                        \
+ +              TEST_ASSERT(cond, fmt, str, args);                              \
+ +      } else if (!(cond)) {                                                   \
+ +              char buf[16];                                                   \
+ +                                                                              \
+ +              snprintf(buf, sizeof(buf), "MSR 0x%x", msr);                    \
+ +              TEST_ASSERT(cond, fmt, buf, args);                              \
+ +      }                                                                       \
+ +} while (0)
   
+ +/*
+ + * Returns true if KVM should return the last written value when reading an MSR
+ + * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ + * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
+ + * out MSRs that are not durable _and_ that a selftest wants to write.
+ + */
+ +static inline bool is_durable_msr(uint32_t msr)
+ +{
+ +      return msr != MSR_IA32_TSC;
+ +}
+ +
+ +#define vcpu_set_msr(vcpu, msr, val)                                                  \
+ +do {                                                                                  \
+ +      uint64_t r, v = val;                                                            \
+ +                                                                                      \
+ +      TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,                               \
+ +                      "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);      \
+ +      if (!is_durable_msr(msr))                                                       \
+ +              break;                                                                  \
+ +      r = vcpu_get_msr(vcpu, msr);                                                    \
+ +      TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+ +} while (0)
   
   void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
   bool vm_is_unrestricted_guest(struct kvm_vm *vm);
@@@ -1086,6 -1116,14 +1147,14 @@@ static inline uint8_t wrmsr_safe(uint32
         return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
   }
   
+ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+ {
+       u32 eax = value;
+       u32 edx = value >> 32;
+ 
+       return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+ }
+ 
   bool kvm_is_tdp_enabled(void);
   
   uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
@@@ -1097,10 -1135,10 +1166,10 @@@ uint64_t kvm_hypercall(uint64_t nr, uin
   uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
   void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
   
- void __vm_xsave_require_permission(int bit, const char *name);
+ void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
   
- #define vm_xsave_require_permission(perm)     \
-       __vm_xsave_require_permission(perm, #perm)
+ #define vm_xsave_require_permission(xfeature) \
+       __vm_xsave_require_permission(xfeature, #xfeature)
   
   enum pg_level {
         PG_LEVEL_NONE,
@@@ -1137,14 -1175,6 +1206,6 @@@ void virt_map_level(struct kvm_vm *vm, 
   #define X86_CR0_CD          (1UL<<30) /* Cache Disable */
   #define X86_CR0_PG          (1UL<<31) /* Paging */
   
- #define XSTATE_XTILE_CFG_BIT          17
- #define XSTATE_XTILE_DATA_BIT         18
- 
- #define XSTATE_XTILE_CFG_MASK         (1ULL << XSTATE_XTILE_CFG_BIT)
- #define XSTATE_XTILE_DATA_MASK                (1ULL << XSTATE_XTILE_DATA_BIT)
- #define XFEATURE_XTILE_MASK           (XSTATE_XTILE_CFG_MASK | \
-                                       XSTATE_XTILE_DATA_MASK)
- 
   #define PFERR_PRESENT_BIT 0
   #define PFERR_WRITE_BIT 1
   #define PFERR_USER_BIT 2
diff --combined tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c

index 8cec5c8aca8a9bd13ad571ec8eb8f980c3953be4,1f60dfae69e0c505a3b8b888a4b76204d6bdf6ab..40507ed9fe8a551f3d1ad17f11ff7f021696fbe9
--- 1/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
--- 2/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@@ -54,21 -54,6 +54,21 @@@
   
   #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
   
+ +
+ +/*
+ + * "Retired instructions", from Processor Programming Reference
+ + * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ + * Preliminary Processor Programming Reference (PPR) for AMD Family
+ + * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ + * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ + * B1 Processors Volume 1 of 2.
+ + *                      --- and ---
+ + * "Instructions retired", from the Intel SDM, volume 3,
+ + * "Pre-defined Architectural Performance Events."
+ + */
+ +
+ +#define INST_RETIRED EVENT(0xc0, 0)
+ +
   /*
    * This event list comprises Intel's eight architectural events plus
    * AMD's "retired branch instructions" for Zen[123] (and possibly
@@@ -76,7 -61,7 +76,7 @@@
    */
   static const uint64_t event_list[] = {
         EVENT(0x3c, 0),
- -      EVENT(0xc0, 0),
+ +      INST_RETIRED,
         EVENT(0x3c, 1),
         EVENT(0x2e, 0x4f),
         EVENT(0x2e, 0x41),
@@@ -86,21 -71,13 +86,21 @@@
         AMD_ZEN_BR_RETIRED,
   };
   
+ +struct {
+ +      uint64_t loads;
+ +      uint64_t stores;
+ +      uint64_t loads_stores;
+ +      uint64_t branches_retired;
+ +      uint64_t instructions_retired;
+ +} pmc_results;
+ +
   /*
    * If we encounter a #GP during the guest PMU sanity check, then the guest
    * PMU is not functional. Inform the hypervisor via GUEST_SYNC(-EFAULT).
    */
   static void guest_gp_handler(struct ex_regs *regs)
   {
- -      GUEST_SYNC(0);
+ +      GUEST_SYNC(-EFAULT);
   }
   
   /*
@@@ -115,23 -92,12 +115,23 @@@ static void check_msr(uint32_t msr, uin
   
         wrmsr(msr, v);
         if (rdmsr(msr) != v)
- -              GUEST_SYNC(0);
+ +              GUEST_SYNC(-EIO);
   
         v ^= bits_to_flip;
         wrmsr(msr, v);
         if (rdmsr(msr) != v)
- -              GUEST_SYNC(0);
+ +              GUEST_SYNC(-EIO);
+ +}
+ +
+ +static void run_and_measure_loop(uint32_t msr_base)
+ +{
+ +      const uint64_t branches_retired = rdmsr(msr_base + 0);
+ +      const uint64_t insn_retired = rdmsr(msr_base + 1);
+ +
+ +      __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ +
+ +      pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+ +      pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
   }
   
   static void intel_guest_code(void)
@@@ -139,18 -105,19 +139,18 @@@
         check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
         check_msr(MSR_P6_EVNTSEL0, 0xffff);
         check_msr(MSR_IA32_PMC0, 0xffff);
- -      GUEST_SYNC(1);
+ +      GUEST_SYNC(0);
   
         for (;;) {
- -              uint64_t br0, br1;
- -
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
                 wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
                       ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
- -              wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
- -              br0 = rdmsr(MSR_IA32_PMC0);
- -              __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
- -              br1 = rdmsr(MSR_IA32_PMC0);
- -              GUEST_SYNC(br1 - br0);
+ +              wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ +                    ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+ +              wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+ +
+ +              run_and_measure_loop(MSR_IA32_PMC0);
+ +              GUEST_SYNC(0);
         }
   }
   
@@@ -163,17 -130,18 +163,17 @@@ static void amd_guest_code(void
   {
         check_msr(MSR_K7_EVNTSEL0, 0xffff);
         check_msr(MSR_K7_PERFCTR0, 0xffff);
- -      GUEST_SYNC(1);
+ +      GUEST_SYNC(0);
   
         for (;;) {
- -              uint64_t br0, br1;
- -
                 wrmsr(MSR_K7_EVNTSEL0, 0);
                 wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
                       ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
- -              br0 = rdmsr(MSR_K7_PERFCTR0);
- -              __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
- -              br1 = rdmsr(MSR_K7_PERFCTR0);
- -              GUEST_SYNC(br1 - br0);
+ +              wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ +                    ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED);
+ +
+ +              run_and_measure_loop(MSR_K7_PERFCTR0);
+ +              GUEST_SYNC(0);
         }
   }
   
@@@ -193,19 -161,6 +193,19 @@@ static uint64_t run_vcpu_to_sync(struc
         return uc.args[1];
   }
   
+ +static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+ +{
+ +      uint64_t r;
+ +
+ +      memset(&pmc_results, 0, sizeof(pmc_results));
+ +      sync_global_to_guest(vcpu->vm, pmc_results);
+ +
+ +      r = run_vcpu_to_sync(vcpu);
+ +      TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+ +
+ +      sync_global_from_guest(vcpu->vm, pmc_results);
+ +}
+ +
   /*
    * In a nested environment or if the vPMU is disabled, the guest PMU
    * might not work as architected (accessing the PMU MSRs may raise
@@@ -216,13 -171,13 +216,13 @@@
    */
   static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
   {
- -      bool success;
+ +      uint64_t r;
   
         vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
- -      success = run_vcpu_to_sync(vcpu);
+ +      r = run_vcpu_to_sync(vcpu);
         vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
   
- -      return success;
+ +      return !r;
   }
   
   static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
@@@ -282,101 -237,91 +282,101 @@@ static struct kvm_pmu_event_filter *rem
         return f;
   }
   
+ +#define ASSERT_PMC_COUNTING_INSTRUCTIONS()                                            \
+ +do {                                                                                  \
+ +      uint64_t br = pmc_results.branches_retired;                                     \
+ +      uint64_t ir = pmc_results.instructions_retired;                                 \
+ +                                                                                      \
+ +      if (br && br != NUM_BRANCHES)                                                   \
+ +              pr_info("%s: Branch instructions retired = %lu (expected %u)\n",        \
+ +                      __func__, br, NUM_BRANCHES);                                    \
+ +      TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)",         \
+ +                  __func__, br);                                                      \
+ +      TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)",                \
+ +                  __func__, ir);                                                      \
+ +} while (0)
+ +
+ +#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS()                                                \
+ +do {                                                                                  \
+ +      uint64_t br = pmc_results.branches_retired;                                     \
+ +      uint64_t ir = pmc_results.instructions_retired;                                 \
+ +                                                                                      \
+ +      TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)",          \
+ +                  __func__, br);                                                      \
+ +      TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)",                 \
+ +                  __func__, ir);                                                      \
+ +} while (0)
+ +
   static void test_without_filter(struct kvm_vcpu *vcpu)
   {
- -      uint64_t count = run_vcpu_to_sync(vcpu);
+ +      run_vcpu_and_sync_pmc_results(vcpu);
   
- -      if (count != NUM_BRANCHES)
- -              pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- -                      __func__, count, NUM_BRANCHES);
- -      TEST_ASSERT(count, "Allowed PMU event is not counting");
+ +      ASSERT_PMC_COUNTING_INSTRUCTIONS();
   }
   
- -static uint64_t test_with_filter(struct kvm_vcpu *vcpu,
- -                               struct kvm_pmu_event_filter *f)
+ +static void test_with_filter(struct kvm_vcpu *vcpu,
+ +                           struct kvm_pmu_event_filter *f)
   {
         vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
- -      return run_vcpu_to_sync(vcpu);
+ +      run_vcpu_and_sync_pmc_results(vcpu);
   }
   
   static void test_amd_deny_list(struct kvm_vcpu *vcpu)
   {
         uint64_t event = EVENT(0x1C2, 0);
         struct kvm_pmu_event_filter *f;
- -      uint64_t count;
   
         f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
- -      count = test_with_filter(vcpu, f);
- -
+ +      test_with_filter(vcpu, f);
         free(f);
- -      if (count != NUM_BRANCHES)
- -              pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- -                      __func__, count, NUM_BRANCHES);
- -      TEST_ASSERT(count, "Allowed PMU event is not counting");
+ +
+ +      ASSERT_PMC_COUNTING_INSTRUCTIONS();
   }
   
   static void test_member_deny_list(struct kvm_vcpu *vcpu)
   {
         struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
- -      uint64_t count = test_with_filter(vcpu, f);
   
+ +      test_with_filter(vcpu, f);
         free(f);
- -      if (count)
- -              pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
- -                      __func__, count);
- -      TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+ +
+ +      ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
   }
   
   static void test_member_allow_list(struct kvm_vcpu *vcpu)
   {
         struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
- -      uint64_t count = test_with_filter(vcpu, f);
   
+ +      test_with_filter(vcpu, f);
         free(f);
- -      if (count != NUM_BRANCHES)
- -              pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- -                      __func__, count, NUM_BRANCHES);
- -      TEST_ASSERT(count, "Allowed PMU event is not counting");
+ +
+ +      ASSERT_PMC_COUNTING_INSTRUCTIONS();
   }
   
   static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
   {
         struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
- -      uint64_t count;
   
+ +      remove_event(f, INST_RETIRED);
         remove_event(f, INTEL_BR_RETIRED);
         remove_event(f, AMD_ZEN_BR_RETIRED);
- -      count = test_with_filter(vcpu, f);
+ +      test_with_filter(vcpu, f);
         free(f);
- -      if (count != NUM_BRANCHES)
- -              pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
- -                      __func__, count, NUM_BRANCHES);
- -      TEST_ASSERT(count, "Allowed PMU event is not counting");
+ +
+ +      ASSERT_PMC_COUNTING_INSTRUCTIONS();
   }
   
   static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
   {
         struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
- -      uint64_t count;
   
+ +      remove_event(f, INST_RETIRED);
         remove_event(f, INTEL_BR_RETIRED);
         remove_event(f, AMD_ZEN_BR_RETIRED);
- -      count = test_with_filter(vcpu, f);
+ +      test_with_filter(vcpu, f);
         free(f);
- -      if (count)
- -              pr_info("%s: Branch instructions retired = %lu (expected 0)\n",
- -                      __func__, count);
- -      TEST_ASSERT(!count, "Disallowed PMU Event is counting");
+ +
+ +      ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
   }
   
   /*
@@@ -505,30 -450,51 +505,30 @@@ static bool supports_event_mem_inst_ret
   #define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
         KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
   
- -struct perf_counter {
- -      union {
- -              uint64_t raw;
- -              struct {
- -                      uint64_t loads:22;
- -                      uint64_t stores:22;
- -                      uint64_t loads_stores:20;
- -              };
- -      };
- -};
- -
- -static uint64_t masked_events_guest_test(uint32_t msr_base)
+ +static void masked_events_guest_test(uint32_t msr_base)
   {
- -      uint64_t ld0, ld1, st0, st1, ls0, ls1;
- -      struct perf_counter c;
- -      int val;
- -
         /*
- -       * The acutal value of the counters don't determine the outcome of
+ +       * The actual values of the counters don't determine the outcome of
          * the test.  Only that they are zero or non-zero.
          */
- -      ld0 = rdmsr(msr_base + 0);
- -      st0 = rdmsr(msr_base + 1);
- -      ls0 = rdmsr(msr_base + 2);
+ +      const uint64_t loads = rdmsr(msr_base + 0);
+ +      const uint64_t stores = rdmsr(msr_base + 1);
+ +      const uint64_t loads_stores = rdmsr(msr_base + 2);
+ +      int val;
+ +
   
         __asm__ __volatile__("movl $0, %[v];"
                              "movl %[v], %%eax;"
                              "incl %[v];"
                              : [v]"+m"(val) :: "eax");
   
- -      ld1 = rdmsr(msr_base + 0);
- -      st1 = rdmsr(msr_base + 1);
- -      ls1 = rdmsr(msr_base + 2);
- -
- -      c.loads = ld1 - ld0;
- -      c.stores = st1 - st0;
- -      c.loads_stores = ls1 - ls0;
- -
- -      return c.raw;
+ +      pmc_results.loads = rdmsr(msr_base + 0) - loads;
+ +      pmc_results.stores = rdmsr(msr_base + 1) - stores;
+ +      pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
   }
   
   static void intel_masked_events_guest_code(void)
   {
- -      uint64_t r;
- -
         for (;;) {
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
   
@@@ -541,13 -507,16 +541,13 @@@
   
                 wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
   
- -              r = masked_events_guest_test(MSR_IA32_PMC0);
- -
- -              GUEST_SYNC(r);
+ +              masked_events_guest_test(MSR_IA32_PMC0);
+ +              GUEST_SYNC(0);
         }
   }
   
   static void amd_masked_events_guest_code(void)
   {
- -      uint64_t r;
- -
         for (;;) {
                 wrmsr(MSR_K7_EVNTSEL0, 0);
                 wrmsr(MSR_K7_EVNTSEL1, 0);
@@@ -560,22 -529,26 +560,22 @@@
                 wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
                       ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
   
- -              r = masked_events_guest_test(MSR_K7_PERFCTR0);
- -
- -              GUEST_SYNC(r);
+ +              masked_events_guest_test(MSR_K7_PERFCTR0);
+ +              GUEST_SYNC(0);
         }
   }
   
- -static struct perf_counter run_masked_events_test(struct kvm_vcpu *vcpu,
- -                                               const uint64_t masked_events[],
- -                                               const int nmasked_events)
+ +static void run_masked_events_test(struct kvm_vcpu *vcpu,
+ +                                 const uint64_t masked_events[],
+ +                                 const int nmasked_events)
   {
         struct kvm_pmu_event_filter *f;
- -      struct perf_counter r;
   
         f = create_pmu_event_filter(masked_events, nmasked_events,
                                     KVM_PMU_EVENT_ALLOW,
                                     KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
- -      r.raw = test_with_filter(vcpu, f);
+ +      test_with_filter(vcpu, f);
         free(f);
- -
- -      return r;
   }
   
   /* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
@@@ -700,6 -673,7 +700,6 @@@ static void run_masked_events_tests(str
                                     int nevents)
   {
         int ntests = ARRAY_SIZE(test_cases);
- -      struct perf_counter c;
         int i, n;
   
         for (i = 0; i < ntests; i++) {
@@@ -711,15 -685,13 +711,15 @@@
   
                 n = append_test_events(test, events, nevents);
   
- -              c = run_masked_events_test(vcpu, events, n);
- -              TEST_ASSERT(bool_eq(c.loads, test->flags & ALLOW_LOADS) &&
- -                          bool_eq(c.stores, test->flags & ALLOW_STORES) &&
- -                          bool_eq(c.loads_stores,
+ +              run_masked_events_test(vcpu, events, n);
+ +
+ +              TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+ +                          bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+ +                          bool_eq(pmc_results.loads_stores,
                                     test->flags & ALLOW_LOADS_STORES),
- -                          "%s  loads: %u, stores: %u, loads + stores: %u",
- -                          test->msg, c.loads, c.stores, c.loads_stores);
+ +                          "%s  loads: %lu, stores: %lu, loads + stores: %lu",
+ +                          test->msg, pmc_results.loads, pmc_results.stores,
+ +                          pmc_results.loads_stores);
         }
   }
   
@@@ -792,6 -764,7 +792,7 @@@ int main(int argc, char *argv[]
         struct kvm_vcpu *vcpu, *vcpu2 = NULL;
         struct kvm_vm *vm;
   
+       TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
         TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
   
diff --combined tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c

index 3009b3e5254d7175592d74c58a5dc3beb9d16759,2933b1bd754e0310566a80231620cad671556843..4c90f76930f99198cad9de8a84588ed0d9d92d37
--- 1/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
--- 2/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@@ -14,11 -14,12 +14,11 @@@
   #define _GNU_SOURCE /* for program_invocation_short_name */
   #include <sys/ioctl.h>
   
+ +#include <linux/bitmap.h>
+ +
   #include "kvm_util.h"
   #include "vmx.h"
   
- -#define PMU_CAP_FW_WRITES     (1ULL << 13)
- -#define PMU_CAP_LBR_FMT               0x3f
- -
   union perf_capabilities {
         struct {
                 u64     lbr_format:6;
@@@ -35,220 -36,60 +35,221 @@@
         u64     capabilities;
   };
   
- -static void guest_code(void)
+ +/*
+ + * The LBR format and most PEBS features are immutable, all other features are
+ + * fungible (if supported by the host and KVM).
+ + */
+ +static const union perf_capabilities immutable_caps = {
+ +      .lbr_format = -1,
+ +      .pebs_trap  = 1,
+ +      .pebs_arch_reg = 1,
+ +      .pebs_format = -1,
+ +      .pebs_baseline = 1,
+ +};
+ +
+ +static const union perf_capabilities format_caps = {
+ +      .lbr_format = -1,
+ +      .pebs_format = -1,
+ +};
+ +
+ +static void guest_code(uint64_t current_val)
   {
- -      wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT);
+ +      uint8_t vector;
+ +      int i;
+ +
+ +      vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
+ +      GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
+ +
+ +      vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
+ +      GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+ +
+ +      for (i = 0; i < 64; i++) {
+ +              vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
+ +                                  current_val ^ BIT_ULL(i));
+ +              GUEST_ASSERT_2(vector == GP_VECTOR,
+ +                             current_val ^ BIT_ULL(i), vector);
+ +      }
+ +
+ +      GUEST_DONE();
   }
   
- -int main(int argc, char *argv[])
+ +/*
+ + * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ + * written, that the guest always sees the userspace controlled value, and that
+ + * PERF_CAPABILITIES is immutable after KVM_RUN.
+ + */
+ +static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap)
   {
- -      struct kvm_vm *vm;
         struct kvm_vcpu *vcpu;
- -      int ret;
- -      union perf_capabilities host_cap;
- -      uint64_t val;
+ +      struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ +      struct ucall uc;
+ +      int r, i;
   
- -      host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
- -      host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT);
+ +      vm_init_descriptor_tables(vm);
+ +      vcpu_init_descriptor_tables(vcpu);
   
- -      /* Create VM */
- -      vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ +      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
   
- -      TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
- -      TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+ +      vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ +      vcpu_run(vcpu);
   
- -      TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
- -      TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+ +      switch (get_ucall(vcpu, &uc)) {
+ +      case UCALL_ABORT:
+ +              REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+ +              break;
+ +      case UCALL_DONE:
+ +              break;
+ +      default:
+ +              TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ +      }
   
- -      /* testcase 1, set capabilities when we have PDCM bit */
- -      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
+ +      ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
   
- -      /* check capabilities can be retrieved with KVM_GET_MSR */
- -      ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ +      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
   
- -      /* check whatever we write with KVM_SET_MSR is _not_ modified */
- -      vcpu_run(vcpu);
- -      ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES);
+ +      r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ +      TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+ +
+ +      for (i = 0; i < 64; i++) {
+ +              r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ +                                host_cap.capabilities ^ BIT_ULL(i));
+ +              TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
+ +                          host_cap.capabilities ^ BIT_ULL(i));
+ +      }
+ +
+ +      kvm_vm_free(vm);
+ +}
+ +
+ +/*
+ + * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ + * enabled, as well as '0' (to disable all features).
+ + */
+ +static void test_basic_perf_capabilities(union perf_capabilities host_cap)
+ +{
+ +      struct kvm_vcpu *vcpu;
+ +      struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
   
- -      /* testcase 2, check valid LBR formats are accepted */
         vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
- -      ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0);
+ +      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ +
+ +      kvm_vm_free(vm);
+ +}
   
- -      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format);
- -      ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format);
+ +static void test_fungible_perf_capabilities(union perf_capabilities host_cap)
+ +{
+ +      const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+ +
+ +      struct kvm_vcpu *vcpu;
+ +      struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ +      int bit;
+ +
+ +      for_each_set_bit(bit, &fungible_caps, 64) {
+ +              vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ +              vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ +                           host_cap.capabilities & ~BIT_ULL(bit));
+ +      }
+ +      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ +
+ +      kvm_vm_free(vm);
+ +}
+ +
+ +/*
+ + * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ + * PERF_CAPABILITIES.  Note, LBR format and PEBS format need to be validated
+ + * separately as they are multi-bit values, e.g. toggling or setting a single
+ + * bit can generate a false positive without dedicated safeguards.
+ + */
+ +static void test_immutable_perf_capabilities(union perf_capabilities host_cap)
+ +{
+ +      const uint64_t reserved_caps = (~host_cap.capabilities |
+ +                                      immutable_caps.capabilities) &
+ +                                     ~format_caps.capabilities;
+ +
+ +      struct kvm_vcpu *vcpu;
+ +      struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ +      union perf_capabilities val = host_cap;
+ +      int r, bit;
+ +
+ +      for_each_set_bit(bit, &reserved_caps, 64) {
+ +              r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ +                                host_cap.capabilities ^ BIT_ULL(bit));
+ +              TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ +                          host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ +                          BIT_ULL(bit), bit);
+ +      }
   
         /*
- -       * Testcase 3, check that an "invalid" LBR format is rejected.  Only an
- -       * exact match of the host's format (and 0/disabled) is allowed.
+ +       * KVM only supports the host's native LBR format, as well as '0' (to
+ +       * disable LBR support).  Verify KVM rejects all other LBR formats.
          */
- -      for (val = 1; val <= PMU_CAP_LBR_FMT; val++) {
- -              if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT))
+ +      for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ +              if (val.lbr_format == host_cap.lbr_format)
+ +                      continue;
+ +
+ +              r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ +              TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ +                          val.lbr_format, host_cap.lbr_format);
+ +      }
+ +
+ +      /* Ditto for the PEBS format. */
+ +      for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ +              if (val.pebs_format == host_cap.pebs_format)
                         continue;
   
- -              ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val);
- -              TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val);
+ +              r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ +              TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ +                          val.pebs_format, host_cap.pebs_format);
         }
   
- -      printf("Completed perf capability tests.\n");
         kvm_vm_free(vm);
   }
+ +
+ +/*
+ + * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ + * disabling the vPMU via CPUID also disables LBR support.  Set bits 2:0 of
+ + * LBR_TOS as those bits are writable across all uarch implementations (arch
+ + * LBRs will need to poke a different MSR).
+ + */
+ +static void test_lbr_perf_capabilities(union perf_capabilities host_cap)
+ +{
+ +      struct kvm_vcpu *vcpu;
+ +      struct kvm_vm *vm;
+ +      int r;
+ +
+ +      if (!host_cap.lbr_format)
+ +              return;
+ +
+ +      vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ +
+ +      vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ +      vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ +
+ +      vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+ +
+ +      r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ +      TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+ +
+ +      kvm_vm_free(vm);
+ +}
+ +
+ +int main(int argc, char *argv[])
+ +{
+ +      union perf_capabilities host_cap;
+ +
++      TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));
+ +      TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+ +
+ +      TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ +      TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+ +
+ +      host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+ +
+ +      TEST_ASSERT(host_cap.full_width_write,
+ +                  "Full-width writes should always be supported");
+ +
+ +      test_basic_perf_capabilities(host_cap);
+ +      test_fungible_perf_capabilities(host_cap);
+ +      test_immutable_perf_capabilities(host_cap);
+ +      test_guest_wrmsr_perf_capabilities(host_cap);
+ +      test_lbr_perf_capabilities(host_cap);
+ +}
author	Paolo Bonzini <[email protected]>
	Wed, 26 Apr 2023 19:54:40 +0000 (15:54 -0400)
committer	Paolo Bonzini <[email protected]>
	Wed, 26 Apr 2023 19:56:01 +0000 (15:56 -0400)
		1	2
arch/x86/kvm/cpuid.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.h	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/include/x86_64/processor.h	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c	patch \|	diff1 \|	diff2 \|	blob \| history