Git Repo - linux.git/commitdiff
Merge tag 'kvm-x86-misc-6.6' of https://github.com/kvm-x86/linux into HEAD
author Paolo Bonzini <[email protected]>
Thu, 31 Aug 2023 17:36:33 +0000 (13:36 -0400)
committer Paolo Bonzini <[email protected]>
Thu, 31 Aug 2023 17:36:33 +0000 (13:36 -0400)
KVM x86 changes for 6.6:

 - Misc cleanups

 - Retry optimized APIC map recalculation if a vCPU is added/enabled

 - Overhaul emergency reboot code to bring SVM up to par with VMX, tie the
   "emergency disabling" behavior to KVM actually being loaded, and move all of
   the logic into KVM

 - Fix user-triggerable WARNs in SVM where KVM incorrectly assumes the TSC
   ratio MSR can diverge from the default iff TSC scaling is enabled, and clean
   up related code

 - Add a framework to allow "caching" feature flags so that KVM can check if
   the guest can use a feature without needing to search guest CPUID
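
The "governed feature" framework mentioned above boils down to a per-vCPU
bitmap that is recomputed whenever guest CPUID changes, so hot paths can
answer "may the guest use X?" with a single bit test instead of walking the
CPUID entries (see kvm_governed_feature_set()/guest_can_use() in the diff
below).  A minimal sketch of the idea, with illustrative names rather than
the real KVM helpers:

    /* Sketch only: cache per-vCPU feature usability in a bitmap. */
    #include <linux/bitmap.h>

    enum { EX_FEATURE_GBPAGES, EX_FEATURE_LBRV, EX_NR_GOVERNED_FEATURES };

    struct example_vcpu {
            DECLARE_BITMAP(governed, EX_NR_GOVERNED_FEATURES);
    };

    /* Called from the (slow) CPUID update path. */
    static inline void example_governed_set(struct example_vcpu *v, int feature)
    {
            __set_bit(feature, v->governed);
    }

    /* Called from hot paths; O(1) instead of a CPUID entry search. */
    static inline bool example_guest_can_use(struct example_vcpu *v, int feature)
    {
            return test_bit(feature, v->governed);
    }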

arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c

index 6523f5494cb20f2dc43474798a88f5a7e54f29ca,771adf2438bc283156400dc3f9838f6b426e26d2..e3c9ff4146fca3a6ce3da86cce36c5e690c872cd
@@@ -746,7 -746,6 +746,6 @@@ struct kvm_vcpu_arch 
        u64 smi_count;
        bool at_instruction_boundary;
        bool tpr_access_reporting;
-       bool xsaves_enabled;
        bool xfd_no_write_intercept;
        u64 ia32_xss;
        u64 microcode_version;
        struct kvm_cpuid_entry2 *cpuid_entries;
        struct kvm_hypervisor_cpuid kvm_cpuid;
  
+       /*
+        * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
+        * when "struct kvm_vcpu_arch" is no longer defined in an
+        * arch/x86/include/asm header.  The max is mostly arbitrary, i.e.
+        * can be increased as necessary.
+        */
+ #define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+       /*
+        * Track whether or not the guest is allowed to use features that are
+        * governed by KVM, where "governed" means KVM needs to manage state
+        * and/or explicitly enable the feature in hardware.  Typically, but
+        * not always, governed features can be used by the guest if and only
+        * if both KVM and userspace want to expose the feature to the guest.
+        */
+       struct {
+               DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
+       } governed_features;
        u64 reserved_gpa_bits;
        int maxphyaddr;
  
@@@ -1566,10 -1584,9 +1584,10 @@@ struct kvm_x86_ops 
        void (*set_segment)(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
        void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
 +      bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
        void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
        void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 -      bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
 +      bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
        void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
        int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
        void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
  
        u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
        u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
-       void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
-       void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
+       void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+       void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
  
        /*
         * Retrieve somewhat arbitrary exit information.  Intended to
@@@ -1795,8 -1812,8 +1813,8 @@@ static inline struct kvm *kvm_arch_allo
  #define __KVM_HAVE_ARCH_VM_FREE
  void kvm_arch_free_vm(struct kvm *kvm);
  
 -#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
 -static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
 +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
 +static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
  {
        if (kvm_x86_ops.flush_remote_tlbs &&
            !static_call(kvm_x86_flush_remote_tlbs)(kvm))
                return 0;
        else
                return -ENOTSUPP;
  }
  
 +#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
 +
  #define kvm_arch_pmi_in_guest(vcpu) \
        ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
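
kvm_arch_flush_remote_tlbs() above returns -ENOTSUPP when no vendor hook is
available (or the hook fails), the expectation being that the generic caller
falls back to the IPI-based flush.  A rough sketch of that caller-side
contract, under the assumption that the common helper behaves as follows
(illustrative, not the exact virt/kvm code):

    static void example_flush_remote_tlbs(struct kvm *kvm)
    {
            /* Prefer the arch-accelerated flush if it exists and succeeds... */
            if (!kvm_arch_flush_remote_tlbs(kvm))
                    return;

            /* ...otherwise kick every vCPU with a TLB flush request. */
            kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH);
    }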
  
diff --combined arch/x86/kvm/cpuid.c
index d3432687c9e6315d0a521a5babce201a527a3a0b,e961e9a058477c4b66991a129128748073a97bcf..0544e30b4946d1e5fb40f6737335ac00d58a84b2
@@@ -11,6 -11,7 +11,7 @@@
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kvm_host.h>
+ #include "linux/lockdep.h"
  #include <linux/export.h>
  #include <linux/vmalloc.h>
  #include <linux/uaccess.h>
@@@ -84,6 -85,18 +85,18 @@@ static inline struct kvm_cpuid_entry2 *
        struct kvm_cpuid_entry2 *e;
        int i;
  
+       /*
+        * KVM has a semi-arbitrary rule that querying the guest's CPUID model
+        * with IRQs disabled is disallowed.  The CPUID model can legitimately
+        * have over one hundred entries, i.e. the lookup is slow, and IRQs are
+        * typically disabled in KVM only when KVM is in a performance critical
+        * path, e.g. the core VM-Enter/VM-Exit run loop.  Nothing will break
+        * if this rule is violated, this assertion is purely to flag potential
+        * performance issues.  If this fires, consider moving the lookup out
+        * of the hotpath, e.g. by caching information during CPUID updates.
+        */
+       lockdep_assert_irqs_enabled();
        for (i = 0; i < nent; i++) {
                e = &entries[i];
  
@@@ -312,6 -325,27 +325,27 @@@ static void kvm_vcpu_after_set_cpuid(st
  {
        struct kvm_lapic *apic = vcpu->arch.apic;
        struct kvm_cpuid_entry2 *best;
+       bool allow_gbpages;
+       BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
+       bitmap_zero(vcpu->arch.governed_features.enabled,
+                   KVM_MAX_NR_GOVERNED_FEATURES);
+       /*
+        * If TDP is enabled, let the guest use GBPAGES if they're supported in
+        * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
+        * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+        * walk for performance and complexity reasons.  Not to mention KVM
+        * _can't_ solve the problem because GVA->GPA walks aren't visible to
+        * KVM once a TDP translation is installed.  Mimic hardware behavior so
+        * that KVM's behavior is at least consistent, i.e. doesn't randomly inject #PF.
+        * If TDP is disabled, honor *only* guest CPUID as KVM has full control
+        * and can install smaller shadow pages if the host lacks 1GiB support.
+        */
+       allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+                                     guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+       if (allow_gbpages)
+               kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
  
        best = kvm_find_cpuid_entry(vcpu, 1);
        if (best && apic) {
@@@ -647,7 -681,8 +681,8 @@@ void kvm_set_cpu_caps(void
        );
  
        kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-               F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
+               F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
+               F(AMX_COMPLEX)
        );
  
        kvm_cpu_cap_mask(CPUID_D_1_EAX,
                F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
        );
  
 +      if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
 +              kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
 +
        kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
                F(PERFMON_V2)
        );
@@@ -1154,6 -1186,9 +1189,9 @@@ static inline int __do_cpuid_func(struc
                cpuid_entry_override(entry, CPUID_8000_0001_EDX);
                cpuid_entry_override(entry, CPUID_8000_0001_ECX);
                break;
+       case 0x80000005:
+               /*  Pass host L1 cache and TLB info. */
+               break;
        case 0x80000006:
                /* Drop reserved bits, pass host L2 cache and TLB info. */
                entry->edx &= ~GENMASK(17, 16);
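
The lockdep_assert_irqs_enabled() added in this file is purely diagnostic: it
turns an accidental slow CPUID walk in an IRQs-disabled path into a one-time
warning rather than a silent latency hit.  The same pattern in isolation
(hypothetical function, not KVM code):

    #include <linux/lockdep.h>

    /* Flag (don't forbid) a slow lookup performed with IRQs disabled. */
    static const struct kvm_cpuid_entry2 *example_slow_lookup(
                    const struct kvm_cpuid_entry2 *entries, int nent, u32 function)
    {
            int i;

            lockdep_assert_irqs_enabled();  /* no-op unless lockdep is enabled */

            for (i = 0; i < nent; i++) {
                    if (entries[i].function == function)
                            return &entries[i];
            }
            return NULL;
    }
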
diff --combined arch/x86/kvm/lapic.c
index a983a16163b137524e8295a38d0c3709e788edde,673880bc0762c7dfd5e70b026b613229a47c3b6f..dcd60b39e794d95ab4b1b4046a69ec5cf1a9dffc
@@@ -376,7 -376,8 +376,8 @@@ void kvm_recalculate_apic_map(struct kv
        struct kvm_vcpu *vcpu;
        unsigned long i;
        u32 max_id = 255; /* enough space for any xAPIC ID */
-       bool xapic_id_mismatch = false;
+       bool xapic_id_mismatch;
+       int r;
  
        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
                  "Dirty APIC map without an in-kernel local APIC");
  
        mutex_lock(&kvm->arch.apic_map_lock);
+ retry:
        /*
-        * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
-        * (if clean) or the APIC registers (if dirty).
+        * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
+        * or the APIC registers (if dirty).  Note, on retry the map may have
+        * not yet been marked dirty by whatever task changed a vCPU's x2APIC
+        * ID, i.e. the map may still show up as in-progress.  In that case
+        * this task still needs to retry and complete its calculation.
         */
        if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
                                   DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
                return;
        }
  
+       /*
+        * Reset the mismatch flag between attempts so that KVM does the right
+        * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
+        * keep max_id strictly increasing.  Disallowing max_id from shrinking
+        * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
+        * with the highest x2APIC ID is toggling its APIC on and off.
+        */
+       xapic_id_mismatch = false;
        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
                if (!kvm_apic_present(vcpu))
                        continue;
  
-               if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+               r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
+               if (r) {
                        kvfree(new);
                        new = NULL;
+                       if (r == -E2BIG) {
+                               cond_resched();
+                               goto retry;
+                       }
                        goto out;
                }
  
@@@ -637,22 -658,16 +658,22 @@@ bool __kvm_apic_update_irr(u32 *pir, vo
        *max_irr = -1;
  
        for (i = vec = 0; i <= 7; i++, vec += 32) {
 +              u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
 +
 +              irr_val = *p_irr;
                pir_val = READ_ONCE(pir[i]);
 -              irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
 +
                if (pir_val) {
 +                      pir_val = xchg(&pir[i], 0);
 +
                        prev_irr_val = irr_val;
 -                      irr_val |= xchg(&pir[i], 0);
 -                      *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
 -                      if (prev_irr_val != irr_val) {
 -                              max_updated_irr =
 -                                      __fls(irr_val ^ prev_irr_val) + vec;
 -                      }
 +                      do {
 +                              irr_val = prev_irr_val | pir_val;
 +                      } while (prev_irr_val != irr_val &&
 +                               !try_cmpxchg(p_irr, &prev_irr_val, irr_val));
 +
 +                      if (prev_irr_val != irr_val)
 +                              max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
                }
                if (irr_val)
                        *max_irr = __fls(irr_val) + vec;
@@@ -666,11 -681,8 +687,11 @@@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr
  bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
  {
        struct kvm_lapic *apic = vcpu->arch.apic;
 +      bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
  
 -      return __kvm_apic_update_irr(pir, apic->regs, max_irr);
 +      if (unlikely(!apic->apicv_active && irr_updated))
 +              apic->irr_pending = true;
 +      return irr_updated;
  }
  EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
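
The reworked __kvm_apic_update_irr() above claims the posted-interrupt bits
with xchg() and folds them into the IRR via a try_cmpxchg() loop so that
concurrent updates to the IRR word are never lost.  The same lock-free merge
pattern in isolation (names are illustrative, not KVM APIs):

    #include <linux/atomic.h>

    /* Atomically move pending bits from 'posted' into 'irr'; returns true if
     * any new bit was set. */
    static bool example_merge_pending(u32 *posted, u32 *irr)
    {
            u32 pending = xchg(posted, 0);  /* claim the posted bits */
            u32 old, new;

            if (!pending)
                    return false;

            old = *irr;
            do {
                    new = old | pending;
                    /* try_cmpxchg() refreshes 'old' on failure. */
            } while (old != new && !try_cmpxchg(irr, &old, new));

            return old != new;
    }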
  
diff --combined arch/x86/kvm/mmu/mmu.c
index 7b52e31f1151ba48634fda5240c6ca0184929319,a0c2acb323ebe73439333fe58f2424cccd3cd9f4..276157f8496cc6775e137b39936d4726131b6c93
@@@ -278,12 -278,16 +278,12 @@@ static inline bool kvm_available_flush_
        return kvm_x86_ops.flush_remote_tlbs_range;
  }
  
 -void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
 -                               gfn_t nr_pages)
 +int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
  {
 -      int ret = -EOPNOTSUPP;
 +      if (!kvm_x86_ops.flush_remote_tlbs_range)
 +              return -EOPNOTSUPP;
  
 -      if (kvm_x86_ops.flush_remote_tlbs_range)
 -              ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
 -                                                                 nr_pages);
 -      if (ret)
 -              kvm_flush_remote_tlbs(kvm);
 +      return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
  }
  
  static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
@@@ -1584,7 -1588,7 +1584,7 @@@ static __always_inline bool kvm_handle_
        for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
                                 range->start, range->end - 1, &iterator)
                ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
 -                             iterator.level, range->pte);
 +                             iterator.level, range->arg.pte);
  
        return ret;
  }
@@@ -4804,28 -4808,13 +4804,13 @@@ static void __reset_rsvds_bits_mask(str
        }
  }
  
- static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
- {
-       /*
-        * If TDP is enabled, let the guest use GBPAGES if they're supported in
-        * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
-        * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
-        * walk for performance and complexity reasons.  Not to mention KVM
-        * _can't_ solve the problem because GVA->GPA walks aren't visible to
-        * KVM once a TDP translation is installed.  Mimic hardware behavior so
-        * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
-        */
-       return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
-                            guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
- }
  static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
                                        struct kvm_mmu *context)
  {
        __reset_rsvds_bits_mask(&context->guest_rsvd_check,
                                vcpu->arch.reserved_gpa_bits,
                                context->cpu_role.base.level, is_efer_nx(context),
-                               guest_can_use_gbpages(vcpu),
+                               guest_can_use(vcpu, X86_FEATURE_GBPAGES),
                                is_cr4_pse(context),
                                guest_cpuid_is_amd_or_hygon(vcpu));
  }
@@@ -4902,7 -4891,8 +4887,8 @@@ static void reset_shadow_zero_bits_mask
        __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
                                context->root_role.level,
                                context->root_role.efer_nx,
-                               guest_can_use_gbpages(vcpu), is_pse, is_amd);
+                               guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+                               is_pse, is_amd);
  
        if (!shadow_me_mask)
                return;
@@@ -6666,7 -6656,7 +6652,7 @@@ static void kvm_rmap_zap_collapsible_sp
         */
        if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
                            PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
 -              kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
 +              kvm_flush_remote_tlbs_memslot(kvm, slot);
  }
  
  void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        }
  }
  
 -void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
 -                                      const struct kvm_memory_slot *memslot)
 -{
 -      /*
 -       * All current use cases for flushing the TLBs for a specific memslot
 -       * related to dirty logging, and many do the TLB flush out of mmu_lock.
 -       * The interaction between the various operations on memslot must be
 -       * serialized by slots_locks to ensure the TLB flush from one operation
 -       * is observed by any other operation on the same memslot.
 -       */
 -      lockdep_assert_held(&kvm->slots_lock);
 -      kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
 -}
 -
  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot)
  {
@@@ -6844,7 -6848,7 +6830,7 @@@ static void mmu_destroy_caches(void
  static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
  {
        if (nx_hugepage_mitigation_hard_disabled)
-               return sprintf(buffer, "never\n");
+               return sysfs_emit(buffer, "never\n");
  
        return param_get_bool(buffer, kp);
  }
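
With the change above, kvm_arch_flush_remote_tlbs_range() merely reports
-EOPNOTSUPP when no ranged flush is available; the "flush everything instead"
fallback that used to live here is expected to be handled by the common code,
roughly along these lines (sketch under that assumption, not the exact
virt/kvm implementation):

    static void example_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
                                                u64 nr_pages)
    {
            if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
                    return;

            /* No (working) ranged flush: fall back to a full remote flush. */
            kvm_flush_remote_tlbs(kvm);
    }
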
diff --combined arch/x86/kvm/svm/svm.c
index 488814e919ca0ef3b51574e1a5a51ec320c6170f,226b3a780d0fcb20120040da0fb4e1c494c01e95..f283eb47f6acec7d34453c2047536bfaa14b66c5
  #include <asm/spec-ctrl.h>
  #include <asm/cpu_device_id.h>
  #include <asm/traps.h>
+ #include <asm/reboot.h>
  #include <asm/fpu/api.h>
  
- #include <asm/virtext.h>
  #include <trace/events/ipi.h>
  
  #include "trace.h"
@@@ -203,7 -202,7 +202,7 @@@ static int nested = true
  module_param(nested, int, S_IRUGO);
  
  /* enable/disable Next RIP Save */
 -static int nrips = true;
 +int nrips = true;
  module_param(nrips, int, 0444);
  
  /* enable/disable Virtual VMLOAD VMSAVE */
@@@ -365,8 -364,6 +364,8 @@@ static void svm_set_interrupt_shadow(st
                svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
  
  }
 +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
 +                                      void *insn, int insn_len);
  
  static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
                                           bool commit_side_effects)
        }
  
        if (!svm->next_rip) {
 +              /*
 +               * FIXME: Drop this when kvm_emulate_instruction() does the
 +               * right thing and treats "can't emulate" as outright failure
 +               * for EMULTYPE_SKIP.
 +               */
 +              if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
 +                      return 0;
 +
                if (unlikely(!commit_side_effects))
                        old_rflags = svm->vmcb->save.rflags;
  
@@@ -527,14 -516,21 +526,21 @@@ static void svm_init_osvw(struct kvm_vc
                vcpu->arch.osvw.status |= 1;
  }
  
- static bool kvm_is_svm_supported(void)
+ static bool __kvm_is_svm_supported(void)
  {
-       int cpu = raw_smp_processor_id();
-       const char *msg;
+       int cpu = smp_processor_id();
+       struct cpuinfo_x86 *c = &cpu_data(cpu);
        u64 vm_cr;
  
-       if (!cpu_has_svm(&msg)) {
-               pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
+       if (c->x86_vendor != X86_VENDOR_AMD &&
+           c->x86_vendor != X86_VENDOR_HYGON) {
+               pr_err("CPU %d isn't AMD or Hygon\n", cpu);
+               return false;
+       }
+       if (!cpu_has(c, X86_FEATURE_SVM)) {
+               pr_err("SVM not supported by CPU %d\n", cpu);
                return false;
        }
  
        return true;
  }
  
+ static bool kvm_is_svm_supported(void)
+ {
+       bool supported;
+       migrate_disable();
+       supported = __kvm_is_svm_supported();
+       migrate_enable();
+       return supported;
+ }
  static int svm_check_processor_compat(void)
  {
-       if (!kvm_is_svm_supported())
+       if (!__kvm_is_svm_supported())
                return -EIO;
  
        return 0;
  }
  
- void __svm_write_tsc_multiplier(u64 multiplier)
+ static void __svm_write_tsc_multiplier(u64 multiplier)
  {
-       preempt_disable();
        if (multiplier == __this_cpu_read(current_tsc_ratio))
-               goto out;
+               return;
  
        wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
        __this_cpu_write(current_tsc_ratio, multiplier);
- out:
-       preempt_enable();
+ }
+ static inline void kvm_cpu_svm_disable(void)
+ {
+       uint64_t efer;
+       wrmsrl(MSR_VM_HSAVE_PA, 0);
+       rdmsrl(MSR_EFER, efer);
+       if (efer & EFER_SVME) {
+               /*
+                * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
+                * NMI aren't blocked.
+                */
+               stgi();
+               wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+       }
+ }
+ static void svm_emergency_disable(void)
+ {
+       kvm_rebooting = true;
+       kvm_cpu_svm_disable();
  }
  
  static void svm_hardware_disable(void)
        if (tsc_scaling)
                __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
  
-       cpu_svm_disable();
+       kvm_cpu_svm_disable();
  
        amd_pmu_disable_virt();
  }
@@@ -687,39 -713,6 +723,39 @@@ free_save_area
  
  }
  
 +static void set_dr_intercepts(struct vcpu_svm *svm)
 +{
 +      struct vmcb *vmcb = svm->vmcb01.ptr;
 +
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
 +      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
 +
 +      recalc_intercepts(svm);
 +}
 +
 +static void clr_dr_intercepts(struct vcpu_svm *svm)
 +{
 +      struct vmcb *vmcb = svm->vmcb01.ptr;
 +
 +      vmcb->control.intercepts[INTERCEPT_DR] = 0;
 +
 +      recalc_intercepts(svm);
 +}
 +
  static int direct_access_msr_slot(u32 msr)
  {
        u32 i;
@@@ -990,24 -983,50 +1026,24 @@@ static void svm_disable_lbrv(struct kvm
                svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
  }
  
 -static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
 +static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
  {
        /*
 -       * If the LBR virtualization is disabled, the LBR msrs are always
 -       * kept in the vmcb01 to avoid copying them on nested guest entries.
 -       *
 -       * If nested, and the LBR virtualization is enabled/disabled, the msrs
 -       * are moved between the vmcb01 and vmcb02 as needed.
 +       * If LBR virtualization is disabled, the LBR MSRs are always kept in
 +       * vmcb01.  If LBR virtualization is enabled and L1 is running VMs of
 +       * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
         */
 -      struct vmcb *vmcb =
 -              (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
 -                      svm->vmcb : svm->vmcb01.ptr;
 -
 -      switch (index) {
 -      case MSR_IA32_DEBUGCTLMSR:
 -              return vmcb->save.dbgctl;
 -      case MSR_IA32_LASTBRANCHFROMIP:
 -              return vmcb->save.br_from;
 -      case MSR_IA32_LASTBRANCHTOIP:
 -              return vmcb->save.br_to;
 -      case MSR_IA32_LASTINTFROMIP:
 -              return vmcb->save.last_excp_from;
 -      case MSR_IA32_LASTINTTOIP:
 -              return vmcb->save.last_excp_to;
 -      default:
 -              KVM_BUG(false, svm->vcpu.kvm,
 -                      "%s: Unknown MSR 0x%x", __func__, index);
 -              return 0;
 -      }
 +      return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
 +                                                                 svm->vmcb01.ptr;
  }
  
  void svm_update_lbrv(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
 -
 -      bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
 -                                         DEBUGCTLMSR_LBR;
 -
 -      bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
 -                                    LBR_CTL_ENABLE_MASK);
 -
 -      if (unlikely(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV)))
 -              if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
 -                      enable_lbrv = true;
 +      bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
 +      bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
-                          (is_guest_mode(vcpu) && svm->lbrv_enabled &&
++                          (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
 +                          (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
  
        if (enable_lbrv == current_enable_lbrv)
                return;
@@@ -1118,21 -1137,23 +1154,23 @@@ static u64 svm_get_l2_tsc_multiplier(st
        return svm->tsc_ratio_msr;
  }
  
- static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
        svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
-       svm->vmcb->control.tsc_offset = offset;
+       svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
        vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
  }
  
- static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+ void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
  {
-       __svm_write_tsc_multiplier(multiplier);
+       preempt_disable();
+       if (to_svm(vcpu)->guest_state_loaded)
+               __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+       preempt_enable();
  }
  
  /* Evaluate instruction intercepts that depend on guest CPUID features. */
  static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
                                              struct vcpu_svm *svm)
@@@ -1173,8 -1194,6 +1211,6 @@@ static inline void init_vmcb_after_set_
  
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
                set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
-               svm->v_vmload_vmsave_enabled = false;
        } else {
                /*
                 * If hardware supports Virtual VMLOAD VMSAVE then enable it
@@@ -1218,9 -1237,10 +1254,9 @@@ static void init_vmcb(struct kvm_vcpu *
         * Guest access to VMware backdoor ports could legitimately
         * trigger #GP because of TSS I/O permission bitmap.
         * We intercept those #GP and allow access to them anyway
 -       * as VMware does.  Don't intercept #GP for SEV guests as KVM can't
 -       * decrypt guest memory to decode the faulting instruction.
 +       * as VMware does.
         */
 -      if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
 +      if (enable_vmware_backdoor)
                set_exception_intercept(svm, GP_VECTOR);
  
        svm_set_intercept(svm, INTERCEPT_INTR);
@@@ -1514,9 -1534,7 +1550,9 @@@ static void svm_vcpu_load(struct kvm_vc
  
        if (sd->current_vmcb != svm->vmcb) {
                sd->current_vmcb = svm->vmcb;
 -              indirect_branch_prediction_barrier();
 +
 +              if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
 +                      indirect_branch_prediction_barrier();
        }
        if (kvm_vcpu_apicv_active(vcpu))
                avic_vcpu_load(vcpu, cpu);
@@@ -1804,11 -1822,6 +1840,11 @@@ static void sev_post_set_cr3(struct kvm
        }
  }
  
 +static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 +{
 +      return true;
 +}
 +
  void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
@@@ -1965,7 -1978,7 +2001,7 @@@ static void svm_sync_dirty_debug_regs(s
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
 -      if (vcpu->arch.guest_state_protected)
 +      if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
                return;
  
        get_debugreg(vcpu->arch.db[0], 0);
@@@ -2526,13 -2539,12 +2562,13 @@@ static int iret_interception(struct kvm
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
 +      WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
 +
        ++vcpu->stat.nmi_window_exits;
        svm->awaiting_iret_completion = true;
  
        svm_clr_iret_intercept(svm);
 -      if (!sev_es_guest(vcpu->kvm))
 -              svm->nmi_iret_rip = kvm_rip_read(vcpu);
 +      svm->nmi_iret_rip = kvm_rip_read(vcpu);
  
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        return 1;
@@@ -2697,13 -2709,6 +2733,13 @@@ static int dr_interception(struct kvm_v
        unsigned long val;
        int err = 0;
  
 +      /*
 +       * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
 +       * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
 +       */
 +      if (sev_es_guest(vcpu->kvm))
 +              return 1;
 +
        if (vcpu->guest_debug == 0) {
                /*
                 * No more DR vmexits; force a reload of the debug registers
@@@ -2788,7 -2793,8 +2824,8 @@@ static int svm_get_msr(struct kvm_vcpu 
  
        switch (msr_info->index) {
        case MSR_AMD64_TSC_RATIO:
-               if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
+               if (!msr_info->host_initiated &&
+                   !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
                        return 1;
                msr_info->data = svm->tsc_ratio_msr;
                break;
                msr_info->data = svm->tsc_aux;
                break;
        case MSR_IA32_DEBUGCTLMSR:
 +              msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
 +              break;
        case MSR_IA32_LASTBRANCHFROMIP:
 +              msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
 +              break;
        case MSR_IA32_LASTBRANCHTOIP:
 +              msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
 +              break;
        case MSR_IA32_LASTINTFROMIP:
 +              msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
 +              break;
        case MSR_IA32_LASTINTTOIP:
 -              msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
 +              msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
                break;
        case MSR_VM_HSAVE_PA:
                msr_info->data = svm->nested.hsave_msr;
@@@ -2938,7 -2936,7 +2975,7 @@@ static int svm_set_msr(struct kvm_vcpu 
        switch (ecx) {
        case MSR_AMD64_TSC_RATIO:
  
-               if (!svm->tsc_scaling_enabled) {
+               if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
  
                        if (!msr->host_initiated)
                                return 1;
  
                svm->tsc_ratio_msr = data;
  
-               if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
+               if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+                   is_guest_mode(vcpu))
                        nested_svm_update_tsc_ratio_msr(vcpu);
  
                break;
                if (data & DEBUGCTL_RESERVED_BITS)
                        return 1;
  
 -              if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
 -                      svm->vmcb->save.dbgctl = data;
 -              else
 -                      svm->vmcb01.ptr->save.dbgctl = data;
 -
 +              svm_get_lbr_vmcb(svm)->save.dbgctl = data;
                svm_update_lbrv(vcpu);
 -
                break;
        case MSR_VM_HSAVE_PA:
                /*
@@@ -3796,19 -3800,6 +3834,19 @@@ static void svm_enable_nmi_window(struc
        if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
                return; /* IRET will cause a vm exit */
  
 +      /*
 +       * SEV-ES guests are responsible for signaling when a vCPU is ready to
 +       * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
 +       * KVM can't intercept and single-step IRET to detect when NMIs are
 +       * unblocked (architecturally speaking).  See SVM_VMGEXIT_NMI_COMPLETE.
 +       *
 +       * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
 +       * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
 +       * supported NAEs in the GHCB protocol.
 +       */
 +      if (sev_es_guest(vcpu->kvm))
 +              return;
 +
        if (!gif_set(svm)) {
                if (vgif)
                        svm_set_intercept(svm, INTERCEPT_STGI);
@@@ -3958,11 -3949,12 +3996,11 @@@ static void svm_complete_interrupts(str
        svm->soft_int_injected = false;
  
        /*
 -       * If we've made progress since setting HF_IRET_MASK, we've
 +       * If we've made progress since setting awaiting_iret_completion, we've
         * executed an IRET and can allow NMI injection.
         */
        if (svm->awaiting_iret_completion &&
 -          (sev_es_guest(vcpu->kvm) ||
 -           kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
 +          kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
                svm->awaiting_iret_completion = false;
                svm->nmi_masked = false;
                kvm_make_request(KVM_REQ_EVENT, vcpu);
@@@ -4032,8 -4024,14 +4070,8 @@@ static int svm_vcpu_pre_run(struct kvm_
  
  static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
  {
 -      struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
 -
 -      /*
 -       * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
 -       * can't read guest memory (dereference memslots) to decode the WRMSR.
 -       */
 -      if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
 -          nrips && control->next_rip)
 +      if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
 +          to_svm(vcpu)->vmcb->control.exit_info_1)
                return handle_fastpath_set_msr_irqoff(vcpu);
  
        return EXIT_FASTPATH_NONE;
@@@ -4045,8 -4043,6 +4083,8 @@@ static noinstr void svm_vcpu_enter_exit
  
        guest_state_enter_irqoff();
  
 +      amd_clear_divider();
 +
        if (sev_es_guest(vcpu->kvm))
                __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
        else
@@@ -4248,28 -4244,37 +4286,37 @@@ static void svm_vcpu_after_set_cpuid(st
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_cpuid_entry2 *best;
  
-       vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                                   boot_cpu_has(X86_FEATURE_XSAVE) &&
-                                   boot_cpu_has(X86_FEATURE_XSAVES);
-       /* Update nrips enabled cache */
-       svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
-                            guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
-       svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
-       svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
-       svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
-       svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+       /*
+        * SVM doesn't provide a way to disable just XSAVES in the guest; KVM
+        * can only disable all variants of XSAVE by disallowing CR4.OSXSAVE from
+        * being set.  As a result, if the host has XSAVE and XSAVES, and the
+        * guest has XSAVE enabled, the guest can execute XSAVES without
+        * faulting.  Treat XSAVES as enabled in this case regardless of
+        * whether it's advertised to the guest so that KVM context switches
+        * XSS on VM-Enter/VM-Exit.  Failure to do so would effectively give
+        * the guest read/write access to the host's XSS.
+        */
+       if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+           boot_cpu_has(X86_FEATURE_XSAVES) &&
+           guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+               kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
  
-       svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
  
-       svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+       /*
+        * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+        * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
+        * SVM on Intel is bonkers and extremely unlikely to work).
+        */
+       if (!guest_cpuid_is_intel(vcpu))
+               kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
  
-       svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
  
        svm_recalc_instruction_intercepts(vcpu, svm);
  
@@@ -4690,25 -4695,16 +4737,25 @@@ static bool svm_can_emulate_instruction
         * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
         * decode garbage.
         *
 -       * Inject #UD if KVM reached this point without an instruction buffer.
 -       * In practice, this path should never be hit by a well-behaved guest,
 -       * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
 -       * is still theoretically reachable, e.g. via unaccelerated fault-like
 -       * AVIC access, and needs to be handled by KVM to avoid putting the
 -       * guest into an infinite loop.   Injecting #UD is somewhat arbitrary,
 -       * but its the least awful option given lack of insight into the guest.
 +       * If KVM is NOT trying to simply skip an instruction, inject #UD if
 +       * KVM reached this point without an instruction buffer.  In practice,
 +       * this path should never be hit by a well-behaved guest, e.g. KVM
 +       * doesn't intercept #UD or #GP for SEV guests, but this path is still
 +       * theoretically reachable, e.g. via unaccelerated fault-like AVIC
 +       * access, and needs to be handled by KVM to avoid putting the guest
 +       * into an infinite loop.   Injecting #UD is somewhat arbitrary, but
 +       * it's the least awful option given lack of insight into the guest.
 +       *
 +       * If KVM is trying to skip an instruction, simply resume the guest.
 +       * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
 +       * will attempt to re-inject the INT3/INTO and skip the instruction.
 +       * In that scenario, retrying the INT3/INTO and hoping the guest will
 +       * make forward progress is the only option that has a chance of
 +       * success (and in practice it will work the vast majority of the time).
         */
        if (unlikely(!insn)) {
 -              kvm_queue_exception(vcpu, UD_VECTOR);
 +              if (!(emul_type & EMULTYPE_SKIP))
 +                      kvm_queue_exception(vcpu, UD_VECTOR);
                return false;
        }
  
@@@ -4866,7 -4862,6 +4913,7 @@@ static struct kvm_x86_ops svm_x86_ops _
        .set_segment = svm_set_segment,
        .get_cpl = svm_get_cpl,
        .get_cs_db_l_bits = svm_get_cs_db_l_bits,
 +      .is_valid_cr0 = svm_is_valid_cr0,
        .set_cr0 = svm_set_cr0,
        .post_set_cr3 = sev_post_set_cr3,
        .is_valid_cr4 = svm_is_valid_cr4,
@@@ -5160,11 -5155,9 +5207,11 @@@ static __init int svm_hardware_setup(vo
  
        svm_adjust_mmio_mask();
  
 +      nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
 +
        /*
         * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
 -       * may be modified by svm_adjust_mmio_mask()).
 +       * may be modified by svm_adjust_mmio_mask()), as well as nrips.
         */
        sev_hardware_setup();
  
                        goto err;
        }
  
 -      if (nrips) {
 -              if (!boot_cpu_has(X86_FEATURE_NRIPS))
 -                      nrips = false;
 -      }
 -
        enable_apicv = avic = avic && avic_hardware_setup();
  
        if (!enable_apicv) {
@@@ -5258,6 -5256,13 +5305,13 @@@ static struct kvm_x86_init_ops svm_init
        .pmu_ops = &amd_pmu_ops,
  };
  
+ static void __svm_exit(void)
+ {
+       kvm_x86_vendor_exit();
+       cpu_emergency_unregister_virt_callback(svm_emergency_disable);
+ }
  static int __init svm_init(void)
  {
        int r;
        if (r)
                return r;
  
+       cpu_emergency_register_virt_callback(svm_emergency_disable);
        /*
         * Common KVM initialization _must_ come last, after this, /dev/kvm is
         * exposed to userspace!
        return 0;
  
  err_kvm_init:
-       kvm_x86_vendor_exit();
+       __svm_exit();
        return r;
  }
  
  static void __exit svm_exit(void)
  {
        kvm_exit();
-       kvm_x86_vendor_exit();
+       __svm_exit();
  }
  
  module_init(svm_init)
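
The new __svm_exit() exists so that the emergency-disable callback is
registered only while SVM's vendor state is live and is unregistered on every
teardown path.  Stripped down to the ordering, the intent is roughly (sketch,
not a drop-in replacement for svm_init()):

    static int __init example_svm_init(void)
    {
            int r;

            r = kvm_x86_vendor_init(&svm_init_ops);
            if (r)
                    return r;

            /* Emergency disable is only meaningful while KVM/SVM is loaded. */
            cpu_emergency_register_virt_callback(svm_emergency_disable);

            r = kvm_init(sizeof(struct vcpu_svm), __alignof__(struct vcpu_svm),
                         THIS_MODULE);
            if (r)
                    goto err_kvm_init;

            return 0;

    err_kvm_init:
            __svm_exit();   /* vendor teardown + callback unregistration */
            return r;
    }
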
diff --combined arch/x86/kvm/svm/svm.h
index ef508042a5536aedd6f45d48ab2bf3651eb9f67e,06400cfe2244b01a6736c28030f777387859d116..f41253958357b7730b280fe17e990c91e0b23a05
@@@ -22,6 -22,7 +22,7 @@@
  #include <asm/svm.h>
  #include <asm/sev-common.h>
  
+ #include "cpuid.h"
  #include "kvm_cache_regs.h"
  
  #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
@@@ -33,7 -34,6 +34,7 @@@
  #define MSRPM_OFFSETS 32
  extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
  extern bool npt_enabled;
 +extern int nrips;
  extern int vgif;
  extern bool intercept_smi;
  extern bool x2avic_enabled;
@@@ -191,12 -191,10 +192,12 @@@ struct vcpu_sev_es_state 
        /* SEV-ES support */
        struct sev_es_save_area *vmsa;
        struct ghcb *ghcb;
 +      u8 valid_bitmap[16];
        struct kvm_host_map ghcb_map;
        bool received_first_sipi;
  
        /* SEV-ES scratch area support */
 +      u64 sw_scratch;
        void *ghcb_sa;
        u32 ghcb_sa_len;
        bool ghcb_sa_sync;
@@@ -261,16 -259,6 +262,6 @@@ struct vcpu_svm 
        unsigned long soft_int_next_rip;
        bool soft_int_injected;
  
-       /* optional nested SVM features that are enabled for this guest  */
-       bool nrips_enabled                : 1;
-       bool tsc_scaling_enabled          : 1;
-       bool v_vmload_vmsave_enabled      : 1;
-       bool lbrv_enabled                 : 1;
-       bool pause_filter_enabled         : 1;
-       bool pause_threshold_enabled      : 1;
-       bool vgif_enabled                 : 1;
-       bool vnmi_enabled                 : 1;
        u32 ldr_reg;
        u32 dfr_reg;
        struct page *avic_backing_page;
@@@ -407,6 -395,48 +398,6 @@@ static inline bool vmcb12_is_intercept(
        return test_bit(bit, (unsigned long *)&control->intercepts);
  }
  
 -static inline void set_dr_intercepts(struct vcpu_svm *svm)
 -{
 -      struct vmcb *vmcb = svm->vmcb01.ptr;
 -
 -      if (!sev_es_guest(svm->vcpu.kvm)) {
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
 -      }
 -
 -      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
 -      vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
 -
 -      recalc_intercepts(svm);
 -}
 -
 -static inline void clr_dr_intercepts(struct vcpu_svm *svm)
 -{
 -      struct vmcb *vmcb = svm->vmcb01.ptr;
 -
 -      vmcb->control.intercepts[INTERCEPT_DR] = 0;
 -
 -      /* DR7 access must remain intercepted for an SEV-ES guest */
 -      if (sev_es_guest(svm->vcpu.kvm)) {
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
 -              vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
 -      }
 -
 -      recalc_intercepts(svm);
 -}
 -
  static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
  {
        struct vmcb *vmcb = svm->vmcb01.ptr;
@@@ -452,7 -482,8 +443,8 @@@ static inline bool svm_is_intercept(str
  
  static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
  {
-       return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+       return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
+              (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
  }
  
  static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
@@@ -503,7 -534,7 +495,7 @@@ static inline bool nested_npt_enabled(s
  
  static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
  {
-       return svm->vnmi_enabled &&
+       return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
               (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
  }
  
@@@ -619,7 -650,7 +611,7 @@@ int nested_svm_check_exception(struct v
                               bool has_error_code, u32 error_code);
  int nested_svm_exit_special(struct vcpu_svm *svm);
  void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
- void __svm_write_tsc_multiplier(u64 multiplier);
+ void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
  void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
                                       struct vmcb_control_area *control);
  void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
@@@ -705,28 -736,4 +697,28 @@@ void sev_es_unmap_ghcb(struct vcpu_svm 
  void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
  void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
  
 +#define DEFINE_KVM_GHCB_ACCESSORS(field)                                              \
 +      static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
 +      {                                                                       \
 +              return test_bit(GHCB_BITMAP_IDX(field),                         \
 +                              (unsigned long *)&svm->sev_es.valid_bitmap);    \
 +      }                                                                       \
 +                                                                              \
 +      static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \
 +      {                                                                       \
 +              return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \
 +      }                                                                       \
 +
 +DEFINE_KVM_GHCB_ACCESSORS(cpl)
 +DEFINE_KVM_GHCB_ACCESSORS(rax)
 +DEFINE_KVM_GHCB_ACCESSORS(rcx)
 +DEFINE_KVM_GHCB_ACCESSORS(rdx)
 +DEFINE_KVM_GHCB_ACCESSORS(rbx)
 +DEFINE_KVM_GHCB_ACCESSORS(rsi)
 +DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
 +DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
 +DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
 +DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
 +DEFINE_KVM_GHCB_ACCESSORS(xcr0)
 +
  #endif
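
DEFINE_KVM_GHCB_ACCESSORS() above generates a pair of helpers per GHCB field
so that consumers only trust guest-supplied state the guest has explicitly
marked valid.  Hypothetical usage (not a call site from this series):

    /* Only act on the GHCB's RAX if the guest marked it valid. */
    static bool example_read_ghcb_rax(struct vcpu_svm *svm, struct ghcb *ghcb,
                                      u64 *rax)
    {
            if (!kvm_ghcb_rax_is_valid(svm))
                    return false;

            *rax = ghcb->save.rax;  /* same as kvm_ghcb_get_rax_if_valid() here */
            return true;
    }
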
diff --combined arch/x86/kvm/vmx/vmx.c
index c0236dd4d8924665dc0cb4c923d2bd1982369248,e9386afd1521fc250ebb14085b16d22004f4f7b4..af73d5d54ec8e8a6a07e8e5f700ce1ee94da9a39
  #include <asm/idtentry.h>
  #include <asm/io.h>
  #include <asm/irq_remapping.h>
- #include <asm/kexec.h>
+ #include <asm/reboot.h>
  #include <asm/perf_event.h>
  #include <asm/mmu_context.h>
  #include <asm/mshyperv.h>
  #include <asm/mwait.h>
  #include <asm/spec-ctrl.h>
- #include <asm/virtext.h>
  #include <asm/vmx.h>
  
  #include "capabilities.h"
@@@ -237,9 -236,6 +236,6 @@@ static const struct 
  #define L1D_CACHE_ORDER 4
  static void *vmx_l1d_flush_pages;
  
- /* Control for disabling CPU Fill buffer clear */
- static bool __read_mostly vmx_fb_clear_ctrl_available;
  static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
  {
        struct page *page;
                return 0;
        }
  
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-               u64 msr;
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-               if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
-                       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
-                       return 0;
-               }
+       if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+               l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+               return 0;
        }
  
        /* If set to auto use the default l1tf mitigation method */
@@@ -366,22 -357,9 +357,9 @@@ static int vmentry_l1d_flush_set(const 
  static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
  {
        if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
-               return sprintf(s, "???\n");
+               return sysfs_emit(s, "???\n");
  
-       return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
- }
- static void vmx_setup_fb_clear_ctrl(void)
- {
-       u64 msr;
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
-           !boot_cpu_has_bug(X86_BUG_MDS) &&
-           !boot_cpu_has_bug(X86_BUG_TAA)) {
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-               if (msr & ARCH_CAP_FB_CLEAR_CTRL)
-                       vmx_fb_clear_ctrl_available = true;
-       }
+       return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
  }
  
  static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
@@@ -409,7 -387,9 +387,9 @@@ static __always_inline void vmx_enable_
  
  static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
  {
-       vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+       vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+                               !boot_cpu_has_bug(X86_BUG_MDS) &&
+                               !boot_cpu_has_bug(X86_BUG_TAA);
  
        /*
         * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
@@@ -441,23 -421,13 +421,23 @@@ do {                                    
        pr_warn_ratelimited(fmt);       \
  } while (0)
  
 -void vmread_error(unsigned long field, bool fault)
 +noinline void vmread_error(unsigned long field)
  {
 -      if (fault)
 +      vmx_insn_failed("vmread failed: field=%lx\n", field);
 +}
 +
 +#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
 +noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
 +{
 +      if (fault) {
                kvm_spurious_fault();
 -      else
 -              vmx_insn_failed("vmread failed: field=%lx\n", field);
 +      } else {
 +              instrumentation_begin();
 +              vmread_error(field);
 +              instrumentation_end();
 +      }
  }
 +#endif
  
  noinline void vmwrite_error(unsigned long field, unsigned long value)
  {
@@@ -754,17 -724,51 +734,51 @@@ static int vmx_set_guest_uret_msr(struc
        return ret;
  }
  
- #ifdef CONFIG_KEXEC_CORE
- static void crash_vmclear_local_loaded_vmcss(void)
+ /*
+  * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+  *
+  * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+  * atomically track post-VMXON state, e.g. this may be called in NMI context.
+  * Eat all faults, as all other faults on VMXOFF are mode related, i.e.
+  * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+  * magically in RM, VM86, compat mode, or at CPL>0.
+  */
+ static int kvm_cpu_vmxoff(void)
+ {
+       asm_volatile_goto("1: vmxoff\n\t"
+                         _ASM_EXTABLE(1b, %l[fault])
+                         ::: "cc", "memory" : fault);
+       cr4_clear_bits(X86_CR4_VMXE);
+       return 0;
+ fault:
+       cr4_clear_bits(X86_CR4_VMXE);
+       return -EIO;
+ }
+ static void vmx_emergency_disable(void)
  {
        int cpu = raw_smp_processor_id();
        struct loaded_vmcs *v;
  
+       kvm_rebooting = true;
+       /*
+        * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+        * set in task context.  If this races with VMX is disabled by an NMI,
+        * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
+        * kvm_rebooting set.
+        */
+       if (!(__read_cr4() & X86_CR4_VMXE))
+               return;
        list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
                            loaded_vmcss_on_cpu_link)
                vmcs_clear(v->vmcs);
+       kvm_cpu_vmxoff();
  }
- #endif /* CONFIG_KEXEC_CORE */
  
  static void __loaded_vmcs_clear(void *arg)
  {
@@@ -1513,11 -1517,6 +1527,11 @@@ void vmx_set_rflags(struct kvm_vcpu *vc
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long old_rflags;
  
 +      /*
 +       * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
 +       * is an unrestricted guest in order to mark L2 as needing emulation
 +       * if L1 runs L2 as a restricted guest.
 +       */
        if (is_unrestricted_guest(vcpu)) {
                kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
                vmx->rflags = rflags;
@@@ -1899,25 -1898,14 +1913,14 @@@ u64 vmx_get_l2_tsc_multiplier(struct kv
        return kvm_caps.default_tsc_scaling_ratio;
  }
  
- static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
- {
-       vmcs_write64(TSC_OFFSET, offset);
- }
- static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
  {
-       vmcs_write64(TSC_MULTIPLIER, multiplier);
+       vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
  }
  
- /*
-  * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
-  * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
-  * all guests if the "nested" module option is off, and can also be disabled
-  * for a single guest by disabling its VMX cpuid bit.
-  */
- bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
+ static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
  {
-       return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
+       vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
  }
  
  /*
@@@ -2047,7 -2035,7 +2050,7 @@@ static int vmx_get_msr(struct kvm_vcpu 
                        [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
                break;
        case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return 1;
                if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
                                    &msr_info->data))
@@@ -2355,7 -2343,7 +2358,7 @@@ static int vmx_set_msr(struct kvm_vcpu 
        case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
                if (!msr_info->host_initiated)
                        return 1; /* they are read-only */
-               if (!nested_vmx_allowed(vcpu))
+               if (!guest_can_use(vcpu, X86_FEATURE_VMX))
                        return 1;
                return vmx_set_vmx_msr(vcpu, msr_index, data);
        case MSR_IA32_RTIT_CTL:
@@@ -2729,11 -2717,11 +2732,11 @@@ static int setup_vmcs_config(struct vmc
        return 0;
  }
  
- static bool kvm_is_vmx_supported(void)
+ static bool __kvm_is_vmx_supported(void)
  {
-       int cpu = raw_smp_processor_id();
+       int cpu = smp_processor_id();
  
-       if (!cpu_has_vmx()) {
+       if (!(cpuid_ecx(1) & feature_bit(VMX))) {
                pr_err("VMX not supported by CPU %d\n", cpu);
                return false;
        }
        return true;
  }
  
+ static bool kvm_is_vmx_supported(void)
+ {
+       bool supported;
+ 
+       migrate_disable();
+       supported = __kvm_is_vmx_supported();
+       migrate_enable();
+ 
+       return supported;
+ }
+ 
  static int vmx_check_processor_compat(void)
  {
        int cpu = raw_smp_processor_id();
        struct vmcs_config vmcs_conf;
        struct vmx_capability vmx_cap;
  
-       if (!kvm_is_vmx_supported())
+       if (!__kvm_is_vmx_supported())
                return -EIO;
  
        if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
@@@ -2833,7 -2832,7 +2847,7 @@@ static void vmx_hardware_disable(void
  {
        vmclear_local_loaded_vmcss();
  
-       if (cpu_vmxoff())
+       if (kvm_cpu_vmxoff())
                kvm_spurious_fault();
  
        hv_reset_evmcs();
@@@ -3052,15 -3051,6 +3066,15 @@@ static void enter_rmode(struct kvm_vcp
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
  
 +      /*
 +       * KVM should never use VM86 to virtualize Real Mode when L2 is active,
 +       * as using VM86 is unnecessary if unrestricted guest is enabled, and
 +       * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
 +       * should VM-Fail and KVM should reject userspace attempts to stuff
 +       * CR0.PG=0 when L2 is active.
 +       */
 +      WARN_ON_ONCE(is_guest_mode(vcpu));
 +
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
  
        vmx->rmode.vm86_active = 1;
  
 -      /*
 -       * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 -       * vcpu. Warn the user that an update is overdue.
 -       */
 -      if (!kvm_vmx->tss_addr)
 -              pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
 -
        vmx_segment_cache_clear(vmx);
  
        vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
@@@ -3243,17 -3240,6 +3257,17 @@@ void ept_save_pdptrs(struct kvm_vcpu *v
  #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
                          CPU_BASED_CR3_STORE_EXITING)
  
 +static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 +{
 +      if (is_guest_mode(vcpu))
 +              return nested_guest_cr0_valid(vcpu, cr0);
 +
 +      if (to_vmx(vcpu)->nested.vmxon)
 +              return nested_host_cr0_valid(vcpu, cr0);
 +
 +      return true;
 +}
 +
  void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
  
        hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
 -      if (is_unrestricted_guest(vcpu))
 +      if (enable_unrestricted_guest)
                hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
        else {
                hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
        }
  #endif
  
 -      if (enable_ept && !is_unrestricted_guest(vcpu)) {
 +      if (enable_ept && !enable_unrestricted_guest) {
                /*
                 * Ensure KVM has an up-to-date snapshot of the guest's CR3.  If
                 * the below code _enables_ CR3 exiting, vmx_cache_reg() will
        vmx->emulation_required = vmx_emulation_required(vcpu);
  }
  
 -static int vmx_get_max_tdp_level(void)
 +static int vmx_get_max_ept_level(void)
  {
        if (cpu_has_vmx_ept_5levels())
                return 5;
@@@ -3422,7 -3408,7 +3436,7 @@@ void vmx_set_cr4(struct kvm_vcpu *vcpu
         * this bit, even if host CR4.MCE == 0.
         */
        hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
 -      if (is_unrestricted_guest(vcpu))
 +      if (enable_unrestricted_guest)
                hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
        else if (vmx->rmode.vm86_active)
                hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
        vcpu->arch.cr4 = cr4;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
  
 -      if (!is_unrestricted_guest(vcpu)) {
 +      if (!enable_unrestricted_guest) {
                if (enable_ept) {
                        if (!is_paging(vcpu)) {
                                hw_cr4 &= ~X86_CR4_PAE;
@@@ -4546,16 -4532,19 +4560,19 @@@ vmx_adjust_secondary_exec_control(struc
   * based on a single guest CPUID bit, with a dedicated feature bit.  This also
   * verifies that the control is actually supported by KVM and hardware.
   */
- #define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
- ({                                                                     \
-       bool __enabled;                                                  \
-                                                                        \
-       if (cpu_has_vmx_##name()) {                                      \
-               __enabled = guest_cpuid_has(&(vmx)->vcpu,                \
-                                           X86_FEATURE_##feat_name);    \
-               vmx_adjust_secondary_exec_control(vmx, exec_control,     \
-                       SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
-       }                                                                \
+ #define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting)   \
+ ({                                                                                            \
+       struct kvm_vcpu *__vcpu = &(vmx)->vcpu;                                                 \
+       bool __enabled;                                                                         \
+                                                                                               \
+       if (cpu_has_vmx_##name()) {                                                             \
+               if (kvm_is_governed_feature(X86_FEATURE_##feat_name))                           \
+                       __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name);             \
+               else                                                                            \
+                       __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name);           \
+               vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
+                                                 __enabled, exiting);                          \
+       }                                                                                       \
  })
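
For readers untangling the macro magic: hand-expanding this for the XSAVES invocation added further down, vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES), yields roughly the following. This is a sketch, assuming the _feature wrapper forwards the same name for the CPUID feature and the VMCS control with exiting=false; it is not the preprocessor's literal output:

	({
		struct kvm_vcpu *__vcpu = &vmx->vcpu;
		bool __enabled;

		if (cpu_has_vmx_xsaves()) {
			/* kvm_is_governed_feature() is a compile-time constant check. */
			if (kvm_is_governed_feature(X86_FEATURE_XSAVES))
				__enabled = guest_can_use(__vcpu, X86_FEATURE_XSAVES);
			else
				__enabled = guest_cpuid_has(__vcpu, X86_FEATURE_XSAVES);
			vmx_adjust_secondary_exec_control(vmx, &exec_control,
							  SECONDARY_EXEC_XSAVES,
							  __enabled, false);
		}
	})
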
  
  /* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
@@@ -4615,19 -4604,7 +4632,7 @@@ static u32 vmx_secondary_exec_control(s
        if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
  
-       if (cpu_has_vmx_xsaves()) {
-               /* Exposing XSAVES only when XSAVE is exposed */
-               bool xsaves_enabled =
-                       boot_cpu_has(X86_FEATURE_XSAVE) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
-                       guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
-               vcpu->arch.xsaves_enabled = xsaves_enabled;
-               vmx_adjust_secondary_exec_control(vmx, &exec_control,
-                                                 SECONDARY_EXEC_XSAVES,
-                                                 xsaves_enabled, false);
-       }
+       vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
  
        /*
         * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
                                                  SECONDARY_EXEC_ENABLE_RDTSCP,
                                                  rdpid_or_rdtscp_enabled, false);
        }
        vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
  
        vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@@ -4679,8 -4657,7 +4685,8 @@@ static int vmx_alloc_ipiv_pid_table(str
        if (kvm_vmx->pid_table)
                return 0;
  
 -      pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
 +      pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
 +                          vmx_get_pid_table_order(kvm));
        if (!pages)
                return -ENOMEM;
  
@@@ -5393,11 -5370,18 +5399,11 @@@ static int handle_set_cr0(struct kvm_vc
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
  
 -              if (!nested_guest_cr0_valid(vcpu, val))
 -                      return 1;
 -
                if (kvm_set_cr0(vcpu, val))
                        return 1;
                vmcs_writel(CR0_READ_SHADOW, orig_val);
                return 0;
        } else {
 -              if (to_vmx(vcpu)->nested.vmxon &&
 -                  !nested_host_cr0_valid(vcpu, val))
 -                      return 1;
 -
                return kvm_set_cr0(vcpu, val);
        }
  }
@@@ -6789,10 -6773,8 +6795,10 @@@ static void vmx_set_apic_access_page_ad
        vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
        read_unlock(&vcpu->kvm->mmu_lock);
  
 -      vmx_flush_tlb_current(vcpu);
 -
 +      /*
 +       * No need for a manual TLB flush at this point, KVM has already done a
 +       * flush if there were SPTEs pointing at the previous page.
 +       */
  out:
        /*
         * Do not pin apic access page in memory, the MMU notifier
@@@ -7238,20 -7220,13 +7244,20 @@@ static noinstr void vmx_vcpu_enter_exit
                                   flags);
  
        vcpu->arch.cr2 = native_read_cr2();
 +      vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
 +
 +      vmx->idt_vectoring_info = 0;
  
        vmx_enable_fb_clear(vmx);
  
 -      if (unlikely(vmx->fail))
 +      if (unlikely(vmx->fail)) {
                vmx->exit_reason.full = 0xdead;
 -      else
 -              vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
 +              goto out;
 +      }
 +
 +      vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
 +      if (likely(!vmx->exit_reason.failed_vmentry))
 +              vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
  
        if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
            is_nmi(vmx_get_intr_info(vcpu))) {
                kvm_after_interrupt(vcpu);
        }
  
 +out:
        guest_state_exit_irqoff();
  }
  
@@@ -7382,6 -7356,8 +7388,6 @@@ static fastpath_t vmx_vcpu_run(struct k
        loadsegment(es, __USER_DS);
  #endif
  
 -      vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
 -
        pt_guest_exit(vmx);
  
        kvm_load_host_xsave_state(vcpu);
                vmx->nested.nested_run_pending = 0;
        }
  
 -      vmx->idt_vectoring_info = 0;
 -
        if (unlikely(vmx->fail))
                return EXIT_FASTPATH_NONE;
  
        if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
                kvm_machine_check();
  
 -      if (likely(!vmx->exit_reason.failed_vmentry))
 -              vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 -
        trace_kvm_exit(vcpu, KVM_ISA_VMX);
  
        if (unlikely(vmx->exit_reason.failed_vmentry))
@@@ -7747,8 -7728,16 +7753,16 @@@ static void vmx_vcpu_after_set_cpuid(st
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
-       /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
-       vcpu->arch.xsaves_enabled = false;
+       /*
+        * XSAVES is effectively enabled if and only if XSAVE is also exposed
+        * to the guest.  XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
+        * set if and only if XSAVE is supported.
+        */
+       if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+           guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+               kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
+       kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
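
guest_can_use() and kvm_governed_feature_check_and_set() come from the governed-features framework added elsewhere in this series; conceptually they reduce to a per-vCPU bitmap of features the guest is allowed to use. A rough sketch follows; the real helpers live in arch/x86/kvm/cpuid.h, and names such as kvm_governed_feature_index() are assumptions for illustration:

	/* Conceptual sketch only, not the exact cpuid.h implementation. */
	static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
						  unsigned int x86_feature)
	{
		return test_bit(kvm_governed_feature_index(x86_feature),
				vcpu->arch.governed_features.enabled);
	}

	static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
								       unsigned int x86_feature)
	{
		/* Grant the feature only if both KVM and guest CPUID expose it. */
		if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
			__set_bit(kvm_governed_feature_index(x86_feature),
				  vcpu->arch.governed_features.enabled);
	}
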
  
        vmx_setup_uret_msrs(vmx);
  
                vmcs_set_secondary_exec_control(vmx,
                                                vmx_secondary_exec_control(vmx));
  
-       if (nested_vmx_allowed(vcpu))
+       if (guest_can_use(vcpu, X86_FEATURE_VMX))
                vmx->msr_ia32_feature_control_valid_bits |=
                        FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
                        FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
                        ~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
                          FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
  
-       if (nested_vmx_allowed(vcpu))
+       if (guest_can_use(vcpu, X86_FEATURE_VMX))
                nested_vmx_cr_fixed1_bits_update(vcpu);
  
        if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
@@@ -8228,7 -8217,6 +8242,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
        .set_segment = vmx_set_segment,
        .get_cpl = vmx_get_cpl,
        .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
 +      .is_valid_cr0 = vmx_is_valid_cr0,
        .set_cr0 = vmx_set_cr0,
        .is_valid_cr4 = vmx_is_valid_cr4,
        .set_cr4 = vmx_set_cr4,
@@@ -8522,7 -8510,7 +8536,7 @@@ static __init int hardware_setup(void
         */
        vmx_setup_me_spte_mask();
  
 -      kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
 +      kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
                          ept_caps_to_lpage_level(vmx_capability.ept));
  
        /*
@@@ -8618,10 -8606,8 +8632,8 @@@ static void __vmx_exit(void
  {
        allow_smaller_maxphyaddr = false;
  
- #ifdef CONFIG_KEXEC_CORE
-       RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
-       synchronize_rcu();
- #endif
+       cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
        vmx_cleanup_l1d_flush();
  }
  
@@@ -8662,18 -8648,14 +8674,14 @@@ static int __init vmx_init(void
        if (r)
                goto err_l1d_flush;
  
-       vmx_setup_fb_clear_ctrl();
        for_each_possible_cpu(cpu) {
                INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
  
                pi_init_cpu(cpu);
        }
  
- #ifdef CONFIG_KEXEC_CORE
-       rcu_assign_pointer(crash_vmclear_loaded_vmcss,
-                          crash_vmclear_local_loaded_vmcss);
- #endif
+       cpu_emergency_register_virt_callback(vmx_emergency_disable);
        vmx_check_vmcs12_offsets();
  
        /*
diff --combined arch/x86/kvm/x86.c
index 94fa36ee073c2434159d91c40aef9a9a5b9c7ba9,7849ea0b0bf7ba9dbb5b1a784c6e60ec2845e914..0b38a046690eb6bfe21db7fddd8ba76322aec843
@@@ -237,6 -237,9 +237,9 @@@ EXPORT_SYMBOL_GPL(enable_apicv)
  u64 __read_mostly host_xss;
  EXPORT_SYMBOL_GPL(host_xss);
  
+ u64 __read_mostly host_arch_capabilities;
+ EXPORT_SYMBOL_GPL(host_arch_capabilities);
+ 
  const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
        KVM_GENERIC_VM_STATS(),
        STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
@@@ -906,22 -909,6 +909,22 @@@ int load_pdptrs(struct kvm_vcpu *vcpu, 
  }
  EXPORT_SYMBOL_GPL(load_pdptrs);
  
 +static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 +{
 +#ifdef CONFIG_X86_64
 +      if (cr0 & 0xffffffff00000000UL)
 +              return false;
 +#endif
 +
 +      if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
 +              return false;
 +
 +      if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
 +              return false;
 +
 +      return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
 +}
 +
  void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
  {
        /*
@@@ -968,13 -955,20 +971,13 @@@ int kvm_set_cr0(struct kvm_vcpu *vcpu, 
  {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
  
 -      cr0 |= X86_CR0_ET;
 -
 -#ifdef CONFIG_X86_64
 -      if (cr0 & 0xffffffff00000000UL)
 +      if (!kvm_is_valid_cr0(vcpu, cr0))
                return 1;
 -#endif
 -
 -      cr0 &= ~CR0_RESERVED_BITS;
  
 -      if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
 -              return 1;
 +      cr0 |= X86_CR0_ET;
  
 -      if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
 -              return 1;
 +      /* Writes to CR0 reserved bits are ignored, even on Intel. */
 +      cr0 &= ~CR0_RESERVED_BITS;
  
  #ifdef CONFIG_X86_64
        if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
@@@ -1021,7 -1015,7 +1024,7 @@@ void kvm_load_guest_xsave_state(struct 
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
  
-               if (vcpu->arch.xsaves_enabled &&
+               if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
        }
@@@ -1052,7 -1046,7 +1055,7 @@@ void kvm_load_host_xsave_state(struct k
                if (vcpu->arch.xcr0 != host_xcr0)
                        xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
  
-               if (vcpu->arch.xsaves_enabled &&
+               if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
                    vcpu->arch.ia32_xss != host_xss)
                        wrmsrl(MSR_IA32_XSS, host_xss);
        }
@@@ -1616,16 -1610,11 +1619,11 @@@ static bool kvm_is_immutable_feature_ms
         ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
 -       ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
 +       ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
  
  static u64 kvm_get_arch_capabilities(void)
  {
-       u64 data = 0;
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
-               data &= KVM_SUPPORTED_ARCH_CAP;
-       }
+       u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
  
        /*
         * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
                 */
        }
  
 +      if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
 +              data |= ARCH_CAP_GDS_NO;
 +
        return data;
  }
  
@@@ -2184,8 -2170,6 +2182,8 @@@ fastpath_t handle_fastpath_set_msr_irqo
        u64 data;
        fastpath_t ret = EXIT_FASTPATH_NONE;
  
 +      kvm_vcpu_srcu_read_lock(vcpu);
 +
        switch (msr) {
        case APIC_BASE_MSR + (APIC_ICR >> 4):
                data = kvm_read_edx_eax(vcpu);
        if (ret != EXIT_FASTPATH_NONE)
                trace_kvm_msr_write(msr, data);
  
 +      kvm_vcpu_srcu_read_unlock(vcpu);
 +
        return ret;
  }
  EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
@@@ -2631,7 -2613,7 +2629,7 @@@ static void kvm_vcpu_write_tsc_offset(s
        else
                vcpu->arch.tsc_offset = l1_offset;
  
-       static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
+       static_call(kvm_x86_write_tsc_offset)(vcpu);
  }
  
  static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
                vcpu->arch.tsc_scaling_ratio = l1_multiplier;
  
        if (kvm_caps.has_tsc_control)
-               static_call(kvm_x86_write_tsc_multiplier)(
-                       vcpu, vcpu->arch.tsc_scaling_ratio);
+               static_call(kvm_x86_write_tsc_multiplier)(vcpu);
  }
  
  static inline bool kvm_check_tsc_unstable(void)
@@@ -4665,7 -4646,6 +4662,6 @@@ static int kvm_x86_dev_get_attr(struct 
                return 0;
        default:
                return -ENXIO;
-               break;
        }
  }
  
@@@ -6532,7 -6512,7 +6528,7 @@@ static void kvm_free_msr_filter(struct 
  static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
                              struct kvm_msr_filter_range *user_range)
  {
-       unsigned long *bitmap = NULL;
+       unsigned long *bitmap;
        size_t bitmap_size;
  
        if (!user_range->nmsrs)
@@@ -8245,11 -8225,6 +8241,6 @@@ static bool emulator_get_cpuid(struct x
        return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
  }
  
- static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
- {
-       return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
- }
- 
  static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
  {
        return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
@@@ -8351,7 -8326,6 +8342,6 @@@ static const struct x86_emulate_ops emu
        .fix_hypercall       = emulator_fix_hypercall,
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
-       .guest_has_long_mode = emulator_guest_has_long_mode,
        .guest_has_movbe     = emulator_guest_has_movbe,
        .guest_has_fxsr      = emulator_guest_has_fxsr,
        .guest_has_rdpid     = emulator_guest_has_rdpid,
@@@ -9172,7 -9146,7 +9162,7 @@@ static int kvmclock_cpu_down_prep(unsig
  static void tsc_khz_changed(void *data)
  {
        struct cpufreq_freqs *freq = data;
-       unsigned long khz = 0;
+       unsigned long khz;
  
        WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
  
@@@ -9512,6 -9486,9 +9502,9 @@@ static int __kvm_x86_vendor_init(struc
  
        kvm_init_pmu_capability(ops->pmu_ops);
  
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+ 
        r = ops->hardware_setup();
        if (r != 0)
                goto out_mmu_exit;
@@@ -10219,13 -10196,9 +10212,13 @@@ static int kvm_check_and_inject_events(
                if (r < 0)
                        goto out;
                if (r) {
 -                      kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
 -                      static_call(kvm_x86_inject_irq)(vcpu, false);
 -                      WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
 +                      int irq = kvm_cpu_get_interrupt(vcpu);
 +
 +                      if (!WARN_ON_ONCE(irq == -1)) {
 +                              kvm_queue_interrupt(vcpu, irq, false);
 +                              static_call(kvm_x86_inject_irq)(vcpu, false);
 +                              WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
 +                      }
                }
                if (kvm_cpu_has_injectable_intr(vcpu))
                        static_call(kvm_x86_enable_irq_window)(vcpu);
@@@ -11111,12 -11084,17 +11104,17 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                        r = -EINTR;
                        goto out;
                }
                /*
-                * It should be impossible for the hypervisor timer to be in
-                * use before KVM has ever run the vCPU.
+                * Don't bother switching APIC timer emulation from the
+                * hypervisor timer to the software timer; the only way for the
+                * APIC timer to be active is if userspace stuffed vCPU state,
+                * i.e. put the vCPU into a nonsensical state.  Only an INIT
+                * will transition the vCPU out of UNINITIALIZED (without more
+                * state stuffing from userspace), which will reset the local
+                * APIC and thus cancel the timer or drop the IRQ (if the timer
+                * already expired).
                 */
-               WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
                kvm_vcpu_srcu_read_unlock(vcpu);
                kvm_vcpu_block(vcpu);
                kvm_vcpu_srcu_read_lock(vcpu);
@@@ -11480,8 -11458,7 +11478,8 @@@ static bool kvm_is_valid_sregs(struct k
                        return false;
        }
  
 -      return kvm_is_valid_cr4(vcpu, sregs->cr4);
 +      return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
 +             kvm_is_valid_cr0(vcpu, sregs->cr0);
  }
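
With kvm_is_valid_cr0() now enforced on the KVM_SET_SREGS path, userspace can no longer stuff architecturally impossible CR0 values into a vCPU. A hypothetical userspace-side check is sketched below; vcpu_fd and the CR0 bit defines are assumptions for illustration, while the ioctls are the standard KVM ABI:

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#define CR0_PE	(1ULL << 0)
	#define CR0_PG	(1ULL << 31)

	/* vcpu_fd: an already-created KVM vCPU file descriptor. */
	static int check_cr0_rejected(int vcpu_fd)
	{
		struct kvm_sregs sregs;
		int ret;

		if (ioctl(vcpu_fd, KVM_GET_SREGS, &sregs))
			return -1;

		/* CR0.PG=1 with CR0.PE=0 is architecturally invalid. */
		sregs.cr0 = (sregs.cr0 | CR0_PG) & ~CR0_PE;

		ret = ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);

		/* Expect ret == -1 with errno == EINVAL, courtesy of kvm_is_valid_cr0(). */
		return (ret == -1 && errno == EINVAL) ? 0 : -1;
	}
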
  
  static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
@@@ -11798,22 -11775,15 +11796,22 @@@ static int sync_regs(struct kvm_vcpu *v
                __set_regs(vcpu, &vcpu->run->s.regs.regs);
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
        }
 +
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
 -              if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
 +              struct kvm_sregs sregs = vcpu->run->s.regs.sregs;
 +
 +              if (__set_sregs(vcpu, &sregs))
                        return -EINVAL;
 +
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
        }
 +
        if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
 -              if (kvm_vcpu_ioctl_x86_set_vcpu_events(
 -                              vcpu, &vcpu->run->s.regs.events))
 +              struct kvm_vcpu_events events = vcpu->run->s.regs.events;
 +
 +              if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events))
                        return -EINVAL;
 +
                vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
        }
  
@@@ -12779,7 -12749,7 +12777,7 @@@ static void kvm_mmu_slot_apply_flags(st
                 * See is_writable_pte() for more details (the case involving
                 * access-tracked SPTEs is particularly relevant).
                 */
 -              kvm_arch_flush_remote_tlbs_memslot(kvm, new);
 +              kvm_flush_remote_tlbs_memslot(kvm, new);
        }
  }
  
@@@ -13213,7 -13183,7 +13211,7 @@@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoher
  
  bool kvm_arch_has_irq_bypass(void)
  {
 -      return true;
 +      return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
  }
  
  int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,