Merge branch 'kvm-lapic-fix-and-cleanup' into HEAD
author Paolo Bonzini <[email protected]>
Tue, 27 Dec 2022 12:56:16 +0000 (07:56 -0500)
committer Paolo Bonzini <[email protected]>
Tue, 24 Jan 2023 11:08:01 +0000 (06:08 -0500)
The first half or so of the patches fix semi-urgent, real-world-relevant
APICv and AVIC bugs.

The second half fixes a variety of AVIC and optimized-APIC-map bugs where
KVM doesn't play nice with edge cases that are architecturally legal(ish),
but unlikely to occur in most real-world scenarios.

Signed-off-by: Paolo Bonzini <[email protected]>
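
As a rough illustration of the aliasing the second half guards against (a
standalone sketch, not part of the series; the helper name below is made up):
an x2APIC ID is a 32-bit value, but the legacy xAPIC ID register holds only
8 bits, so two vCPUs whose IDs differ only above bit 7 collide when indexed
by xAPIC ID.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical demo, not KVM code: IDs > 0xff wrap and collide. */
static uint8_t xapic_id_from_x2apic(uint32_t x2apic_id)
{
	return (uint8_t)x2apic_id;	/* truncation to 8 bits */
}

int main(void)
{
	uint32_t a = 0x001, b = 0x101;	/* vCPU IDs 1 and 257 */

	printf("x2APIC %#x -> xAPIC %#x\n", a, xapic_id_from_x2apic(a));
	printf("x2APIC %#x -> xAPIC %#x\n", b, xapic_id_from_x2apic(b));
	/*
	 * Both print 0x1: without special handling, the second vCPU would
	 * shadow the first in an 8-bit-indexed physical map.
	 */
	return 0;
}
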
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/lapic.c
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c

diff --combined arch/x86/include/asm/kvm-x86-ops.h
index dba2909e5ae2a12658f94e54398da251b9bbf439,84f43caef9b7eaa73b178f009389ed6dbac49e87..8dc345cc63188b4f338317dc18752b1bfd0954e4
@@@ -14,7 -14,6 +14,7 @@@ BUILD_BUG_ON(1
   * to make a definition optional, but in this case the default will
   * be __static_call_return0.
   */
 +KVM_X86_OP(check_processor_compatibility)
  KVM_X86_OP(hardware_enable)
  KVM_X86_OP(hardware_disable)
  KVM_X86_OP(hardware_unsetup)
@@@ -77,7 -76,6 +77,6 @@@ KVM_X86_OP(set_nmi_mask
  KVM_X86_OP(enable_nmi_window)
  KVM_X86_OP(enable_irq_window)
  KVM_X86_OP_OPTIONAL(update_cr8_intercept)
- KVM_X86_OP(check_apicv_inhibit_reasons)
  KVM_X86_OP(refresh_apicv_exec_ctrl)
  KVM_X86_OP_OPTIONAL(hwapic_irr_update)
  KVM_X86_OP_OPTIONAL(hwapic_isr_update)
diff --combined arch/x86/include/asm/kvm_host.h
index 8d0a0a7c34fc399286569acb94c1d8d9e9fb4524,7ca854714ccda12af7767aed5d93a55f8e271117..4d2bc08794e45c6ef11cd551ebd9a52f2e8dfc5b
@@@ -1022,19 -1022,30 +1022,30 @@@ struct kvm_arch_memory_slot 
  };
  
  /*
-  * We use as the mode the number of bits allocated in the LDR for the
-  * logical processor ID.  It happens that these are all powers of two.
-  * This makes it is very easy to detect cases where the APICs are
-  * configured for multiple modes; in that case, we cannot use the map and
-  * hence cannot use kvm_irq_delivery_to_apic_fast either.
+  * Track the mode of the optimized logical map, as the rules for decoding the
+  * destination vary per mode.  Enabling the optimized logical map requires all
+  * software-enabled local APICs to be in the same mode, each addressable APIC to
+  * be mapped to only one MDA, and each MDA to map to at most one APIC.
   */
- #define KVM_APIC_MODE_XAPIC_CLUSTER          4
- #define KVM_APIC_MODE_XAPIC_FLAT             8
- #define KVM_APIC_MODE_X2APIC                16
+ enum kvm_apic_logical_mode {
+       /* All local APICs are software disabled. */
+       KVM_APIC_MODE_SW_DISABLED,
+       /* All software enabled local APICs in xAPIC cluster addressing mode. */
+       KVM_APIC_MODE_XAPIC_CLUSTER,
+       /* All software enabled local APICs in xAPIC flat addressing mode. */
+       KVM_APIC_MODE_XAPIC_FLAT,
+       /* All software enabled local APICs in x2APIC mode. */
+       KVM_APIC_MODE_X2APIC,
+       /*
+        * Optimized map disabled, e.g. not all local APICs in the same logical
+        * mode, same logical ID assigned to multiple APICs, etc.
+        */
+       KVM_APIC_MODE_MAP_DISABLED,
+ };
  
  struct kvm_apic_map {
        struct rcu_head rcu;
-       u8 mode;
+       enum kvm_apic_logical_mode logical_mode;
        u32 max_apic_id;
        union {
                struct kvm_lapic *xapic_flat_map[8];
@@@ -1112,7 -1123,6 +1123,7 @@@ struct msr_bitmap_range 
  
  /* Xen emulation context */
  struct kvm_xen {
 +      struct mutex xen_lock;
        u32 xen_version;
        bool long_mode;
        bool runstate_update_flag;
@@@ -1164,6 -1174,12 +1175,12 @@@ enum kvm_apicv_inhibit 
         */
        APICV_INHIBIT_REASON_BLOCKIRQ,
  
+       /*
+        * APICv is disabled because not all vCPUs have a 1:1 mapping between
+        * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
+        */
+       APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,
        /*
         * For simplicity, the APIC acceleration is inhibited
         * first time either APIC ID or APIC base are changed by the guest
         * AVIC is disabled because SEV doesn't support it.
         */
        APICV_INHIBIT_REASON_SEV,
+       /*
+        * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
+        * mapping between logical ID and vCPU.
+        */
+       APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
  };
  
  struct kvm_arch {
        struct kvm_apic_map __rcu *apic_map;
        atomic_t apic_map_dirty;
  
-       /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
-       struct rw_semaphore apicv_update_lock;
        bool apic_access_memslot_enabled;
+       bool apic_access_memslot_inhibited;
+       /* Protects apicv_inhibit_reasons */
+       struct rw_semaphore apicv_update_lock;
        unsigned long apicv_inhibit_reasons;
  
        gpa_t wall_clock;
@@@ -1512,8 -1535,6 +1536,8 @@@ static inline u16 kvm_lapic_irq_dest_mo
  struct kvm_x86_ops {
        const char *name;
  
 +      int (*check_processor_compatibility)(void);
 +
        int (*hardware_enable)(void);
        void (*hardware_disable)(void);
        void (*hardware_unsetup)(void);
        void (*enable_irq_window)(struct kvm_vcpu *vcpu);
        void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
        bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
+       const unsigned long required_apicv_inhibits;
+       bool allow_apicv_in_x2apic_without_x2apic_virtualization;
        void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
        void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
        void (*hwapic_isr_update)(int isr);
@@@ -1725,6 -1748,9 +1751,6 @@@ struct kvm_x86_nested_ops 
  };
  
  struct kvm_x86_init_ops {
 -      int (*cpu_has_kvm_support)(void);
 -      int (*disabled_by_bios)(void);
 -      int (*check_processor_compatibility)(void);
        int (*hardware_setup)(void);
        unsigned int (*handle_intel_pt_intr)(void);
  
@@@ -1751,9 -1777,6 +1777,9 @@@ extern struct kvm_x86_ops kvm_x86_ops
  #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
  #include <asm/kvm-x86-ops.h>
  
 +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops);
 +void kvm_x86_vendor_exit(void);
 +
  #define __KVM_HAVE_ARCH_VM_ALLOC
  static inline struct kvm *kvm_arch_alloc_vm(void)
  {
@@@ -1976,7 -1999,7 +2002,7 @@@ gpa_t kvm_mmu_gva_to_gpa_system(struct 
  
  bool kvm_apicv_activated(struct kvm *kvm);
  bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
- void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
  void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
                                      enum kvm_apicv_inhibit reason, bool set);
  void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
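
The kvm_host.h hunks above replace the check_apicv_inhibit_reasons() callback
with a constant required_apicv_inhibits bitmask supplied by the vendor module.
A minimal sketch of the resulting pattern (all names below are illustrative,
not KVM's):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: the vendor declares the inhibit reasons it honors as a
 * constant mask, and common code simply tests the corresponding bit. */
enum inhibit_reason { REASON_DISABLE, REASON_HYPERV, REASON_SEV, NR_INHIBIT_REASONS };

static const unsigned long vendor_required_inhibits =
	(1ul << REASON_DISABLE) | (1ul << REASON_SEV);

static bool inhibit_is_relevant(enum inhibit_reason reason)
{
	return vendor_required_inhibits & (1ul << reason);
}

int main(void)
{
	printf("SEV relevant:    %d\n", inhibit_is_relevant(REASON_SEV));	/* 1 */
	printf("HYPERV relevant: %d\n", inhibit_is_relevant(REASON_HYPERV));	/* 0 */
	return 0;
}
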
diff --combined arch/x86/kvm/lapic.c
index cfaf1d8c64ca902330fbccf3ed318c5f646960d6,669ea125b7e229f5454e3f087ab5ec11ec81aa1b..7cf4eebc9bcc8ec3c06f029b8463b388f7c58cfc
@@@ -15,7 -15,6 +15,7 @@@
   *
   * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
   */
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kvm_host.h>
  #include <linux/kvm.h>
@@@ -167,9 -166,19 +167,19 @@@ static bool kvm_use_posted_timer_interr
        return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
  }
  
+ static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
+ {
+       return ((id >> 4) << 16) | (1 << (id & 0xf));
+ }
  static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
-       switch (map->mode) {
+       switch (map->logical_mode) {
+       case KVM_APIC_MODE_SW_DISABLED:
+               /* Arbitrarily use the flat map so that @cluster isn't NULL. */
+               *cluster = map->xapic_flat_map;
+               *mask = 0;
+               return true;
        case KVM_APIC_MODE_X2APIC: {
                u32 offset = (dest_id >> 16) * 16;
                u32 max_apic_id = map->max_apic_id;
                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
                *mask = dest_id & 0xf;
                return true;
+       case KVM_APIC_MODE_MAP_DISABLED:
+               return false;
        default:
-               /* Not optimized. */
+               WARN_ON_ONCE(1);
                return false;
        }
  }
@@@ -207,6 -218,134 +219,134 @@@ static void kvm_apic_map_free(struct rc
        kvfree(map);
  }
  
+ static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
+                                   struct kvm_vcpu *vcpu,
+                                   bool *xapic_id_mismatch)
+ {
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       u32 x2apic_id = kvm_x2apic_id(apic);
+       u32 xapic_id = kvm_xapic_id(apic);
+       u32 physical_id;
+       /*
+        * Deliberately truncate the vCPU ID when detecting a mismatched APIC
+        * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
+        * 32-bit value.  Any unwanted aliasing due to truncation will be
+        * detected below.
+        */
+       if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
+               *xapic_id_mismatch = true;
+       /*
+        * Apply KVM's hotplug hack if userspace has enabled 32-bit APIC IDs.
+        * Allow sending events to vCPUs by their x2APIC ID even if the target
+        * vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs
+        * (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap
+        * and collide).
+        *
+        * Honor the architectural (and KVM's non-optimized) behavior if
+        * userspace has not enabled 32-bit x2APIC IDs.  Each APIC is supposed
+        * to process messages independently.  If multiple vCPUs have the same
+        * effective APIC ID, e.g. due to the x2APIC wrap or because the guest
+        * manually modified its xAPIC IDs, events targeting that ID are
+        * supposed to be recognized by all vCPUs with said ID.
+        */
+       if (vcpu->kvm->arch.x2apic_format) {
+               /* See also kvm_apic_match_physical_addr(). */
+               if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
+                       x2apic_id <= new->max_apic_id)
+                       new->phys_map[x2apic_id] = apic;
+               if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
+                       new->phys_map[xapic_id] = apic;
+       } else {
+               /*
+                * Disable the optimized map if the physical APIC ID is already
+                * mapped, i.e. is aliased to multiple vCPUs.  The optimized
+                * map requires a strict 1:1 mapping between IDs and vCPUs.
+                */
+               if (apic_x2apic_mode(apic))
+                       physical_id = x2apic_id;
+               else
+                       physical_id = xapic_id;
+               if (new->phys_map[physical_id])
+                       return -EINVAL;
+               new->phys_map[physical_id] = apic;
+       }
+       return 0;
+ }
+ static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
+                                       struct kvm_vcpu *vcpu)
+ {
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       enum kvm_apic_logical_mode logical_mode;
+       struct kvm_lapic **cluster;
+       u16 mask;
+       u32 ldr;
+       if (new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
+               return;
+       if (!kvm_apic_sw_enabled(apic))
+               return;
+       ldr = kvm_lapic_get_reg(apic, APIC_LDR);
+       if (!ldr)
+               return;
+       if (apic_x2apic_mode(apic)) {
+               logical_mode = KVM_APIC_MODE_X2APIC;
+       } else {
+               ldr = GET_APIC_LOGICAL_ID(ldr);
+               if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+                       logical_mode = KVM_APIC_MODE_XAPIC_FLAT;
+               else
+                       logical_mode = KVM_APIC_MODE_XAPIC_CLUSTER;
+       }
+       /*
+        * To optimize logical mode delivery, all software-enabled APICs must
+        * be configured for the same mode.
+        */
+       if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
+               new->logical_mode = logical_mode;
+       } else if (new->logical_mode != logical_mode) {
+               new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
+               return;
+       }
+       /*
+        * In x2APIC mode, the LDR is read-only and derived directly from the
+        * x2APIC ID, thus is guaranteed to be addressable.  KVM reuses
+        * kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by
+        * reversing the LDR calculation to get cluster of APICs, i.e. no
+        * additional work is required.
+        */
+       if (apic_x2apic_mode(apic)) {
+               WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+               return;
+       }
+       if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
+                                                       &cluster, &mask))) {
+               new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
+               return;
+       }
+       if (!mask)
+               return;
+       ldr = ffs(mask) - 1;
+       if (!is_power_of_2(mask) || cluster[ldr])
+               new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
+       else
+               cluster[ldr] = apic;
+ }
  /*
   * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
   *
@@@ -225,6 -364,7 +365,7 @@@ void kvm_recalculate_apic_map(struct kv
        struct kvm_vcpu *vcpu;
        unsigned long i;
        u32 max_id = 255; /* enough space for any xAPIC ID */
+       bool xapic_id_mismatch = false;
  
        /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
        if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
                goto out;
  
        new->max_apic_id = max_id;
+       new->logical_mode = KVM_APIC_MODE_SW_DISABLED;
  
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               struct kvm_lapic *apic = vcpu->arch.apic;
-               struct kvm_lapic **cluster;
-               u16 mask;
-               u32 ldr;
-               u8 xapic_id;
-               u32 x2apic_id;
                if (!kvm_apic_present(vcpu))
                        continue;
  
-               xapic_id = kvm_xapic_id(apic);
-               x2apic_id = kvm_x2apic_id(apic);
-               /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
-               if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
-                               x2apic_id <= new->max_apic_id)
-                       new->phys_map[x2apic_id] = apic;
-               /*
-                * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
-                * prevent them from masking VCPUs with APIC ID <= 0xff.
-                */
-               if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
-                       new->phys_map[xapic_id] = apic;
-               if (!kvm_apic_sw_enabled(apic))
-                       continue;
-               ldr = kvm_lapic_get_reg(apic, APIC_LDR);
-               if (apic_x2apic_mode(apic)) {
-                       new->mode |= KVM_APIC_MODE_X2APIC;
-               } else if (ldr) {
-                       ldr = GET_APIC_LOGICAL_ID(ldr);
-                       if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
-                               new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
-                       else
-                               new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+               if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+                       kvfree(new);
+                       new = NULL;
+                       goto out;
                }
  
-               if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
-                       continue;
-               if (mask)
-                       cluster[ffs(mask) - 1] = apic;
+               kvm_recalculate_logical_map(new, vcpu);
        }
  out:
+       /*
+        * The optimized map is effectively KVM's internal version of APICv,
+        * and all unwanted aliasing that results in disabling the optimized
+        * map also applies to APICv.
+        */
+       if (!new)
+               kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
+       else
+               kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
+       if (!new || new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
+               kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
+       else
+               kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
+       if (xapic_id_mismatch)
+               kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+       else
+               kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
        old = rcu_dereference_protected(kvm->arch.apic_map,
                        lockdep_is_held(&kvm->arch.apic_map_lock));
        rcu_assign_pointer(kvm->arch.apic_map, new);
@@@ -361,11 -488,6 +489,6 @@@ static inline void kvm_apic_set_dfr(str
        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
  }
  
- static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
- {
-       return ((id >> 4) << 16) | (1 << (id & 0xf));
- }
  static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
  {
        u32 ldr = kvm_apic_calc_x2apic_ldr(id);
@@@ -942,7 -1064,8 +1065,7 @@@ static void kvm_apic_disabled_lapic_fou
  {
        if (!kvm->arch.disabled_lapic_found) {
                kvm->arch.disabled_lapic_found = true;
 -              printk(KERN_INFO
 -                     "Disabled LAPIC found during irq injection\n");
 +              pr_info("Disabled LAPIC found during irq injection\n");
        }
  }
  
@@@ -951,7 -1074,7 +1074,7 @@@ static bool kvm_apic_is_broadcast_dest(
  {
        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
                if ((irq->dest_id == APIC_BROADCAST &&
-                               map->mode != KVM_APIC_MODE_X2APIC))
+                    map->logical_mode != KVM_APIC_MODE_X2APIC))
                        return true;
                if (irq->dest_id == X2APIC_BROADCAST)
                        return true;
@@@ -1560,7 -1683,7 +1683,7 @@@ static void limit_periodic_timer_freque
  
                if (apic->lapic_timer.period < min_period) {
                        pr_info_ratelimited(
 -                          "kvm: vcpu %i: requested %lld ns "
 +                          "vcpu %i: requested %lld ns "
                            "lapic timer period limited to %lld ns\n",
                            apic->vcpu->vcpu_id,
                            apic->lapic_timer.period, min_period);
@@@ -1845,7 -1968,7 +1968,7 @@@ static bool set_target_expiration(struc
                                deadline = apic->lapic_timer.period;
                        else if (unlikely(deadline > apic->lapic_timer.period)) {
                                pr_info_ratelimited(
 -                                  "kvm: vcpu %i: requested lapic timer restore with "
 +                                  "vcpu %i: requested lapic timer restore with "
                                    "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
                                    "Using initial count to start timer.\n",
                                    apic->vcpu->vcpu_id,
@@@ -2068,19 -2191,6 +2191,6 @@@ static void apic_manage_nmi_watchdog(st
        }
  }
  
- static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
- {
-       struct kvm *kvm = apic->vcpu->kvm;
-       if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
-               return;
-       if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
-               return;
-       kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
- }
  static int get_lvt_index(u32 reg)
  {
        if (reg == APIC_LVTCMCI)
@@@ -2101,7 -2211,6 +2211,6 @@@ static int kvm_lapic_reg_write(struct k
        case APIC_ID:           /* Local APIC ID */
                if (!apic_x2apic_mode(apic)) {
                        kvm_apic_set_xapic_id(apic, val >> 24);
-                       kvm_lapic_xapic_id_updated(apic);
                } else {
                        ret = 1;
                }
@@@ -2284,23 -2393,18 +2393,18 @@@ void kvm_apic_write_nodecode(struct kvm
        struct kvm_lapic *apic = vcpu->arch.apic;
        u64 val;
  
-       if (apic_x2apic_mode(apic)) {
-               if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
-                       return;
-       } else {
-               val = kvm_lapic_get_reg(apic, offset);
-       }
        /*
         * ICR is a single 64-bit register when x2APIC is enabled.  For legacy
         * xAPIC, ICR writes need to go down the common (slightly slower) path
         * to get the upper half from ICR2.
         */
        if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
+               val = kvm_lapic_get_reg64(apic, APIC_ICR);
                kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
                trace_kvm_apic_write(APIC_ICR, val);
        } else {
                /* TODO: optimize to just emulate side effect w/o one more write */
+               val = kvm_lapic_get_reg(apic, offset);
                kvm_lapic_reg_write(apic, offset, (u32)val);
        }
  }
@@@ -2398,7 -2502,7 +2502,7 @@@ void kvm_lapic_set_base(struct kvm_vcp
                kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
  
        if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
-               kvm_vcpu_update_apicv(vcpu);
+               kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
                static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
        }
  
@@@ -2429,6 -2533,78 +2533,78 @@@ void kvm_apic_update_apicv(struct kvm_v
                 */
                apic->isr_count = count_vectors(apic->regs + APIC_ISR);
        }
+       apic->highest_isr_cache = -1;
+ }
+ int kvm_alloc_apic_access_page(struct kvm *kvm)
+ {
+       struct page *page;
+       void __user *hva;
+       int ret = 0;
+       mutex_lock(&kvm->slots_lock);
+       if (kvm->arch.apic_access_memslot_enabled ||
+           kvm->arch.apic_access_memslot_inhibited)
+               goto out;
+       hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+                                     APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
+       if (IS_ERR(hva)) {
+               ret = PTR_ERR(hva);
+               goto out;
+       }
+       page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+       if (is_error_page(page)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       /*
+        * Do not pin the page in memory, so that memory hot-unplug
+        * is able to migrate it.
+        */
+       put_page(page);
+       kvm->arch.apic_access_memslot_enabled = true;
+ out:
+       mutex_unlock(&kvm->slots_lock);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
+ void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu)
+ {
+       struct kvm *kvm = vcpu->kvm;
+       if (!kvm->arch.apic_access_memslot_enabled)
+               return;
+       kvm_vcpu_srcu_read_unlock(vcpu);
+       mutex_lock(&kvm->slots_lock);
+       if (kvm->arch.apic_access_memslot_enabled) {
+               __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+               /*
+                * Clear "enabled" after the memslot is deleted so that a
+                * different vCPU doesn't get a false negative when checking
+                * the flag out of slots_lock.  No additional memory barrier is
+                * needed as modifying memslots requires waiting for other vCPUs to
+                * drop SRCU (see above), and false positives are ok as the
+                * flag is rechecked after acquiring slots_lock.
+                */
+               kvm->arch.apic_access_memslot_enabled = false;
+               /*
+                * Mark the memslot as inhibited to prevent reallocating the
+                * memslot during vCPU creation, e.g. if a vCPU is hotplugged.
+                */
+               kvm->arch.apic_access_memslot_inhibited = true;
+       }
+       mutex_unlock(&kvm->slots_lock);
+       kvm_vcpu_srcu_read_lock(vcpu);
  }
  
  void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
                kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
        }
        kvm_apic_update_apicv(vcpu);
-       apic->highest_isr_cache = -1;
        update_divide_count(apic);
        atomic_set(&apic->lapic_timer.pending, 0);
  
@@@ -2756,9 -2931,6 +2931,6 @@@ int kvm_apic_set_state(struct kvm_vcpu 
        }
        memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
  
-       if (!apic_x2apic_mode(apic))
-               kvm_lapic_xapic_id_updated(apic);
        atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
        kvm_recalculate_apic_map(vcpu->kvm);
        kvm_apic_set_version(vcpu);
        __start_apic_timer(apic, APIC_TMCCT);
        kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
        kvm_apic_update_apicv(vcpu);
-       apic->highest_isr_cache = -1;
        if (apic->apicv_active) {
                static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
                static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
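
The lapic.c hunks above lean on the fact that the x2APIC LDR is read-only and
derived from the x2APIC ID, so the calculation can be inverted. A standalone
sketch of that round trip (assumed helper names, mirroring
kvm_apic_calc_x2apic_ldr() in the diff):

#include <assert.h>
#include <stdint.h>

/* Standalone sketch: LDR = (cluster << 16) | (1 << (id & 0xf)) with
 * cluster = id >> 4, so a single-bit logical destination maps back to
 * exactly one x2APIC ID. */
static uint32_t calc_x2apic_ldr(uint32_t id)
{
	return ((id >> 4) << 16) | (1u << (id & 0xf));
}

static uint32_t x2apic_id_from_ldr(uint32_t ldr)
{
	uint32_t cluster = ldr >> 16;
	uint32_t bit = (uint32_t)__builtin_ctz(ldr & 0xffff);	/* GCC/Clang builtin */

	return (cluster << 4) | bit;
}

int main(void)
{
	for (uint32_t id = 0; id < 256; id++)
		assert(x2apic_id_from_ldr(calc_x2apic_ldr(id)) == id);
	return 0;
}
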
diff --combined arch/x86/kvm/svm/avic.c
index f52f5e0dd4658fc15ad430d9e4a9953f2c8f99d3,14677bc31b83afb7f5b76a62fac2178656e0d14e..b3928150a37ce8d5a1d93bba10fc314c8c5e9e11
@@@ -12,7 -12,7 +12,7 @@@
   *   Avi Kivity   <[email protected]>
   */
  
 -#define pr_fmt(fmt) "SVM: " fmt
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kvm_types.h>
  #include <linux/hashtable.h>
@@@ -53,7 -53,7 +53,7 @@@ static DEFINE_HASHTABLE(svm_vm_data_has
  static u32 next_vm_id = 0;
  static bool next_vm_id_wrapped = 0;
  static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
- enum avic_modes avic_mode;
+ bool x2avic_enabled;
  
  /*
   * This is a wrapper of struct amd_iommu_ir_data.
@@@ -72,20 -72,25 +72,25 @@@ static void avic_activate_vmcb(struct v
  
        vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
  
-       /* Note:
-        * KVM can support hybrid-AVIC mode, where KVM emulates x2APIC
-        * MSR accesses, while interrupt injection to a running vCPU
-        * can be achieved using AVIC doorbell. The AVIC hardware still
-        * accelerate MMIO accesses, but this does not cause any harm
-        * as the guest is not supposed to access xAPIC mmio when uses x2APIC.
+       /*
+        * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
+        * accesses, while interrupt injection to a running vCPU can be
+        * achieved using AVIC doorbell.  KVM disables the APIC access page
+        * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
+        * AVIC in hybrid mode activates only the doorbell mechanism.
         */
-       if (apic_x2apic_mode(svm->vcpu.arch.apic) &&
-           avic_mode == AVIC_MODE_X2) {
+       if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
                vmcb->control.int_ctl |= X2APIC_MODE_MASK;
                vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
                /* Disabling MSR intercept for x2APIC registers */
                svm_set_x2apic_msr_interception(svm, false);
        } else {
+               /*
+                * Flush the TLB, the guest may have inserted a non-APIC
+                * mapping into the TLB while AVIC was disabled.
+                */
+               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
                /* For xAVIC and hybrid-xAVIC modes */
                vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
                /* Enabling MSR intercept for x2APIC registers */
@@@ -241,8 -246,8 +246,8 @@@ static u64 *avic_get_physical_id_entry(
        u64 *avic_physical_id_table;
        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
  
-       if ((avic_mode == AVIC_MODE_X1 && index > AVIC_MAX_PHYSICAL_ID) ||
-           (avic_mode == AVIC_MODE_X2 && index > X2AVIC_MAX_PHYSICAL_ID))
+       if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
+           (index > X2AVIC_MAX_PHYSICAL_ID))
                return NULL;
  
        avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
        return &avic_physical_id_table[index];
  }
  
- /*
-  * Note:
-  * AVIC hardware walks the nested page table to check permissions,
-  * but does not use the SPA address specified in the leaf page
-  * table entry since it uses  address in the AVIC_BACKING_PAGE pointer
-  * field of the VMCB. Therefore, we set up the
-  * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
-  */
- static int avic_alloc_access_page(struct kvm *kvm)
- {
-       void __user *ret;
-       int r = 0;
-       mutex_lock(&kvm->slots_lock);
-       if (kvm->arch.apic_access_memslot_enabled)
-               goto out;
-       ret = __x86_set_memory_region(kvm,
-                                     APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-                                     APIC_DEFAULT_PHYS_BASE,
-                                     PAGE_SIZE);
-       if (IS_ERR(ret)) {
-               r = PTR_ERR(ret);
-               goto out;
-       }
-       kvm->arch.apic_access_memslot_enabled = true;
- out:
-       mutex_unlock(&kvm->slots_lock);
-       return r;
- }
  static int avic_init_backing_page(struct kvm_vcpu *vcpu)
  {
        u64 *entry, new_entry;
        int id = vcpu->vcpu_id;
        struct vcpu_svm *svm = to_svm(vcpu);
  
-       if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||
-           (avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID))
+       if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
+           (id > X2AVIC_MAX_PHYSICAL_ID))
                return -EINVAL;
  
        if (!vcpu->arch.apic->regs)
        if (kvm_apicv_activated(vcpu->kvm)) {
                int ret;
  
-               ret = avic_alloc_access_page(vcpu->kvm);
+               /*
+                * Note, AVIC hardware walks the nested page table to check
+                * permissions, but does not use the SPA address specified in
+                * the leaf SPTE since it uses address in the AVIC_BACKING_PAGE
+                * pointer field of the VMCB.
+                */
+               ret = kvm_alloc_apic_access_page(vcpu->kvm);
                if (ret)
                        return ret;
        }
@@@ -339,6 -317,60 +317,60 @@@ void avic_ring_doorbell(struct kvm_vcp
        put_cpu();
  }
  
+ static void avic_kick_vcpu(struct kvm_vcpu *vcpu, u32 icrl)
+ {
+       vcpu->arch.apic->irr_pending = true;
+       svm_complete_interrupt_delivery(vcpu,
+                                       icrl & APIC_MODE_MASK,
+                                       icrl & APIC_INT_LEVELTRIG,
+                                       icrl & APIC_VECTOR_MASK);
+ }
+ static void avic_kick_vcpu_by_physical_id(struct kvm *kvm, u32 physical_id,
+                                         u32 icrl)
+ {
+       /*
+        * KVM inhibits AVIC if any vCPU ID diverges from the vCPU's APIC ID,
+        * i.e. APIC ID == vCPU ID.
+        */
+       struct kvm_vcpu *target_vcpu = kvm_get_vcpu_by_id(kvm, physical_id);
+       /* Once again, nothing to do if the target vCPU doesn't exist. */
+       if (unlikely(!target_vcpu))
+               return;
+       avic_kick_vcpu(target_vcpu, icrl);
+ }
+ static void avic_kick_vcpu_by_logical_id(struct kvm *kvm, u32 *avic_logical_id_table,
+                                        u32 logid_index, u32 icrl)
+ {
+       u32 physical_id;
+       if (avic_logical_id_table) {
+               u32 logid_entry = avic_logical_id_table[logid_index];
+               /* Nothing to do if the logical destination is invalid. */
+               if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
+                       return;
+               physical_id = logid_entry &
+                             AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+       } else {
+               /*
+                * For x2APIC, the logical APIC ID is a read-only value that is
+                * derived from the x2APIC ID, thus the x2APIC ID can be found
+                * by reversing the calculation (stored in logid_index).  Note,
+                * bits 31:20 of the x2APIC ID aren't propagated to the logical
+                * ID, but KVM limits the x2APIC ID to KVM_MAX_VCPU_IDS.
+                */
+               physical_id = logid_index;
+       }
+       avic_kick_vcpu_by_physical_id(kvm, physical_id, icrl);
+ }
  /*
   * A fast-path version of avic_kick_target_vcpus(), which attempts to match
   * destination APIC ID to vCPU without looping through all vCPUs.
  static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
                                       u32 icrl, u32 icrh, u32 index)
  {
-       u32 l1_physical_id, dest;
-       struct kvm_vcpu *target_vcpu;
        int dest_mode = icrl & APIC_DEST_MASK;
        int shorthand = icrl & APIC_SHORT_MASK;
        struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+       u32 dest;
  
        if (shorthand != APIC_DEST_NOSHORT)
                return -EINVAL;
                if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
                        return -EINVAL;
  
-               l1_physical_id = dest;
-               if (WARN_ON_ONCE(l1_physical_id != index))
+               if (WARN_ON_ONCE(dest != index))
                        return -EINVAL;
  
+               avic_kick_vcpu_by_physical_id(kvm, dest, icrl);
        } else {
-               u32 bitmap, cluster;
-               int logid_index;
+               u32 *avic_logical_id_table;
+               unsigned long bitmap, i;
+               u32 cluster;
  
                if (apic_x2apic_mode(source)) {
                        /* 16 bit dest mask, 16 bit cluster id */
-                       bitmap = dest & 0xFFFF0000;
+                       bitmap = dest & 0xFFFF;
                        cluster = (dest >> 16) << 4;
                } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
                        /* 8 bit dest mask*/
                        cluster = (dest >> 4) << 2;
                }
  
+               /* Nothing to do if there are no destinations in the cluster. */
                if (unlikely(!bitmap))
-                       /* guest bug: nobody to send the logical interrupt to */
                        return 0;
  
-               if (!is_power_of_2(bitmap))
-                       /* multiple logical destinations, use slow path */
-                       return -EINVAL;
-               logid_index = cluster + __ffs(bitmap);
-               if (!apic_x2apic_mode(source)) {
-                       u32 *avic_logical_id_table =
-                               page_address(kvm_svm->avic_logical_id_table_page);
-                       u32 logid_entry = avic_logical_id_table[logid_index];
-                       if (WARN_ON_ONCE(index != logid_index))
-                               return -EINVAL;
-                       /* guest bug: non existing/reserved logical destination */
-                       if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
-                               return 0;
-                       l1_physical_id = logid_entry &
-                                        AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
-               } else {
-                       /*
-                        * For x2APIC logical mode, cannot leverage the index.
-                        * Instead, calculate physical ID from logical ID in ICRH.
-                        */
-                       int cluster = (icrh & 0xffff0000) >> 16;
-                       int apic = ffs(icrh & 0xffff) - 1;
-                       /*
-                        * If the x2APIC logical ID sub-field (i.e. icrh[15:0])
-                        * contains anything but a single bit, we cannot use the
-                        * fast path, because it is limited to a single vCPU.
-                        */
-                       if (apic < 0 || icrh != (1 << apic))
-                               return -EINVAL;
+               if (apic_x2apic_mode(source))
+                       avic_logical_id_table = NULL;
+               else
+                       avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
  
-                       l1_physical_id = (cluster << 4) + apic;
-               }
+               /*
+                * AVIC is inhibited if vCPUs aren't mapped 1:1 with logical
+                * IDs, thus each bit in the destination is guaranteed to map
+                * to at most one vCPU.
+                */
+               for_each_set_bit(i, &bitmap, 16)
+                       avic_kick_vcpu_by_logical_id(kvm, avic_logical_id_table,
+                                                    cluster + i, icrl);
        }
  
-       target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
-       if (unlikely(!target_vcpu))
-               /* guest bug: non existing vCPU is a target of this IPI*/
-               return 0;
-       target_vcpu->arch.apic->irr_pending = true;
-       svm_complete_interrupt_delivery(target_vcpu,
-                                       icrl & APIC_MODE_MASK,
-                                       icrl & APIC_INT_LEVELTRIG,
-                                       icrl & APIC_VECTOR_MASK);
        return 0;
  }
  
  static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
                                   u32 icrl, u32 icrh, u32 index)
  {
+       u32 dest = apic_x2apic_mode(source) ? icrh : GET_XAPIC_DEST_FIELD(icrh);
        unsigned long i;
        struct kvm_vcpu *vcpu;
  
         * since entered the guest will have processed pending IRQs at VMRUN.
         */
        kvm_for_each_vcpu(i, vcpu, kvm) {
-               u32 dest;
-               if (apic_x2apic_mode(vcpu->arch.apic))
-                       dest = icrh;
-               else
-                       dest = GET_XAPIC_DEST_FIELD(icrh);
                if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
-                                       dest, icrl & APIC_DEST_MASK)) {
-                       vcpu->arch.apic->irr_pending = true;
-                       svm_complete_interrupt_delivery(vcpu,
-                                                       icrl & APIC_MODE_MASK,
-                                                       icrl & APIC_INT_LEVELTRIG,
-                                                       icrl & APIC_VECTOR_MASK);
-               }
+                                       dest, icrl & APIC_DEST_MASK))
+                       avic_kick_vcpu(vcpu, icrl);
        }
  }
  
@@@ -496,14 -480,18 +480,18 @@@ int avic_incomplete_ipi_interception(st
        trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
  
        switch (id) {
+       case AVIC_IPI_FAILURE_INVALID_TARGET:
        case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
                /*
                 * Emulate IPIs that are not handled by AVIC hardware, which
-                * only virtualizes Fixed, Edge-Triggered INTRs.  The exit is
-                * a trap, e.g. ICR holds the correct value and RIP has been
-                * advanced, KVM is responsible only for emulating the IPI.
-                * Sadly, hardware may sometimes leave the BUSY flag set, in
-                * which case KVM needs to emulate the ICR write as well in
+                * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+                * if _any_ targets are invalid, e.g. if the logical mode mask
+                * is a superset of running vCPUs.
+                *
+                * The exit is a trap, e.g. ICR holds the correct value and RIP
+                * has been advanced, KVM is responsible only for emulating the
+                * IPI.  Sadly, hardware may sometimes leave the BUSY flag set,
+                * in which case KVM needs to emulate the ICR write as well in
                 * order to clear the BUSY flag.
                 */
                if (icrl & APIC_ICR_BUSY)
                 */
                avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
                break;
-       case AVIC_IPI_FAILURE_INVALID_TARGET:
-               break;
        case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
                WARN_ONCE(1, "Invalid backing page\n");
                break;
@@@ -541,33 -527,33 +527,33 @@@ unsigned long avic_vcpu_get_apicv_inhib
  static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
  {
        struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
-       int index;
        u32 *logical_apic_id_table;
-       int dlid = GET_APIC_LOGICAL_ID(ldr);
-       if (!dlid)
-               return NULL;
+       u32 cluster, index;
  
-       if (flat) { /* flat */
-               index = ffs(dlid) - 1;
-               if (index > 7)
-                       return NULL;
-       } else { /* cluster */
-               int cluster = (dlid & 0xf0) >> 4;
-               int apic = ffs(dlid & 0x0f) - 1;
+       ldr = GET_APIC_LOGICAL_ID(ldr);
  
-               if ((apic < 0) || (apic > 7) ||
-                   (cluster >= 0xf))
+       if (flat) {
+               cluster = 0;
+       } else {
+               cluster = (ldr >> 4);
+               if (cluster >= 0xf)
                        return NULL;
-               index = (cluster << 2) + apic;
+               ldr &= 0xf;
        }
+       if (!ldr || !is_power_of_2(ldr))
+               return NULL;
+       index = __ffs(ldr);
+       if (WARN_ON_ONCE(index > 7))
+               return NULL;
+       index += (cluster << 2);
  
        logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
  
        return &logical_apic_id_table[index];
  }
  
- static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
+ static void avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
  {
        bool flat;
        u32 *entry, new_entry;
        flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
        entry = avic_get_logical_id_entry(vcpu, ldr, flat);
        if (!entry)
-               return -EINVAL;
+               return;
  
        new_entry = READ_ONCE(*entry);
        new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
        new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
        new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
        WRITE_ONCE(*entry, new_entry);
-       return 0;
  }
  
  static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
                clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
  }
  
- static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
+ static void avic_handle_ldr_update(struct kvm_vcpu *vcpu)
  {
-       int ret = 0;
        struct vcpu_svm *svm = to_svm(vcpu);
        u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
        u32 id = kvm_xapic_id(vcpu->arch.apic);
  
        /* AVIC does not support LDR update for x2APIC */
        if (apic_x2apic_mode(vcpu->arch.apic))
-               return 0;
+               return;
  
        if (ldr == svm->ldr_reg)
-               return 0;
+               return;
  
        avic_invalidate_logical_id_entry(vcpu);
  
-       if (ldr)
-               ret = avic_ldr_write(vcpu, id, ldr);
-       if (!ret)
-               svm->ldr_reg = ldr;
-       return ret;
+       svm->ldr_reg = ldr;
+       avic_ldr_write(vcpu, id, ldr);
  }
  
  static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
@@@ -645,12 -623,14 +623,14 @@@ static int avic_unaccel_trap_write(stru
  
        switch (offset) {
        case APIC_LDR:
-               if (avic_handle_ldr_update(vcpu))
-                       return 0;
+               avic_handle_ldr_update(vcpu);
                break;
        case APIC_DFR:
                avic_handle_dfr_update(vcpu);
                break;
+       case APIC_RRR:
+               /* Ignore writes to Read Remote Data, it's read-only. */
+               return 1;
        default:
                break;
        }
@@@ -739,18 -719,6 +719,6 @@@ void avic_apicv_post_state_restore(stru
        avic_handle_ldr_update(vcpu);
  }
  
- void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
- {
-       if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
-               return;
-       if (kvm_get_apic_mode(vcpu) == LAPIC_MODE_INVALID) {
-               WARN_ONCE(true, "Invalid local APIC state (vcpu_id=%d)", vcpu->vcpu_id);
-               return;
-       }
-       avic_refresh_apicv_exec_ctrl(vcpu);
- }
  static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
  {
        int ret = 0;
@@@ -995,23 -963,6 +963,6 @@@ out
        return ret;
  }
  
- bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
- {
-       ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
-                         BIT(APICV_INHIBIT_REASON_ABSENT) |
-                         BIT(APICV_INHIBIT_REASON_HYPERV) |
-                         BIT(APICV_INHIBIT_REASON_NESTED) |
-                         BIT(APICV_INHIBIT_REASON_IRQWIN) |
-                         BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
-                         BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
-                         BIT(APICV_INHIBIT_REASON_SEV)      |
-                         BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
-                         BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
-       return supported & BIT(reason);
- }
  static inline int
  avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
  {
@@@ -1064,6 -1015,7 +1015,7 @@@ void avic_vcpu_load(struct kvm_vcpu *vc
                return;
  
        entry = READ_ONCE(*(svm->avic_physical_id_cache));
+       WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
  
        entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
        entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
@@@ -1092,17 -1044,15 +1044,15 @@@ void avic_vcpu_put(struct kvm_vcpu *vcp
        WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
  }
  
- void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+ void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb *vmcb = svm->vmcb01.ptr;
-       bool activated = kvm_vcpu_apicv_active(vcpu);
  
-       if (!enable_apicv)
+       if (!lapic_in_kernel(vcpu) || !enable_apicv)
                return;
  
-       if (activated) {
+       if (kvm_vcpu_apicv_active(vcpu)) {
                /**
                 * During AVIC temporary deactivation, guest could update
                 * APIC ID, DFR and LDR registers, which would not be trapped
                avic_deactivate_vmcb(svm);
        }
        vmcb_mark_dirty(vmcb, VMCB_AVIC);
+ }
+ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+ {
+       bool activated = kvm_vcpu_apicv_active(vcpu);
+       if (!enable_apicv)
+               return;
+       avic_refresh_virtual_apic_mode(vcpu);
  
        if (activated)
                avic_vcpu_load(vcpu, vcpu->cpu);
@@@ -1165,32 -1125,32 +1125,32 @@@ bool avic_hardware_setup(struct kvm_x86
        if (!npt_enabled)
                return false;
  
+       /* AVIC is a prerequisite for x2AVIC. */
+       if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
+               if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
+                       pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
+                       pr_warn(FW_BUG "Try enable AVIC using force_avic option");
+               }
+               return false;
+       }
        if (boot_cpu_has(X86_FEATURE_AVIC)) {
-               avic_mode = AVIC_MODE_X1;
                pr_info("AVIC enabled\n");
        } else if (force_avic) {
                /*
                 * Some older systems do not advertise AVIC support.
                 * See Revision Guide for specific AMD processor for more detail.
                 */
-               avic_mode = AVIC_MODE_X1;
                pr_warn("AVIC is not supported in CPUID but force enabled");
                pr_warn("Your system might crash and burn");
        }
  
        /* AVIC is a prerequisite for x2AVIC. */
-       if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
-               if (avic_mode == AVIC_MODE_X1) {
-                       avic_mode = AVIC_MODE_X2;
-                       pr_info("x2AVIC enabled\n");
-               } else {
-                       pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
-                       pr_warn(FW_BUG "Try enable AVIC using force_avic option");
-               }
-       }
+       x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
+       if (x2avic_enabled)
+               pr_info("x2AVIC enabled\n");
  
-       if (avic_mode != AVIC_MODE_NONE)
-               amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+       amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
  
-       return !!avic_mode;
+       return true;
  }
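
For the xAPIC side, the reworked avic_get_logical_id_entry() above maps a
logical ID to an index into AVIC's 32-entry logical ID table. A standalone
sketch of that indexing (hypothetical helper; it returns -1 where the kernel
code returns NULL):

#include <assert.h>
#include <stdint.h>

static int avic_logical_index(uint8_t ldr, int flat)
{
	uint32_t cluster, bits;

	if (flat) {
		cluster = 0;
		bits = ldr;		/* 8-bit flat destination mask */
	} else {
		cluster = ldr >> 4;
		if (cluster >= 0xf)	/* cluster 0xf is broadcast */
			return -1;
		bits = ldr & 0xf;	/* 4-bit mask within the cluster */
	}

	if (!bits || (bits & (bits - 1)))	/* need exactly one bit set */
		return -1;

	return (int)((cluster << 2) + (uint32_t)__builtin_ctz(bits));
}

int main(void)
{
	assert(avic_logical_index(0x04, 1) == 2);	/* flat, bit 2 */
	assert(avic_logical_index(0x32, 0) == 13);	/* cluster 3, bit 1 */
	assert(avic_logical_index(0x30, 0) == -1);	/* empty in-cluster mask */
	return 0;
}
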
diff --combined arch/x86/kvm/svm/nested.c
index 500da957e5908ca28f9f49c1061e2fdd0c1f68e6,34ac03969f28d39766ac5073faa45697c3722ad9..700df66d23c745c4b3facef6b051ca7ae0edfbf8
@@@ -12,7 -12,7 +12,7 @@@
   *   Avi Kivity   <[email protected]>
   */
  
 -#define pr_fmt(fmt) "SVM: " fmt
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kvm_types.h>
  #include <linux/kvm_host.h>
@@@ -138,13 -138,15 +138,13 @@@ void recalc_intercepts(struct vcpu_svm 
                c->intercepts[i] = h->intercepts[i];
  
        if (g->int_ctl & V_INTR_MASKING_MASK) {
 -              /* We only want the cr8 intercept bits of L1 */
 -              vmcb_clr_intercept(c, INTERCEPT_CR8_READ);
 -              vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
 -
                /*
 -               * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
 -               * affect any interrupt we may want to inject; therefore,
 -               * interrupt window vmexits are irrelevant to L0.
 +               * Once running L2 with HF_VINTR_MASK, EFLAGS.IF and CR8
 +               * does not affect any interrupt we may want to inject;
 +               * therefore, writes to CR8 are irrelevant to L0, as are
 +               * interrupt window vmexits.
                 */
 +              vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
                vmcb_clr_intercept(c, INTERCEPT_VINTR);
        }
  
@@@ -1104,7 -1106,7 +1104,7 @@@ int nested_svm_vmexit(struct vcpu_svm *
         * to benefit from it right away.
         */
        if (kvm_apicv_activated(vcpu->kvm))
-               kvm_vcpu_update_apicv(vcpu);
+               __kvm_vcpu_update_apicv(vcpu);
  
        return 0;
  }
diff --combined arch/x86/kvm/svm/svm.c
index 799b24801d310134903920c4bdbee78384659207,f2453df77727e9b704ea1c88edf54d56525fd9af..d13cf53e739067485f64b303b2f945b93117ae67
@@@ -1,4 -1,4 +1,4 @@@
 -#define pr_fmt(fmt) "SVM: " fmt
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kvm_host.h>
  
@@@ -519,37 -519,21 +519,37 @@@ static void svm_init_osvw(struct kvm_vc
                vcpu->arch.osvw.status |= 1;
  }
  
 -static int has_svm(void)
 +static bool kvm_is_svm_supported(void)
  {
 +      int cpu = raw_smp_processor_id();
        const char *msg;
 +      u64 vm_cr;
  
        if (!cpu_has_svm(&msg)) {
 -              printk(KERN_INFO "has_svm: %s\n", msg);
 -              return 0;
 +              pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
 +              return false;
        }
  
        if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
                pr_info("KVM is unsupported when running as an SEV guest\n");
 -              return 0;
 +              return false;
        }
  
 -      return 1;
 +      rdmsrl(MSR_VM_CR, vm_cr);
 +      if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE)) {
 +              pr_err("SVM disabled (by BIOS) in MSR_VM_CR on CPU %d\n", cpu);
 +              return false;
 +      }
 +
 +      return true;
 +}
 +
 +static int svm_check_processor_compat(void)
 +{
 +      if (!kvm_is_svm_supported())
 +              return -EIO;
 +
 +      return 0;
  }
  
  void __svm_write_tsc_multiplier(u64 multiplier)
@@@ -588,6 -572,10 +588,6 @@@ static int svm_hardware_enable(void
        if (efer & EFER_SVME)
                return -EBUSY;
  
 -      if (!has_svm()) {
 -              pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
 -              return -EINVAL;
 -      }
        sd = per_cpu_ptr(&svm_data, me);
        sd->asid_generation = 1;
        sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
@@@ -825,7 -813,7 +825,7 @@@ void svm_set_x2apic_msr_interception(st
        if (intercept == svm->x2avic_msrs_intercepted)
                return;
  
-       if (avic_mode != AVIC_MODE_X2 ||
+       if (!x2avic_enabled ||
            !apic_x2apic_mode(svm->vcpu.arch.apic))
                return;
  
@@@ -2088,7 -2076,7 +2088,7 @@@ static void svm_handle_mce(struct kvm_v
                 * Erratum 383 triggered. Guest state is corrupt so kill the
                 * guest.
                 */
 -              pr_err("KVM: Guest triggered AMD Erratum 383\n");
 +              pr_err("Guest triggered AMD Erratum 383\n");
  
                kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
  
@@@ -2717,9 -2705,9 +2717,9 @@@ static int svm_get_msr_feature(struct k
        msr->data = 0;
  
        switch (msr->index) {
 -      case MSR_F10H_DECFG:
 -              if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
 -                      msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
 +      case MSR_AMD64_DE_CFG:
 +              if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC))
 +                      msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE;
                break;
        default:
                return KVM_MSR_RET_INVALID;
@@@ -2818,7 -2806,7 +2818,7 @@@ static int svm_get_msr(struct kvm_vcpu 
                        msr_info->data = 0x1E;
                }
                break;
 -      case MSR_F10H_DECFG:
 +      case MSR_AMD64_DE_CFG:
                msr_info->data = svm->msr_decfg;
                break;
        default:
@@@ -3047,7 -3035,7 +3047,7 @@@ static int svm_set_msr(struct kvm_vcpu 
        case MSR_VM_IGNNE:
                vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
                break;
 -      case MSR_F10H_DECFG: {
 +      case MSR_AMD64_DE_CFG: {
                struct kvm_msr_entry msr_entry;
  
                msr_entry.index = msr->index;
@@@ -4088,6 -4076,17 +4088,6 @@@ static void svm_load_mmu_pgd(struct kvm
        vmcb_mark_dirty(svm->vmcb, VMCB_CR);
  }
  
 -static int is_disabled(void)
 -{
 -      u64 vm_cr;
 -
 -      rdmsrl(MSR_VM_CR, vm_cr);
 -      if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
 -              return 1;
 -
 -      return 0;
 -}
 -
  static void
  svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
  {
        hypercall[2] = 0xd9;
  }
  
 -static int __init svm_check_processor_compat(void)
 -{
 -      return 0;
 -}
 -
  /*
   * The kvm parameter can be NULL (module initialization, or invocation before
   * VM creation). Be sure to check the kvm parameter before using it.
@@@ -4625,7 -4629,7 +4625,7 @@@ static bool svm_can_emulate_instruction
        smap = cr4 & X86_CR4_SMAP;
        is_user = svm_get_cpl(vcpu) == 3;
        if (smap && (!smep || is_user)) {
 -              pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
 +              pr_err_ratelimited("SEV Guest triggered AMD Erratum 1096\n");
  
                /*
                 * If the fault occurred in userspace, arbitrarily inject #GP
@@@ -4697,9 -4701,7 +4697,9 @@@ static int svm_vm_init(struct kvm *kvm
  }
  
  static struct kvm_x86_ops svm_x86_ops __initdata = {
 -      .name = "kvm_amd",
 +      .name = KBUILD_MODNAME,
 +
 +      .check_processor_compatibility = svm_check_processor_compat,
  
        .hardware_unsetup = svm_hardware_unsetup,
        .hardware_enable = svm_hardware_enable,
        .enable_nmi_window = svm_enable_nmi_window,
        .enable_irq_window = svm_enable_irq_window,
        .update_cr8_intercept = svm_update_cr8_intercept,
-       .set_virtual_apic_mode = avic_set_virtual_apic_mode,
+       .set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
        .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
-       .check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
        .apicv_post_state_restore = avic_apicv_post_state_restore,
+       .required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
  
        .get_exit_info = svm_get_exit_info,
  
@@@ -4976,7 -4978,7 +4976,7 @@@ static __init int svm_hardware_setup(vo
        }
  
        if (nested) {
 -              printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
 +              pr_info("Nested Virtualization enabled\n");
                kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
        }
  
        /* Force VM NPT level equal to the host's paging level */
        kvm_configure_mmu(npt_enabled, get_npt_level(),
                          get_npt_level(), PG_LEVEL_1G);
 -      pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
 +      pr_info("Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
  
        /* Setup shadow_me_value and shadow_me_mask */
        kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask);
                svm_x86_ops.vcpu_blocking = NULL;
                svm_x86_ops.vcpu_unblocking = NULL;
                svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
+       } else if (!x2avic_enabled) {
+               svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
        }
  
        if (vls) {
@@@ -5084,7 -5088,10 +5086,7 @@@ err
  
  
  static struct kvm_x86_init_ops svm_init_ops __initdata = {
 -      .cpu_has_kvm_support = has_svm,
 -      .disabled_by_bios = is_disabled,
        .hardware_setup = svm_hardware_setup,
 -      .check_processor_compatibility = svm_check_processor_compat,
  
        .runtime_ops = &svm_x86_ops,
        .pmu_ops = &amd_pmu_ops,
  
  static int __init svm_init(void)
  {
 +      int r;
 +
        __unused_size_checks();
  
 -      return kvm_init(&svm_init_ops, sizeof(struct vcpu_svm),
 -                      __alignof__(struct vcpu_svm), THIS_MODULE);
 +      if (!kvm_is_svm_supported())
 +              return -EOPNOTSUPP;
 +
 +      r = kvm_x86_vendor_init(&svm_init_ops);
 +      if (r)
 +              return r;
 +
 +      /*
  +       * Common KVM initialization _must_ come last; after this point,
  +       * /dev/kvm is exposed to userspace!
 +       */
 +      r = kvm_init(sizeof(struct vcpu_svm), __alignof__(struct vcpu_svm),
 +                   THIS_MODULE);
 +      if (r)
 +              goto err_kvm_init;
 +
 +      return 0;
 +
 +err_kvm_init:
 +      kvm_x86_vendor_exit();
 +      return r;
  }
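
The reworked svm_init() stages initialization so that kvm_init(), which exposes /dev/kvm to userspace, runs strictly last, and unwinds with a goto label if that final step fails. A minimal standalone sketch of the same staged-init/unwind pattern (the function names below are invented stand-ins, not KVM code):

#include <stdio.h>

/* Hypothetical stand-ins for the real setup/teardown steps. */
static int vendor_init(void)  { puts("vendor init");  return 0; }
static void vendor_exit(void) { puts("vendor exit"); }
static int common_init(void)  { puts("common init: device node now visible"); return -1; /* simulate failure */ }

static int module_init_example(void)
{
        int r;

        r = vendor_init();      /* vendor-specific setup first */
        if (r)
                return r;

        /* The step that exposes the interface to users comes strictly last. */
        r = common_init();
        if (r)
                goto err_common;

        return 0;

err_common:
        vendor_exit();          /* unwind only what already succeeded */
        return r;
}

int main(void)
{
        return module_init_example() ? 1 : 0;
}
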
  
  static void __exit svm_exit(void)
  {
        kvm_exit();
 +      kvm_x86_vendor_exit();
  }
  
  module_init(svm_init)
diff --combined arch/x86/kvm/vmx/vmx.c
index 73005d7e4e43c29bb03973bae40cc2bbe2ad1936,ad2ac66ef32e271b148692826c79866d829bee1c..c788aa3826119fe051dcdd8dab7c0ae5857c4211
@@@ -12,7 -12,6 +12,7 @@@
   *   Avi Kivity   <[email protected]>
   *   Yaniv Kamay  <[email protected]>
   */
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/highmem.h>
  #include <linux/hrtimer.h>
@@@ -445,36 -444,36 +445,36 @@@ void vmread_error(unsigned long field, 
        if (fault)
                kvm_spurious_fault();
        else
 -              vmx_insn_failed("kvm: vmread failed: field=%lx\n", field);
 +              vmx_insn_failed("vmread failed: field=%lx\n", field);
  }
  
  noinline void vmwrite_error(unsigned long field, unsigned long value)
  {
 -      vmx_insn_failed("kvm: vmwrite failed: field=%lx val=%lx err=%u\n",
 +      vmx_insn_failed("vmwrite failed: field=%lx val=%lx err=%u\n",
                        field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
  }
  
  noinline void vmclear_error(struct vmcs *vmcs, u64 phys_addr)
  {
 -      vmx_insn_failed("kvm: vmclear failed: %p/%llx err=%u\n",
 +      vmx_insn_failed("vmclear failed: %p/%llx err=%u\n",
                        vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
  }
  
  noinline void vmptrld_error(struct vmcs *vmcs, u64 phys_addr)
  {
 -      vmx_insn_failed("kvm: vmptrld failed: %p/%llx err=%u\n",
 +      vmx_insn_failed("vmptrld failed: %p/%llx err=%u\n",
                        vmcs, phys_addr, vmcs_read32(VM_INSTRUCTION_ERROR));
  }
  
  noinline void invvpid_error(unsigned long ext, u16 vpid, gva_t gva)
  {
 -      vmx_insn_failed("kvm: invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
 +      vmx_insn_failed("invvpid failed: ext=0x%lx vpid=%u gva=0x%lx\n",
                        ext, vpid, gva);
  }
  
  noinline void invept_error(unsigned long ext, u64 eptp, gpa_t gpa)
  {
 -      vmx_insn_failed("kvm: invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
 +      vmx_insn_failed("invept failed: ext=0x%lx eptp=%llx gpa=0x%llx\n",
                        ext, eptp, gpa);
  }
  
@@@ -489,8 -488,8 +489,8 @@@ static DEFINE_PER_CPU(struct list_head
  static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
  static DEFINE_SPINLOCK(vmx_vpid_lock);
  
 -struct vmcs_config vmcs_config;
 -struct vmx_capability vmx_capability;
 +struct vmcs_config vmcs_config __ro_after_init;
 +struct vmx_capability vmx_capability __ro_after_init;
  
  #define VMX_SEGMENT_FIELD(seg)                                        \
        [VCPU_SREG_##seg] = {                                   \
@@@ -524,8 -523,6 +524,8 @@@ static inline void vmx_segment_cache_cl
  static unsigned long host_idt_base;
  
  #if IS_ENABLED(CONFIG_HYPERV)
 +static struct kvm_x86_ops vmx_x86_ops __initdata;
 +
  static bool __read_mostly enlightened_vmcs = true;
  module_param(enlightened_vmcs, bool, 0444);
  
@@@ -554,71 -551,6 +554,71 @@@ static int hv_enable_l2_tlb_flush(struc
        return 0;
  }
  
 +static __init void hv_init_evmcs(void)
 +{
 +      int cpu;
 +
 +      if (!enlightened_vmcs)
 +              return;
 +
 +      /*
  +       * Enlightened VMCS usage should be recommended by Hyper-V and the
  +       * host needs to support eVMCS v1 or above.
 +       */
 +      if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
 +          (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
 +           KVM_EVMCS_VERSION) {
 +
 +              /* Check that we have assist pages on all online CPUs */
 +              for_each_online_cpu(cpu) {
 +                      if (!hv_get_vp_assist_page(cpu)) {
 +                              enlightened_vmcs = false;
 +                              break;
 +                      }
 +              }
 +
 +              if (enlightened_vmcs) {
 +                      pr_info("Using Hyper-V Enlightened VMCS\n");
 +                      static_branch_enable(&enable_evmcs);
 +              }
 +
 +              if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
 +                      vmx_x86_ops.enable_l2_tlb_flush
 +                              = hv_enable_l2_tlb_flush;
 +
 +      } else {
 +              enlightened_vmcs = false;
 +      }
 +}
 +
 +static void hv_reset_evmcs(void)
 +{
 +      struct hv_vp_assist_page *vp_ap;
 +
 +      if (!static_branch_unlikely(&enable_evmcs))
 +              return;
 +
 +      /*
 +       * KVM should enable eVMCS if and only if all CPUs have a VP assist
  +       * page, and should reject CPU onlining if eVMCS is enabled but the CPU
 +       * doesn't have a VP assist page allocated.
 +       */
 +      vp_ap = hv_get_vp_assist_page(smp_processor_id());
 +      if (WARN_ON_ONCE(!vp_ap))
 +              return;
 +
 +      /*
 +       * Reset everything to support using non-enlightened VMCS access later
 +       * (e.g. when we reload the module with enlightened_vmcs=0)
 +       */
 +      vp_ap->nested_control.features.directhypercall = 0;
 +      vp_ap->current_nested_vmcs = 0;
 +      vp_ap->enlighten_vmentry = 0;
 +}
 +
 +#else /* IS_ENABLED(CONFIG_HYPERV) */
 +static void hv_init_evmcs(void) {}
 +static void hv_reset_evmcs(void) {}
  #endif /* IS_ENABLED(CONFIG_HYPERV) */
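
Factoring the Hyper-V handling into hv_init_evmcs()/hv_reset_evmcs() with empty #else stubs keeps the call sites in vmx_init() and vmx_hardware_disable() free of #ifdef CONFIG_HYPERV. A small standalone illustration of the stub-when-disabled idiom (the CONFIG_EXAMPLE_FEATURE macro is hypothetical):

#include <stdio.h>

/* Uncomment to "build in" the feature: */
/* #define CONFIG_EXAMPLE_FEATURE 1 */

#ifdef CONFIG_EXAMPLE_FEATURE
static void feature_init(void)  { puts("feature: real setup"); }
static void feature_reset(void) { puts("feature: real reset"); }
#else
/* Empty stubs so callers need no #ifdef when the feature is compiled out. */
static void feature_init(void)  {}
static void feature_reset(void) {}
#endif

int main(void)
{
        feature_init();         /* unconditional call sites */
        feature_reset();
        return 0;
}
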
  
  /*
@@@ -1681,8 -1613,8 +1681,8 @@@ static int skip_emulated_instruction(st
                if (!instr_len)
                        goto rip_updated;
  
 -              WARN(exit_reason.enclave_mode,
 -                   "KVM: skipping instruction after SGX enclave VM-Exit");
 +              WARN_ONCE(exit_reason.enclave_mode,
 +                        "skipping instruction after SGX enclave VM-Exit");
  
                orig_rip = kvm_rip_read(vcpu);
                rip = orig_rip + instr_len;
@@@ -2516,6 -2448,88 +2516,6 @@@ static void vmx_cache_reg(struct kvm_vc
        }
  }
  
 -static __init int cpu_has_kvm_support(void)
 -{
 -      return cpu_has_vmx();
 -}
 -
 -static __init int vmx_disabled_by_bios(void)
 -{
 -      return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
 -             !boot_cpu_has(X86_FEATURE_VMX);
 -}
 -
 -static int kvm_cpu_vmxon(u64 vmxon_pointer)
 -{
 -      u64 msr;
 -
 -      cr4_set_bits(X86_CR4_VMXE);
 -
 -      asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
 -                        _ASM_EXTABLE(1b, %l[fault])
 -                        : : [vmxon_pointer] "m"(vmxon_pointer)
 -                        : : fault);
 -      return 0;
 -
 -fault:
 -      WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
 -                rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
 -      cr4_clear_bits(X86_CR4_VMXE);
 -
 -      return -EFAULT;
 -}
 -
 -static int vmx_hardware_enable(void)
 -{
 -      int cpu = raw_smp_processor_id();
 -      u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 -      int r;
 -
 -      if (cr4_read_shadow() & X86_CR4_VMXE)
 -              return -EBUSY;
 -
 -      /*
 -       * This can happen if we hot-added a CPU but failed to allocate
 -       * VP assist page for it.
 -       */
 -      if (static_branch_unlikely(&enable_evmcs) &&
 -          !hv_get_vp_assist_page(cpu))
 -              return -EFAULT;
 -
 -      intel_pt_handle_vmx(1);
 -
 -      r = kvm_cpu_vmxon(phys_addr);
 -      if (r) {
 -              intel_pt_handle_vmx(0);
 -              return r;
 -      }
 -
 -      if (enable_ept)
 -              ept_sync_global();
 -
 -      return 0;
 -}
 -
 -static void vmclear_local_loaded_vmcss(void)
 -{
 -      int cpu = raw_smp_processor_id();
 -      struct loaded_vmcs *v, *n;
 -
 -      list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
 -                               loaded_vmcss_on_cpu_link)
 -              __loaded_vmcs_clear(v);
 -}
 -
 -static void vmx_hardware_disable(void)
 -{
 -      vmclear_local_loaded_vmcss();
 -
 -      if (cpu_vmxoff())
 -              kvm_spurious_fault();
 -
 -      intel_pt_handle_vmx(0);
 -}
 -
  /*
   * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID
   * directly instead of going through cpu_has(), to ensure KVM is trapping
@@@ -2551,7 -2565,8 +2551,7 @@@ static bool cpu_has_perf_global_ctrl_bu
        return false;
  }
  
 -static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
 -                                    u32 msr, u32 *result)
 +static int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result)
  {
        u32 vmx_msr_low, vmx_msr_high;
        u32 ctl = ctl_min | ctl_opt;
        return 0;
  }
  
 -static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
 +static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
  {
        u64 allowed;
  
        return  ctl_opt & allowed;
  }
  
 -static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 -                                  struct vmx_capability *vmx_cap)
 +static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 +                           struct vmx_capability *vmx_cap)
  {
        u32 vmx_msr_low, vmx_msr_high;
        u32 _pin_based_exec_control = 0;
        return 0;
  }
  
 +static bool kvm_is_vmx_supported(void)
 +{
 +      int cpu = raw_smp_processor_id();
 +
 +      if (!cpu_has_vmx()) {
 +              pr_err("VMX not supported by CPU %d\n", cpu);
 +              return false;
 +      }
 +
 +      if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
 +          !this_cpu_has(X86_FEATURE_VMX)) {
 +              pr_err("VMX not enabled (by BIOS) in MSR_IA32_FEAT_CTL on CPU %d\n", cpu);
 +              return false;
 +      }
 +
 +      return true;
 +}
 +
 +static int vmx_check_processor_compat(void)
 +{
 +      int cpu = raw_smp_processor_id();
 +      struct vmcs_config vmcs_conf;
 +      struct vmx_capability vmx_cap;
 +
 +      if (!kvm_is_vmx_supported())
 +              return -EIO;
 +
 +      if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
 +              pr_err("Failed to setup VMCS config on CPU %d\n", cpu);
 +              return -EIO;
 +      }
 +      if (nested)
 +              nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
 +      if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config))) {
 +              pr_err("Inconsistent VMCS config on CPU %d\n", cpu);
 +              return -EIO;
 +      }
 +      return 0;
 +}
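
vmx_check_processor_compat() now probes the current CPU's VMCS configuration and memcmp()s it against the global vmcs_config captured at module load, rejecting any CPU whose capabilities diverge from the boot CPU's. A simplified, non-kernel sketch of that baseline-comparison idea (the struct fields and probe function are made up):

#include <string.h>
#include <stdio.h>

/* Hypothetical capability snapshot; the real vmcs_config has many more fields. */
struct cpu_caps {
        unsigned int exec_ctrl;
        unsigned int entry_ctrl;
};

static struct cpu_caps baseline;        /* filled in once at "load" time */

static void probe_cpu_caps(int cpu, struct cpu_caps *caps)
{
        /* Stand-in for reading per-CPU capability MSRs. */
        caps->exec_ctrl  = 0xabcd;
        caps->entry_ctrl = (cpu == 3) ? 0x1 : 0x2;      /* CPU 3 is "incompatible" */
}

static int check_cpu_compat(int cpu)
{
        struct cpu_caps caps;

        probe_cpu_caps(cpu, &caps);
        if (memcmp(&baseline, &caps, sizeof(caps))) {
                fprintf(stderr, "inconsistent config on CPU %d\n", cpu);
                return -1;
        }
        return 0;
}

int main(void)
{
        probe_cpu_caps(0, &baseline);   /* boot CPU defines the baseline */
        for (int cpu = 0; cpu < 4; cpu++)
                if (check_cpu_compat(cpu))
                        return 1;
        return 0;
}
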
 +
 +static int kvm_cpu_vmxon(u64 vmxon_pointer)
 +{
 +      u64 msr;
 +
 +      cr4_set_bits(X86_CR4_VMXE);
 +
 +      asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
 +                        _ASM_EXTABLE(1b, %l[fault])
 +                        : : [vmxon_pointer] "m"(vmxon_pointer)
 +                        : : fault);
 +      return 0;
 +
 +fault:
 +      WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
 +                rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
 +      cr4_clear_bits(X86_CR4_VMXE);
 +
 +      return -EFAULT;
 +}
 +
 +static int vmx_hardware_enable(void)
 +{
 +      int cpu = raw_smp_processor_id();
 +      u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
 +      int r;
 +
 +      if (cr4_read_shadow() & X86_CR4_VMXE)
 +              return -EBUSY;
 +
 +      /*
 +       * This can happen if we hot-added a CPU but failed to allocate
 +       * VP assist page for it.
 +       */
 +      if (static_branch_unlikely(&enable_evmcs) &&
 +          !hv_get_vp_assist_page(cpu))
 +              return -EFAULT;
 +
 +      intel_pt_handle_vmx(1);
 +
 +      r = kvm_cpu_vmxon(phys_addr);
 +      if (r) {
 +              intel_pt_handle_vmx(0);
 +              return r;
 +      }
 +
 +      if (enable_ept)
 +              ept_sync_global();
 +
 +      return 0;
 +}
 +
 +static void vmclear_local_loaded_vmcss(void)
 +{
 +      int cpu = raw_smp_processor_id();
 +      struct loaded_vmcs *v, *n;
 +
 +      list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
 +                               loaded_vmcss_on_cpu_link)
 +              __loaded_vmcs_clear(v);
 +}
 +
 +static void vmx_hardware_disable(void)
 +{
 +      vmclear_local_loaded_vmcss();
 +
 +      if (cpu_vmxoff())
 +              kvm_spurious_fault();
 +
 +      hv_reset_evmcs();
 +
 +      intel_pt_handle_vmx(0);
 +}
 +
  struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags)
  {
        int node = cpu_to_node(cpu);
@@@ -3053,8 -2955,9 +3053,8 @@@ static void fix_rmode_seg(int seg, stru
                var.type = 0x3;
                var.avl = 0;
                if (save->base & 0xf)
 -                      printk_once(KERN_WARNING "kvm: segment base is not "
 -                                      "paragraph aligned when entering "
 -                                      "protected mode (seg=%d)", seg);
 +                      pr_warn_once("segment base is not paragraph aligned "
 +                                   "when entering protected mode (seg=%d)", seg);
        }
  
        vmcs_write16(sf->selector, var.selector);
@@@ -3084,7 -2987,8 +3084,7 @@@ static void enter_rmode(struct kvm_vcp
         * vcpu. Warn the user that an update is overdue.
         */
        if (!kvm_vmx->tss_addr)
 -              printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
 -                           "called before entering vcpu\n");
 +              pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
  
        vmx_segment_cache_clear(vmx);
  
@@@ -3904,39 -3808,6 +3904,6 @@@ static void seg_setup(int seg
        vmcs_write32(sf->ar_bytes, ar);
  }
  
- static int alloc_apic_access_page(struct kvm *kvm)
- {
-       struct page *page;
-       void __user *hva;
-       int ret = 0;
-       mutex_lock(&kvm->slots_lock);
-       if (kvm->arch.apic_access_memslot_enabled)
-               goto out;
-       hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-                                     APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
-       if (IS_ERR(hva)) {
-               ret = PTR_ERR(hva);
-               goto out;
-       }
-       page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
-       if (is_error_page(page)) {
-               ret = -EFAULT;
-               goto out;
-       }
-       /*
-        * Do not pin the page in memory, so that memory hot-unplug
-        * is able to migrate it.
-        */
-       put_page(page);
-       kvm->arch.apic_access_memslot_enabled = true;
- out:
-       mutex_unlock(&kvm->slots_lock);
-       return ret;
- }
  int allocate_vpid(void)
  {
        int vpid;
@@@ -6952,7 -6823,7 +6919,7 @@@ static void handle_external_interrupt_i
        gate_desc *desc = (gate_desc *)host_idt_base + vector;
  
        if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
 -          "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
 +          "unexpected VM-Exit interrupt info: 0x%x", intr_info))
                return;
  
        handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
@@@ -7490,7 -7361,7 +7457,7 @@@ static int vmx_vcpu_create(struct kvm_v
        vmx->loaded_vmcs = &vmx->vmcs01;
  
        if (cpu_need_virtualize_apic_accesses(vcpu)) {
-               err = alloc_apic_access_page(vcpu->kvm);
+               err = kvm_alloc_apic_access_page(vcpu->kvm);
                if (err)
                        goto free_vmcs;
        }
@@@ -7550,6 -7421,29 +7517,6 @@@ static int vmx_vm_init(struct kvm *kvm
        return 0;
  }
  
 -static int __init vmx_check_processor_compat(void)
 -{
 -      struct vmcs_config vmcs_conf;
 -      struct vmx_capability vmx_cap;
 -
 -      if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
 -          !this_cpu_has(X86_FEATURE_VMX)) {
 -              pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id());
 -              return -EIO;
 -      }
 -
 -      if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
 -              return -EIO;
 -      if (nested)
 -              nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
 -      if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
 -              printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
 -                              smp_processor_id());
 -              return -EIO;
 -      }
 -      return 0;
 -}
 -
  static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
  {
        u8 cache;
@@@ -8129,17 -8023,16 +8096,16 @@@ static void vmx_hardware_unsetup(void
        free_kvm_area();
  }
  
- static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
- {
-       ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
-                         BIT(APICV_INHIBIT_REASON_ABSENT) |
-                         BIT(APICV_INHIBIT_REASON_HYPERV) |
-                         BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
-                         BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
-                         BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
-       return supported & BIT(reason);
- }
+ #define VMX_REQUIRED_APICV_INHIBITS                   \
+ (                                                     \
+       BIT(APICV_INHIBIT_REASON_DISABLE)|              \
+       BIT(APICV_INHIBIT_REASON_ABSENT) |              \
+       BIT(APICV_INHIBIT_REASON_HYPERV) |              \
+       BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |            \
+       BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
+       BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |    \
+       BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED)    \
+ )
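
Replacing the vmx_check_apicv_inhibit_reasons() callback with the VMX_REQUIRED_APICV_INHIBITS constant lets common code test a reason with a plain bitmask AND (see the kvm_x86_ops.required_apicv_inhibits check later in the x86.c hunk). A standalone sketch of that reason-vs-mask test (the enum values are illustrative, not the real kvm_apicv_inhibit entries):

#include <stdio.h>

#define BIT(n) (1UL << (n))

/* Illustrative inhibit reasons, not the real kvm_apicv_inhibit enum. */
enum inhibit_reason {
        REASON_DISABLE,
        REASON_ABSENT,
        REASON_BLOCKIRQ,
        REASON_SEV,             /* not handled by this hypothetical backend */
};

/* Reasons the backend cares about, collapsed into one compile-time mask. */
#define REQUIRED_INHIBITS (BIT(REASON_DISABLE) | BIT(REASON_ABSENT) | BIT(REASON_BLOCKIRQ))

static int reason_is_required(unsigned long required, enum inhibit_reason reason)
{
        return !!(required & BIT(reason));
}

int main(void)
{
        printf("BLOCKIRQ required: %d\n", reason_is_required(REQUIRED_INHIBITS, REASON_BLOCKIRQ));
        printf("SEV required:      %d\n", reason_is_required(REQUIRED_INHIBITS, REASON_SEV));
        return 0;
}
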
  
  static void vmx_vm_destroy(struct kvm *kvm)
  {
  }
  
  static struct kvm_x86_ops vmx_x86_ops __initdata = {
 -      .name = "kvm_intel",
 +      .name = KBUILD_MODNAME,
 +
 +      .check_processor_compatibility = vmx_check_processor_compat,
  
        .hardware_unsetup = vmx_hardware_unsetup,
  
        .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
        .load_eoi_exitmap = vmx_load_eoi_exitmap,
        .apicv_post_state_restore = vmx_apicv_post_state_restore,
-       .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
+       .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
        .hwapic_irr_update = vmx_hwapic_irr_update,
        .hwapic_isr_update = vmx_hwapic_isr_update,
        .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
@@@ -8371,7 -8262,7 +8337,7 @@@ static __init int hardware_setup(void
                return -EIO;
  
        if (cpu_has_perf_global_ctrl_bug())
 -              pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
 +              pr_warn_once("VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
                             "does not work properly. Using workaround\n");
  
        if (boot_cpu_has(X86_FEATURE_NX))
  
        if (boot_cpu_has(X86_FEATURE_MPX)) {
                rdmsrl(MSR_IA32_BNDCFGS, host_bndcfgs);
 -              WARN_ONCE(host_bndcfgs, "KVM: BNDCFGS in host will be lost");
 +              WARN_ONCE(host_bndcfgs, "BNDCFGS in host will be lost");
        }
  
        if (!cpu_has_vmx_mpx())
  
        /* NX support is required for shadow paging. */
        if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
 -              pr_err_ratelimited("kvm: NX (Execute Disable) not supported\n");
 +              pr_err_ratelimited("NX (Execute Disable) not supported\n");
                return -EOPNOTSUPP;
        }
  
  }
  
  static struct kvm_x86_init_ops vmx_init_ops __initdata = {
 -      .cpu_has_kvm_support = cpu_has_kvm_support,
 -      .disabled_by_bios = vmx_disabled_by_bios,
 -      .check_processor_compatibility = vmx_check_processor_compat,
        .hardware_setup = hardware_setup,
        .handle_intel_pt_intr = NULL,
  
@@@ -8567,23 -8461,41 +8533,23 @@@ static void vmx_cleanup_l1d_flush(void
        l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
  }
  
 -static void vmx_exit(void)
 +static void __vmx_exit(void)
  {
 +      allow_smaller_maxphyaddr = false;
 +
  #ifdef CONFIG_KEXEC_CORE
        RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
        synchronize_rcu();
  #endif
 +      vmx_cleanup_l1d_flush();
 +}
  
 +static void vmx_exit(void)
 +{
        kvm_exit();
 +      kvm_x86_vendor_exit();
  
 -#if IS_ENABLED(CONFIG_HYPERV)
 -      if (static_branch_unlikely(&enable_evmcs)) {
 -              int cpu;
 -              struct hv_vp_assist_page *vp_ap;
 -              /*
 -               * Reset everything to support using non-enlightened VMCS
 -               * access later (e.g. when we reload the module with
 -               * enlightened_vmcs=0)
 -               */
 -              for_each_online_cpu(cpu) {
 -                      vp_ap = hv_get_vp_assist_page(cpu);
 -
 -                      if (!vp_ap)
 -                              continue;
 -
 -                      vp_ap->nested_control.features.directhypercall = 0;
 -                      vp_ap->current_nested_vmcs = 0;
 -                      vp_ap->enlighten_vmentry = 0;
 -              }
 -
 -              static_branch_disable(&enable_evmcs);
 -      }
 -#endif
 -      vmx_cleanup_l1d_flush();
 -
 -      allow_smaller_maxphyaddr = false;
 +      __vmx_exit();
  }
  module_exit(vmx_exit);
  
@@@ -8591,29 -8503,56 +8557,29 @@@ static int __init vmx_init(void
  {
        int r, cpu;
  
 -#if IS_ENABLED(CONFIG_HYPERV)
 +      if (!kvm_is_vmx_supported())
 +              return -EOPNOTSUPP;
 +
        /*
 -       * Enlightened VMCS usage should be recommended and the host needs
 -       * to support eVMCS v1 or above. We can also disable eVMCS support
 -       * with module parameter.
 +       * Note, hv_init_evmcs() touches only VMX knobs, i.e. there's nothing
 +       * to unwind if a later step fails.
         */
 -      if (enlightened_vmcs &&
 -          ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
 -          (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
 -          KVM_EVMCS_VERSION) {
 +      hv_init_evmcs();
  
 -              /* Check that we have assist pages on all online CPUs */
 -              for_each_online_cpu(cpu) {
 -                      if (!hv_get_vp_assist_page(cpu)) {
 -                              enlightened_vmcs = false;
 -                              break;
 -                      }
 -              }
 -
 -              if (enlightened_vmcs) {
 -                      pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
 -                      static_branch_enable(&enable_evmcs);
 -              }
 -
 -              if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
 -                      vmx_x86_ops.enable_l2_tlb_flush
 -                              = hv_enable_l2_tlb_flush;
 -
 -      } else {
 -              enlightened_vmcs = false;
 -      }
 -#endif
 -
 -      r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
 -                   __alignof__(struct vcpu_vmx), THIS_MODULE);
 +      r = kvm_x86_vendor_init(&vmx_init_ops);
        if (r)
                return r;
  
        /*
 -       * Must be called after kvm_init() so enable_ept is properly set
 +       * Must be called after common x86 init so enable_ept is properly set
        * up. Hand in the mitigation parameter value that was stored by the
        * pre module init parser. If no parameter was given, it will
         * contain 'auto' which will be turned into the default 'cond'
         * mitigation mode.
         */
        r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
 -      if (r) {
 -              vmx_exit();
 -              return r;
 -      }
 +      if (r)
 +              goto err_l1d_flush;
  
        vmx_setup_fb_clear_ctrl();
  
        if (!enable_ept)
                allow_smaller_maxphyaddr = true;
  
 +      /*
  +       * Common KVM initialization _must_ come last; after this point,
  +       * /dev/kvm is exposed to userspace!
 +       */
 +      r = kvm_init(sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx),
 +                   THIS_MODULE);
 +      if (r)
 +              goto err_kvm_init;
 +
        return 0;
 +
 +err_kvm_init:
 +      __vmx_exit();
 +err_l1d_flush:
 +      kvm_x86_vendor_exit();
 +      return r;
  }
  module_init(vmx_init);
diff --combined arch/x86/kvm/x86.c
index c3ac88036b522c2833c2768541153af6992c9527,5becce5bd45a40d7f622692069a2afbc49c600ee..508074e47bc0ebd4535de4bc548f57abdffe863d
@@@ -15,7 -15,6 +15,7 @@@
   *   Amit Shah    <[email protected]>
   *   Ben-Ami Yassour <[email protected]>
   */
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kvm_host.h>
  #include "irq.h"
@@@ -129,7 -128,6 +129,7 @@@ static int kvm_vcpu_do_singlestep(struc
  static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
  static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
  
 +static DEFINE_MUTEX(vendor_module_lock);
  struct kvm_x86_ops kvm_x86_ops __read_mostly;
  
  #define KVM_X86_OP(func)                                           \
@@@ -1559,7 -1557,7 +1559,7 @@@ static const u32 msr_based_features_all
        MSR_IA32_VMX_EPT_VPID_CAP,
        MSR_IA32_VMX_VMFUNC,
  
 -      MSR_F10H_DECFG,
 +      MSR_AMD64_DE_CFG,
        MSR_IA32_UCODE_REV,
        MSR_IA32_ARCH_CAPABILITIES,
        MSR_IA32_PERF_CAPABILITIES,
@@@ -2088,7 -2086,7 +2088,7 @@@ static int kvm_emulate_monitor_mwait(st
            !guest_cpuid_has(vcpu, X86_FEATURE_MWAIT))
                return kvm_handle_invalid_op(vcpu);
  
 -      pr_warn_once("kvm: %s instruction emulated as NOP!\n", insn);
 +      pr_warn_once("%s instruction emulated as NOP!\n", insn);
        return kvm_emulate_as_nop(vcpu);
  }
  int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
@@@ -2435,8 -2433,7 +2435,8 @@@ static int kvm_set_tsc_khz(struct kvm_v
        thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
        thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
        if (user_tsc_khz < thresh_lo || user_tsc_khz > thresh_hi) {
 -              pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", user_tsc_khz, thresh_lo, thresh_hi);
 +              pr_debug("requested TSC rate %u falls outside tolerance [%u,%u]\n",
 +                       user_tsc_khz, thresh_lo, thresh_hi);
                use_scaling = 1;
        }
        return set_tsc_khz(vcpu, user_tsc_khz, use_scaling);
@@@ -7704,7 -7701,7 +7704,7 @@@ static int emulator_cmpxchg_emulated(st
        return X86EMUL_CONTINUE;
  
  emul_write:
 -      printk_once(KERN_WARNING "kvm: emulating exchange as write\n");
 +      pr_warn_once("emulating exchange as write\n");
  
        return emulator_write_emulated(ctxt, addr, new, bytes, exception);
  }
@@@ -8265,7 -8262,7 +8265,7 @@@ static struct x86_emulate_ctxt *alloc_e
  
        ctxt = kmem_cache_zalloc(x86_emulator_cache, GFP_KERNEL_ACCOUNT);
        if (!ctxt) {
 -              pr_err("kvm: failed to allocate vcpu's emulator\n");
 +              pr_err("failed to allocate vcpu's emulator\n");
                return NULL;
        }
  
@@@ -9276,66 -9273,35 +9276,66 @@@ static struct notifier_block pvclock_gt
  };
  #endif
  
 -int kvm_arch_init(void *opaque)
 +static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
 +{
 +      memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
 +
 +#define __KVM_X86_OP(func) \
 +      static_call_update(kvm_x86_##func, kvm_x86_ops.func);
 +#define KVM_X86_OP(func) \
 +      WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
 +#define KVM_X86_OP_OPTIONAL __KVM_X86_OP
 +#define KVM_X86_OP_OPTIONAL_RET0(func) \
 +      static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
 +                                         (void *)__static_call_return0);
 +#include <asm/kvm-x86-ops.h>
 +#undef __KVM_X86_OP
 +
 +      kvm_pmu_ops_update(ops->pmu_ops);
 +}
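
kvm_ops_update() relies on the kvm-x86-ops.h X-macro header: the same op list is expanded with different definitions of KVM_X86_OP() to update every static call from the vendor's kvm_x86_ops. A simplified standalone illustration of the X-macro technique using ordinary function pointers (the op list and names here are invented):

#include <stdio.h>

/* In the kernel this list lives in a separate header that is #included
 * repeatedly with different definitions of the OP() macro. */
#define OP_LIST(OP) \
        OP(hardware_enable) \
        OP(hardware_disable)

struct ops {
        void (*hardware_enable)(void);
        void (*hardware_disable)(void);
};

static void my_enable(void)  { puts("enable"); }
static void my_disable(void) { puts("disable"); }

/* One "global" pointer per op, analogous to a static-call trampoline. */
#define DECLARE_OP(name) static void (*current_##name)(void);
OP_LIST(DECLARE_OP)
#undef DECLARE_OP

static void ops_update(const struct ops *ops)
{
#define UPDATE_OP(name) current_##name = ops->name;
        OP_LIST(UPDATE_OP)
#undef UPDATE_OP
}

int main(void)
{
        struct ops vendor = { .hardware_enable = my_enable, .hardware_disable = my_disable };

        ops_update(&vendor);
        current_hardware_enable();
        current_hardware_disable();
        return 0;
}
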
 +
 +static int kvm_x86_check_processor_compatibility(void)
 +{
 +      int cpu = smp_processor_id();
 +      struct cpuinfo_x86 *c = &cpu_data(cpu);
 +
 +      /*
 +       * Compatibility checks are done when loading KVM and when enabling
 +       * hardware, e.g. during CPU hotplug, to ensure all online CPUs are
 +       * compatible, i.e. KVM should never perform a compatibility check on
 +       * an offline CPU.
 +       */
 +      WARN_ON(!cpu_online(cpu));
 +
 +      if (__cr4_reserved_bits(cpu_has, c) !=
 +          __cr4_reserved_bits(cpu_has, &boot_cpu_data))
 +              return -EIO;
 +
 +      return static_call(kvm_x86_check_processor_compatibility)();
 +}
 +
 +static void kvm_x86_check_cpu_compat(void *ret)
 +{
 +      *(int *)ret = kvm_x86_check_processor_compatibility();
 +}
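
kvm_x86_check_cpu_compat() is shaped to match the smp_call_function_single() callback signature, so the compatibility result is passed back through the void * argument rather than a return value; __kvm_x86_vendor_init() then runs it on every online CPU. A standalone sketch of returning status through a void * callback argument (names and the simulated CPU loop are hypothetical):

#include <stdio.h>

/* Hypothetical per-CPU check; returns 0 on success, negative on failure. */
static int check_one_cpu(int cpu)
{
        return (cpu == 2) ? -5 /* simulate an incompatible CPU */ : 0;
}

static int current_cpu;         /* stands in for "the CPU the callback runs on" */

/* Callback shaped like void (*func)(void *info): the result travels back
 * through the pointer argument rather than a return value. */
static void check_cpu_compat_cb(void *ret)
{
        *(int *)ret = check_one_cpu(current_cpu);
}

int main(void)
{
        int r = 0;

        for (current_cpu = 0; current_cpu < 4; current_cpu++) {
                check_cpu_compat_cb(&r);        /* kernel: smp_call_function_single() */
                if (r < 0) {
                        fprintf(stderr, "CPU %d failed compat check: %d\n", current_cpu, r);
                        return 1;
                }
        }
        puts("all CPUs compatible");
        return 0;
}
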
 +
 +static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
  {
 -      struct kvm_x86_init_ops *ops = opaque;
        u64 host_pat;
 -      int r;
 +      int r, cpu;
  
        if (kvm_x86_ops.hardware_enable) {
 -              pr_err("kvm: already loaded vendor module '%s'\n", kvm_x86_ops.name);
 +              pr_err("already loaded vendor module '%s'\n", kvm_x86_ops.name);
                return -EEXIST;
        }
  
 -      if (!ops->cpu_has_kvm_support()) {
 -              pr_err_ratelimited("kvm: no hardware support for '%s'\n",
 -                                 ops->runtime_ops->name);
 -              return -EOPNOTSUPP;
 -      }
 -      if (ops->disabled_by_bios()) {
 -              pr_err_ratelimited("kvm: support for '%s' disabled by bios\n",
 -                                 ops->runtime_ops->name);
 -              return -EOPNOTSUPP;
 -      }
 -
        /*
         * KVM explicitly assumes that the guest has an FPU and
         * FXSAVE/FXRSTOR. For example, the KVM_GET_FPU explicitly casts the
         * vCPU's FPU state as a fxregs_state struct.
         */
        if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
 -              printk(KERN_ERR "kvm: inadequate fpu\n");
 +              pr_err("inadequate fpu\n");
                return -EOPNOTSUPP;
        }
  
         */
        if (rdmsrl_safe(MSR_IA32_CR_PAT, &host_pat) ||
            (host_pat & GENMASK(2, 0)) != 6) {
 -              pr_err("kvm: host PAT[0] is not WB\n");
 +              pr_err("host PAT[0] is not WB\n");
                return -EIO;
        }
  
        x86_emulator_cache = kvm_alloc_emulator_cache();
        if (!x86_emulator_cache) {
 -              pr_err("kvm: failed to allocate cache for x86 emulator\n");
 +              pr_err("failed to allocate cache for x86 emulator\n");
                return -ENOMEM;
        }
  
        user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
        if (!user_return_msrs) {
 -              printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
 +              pr_err("failed to allocate percpu kvm_user_return_msrs\n");
                r = -ENOMEM;
                goto out_free_x86_emulator_cache;
        }
        if (r)
                goto out_free_percpu;
  
 -      kvm_timer_init();
 -
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
                kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
        }
  
 +      rdmsrl_safe(MSR_EFER, &host_efer);
 +
 +      if (boot_cpu_has(X86_FEATURE_XSAVES))
 +              rdmsrl(MSR_IA32_XSS, host_xss);
 +
 +      kvm_init_pmu_capability();
 +
 +      r = ops->hardware_setup();
 +      if (r != 0)
 +              goto out_mmu_exit;
 +
 +      kvm_ops_update(ops);
 +
 +      for_each_online_cpu(cpu) {
 +              smp_call_function_single(cpu, kvm_x86_check_cpu_compat, &r, 1);
 +              if (r < 0)
 +                      goto out_unwind_ops;
 +      }
 +
 +      /*
 +       * Point of no return!  DO NOT add error paths below this point unless
 +       * absolutely necessary, as most operations from this point forward
 +       * require unwinding.
 +       */
 +      kvm_timer_init();
 +
        if (pi_inject_timer == -1)
                pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
  #ifdef CONFIG_X86_64
                set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
  #endif
  
 +      kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
 +
 +      if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
 +              kvm_caps.supported_xss = 0;
 +
 +#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
 +      cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
 +#undef __kvm_cpu_cap_has
 +
 +      if (kvm_caps.has_tsc_control) {
 +              /*
 +               * Make sure the user can only configure tsc_khz values that
 +               * fit into a signed integer.
 +               * A min value is not calculated because it will always
 +               * be 1 on all machines.
 +               */
 +              u64 max = min(0x7fffffffULL,
 +                            __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
 +              kvm_caps.max_guest_tsc_khz = max;
 +      }
 +      kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
 +      kvm_init_msr_list();
        return 0;
  
 +out_unwind_ops:
 +      kvm_x86_ops.hardware_enable = NULL;
 +      static_call(kvm_x86_hardware_unsetup)();
 +out_mmu_exit:
 +      kvm_mmu_vendor_module_exit();
  out_free_percpu:
        free_percpu(user_return_msrs);
  out_free_x86_emulator_cache:
        return r;
  }
  
 -void kvm_arch_exit(void)
 +int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 +{
 +      int r;
 +
 +      mutex_lock(&vendor_module_lock);
 +      r = __kvm_x86_vendor_init(ops);
 +      mutex_unlock(&vendor_module_lock);
 +
 +      return r;
 +}
 +EXPORT_SYMBOL_GPL(kvm_x86_vendor_init);
 +
 +void kvm_x86_vendor_exit(void)
  {
 +      kvm_unregister_perf_callbacks();
 +
  #ifdef CONFIG_X86_64
        if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
                clear_hv_tscchange_cb();
        irq_work_sync(&pvclock_irq_work);
        cancel_work_sync(&pvclock_gtod_work);
  #endif
 -      kvm_x86_ops.hardware_enable = NULL;
 +      static_call(kvm_x86_hardware_unsetup)();
        kvm_mmu_vendor_module_exit();
        free_percpu(user_return_msrs);
        kmem_cache_destroy(x86_emulator_cache);
        static_key_deferred_flush(&kvm_xen_enabled);
        WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
  #endif
 +      mutex_lock(&vendor_module_lock);
 +      kvm_x86_ops.hardware_enable = NULL;
 +      mutex_unlock(&vendor_module_lock);
  }
 +EXPORT_SYMBOL_GPL(kvm_x86_vendor_exit);
  
  static int __kvm_emulate_halt(struct kvm_vcpu *vcpu, int state, int reason)
  {
@@@ -10148,7 -10045,7 +10148,7 @@@ void kvm_make_scan_ioapic_request(struc
        kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
  }
  
- void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
  {
        struct kvm_lapic *apic = vcpu->arch.apic;
        bool activate;
        preempt_enable();
        up_read(&vcpu->kvm->arch.apicv_update_lock);
  }
- EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
+ EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv);
+ static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+ {
+       if (!lapic_in_kernel(vcpu))
+               return;
+       /*
+        * Due to sharing page tables across vCPUs, the xAPIC memslot must be
+        * deleted if any vCPU has xAPIC virtualization and x2APIC enabled, but
+        * hardware doesn't support x2APIC virtualization.  E.g. some AMD CPUs
+        * support AVIC but not x2APIC.  KVM still allows enabling AVIC in this
+        * case so that KVM can use the AVIC doorbell to inject interrupts to
+        * running vCPUs, but KVM must not create SPTEs for the APIC base as
+        * the vCPU would incorrectly be able to access the vAPIC page via MMIO
+        * despite being in x2APIC mode.  For simplicity, inhibiting the APIC
+        * access page is sticky.
+        */
+       if (apic_x2apic_mode(vcpu->arch.apic) &&
+           kvm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization)
+               kvm_inhibit_apic_access_page(vcpu);
+       __kvm_vcpu_update_apicv(vcpu);
+ }
  
  void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
                                      enum kvm_apicv_inhibit reason, bool set)
  
        lockdep_assert_held_write(&kvm->arch.apicv_update_lock);
  
-       if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(reason))
+       if (!(kvm_x86_ops.required_apicv_inhibits & BIT(reason)))
                return;
  
        old = new = kvm->arch.apicv_inhibit_reasons;
@@@ -11636,7 -11556,7 +11659,7 @@@ static int sync_regs(struct kvm_vcpu *v
  int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
  {
        if (kvm_check_tsc_unstable() && kvm->created_vcpus)
 -              pr_warn_once("kvm: SMP vm created on host with unstable TSC; "
 +              pr_warn_once("SMP vm created on host with unstable TSC; "
                             "guest TSC will not be reliable\n");
  
        if (!kvm->arch.max_vcpu_ids)
@@@ -11713,7 -11633,7 +11736,7 @@@ int kvm_arch_vcpu_create(struct kvm_vcp
                goto free_wbinvd_dirty_mask;
  
        if (!fpu_alloc_guest_fpstate(&vcpu->arch.guest_fpu)) {
 -              pr_err("kvm: failed to allocate vcpu's fpu\n");
 +              pr_err("failed to allocate vcpu's fpu\n");
                goto free_emulate_ctxt;
        }
  
@@@ -11987,11 -11907,6 +12010,11 @@@ int kvm_arch_hardware_enable(void
        bool stable, backwards_tsc = false;
  
        kvm_user_return_msr_cpu_online();
 +
 +      ret = kvm_x86_check_processor_compatibility();
 +      if (ret)
 +              return ret;
 +
        ret = static_call(kvm_x86_hardware_enable)();
        if (ret != 0)
                return ret;
@@@ -12078,6 -11993,88 +12101,6 @@@ void kvm_arch_hardware_disable(void
        drop_user_return_notifiers();
  }
  
 -static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
 -{
 -      memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
 -
 -#define __KVM_X86_OP(func) \
 -      static_call_update(kvm_x86_##func, kvm_x86_ops.func);
 -#define KVM_X86_OP(func) \
 -      WARN_ON(!kvm_x86_ops.func); __KVM_X86_OP(func)
 -#define KVM_X86_OP_OPTIONAL __KVM_X86_OP
 -#define KVM_X86_OP_OPTIONAL_RET0(func) \
 -      static_call_update(kvm_x86_##func, (void *)kvm_x86_ops.func ? : \
 -                                         (void *)__static_call_return0);
 -#include <asm/kvm-x86-ops.h>
 -#undef __KVM_X86_OP
 -
 -      kvm_pmu_ops_update(ops->pmu_ops);
 -}
 -
 -int kvm_arch_hardware_setup(void *opaque)
 -{
 -      struct kvm_x86_init_ops *ops = opaque;
 -      int r;
 -
 -      rdmsrl_safe(MSR_EFER, &host_efer);
 -
 -      if (boot_cpu_has(X86_FEATURE_XSAVES))
 -              rdmsrl(MSR_IA32_XSS, host_xss);
 -
 -      kvm_init_pmu_capability();
 -
 -      r = ops->hardware_setup();
 -      if (r != 0)
 -              return r;
 -
 -      kvm_ops_update(ops);
 -
 -      kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
 -
 -      if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
 -              kvm_caps.supported_xss = 0;
 -
 -#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f)
 -      cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_);
 -#undef __kvm_cpu_cap_has
 -
 -      if (kvm_caps.has_tsc_control) {
 -              /*
 -               * Make sure the user can only configure tsc_khz values that
 -               * fit into a signed integer.
 -               * A min value is not calculated because it will always
 -               * be 1 on all machines.
 -               */
 -              u64 max = min(0x7fffffffULL,
 -                            __scale_tsc(kvm_caps.max_tsc_scaling_ratio, tsc_khz));
 -              kvm_caps.max_guest_tsc_khz = max;
 -      }
 -      kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits;
 -      kvm_init_msr_list();
 -      return 0;
 -}
 -
 -void kvm_arch_hardware_unsetup(void)
 -{
 -      kvm_unregister_perf_callbacks();
 -
 -      static_call(kvm_x86_hardware_unsetup)();
 -}
 -
 -int kvm_arch_check_processor_compat(void *opaque)
 -{
 -      struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 -      struct kvm_x86_init_ops *ops = opaque;
 -
 -      WARN_ON(!irqs_disabled());
 -
 -      if (__cr4_reserved_bits(cpu_has, c) !=
 -          __cr4_reserved_bits(cpu_has, &boot_cpu_data))
 -              return -EIO;
 -
 -      return ops->check_processor_compatibility();
 -}
 -
  bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
  {
        return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;