Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <[email protected]>
Sat, 22 Jan 2022 07:40:01 +0000 (09:40 +0200)
committer Linus Torvalds <[email protected]>
Sat, 22 Jan 2022 07:40:01 +0000 (09:40 +0200)
Pull more kvm updates from Paolo Bonzini:
 "Generic:

   - selftest compilation fix for non-x86

   - KVM: avoid warning on s390 in mark_page_dirty

 x86:

   - fix page write-protection bug and improve comments

   - use binary search to lookup the PMU event filter, add test

   - enable_pmu module parameter support for Intel CPUs

   - switch blocked_vcpu_on_cpu_lock to raw spinlock

   - cleanups of blocked vCPU logic

   - partially allow KVM_SET_CPUID{,2} after KVM_RUN (5.16 regression)

   - various small fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (46 commits)
  docs: kvm: fix WARNINGs from api.rst
  selftests: kvm/x86: Fix the warning in lib/x86_64/processor.c
  selftests: kvm/x86: Fix the warning in pmu_event_filter_test.c
  kvm: selftests: Do not indent with spaces
  kvm: selftests: sync uapi/linux/kvm.h with Linux header
  selftests: kvm: add amx_test to .gitignore
  KVM: SVM: Nullify vcpu_(un)blocking() hooks if AVIC is disabled
  KVM: SVM: Move svm_hardware_setup() and its helpers below svm_x86_ops
  KVM: SVM: Drop AVIC's intermediate avic_set_running() helper
  KVM: VMX: Don't do full kick when handling posted interrupt wakeup
  KVM: VMX: Fold fallback path into triggering posted IRQ helper
  KVM: VMX: Pass desired vector instead of bool for triggering posted IRQ
  KVM: VMX: Don't do full kick when triggering posted interrupt "fails"
  KVM: SVM: Skip AVIC and IRTE updates when loading blocking vCPU
  KVM: SVM: Use kvm_vcpu_is_blocking() in AVIC load to handle preemption
  KVM: SVM: Remove unnecessary APICv/AVIC update in vCPU unblocking path
  KVM: SVM: Don't bother checking for "running" AVIC when kicking for IPIs
  KVM: SVM: Signal AVIC doorbell iff vCPU is in guest mode
  KVM: x86: Remove defunct pre_block/post_block kvm_x86_ops hooks
  KVM: x86: Unexport LAPIC's switch_to_{hv,sw}_timer() helpers
  ...

arch/x86/include/asm/kvm_host.h
arch/x86/kvm/mmu/spte.c
arch/x86/kvm/pmu.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
include/linux/kvm_host.h
virt/kvm/kvm_main.c

diff --combined arch/x86/include/asm/kvm_host.h
index 0677b9ea01c901c68ea763e7330d904701fa8617,682ad02a4e585bb91dcbd0c1308f186ed7d97d0c..1384517d77093b6ec2ba471f9136f67d5176507e
@@@ -782,7 -782,6 +782,7 @@@ struct kvm_vcpu_arch 
        unsigned nmi_pending; /* NMI queued after currently running handler */
        bool nmi_injected;    /* Trying to inject an NMI this entry */
        bool smi_pending;    /* SMI queued after currently running handler */
 +      u8 handling_intr_from_guest;
  
        struct kvm_mtrr mtrr_state;
        u64 pat;
@@@ -1381,6 -1380,7 +1381,7 @@@ struct kvm_x86_ops 
         */
        void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
  
+       int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
        enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu);
        int (*handle_exit)(struct kvm_vcpu *vcpu,
                enum exit_fastpath_completion exit_fastpath);
        const struct kvm_pmu_ops *pmu_ops;
        const struct kvm_x86_nested_ops *nested_ops;
  
-       /*
-        * Architecture specific hooks for vCPU blocking due to
-        * HLT instruction.
-        * Returns for .pre_block():
-        *    - 0 means continue to block the vCPU.
-        *    - 1 means we cannot block the vCPU since some event
-        *        happens during this period, such as, 'ON' bit in
-        *        posted-interrupts descriptor is set.
-        */
-       int (*pre_block)(struct kvm_vcpu *vcpu);
-       void (*post_block)(struct kvm_vcpu *vcpu);
        void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
        void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
  
@@@ -1529,7 -1517,6 +1518,7 @@@ struct kvm_x86_init_ops 
        int (*disabled_by_bios)(void);
        int (*check_processor_compatibility)(void);
        int (*hardware_setup)(void);
 +      unsigned int (*handle_intel_pt_intr)(void);
  
        struct kvm_x86_ops *runtime_ops;
  };
@@@ -1579,9 -1566,6 +1568,9 @@@ static inline int kvm_arch_flush_remote
                return -ENOTSUPP;
  }
  
 +#define kvm_arch_pmi_in_guest(vcpu) \
 +      ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
 +
  int kvm_mmu_module_init(void);
  void kvm_mmu_module_exit(void);
  
@@@ -1913,6 -1897,8 +1902,6 @@@ int kvm_skip_emulated_instruction(struc
  int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
  void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
  
 -int kvm_is_in_guest(void);
 -
  void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                                     u32 size);
  bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
diff --combined arch/x86/kvm/mmu/spte.c
index 351b04ad62a18f10b11962b7b4db433171a8f5d9,f8677404c93cb8431d6b597f85d7b5b12b9e4846..73cfe62fdad1fd4ac55e0dba7e04466abf9bc33d
@@@ -16,7 -16,6 +16,7 @@@
  #include "spte.h"
  
  #include <asm/e820/api.h>
 +#include <asm/memtype.h>
  #include <asm/vmx.h>
  
  static bool __read_mostly enable_mmio_caching = true;
@@@ -216,6 -215,7 +216,7 @@@ u64 kvm_mmu_changed_pte_notifier_make_s
  
        new_spte &= ~PT_WRITABLE_MASK;
        new_spte &= ~shadow_host_writable_mask;
+       new_spte &= ~shadow_mmu_writable_mask;
  
        new_spte = mark_spte_for_access_track(new_spte);
  
diff --combined arch/x86/kvm/pmu.c
index 261b39cbef6ea52c77473a097839cf1f08438387,2c98f3ee8df402c6bdbcec8d1bb9ccc381584d1a..f614f95acc6b3e38b0a928e42cdd0f43c9180e02
@@@ -13,6 -13,8 +13,8 @@@
  #include <linux/types.h>
  #include <linux/kvm_host.h>
  #include <linux/perf_event.h>
+ #include <linux/bsearch.h>
+ #include <linux/sort.h>
  #include <asm/perf_event.h>
  #include "x86.h"
  #include "cpuid.h"
@@@ -77,7 -79,7 +79,7 @@@ static inline void __kvm_perf_overflow(
         * woken up. So we should wake it, but this is impossible from
         * NMI context. Do it from irq work instead.
         */
 -      if (in_pmi && !kvm_is_in_guest())
 +      if (in_pmi && !kvm_handling_nmi_from_guest(pmc->vcpu))
                irq_work_queue(&pmc_to_pmu(pmc)->irq_work);
        else
                kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
@@@ -109,6 -111,9 +111,9 @@@ static void pmc_reprogram_counter(struc
                .config = config,
        };
  
+       if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX)
+               return;
        attr.sample_period = get_sample_period(pmc, pmc->counter);
  
        if (in_tx)
@@@ -169,12 -174,16 +174,16 @@@ static bool pmc_resume_counter(struct k
        return true;
  }
  
+ static int cmp_u64(const void *a, const void *b)
+ {
+       return *(__u64 *)a - *(__u64 *)b;
+ }
  void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
  {
        unsigned config, type = PERF_TYPE_RAW;
        struct kvm *kvm = pmc->vcpu->kvm;
        struct kvm_pmu_event_filter *filter;
-       int i;
        bool allow_event = true;
  
        if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
  
        filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
        if (filter) {
-               for (i = 0; i < filter->nevents; i++)
-                       if (filter->events[i] ==
-                           (eventsel & AMD64_RAW_EVENT_MASK_NB))
-                               break;
-               if (filter->action == KVM_PMU_EVENT_ALLOW &&
-                   i == filter->nevents)
-                       allow_event = false;
-               if (filter->action == KVM_PMU_EVENT_DENY &&
-                   i < filter->nevents)
-                       allow_event = false;
+               __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB;
+               if (bsearch(&key, filter->events, filter->nevents,
+                           sizeof(__u64), cmp_u64))
+                       allow_event = filter->action == KVM_PMU_EVENT_ALLOW;
+               else
+                       allow_event = filter->action == KVM_PMU_EVENT_DENY;
        }
        if (!allow_event)
                return;
@@@ -573,6 -579,11 +579,11 @@@ int kvm_vm_ioctl_set_pmu_event_filter(s
        /* Ensure nevents can't be changed between the user copies. */
        *filter = tmp;
  
+       /*
+        * Sort the in-kernel list so that we can search it with bsearch.
+        */
+       sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL);
        mutex_lock(&kvm->lock);
        filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
                                     mutex_is_locked(&kvm->lock));
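
As context for the pmu.c hunks above: the filter's event list is now sorted once in kvm_vm_ioctl_set_pmu_event_filter() so that each reprogram_gp_counter() call can resolve the allow/deny decision with a binary search instead of a linear scan. Below is a minimal stand-alone sketch of that lookup, illustrative only and not part of the diff; the enum and function names are invented for the example, and the comparator is written defensively for portable user-space C rather than copied from the kernel.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdlib.h>

	enum filter_action { FILTER_ALLOW, FILTER_DENY };  /* stand-ins for KVM_PMU_EVENT_{ALLOW,DENY} */

	/* Comparator shared by the one-time sort and every subsequent lookup. */
	static int cmp_u64_example(const void *a, const void *b)
	{
		uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

		return (x > y) - (x < y);
	}

	/* Sort once when the filter is installed (cf. kvm_vm_ioctl_set_pmu_event_filter). */
	static void filter_install(uint64_t *events, size_t nevents)
	{
		qsort(events, nevents, sizeof(*events), cmp_u64_example);
	}

	/* Each guest eventsel is then checked in O(log n) (cf. reprogram_gp_counter). */
	static bool event_allowed(const uint64_t *events, size_t nevents,
				  enum filter_action action, uint64_t key)
	{
		bool found = bsearch(&key, events, nevents, sizeof(*events),
				     cmp_u64_example) != NULL;

		return found ? (action == FILTER_ALLOW) : (action == FILTER_DENY);
	}
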
diff --combined arch/x86/kvm/svm/svm.c
index 46bcc706f25740b3f0a1956a4d2124fda095306d,6d31d357a83b9fdc88a68d01130ca68db8b9c06f..2c99b18d76c0f9b5157b3999a733a6e46cbc8561
@@@ -192,10 -192,6 +192,6 @@@ module_param(vgif, int, 0444)
  static int lbrv = true;
  module_param(lbrv, int, 0444);
  
- /* enable/disable PMU virtualization */
- bool pmu = true;
- module_param(pmu, bool, 0444);
  static int tsc_scaling = true;
  module_param(tsc_scaling, int, 0444);
  
@@@ -873,47 -869,6 +869,6 @@@ static void shrink_ple_window(struct kv
        }
  }
  
- /*
-  * The default MMIO mask is a single bit (excluding the present bit),
-  * which could conflict with the memory encryption bit. Check for
-  * memory encryption support and override the default MMIO mask if
-  * memory encryption is enabled.
-  */
- static __init void svm_adjust_mmio_mask(void)
- {
-       unsigned int enc_bit, mask_bit;
-       u64 msr, mask;
-       /* If there is no memory encryption support, use existing mask */
-       if (cpuid_eax(0x80000000) < 0x8000001f)
-               return;
-       /* If memory encryption is not enabled, use existing mask */
-       rdmsrl(MSR_AMD64_SYSCFG, msr);
-       if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
-               return;
-       enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
-       mask_bit = boot_cpu_data.x86_phys_bits;
-       /* Increment the mask bit if it is the same as the encryption bit */
-       if (enc_bit == mask_bit)
-               mask_bit++;
-       /*
-        * If the mask bit location is below 52, then some bits above the
-        * physical addressing limit will always be reserved, so use the
-        * rsvd_bits() function to generate the mask. This mask, along with
-        * the present bit, will be used to generate a page fault with
-        * PFER.RSV = 1.
-        *
-        * If the mask bit location is 52 (or above), then clear the mask.
-        */
-       mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
-       kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
- }
  static void svm_hardware_teardown(void)
  {
        int cpu;
        iopm_base = 0;
  }
  
- static __init void svm_set_cpu_caps(void)
- {
-       kvm_set_cpu_caps();
-       supported_xss = 0;
-       /* CPUID 0x80000001 and 0x8000000A (SVM features) */
-       if (nested) {
-               kvm_cpu_cap_set(X86_FEATURE_SVM);
-               if (nrips)
-                       kvm_cpu_cap_set(X86_FEATURE_NRIPS);
-               if (npt_enabled)
-                       kvm_cpu_cap_set(X86_FEATURE_NPT);
-               if (tsc_scaling)
-                       kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
-               /* Nested VM can receive #VMEXIT instead of triggering #GP */
-               kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
-       }
-       /* CPUID 0x80000008 */
-       if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
-           boot_cpu_has(X86_FEATURE_AMD_SSBD))
-               kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
-       /* AMD PMU PERFCTR_CORE CPUID */
-       if (pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
-               kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
-       /* CPUID 0x8000001F (SME/SEV features) */
-       sev_set_cpu_caps();
- }
- static __init int svm_hardware_setup(void)
- {
-       int cpu;
-       struct page *iopm_pages;
-       void *iopm_va;
-       int r;
-       unsigned int order = get_order(IOPM_SIZE);
-       /*
-        * NX is required for shadow paging and for NPT if the NX huge pages
-        * mitigation is enabled.
-        */
-       if (!boot_cpu_has(X86_FEATURE_NX)) {
-               pr_err_ratelimited("NX (Execute Disable) not supported\n");
-               return -EOPNOTSUPP;
-       }
-       kvm_enable_efer_bits(EFER_NX);
-       iopm_pages = alloc_pages(GFP_KERNEL, order);
-       if (!iopm_pages)
-               return -ENOMEM;
-       iopm_va = page_address(iopm_pages);
-       memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
-       iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
-       init_msrpm_offsets();
-       supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
-       if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
-               kvm_enable_efer_bits(EFER_FFXSR);
-       if (tsc_scaling) {
-               if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
-                       tsc_scaling = false;
-               } else {
-                       pr_info("TSC scaling supported\n");
-                       kvm_has_tsc_control = true;
-                       kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
-                       kvm_tsc_scaling_ratio_frac_bits = 32;
-               }
-       }
-       tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
-       /* Check for pause filtering support */
-       if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
-               pause_filter_count = 0;
-               pause_filter_thresh = 0;
-       } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
-               pause_filter_thresh = 0;
-       }
-       if (nested) {
-               printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
-               kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
-       }
-       /*
-        * KVM's MMU doesn't support using 2-level paging for itself, and thus
-        * NPT isn't supported if the host is using 2-level paging since host
-        * CR4 is unchanged on VMRUN.
-        */
-       if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
-               npt_enabled = false;
-       if (!boot_cpu_has(X86_FEATURE_NPT))
-               npt_enabled = false;
-       /* Force VM NPT level equal to the host's paging level */
-       kvm_configure_mmu(npt_enabled, get_npt_level(),
-                         get_npt_level(), PG_LEVEL_1G);
-       pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
-       /* Note, SEV setup consumes npt_enabled. */
-       sev_hardware_setup();
-       svm_hv_hardware_setup();
-       svm_adjust_mmio_mask();
-       for_each_possible_cpu(cpu) {
-               r = svm_cpu_init(cpu);
-               if (r)
-                       goto err;
-       }
-       if (nrips) {
-               if (!boot_cpu_has(X86_FEATURE_NRIPS))
-                       nrips = false;
-       }
-       enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
-       if (enable_apicv) {
-               pr_info("AVIC enabled\n");
-               amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
-       }
-       if (vls) {
-               if (!npt_enabled ||
-                   !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
-                   !IS_ENABLED(CONFIG_X86_64)) {
-                       vls = false;
-               } else {
-                       pr_info("Virtual VMLOAD VMSAVE supported\n");
-               }
-       }
-       if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
-               svm_gp_erratum_intercept = false;
-       if (vgif) {
-               if (!boot_cpu_has(X86_FEATURE_VGIF))
-                       vgif = false;
-               else
-                       pr_info("Virtual GIF supported\n");
-       }
-       if (lbrv) {
-               if (!boot_cpu_has(X86_FEATURE_LBRV))
-                       lbrv = false;
-               else
-                       pr_info("LBR virtualization supported\n");
-       }
-       if (!pmu)
-               pr_info("PMU virtualization is disabled\n");
-       svm_set_cpu_caps();
-       /*
-        * It seems that on AMD processors PTE's accessed bit is
-        * being set by the CPU hardware before the NPF vmexit.
-        * This is not expected behaviour and our tests fail because
-        * of it.
-        * A workaround here is to disable support for
-        * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
-        * In this case userspace can know if there is support using
-        * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
-        * it
-        * If future AMD CPU models change the behaviour described above,
-        * this variable can be changed accordingly
-        */
-       allow_smaller_maxphyaddr = !npt_enabled;
-       return 0;
- err:
-       svm_hardware_teardown();
-       return r;
- }
  static void init_seg(struct vmcb_seg *seg)
  {
        seg->selector = 0;
@@@ -1444,12 -1207,6 +1207,6 @@@ static int svm_create_vcpu(struct kvm_v
        if (err)
                goto error_free_vmsa_page;
  
-       /* We initialize this flag to true to make sure that the is_running
-        * bit would be set the first time the vcpu is loaded.
-        */
-       if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm))
-               svm->avic_is_running = true;
        svm->msrpm = svm_vcpu_alloc_msrpm();
        if (!svm->msrpm) {
                err = -ENOMEM;
@@@ -3833,6 -3590,11 +3590,11 @@@ static void svm_cancel_injection(struc
        svm_complete_interrupts(vcpu);
  }
  
+ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
+ {
+       return 1;
+ }
  static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
  {
        if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
@@@ -3967,7 -3729,7 +3729,7 @@@ static __no_kcsan fastpath_t svm_vcpu_r
        vcpu->arch.regs_dirty = 0;
  
        if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
 -              kvm_before_interrupt(vcpu);
 +              kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
  
        kvm_load_host_xsave_state(vcpu);
        stgi();
@@@ -4629,8 -4391,8 +4391,8 @@@ static struct kvm_x86_ops svm_x86_ops _
        .prepare_guest_switch = svm_prepare_guest_switch,
        .vcpu_load = svm_vcpu_load,
        .vcpu_put = svm_vcpu_put,
-       .vcpu_blocking = svm_vcpu_blocking,
-       .vcpu_unblocking = svm_vcpu_unblocking,
+       .vcpu_blocking = avic_vcpu_blocking,
+       .vcpu_unblocking = avic_vcpu_unblocking,
  
        .update_exception_bitmap = svm_update_exception_bitmap,
        .get_msr_feature = svm_get_msr_feature,
        .tlb_flush_gva = svm_flush_tlb_gva,
        .tlb_flush_guest = svm_flush_tlb,
  
+       .vcpu_pre_run = svm_vcpu_pre_run,
        .run = svm_vcpu_run,
        .handle_exit = handle_exit,
        .skip_emulated_instruction = skip_emulated_instruction,
        .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
  };
  
+ /*
+  * The default MMIO mask is a single bit (excluding the present bit),
+  * which could conflict with the memory encryption bit. Check for
+  * memory encryption support and override the default MMIO mask if
+  * memory encryption is enabled.
+  */
+ static __init void svm_adjust_mmio_mask(void)
+ {
+       unsigned int enc_bit, mask_bit;
+       u64 msr, mask;
+       /* If there is no memory encryption support, use existing mask */
+       if (cpuid_eax(0x80000000) < 0x8000001f)
+               return;
+       /* If memory encryption is not enabled, use existing mask */
+       rdmsrl(MSR_AMD64_SYSCFG, msr);
+       if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
+               return;
+       enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
+       mask_bit = boot_cpu_data.x86_phys_bits;
+       /* Increment the mask bit if it is the same as the encryption bit */
+       if (enc_bit == mask_bit)
+               mask_bit++;
+       /*
+        * If the mask bit location is below 52, then some bits above the
+        * physical addressing limit will always be reserved, so use the
+        * rsvd_bits() function to generate the mask. This mask, along with
+        * the present bit, will be used to generate a page fault with
+        * PFER.RSV = 1.
+        *
+        * If the mask bit location is 52 (or above), then clear the mask.
+        */
+       mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
+       kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+ }
+ static __init void svm_set_cpu_caps(void)
+ {
+       kvm_set_cpu_caps();
+       supported_xss = 0;
+       /* CPUID 0x80000001 and 0x8000000A (SVM features) */
+       if (nested) {
+               kvm_cpu_cap_set(X86_FEATURE_SVM);
+               if (nrips)
+                       kvm_cpu_cap_set(X86_FEATURE_NRIPS);
+               if (npt_enabled)
+                       kvm_cpu_cap_set(X86_FEATURE_NPT);
+               if (tsc_scaling)
+                       kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR);
+               /* Nested VM can receive #VMEXIT instead of triggering #GP */
+               kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK);
+       }
+       /* CPUID 0x80000008 */
+       if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+           boot_cpu_has(X86_FEATURE_AMD_SSBD))
+               kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
+       /* AMD PMU PERFCTR_CORE CPUID */
+       if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
+               kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE);
+       /* CPUID 0x8000001F (SME/SEV features) */
+       sev_set_cpu_caps();
+ }
+ static __init int svm_hardware_setup(void)
+ {
+       int cpu;
+       struct page *iopm_pages;
+       void *iopm_va;
+       int r;
+       unsigned int order = get_order(IOPM_SIZE);
+       /*
+        * NX is required for shadow paging and for NPT if the NX huge pages
+        * mitigation is enabled.
+        */
+       if (!boot_cpu_has(X86_FEATURE_NX)) {
+               pr_err_ratelimited("NX (Execute Disable) not supported\n");
+               return -EOPNOTSUPP;
+       }
+       kvm_enable_efer_bits(EFER_NX);
+       iopm_pages = alloc_pages(GFP_KERNEL, order);
+       if (!iopm_pages)
+               return -ENOMEM;
+       iopm_va = page_address(iopm_pages);
+       memset(iopm_va, 0xff, PAGE_SIZE * (1 << order));
+       iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
+       init_msrpm_offsets();
+       supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+       if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+               kvm_enable_efer_bits(EFER_FFXSR);
+       if (tsc_scaling) {
+               if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+                       tsc_scaling = false;
+               } else {
+                       pr_info("TSC scaling supported\n");
+                       kvm_has_tsc_control = true;
+                       kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX;
+                       kvm_tsc_scaling_ratio_frac_bits = 32;
+               }
+       }
+       tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
+       /* Check for pause filtering support */
+       if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+               pause_filter_count = 0;
+               pause_filter_thresh = 0;
+       } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+               pause_filter_thresh = 0;
+       }
+       if (nested) {
+               printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+               kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
+       }
+       /*
+        * KVM's MMU doesn't support using 2-level paging for itself, and thus
+        * NPT isn't supported if the host is using 2-level paging since host
+        * CR4 is unchanged on VMRUN.
+        */
+       if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
+               npt_enabled = false;
+       if (!boot_cpu_has(X86_FEATURE_NPT))
+               npt_enabled = false;
+       /* Force VM NPT level equal to the host's paging level */
+       kvm_configure_mmu(npt_enabled, get_npt_level(),
+                         get_npt_level(), PG_LEVEL_1G);
+       pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+       /* Note, SEV setup consumes npt_enabled. */
+       sev_hardware_setup();
+       svm_hv_hardware_setup();
+       svm_adjust_mmio_mask();
+       for_each_possible_cpu(cpu) {
+               r = svm_cpu_init(cpu);
+               if (r)
+                       goto err;
+       }
+       if (nrips) {
+               if (!boot_cpu_has(X86_FEATURE_NRIPS))
+                       nrips = false;
+       }
+       enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
+       if (enable_apicv) {
+               pr_info("AVIC enabled\n");
+               amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+       } else {
+               svm_x86_ops.vcpu_blocking = NULL;
+               svm_x86_ops.vcpu_unblocking = NULL;
+       }
+       if (vls) {
+               if (!npt_enabled ||
+                   !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) ||
+                   !IS_ENABLED(CONFIG_X86_64)) {
+                       vls = false;
+               } else {
+                       pr_info("Virtual VMLOAD VMSAVE supported\n");
+               }
+       }
+       if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK))
+               svm_gp_erratum_intercept = false;
+       if (vgif) {
+               if (!boot_cpu_has(X86_FEATURE_VGIF))
+                       vgif = false;
+               else
+                       pr_info("Virtual GIF supported\n");
+       }
+       if (lbrv) {
+               if (!boot_cpu_has(X86_FEATURE_LBRV))
+                       lbrv = false;
+               else
+                       pr_info("LBR virtualization supported\n");
+       }
+       if (!enable_pmu)
+               pr_info("PMU virtualization is disabled\n");
+       svm_set_cpu_caps();
+       /*
+        * It seems that on AMD processors PTE's accessed bit is
+        * being set by the CPU hardware before the NPF vmexit.
+        * This is not expected behaviour and our tests fail because
+        * of it.
+        * A workaround here is to disable support for
+        * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled.
+        * In this case userspace can know if there is support using
+        * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle
+        * it
+        * If future AMD CPU models change the behaviour described above,
+        * this variable can be changed accordingly
+        */
+       allow_smaller_maxphyaddr = !npt_enabled;
+       return 0;
+ err:
+       svm_hardware_teardown();
+       return r;
+ }
  static struct kvm_x86_init_ops svm_init_ops __initdata = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
diff --combined arch/x86/kvm/vmx/vmx.c
index 1b2e9d8c5cc9b0c0743ec034156869d83a9a1f0f,a02a28ce7cc34f95fe6ab7a77bc7c990587e45c9..4ac676066d6079b5d918d2daf54af045fe14c796
@@@ -3931,12 -3931,10 +3931,10 @@@ static void vmx_msr_filter_changed(stru
        pt_update_intercept_for_msr(vcpu);
  }
  
- static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
-                                                    bool nested)
+ static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
+                                                    int pi_vec)
  {
  #ifdef CONFIG_SMP
-       int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR;
        if (vcpu->mode == IN_GUEST_MODE) {
                /*
                 * The vector of interrupt to be delivered to vcpu had
                 */
  
                apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
-               return true;
+               return;
        }
  #endif
-       return false;
+       /*
+        * The vCPU isn't in the guest; wake the vCPU in case it is blocking,
+        * otherwise do nothing as KVM will grab the highest priority pending
+        * IRQ via ->sync_pir_to_irr() in vcpu_enter_guest().
+        */
+       kvm_vcpu_wake_up(vcpu);
  }
  
  static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
                smp_mb__after_atomic();
  
                /* the PIR and ON have been set by L1. */
-               if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true))
-                       kvm_vcpu_kick(vcpu);
+               kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR);
                return 0;
        }
        return -1;
@@@ -4035,9 -4037,7 +4037,7 @@@ static int vmx_deliver_posted_interrupt
         * guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a
         * posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE.
         */
-       if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
-               kvm_vcpu_kick(vcpu);
+       kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR);
        return 0;
  }
  
@@@ -5426,6 -5426,14 +5426,14 @@@ static int handle_nmi_window(struct kvm
        return 1;
  }
  
+ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       return vmx->emulation_required && !vmx->rmode.vm86_active &&
+              vcpu->arch.exception.pending;
+ }
  static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
                if (!kvm_emulate_instruction(vcpu, 0))
                        return 0;
  
-               if (vmx->emulation_required && !vmx->rmode.vm86_active &&
-                   vcpu->arch.exception.pending) {
+               if (vmx_emulation_required_with_pending_exception(vcpu)) {
                        kvm_prepare_emulation_failure_exit(vcpu);
                        return 0;
                }
        return 1;
  }
  
+ static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu)
+ {
+       if (vmx_emulation_required_with_pending_exception(vcpu)) {
+               kvm_prepare_emulation_failure_exit(vcpu);
+               return 0;
+       }
+       return 1;
+ }
  static void grow_ple_window(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@@ -6434,9 -6451,7 +6451,9 @@@ void vmx_do_interrupt_nmi_irqoff(unsign
  static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
                                        unsigned long entry)
  {
 -      kvm_before_interrupt(vcpu);
 +      bool is_nmi = entry == (unsigned long)asm_exc_nmi_noist;
 +
 +      kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
        vmx_do_interrupt_nmi_irqoff(entry);
        kvm_after_interrupt(vcpu);
  }
@@@ -6928,6 -6943,8 +6945,8 @@@ static int vmx_create_vcpu(struct kvm_v
        BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
        vmx = to_vmx(vcpu);
  
+       INIT_LIST_HEAD(&vmx->pi_wakeup_list);
        err = -ENOMEM;
  
        vmx->vpid = allocate_vpid();
@@@ -7549,25 -7566,6 +7568,6 @@@ void vmx_update_cpu_dirty_logging(struc
                secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
  }
  
- static int vmx_pre_block(struct kvm_vcpu *vcpu)
- {
-       if (pi_pre_block(vcpu))
-               return 1;
-       if (kvm_lapic_hv_timer_in_use(vcpu))
-               kvm_lapic_switch_to_sw_timer(vcpu);
-       return 0;
- }
- static void vmx_post_block(struct kvm_vcpu *vcpu)
- {
-       if (kvm_x86_ops.set_hv_timer)
-               kvm_lapic_switch_to_hv_timer(vcpu);
-       pi_post_block(vcpu);
- }
  static void vmx_setup_mce(struct kvm_vcpu *vcpu)
  {
        if (vcpu->arch.mcg_cap & MCG_LMCE_P)
@@@ -7710,6 -7708,7 +7710,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
        .tlb_flush_gva = vmx_flush_tlb_gva,
        .tlb_flush_guest = vmx_flush_tlb_guest,
  
+       .vcpu_pre_run = vmx_vcpu_pre_run,
        .run = vmx_vcpu_run,
        .handle_exit = vmx_handle_exit,
        .skip_emulated_instruction = vmx_skip_emulated_instruction,
        .cpu_dirty_log_size = PML_ENTITY_NUM,
        .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
  
-       .pre_block = vmx_pre_block,
-       .post_block = vmx_post_block,
        .pmu_ops = &intel_pmu_ops,
        .nested_ops = &vmx_nested_ops,
  
        .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
  };
  
 +static unsigned int vmx_handle_intel_pt_intr(void)
 +{
 +      struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
 +
 +      /* '0' on failure so that the !PT case can use a RET0 static call. */
 +      if (!kvm_arch_pmi_in_guest(vcpu))
 +              return 0;
 +
 +      kvm_make_request(KVM_REQ_PMI, vcpu);
 +      __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
 +                (unsigned long *)&vcpu->arch.pmu.global_status);
 +      return 1;
 +}
 +
  static __init void vmx_setup_user_return_msrs(void)
  {
  
                kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
  }
  
 +static struct kvm_x86_init_ops vmx_init_ops __initdata;
 +
  static __init int hardware_setup(void)
  {
        unsigned long host_bndcfgs;
                return -EINVAL;
        if (!enable_ept || !cpu_has_vmx_intel_pt())
                pt_mode = PT_MODE_SYSTEM;
 +      if (pt_mode == PT_MODE_HOST_GUEST)
 +              vmx_init_ops.handle_intel_pt_intr = vmx_handle_intel_pt_intr;
 +      else
 +              vmx_init_ops.handle_intel_pt_intr = NULL;
  
        setup_default_sgx_lepubkeyhash();
  
@@@ -8023,7 -7999,6 +8021,7 @@@ static struct kvm_x86_init_ops vmx_init
        .disabled_by_bios = vmx_disabled_by_bios,
        .check_processor_compatibility = vmx_check_processor_compat,
        .hardware_setup = hardware_setup,
 +      .handle_intel_pt_intr = NULL,
  
        .runtime_ops = &vmx_x86_ops,
  };
diff --combined arch/x86/kvm/x86.c
index 76b4803dd3bdd0584bf01371df0f1652afdb8416,55518b7d3b964f5c2e44e3fbe7590fa6c7152a42..9e43d756312f17063dbf7a68adb3b6d1066f9080
@@@ -187,6 -187,11 +187,11 @@@ module_param(force_emulation_prefix, bo
  int __read_mostly pi_inject_timer = -1;
  module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
  
+ /* Enable/disable PMU virtualization */
+ bool __read_mostly enable_pmu = true;
+ EXPORT_SYMBOL_GPL(enable_pmu);
+ module_param(enable_pmu, bool, 0444);
  /*
   * Restoring the host value for MSRs that are only consumed when running in
   * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@@ -5230,17 -5235,6 +5235,6 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                struct kvm_cpuid __user *cpuid_arg = argp;
                struct kvm_cpuid cpuid;
  
-               /*
-                * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
-                * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
-                * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
-                * faults due to reusing SPs/SPTEs.  In practice no sane VMM mucks with
-                * the core vCPU model on the fly, so fail.
-                */
-               r = -EINVAL;
-               if (vcpu->arch.last_vmentry_cpu != -1)
-                       goto out;
                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
                struct kvm_cpuid2 __user *cpuid_arg = argp;
                struct kvm_cpuid2 cpuid;
  
-               /*
-                * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in
-                * KVM_SET_CPUID case above.
-                */
-               r = -EINVAL;
-               if (vcpu->arch.last_vmentry_cpu != -1)
-                       goto out;
                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
                        goto out;
@@@ -8665,6 -8651,50 +8651,6 @@@ static void kvm_timer_init(void
                          kvmclock_cpu_online, kvmclock_cpu_down_prep);
  }
  
 -DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 -EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
 -
 -int kvm_is_in_guest(void)
 -{
 -      return __this_cpu_read(current_vcpu) != NULL;
 -}
 -
 -static int kvm_is_user_mode(void)
 -{
 -      int user_mode = 3;
 -
 -      if (__this_cpu_read(current_vcpu))
 -              user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
 -
 -      return user_mode != 0;
 -}
 -
 -static unsigned long kvm_get_guest_ip(void)
 -{
 -      unsigned long ip = 0;
 -
 -      if (__this_cpu_read(current_vcpu))
 -              ip = kvm_rip_read(__this_cpu_read(current_vcpu));
 -
 -      return ip;
 -}
 -
 -static void kvm_handle_intel_pt_intr(void)
 -{
 -      struct kvm_vcpu *vcpu = __this_cpu_read(current_vcpu);
 -
 -      kvm_make_request(KVM_REQ_PMI, vcpu);
 -      __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT,
 -                      (unsigned long *)&vcpu->arch.pmu.global_status);
 -}
 -
 -static struct perf_guest_info_callbacks kvm_guest_cbs = {
 -      .is_in_guest            = kvm_is_in_guest,
 -      .is_user_mode           = kvm_is_user_mode,
 -      .get_guest_ip           = kvm_get_guest_ip,
 -      .handle_intel_pt_intr   = kvm_handle_intel_pt_intr,
 -};
 -
  #ifdef CONFIG_X86_64
  static void pvclock_gtod_update_fn(struct work_struct *work)
  {
@@@ -8777,6 -8807,8 +8763,6 @@@ int kvm_arch_init(void *opaque
  
        kvm_timer_init();
  
 -      perf_register_guest_info_callbacks(&kvm_guest_cbs);
 -
        if (boot_cpu_has(X86_FEATURE_XSAVE)) {
                host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
                supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
@@@ -8808,6 -8840,7 +8794,6 @@@ void kvm_arch_exit(void
                clear_hv_tscchange_cb();
  #endif
        kvm_lapic_exit();
 -      perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
  
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
                cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
@@@ -9945,10 -9978,11 +9931,11 @@@ static int vcpu_enter_guest(struct kvm_
        smp_mb__after_srcu_read_unlock();
  
        /*
-        * This handles the case where a posted interrupt was
-        * notified with kvm_vcpu_kick.  Assigned devices can
-        * use the POSTED_INTR_VECTOR even if APICv is disabled,
-        * so do it even if APICv is disabled on this vCPU.
+        * Process pending posted interrupts to handle the case where the
+        * notification IRQ arrived in the host, or was never sent (because the
+        * target vCPU wasn't running).  Do this regardless of the vCPU's APICv
+        * status, KVM doesn't update assigned devices when APICv is inhibited,
+        * i.e. they can post interrupts even if APICv is temporarily disabled.
         */
        if (kvm_lapic_enabled(vcpu))
                static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu);
         * interrupts on processors that implement an interrupt shadow, the
         * stat.exits increment will do nicely.
         */
 -      kvm_before_interrupt(vcpu);
 +      kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
        local_irq_enable();
        ++vcpu->stat.exits;
        local_irq_disable();
  
  static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
  {
-       if (!kvm_arch_vcpu_runnable(vcpu) &&
-           (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
+       bool hv_timer;
+       if (!kvm_arch_vcpu_runnable(vcpu)) {
+               /*
+                * Switch to the software timer before halt-polling/blocking as
+                * the guest's timer may be a break event for the vCPU, and the
+                * hypervisor timer runs only when the CPU is in guest mode.
+                * Switch before halt-polling so that KVM recognizes an expired
+                * timer before blocking.
+                */
+               hv_timer = kvm_lapic_hv_timer_in_use(vcpu);
+               if (hv_timer)
+                       kvm_lapic_switch_to_sw_timer(vcpu);
                srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
                if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
                        kvm_vcpu_halt(vcpu);
                        kvm_vcpu_block(vcpu);
                vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
  
-               if (kvm_x86_ops.post_block)
-                       static_call(kvm_x86_post_block)(vcpu);
+               if (hv_timer)
+                       kvm_lapic_switch_to_hv_timer(vcpu);
  
                if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
                        return 1;
@@@ -10316,6 -10362,11 +10315,11 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                        r = -EINTR;
                        goto out;
                }
+               /*
+                * It should be impossible for the hypervisor timer to be in
+                * use before KVM has ever run the vCPU.
+                */
+               WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
                kvm_vcpu_block(vcpu);
                if (kvm_apic_accept_events(vcpu) < 0) {
                        r = 0;
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
  
-       if (kvm_run->immediate_exit)
+       if (kvm_run->immediate_exit) {
                r = -EINTR;
-       else
-               r = vcpu_run(vcpu);
+               goto out;
+       }
+       r = static_call(kvm_x86_vcpu_pre_run)(vcpu);
+       if (r <= 0)
+               goto out;
+       r = vcpu_run(vcpu);
  
  out:
        kvm_put_guest_fpu(vcpu);
@@@ -11393,8 -11450,6 +11403,8 @@@ int kvm_arch_hardware_setup(void *opaqu
        memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
        kvm_ops_static_call_update();
  
 +      kvm_register_perf_callbacks(ops->handle_intel_pt_intr);
 +
        if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
                supported_xss = 0;
  
  
  void kvm_arch_hardware_unsetup(void)
  {
 +      kvm_unregister_perf_callbacks();
 +
        static_call(kvm_x86_hardware_unsetup)();
  }
  
@@@ -12017,11 -12070,6 +12027,11 @@@ bool kvm_arch_vcpu_in_kernel(struct kvm
        return vcpu->arch.preempted_in_kernel;
  }
  
 +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
 +{
 +      return kvm_rip_read(vcpu);
 +}
 +
  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  {
        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
diff --combined arch/x86/kvm/x86.h
index bec8ed090abcef044e78ec61a7ec7a404a9beee0,1ebd5a7594da74dbf536ae6737de8c874c73630c..635b75f9e14540aff2aceb6e4b1c5c3221c444ab
@@@ -336,6 -336,7 +336,7 @@@ extern u64 host_xcr0
  extern u64 supported_xcr0;
  extern u64 host_xss;
  extern u64 supported_xss;
+ extern bool enable_pmu;
  
  static inline bool kvm_mpx_supported(void)
  {
@@@ -391,27 -392,18 +392,27 @@@ static inline bool kvm_cstate_in_guest(
        return kvm->arch.cstate_in_guest;
  }
  
 -DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
 +enum kvm_intr_type {
 +      /* Values are arbitrary, but must be non-zero. */
 +      KVM_HANDLING_IRQ = 1,
 +      KVM_HANDLING_NMI,
 +};
  
 -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
 +static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
 +                                      enum kvm_intr_type intr)
  {
 -      __this_cpu_write(current_vcpu, vcpu);
 +      WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
  }
  
  static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
  {
 -      __this_cpu_write(current_vcpu, NULL);
 +      WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
  }
  
 +static inline bool kvm_handling_nmi_from_guest(struct kvm_vcpu *vcpu)
 +{
 +      return vcpu->arch.handling_intr_from_guest == KVM_HANDLING_NMI;
 +}
  
  static inline bool kvm_pat_valid(u64 data)
  {
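
Tying the x86.h hunks above to the perf rework elsewhere in this merge: kvm_before_interrupt() and kvm_after_interrupt() now record in a per-vCPU field why KVM is handling a host interrupt (IRQ vs. NMI), replacing the old per-CPU current_vcpu variable, and kvm_arch_pmi_in_guest()/kvm_handling_nmi_from_guest() read that field. The sketch below is a hypothetical helper showing only the bracketing contract; the real call sites are the svm.c, vmx.c and x86.c hunks in this diff.

	/* Hypothetical helper, for illustration only: how a host NMI that fired
	 * while the guest was running is bracketed with the reworked API. */
	static void example_handle_nmi_from_guest(struct kvm_vcpu *vcpu)
	{
		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);	/* sets vcpu->arch.handling_intr_from_guest */

		/*
		 * While the flag is set, kvm_arch_pmi_in_guest(vcpu) and
		 * kvm_handling_nmi_from_guest(vcpu) report that the PMI/NMI
		 * came from guest context, e.g. for __kvm_perf_overflow().
		 */

		kvm_after_interrupt(vcpu);			/* clears the flag */
	}
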
diff --combined include/linux/kvm_host.h
index d89d564f7c19a198677177e800e919aac927872d,f079820f52b50c7785174b3c36c6b421b64ff74d..06912d6b39d051013b731ae04b3b26c9f68a5efb
@@@ -309,9 -309,6 +309,6 @@@ struct kvm_vcpu 
        u64 requests;
        unsigned long guest_debug;
  
-       int pre_pcpu;
-       struct list_head blocked_vcpu_list;
        struct mutex mutex;
        struct kvm_run *run;
  
@@@ -1424,16 -1421,6 +1421,16 @@@ static inline bool kvm_arch_intc_initia
  }
  #endif
  
 +#ifdef CONFIG_GUEST_PERF_EVENTS
 +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu);
 +
 +void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void));
 +void kvm_unregister_perf_callbacks(void);
 +#else
 +static inline void kvm_register_perf_callbacks(void *ign) {}
 +static inline void kvm_unregister_perf_callbacks(void) {}
 +#endif /* CONFIG_GUEST_PERF_EVENTS */
 +
  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
  void kvm_arch_destroy_vm(struct kvm *kvm);
  void kvm_arch_sync_events(struct kvm *kvm);
diff --combined virt/kvm/kvm_main.c
index 504158f0e1314a3566427aa4ad3c6dbfbd3426b1,5a1164483e6c990b1c98b716615c3ef3f323ea53..9a20f2299386eb0385746725fb73c3f5041faee6
@@@ -427,9 -427,6 +427,6 @@@ static void kvm_vcpu_init(struct kvm_vc
  #endif
        kvm_async_pf_vcpu_init(vcpu);
  
-       vcpu->pre_pcpu = -1;
-       INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
        kvm_vcpu_set_in_spin_loop(vcpu, false);
        kvm_vcpu_set_dy_eligible(vcpu, false);
        vcpu->preempted = false;
@@@ -3163,8 -3160,10 +3160,10 @@@ void mark_page_dirty_in_slot(struct kv
  {
        struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
  
+ #ifdef CONFIG_HAVE_KVM_DIRTY_RING
        if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
                return;
+ #endif
  
        if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
                unsigned long rel_gfn = gfn - memslot->base_gfn;
@@@ -5603,50 -5602,6 +5602,50 @@@ struct kvm_vcpu * __percpu *kvm_get_run
          return &kvm_running_vcpu;
  }
  
 +#ifdef CONFIG_GUEST_PERF_EVENTS
 +static unsigned int kvm_guest_state(void)
 +{
 +      struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
 +      unsigned int state;
 +
 +      if (!kvm_arch_pmi_in_guest(vcpu))
 +              return 0;
 +
 +      state = PERF_GUEST_ACTIVE;
 +      if (!kvm_arch_vcpu_in_kernel(vcpu))
 +              state |= PERF_GUEST_USER;
 +
 +      return state;
 +}
 +
 +static unsigned long kvm_guest_get_ip(void)
 +{
 +      struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
 +
 +      /* Retrieving the IP must be guarded by a call to kvm_guest_state(). */
 +      if (WARN_ON_ONCE(!kvm_arch_pmi_in_guest(vcpu)))
 +              return 0;
 +
 +      return kvm_arch_vcpu_get_ip(vcpu);
 +}
 +
 +static struct perf_guest_info_callbacks kvm_guest_cbs = {
 +      .state                  = kvm_guest_state,
 +      .get_ip                 = kvm_guest_get_ip,
 +      .handle_intel_pt_intr   = NULL,
 +};
 +
 +void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void))
 +{
 +      kvm_guest_cbs.handle_intel_pt_intr = pt_intr_handler;
 +      perf_register_guest_info_callbacks(&kvm_guest_cbs);
 +}
 +void kvm_unregister_perf_callbacks(void)
 +{
 +      perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
 +}
 +#endif
 +
  struct kvm_cpu_compat_check {
        void *opaque;
        int *ret;
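
Putting the generic pieces above together with the x86.c hunk earlier in this merge: an architecture that enables CONFIG_GUEST_PERF_EVENTS implements kvm_arch_vcpu_get_ip() and then registers/unregisters the shared perf callbacks at hardware setup/teardown, passing an Intel PT PMI handler only if it has one. The wrappers below are hypothetical and shown purely to summarize that flow; on x86 the equivalent calls live in kvm_arch_hardware_setup()/kvm_arch_hardware_unsetup() as shown in the arch/x86/kvm/x86.c section of this diff.

	/* Hypothetical arch-side summary of the registration flow (illustration only). */
	static int example_arch_perf_setup(unsigned int (*pt_intr_handler)(void))
	{
		/* NULL is fine: the Intel PT PMI hook is optional and x86-specific. */
		kvm_register_perf_callbacks(pt_intr_handler);
		return 0;
	}

	static void example_arch_perf_teardown(void)
	{
		kvm_unregister_perf_callbacks();
	}
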