Git Repo - linux.git/commitdiff
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <[email protected]>
Tue, 12 Jun 2018 18:34:04 +0000 (11:34 -0700)
committer Linus Torvalds <[email protected]>
Tue, 12 Jun 2018 18:34:04 +0000 (11:34 -0700)
Pull KVM updates from Paolo Bonzini:
 "Small update for KVM:

  ARM:
   - lazy context-switching of FPSIMD registers on arm64
   - "split" regions for vGIC redistributor

  s390:
   - cleanups for nested
   - clock handling
   - crypto
   - storage keys
   - control register bits

  x86:
   - many bugfixes
   - implement more Hyper-V super powers
   - implement lapic_timer_advance_ns even when the LAPIC timer is
     emulated using the processor's VMX preemption timer.
   - two security-related bugfixes at the top of the branch"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (79 commits)
  kvm: fix typo in flag name
  kvm: x86: use correct privilege level for sgdt/sidt/fxsave/fxrstor access
  KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and kvm_write_guest_virt_system
  KVM: x86: introduce linear_{read,write}_system
  kvm: nVMX: Enforce cpl=0 for VMX instructions
  kvm: nVMX: Add support for "VMWRITE to any supported field"
  kvm: nVMX: Restrict VMX capability MSR changes
  KVM: VMX: Optimize tscdeadline timer latency
  KVM: docs: nVMX: Remove known limitations as they do not exist now
  KVM: docs: mmu: KVM support exposing SLAT to guests
  kvm: no need to check return value of debugfs_create functions
  kvm: Make VM ioctl do valloc for some archs
  kvm: Change return type to vm_fault_t
  KVM: docs: mmu: Fix link to NPT presentation from KVM Forum 2008
  kvm: x86: Amend the KVM_GET_SUPPORTED_CPUID API documentation
  KVM: x86: hyperv: declare KVM_CAP_HYPERV_TLBFLUSH capability
  KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}_EX implementation
  KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} implementation
  KVM: introduce kvm_make_vcpus_request_mask() API
  KVM: x86: hyperv: do rep check for each hypercall separately
  ...

25 files changed:
arch/arm/include/asm/kvm_host.h
arch/arm64/Kconfig
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/thread_info.h
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/ptrace.c
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/switch.c
arch/mips/kvm/mips.c
arch/powerpc/kvm/book3s_hv.c
arch/s390/include/asm/pgtable.h
arch/s390/kvm/priv.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/sched.h
virt/kvm/arm/arm.c

diff --combined arch/arm/include/asm/kvm_host.h
index 2d75e77bf7bb341d6e1d39a76351e0eee9390c39,4b12f32f540c2c8d7e88adfa567cf6206927ae64..1f1fe4109b026690ab80ca0d3e31355f77e78f04
@@@ -21,7 -21,6 +21,7 @@@
  
  #include <linux/types.h>
  #include <linux/kvm_types.h>
 +#include <asm/cputype.h>
  #include <asm/kvm.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_mmio.h>
@@@ -281,6 -280,7 +281,7 @@@ void kvm_mmu_wp_memory_region(struct kv
  
  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
  
+ static inline bool kvm_arch_check_sve_has_vhe(void) { return true; }
  static inline void kvm_arch_hardware_unsetup(void) {}
  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
@@@ -304,40 -304,28 +305,49 @@@ int kvm_arm_vcpu_arch_get_attr(struct k
  int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
                               struct kvm_device_attr *attr);
  
- /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
- static inline void kvm_fpsimd_flush_cpu_state(void) {}
+ /*
+  * VFP/NEON switching is all done by the hyp switch code, so no need to
+  * coordinate with host context handling for this state:
+  */
+ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
  
  static inline void kvm_arm_vhe_guest_enter(void) {}
  static inline void kvm_arm_vhe_guest_exit(void) {}
  
  static inline bool kvm_arm_harden_branch_predictor(void)
 +{
 +      switch(read_cpuid_part()) {
 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 +      case ARM_CPU_PART_BRAHMA_B15:
 +      case ARM_CPU_PART_CORTEX_A12:
 +      case ARM_CPU_PART_CORTEX_A15:
 +      case ARM_CPU_PART_CORTEX_A17:
 +              return true;
 +#endif
 +      default:
 +              return false;
 +      }
 +}
 +
 +#define KVM_SSBD_UNKNOWN              -1
 +#define KVM_SSBD_FORCE_DISABLE                0
 +#define KVM_SSBD_KERNEL               1
 +#define KVM_SSBD_FORCE_ENABLE         2
 +#define KVM_SSBD_MITIGATED            3
 +
 +static inline int kvm_arm_have_ssbd(void)
  {
        /* No way to detect it yet, pretend it is not there. */
 -      return false;
 +      return KVM_SSBD_UNKNOWN;
  }
  
  static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
  static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
  
+ #define __KVM_HAVE_ARCH_VM_ALLOC
+ struct kvm *kvm_arch_alloc_vm(void);
+ void kvm_arch_free_vm(struct kvm *kvm);
  #endif /* __ARM_KVM_HOST_H__ */
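The __KVM_HAVE_ARCH_VM_ALLOC hooks declared just above (and again in the arm64 header further down) let the architecture choose how struct kvm is allocated, which is what the "kvm: Make VM ioctl do valloc for some archs" entry in the shortlog refers to. A plausible sketch of the shared arm/arm64 implementation; the real one lives in virt/kvm/arm/arm.c (listed among the 25 files but not excerpted here) and may differ in detail:

struct kvm *kvm_arch_alloc_vm(void)
{
	/* assumed rationale: !VHE hyp mappings want a linear-map (kmalloc) address */
	if (!has_vhe())
		return kzalloc(sizeof(struct kvm), GFP_KERNEL);

	/* with VHE, a large struct kvm need not be physically contiguous */
	return vzalloc(sizeof(struct kvm));
}

void kvm_arch_free_vm(struct kvm *kvm)
{
	if (!has_vhe())
		kfree(kvm);
	else
		vfree(kvm);
}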
diff --combined arch/arm64/Kconfig
index 9795b59aa28a1ecc01979f819d68beaa21192311,b0d3820081c8bbb68445dce1c3f01db14f2c69e5..9fd4a8ccce0760cd2c7f026b1f8394af5b313081
@@@ -7,19 -7,16 +7,19 @@@ config ARM6
        select ACPI_REDUCED_HARDWARE_ONLY if ACPI
        select ACPI_MCFG if ACPI
        select ACPI_SPCR_TABLE if ACPI
 +      select ACPI_PPTT if ACPI
        select ARCH_CLOCKSOURCE_DATA
        select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_ELF_RANDOMIZE
 +      select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
        select ARCH_HAS_KCOV
        select ARCH_HAS_MEMBARRIER_SYNC_CORE
 +      select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_SET_MEMORY
        select ARCH_HAS_SG_CHAIN
        select ARCH_HAS_STRICT_KERNEL_RWX
        select HAVE_CONTEXT_TRACKING
        select HAVE_DEBUG_BUGVERBOSE
        select HAVE_DEBUG_KMEMLEAK
 -      select HAVE_DMA_API_DEBUG
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select IRQ_FORCED_THREADING
        select MODULES_USE_ELF_RELA
        select MULTI_IRQ_HANDLER
 +      select NEED_DMA_MAP_STATE
 +      select NEED_SG_DMA_LENGTH
        select NO_BOOTMEM
        select OF
        select OF_EARLY_FLATTREE
        select POWER_SUPPLY
        select REFCOUNT_FULL
        select SPARSE_IRQ
 +      select SWIOTLB
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        help
  config 64BIT
        def_bool y
  
 -config ARCH_PHYS_ADDR_T_64BIT
 -      def_bool y
 -
  config MMU
        def_bool y
  
@@@ -239,9 -237,24 +239,9 @@@ config ZONE_DMA3
  config HAVE_GENERIC_GUP
        def_bool y
  
 -config ARCH_DMA_ADDR_T_64BIT
 -      def_bool y
 -
 -config NEED_DMA_MAP_STATE
 -      def_bool y
 -
 -config NEED_SG_DMA_LENGTH
 -      def_bool y
 -
  config SMP
        def_bool y
  
 -config SWIOTLB
 -      def_bool y
 -
 -config IOMMU_HELPER
 -      def_bool SWIOTLB
 -
  config KERNEL_MODE_NEON
        def_bool y
  
@@@ -925,15 -938,6 +925,15 @@@ config HARDEN_EL2_VECTOR
  
          If unsure, say Y.
  
 +config ARM64_SSBD
 +      bool "Speculative Store Bypass Disable" if EXPERT
 +      default y
 +      help
 +        This enables mitigation of the bypassing of previous stores
 +        by speculative loads.
 +
 +        If unsure, say Y.
 +
  menuconfig ARMV8_DEPRECATED
        bool "Emulate deprecated/obsolete ARMv8 instructions"
        depends on COMPAT
@@@ -1045,7 -1049,6 +1045,7 @@@ config ARM64_PA
  
  config ARM64_LSE_ATOMICS
        bool "Atomic instructions"
 +      default y
        help
          As part of the Large System Extensions, ARMv8.1 introduces new
          atomic instructions that are designed specifically to scale in
          Say Y here to make use of these instructions for the in-kernel
          atomic routines. This incurs a small overhead on CPUs that do
          not support these instructions and requires the kernel to be
 -        built with binutils >= 2.25.
 +        built with binutils >= 2.25 in order for the new instructions
 +        to be used.
  
  config ARM64_VHE
        bool "Enable support for Virtualization Host Extensions (VHE)"
@@@ -1128,6 -1130,7 +1128,7 @@@ endmen
  config ARM64_SVE
        bool "ARM Scalable Vector Extension support"
        default y
+       depends on !KVM || ARM64_VHE
        help
          The Scalable Vector Extension (SVE) is an extension to the AArch64
          execution state which complements and extends the SIMD functionality
          booting the kernel.  If unsure and you are not observing these
          symptoms, you should assume that it is safe to say Y.
  
+         CPUs that support SVE are architecturally required to support the
+         Virtualization Host Extensions (VHE), so the kernel makes no
+         provision for supporting SVE alongside KVM without VHE enabled.
+         Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
+         KVM in the same kernel image.
  config ARM64_MODULE_PLTS
        bool
        select HAVE_MOD_ARCH_SPECIFIC
diff --combined arch/arm64/include/asm/cpufeature.h
index 55bc1f073bfbe4b8905cb43da8cb21a509fad686,0a6b7133195e1f0f3a07adec705929334f29e46f..1717ba1db35ddb935720c20ec46c318d59ca9b83
@@@ -11,9 -11,7 +11,7 @@@
  
  #include <asm/cpucaps.h>
  #include <asm/cputype.h>
- #include <asm/fpsimd.h>
  #include <asm/hwcap.h>
- #include <asm/sigcontext.h>
  #include <asm/sysreg.h>
  
  /*
@@@ -510,55 -508,6 +508,28 @@@ static inline bool system_supports_sve(
                cpus_have_const_cap(ARM64_SVE);
  }
  
- /*
-  * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
-  * vector length.
-  *
-  * Use only if SVE is present.
-  * This function clobbers the SVE vector length.
-  */
- static inline u64 read_zcr_features(void)
- {
-       u64 zcr;
-       unsigned int vq_max;
-       /*
-        * Set the maximum possible VL, and write zeroes to all other
-        * bits to see if they stick.
-        */
-       sve_kernel_enable(NULL);
-       write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-       zcr = read_sysreg_s(SYS_ZCR_EL1);
-       zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
-       vq_max = sve_vq_from_vl(sve_get_vl());
-       zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-       return zcr;
- }
 +#define ARM64_SSBD_UNKNOWN            -1
 +#define ARM64_SSBD_FORCE_DISABLE      0
 +#define ARM64_SSBD_KERNEL             1
 +#define ARM64_SSBD_FORCE_ENABLE               2
 +#define ARM64_SSBD_MITIGATED          3
 +
 +static inline int arm64_get_ssbd_state(void)
 +{
 +#ifdef CONFIG_ARM64_SSBD
 +      extern int ssbd_state;
 +      return ssbd_state;
 +#else
 +      return ARM64_SSBD_UNKNOWN;
 +#endif
 +}
 +
 +#ifdef CONFIG_ARM64_SSBD
 +void arm64_set_ssbd_mitigation(bool state);
 +#else
 +static inline void arm64_set_ssbd_mitigation(bool state) {}
 +#endif
 +
  #endif /* __ASSEMBLY__ */
  
  #endif
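The five ARM64_SSBD_* values above form a small state machine; later in this diff (arch/arm64/include/asm/kvm_host.h) they are translated one-to-one into KVM_SSBD_* values. A purely illustrative helper, not part of the patch, spelling out the intended meaning of each state:

/* Illustrative only -- the string descriptions are an editorial gloss. */
static const char *ssbd_state_str(int state)
{
	switch (state) {
	case ARM64_SSBD_FORCE_DISABLE:	return "mitigation forced off";
	case ARM64_SSBD_KERNEL:		return "mitigation applied dynamically, per task";
	case ARM64_SSBD_FORCE_ENABLE:	return "mitigation forced on";
	case ARM64_SSBD_MITIGATED:	return "CPU/firmware reports no mitigation needed";
	case ARM64_SSBD_UNKNOWN:
	default:			return "state unknown (e.g. CONFIG_ARM64_SSBD=n)";
	}
}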
diff --combined arch/arm64/include/asm/kvm_asm.h
index 951b2076a5e222036d8fd5eb612ec0302ec0da57,821a7032c0f71d65d5875505cd303b36ef3b1482..102b5a5c47b6cb4a00040e7efb295d357b20c8a3
@@@ -20,9 -20,6 +20,9 @@@
  
  #include <asm/virt.h>
  
 +#define       VCPU_WORKAROUND_2_FLAG_SHIFT    0
 +#define       VCPU_WORKAROUND_2_FLAG          (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
 +
  #define ARM_EXIT_WITH_SERROR_BIT  31
  #define ARM_EXCEPTION_CODE(x)   ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
  #define ARM_SERROR_PENDING(x)   !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
  /* The hyp-stub will return this for any kvm_call_hyp() call */
  #define ARM_EXCEPTION_HYP_GONE          HVC_STUB_ERR
  
- #define KVM_ARM64_DEBUG_DIRTY_SHIFT   0
- #define KVM_ARM64_DEBUG_DIRTY         (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+ #ifndef __ASSEMBLY__
+ #include <linux/mm.h>
  
  /* Translate a kernel address of @sym into its equivalent linear mapping */
  #define kvm_ksym_ref(sym)                                             \
        ({                                                              \
                void *val = &sym;                                       \
                if (!is_kernel_in_hyp_mode())                           \
-                       val = phys_to_virt((u64)&sym - kimage_voffset); \
+                       val = lm_alias(&sym);                           \
                val;                                                    \
         })
  
- #ifndef __ASSEMBLY__
  struct kvm;
  struct kvm_vcpu;
  
@@@ -74,37 -71,14 +74,37 @@@ extern u32 __kvm_get_mdcr_el2(void)
  
  extern u32 __init_stage2_translation(void);
  
 +/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
 +#define __hyp_this_cpu_ptr(sym)                                               \
 +      ({                                                              \
 +              void *__ptr = hyp_symbol_addr(sym);                     \
 +              __ptr += read_sysreg(tpidr_el2);                        \
 +              (typeof(&sym))__ptr;                                    \
 +       })
 +
 +#define __hyp_this_cpu_read(sym)                                      \
 +      ({                                                              \
 +              *__hyp_this_cpu_ptr(sym);                               \
 +       })
 +
  #else /* __ASSEMBLY__ */
  
 -.macro get_host_ctxt reg, tmp
 -      adr_l   \reg, kvm_host_cpu_state
 +.macro hyp_adr_this_cpu reg, sym, tmp
 +      adr_l   \reg, \sym
        mrs     \tmp, tpidr_el2
        add     \reg, \reg, \tmp
  .endm
  
 +.macro hyp_ldr_this_cpu reg, sym, tmp
 +      adr_l   \reg, \sym
 +      mrs     \tmp, tpidr_el2
 +      ldr     \reg,  [\reg, \tmp]
 +.endm
 +
 +.macro get_host_ctxt reg, tmp
 +      hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
 +.endm
 +
  .macro get_vcpu_ptr vcpu, ctxt
        get_host_ctxt \ctxt, \vcpu
        ldr     \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
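The __hyp_this_cpu_{ptr,read} helpers and the hyp_adr/ldr_this_cpu macros added above resolve per-CPU variables from EL2 by adding the per-CPU offset held in tpidr_el2 to the symbol address. They are consumed later in this same merge; a condensed usage sketch of the C side, mirroring __set_guest_arch_workaround_state() in the switch.c hunk below (the assembly twin is the hyp_ldr_this_cpu use in hyp-entry.S):

	/* only issue the SMC when this CPU actually needs the firmware call */
	if (__hyp_this_cpu_read(arm64_ssbd_callback_required))
		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);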
diff --combined arch/arm64/include/asm/kvm_host.h
index 95d8a0e15b5fbcede8b98a708e1ea6559eb7aada,c923d3e17ba3ce511239961749f64ad5b80cc1e5..fda9a8ca48bef71b0d4a76be1a45295af1211dd6
@@@ -30,6 -30,7 +30,7 @@@
  #include <asm/kvm.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_mmio.h>
+ #include <asm/thread_info.h>
  
  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
  
@@@ -216,11 -217,8 +217,11 @@@ struct kvm_vcpu_arch 
        /* Exception Information */
        struct kvm_vcpu_fault_info fault;
  
-       /* Guest debug state */
-       u64 debug_flags;
 +      /* State of various workarounds, see kvm_asm.h for bit assignment */
 +      u64 workaround_flags;
 +
+       /* Miscellaneous vcpu state flags */
+       u64 flags;
  
        /*
         * We maintain more than a single set of debug registers to support
  
        /* Pointer to host CPU context */
        kvm_cpu_context_t *host_cpu_context;
+       struct thread_info *host_thread_info;   /* hyp VA */
+       struct user_fpsimd_state *host_fpsimd_state;    /* hyp VA */
        struct {
                /* {Break,watch}point registers */
                struct kvm_guest_debug_arch regs;
        bool sysregs_loaded_on_cpu;
  };
  
+ /* vcpu_arch flags field values: */
+ #define KVM_ARM64_DEBUG_DIRTY         (1 << 0)
+ #define KVM_ARM64_FP_ENABLED          (1 << 1) /* guest FP regs loaded */
+ #define KVM_ARM64_FP_HOST             (1 << 2) /* host FP regs loaded */
+ #define KVM_ARM64_HOST_SVE_IN_USE     (1 << 3) /* backup for host TIF_SVE */
  #define vcpu_gp_regs(v)               (&(v)->arch.ctxt.gp_regs)
  
  /*
@@@ -397,6 -405,19 +408,19 @@@ static inline void __cpu_init_hyp_mode(
        kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
  }
  
+ static inline bool kvm_arch_check_sve_has_vhe(void)
+ {
+       /*
+        * The Arm architecture specifies that implementation of SVE
+        * requires VHE also to be implemented.  The KVM code for arm64
+        * relies on this when SVE is present:
+        */
+       if (system_supports_sve())
+               return has_vhe();
+       else
+               return true;
+ }
  static inline void kvm_arch_hardware_unsetup(void) {}
  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
@@@ -423,15 -444,18 +447,18 @@@ static inline void __cpu_init_stage2(vo
                  "PARange is %d bits, unsupported configuration!", parange);
  }
  
- /*
-  * All host FP/SIMD state is restored on guest exit, so nothing needs
-  * doing here except in the SVE case:
- */
- static inline void kvm_fpsimd_flush_cpu_state(void)
+ /* Guest/host FPSIMD coordination helpers */
+ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+ #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
+ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
  {
-       if (system_supports_sve())
-               sve_flush_cpu_state();
+       return kvm_arch_vcpu_run_map_fp(vcpu);
  }
+ #endif
  
  static inline void kvm_arm_vhe_guest_enter(void)
  {
@@@ -455,30 -479,11 +482,34 @@@ static inline bool kvm_arm_harden_branc
        return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
  }
  
 +#define KVM_SSBD_UNKNOWN              -1
 +#define KVM_SSBD_FORCE_DISABLE                0
 +#define KVM_SSBD_KERNEL               1
 +#define KVM_SSBD_FORCE_ENABLE         2
 +#define KVM_SSBD_MITIGATED            3
 +
 +static inline int kvm_arm_have_ssbd(void)
 +{
 +      switch (arm64_get_ssbd_state()) {
 +      case ARM64_SSBD_FORCE_DISABLE:
 +              return KVM_SSBD_FORCE_DISABLE;
 +      case ARM64_SSBD_KERNEL:
 +              return KVM_SSBD_KERNEL;
 +      case ARM64_SSBD_FORCE_ENABLE:
 +              return KVM_SSBD_FORCE_ENABLE;
 +      case ARM64_SSBD_MITIGATED:
 +              return KVM_SSBD_MITIGATED;
 +      case ARM64_SSBD_UNKNOWN:
 +      default:
 +              return KVM_SSBD_UNKNOWN;
 +      }
 +}
 +
  void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
  void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
  
+ #define __KVM_HAVE_ARCH_VM_ALLOC
+ struct kvm *kvm_arch_alloc_vm(void);
+ void kvm_arch_free_vm(struct kvm *kvm);
  #endif /* __ARM64_KVM_HOST_H__ */
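kvm_arch_check_sve_has_vhe() above encodes the rule already stated in the Kconfig help text earlier in this diff: an SVE implementation is architecturally required to implement VHE, and KVM relies on that. A sketch of the caller side; the real check sits in KVM's init path in virt/kvm/arm/arm.c (listed among the 25 files but not excerpted), and the message wording here is illustrative:

static int sketch_sve_vhe_check(void)
{
	if (!kvm_arch_check_sve_has_vhe()) {
		kvm_pr_unimpl("SVE present but VHE missing; cannot initialise KVM\n");
		return -ENODEV;
	}
	return 0;
}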
diff --combined arch/arm64/include/asm/processor.h
index 65ab83e8926e794d04dbeaea14b3ba9dff9ad73b,c99e657fdd57ba7128d08e11d6bcad0fe3597399..a73ae1e492007e53ffdf7d1c94d15cea33ea25ff
@@@ -35,8 -35,6 +35,8 @@@
  #ifdef __KERNEL__
  
  #include <linux/build_bug.h>
 +#include <linux/cache.h>
 +#include <linux/init.h>
  #include <linux/stddef.h>
  #include <linux/string.h>
  
@@@ -158,7 -156,9 +158,9 @@@ static inline void arch_thread_struct_w
  /* Sync TPIDR_EL0 back to thread_struct for current */
  void tls_preserve_current_state(void);
  
- #define INIT_THREAD  {        }
+ #define INIT_THREAD {                         \
+       .fpsimd_cpu = NR_CPUS,                  \
+ }
  
  static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
  {
@@@ -246,9 -246,17 +248,20 @@@ void cpu_enable_pan(const struct arm64_
  void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
  void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
  
 +extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 +extern void __init minsigstksz_setup(void);
 +
+ /*
+  * Not at the top of the file due to a direct #include cycle between
+  * <asm/fpsimd.h> and <asm/processor.h>.  Deferring this #include
+  * ensures that contents of processor.h are visible to fpsimd.h even if
+  * processor.h is included first.
+  *
+  * These prctl helpers are the only things in this file that require
+  * fpsimd.h.  The core code expects them to be in this header.
+  */
+ #include <asm/fpsimd.h>
  /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
  #define SVE_SET_VL(arg)       sve_set_current_vl(arg)
  #define SVE_GET_VL()  sve_get_current_vl()
diff --combined arch/arm64/include/asm/thread_info.h
index cbcf11b5e6377f0b87a59921d7154c1d64fccf39,af271f9a6c9f2c07d6e27b3bfcb9d09e16bf26a6..cb2c10a8f0a8517edc4460809f9e3d5c5e6be178
@@@ -45,12 -45,6 +45,6 @@@ struct thread_info 
        int                     preempt_count;  /* 0 => preemptable, <0 => bug */
  };
  
- #define INIT_THREAD_INFO(tsk)                                         \
- {                                                                     \
-       .preempt_count  = INIT_PREEMPT_COUNT,                           \
-       .addr_limit     = KERNEL_DS,                                    \
- }
  #define thread_saved_pc(tsk)  \
        ((unsigned long)(tsk->thread.cpu_context.pc))
  #define thread_saved_sp(tsk)  \
@@@ -94,7 -88,6 +88,7 @@@ void arch_release_task_struct(struct ta
  #define TIF_32BIT             22      /* 32bit process */
  #define TIF_SVE                       23      /* Scalable Vector Extension in use */
  #define TIF_SVE_VL_INHERIT    24      /* Inherit sve_vl_onexec across exec */
 +#define TIF_SSBD              25      /* Wants SSB mitigation */
  
  #define _TIF_SIGPENDING               (1 << TIF_SIGPENDING)
  #define _TIF_NEED_RESCHED     (1 << TIF_NEED_RESCHED)
                                 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
                                 _TIF_NOHZ)
  
+ #define INIT_THREAD_INFO(tsk)                                         \
+ {                                                                     \
+       .flags          = _TIF_FOREIGN_FPSTATE,                         \
+       .preempt_count  = INIT_PREEMPT_COUNT,                           \
+       .addr_limit     = KERNEL_DS,                                    \
+ }
  #endif /* __KERNEL__ */
  #endif /* __ASM_THREAD_INFO_H */
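INIT_THREAD_INFO now starts every thread with TIF_FOREIGN_FPSTATE set, meaning "the FPSIMD registers of this CPU do not hold this task's state". The consumer of that flag is the ret-to-user work loop (do_notify_resume() in arch/arm64/kernel/signal.c, shown here from memory in simplified form rather than from this diff), which reloads the registers before the first return to userspace:

	if (thread_flags & _TIF_FOREIGN_FPSTATE)
		fpsimd_restore_current_state();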
diff --combined arch/arm64/kernel/fpsimd.c
index 3b527ae46e492b773a98a2a091a608bcec3dc119,7074c4cd0e0e1ca1533c4c8aa251be6419a45812..84c68b14f1b2f140c97556fd491aada7e06f1410
  #include <linux/percpu.h>
  #include <linux/prctl.h>
  #include <linux/preempt.h>
 -#include <linux/prctl.h>
  #include <linux/ptrace.h>
  #include <linux/sched/signal.h>
  #include <linux/sched/task_stack.h>
  #include <linux/signal.h>
  #include <linux/slab.h>
+ #include <linux/stddef.h>
  #include <linux/sysctl.h>
  
  #include <asm/esr.h>
  #include <asm/fpsimd.h>
  #include <asm/cpufeature.h>
  #include <asm/cputype.h>
+ #include <asm/processor.h>
  #include <asm/simd.h>
  #include <asm/sigcontext.h>
  #include <asm/sysreg.h>
   */
  struct fpsimd_last_state_struct {
        struct user_fpsimd_state *st;
-       bool sve_in_use;
  };
  
  static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@@ -128,7 -130,7 +129,7 @@@ static int sve_default_vl = -1
  #ifdef CONFIG_ARM64_SVE
  
  /* Maximum supported vector length across all CPUs (initially poisoned) */
 -int __ro_after_init sve_max_vl = -1;
 +int __ro_after_init sve_max_vl = SVE_VL_MIN;
  /* Set of available vector lengths, as vq_to_bit(vq): */
  static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
  static void __percpu *efi_sve_state;
@@@ -158,19 -160,6 +159,6 @@@ static void sve_free(struct task_struc
        __sve_free(task);
  }
  
- /* Offset of FFR in the SVE register dump */
- static size_t sve_ffr_offset(int vl)
- {
-       return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
- }
- static void *sve_pffr(struct task_struct *task)
- {
-       return (char *)task->thread.sve_state +
-               sve_ffr_offset(task->thread.sve_vl);
- }
  static void change_cpacr(u64 val, u64 mask)
  {
        u64 cpacr = read_sysreg(CPACR_EL1);
@@@ -251,31 -240,24 +239,24 @@@ static void task_fpsimd_load(void
        WARN_ON(!in_softirq() && !irqs_disabled());
  
        if (system_supports_sve() && test_thread_flag(TIF_SVE))
-               sve_load_state(sve_pffr(current),
+               sve_load_state(sve_pffr(&current->thread),
                               &current->thread.uw.fpsimd_state.fpsr,
                               sve_vq_from_vl(current->thread.sve_vl) - 1);
        else
                fpsimd_load_state(&current->thread.uw.fpsimd_state);
-       if (system_supports_sve()) {
-               /* Toggle SVE trapping for userspace if needed */
-               if (test_thread_flag(TIF_SVE))
-                       sve_user_enable();
-               else
-                       sve_user_disable();
-               /* Serialised by exception return to user */
-       }
  }
  
  /*
-  * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
-  * with respect to the CPU registers.
+  * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
+  * date with respect to the CPU registers.
   *
   * Softirqs (and preemption) must be disabled.
   */
- static void task_fpsimd_save(void)
+ void fpsimd_save(void)
  {
+       struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
+       /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
        WARN_ON(!in_softirq() && !irqs_disabled());
  
        if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
                                return;
                        }
  
-                       sve_save_state(sve_pffr(current),
-                                      &current->thread.uw.fpsimd_state.fpsr);
+                       sve_save_state(sve_pffr(&current->thread), &st->fpsr);
                } else
-                       fpsimd_save_state(&current->thread.uw.fpsimd_state);
+                       fpsimd_save_state(st);
        }
  }
  
@@@ -359,13 -340,22 +339,13 @@@ static int sve_proc_do_default_vl(struc
                return ret;
  
        /* Writing -1 has the special meaning "set to max": */
 -      if (vl == -1) {
 -              /* Fail safe if sve_max_vl wasn't initialised */
 -              if (WARN_ON(!sve_vl_valid(sve_max_vl)))
 -                      vl = SVE_VL_MIN;
 -              else
 -                      vl = sve_max_vl;
 -
 -              goto chosen;
 -      }
 +      if (vl == -1)
 +              vl = sve_max_vl;
  
        if (!sve_vl_valid(vl))
                return -EINVAL;
  
 -      vl = find_supported_vector_length(vl);
 -chosen:
 -      sve_default_vl = vl;
 +      sve_default_vl = find_supported_vector_length(vl);
        return 0;
  }
  
@@@ -588,7 -578,7 +568,7 @@@ int sve_set_vector_length(struct task_s
        if (task == current) {
                local_bh_disable();
  
-               task_fpsimd_save();
+               fpsimd_save();
                set_thread_flag(TIF_FOREIGN_FPSTATE);
        }
  
        task->thread.sve_vl = vl;
  
  out:
-       if (flags & PR_SVE_VL_INHERIT)
-               set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
-       else
-               clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+       update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
+                              flags & PR_SVE_VL_INHERIT);
  
        return 0;
  }
@@@ -755,6 -743,33 +733,33 @@@ void sve_kernel_enable(const struct arm
        isb();
  }
  
+ /*
+  * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+  * vector length.
+  *
+  * Use only if SVE is present.
+  * This function clobbers the SVE vector length.
+  */
+ u64 read_zcr_features(void)
+ {
+       u64 zcr;
+       unsigned int vq_max;
+       /*
+        * Set the maximum possible VL, and write zeroes to all other
+        * bits to see if they stick.
+        */
+       sve_kernel_enable(NULL);
+       write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+       zcr = read_sysreg_s(SYS_ZCR_EL1);
+       zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+       vq_max = sve_vq_from_vl(sve_get_vl());
+       zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+       return zcr;
+ }
  void __init sve_setup(void)
  {
        u64 zcr;
@@@ -829,7 -844,7 +834,7 @@@ asmlinkage void do_sve_acc(unsigned in
  
        local_bh_disable();
  
-       task_fpsimd_save();
+       fpsimd_save();
        fpsimd_to_sve(current);
  
        /* Force ret_to_user to reload the registers: */
@@@ -872,7 -887,7 +877,7 @@@ asmlinkage void do_fpsimd_exc(unsigned 
                        si_code = FPE_FLTRES;
        }
  
 -      memset(&info, 0, sizeof(info));
 +      clear_siginfo(&info);
        info.si_signo = SIGFPE;
        info.si_code = si_code;
        info.si_addr = (void __user *)instruction_pointer(regs);
  
  void fpsimd_thread_switch(struct task_struct *next)
  {
+       bool wrong_task, wrong_cpu;
        if (!system_supports_fpsimd())
                return;
+       /* Save unsaved fpsimd state, if any: */
+       fpsimd_save();
        /*
-        * Save the current FPSIMD state to memory, but only if whatever is in
-        * the registers is in fact the most recent userland FPSIMD state of
-        * 'current'.
+        * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+        * state.  For kernel threads, FPSIMD registers are never loaded
+        * and wrong_task and wrong_cpu will always be true.
         */
-       if (current->mm)
-               task_fpsimd_save();
+       wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+                                       &next->thread.uw.fpsimd_state;
+       wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
  
-       if (next->mm) {
-               /*
-                * If we are switching to a task whose most recent userland
-                * FPSIMD state is already in the registers of *this* cpu,
-                * we can skip loading the state from memory. Otherwise, set
-                * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
-                * upon the next return to userland.
-                */
-               if (__this_cpu_read(fpsimd_last_state.st) ==
-                       &next->thread.uw.fpsimd_state
-                   && next->thread.fpsimd_cpu == smp_processor_id())
-                       clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
-               else
-                       set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
-       }
+       update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+                              wrong_task || wrong_cpu);
  }
  
  void fpsimd_flush_thread(void)
@@@ -972,7 -981,7 +971,7 @@@ void fpsimd_preserve_current_state(void
                return;
  
        local_bh_disable();
-       task_fpsimd_save();
+       fpsimd_save();
        local_bh_enable();
  }
  
@@@ -992,14 -1001,33 +991,33 @@@ void fpsimd_signal_preserve_current_sta
   * Associate current's FPSIMD context with this cpu
   * Preemption must be disabled when calling this function.
   */
- static void fpsimd_bind_to_cpu(void)
+ void fpsimd_bind_task_to_cpu(void)
  {
        struct fpsimd_last_state_struct *last =
                this_cpu_ptr(&fpsimd_last_state);
  
        last->st = &current->thread.uw.fpsimd_state;
-       last->sve_in_use = test_thread_flag(TIF_SVE);
        current->thread.fpsimd_cpu = smp_processor_id();
+       if (system_supports_sve()) {
+               /* Toggle SVE trapping for userspace if needed */
+               if (test_thread_flag(TIF_SVE))
+                       sve_user_enable();
+               else
+                       sve_user_disable();
+               /* Serialised by exception return to user */
+       }
+ }
+ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
+ {
+       struct fpsimd_last_state_struct *last =
+               this_cpu_ptr(&fpsimd_last_state);
+       WARN_ON(!in_softirq() && !irqs_disabled());
+       last->st = st;
  }
  
  /*
@@@ -1016,7 -1044,7 +1034,7 @@@ void fpsimd_restore_current_state(void
  
        if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
                task_fpsimd_load();
-               fpsimd_bind_to_cpu();
+               fpsimd_bind_task_to_cpu();
        }
  
        local_bh_enable();
@@@ -1039,9 -1067,9 +1057,9 @@@ void fpsimd_update_current_state(struc
                fpsimd_to_sve(current);
  
        task_fpsimd_load();
+       fpsimd_bind_task_to_cpu();
  
-       if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
-               fpsimd_bind_to_cpu();
+       clear_thread_flag(TIF_FOREIGN_FPSTATE);
  
        local_bh_enable();
  }
@@@ -1054,29 -1082,12 +1072,12 @@@ void fpsimd_flush_task_state(struct tas
        t->thread.fpsimd_cpu = NR_CPUS;
  }
  
- static inline void fpsimd_flush_cpu_state(void)
+ void fpsimd_flush_cpu_state(void)
  {
        __this_cpu_write(fpsimd_last_state.st, NULL);
+       set_thread_flag(TIF_FOREIGN_FPSTATE);
  }
  
- /*
-  * Invalidate any task SVE state currently held in this CPU's regs.
-  *
-  * This is used to prevent the kernel from trying to reuse SVE register data
-  * that is detroyed by KVM guest enter/exit.  This function should go away when
-  * KVM SVE support is implemented.  Don't use it for anything else.
-  */
- #ifdef CONFIG_ARM64_SVE
- void sve_flush_cpu_state(void)
- {
-       struct fpsimd_last_state_struct const *last =
-               this_cpu_ptr(&fpsimd_last_state);
-       if (last->st && last->sve_in_use)
-               fpsimd_flush_cpu_state();
- }
- #endif /* CONFIG_ARM64_SVE */
  #ifdef CONFIG_KERNEL_MODE_NEON
  
  DEFINE_PER_CPU(bool, kernel_neon_busy);
@@@ -1110,11 -1121,8 +1111,8 @@@ void kernel_neon_begin(void
  
        __this_cpu_write(kernel_neon_busy, true);
  
-       /* Save unsaved task fpsimd state, if any: */
-       if (current->mm) {
-               task_fpsimd_save();
-               set_thread_flag(TIF_FOREIGN_FPSTATE);
-       }
+       /* Save unsaved fpsimd state, if any: */
+       fpsimd_save();
  
        /* Invalidate any task state remaining in the fpsimd regs: */
        fpsimd_flush_cpu_state();
@@@ -1236,13 -1244,10 +1234,10 @@@ static int fpsimd_cpu_pm_notifier(struc
  {
        switch (cmd) {
        case CPU_PM_ENTER:
-               if (current->mm)
-                       task_fpsimd_save();
+               fpsimd_save();
                fpsimd_flush_cpu_state();
                break;
        case CPU_PM_EXIT:
-               if (current->mm)
-                       set_thread_flag(TIF_FOREIGN_FPSTATE);
                break;
        case CPU_PM_ENTER_FAILED:
        default:
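The rewritten fpsimd_thread_switch() above replaces the old mm-based checks with two explicit ownership conditions. A standalone restatement of that test, with the per-CPU and per-task fields passed in as plain parameters (the real code reads fpsimd_last_state.st, thread.uw.fpsimd_state and thread.fpsimd_cpu directly):

/*
 * The registers on this CPU belong to 'next' only if (a) this CPU last
 * loaded next's user FPSIMD state and (b) next has not since run anywhere
 * else.  If either condition fails, TIF_FOREIGN_FPSTATE must be set so the
 * state is reloaded from memory before returning to userspace.
 */
static bool fpsimd_regs_are_stale(const struct user_fpsimd_state *cpu_last_st,
				  const struct user_fpsimd_state *next_st,
				  unsigned int next_last_cpu,
				  unsigned int this_cpu)
{
	bool wrong_task = cpu_last_st != next_st;
	bool wrong_cpu = next_last_cpu != this_cpu;

	return wrong_task || wrong_cpu;
}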
diff --combined arch/arm64/kernel/ptrace.c
index bd732644c2f6af1bbe5ac00d8aaaa041c621080e,78889c4546d7a7d72e94890e260e1288a6de0a12..5c338ce5a7fa13e1bccf7f264f7e6db24ca63cc2
@@@ -44,6 -44,7 +44,7 @@@
  #include <asm/compat.h>
  #include <asm/cpufeature.h>
  #include <asm/debug-monitors.h>
+ #include <asm/fpsimd.h>
  #include <asm/pgtable.h>
  #include <asm/stacktrace.h>
  #include <asm/syscall.h>
@@@ -766,6 -767,9 +767,6 @@@ static void sve_init_header_from_task(s
        vq = sve_vq_from_vl(header->vl);
  
        header->max_vl = sve_max_vl;
 -      if (WARN_ON(!sve_vl_valid(sve_max_vl)))
 -              header->max_vl = header->vl;
 -
        header->size = SVE_PT_SIZE(vq, header->flags);
        header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
                                      SVE_PT_REGS_SVE);
@@@ -1043,6 -1047,8 +1044,6 @@@ static const struct user_regset_view us
  };
  
  #ifdef CONFIG_COMPAT
 -#include <linux/compat.h>
 -
  enum compat_regset {
        REGSET_COMPAT_GPR,
        REGSET_COMPAT_VFP,
diff --combined arch/arm64/kvm/hyp/hyp-entry.S
index 05d8369790321e48f895eb4e5b802c7b3fdaf16a,753b9d213651af6b2b56ee66709436c9ea23d2db..24b4fbafe3e4ac9f9c30aaa2da04c16a798bf9ff
@@@ -106,68 -106,13 +106,49 @@@ el1_hvc_guest
         */
        ldr     x1, [sp]                                // Guest's x0
        eor     w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
 +      cbz     w1, wa_epilogue
 +
 +      /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
 +      eor     w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
 +                        ARM_SMCCC_ARCH_WORKAROUND_2)
        cbnz    w1, el1_trap
 -      mov     x0, x1
 +
 +#ifdef CONFIG_ARM64_SSBD
 +alternative_cb        arm64_enable_wa2_handling
 +      b       wa2_end
 +alternative_cb_end
 +      get_vcpu_ptr    x2, x0
 +      ldr     x0, [x2, #VCPU_WORKAROUND_FLAGS]
 +
 +      // Sanitize the argument and update the guest flags
 +      ldr     x1, [sp, #8]                    // Guest's x1
 +      clz     w1, w1                          // Murphy's device:
 +      lsr     w1, w1, #5                      // w1 = !!w1 without using
 +      eor     w1, w1, #1                      // the flags...
 +      bfi     x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
 +      str     x0, [x2, #VCPU_WORKAROUND_FLAGS]
 +
 +      /* Check that we actually need to perform the call */
 +      hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
 +      cbz     x0, wa2_end
 +
 +      mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 +      smc     #0
 +
 +      /* Don't leak data from the SMC call */
 +      mov     x3, xzr
 +wa2_end:
 +      mov     x2, xzr
 +      mov     x1, xzr
 +#endif
 +
 +wa_epilogue:
 +      mov     x0, xzr
        add     sp, sp, #16
        eret
  
  el1_trap:
        get_vcpu_ptr    x1, x0
-       mrs             x0, esr_el2
-       lsr             x0, x0, #ESR_ELx_EC_SHIFT
-       /*
-        * x0: ESR_EC
-        * x1: vcpu pointer
-        */
-       /*
-        * We trap the first access to the FP/SIMD to save the host context
-        * and restore the guest context lazily.
-        * If FP/SIMD is not implemented, handle the trap and inject an
-        * undefined instruction exception to the guest.
-        */
- alternative_if_not ARM64_HAS_NO_FPSIMD
-       cmp     x0, #ESR_ELx_EC_FP_ASIMD
-       b.eq    __fpsimd_guest_restore
- alternative_else_nop_endif
        mov     x0, #ARM_EXCEPTION_TRAP
        b       __guest_exit
  
diff --combined arch/arm64/kvm/hyp/switch.c
index c50cedc447f1ab33e2eda5682d6b72fda272514d,2d45bd719a5dfba78a81e36bf25cc4b72c42541b..d496ef579859627edd1ba98c1233d9584cd407e3
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
 +#include <linux/arm-smccc.h>
  #include <linux/types.h>
  #include <linux/jump_label.h>
  #include <uapi/linux/psci.h>
  
  #include <kvm/arm_psci.h>
  
+ #include <asm/cpufeature.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_emulate.h>
+ #include <asm/kvm_host.h>
  #include <asm/kvm_hyp.h>
  #include <asm/kvm_mmu.h>
  #include <asm/fpsimd.h>
  #include <asm/debug-monitors.h>
+ #include <asm/processor.h>
+ #include <asm/thread_info.h>
  
- static bool __hyp_text __fpsimd_enabled_nvhe(void)
+ /* Check whether the FP regs were dirtied while in the host-side run loop: */
+ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
  {
-       return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
- }
+       if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+               vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+                                     KVM_ARM64_FP_HOST);
  
- static bool fpsimd_enabled_vhe(void)
- {
-       return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+       return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
  }
  
  /* Save the 32-bit only FPSIMD system register state */
@@@ -93,7 -96,10 +97,10 @@@ static void activate_traps_vhe(struct k
  
        val = read_sysreg(cpacr_el1);
        val |= CPACR_EL1_TTA;
-       val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+       val &= ~CPACR_EL1_ZEN;
+       if (!update_fp_enabled(vcpu))
+               val &= ~CPACR_EL1_FPEN;
        write_sysreg(val, cpacr_el1);
  
        write_sysreg(kvm_get_hyp_vector(), vbar_el1);
@@@ -106,7 -112,10 +113,10 @@@ static void __hyp_text __activate_traps
        __activate_traps_common(vcpu);
  
        val = CPTR_EL2_DEFAULT;
-       val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+       val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
+       if (!update_fp_enabled(vcpu))
+               val |= CPTR_EL2_TFP;
        write_sysreg(val, cptr_el2);
  }
  
@@@ -319,6 -328,50 +329,50 @@@ static bool __hyp_text __skip_instr(str
        }
  }
  
+ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+ {
+       struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+       if (has_vhe())
+               write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
+                            cpacr_el1);
+       else
+               write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+                            cptr_el2);
+       isb();
+       if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+               /*
+                * In the SVE case, VHE is assumed: it is enforced by
+                * Kconfig and kvm_arch_init().
+                */
+               if (system_supports_sve() &&
+                   (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+                       struct thread_struct *thread = container_of(
+                               host_fpsimd,
+                               struct thread_struct, uw.fpsimd_state);
+                       sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+               } else {
+                       __fpsimd_save_state(host_fpsimd);
+               }
+               vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+       }
+       __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+       /* Skip restoring fpexc32 for AArch64 guests */
+       if (!(read_sysreg(hcr_el2) & HCR_RW))
+               write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+                            fpexc32_el2);
+       vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+       return true;
+ }
  /*
   * Return true when we were able to fixup the guest exit and should return to
   * the guest, false when we should restore the host state and return to the
@@@ -335,11 -388,23 +389,23 @@@ static bool __hyp_text fixup_guest_exit
         * same PC once the SError has been injected, and replay the
         * trapping instruction.
         */
-       if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+       if (*exit_code != ARM_EXCEPTION_TRAP)
+               goto exit;
+       /*
+        * We trap the first access to the FP/SIMD to save the host context
+        * and restore the guest context lazily.
+        * If FP/SIMD is not implemented, handle the trap and inject an
+        * undefined instruction exception to the guest.
+        */
+       if (system_supports_fpsimd() &&
+           kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
+               return __hyp_switch_fpsimd(vcpu);
+       if (!__populate_fault_info(vcpu))
                return true;
  
-       if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
-           *exit_code == ARM_EXCEPTION_TRAP) {
+       if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
                bool valid;
  
                valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
                if (valid) {
                        int ret = __vgic_v2_perform_cpuif_access(vcpu);
  
-                       if (ret == 1) {
-                               if (__skip_instr(vcpu))
-                                       return true;
-                               else
-                                       *exit_code = ARM_EXCEPTION_TRAP;
-                       }
+                       if (ret ==  1 && __skip_instr(vcpu))
+                               return true;
  
                        if (ret == -1) {
                                /* Promote an illegal access to an
                                        *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
                                *exit_code = ARM_EXCEPTION_EL1_SERROR;
                        }
+                       goto exit;
                }
        }
  
        if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
-           *exit_code == ARM_EXCEPTION_TRAP &&
            (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
             kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
                int ret = __vgic_v3_perform_cpuif_access(vcpu);
  
-               if (ret == 1) {
-                       if (__skip_instr(vcpu))
-                               return true;
-                       else
-                               *exit_code = ARM_EXCEPTION_TRAP;
-               }
+               if (ret == 1 && __skip_instr(vcpu))
+                       return true;
        }
  
+ exit:
        /* Return to the host kernel and handle the exit */
        return false;
  }
  
 +static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
 +{
 +      if (!cpus_have_const_cap(ARM64_SSBD))
 +              return false;
 +
 +      return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
 +}
 +
 +static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
 +{
 +#ifdef CONFIG_ARM64_SSBD
 +      /*
 +       * The host runs with the workaround always present. If the
 +       * guest wants it disabled, so be it...
 +       */
 +      if (__needs_ssbd_off(vcpu) &&
 +          __hyp_this_cpu_read(arm64_ssbd_callback_required))
 +              arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
 +#endif
 +}
 +
 +static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
 +{
 +#ifdef CONFIG_ARM64_SSBD
 +      /*
 +       * If the guest has disabled the workaround, bring it back on.
 +       */
 +      if (__needs_ssbd_off(vcpu) &&
 +          __hyp_this_cpu_read(arm64_ssbd_callback_required))
 +              arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
 +#endif
 +}
 +
  /* Switch to the guest for VHE systems running in EL2 */
  int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
  {
        struct kvm_cpu_context *host_ctxt;
        struct kvm_cpu_context *guest_ctxt;
-       bool fp_enabled;
        u64 exit_code;
  
        host_ctxt = vcpu->arch.host_cpu_context;
        sysreg_restore_guest_state_vhe(guest_ctxt);
        __debug_switch_to_guest(vcpu);
  
 +      __set_guest_arch_workaround_state(vcpu);
 +
        do {
                /* Jump in the fire! */
                exit_code = __guest_enter(vcpu, host_ctxt);
                /* And we're baaack! */
        } while (fixup_guest_exit(vcpu, &exit_code));
  
-       fp_enabled = fpsimd_enabled_vhe();
 +      __set_host_arch_workaround_state(vcpu);
 +
        sysreg_save_guest_state_vhe(guest_ctxt);
  
        __deactivate_traps(vcpu);
  
        sysreg_restore_host_state_vhe(host_ctxt);
  
-       if (fp_enabled) {
-               __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
-               __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+       if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
-       }
  
        __debug_switch_to_host(vcpu);
  
@@@ -478,7 -494,6 +532,6 @@@ int __hyp_text __kvm_vcpu_run_nvhe(stru
  {
        struct kvm_cpu_context *host_ctxt;
        struct kvm_cpu_context *guest_ctxt;
-       bool fp_enabled;
        u64 exit_code;
  
        vcpu = kern_hyp_va(vcpu);
        __sysreg_restore_state_nvhe(guest_ctxt);
        __debug_switch_to_guest(vcpu);
  
 +      __set_guest_arch_workaround_state(vcpu);
 +
        do {
                /* Jump in the fire! */
                exit_code = __guest_enter(vcpu, host_ctxt);
                /* And we're baaack! */
        } while (fixup_guest_exit(vcpu, &exit_code));
  
-       fp_enabled = __fpsimd_enabled_nvhe();
 +      __set_host_arch_workaround_state(vcpu);
 +
        __sysreg_save_state_nvhe(guest_ctxt);
        __sysreg32_save_state(vcpu);
        __timer_disable_traps(vcpu);
  
        __sysreg_restore_state_nvhe(host_ctxt);
  
-       if (fp_enabled) {
-               __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
-               __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+       if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
-       }
  
        /*
         * This must come after restoring the host sysregs, since a non-VHE
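The two trap-configuration hunks above implement the host side of the lazy FP/SIMD switch: on each guest entry, FP/SIMD accesses by the guest are left trapping unless update_fp_enabled() reports that the guest's registers are already live and were not invalidated by TIF_FOREIGN_FPSTATE in the host run loop. Condensed here into a single sketch; the real code is split across activate_traps_vhe() and __activate_traps_nvhe() above:

	if (has_vhe()) {
		u64 val = read_sysreg(cpacr_el1);

		val |= CPACR_EL1_TTA;
		val &= ~CPACR_EL1_ZEN;			/* always trap guest SVE */
		if (!update_fp_enabled(vcpu))
			val &= ~CPACR_EL1_FPEN;		/* trap guest FP/SIMD too */
		write_sysreg(val, cpacr_el1);
	} else {
		u64 val = CPTR_EL2_DEFAULT | CPTR_EL2_TTA | CPTR_EL2_TZ;

		if (!update_fp_enabled(vcpu))
			val |= CPTR_EL2_TFP;		/* trap guest FP/SIMD */
		write_sysreg(val, cptr_el2);
	}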
diff --combined arch/mips/kvm/mips.c
index 0f725e9cee8f69230ca7ddff5f6023c30294395c,03e0e0f189cc0c9a319f00395d6a53de57fea4f0..7cd76f93a438ab00d7085b3aaa93a5294c494c55
@@@ -45,7 -45,7 +45,7 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "cache",        VCPU_STAT(cache_exits),        KVM_STAT_VCPU },
        { "signal",       VCPU_STAT(signal_exits),       KVM_STAT_VCPU },
        { "interrupt",    VCPU_STAT(int_exits),          KVM_STAT_VCPU },
 -      { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
 +      { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
        { "tlbmod",       VCPU_STAT(tlbmod_exits),       KVM_STAT_VCPU },
        { "tlbmiss_ld",   VCPU_STAT(tlbmiss_ld_exits),   KVM_STAT_VCPU },
        { "tlbmiss_st",   VCPU_STAT(tlbmiss_st_exits),   KVM_STAT_VCPU },
@@@ -1076,7 -1076,7 +1076,7 @@@ int kvm_arch_vcpu_ioctl_set_fpu(struct 
        return -ENOIOCTLCMD;
  }
  
- int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
  }
diff --combined arch/powerpc/kvm/book3s_hv.c
index cb6d2313b19f482ec9ee0dc089008cc9feb9ee1b,67d7de1470ccb4253493ac5d4fcbc781747bc7d0..69895597736ab60a63ce840269e9889dd0bf24b0
@@@ -2441,7 -2441,6 +2441,7 @@@ static void init_vcore_to_run(struct kv
        vc->in_guest = 0;
        vc->napping_threads = 0;
        vc->conferring_threads = 0;
 +      vc->tb_offset_applied = 0;
  }
  
  static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
@@@ -2912,12 -2911,8 +2912,12 @@@ static noinline void kvmppc_run_core(st
  
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
  
 +      this_cpu_disable_ftrace();
 +
        trap = __kvmppc_vcore_entry();
  
 +      this_cpu_enable_ftrace();
 +
        srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
  
        trace_hardirqs_off();
@@@ -3955,8 -3950,7 +3955,7 @@@ static int kvmppc_core_init_vm_hv(struc
         */
        snprintf(buf, sizeof(buf), "vm%d", current->pid);
        kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
-       if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
-               kvmppc_mmu_debugfs_init(kvm);
+       kvmppc_mmu_debugfs_init(kvm);
  
        return 0;
  }
diff --combined arch/s390/include/asm/pgtable.h
index 9809694e1389ef836f4d5ea1eb4d9b08be5e1260,c9f155b67660589fe460cd83a2df886cd5503f7d..5ab636089c6052c51cb5ac15046ee0975bcfd026
@@@ -171,6 -171,7 +171,6 @@@ static inline int is_module_addr(void *
  #define _PAGE_WRITE   0x020           /* SW pte write bit */
  #define _PAGE_SPECIAL 0x040           /* SW associated with special page */
  #define _PAGE_UNUSED  0x080           /* SW bit for pgste usage state */
 -#define __HAVE_ARCH_PTE_SPECIAL
  
  #ifdef CONFIG_MEM_SOFT_DIRTY
  #define _PAGE_SOFT_DIRTY 0x002                /* SW pte soft dirty bit */
@@@ -506,10 -507,10 +506,10 @@@ static inline int mm_alloc_pgste(struc
   * faults should no longer be backed by zero pages
   */
  #define mm_forbids_zeropage mm_has_pgste
- static inline int mm_use_skey(struct mm_struct *mm)
+ static inline int mm_uses_skeys(struct mm_struct *mm)
  {
  #ifdef CONFIG_PGSTE
-       if (mm->context.use_skey)
+       if (mm->context.uses_skeys)
                return 1;
  #endif
        return 0;
diff --combined arch/s390/kvm/priv.c
index a3bce0e8434628a1c8d05404149e6573a71bd4ab,e8c62703c76452003a9c30ec8d893cb43437ba9f..eb0eb60c7be6a26677f8ed20509aba88df6da337
@@@ -26,6 -26,7 +26,6 @@@
  #include <asm/gmap.h>
  #include <asm/io.h>
  #include <asm/ptrace.h>
 -#include <asm/compat.h>
  #include <asm/sclp.h>
  #include "gaccess.h"
  #include "kvm-s390.h"
@@@ -204,24 -205,28 +204,28 @@@ static int handle_store_cpu_address(str
  
  int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
  {
-       int rc = 0;
+       int rc;
        struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
  
        trace_kvm_s390_skey_related_inst(vcpu);
-       if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
+       /* Already enabled? */
+       if (vcpu->kvm->arch.use_skf &&
+           !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
            !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
-               return rc;
+               return 0;
  
        rc = s390_enable_skey();
        VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
-       if (!rc) {
-               if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
-                       kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
-               else
-                       sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
-                                            ICTL_RRBE);
-       }
-       return rc;
+       if (rc)
+               return rc;
+       if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+               kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
+       if (!vcpu->kvm->arch.use_skf)
+               sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+       else
+               sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+       return 0;
  }
  
  static int try_handle_skey(struct kvm_vcpu *vcpu)
        rc = kvm_s390_skey_check_enable(vcpu);
        if (rc)
                return rc;
-       if (sclp.has_skey) {
+       if (vcpu->kvm->arch.use_skf) {
                /* with storage-key facility, SIE interprets it for us */
                kvm_s390_retry_instr(vcpu);
                VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
diff --combined arch/x86/include/asm/kvm_host.h
index f4b2588865e9f7ad16696d3e70255a2b794d26b3,0ebe659f28026e6a16fb32f067ac37fcf77414ce..c13cd28d9d1be5abdff8fdf93692d51755c8930c
@@@ -258,7 -258,8 +258,8 @@@ union kvm_mmu_page_role 
                unsigned smep_andnot_wp:1;
                unsigned smap_andnot_wp:1;
                unsigned ad_disabled:1;
-               unsigned :7;
+               unsigned guest_mode:1;
+               unsigned :6;
  
                /*
                 * This is left at the top of the word so that
@@@ -476,6 -477,7 +477,7 @@@ struct kvm_vcpu_hv 
        struct kvm_hyperv_exit exit;
        struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
        DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
+       cpumask_t tlb_lush;
  };
  
  struct kvm_vcpu_arch {
@@@ -924,7 -926,7 +926,7 @@@ struct kvm_x86_ops 
        int (*hardware_setup)(void);               /* __init */
        void (*hardware_unsetup)(void);            /* __exit */
        bool (*cpu_has_accelerated_tpr)(void);
 -      bool (*cpu_has_high_real_mode_segbase)(void);
 +      bool (*has_emulated_msr)(int index);
        void (*cpuid_update)(struct kvm_vcpu *vcpu);
  
        struct kvm *(*vm_alloc)(void);
        void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
        void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
-       void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+       void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
        void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
        int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
@@@ -1277,6 -1279,7 +1279,7 @@@ void __kvm_mmu_free_some_pages(struct k
  int kvm_mmu_load(struct kvm_vcpu *vcpu);
  void kvm_mmu_unload(struct kvm_vcpu *vcpu);
  void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
  gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                           struct x86_exception *exception);
  gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --combined arch/x86/kvm/cpuid.c
index f4f30d0c25c426388fb962fdcd04e76b508755cc,72d8c492d71d423b346e44a0c0b36d94ff049bd0..5720e78b2f7b52fa9a05bc52064d4fb8cac04c6c
@@@ -379,8 -379,7 +379,8 @@@ static inline int __do_cpuid_ent(struc
  
        /* cpuid 0x80000008.ebx */
        const u32 kvm_cpuid_8000_0008_ebx_x86_features =
 -              F(IBPB) | F(IBRS);
 +              F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
 +              F(AMD_SSB_NO);
  
        /* cpuid 0xC0000001.edx */
        const u32 kvm_cpuid_C000_0001_edx_x86_features =
        const u32 kvm_cpuid_7_0_ecx_x86_features =
                F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
-               F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
+               F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+               F(CLDEMOTE);
  
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
                F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
 -              F(ARCH_CAPABILITIES);
 +              F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
  
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
                                entry->ecx &= ~F(PKU);
                        entry->edx &= kvm_cpuid_7_0_edx_x86_features;
                        cpuid_mask(&entry->edx, CPUID_7_EDX);
 +                      /*
 +                       * We emulate ARCH_CAPABILITIES in software even
 +                       * if the host doesn't support it.
 +                       */
 +                      entry->edx |= F(ARCH_CAPABILITIES);
                } else {
                        entry->ebx = 0;
                        entry->ecx = 0;
                        g_phys_as = phys_as;
                entry->eax = g_phys_as | (virt_as << 8);
                entry->edx = 0;
 -              /* IBRS and IBPB aren't necessarily present in hardware cpuid */
 -              if (boot_cpu_has(X86_FEATURE_IBPB))
 -                      entry->ebx |= F(IBPB);
 -              if (boot_cpu_has(X86_FEATURE_IBRS))
 -                      entry->ebx |= F(IBRS);
 +              /*
 +               * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
 +               * hardware cpuid
 +               */
 +              if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
 +                      entry->ebx |= F(AMD_IBPB);
 +              if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
 +                      entry->ebx |= F(AMD_IBRS);
 +              if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
 +                      entry->ebx |= F(VIRT_SSBD);
                entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
                cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
 +              /*
 +               * The preference is to use SPEC CTRL MSR instead of the
 +               * VIRT_SPEC MSR.
 +               */
 +              if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
 +                  !boot_cpu_has(X86_FEATURE_AMD_SSBD))
 +                      entry->ebx |= F(VIRT_SSBD);
                break;
        }
        case 0x80000019:
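The cpuid.c hunk above decides which speculation-control bits to report in CPUID 0x80000008 EBX: the AMD_IBPB/AMD_IBRS/VIRT_SSBD bits are synthesized from host capabilities, masked against what KVM supports, and VIRT_SSBD is additionally forced on when the host only has the LS_CFG flavour of SSBD rather than AMD_SSBD. A standalone sketch of that decision follows; the bit positions and the host_caps structure are illustrative stand-ins, not the kernel's F()/X86_FEATURE_* machinery.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative bit positions within CPUID leaf 0x80000008 EBX. */
#define BIT_AMD_IBPB    (1u << 12)
#define BIT_AMD_IBRS    (1u << 14)
#define BIT_VIRT_SSBD   (1u << 25)

struct host_caps {
        bool amd_ibpb, amd_ibrs, virt_ssbd;
        bool ls_cfg_ssbd, amd_ssbd;
};

static unsigned int guest_8000_0008_ebx(const struct host_caps *h,
                                        unsigned int supported)
{
        unsigned int ebx = 0;

        /* These bits may be synthesized even if hardware CPUID lacks them. */
        if (h->amd_ibpb)
                ebx |= BIT_AMD_IBPB;
        if (h->amd_ibrs)
                ebx |= BIT_AMD_IBRS;
        if (h->virt_ssbd)
                ebx |= BIT_VIRT_SSBD;

        ebx &= supported;       /* keep only what the hypervisor supports */

        /*
         * Prefer SPEC_CTRL-based SSBD; advertise VIRT_SSBD as a fallback
         * only when the host has nothing but the LS_CFG mechanism.
         */
        if (h->ls_cfg_ssbd && !h->amd_ssbd)
                ebx |= BIT_VIRT_SSBD;

        return ebx;
}

int main(void)
{
        struct host_caps h = { .ls_cfg_ssbd = true };
        unsigned int mask = BIT_AMD_IBPB | BIT_AMD_IBRS | BIT_VIRT_SSBD;

        printf("guest ebx = %#x\n", guest_8000_0008_ebx(&h, mask));
        return 0;
}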
diff --combined arch/x86/kvm/hyperv.c
index 46ff64da44cab46d637facafffb10ba1a1269a1f,14e0d0ae4e0a8ea86969cd7f93ea029b89a0a070..af8caf965baa291319a7e5825b27ab28f97a24cf
@@@ -1242,6 -1242,121 +1242,121 @@@ int kvm_hv_get_msr_common(struct kvm_vc
                return kvm_hv_get_msr(vcpu, msr, pdata);
  }
  
+ static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+ {
+       int i = 0, j;
+       if (!(valid_bank_mask & BIT_ULL(bank_no)))
+               return -1;
+       for (j = 0; j < bank_no; j++)
+               if (valid_bank_mask & BIT_ULL(j))
+                       i++;
+       return i;
+ }
+ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
+                           u16 rep_cnt, bool ex)
+ {
+       struct kvm *kvm = current_vcpu->kvm;
+       struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+       struct hv_tlb_flush_ex flush_ex;
+       struct hv_tlb_flush flush;
+       struct kvm_vcpu *vcpu;
+       unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
+       unsigned long valid_bank_mask = 0;
+       u64 sparse_banks[64];
+       int sparse_banks_len, i;
+       bool all_cpus;
+       if (!ex) {
+               if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+               trace_kvm_hv_flush_tlb(flush.processor_mask,
+                                      flush.address_space, flush.flags);
+               sparse_banks[0] = flush.processor_mask;
+               all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+       } else {
+               if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
+                                           sizeof(flush_ex))))
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+               trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
+                                         flush_ex.hv_vp_set.format,
+                                         flush_ex.address_space,
+                                         flush_ex.flags);
+               valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
+               all_cpus = flush_ex.hv_vp_set.format !=
+                       HV_GENERIC_SET_SPARSE_4K;
+               sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+                       sizeof(sparse_banks[0]);
+               if (!sparse_banks_len && !all_cpus)
+                       goto ret_success;
+               if (!all_cpus &&
+                   kvm_read_guest(kvm,
+                                  ingpa + offsetof(struct hv_tlb_flush_ex,
+                                                   hv_vp_set.bank_contents),
+                                  sparse_banks,
+                                  sparse_banks_len))
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+       }
+       cpumask_clear(&hv_current->tlb_lush);
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+               int bank = hv->vp_index / 64, sbank = 0;
+               if (!all_cpus) {
+                       /* Banks >64 can't be represented */
+                       if (bank >= 64)
+                               continue;
+                       /* Non-ex hypercalls can only address first 64 vCPUs */
+                       if (!ex && bank)
+                               continue;
+                       if (ex) {
+                               /*
+                                * Check if the bank of this vCPU is in the sparse
+                                * set and get the sparse bank number.
+                                */
+                               sbank = get_sparse_bank_no(valid_bank_mask,
+                                                          bank);
+                               if (sbank < 0)
+                                       continue;
+                       }
+                       if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
+                               continue;
+               }
+               /*
+                * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
+                * can't analyze it here, flush TLB regardless of the specified
+                * address space.
+                */
+               __set_bit(i, vcpu_bitmap);
+       }
+       kvm_make_vcpus_request_mask(kvm,
+                                   KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+                                   vcpu_bitmap, &hv_current->tlb_lush);
+ ret_success:
+       /* We always do full TLB flush, set rep_done = rep_cnt. */
+       return (u64)HV_STATUS_SUCCESS |
+               ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+ }
  bool kvm_hv_hypercall_enabled(struct kvm *kvm)
  {
        return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
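get_sparse_bank_no() in the hunk above maps a VP-set bank number to its index inside the packed bank_contents array by counting how many valid banks precede it. The standalone sketch below does the same arithmetic with a popcount builtin instead of the loop used in the hunk; the function name is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Index of bank_no within the packed array of valid banks, or -1. */
static int sparse_bank_index(uint64_t valid_bank_mask, int bank_no)
{
        if (!(valid_bank_mask & (1ULL << bank_no)))
                return -1;

        /* Count the valid banks strictly below bank_no (GCC/Clang builtin). */
        return __builtin_popcountll(valid_bank_mask & ((1ULL << bank_no) - 1));
}

int main(void)
{
        /* Banks 1, 3 and 7 are valid, so bank 7 is entry 2 of the array. */
        uint64_t mask = (1ULL << 1) | (1ULL << 3) | (1ULL << 7);

        printf("bank 7 -> index %d\n", sparse_bank_index(mask, 7));     /* 2 */
        printf("bank 2 -> index %d\n", sparse_bank_index(mask, 2));     /* -1 */
        return 0;
}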
@@@ -1260,18 -1375,14 +1375,18 @@@ static void kvm_hv_hypercall_set_result
        }
  }
  
 -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
  {
 -      struct kvm_run *run = vcpu->run;
 -
 -      kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
 +      kvm_hv_hypercall_set_result(vcpu, result);
 +      ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
  }
  
 +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 +{
 +      return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
 +}
 +
  static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
  {
        struct eventfd_ctx *eventfd;
@@@ -1315,7 -1426,7 +1430,7 @@@ int kvm_hv_hypercall(struct kvm_vcpu *v
  {
        u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
        uint16_t code, rep_idx, rep_cnt;
-       bool fast, longmode;
+       bool fast, longmode, rep;
  
        /*
         * hypercall generates UD from non zero cpl and real mode
  #endif
  
        code = param & 0xffff;
-       fast = (param >> 16) & 0x1;
-       rep_cnt = (param >> 32) & 0xfff;
-       rep_idx = (param >> 48) & 0xfff;
+       fast = !!(param & HV_HYPERCALL_FAST_BIT);
+       rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
+       rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
+       rep = !!(rep_cnt || rep_idx);
  
        trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
  
-       /* Hypercall continuation is not supported yet */
-       if (rep_cnt || rep_idx) {
-               ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-               goto out;
-       }
        switch (code) {
        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
+               if (unlikely(rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
                kvm_vcpu_on_spin(vcpu, true);
                break;
        case HVCALL_SIGNAL_EVENT:
+               if (unlikely(rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
                ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
                if (ret != HV_STATUS_INVALID_PORT_ID)
                        break;
                /* maybe userspace knows this conn_id: fall through */
        case HVCALL_POST_MESSAGE:
                /* don't bother userspace if it has no way to handle it */
-               if (!vcpu_to_synic(vcpu)->active) {
-                       ret = HV_STATUS_INVALID_HYPERCALL_CODE;
+               if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                vcpu->run->exit_reason = KVM_EXIT_HYPERV;
                vcpu->arch.complete_userspace_io =
                                kvm_hv_hypercall_complete_userspace;
                return 0;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+               if (unlikely(fast || !rep_cnt || rep_idx)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+               break;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+               if (unlikely(fast || rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+               break;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+               if (unlikely(fast || !rep_cnt || rep_idx)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+               break;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+               if (unlikely(fast || rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+               break;
        default:
                ret = HV_STATUS_INVALID_HYPERCALL_CODE;
                break;
        }
  
- out:
 -      kvm_hv_hypercall_set_result(vcpu, ret);
 -      return 1;
 +      return kvm_hv_hypercall_complete(vcpu, ret);
  }
  
  void kvm_hv_init_vm(struct kvm *kvm)
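kvm_hv_hypercall() in the hunks above unpacks the guest's 64-bit hypercall parameter: the call code sits in the low 16 bits, the fast flag at bit 16, the rep count at bits 43:32 and the rep start index at bits 59:48, and a completed rep call echoes rep_cnt back at the rep-complete offset of the result. The decoder below mirrors that layout; the constant values are read off the hunk, and the struct is purely illustrative.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HV_HYPERCALL_FAST_BIT           (1ULL << 16)
#define HV_HYPERCALL_REP_COMP_OFFSET    32
#define HV_HYPERCALL_REP_START_OFFSET   48

struct hcall {
        unsigned int code, rep_cnt, rep_idx;
        bool fast, rep;
};

static struct hcall decode_hcall(uint64_t param)
{
        struct hcall c;

        c.code    = param & 0xffff;
        c.fast    = !!(param & HV_HYPERCALL_FAST_BIT);
        c.rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
        c.rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
        c.rep     = c.rep_cnt || c.rep_idx;
        return c;
}

/* Result for a rep hypercall completed in one pass: status 0, all reps done. */
static uint64_t rep_complete(unsigned int rep_cnt)
{
        return (uint64_t)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET;
}

int main(void)
{
        struct hcall c = decode_hcall(0x0004000000000002ULL); /* code 2, 4 reps */

        printf("code=%u fast=%d rep_cnt=%u rep_idx=%u result=%#" PRIx64 "\n",
               c.code, (int)c.fast, c.rep_cnt, c.rep_idx,
               rep_complete(c.rep_cnt));
        return 0;
}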
diff --combined arch/x86/kvm/lapic.c
index 3773c462511404bcc94ad69742b16fdfdc26a504,776391cf69a5196ae0fb33a6abbb496fc167f2fa..b5cd8465d44f6cb99a9ae705cf2f44f3c310a1ac
@@@ -1522,23 -1522,11 +1522,23 @@@ static bool set_target_expiration(struc
  
  static void advance_periodic_target_expiration(struct kvm_lapic *apic)
  {
 -      apic->lapic_timer.tscdeadline +=
 -              nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
 +      ktime_t now = ktime_get();
 +      u64 tscl = rdtsc();
 +      ktime_t delta;
 +
 +      /*
 +       * Synchronize both deadlines to the same time source or
 +       * differences in the periods (caused by differences in the
 +       * underlying clocks or numerical approximation errors) will
 +       * cause the two to drift apart over time as the errors
 +       * accumulate.
 +       */
        apic->lapic_timer.target_expiration =
                ktime_add_ns(apic->lapic_timer.target_expiration,
                                apic->lapic_timer.period);
 +      delta = ktime_sub(apic->lapic_timer.target_expiration, now);
 +      apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
 +              nsec_to_cycles(apic->vcpu, delta);
  }
  
  static void start_sw_period(struct kvm_lapic *apic)
@@@ -2002,13 -1990,11 +2002,11 @@@ void kvm_lapic_set_base(struct kvm_vcp
                }
        }
  
-       if ((old_value ^ value) & X2APIC_ENABLE) {
-               if (value & X2APIC_ENABLE) {
-                       kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
-                       kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
-               } else
-                       kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
-       }
+       if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+               kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+       if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+               kvm_x86_ops->set_virtual_apic_mode(vcpu);
  
        apic->base_address = apic->vcpu->arch.apic_base &
                             MSR_IA32_APICBASE_BASE;
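advance_periodic_target_expiration() in the lapic.c hunk above now derives the TSC deadline for each periodic tick from the ktime delta to the new target, instead of adding nsec_to_cycles(period) to the previous deadline, so the hrtimer-based and TSC-based views of the deadline cannot drift apart. A minimal numeric sketch of that recomputation, with nsec_to_cycles approximated as ns * tsc_khz / 10^6 (an assumption for illustration only):

#include <stdint.h>
#include <stdio.h>

struct toy_lapic_timer {
        int64_t  target_expiration_ns;  /* ktime-based target */
        uint64_t tscdeadline;           /* guest TSC deadline */
        int64_t  period_ns;
};

static uint64_t nsec_to_cycles(int64_t ns, uint64_t tsc_khz)
{
        return (uint64_t)ns * tsc_khz / 1000000u;       /* rough approximation */
}

static void advance_period(struct toy_lapic_timer *t, int64_t now_ns,
                           uint64_t guest_tsc_now, uint64_t tsc_khz)
{
        int64_t delta_ns;

        t->target_expiration_ns += t->period_ns;
        delta_ns = t->target_expiration_ns - now_ns;
        /* Derive the TSC deadline from the same delta, not by accumulation. */
        t->tscdeadline = guest_tsc_now + nsec_to_cycles(delta_ns, tsc_khz);
}

int main(void)
{
        struct toy_lapic_timer t = {
                .target_expiration_ns = 1000000,        /* 1 ms after "boot" */
                .period_ns = 250000,                    /* 250 us period */
        };

        /* Pretend the tick fired 10 us late, with a 2.2 GHz guest TSC. */
        advance_period(&t, 1010000, 2222000, 2200000);
        printf("next target = %lld ns, tscdeadline = %llu\n",
               (long long)t.target_expiration_ns,
               (unsigned long long)t.tscdeadline);
        return 0;
}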
diff --combined arch/x86/kvm/mmu.c
index d634f0332c0fad5aec8b7e285b97d7423e064dcc,f440d43c8d5ad864bc64795e6b766575815fa77a..d594690d8b9597a87f4cba26e8c1be5cb2de22de
@@@ -222,7 -222,6 +222,6 @@@ static const u64 shadow_acc_track_saved
  static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
  
  static void mmu_spte_set(u64 *sptep, u64 spte);
- static void mmu_free_roots(struct kvm_vcpu *vcpu);
  
  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
  {
@@@ -3007,7 -3006,6 +3006,7 @@@ static void kvm_send_hwpoison_signal(un
  {
        siginfo_t info;
  
 +      clear_siginfo(&info);
        info.si_signo   = SIGBUS;
        info.si_errno   = 0;
        info.si_code    = BUS_MCEERR_AR;
@@@ -3343,51 -3341,48 +3342,48 @@@ out_unlock
        return RET_PF_RETRY;
  }
  
static void mmu_free_roots(struct kvm_vcpu *vcpu)
+ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
                             struct list_head *invalid_list)
  {
-       int i;
        struct kvm_mmu_page *sp;
-       LIST_HEAD(invalid_list);
  
-       if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+       if (!VALID_PAGE(*root_hpa))
                return;
  
-       if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
-           (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
-            vcpu->arch.mmu.direct_map)) {
-               hpa_t root = vcpu->arch.mmu.root_hpa;
+       sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK);
+       --sp->root_count;
+       if (!sp->root_count && sp->role.invalid)
+               kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
  
-               spin_lock(&vcpu->kvm->mmu_lock);
-               sp = page_header(root);
-               --sp->root_count;
-               if (!sp->root_count && sp->role.invalid) {
-                       kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
-                       kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
-               }
-               spin_unlock(&vcpu->kvm->mmu_lock);
-               vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+       *root_hpa = INVALID_PAGE;
+ }
+ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+ {
+       int i;
+       LIST_HEAD(invalid_list);
+       struct kvm_mmu *mmu = &vcpu->arch.mmu;
+       if (!VALID_PAGE(mmu->root_hpa))
                return;
-       }
  
        spin_lock(&vcpu->kvm->mmu_lock);
-       for (i = 0; i < 4; ++i) {
-               hpa_t root = vcpu->arch.mmu.pae_root[i];
  
-               if (root) {
-                       root &= PT64_BASE_ADDR_MASK;
-                       sp = page_header(root);
-                       --sp->root_count;
-                       if (!sp->root_count && sp->role.invalid)
-                               kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
-                                                        &invalid_list);
-               }
-               vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+       if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+           (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+               mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
+       } else {
+               for (i = 0; i < 4; ++i)
+                       if (mmu->pae_root[i] != 0)
+                               mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
+                                                  &invalid_list);
+               mmu->root_hpa = INVALID_PAGE;
        }
        kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
        spin_unlock(&vcpu->kvm->mmu_lock);
-       vcpu->arch.mmu.root_hpa = INVALID_PAGE;
  }
+ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
  
  static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
  {
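The mmu.c hunk above folds the duplicated root-freeing logic into mmu_free_root_page(), which takes a pointer to the root handle so one helper can both drop the reference and overwrite the slot with INVALID_PAGE, for the single root and for each PAE root. A toy standalone version of that refactoring pattern, with invented types, is sketched below.

#include <stdio.h>

#define INVALID_PAGE 0ULL

struct toy_page {
        unsigned long long hpa;
        int root_count;
};

/* Drop one reference through a pointer to the handle, then invalidate it. */
static void free_root_page(unsigned long long *root_hpa, struct toy_page *sp)
{
        if (*root_hpa == INVALID_PAGE)
                return;
        --sp->root_count;
        if (!sp->root_count)
                printf("page %#llx can now be zapped\n", sp->hpa);
        *root_hpa = INVALID_PAGE;
}

int main(void)
{
        struct toy_page sp0 = { .hpa = 0x1000, .root_count = 1 };
        struct toy_page sp1 = { .hpa = 0x2000, .root_count = 1 };
        unsigned long long root_hpa = sp0.hpa;
        unsigned long long pae_root[4] = { sp1.hpa, 0, 0, 0 };

        /* The same helper serves the single root and the PAE root array. */
        free_root_page(&root_hpa, &sp0);
        for (int i = 0; i < 4; i++)
                if (pae_root[i] != INVALID_PAGE)
                        free_root_page(&pae_root[i], &sp1);
        return 0;
}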
@@@ -3720,7 -3715,6 +3716,6 @@@ static int handle_mmio_page_fault(struc
         */
        return RET_PF_RETRY;
  }
- EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
  
  static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
                                         u32 error_code, gfn_t gfn)
@@@ -3812,6 -3806,14 +3807,14 @@@ static bool try_async_pf(struct kvm_vcp
        struct kvm_memory_slot *slot;
        bool async;
  
+       /*
+        * Don't expose private memslots to L2.
+        */
+       if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+               *pfn = KVM_PFN_NOSLOT;
+               return false;
+       }
        slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
        async = false;
        *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
@@@ -3951,7 -3953,7 +3954,7 @@@ static void nonpaging_init_context(stru
  
  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
  {
-       mmu_free_roots(vcpu);
+       kvm_mmu_free_roots(vcpu);
  }
  
  static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@@ -4473,6 -4475,7 +4476,7 @@@ static void init_kvm_tdp_mmu(struct kvm
        struct kvm_mmu *context = &vcpu->arch.mmu;
  
        context->base_role.word = 0;
+       context->base_role.guest_mode = is_guest_mode(vcpu);
        context->base_role.smm = is_smm(vcpu);
        context->base_role.ad_disabled = (shadow_accessed_mask == 0);
        context->page_fault = tdp_page_fault;
@@@ -4539,6 -4542,7 +4543,7 @@@ void kvm_init_shadow_mmu(struct kvm_vcp
                = smep && !is_write_protection(vcpu);
        context->base_role.smap_andnot_wp
                = smap && !is_write_protection(vcpu);
+       context->base_role.guest_mode = is_guest_mode(vcpu);
        context->base_role.smm = is_smm(vcpu);
        reset_shadow_zero_bits_mask(vcpu, context);
  }
@@@ -4564,7 -4568,7 +4569,7 @@@ void kvm_init_shadow_ept_mmu(struct kvm
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
        context->base_role.ad_disabled = !accessed_dirty;
+       context->base_role.guest_mode = 1;
        update_permission_bitmask(vcpu, context, true);
        update_pkru_bitmask(vcpu, context, true);
        update_last_nonleaf_level(vcpu, context);
@@@ -4664,7 -4668,7 +4669,7 @@@ EXPORT_SYMBOL_GPL(kvm_mmu_load)
  
  void kvm_mmu_unload(struct kvm_vcpu *vcpu)
  {
-       mmu_free_roots(vcpu);
+       kvm_mmu_free_roots(vcpu);
        WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@@ -4825,6 -4829,7 +4830,7 @@@ static void kvm_mmu_pte_write(struct kv
        mask.smep_andnot_wp = 1;
        mask.smap_andnot_wp = 1;
        mask.smm = 1;
+       mask.guest_mode = 1;
        mask.ad_disabled = 1;
  
        /*
diff --combined arch/x86/kvm/svm.c
index 950ec50f77c30b71545fd93a4154875b4d9f0e4e,d9305f1723f572740385a91b252232e00c328d0a..695b0bd02220378493dd3ea3f9edf6959343121f
@@@ -49,7 -49,7 +49,7 @@@
  #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
  #include <asm/irq_remapping.h>
 -#include <asm/nospec-branch.h>
 +#include <asm/spec-ctrl.h>
  
  #include <asm/virtext.h>
  #include "trace.h"
@@@ -213,12 -213,6 +213,12 @@@ struct vcpu_svm 
        } host;
  
        u64 spec_ctrl;
 +      /*
 +       * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
 +       * translated into the appropriate LS_CFG bits on the host to
 +       * perform speculative control.
 +       */
 +      u64 virt_spec_ctrl;
  
        u32 *msrpm;
  
@@@ -1768,7 -1762,10 +1768,10 @@@ static struct page **sev_pin_memory(str
        unsigned long npages, npinned, size;
        unsigned long locked, lock_limit;
        struct page **pages;
-       int first, last;
+       unsigned long first, last;
+       if (ulen == 0 || uaddr + ulen < uaddr)
+               return NULL;
  
        /* Calculate number of pages. */
        first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
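The sev_pin_memory() change above widens first/last to unsigned long and rejects a zero-length range or one whose end wraps around the address space before computing how many pages to pin. The self-contained sketch below reproduces that validation and page arithmetic with PAGE_SHIFT fixed at 12 purely for illustration; the last-page formula is the usual one and is assumed here rather than copied from the hunk.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))

/* Returns 0 for an invalid range, otherwise the number of pages spanned. */
static unsigned long range_to_npages(unsigned long uaddr, unsigned long ulen)
{
        unsigned long first, last;

        /* Reject empty ranges and ranges that wrap past the top of memory. */
        if (ulen == 0 || uaddr + ulen < uaddr)
                return 0;

        first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
        last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
        return last - first + 1;
}

int main(void)
{
        printf("%lu\n", range_to_npages(0x1fff, 2));       /* crosses a page: 2 */
        printf("%lu\n", range_to_npages(-4096UL, 8192));   /* wraps: 0 */
        printf("%lu\n", range_to_npages(0x1000, 0));       /* empty: 0 */
        return 0;
}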
@@@ -1855,13 -1852,13 +1858,13 @@@ static void __unregister_enc_region_loc
  
  static struct kvm *svm_vm_alloc(void)
  {
-       struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+       struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
        return &kvm_svm->kvm;
  }
  
  static void svm_vm_free(struct kvm *kvm)
  {
-       kfree(to_kvm_svm(kvm));
+       vfree(to_kvm_svm(kvm));
  }
  
  static void sev_vm_destroy(struct kvm *kvm)
@@@ -2066,7 -2063,6 +2069,7 @@@ static void svm_vcpu_reset(struct kvm_v
  
        vcpu->arch.microcode_version = 0x01000065;
        svm->spec_ctrl = 0;
 +      svm->virt_spec_ctrl = 0;
  
        if (!init_event) {
                svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@@ -4115,19 -4111,11 +4118,19 @@@ static int svm_get_msr(struct kvm_vcpu 
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
                        return 1;
  
                msr_info->data = svm->spec_ctrl;
                break;
 +      case MSR_AMD64_VIRT_SPEC_CTRL:
 +              if (!msr_info->host_initiated &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
 +                      return 1;
 +
 +              msr_info->data = svm->virt_spec_ctrl;
 +              break;
        case MSR_F15H_IC_CFG: {
  
                int family, model;
@@@ -4218,12 -4206,11 +4221,12 @@@ static int svm_set_msr(struct kvm_vcpu 
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
                        return 1;
  
                /* The STIBP bit doesn't fault even if it's not advertised */
 -              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
 +              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
                        return 1;
  
                svm->spec_ctrl = data;
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
                        return 1;
  
                if (data & ~PRED_CMD_IBPB)
                        break;
                set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
                break;
 +      case MSR_AMD64_VIRT_SPEC_CTRL:
 +              if (!msr->host_initiated &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
 +                      return 1;
 +
 +              if (data & ~SPEC_CTRL_SSBD)
 +                      return 1;
 +
 +              svm->virt_spec_ctrl = data;
 +              break;
        case MSR_STAR:
                svm->vmcb->save.star = data;
                break;
@@@ -5062,7 -5039,7 +5065,7 @@@ static void update_cr8_intercept(struc
                set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
  }
  
- static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
  {
        return;
  }
@@@ -5583,7 -5560,8 +5586,7 @@@ static void svm_vcpu_run(struct kvm_vcp
         * is no need to worry about the conditional branch over the wrmsr
         * being speculatively taken.
         */
 -      if (svm->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 +      x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
  
        asm volatile (
                "push %%" _ASM_BP "; \n\t"
  #endif
                );
  
 +      /* Eliminate branch target predictions from guest mode */
 +      vmexit_fill_RSB();
 +
 +#ifdef CONFIG_X86_64
 +      wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 +#else
 +      loadsegment(fs, svm->host.fs);
 +#ifndef CONFIG_X86_32_LAZY_GS
 +      loadsegment(gs, svm->host.gs);
 +#endif
 +#endif
 +
        /*
         * We do not use IBRS in the kernel. If this vCPU has used the
         * SPEC_CTRL MSR it may have left it on; save the value and
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  
 -      if (svm->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 -
 -      /* Eliminate branch target predictions from guest mode */
 -      vmexit_fill_RSB();
 -
 -#ifdef CONFIG_X86_64
 -      wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 -#else
 -      loadsegment(fs, svm->host.fs);
 -#ifndef CONFIG_X86_32_LAZY_GS
 -      loadsegment(gs, svm->host.gs);
 -#endif
 -#endif
 +      x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
  
        reload_tss(vcpu);
  
@@@ -5810,7 -5789,7 +5813,7 @@@ static bool svm_cpu_has_accelerated_tpr
        return false;
  }
  
 -static bool svm_has_high_real_mode_segbase(void)
 +static bool svm_has_emulated_msr(int index)
  {
        return true;
  }
@@@ -6949,6 -6928,9 +6952,9 @@@ static int svm_register_enc_region(stru
        if (!sev_guest(kvm))
                return -ENOTTY;
  
+       if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
+               return -EINVAL;
        region = kzalloc(sizeof(*region), GFP_KERNEL);
        if (!region)
                return -ENOMEM;
@@@ -7036,7 -7018,7 +7042,7 @@@ static struct kvm_x86_ops svm_x86_ops _
        .hardware_enable = svm_hardware_enable,
        .hardware_disable = svm_hardware_disable,
        .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
 -      .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
 +      .has_emulated_msr = svm_has_emulated_msr,
  
        .vcpu_create = svm_create_vcpu,
        .vcpu_free = svm_free_vcpu,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
-       .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+       .set_virtual_apic_mode = svm_set_virtual_apic_mode,
        .get_enable_apicv = svm_get_enable_apicv,
        .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
        .load_eoi_exitmap = svm_load_eoi_exitmap,
diff --combined arch/x86/kvm/vmx.c
index 40aa29204baf80aee54056dffb69519cc6cb5f89,48989f78be60e6f6f97a3dd2ce2da1441fc3ab47..fc61e25966e470d82a52f915c8be9aca379be614
@@@ -51,7 -51,7 +51,7 @@@
  #include <asm/apic.h>
  #include <asm/irq_remapping.h>
  #include <asm/mmu_context.h>
 -#include <asm/nospec-branch.h>
 +#include <asm/spec-ctrl.h>
  #include <asm/mshyperv.h>
  
  #include "trace.h"
@@@ -242,7 -242,11 +242,11 @@@ struct shared_msr_entry 
   * underlying hardware which will be used to run L2.
   * This structure is packed to ensure that its layout is identical across
   * machines (necessary for live migration).
-  * If there are changes in this struct, VMCS12_REVISION must be changed.
+  *
+  * IMPORTANT: Changing the layout of existing fields in this structure
+  * will break save/restore compatibility with older kvm releases. When
+  * adding new fields, either use space in the reserved padding* arrays
+  * or add the new fields to the end of the structure.
   */
  typedef u64 natural_width;
  struct __packed vmcs12 {
        u64 virtual_apic_page_addr;
        u64 apic_access_addr;
        u64 posted_intr_desc_addr;
-       u64 vm_function_control;
        u64 ept_pointer;
        u64 eoi_exit_bitmap0;
        u64 eoi_exit_bitmap1;
        u64 eoi_exit_bitmap2;
        u64 eoi_exit_bitmap3;
-       u64 eptp_list_address;
        u64 xss_exit_bitmap;
        u64 guest_physical_address;
        u64 vmcs_link_pointer;
-       u64 pml_address;
        u64 guest_ia32_debugctl;
        u64 guest_ia32_pat;
        u64 guest_ia32_efer;
        u64 host_ia32_pat;
        u64 host_ia32_efer;
        u64 host_ia32_perf_global_ctrl;
-       u64 padding64[8]; /* room for future expansion */
+       u64 vmread_bitmap;
+       u64 vmwrite_bitmap;
+       u64 vm_function_control;
+       u64 eptp_list_address;
+       u64 pml_address;
+       u64 padding64[3]; /* room for future expansion */
        /*
         * To allow migration of L1 (complete with its L2 guests) between
         * machines of different natural widths (32 or 64 bit), we cannot have
        u16 guest_ldtr_selector;
        u16 guest_tr_selector;
        u16 guest_intr_status;
-       u16 guest_pml_index;
        u16 host_es_selector;
        u16 host_cs_selector;
        u16 host_ss_selector;
        u16 host_fs_selector;
        u16 host_gs_selector;
        u16 host_tr_selector;
+       u16 guest_pml_index;
  };
  
+ /*
+  * For save/restore compatibility, the vmcs12 field offsets must not change.
+  */
+ #define CHECK_OFFSET(field, loc)                              \
+       BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),       \
+               "Offset of " #field " in struct vmcs12 has changed.")
+ static inline void vmx_check_vmcs12_offsets(void) {
+       CHECK_OFFSET(revision_id, 0);
+       CHECK_OFFSET(abort, 4);
+       CHECK_OFFSET(launch_state, 8);
+       CHECK_OFFSET(io_bitmap_a, 40);
+       CHECK_OFFSET(io_bitmap_b, 48);
+       CHECK_OFFSET(msr_bitmap, 56);
+       CHECK_OFFSET(vm_exit_msr_store_addr, 64);
+       CHECK_OFFSET(vm_exit_msr_load_addr, 72);
+       CHECK_OFFSET(vm_entry_msr_load_addr, 80);
+       CHECK_OFFSET(tsc_offset, 88);
+       CHECK_OFFSET(virtual_apic_page_addr, 96);
+       CHECK_OFFSET(apic_access_addr, 104);
+       CHECK_OFFSET(posted_intr_desc_addr, 112);
+       CHECK_OFFSET(ept_pointer, 120);
+       CHECK_OFFSET(eoi_exit_bitmap0, 128);
+       CHECK_OFFSET(eoi_exit_bitmap1, 136);
+       CHECK_OFFSET(eoi_exit_bitmap2, 144);
+       CHECK_OFFSET(eoi_exit_bitmap3, 152);
+       CHECK_OFFSET(xss_exit_bitmap, 160);
+       CHECK_OFFSET(guest_physical_address, 168);
+       CHECK_OFFSET(vmcs_link_pointer, 176);
+       CHECK_OFFSET(guest_ia32_debugctl, 184);
+       CHECK_OFFSET(guest_ia32_pat, 192);
+       CHECK_OFFSET(guest_ia32_efer, 200);
+       CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
+       CHECK_OFFSET(guest_pdptr0, 216);
+       CHECK_OFFSET(guest_pdptr1, 224);
+       CHECK_OFFSET(guest_pdptr2, 232);
+       CHECK_OFFSET(guest_pdptr3, 240);
+       CHECK_OFFSET(guest_bndcfgs, 248);
+       CHECK_OFFSET(host_ia32_pat, 256);
+       CHECK_OFFSET(host_ia32_efer, 264);
+       CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
+       CHECK_OFFSET(vmread_bitmap, 280);
+       CHECK_OFFSET(vmwrite_bitmap, 288);
+       CHECK_OFFSET(vm_function_control, 296);
+       CHECK_OFFSET(eptp_list_address, 304);
+       CHECK_OFFSET(pml_address, 312);
+       CHECK_OFFSET(cr0_guest_host_mask, 344);
+       CHECK_OFFSET(cr4_guest_host_mask, 352);
+       CHECK_OFFSET(cr0_read_shadow, 360);
+       CHECK_OFFSET(cr4_read_shadow, 368);
+       CHECK_OFFSET(cr3_target_value0, 376);
+       CHECK_OFFSET(cr3_target_value1, 384);
+       CHECK_OFFSET(cr3_target_value2, 392);
+       CHECK_OFFSET(cr3_target_value3, 400);
+       CHECK_OFFSET(exit_qualification, 408);
+       CHECK_OFFSET(guest_linear_address, 416);
+       CHECK_OFFSET(guest_cr0, 424);
+       CHECK_OFFSET(guest_cr3, 432);
+       CHECK_OFFSET(guest_cr4, 440);
+       CHECK_OFFSET(guest_es_base, 448);
+       CHECK_OFFSET(guest_cs_base, 456);
+       CHECK_OFFSET(guest_ss_base, 464);
+       CHECK_OFFSET(guest_ds_base, 472);
+       CHECK_OFFSET(guest_fs_base, 480);
+       CHECK_OFFSET(guest_gs_base, 488);
+       CHECK_OFFSET(guest_ldtr_base, 496);
+       CHECK_OFFSET(guest_tr_base, 504);
+       CHECK_OFFSET(guest_gdtr_base, 512);
+       CHECK_OFFSET(guest_idtr_base, 520);
+       CHECK_OFFSET(guest_dr7, 528);
+       CHECK_OFFSET(guest_rsp, 536);
+       CHECK_OFFSET(guest_rip, 544);
+       CHECK_OFFSET(guest_rflags, 552);
+       CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
+       CHECK_OFFSET(guest_sysenter_esp, 568);
+       CHECK_OFFSET(guest_sysenter_eip, 576);
+       CHECK_OFFSET(host_cr0, 584);
+       CHECK_OFFSET(host_cr3, 592);
+       CHECK_OFFSET(host_cr4, 600);
+       CHECK_OFFSET(host_fs_base, 608);
+       CHECK_OFFSET(host_gs_base, 616);
+       CHECK_OFFSET(host_tr_base, 624);
+       CHECK_OFFSET(host_gdtr_base, 632);
+       CHECK_OFFSET(host_idtr_base, 640);
+       CHECK_OFFSET(host_ia32_sysenter_esp, 648);
+       CHECK_OFFSET(host_ia32_sysenter_eip, 656);
+       CHECK_OFFSET(host_rsp, 664);
+       CHECK_OFFSET(host_rip, 672);
+       CHECK_OFFSET(pin_based_vm_exec_control, 744);
+       CHECK_OFFSET(cpu_based_vm_exec_control, 748);
+       CHECK_OFFSET(exception_bitmap, 752);
+       CHECK_OFFSET(page_fault_error_code_mask, 756);
+       CHECK_OFFSET(page_fault_error_code_match, 760);
+       CHECK_OFFSET(cr3_target_count, 764);
+       CHECK_OFFSET(vm_exit_controls, 768);
+       CHECK_OFFSET(vm_exit_msr_store_count, 772);
+       CHECK_OFFSET(vm_exit_msr_load_count, 776);
+       CHECK_OFFSET(vm_entry_controls, 780);
+       CHECK_OFFSET(vm_entry_msr_load_count, 784);
+       CHECK_OFFSET(vm_entry_intr_info_field, 788);
+       CHECK_OFFSET(vm_entry_exception_error_code, 792);
+       CHECK_OFFSET(vm_entry_instruction_len, 796);
+       CHECK_OFFSET(tpr_threshold, 800);
+       CHECK_OFFSET(secondary_vm_exec_control, 804);
+       CHECK_OFFSET(vm_instruction_error, 808);
+       CHECK_OFFSET(vm_exit_reason, 812);
+       CHECK_OFFSET(vm_exit_intr_info, 816);
+       CHECK_OFFSET(vm_exit_intr_error_code, 820);
+       CHECK_OFFSET(idt_vectoring_info_field, 824);
+       CHECK_OFFSET(idt_vectoring_error_code, 828);
+       CHECK_OFFSET(vm_exit_instruction_len, 832);
+       CHECK_OFFSET(vmx_instruction_info, 836);
+       CHECK_OFFSET(guest_es_limit, 840);
+       CHECK_OFFSET(guest_cs_limit, 844);
+       CHECK_OFFSET(guest_ss_limit, 848);
+       CHECK_OFFSET(guest_ds_limit, 852);
+       CHECK_OFFSET(guest_fs_limit, 856);
+       CHECK_OFFSET(guest_gs_limit, 860);
+       CHECK_OFFSET(guest_ldtr_limit, 864);
+       CHECK_OFFSET(guest_tr_limit, 868);
+       CHECK_OFFSET(guest_gdtr_limit, 872);
+       CHECK_OFFSET(guest_idtr_limit, 876);
+       CHECK_OFFSET(guest_es_ar_bytes, 880);
+       CHECK_OFFSET(guest_cs_ar_bytes, 884);
+       CHECK_OFFSET(guest_ss_ar_bytes, 888);
+       CHECK_OFFSET(guest_ds_ar_bytes, 892);
+       CHECK_OFFSET(guest_fs_ar_bytes, 896);
+       CHECK_OFFSET(guest_gs_ar_bytes, 900);
+       CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
+       CHECK_OFFSET(guest_tr_ar_bytes, 908);
+       CHECK_OFFSET(guest_interruptibility_info, 912);
+       CHECK_OFFSET(guest_activity_state, 916);
+       CHECK_OFFSET(guest_sysenter_cs, 920);
+       CHECK_OFFSET(host_ia32_sysenter_cs, 924);
+       CHECK_OFFSET(vmx_preemption_timer_value, 928);
+       CHECK_OFFSET(virtual_processor_id, 960);
+       CHECK_OFFSET(posted_intr_nv, 962);
+       CHECK_OFFSET(guest_es_selector, 964);
+       CHECK_OFFSET(guest_cs_selector, 966);
+       CHECK_OFFSET(guest_ss_selector, 968);
+       CHECK_OFFSET(guest_ds_selector, 970);
+       CHECK_OFFSET(guest_fs_selector, 972);
+       CHECK_OFFSET(guest_gs_selector, 974);
+       CHECK_OFFSET(guest_ldtr_selector, 976);
+       CHECK_OFFSET(guest_tr_selector, 978);
+       CHECK_OFFSET(guest_intr_status, 980);
+       CHECK_OFFSET(host_es_selector, 982);
+       CHECK_OFFSET(host_cs_selector, 984);
+       CHECK_OFFSET(host_ss_selector, 986);
+       CHECK_OFFSET(host_ds_selector, 988);
+       CHECK_OFFSET(host_fs_selector, 990);
+       CHECK_OFFSET(host_gs_selector, 992);
+       CHECK_OFFSET(host_tr_selector, 994);
+       CHECK_OFFSET(guest_pml_index, 996);
+ }
  /*
   * VMCS12_REVISION is an arbitrary id that should be changed if the content or
   * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
   * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
+  *
+  * IMPORTANT: Changing this value will break save/restore compatibility with
+  * older kvm releases.
   */
  #define VMCS12_REVISION 0x11e57ed0
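The CHECK_OFFSET() table above turns every vmcs12 field offset into a build-time assertion, so reordering the structure breaks the compile instead of silently breaking save/restore of nested state. The same technique in portable C11 looks like the sketch below; the struct and offsets are invented and have nothing to do with the real vmcs12 layout.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct wire_header {
        uint32_t version;       /* must stay at offset 0 */
        uint32_t flags;         /* must stay at offset 4 */
        uint64_t payload_len;   /* must stay at offset 8 */
};

/* Fail the build, not the migration, if the layout ever changes. */
#define CHECK_OFFSET(field, loc) \
        static_assert(offsetof(struct wire_header, field) == (loc), \
                      "offset of " #field " changed")

CHECK_OFFSET(version, 0);
CHECK_OFFSET(flags, 4);
CHECK_OFFSET(payload_len, 8);

int main(void)
{
        return 0;
}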
  
@@@ -481,7 -646,8 +646,8 @@@ struct nested_vmx 
        bool sync_shadow_vmcs;
        bool dirty_vmcs12;
  
-       bool change_vmcs01_virtual_x2apic_mode;
+       bool change_vmcs01_virtual_apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
  
@@@ -761,6 -927,7 +927,7 @@@ static const unsigned short vmcs_field_
        FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
        FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
        FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
+       FIELD64(PML_ADDRESS, pml_address),
        FIELD64(TSC_OFFSET, tsc_offset),
        FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
        FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
        FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
        FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
        FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
+       FIELD64(VMREAD_BITMAP, vmread_bitmap),
+       FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
        FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
        FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
        FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
-       FIELD64(PML_ADDRESS, pml_address),
        FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
        FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
        FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
@@@ -1089,6 -1257,16 +1257,16 @@@ static inline u16 evmcs_read16(unsigne
        return *(u16 *)((char *)current_evmcs + offset);
  }
  
+ static inline void evmcs_touch_msr_bitmap(void)
+ {
+       if (unlikely(!current_evmcs))
+               return;
+       if (current_evmcs->hv_enlightenments_control.msr_bitmap)
+               current_evmcs->hv_clean_fields &=
+                       ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ }
  static void evmcs_load(u64 phys_addr)
  {
        struct hv_vp_assist_page *vp_ap =
@@@ -1173,6 -1351,7 +1351,7 @@@ static inline u32 evmcs_read32(unsigne
  static inline u16 evmcs_read16(unsigned long field) { return 0; }
  static inline void evmcs_load(u64 phys_addr) {}
  static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+ static inline void evmcs_touch_msr_bitmap(void) {}
  #endif /* IS_ENABLED(CONFIG_HYPERV) */
  
  static inline bool is_exception_n(u32 intr_info, u8 vector)
@@@ -1393,6 -1572,11 +1572,11 @@@ static inline bool cpu_has_vmx_invept_g
        return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
  }
  
+ static inline bool cpu_has_vmx_invvpid_individual_addr(void)
+ {
+       return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
+ }
  static inline bool cpu_has_vmx_invvpid_single(void)
  {
        return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
@@@ -1510,6 -1694,17 +1694,17 @@@ static inline unsigned nested_cpu_vmx_m
        return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
  }
  
+ /*
+  * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
+  * to modify any valid field of the VMCS, or are the VM-exit
+  * information fields read-only?
+  */
+ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
+ {
+       return to_vmx(vcpu)->nested.msrs.misc_low &
+               MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
+ }
  static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
  {
        return vmcs12->cpu_based_vm_exec_control & bit;
@@@ -3127,6 -3322,7 +3322,7 @@@ static void nested_vmx_setup_ctls_msrs(
                msrs->misc_high);
        msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
        msrs->misc_low |=
+               MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
                VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
                VMX_MISC_ACTIVITY_HLT;
        msrs->misc_high = 0;
@@@ -3300,6 -3496,15 +3496,15 @@@ static int vmx_restore_vmx_misc(struct 
  
        vmx->nested.msrs.misc_low = data;
        vmx->nested.msrs.misc_high = data >> 32;
+       /*
+        * If L1 has read-only VM-exit information fields, use the
+        * less permissive vmx_vmwrite_bitmap to specify write
+        * permissions for the shadow VMCS.
+        */
+       if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
        return 0;
  }
  
@@@ -3354,6 -3559,13 +3559,13 @@@ static int vmx_set_vmx_msr(struct kvm_v
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
+       /*
+        * Don't allow changes to the VMX capability MSRs while the vCPU
+        * is in VMX operation.
+        */
+       if (vmx->nested.vmxon)
+               return -EBUSY;
        switch (msr_index) {
        case MSR_IA32_VMX_BASIC:
                return vmx_restore_vmx_basic(vmx, data);
@@@ -3529,6 -3741,7 +3741,6 @@@ static int vmx_get_msr(struct kvm_vcpu 
                return kvm_get_msr_common(vcpu, msr_info);
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                        return 1;
  
@@@ -3647,11 -3860,12 +3859,11 @@@ static int vmx_set_msr(struct kvm_vcpu 
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                        return 1;
  
                /* The STIBP bit doesn't fault even if it's not advertised */
 -              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
 +              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
                        return 1;
  
                vmx->spec_ctrl = data;
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                        return 1;
  
@@@ -4216,6 -4431,14 +4428,14 @@@ static int alloc_loaded_vmcs(struct loa
                if (!loaded_vmcs->msr_bitmap)
                        goto out_vmcs;
                memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+               if (static_branch_unlikely(&enable_evmcs) &&
+                   (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+                       struct hv_enlightened_vmcs *evmcs =
+                               (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
+                       evmcs->hv_enlightenments_control.msr_bitmap = 1;
+               }
        }
        return 0;
  
@@@ -5329,6 -5552,9 +5549,9 @@@ static void __always_inline vmx_disable
        if (!cpu_has_vmx_msr_bitmap())
                return;
  
+       if (static_branch_unlikely(&enable_evmcs))
+               evmcs_touch_msr_bitmap();
        /*
         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
         * have the write-low and read-high bitmap offsets the wrong way round.
@@@ -5364,6 -5590,9 +5587,9 @@@ static void __always_inline vmx_enable_
        if (!cpu_has_vmx_msr_bitmap())
                return;
  
+       if (static_branch_unlikely(&enable_evmcs))
+               evmcs_touch_msr_bitmap();
        /*
         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
         * have the write-low and read-high bitmap offsets the wrong way round.
@@@ -5946,8 -6175,14 +6172,14 @@@ static void vmx_vcpu_setup(struct vcpu_
        int i;
  
        if (enable_shadow_vmcs) {
+               /*
+                * At vCPU creation, "VMWRITE to any supported field
+                * in the VMCS" is supported, so use the more
+                * permissive vmx_vmread_bitmap to specify both read
+                * and write permissions for the shadow VMCS.
+                */
                vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
-               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
        }
        if (cpu_has_vmx_msr_bitmap())
                vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@@ -7588,8 -7823,7 +7820,7 @@@ static int nested_vmx_get_vmptr(struct 
                        vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
                return 1;
  
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
-                               sizeof(*vmpointer), &e)) {
+       if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@@ -7670,6 -7904,12 +7901,12 @@@ static int handle_vmon(struct kvm_vcpu 
                return 1;
        }
  
+       /* CPL=0 must be checked manually. */
+       if (vmx_get_cpl(vcpu)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
        if (vmx->nested.vmxon) {
                nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
                return kvm_skip_emulated_instruction(vcpu);
   */
  static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
  {
+       if (vmx_get_cpl(vcpu)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 0;
+       }
        if (!to_vmx(vcpu)->nested.vmxon) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 0;
@@@ -7928,23 -8173,42 +8170,42 @@@ static inline int vmcs12_write_any(stru
  
  }
  
+ /*
+  * Copy the writable VMCS shadow fields back to the VMCS12, in case
+  * they have been modified by the L1 guest. Note that the "read-only"
+  * VM-exit information fields are actually writable if the vCPU is
+  * configured to support "VMWRITE to any supported field in the VMCS."
+  */
  static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
  {
-       int i;
+       const u16 *fields[] = {
+               shadow_read_write_fields,
+               shadow_read_only_fields
+       };
+       const int max_fields[] = {
+               max_shadow_read_write_fields,
+               max_shadow_read_only_fields
+       };
+       int i, q;
        unsigned long field;
        u64 field_value;
        struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
-       const u16 *fields = shadow_read_write_fields;
-       const int num_fields = max_shadow_read_write_fields;
  
        preempt_disable();
  
        vmcs_load(shadow_vmcs);
  
-       for (i = 0; i < num_fields; i++) {
-               field = fields[i];
-               field_value = __vmcs_readl(field);
-               vmcs12_write_any(&vmx->vcpu, field, field_value);
+       for (q = 0; q < ARRAY_SIZE(fields); q++) {
+               for (i = 0; i < max_fields[q]; i++) {
+                       field = fields[q][i];
+                       field_value = __vmcs_readl(field);
+                       vmcs12_write_any(&vmx->vcpu, field, field_value);
+               }
+               /*
+                * Skip the VM-exit information fields if they are read-only.
+                */
+               if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+                       break;
        }
  
        vmcs_clear(shadow_vmcs);
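copy_shadow_to_vmcs12() above now walks two field tables: the read/write fields always, and the nominally read-only VM-exit information fields only when the vCPU is allowed to VMWRITE any field. The condensed sketch below shows the table-of-tables loop with the early break; the field numbers and names are illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static const unsigned short rw_fields[] = { 0x6800, 0x6802 };
static const unsigned short ro_fields[] = { 0x4402, 0x4406 };

static void sync_shadow_fields(bool vmwrite_any_field)
{
        const unsigned short *tables[] = { rw_fields, ro_fields };
        const size_t counts[] = {
                sizeof(rw_fields) / sizeof(rw_fields[0]),
                sizeof(ro_fields) / sizeof(ro_fields[0]),
        };

        for (size_t q = 0; q < 2; q++) {
                for (size_t i = 0; i < counts[q]; i++)
                        printf("sync field %#x\n", (unsigned)tables[q][i]);
                /* Skip the read-only table unless those fields are writable. */
                if (!vmwrite_any_field)
                        break;
        }
}

int main(void)
{
        sync_shadow_fields(false);      /* only the read/write table is synced */
        return 0;
}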
@@@ -8029,9 -8293,9 +8290,9 @@@ static int handle_vmread(struct kvm_vcp
                if (get_vmx_mem_address(vcpu, exit_qualification,
                                vmx_instruction_info, true, &gva))
                        return 1;
-               /* _system ok, as hardware has verified cpl=0 */
-               kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
-                            &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
+               /* _system ok, nested_vmx_check_permission has verified cpl=0 */
+               kvm_write_guest_virt_system(vcpu, gva, &field_value,
+                                           (is_long_mode(vcpu) ? 8 : 4), NULL);
        }
  
        nested_vmx_succeed(vcpu);
@@@ -8069,8 -8333,8 +8330,8 @@@ static int handle_vmwrite(struct kvm_vc
                if (get_vmx_mem_address(vcpu, exit_qualification,
                                vmx_instruction_info, false, &gva))
                        return 1;
-               if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
-                          &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+               if (kvm_read_guest_virt(vcpu, gva, &field_value,
+                                       (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
                        kvm_inject_page_fault(vcpu, &e);
                        return 1;
                }
  
  
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
-       if (vmcs_field_readonly(field)) {
+       /*
+        * If the vCPU supports "VMWRITE to any supported field in the
+        * VMCS," then the "read-only" fields are actually read/write.
+        */
+       if (vmcs_field_readonly(field) &&
+           !nested_cpu_has_vmwrite_any_field(vcpu)) {
                nested_vmx_failValid(vcpu,
                        VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
                return kvm_skip_emulated_instruction(vcpu);
@@@ -8189,10 -8458,10 +8455,10 @@@ static int handle_vmptrst(struct kvm_vc
        if (get_vmx_mem_address(vcpu, exit_qualification,
                        vmx_instruction_info, true, &vmcs_gva))
                return 1;
-       /* ok to use *_system, as hardware has verified cpl=0 */
-       if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
-                                (void *)&to_vmx(vcpu)->nested.current_vmptr,
-                                sizeof(u64), &e)) {
+       /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
+       if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
+                                       (void *)&to_vmx(vcpu)->nested.current_vmptr,
+                                       sizeof(u64), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@@ -8239,8 -8508,7 +8505,7 @@@ static int handle_invept(struct kvm_vcp
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-                               sizeof(operand), &e)) {
+       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@@ -8304,8 -8572,7 +8569,7 @@@ static int handle_invvpid(struct kvm_vc
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-                               sizeof(operand), &e)) {
+       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
  
        switch (type) {
        case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
-               if (is_noncanonical_address(operand.gla, vcpu)) {
+               if (!operand.vpid ||
+                   is_noncanonical_address(operand.gla, vcpu)) {
                        nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
                }
-               /* fall through */
+               if (cpu_has_vmx_invvpid_individual_addr() &&
+                   vmx->nested.vpid02) {
+                       __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
+                               vmx->nested.vpid02, operand.gla);
+               } else
+                       __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+               break;
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
                if (!operand.vpid) {
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
                }
+               __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
                break;
        case VMX_VPID_EXTENT_ALL_CONTEXT:
+               __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
                break;
        default:
                WARN_ON_ONCE(1);
                return kvm_skip_emulated_instruction(vcpu);
        }
  
-       __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
        nested_vmx_succeed(vcpu);
  
        return kvm_skip_emulated_instruction(vcpu);
@@@ -8842,11 -9117,13 +9114,13 @@@ static bool nested_vmx_exit_reflected(s
        case EXIT_REASON_TPR_BELOW_THRESHOLD:
                return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
        case EXIT_REASON_APIC_ACCESS:
-               return nested_cpu_has2(vmcs12,
-                       SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
        case EXIT_REASON_APIC_WRITE:
        case EXIT_REASON_EOI_INDUCED:
-               /* apic_write and eoi_induced should exit unconditionally. */
+               /*
+                * The controls for "virtualize APIC accesses," "APIC-
+                * register virtualization," and "virtual-interrupt
+                * delivery" only come from vmcs12.
+                */
                return true;
        case EXIT_REASON_EPT_VIOLATION:
                /*
@@@ -9253,31 -9530,43 +9527,43 @@@ static void update_cr8_intercept(struc
        vmcs_write32(TPR_THRESHOLD, irr);
  }
  
- static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
  {
        u32 sec_exec_control;
  
+       if (!lapic_in_kernel(vcpu))
+               return;
        /* Postpone execution until vmcs01 is the current VMCS. */
        if (is_guest_mode(vcpu)) {
-               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
                return;
        }
  
-       if (!cpu_has_vmx_virtualize_x2apic_mode())
-               return;
        if (!cpu_need_tpr_shadow(vcpu))
                return;
  
        sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+       sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                             SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
  
-       if (set) {
-               sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-               sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
-       } else {
-               sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
-               sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-               vmx_flush_tlb(vcpu, true);
+       switch (kvm_get_apic_mode(vcpu)) {
+       case LAPIC_MODE_INVALID:
+               WARN_ONCE(true, "Invalid local APIC state");
+       case LAPIC_MODE_DISABLED:
+               break;
+       case LAPIC_MODE_XAPIC:
+               if (flexpriority_enabled) {
+                       sec_exec_control |=
+                               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+                       vmx_flush_tlb(vcpu, true);
+               }
+               break;
+       case LAPIC_MODE_X2APIC:
+               if (cpu_has_vmx_virtualize_x2apic_mode())
+                       sec_exec_control |=
+                               SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+               break;
        }
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
  
  
  static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
  {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-       /*
-        * Currently we do not handle the nested case where L2 has an
-        * APIC access page of its own; that page is still pinned.
-        * Hence, we skip the case where the VCPU is in guest mode _and_
-        * L1 prepared an APIC access page for L2.
-        *
-        * For the case where L1 and L2 share the same APIC access page
-        * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
-        * in the vmcs12), this function will only update either the vmcs01
-        * or the vmcs02.  If the former, the vmcs02 will be updated by
-        * prepare_vmcs02.  If the latter, the vmcs01 will be updated in
-        * the next L2->L1 exit.
-        */
-       if (!is_guest_mode(vcpu) ||
-           !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
-                            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+       if (!is_guest_mode(vcpu)) {
                vmcs_write64(APIC_ACCESS_ADDR, hpa);
                vmx_flush_tlb(vcpu, true);
        }
@@@ -9485,21 -9757,9 +9754,21 @@@ static void vmx_handle_external_intr(st
  }
  STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
  
 -static bool vmx_has_high_real_mode_segbase(void)
 +static bool vmx_has_emulated_msr(int index)
  {
 -      return enable_unrestricted_guest || emulate_invalid_guest_state;
 +      switch (index) {
 +      case MSR_IA32_SMBASE:
 +              /*
 +               * We cannot do SMM unless we can run the guest in big
 +               * real mode.
 +               */
 +              return enable_unrestricted_guest || emulate_invalid_guest_state;
 +      case MSR_AMD64_VIRT_SPEC_CTRL:
 +              /* This is AMD only.  */
 +              return false;
 +      default:
 +              return true;
 +      }
  }
  
  static bool vmx_mpx_supported(void)
@@@ -9731,7 -9991,8 +10000,7 @@@ static void __noclone vmx_vcpu_run(stru
         * is no need to worry about the conditional branch over the wrmsr
         * being speculatively taken.
         */
 -      if (vmx->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 +      x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
  
        vmx->__launched = vmx->loaded_vmcs->launched;
  
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  
 -      if (vmx->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 +      x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
  
        /* Eliminate branch target predictions from guest mode */
        vmexit_fill_RSB();
@@@ -9943,13 -10205,13 +10212,13 @@@ STACK_FRAME_NON_STANDARD(vmx_vcpu_run)
  
  static struct kvm *vmx_vm_alloc(void)
  {
-       struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
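+       /*
+        * struct kvm_vmx embeds struct kvm, which is large; vzalloc avoids
+        * requiring physically contiguous pages for it.
+        */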
+       struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
        return &kvm_vmx->kvm;
  }
  
  static void vmx_vm_free(struct kvm *kvm)
  {
-       kfree(to_kvm_vmx(kvm));
+       vfree(to_kvm_vmx(kvm));
  }
  
  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
@@@ -10387,11 -10649,6 +10656,6 @@@ static void nested_get_vmcs12_pages(str
                        vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                        SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
                }
-       } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
-                  cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
-               vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
-                             SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
-               kvm_vcpu_reload_apic_access_page(vcpu);
        }
  
        if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
@@@ -10871,8 -11128,7 +11135,7 @@@ static int nested_vmx_load_cr3(struct k
        return 0;
  }
  
- static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-                              bool from_vmentry)
+ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
   * is assigned to entry_failure_code on failure.
   */
  static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-                         bool from_vmentry, u32 *entry_failure_code)
+                         u32 *entry_failure_code)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exec_control, vmcs12_exec_ctrl;
  
        if (vmx->nested.dirty_vmcs12) {
-               prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+               prepare_vmcs02_full(vcpu, vmcs12);
                vmx->nested.dirty_vmcs12 = false;
        }
  
         * HOST_FS_BASE, HOST_GS_BASE.
         */
  
-       if (from_vmentry &&
+       if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
                kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
                vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
                kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
                vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
        }
-       if (from_vmentry) {
+       if (vmx->nested.nested_run_pending) {
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                             vmcs12->vm_entry_intr_info_field);
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
                        ~VM_ENTRY_IA32E_MODE) |
                (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
  
-       if (from_vmentry &&
+       if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
                vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
                vcpu->arch.pat = vmcs12->guest_ia32_pat;
                if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
                        if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
                                vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-                               __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
+                               __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
                        }
                } else {
                        vmx_flush_tlb(vcpu, true);
        vmx_set_cr4(vcpu, vmcs12->guest_cr4);
        vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
  
-       if (from_vmentry &&
+       if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
                vcpu->arch.efer = vmcs12->guest_ia32_efer;
        else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@@ -11418,7 -11674,7 +11681,7 @@@ static int check_vmentry_postreqs(struc
        return 0;
  }
  
- static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
                vcpu->arch.tsc_offset += vmcs12->tsc_offset;
  
        r = EXIT_REASON_INVALID_STATE;
-       if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+       if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
                goto fail;
  
        nested_get_vmcs12_pages(vcpu, vmcs12);
@@@ -11540,20 -11796,22 +11803,22 @@@ static int nested_vmx_run(struct kvm_vc
         * the nested entry.
         */
  
-       ret = enter_vmx_non_root_mode(vcpu, true);
-       if (ret)
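+       /*
+        * Flag that a VMLAUNCH/VMRESUME is in flight; prepare_vmcs02()
+        * and friends key off this instead of a from_vmentry argument.
+        */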
+       vmx->nested.nested_run_pending = 1;
+       ret = enter_vmx_non_root_mode(vcpu);
+       if (ret) {
+               vmx->nested.nested_run_pending = 0;
                return ret;
+       }
  
        /*
         * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
         * by event injection, halt vcpu.
         */
        if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
-           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
+           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
+               vmx->nested.nested_run_pending = 0;
                return kvm_vcpu_halt(vcpu);
-       vmx->nested.nested_run_pending = 1;
+       }
        return 1;
  
  out:
@@@ -11925,12 -12183,20 +12190,20 @@@ static void load_vmcs12_host_state(stru
  
        load_vmcs12_mmu_host_state(vcpu, vmcs12);
  
-       if (enable_vpid) {
-               /*
-                * Trivially support vpid by letting L2s share their parent
-                * L1's vpid. TODO: move to a more elaborate solution, giving
-                * each L2 its own vpid and exposing the vpid feature to L1.
-                */
+       /*
+        * If vmcs01 doesn't use VPID, the CPU flushes the TLB on every
+        * VMEntry/VMExit, so no flush is needed here.
+        *
+        * If vmcs12 uses VPID, TLB entries populated by L2 are
+        * tagged with vmx->nested.vpid02 while L1 entries are tagged
+        * with vmx->vpid, so again no flush is needed.
+        *
+        * Therefore, flush the TLB only when vmcs01 uses VPID and
+        * vmcs12 does not, as in that case L1 and L2 TLB entries
+        * are both tagged with vmx->vpid.
+        */
+       if (enable_vpid &&
+           !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
                vmx_flush_tlb(vcpu, true);
        }
  
@@@ -12069,10 -12335,9 +12342,9 @@@ static void nested_vmx_vmexit(struct kv
        if (kvm_has_tsc_control)
                decache_tsc_multiplier(vmx);
  
-       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
-               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
-               vmx_set_virtual_x2apic_mode(vcpu,
-                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       if (vmx->nested.change_vmcs01_virtual_apic_mode) {
+               vmx->nested.change_vmcs01_virtual_apic_mode = false;
+               vmx_set_virtual_apic_mode(vcpu);
        } else if (!nested_cpu_has_ept(vmcs12) &&
                   nested_cpu_has2(vmcs12,
                                   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@@ -12236,7 -12501,7 +12508,7 @@@ static inline int u64_shl_div_u64(u64 a
  static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
  {
        struct vcpu_vmx *vmx;
-       u64 tscl, guest_tscl, delta_tsc;
+       u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
  
        if (kvm_mwait_in_guest(vcpu->kvm))
                return -EOPNOTSUPP;
        tscl = rdtsc();
        guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
        delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
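+       /*
+        * Expire the preemption timer early by lapic_timer_advance_ns so
+        * the tscdeadline interrupt is not delivered to the guest late.
+        */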
+       lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+       if (delta_tsc > lapic_timer_advance_cycles)
+               delta_tsc -= lapic_timer_advance_cycles;
+       else
+               delta_tsc = 0;
  
        /* Convert to host delta tsc if tsc scaling is enabled */
        if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
@@@ -12615,7 -12886,7 +12893,7 @@@ static int vmx_pre_leave_smm(struct kvm
  
        if (vmx->nested.smm.guest_mode) {
                vcpu->arch.hflags &= ~HF_SMM_MASK;
-               ret = enter_vmx_non_root_mode(vcpu, false);
+               ret = enter_vmx_non_root_mode(vcpu);
                vcpu->arch.hflags |= HF_SMM_MASK;
                if (ret)
                        return ret;
@@@ -12639,7 -12910,7 +12917,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
        .hardware_enable = hardware_enable,
        .hardware_disable = hardware_disable,
        .cpu_has_accelerated_tpr = report_flexpriority,
 -      .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
 +      .has_emulated_msr = vmx_has_emulated_msr,
  
        .vm_init = vmx_vm_init,
        .vm_alloc = vmx_vm_alloc,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
-       .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+       .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
        .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
        .get_enable_apicv = vmx_get_enable_apicv,
        .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
@@@ -12812,6 -13083,7 +13090,7 @@@ static int __init vmx_init(void
        rcu_assign_pointer(crash_vmclear_loaded_vmcss,
                           crash_vmclear_local_loaded_vmcss);
  #endif
+       vmx_check_vmcs12_offsets();
  
        return 0;
  }
diff --combined arch/x86/kvm/x86.c
index 71e7cda6d01430bca8ef226238589ab0e830d6c9,06dd4cdb2ca8a8fa21bf24957a34344e11ec65e2..cc8c8be1e92db9d309acd9367af4cf08218aba4c
@@@ -138,6 -138,7 +138,7 @@@ module_param(tsc_tolerance_ppm, uint, S
  /* lapic timer advance (tscdeadline mode only) in nanoseconds */
  unsigned int __read_mostly lapic_timer_advance_ns = 0;
  module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+ EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
  
  static bool __read_mostly vector_hashing = true;
  module_param(vector_hashing, bool, S_IRUGO);
@@@ -318,23 -319,27 +319,27 @@@ u64 kvm_get_apic_base(struct kvm_vcpu *
  }
  EXPORT_SYMBOL_GPL(kvm_get_apic_base);
  
+ enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
+ {
+       return kvm_apic_mode(kvm_get_apic_base(vcpu));
+ }
+ EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
  int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
-       u64 old_state = vcpu->arch.apic_base &
-               (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-       u64 new_state = msr_info->data &
-               (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+       enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
+       enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
        u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
                (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
  
-       if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
-               return 1;
-       if (!msr_info->host_initiated &&
-           ((new_state == MSR_IA32_APICBASE_ENABLE &&
-             old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
-            (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
-             old_state == 0)))
+       if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
                return 1;
+       if (!msr_info->host_initiated) {
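+               /*
+                * Per the SDM, switching from x2APIC straight back to xAPIC
+                * and enabling x2APIC while the APIC is disabled are invalid.
+                */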
+               if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
+                       return 1;
+               if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
+                       return 1;
+       }
  
        kvm_lapic_set_base(vcpu, msr_info->data);
        return 0;
@@@ -856,7 -861,7 +861,7 @@@ int kvm_set_cr3(struct kvm_vcpu *vcpu, 
        }
  
        if (is_long_mode(vcpu) &&
-           (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+           (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
                return 1;
        else if (is_pae(vcpu) && is_paging(vcpu) &&
                   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
@@@ -1061,7 -1066,6 +1066,7 @@@ static u32 emulated_msrs[] = 
        MSR_SMI_COUNT,
        MSR_PLATFORM_INFO,
        MSR_MISC_FEATURES_ENABLES,
 +      MSR_AMD64_VIRT_SPEC_CTRL,
  };
  
  static unsigned num_emulated_msrs;
@@@ -1761,7 -1765,7 +1766,7 @@@ static int do_monotonic_boot(s64 *t, u6
        return mode;
  }
  
- static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
+ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
  {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        unsigned long seq;
@@@ -1794,7 -1798,7 +1799,7 @@@ static bool kvm_get_time_and_clockread(
  }
  
  /* returns true if host is using TSC based clocksource */
- static bool kvm_get_walltime_and_clockread(struct timespec *ts,
+ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
                                           u64 *tsc_timestamp)
  {
        /* checked again under seqlock below */
@@@ -2868,6 -2872,7 +2873,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_HYPERV_SYNIC2:
        case KVM_CAP_HYPERV_VP_INDEX:
        case KVM_CAP_HYPERV_EVENTFD:
+       case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
                r = KVM_CLOCK_TSC_STABLE;
                break;
        case KVM_CAP_X86_DISABLE_EXITS:
-               r |=  KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+               r |=  KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
                if (kvm_can_mwait_in_guest())
                        r |= KVM_X86_DISABLE_EXITS_MWAIT;
                break;
                 * fringe case that is not enabled except via specific settings
                 * of the module parameters.
                 */
 -              r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
 +              r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
                break;
        case KVM_CAP_VAPIC:
                r = !kvm_x86_ops->cpu_has_accelerated_tpr();
@@@ -3962,7 -3967,7 +3968,7 @@@ out_nofree
        return r;
  }
  
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
  }
@@@ -4248,7 -4253,7 +4254,7 @@@ split_irqchip_unlock
                if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
                        kvm_can_mwait_in_guest())
                        kvm->arch.mwait_in_guest = true;
-               if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+               if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
                        kvm->arch.hlt_in_guest = true;
                if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
                        kvm->arch.pause_in_guest = true;
@@@ -4607,8 -4612,14 +4613,8 @@@ static void kvm_init_msr_list(void
        num_msrs_to_save = j;
  
        for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
 -              switch (emulated_msrs[i]) {
 -              case MSR_IA32_SMBASE:
 -                      if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
 -                              continue;
 -                      break;
 -              default:
 -                      break;
 -              }
 +              if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
 +                      continue;
  
                if (j < i)
                        emulated_msrs[j] = emulated_msrs[i];
@@@ -4787,11 -4798,10 +4793,10 @@@ static int kvm_fetch_guest_virt(struct 
        return X86EMUL_CONTINUE;
  }
  
- int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
                               gva_t addr, void *val, unsigned int bytes,
                               struct x86_exception *exception)
  {
-       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
  
        return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
  }
  EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
  
- static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-                                     gva_t addr, void *val, unsigned int bytes,
-                                     struct x86_exception *exception)
+ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
+                            gva_t addr, void *val, unsigned int bytes,
+                            struct x86_exception *exception, bool system)
  {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
+       u32 access = 0;
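+       /*
+        * "system" reads (e.g. descriptor-table accesses) are implicit
+        * supervisor accesses and never carry PFERR_USER_MASK.
+        */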
+       if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+               access |= PFERR_USER_MASK;
+       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
  }
  
  static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
        return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
  }
  
- int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-                                      gva_t addr, void *val,
-                                      unsigned int bytes,
-                                      struct x86_exception *exception)
+ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+                                     struct kvm_vcpu *vcpu, u32 access,
+                                     struct x86_exception *exception)
  {
-       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        void *data = val;
        int r = X86EMUL_CONTINUE;
  
        while (bytes) {
                gpa_t gpa =  vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
-                                                            PFERR_WRITE_MASK,
+                                                            access,
                                                             exception);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
  out:
        return r;
  }
+ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
+                             unsigned int bytes, struct x86_exception *exception,
+                             bool system)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       u32 access = PFERR_WRITE_MASK;
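+       /* Only non-"system" writes performed at CPL 3 are user accesses. */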
+       if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+               access |= PFERR_USER_MASK;
+       return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+                                          access, exception);
+ }
+ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
+                               unsigned int bytes, struct x86_exception *exception)
+ {
+       return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+                                          PFERR_WRITE_MASK, exception);
+ }
  EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
  
  int handle_ud(struct kvm_vcpu *vcpu)
        struct x86_exception e;
  
        if (force_emulation_prefix &&
-           kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
-                               kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+           kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
+                               sig, sizeof(sig), &e) == 0 &&
            memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
                kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
                emul_type = 0;
@@@ -5600,8 -5634,8 +5629,8 @@@ static int emulator_pre_leave_smm(struc
  static const struct x86_emulate_ops emulate_ops = {
        .read_gpr            = emulator_read_gpr,
        .write_gpr           = emulator_write_gpr,
-       .read_std            = kvm_read_guest_virt_system,
-       .write_std           = kvm_write_guest_virt_system,
+       .read_std            = emulator_read_std,
+       .write_std           = emulator_write_std,
        .read_phys           = kvm_read_guest_phys_system,
        .fetch               = kvm_fetch_guest_virt,
        .read_emulated       = emulator_read_emulated,
@@@ -6617,7 -6651,7 +6646,7 @@@ static int kvm_pv_clock_pairing(struct 
                                unsigned long clock_type)
  {
        struct kvm_clock_pairing clock_pairing;
-       struct timespec ts;
+       struct timespec64 ts;
        u64 cycle;
        int ret;
  
@@@ -6671,8 -6705,11 +6700,8 @@@ int kvm_emulate_hypercall(struct kvm_vc
        unsigned long nr, a0, a1, a2, a3, ret;
        int op_64_bit;
  
 -      if (kvm_hv_hypercall_enabled(vcpu->kvm)) {
 -              if (!kvm_hv_hypercall(vcpu))
 -                      return 0;
 -              goto out;
 -      }
 +      if (kvm_hv_hypercall_enabled(vcpu->kvm))
 +              return kvm_hv_hypercall(vcpu);
  
        nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
        a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
  
        if (kvm_x86_ops->get_cpl(vcpu) != 0) {
                ret = -KVM_EPERM;
 -              goto out_error;
 +              goto out;
        }
  
        switch (nr) {
                ret = -KVM_ENOSYS;
                break;
        }
 -out_error:
 +out:
        if (!op_64_bit)
                ret = (u32)ret;
        kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
  
 -out:
        ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
  }
@@@ -7976,7 -8014,6 +8005,7 @@@ static int __set_sregs(struct kvm_vcpu 
  {
        struct msr_data apic_base_msr;
        int mmu_reset_needed = 0;
 +      int cpuid_update_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
        int ret = -EINVAL;
        vcpu->arch.cr0 = sregs->cr0;
  
        mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 +      cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
 +                              (X86_CR4_OSXSAVE | X86_CR4_PKE));
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
 -      if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
 +      if (cpuid_update_needed)
                kvm_update_cpuid(vcpu);
  
        idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --combined include/linux/sched.h
index 3aa4fcb74e761dfda361f17d09593ecd9c361646,ff289ae6b7870f2bc9cc4acd61e6768acfc1604e..16e4d984fe51948d092cb093538264a10b2f4135
@@@ -27,7 -27,6 +27,7 @@@
  #include <linux/signal_types.h>
  #include <linux/mm_types_task.h>
  #include <linux/task_io_accounting.h>
 +#include <linux/rseq.h>
  
  /* task_struct member predeclarations (sorted alphabetically): */
  struct audit_context;
@@@ -1048,17 -1047,6 +1048,17 @@@ struct task_struct 
        unsigned long                   numa_pages_migrated;
  #endif /* CONFIG_NUMA_BALANCING */
  
 +#ifdef CONFIG_RSEQ
 +      struct rseq __user *rseq;
 +      u32 rseq_len;
 +      u32 rseq_sig;
 +      /*
 +       * RmW on rseq_event_mask must be performed atomically
 +       * with respect to preemption.
 +       */
 +      unsigned long rseq_event_mask;
 +#endif
 +
        struct tlbflush_unmap_batch     tlb_ubc;
  
        struct rcu_head                 rcu;
@@@ -1445,8 -1433,7 +1445,8 @@@ static inline bool is_percpu_thread(voi
  #define PFA_NO_NEW_PRIVS              0       /* May not gain new privileges. */
  #define PFA_SPREAD_PAGE                       1       /* Spread page cache over cpuset */
  #define PFA_SPREAD_SLAB                       2       /* Spread some slab caches over cpuset */
 -
 +#define PFA_SPEC_SSB_DISABLE          3       /* Speculative Store Bypass disabled */
 +#define PFA_SPEC_SSB_FORCE_DISABLE    4       /* Speculative Store Bypass force disabled*/
  
  #define TASK_PFA_TEST(name, func)                                     \
        static inline bool task_##func(struct task_struct *p)           \
@@@ -1471,13 -1458,6 +1471,13 @@@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab
  TASK_PFA_SET(SPREAD_SLAB, spread_slab)
  TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
  
 +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
 +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
 +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 +
 +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 +
  static inline void
  current_restore_flags(unsigned long orig_flags, unsigned long flags)
  {
@@@ -1524,7 -1504,6 +1524,7 @@@ static inline int task_nice(const struc
  extern int can_nice(const struct task_struct *p, const int nice);
  extern int task_curr(const struct task_struct *p);
  extern int idle_cpu(int cpu);
 +extern int available_idle_cpu(int cpu);
  extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
  extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
  extern int sched_setattr(struct task_struct *, const struct sched_attr *);
@@@ -1639,6 -1618,12 +1639,12 @@@ static inline void clear_tsk_thread_fla
        clear_ti_thread_flag(task_thread_info(tsk), flag);
  }
  
+ static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
+                                         bool value)
+ {
+       update_ti_thread_flag(task_thread_info(tsk), flag, value);
+ }
  static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
  {
        return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
@@@ -1674,6 -1659,7 +1680,6 @@@ static inline int test_tsk_need_resched
   * explicit rescheduling in places that are safe. The return
   * value indicates whether a reschedule was done in fact.
   * cond_resched_lock() will drop the spinlock before scheduling,
 - * cond_resched_softirq() will enable bhs before scheduling.
   */
  #ifndef CONFIG_PREEMPT
  extern int _cond_resched(void);
@@@ -1693,6 -1679,13 +1699,6 @@@ extern int __cond_resched_lock(spinlock
        __cond_resched_lock(lock);                              \
  })
  
 -extern int __cond_resched_softirq(void);
 -
 -#define cond_resched_softirq() ({                                     \
 -      ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);     \
 -      __cond_resched_softirq();                                       \
 -})
 -
  static inline void cond_resched_rcu(void)
  {
  #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
@@@ -1769,126 -1762,4 +1775,126 @@@ extern long sched_getaffinity(pid_t pid
  #define TASK_SIZE_OF(tsk)     TASK_SIZE
  #endif
  
 +#ifdef CONFIG_RSEQ
 +
 +/*
 + * Map the event mask on the user-space ABI enum rseq_cs_flags
 + * for direct mask checks.
 + */
 +enum rseq_event_mask_bits {
 +      RSEQ_EVENT_PREEMPT_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
 +      RSEQ_EVENT_SIGNAL_BIT   = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
 +      RSEQ_EVENT_MIGRATE_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
 +};
 +
 +enum rseq_event_mask {
 +      RSEQ_EVENT_PREEMPT      = (1U << RSEQ_EVENT_PREEMPT_BIT),
 +      RSEQ_EVENT_SIGNAL       = (1U << RSEQ_EVENT_SIGNAL_BIT),
 +      RSEQ_EVENT_MIGRATE      = (1U << RSEQ_EVENT_MIGRATE_BIT),
 +};
 +
 +static inline void rseq_set_notify_resume(struct task_struct *t)
 +{
 +      if (t->rseq)
 +              set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
 +}
 +
 +void __rseq_handle_notify_resume(struct pt_regs *regs);
 +
 +static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 +{
 +      if (current->rseq)
 +              __rseq_handle_notify_resume(regs);
 +}
 +
 +static inline void rseq_signal_deliver(struct pt_regs *regs)
 +{
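 +      /*
 +       * Disable preemption so the non-atomic __set_bit() below cannot
 +       * race with rseq_preempt() run from the scheduler.
 +       */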
 +      preempt_disable();
 +      __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
 +      preempt_enable();
 +      rseq_handle_notify_resume(regs);
 +}
 +
 +/* rseq_preempt() requires preemption to be disabled. */
 +static inline void rseq_preempt(struct task_struct *t)
 +{
 +      __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
 +      rseq_set_notify_resume(t);
 +}
 +
 +/* rseq_migrate() requires preemption to be disabled. */
 +static inline void rseq_migrate(struct task_struct *t)
 +{
 +      __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
 +      rseq_set_notify_resume(t);
 +}
 +
 +/*
 + * If the parent process has a registered restartable sequences area,
 + * the child inherits it. This only applies when forking a process,
 + * not a thread. If the parent calls fork() in the middle of a
 + * restartable sequence, set the resume notifier to force the child
 + * to retry.
 + */
 +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 +{
 +      if (clone_flags & CLONE_THREAD) {
 +              t->rseq = NULL;
 +              t->rseq_len = 0;
 +              t->rseq_sig = 0;
 +              t->rseq_event_mask = 0;
 +      } else {
 +              t->rseq = current->rseq;
 +              t->rseq_len = current->rseq_len;
 +              t->rseq_sig = current->rseq_sig;
 +              t->rseq_event_mask = current->rseq_event_mask;
 +              rseq_preempt(t);
 +      }
 +}
 +
 +static inline void rseq_execve(struct task_struct *t)
 +{
 +      t->rseq = NULL;
 +      t->rseq_len = 0;
 +      t->rseq_sig = 0;
 +      t->rseq_event_mask = 0;
 +}
 +
 +#else
 +
 +static inline void rseq_set_notify_resume(struct task_struct *t)
 +{
 +}
 +static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 +{
 +}
 +static inline void rseq_signal_deliver(struct pt_regs *regs)
 +{
 +}
 +static inline void rseq_preempt(struct task_struct *t)
 +{
 +}
 +static inline void rseq_migrate(struct task_struct *t)
 +{
 +}
 +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 +{
 +}
 +static inline void rseq_execve(struct task_struct *t)
 +{
 +}
 +
 +#endif
 +
 +#ifdef CONFIG_DEBUG_RSEQ
 +
 +void rseq_syscall(struct pt_regs *regs);
 +
 +#else
 +
 +static inline void rseq_syscall(struct pt_regs *regs)
 +{
 +}
 +
 +#endif
 +
  #endif
diff --combined virt/kvm/arm/arm.c
index 2d9b4795edb2beecd04588c791735d155cc05741,72be779cffe20ad8b09bd356bd42b65a39acaea2..04e554cae3a2066e5eb6e4d2544efc84a62d88de
@@@ -16,6 -16,7 +16,7 @@@
   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   */
  
+ #include <linux/bug.h>
  #include <linux/cpu_pm.h>
  #include <linux/errno.h>
  #include <linux/err.h>
@@@ -41,6 -42,7 +42,7 @@@
  #include <asm/mman.h>
  #include <asm/tlbflush.h>
  #include <asm/cacheflush.h>
+ #include <asm/cpufeature.h>
  #include <asm/virt.h>
  #include <asm/kvm_arm.h>
  #include <asm/kvm_asm.h>
@@@ -163,7 -165,7 +165,7 @@@ int kvm_arch_create_vcpu_debugfs(struc
        return 0;
  }
  
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
  }
@@@ -249,6 -251,21 +251,21 @@@ long kvm_arch_dev_ioctl(struct file *fi
        return -EINVAL;
  }
  
+ struct kvm *kvm_arch_alloc_vm(void)
+ {
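+       /*
+        * With !VHE the kvm struct is mapped into Hyp via the kernel
+        * linear map, so it must be kzalloc'd; VHE can use vmalloc memory.
+        */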
+       if (!has_vhe())
+               return kzalloc(sizeof(struct kvm), GFP_KERNEL);
+       return vzalloc(sizeof(struct kvm));
+ }
+ void kvm_arch_free_vm(struct kvm *kvm)
+ {
+       if (!has_vhe())
+               kfree(kvm);
+       else
+               vfree(kvm);
+ }
  
  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
  {
@@@ -290,7 -307,6 +307,6 @@@ out
  
  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
  {
-       kvm_vgic_vcpu_early_init(vcpu);
  }
  
  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@@ -363,10 -379,12 +379,12 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
        kvm_vgic_load(vcpu);
        kvm_timer_vcpu_load(vcpu);
        kvm_vcpu_load_sysregs(vcpu);
+       kvm_arch_vcpu_load_fp(vcpu);
  }
  
  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
+       kvm_arch_vcpu_put_fp(vcpu);
        kvm_vcpu_put_sysregs(vcpu);
        kvm_timer_vcpu_put(vcpu);
        kvm_vgic_put(vcpu);
@@@ -678,9 -696,6 +696,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                 */
                preempt_disable();
  
-               /* Flush FP/SIMD state that can't survive guest entry/exit */
-               kvm_fpsimd_flush_cpu_state();
                kvm_pmu_flush_hwstate(vcpu);
  
                local_irq_disable();
                if (static_branch_unlikely(&userspace_irqchip_in_use))
                        kvm_timer_sync_hwstate(vcpu);
  
+               kvm_arch_vcpu_ctxsync_fp(vcpu);
                /*
                 * We may have taken a host interrupt in HYP mode (ie
                 * while executing the guest). This interrupt is still
@@@ -1490,10 -1507,6 +1507,10 @@@ static int init_hyp_mode(void
                }
        }
  
 +      err = hyp_map_aux_data();
 +      if (err)
 +              kvm_err("Cannot map host auxiliary data: %d\n", err);
 +
        return 0;
  
  out_err:
@@@ -1574,6 -1587,11 +1591,11 @@@ int kvm_arch_init(void *opaque
                return -ENODEV;
        }
  
+       if (!kvm_arch_check_sve_has_vhe()) {
+               kvm_pr_unimpl("SVE system without VHE unsupported.  Broken cpu?");
+               return -ENODEV;
+       }
        for_each_online_cpu(cpu) {
                smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
                if (ret < 0) {