Git Repo - linux.git/commitdiff
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <[email protected]>
Tue, 12 Jun 2018 18:34:04 +0000 (11:34 -0700)
committer Linus Torvalds <[email protected]>
Tue, 12 Jun 2018 18:34:04 +0000 (11:34 -0700)
Pull KVM updates from Paolo Bonzini:
 "Small update for KVM:

  ARM:
   - lazy context-switching of FPSIMD registers on arm64
   - "split" regions for vGIC redistributor

  s390:
   - cleanups for nested
   - clock handling
   - crypto
   - storage keys
   - control register bits

  x86:
   - many bugfixes
   - implement more Hyper-V super powers
   - implement lapic_timer_advance_ns even when the LAPIC timer is
     emulated using the processor's VMX preemption timer.
   - two security-related bugfixes at the top of the branch"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (79 commits)
  kvm: fix typo in flag name
  kvm: x86: use correct privilege level for sgdt/sidt/fxsave/fxrstor access
  KVM: x86: pass kvm_vcpu to kvm_read_guest_virt and kvm_write_guest_virt_system
  KVM: x86: introduce linear_{read,write}_system
  kvm: nVMX: Enforce cpl=0 for VMX instructions
  kvm: nVMX: Add support for "VMWRITE to any supported field"
  kvm: nVMX: Restrict VMX capability MSR changes
  KVM: VMX: Optimize tscdeadline timer latency
  KVM: docs: nVMX: Remove known limitations as they do not exist now
  KVM: docs: mmu: KVM support exposing SLAT to guests
  kvm: no need to check return value of debugfs_create functions
  kvm: Make VM ioctl do valloc for some archs
  kvm: Change return type to vm_fault_t
  KVM: docs: mmu: Fix link to NPT presentation from KVM Forum 2008
  kvm: x86: Amend the KVM_GET_SUPPORTED_CPUID API documentation
  KVM: x86: hyperv: declare KVM_CAP_HYPERV_TLBFLUSH capability
  KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}_EX implementation
  KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} implementation
  KVM: introduce kvm_make_vcpus_request_mask() API
  KVM: x86: hyperv: do rep check for each hypercall separately
  ...

25 files changed:
arch/arm/include/asm/kvm_host.h
arch/arm64/Kconfig
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/thread_info.h
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/ptrace.c
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/switch.c
arch/mips/kvm/mips.c
arch/powerpc/kvm/book3s_hv.c
arch/s390/include/asm/pgtable.h
arch/s390/kvm/priv.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/sched.h
virt/kvm/arm/arm.c

diff --combined arch/arm/include/asm/kvm_host.h
index 2d75e77bf7bb341d6e1d39a76351e0eee9390c39,4b12f32f540c2c8d7e88adfa567cf6206927ae64..1f1fe4109b026690ab80ca0d3e31355f77e78f04
@@@ -21,7 -21,6 +21,7 @@@
  
  #include <linux/types.h>
  #include <linux/kvm_types.h>
 +#include <asm/cputype.h>
  #include <asm/kvm.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_mmio.h>
@@@ -281,6 -280,7 +281,7 @@@ void kvm_mmu_wp_memory_region(struct kv
  
  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
  
+ static inline bool kvm_arch_check_sve_has_vhe(void) { return true; }
  static inline void kvm_arch_hardware_unsetup(void) {}
  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
@@@ -304,40 -304,28 +305,49 @@@ int kvm_arm_vcpu_arch_get_attr(struct k
  int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
                               struct kvm_device_attr *attr);
  
- /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
- static inline void kvm_fpsimd_flush_cpu_state(void) {}
+ /*
+  * VFP/NEON switching is all done by the hyp switch code, so no need to
+  * coordinate with host context handling for this state:
+  */
+ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
  
  static inline void kvm_arm_vhe_guest_enter(void) {}
  static inline void kvm_arm_vhe_guest_exit(void) {}
  
  static inline bool kvm_arm_harden_branch_predictor(void)
 +{
 +      switch(read_cpuid_part()) {
 +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 +      case ARM_CPU_PART_BRAHMA_B15:
 +      case ARM_CPU_PART_CORTEX_A12:
 +      case ARM_CPU_PART_CORTEX_A15:
 +      case ARM_CPU_PART_CORTEX_A17:
 +              return true;
 +#endif
 +      default:
 +              return false;
 +      }
 +}
 +
 +#define KVM_SSBD_UNKNOWN              -1
 +#define KVM_SSBD_FORCE_DISABLE                0
 +#define KVM_SSBD_KERNEL               1
 +#define KVM_SSBD_FORCE_ENABLE         2
 +#define KVM_SSBD_MITIGATED            3
 +
 +static inline int kvm_arm_have_ssbd(void)
  {
        /* No way to detect it yet, pretend it is not there. */
 -      return false;
 +      return KVM_SSBD_UNKNOWN;
  }
  
  static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
  static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
  
+ #define __KVM_HAVE_ARCH_VM_ALLOC
+ struct kvm *kvm_arch_alloc_vm(void);
+ void kvm_arch_free_vm(struct kvm *kvm);
  #endif /* __ARM_KVM_HOST_H__ */
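The __KVM_HAVE_ARCH_VM_ALLOC hooks declared just above (and again in the arm64 header further down) let the architecture choose how struct kvm is allocated, which is what the "kvm: Make VM ioctl do valloc for some archs" entry in the shortlog refers to. A plausible sketch of the shared arm/arm64 implementation; the real one lives in virt/kvm/arm/arm.c (listed among the 25 files but not excerpted here) and may differ in detail:

struct kvm *kvm_arch_alloc_vm(void)
{
	/* assumed rationale: !VHE hyp mappings want a linear-map (kmalloc) address */
	if (!has_vhe())
		return kzalloc(sizeof(struct kvm), GFP_KERNEL);

	/* with VHE, a large struct kvm need not be physically contiguous */
	return vzalloc(sizeof(struct kvm));
}

void kvm_arch_free_vm(struct kvm *kvm)
{
	if (!has_vhe())
		kfree(kvm);
	else
		vfree(kvm);
}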
diff --combined arch/arm64/Kconfig
index 9795b59aa28a1ecc01979f819d68beaa21192311,b0d3820081c8bbb68445dce1c3f01db14f2c69e5..9fd4a8ccce0760cd2c7f026b1f8394af5b313081
@@@ -7,19 -7,16 +7,19 @@@ config ARM6
        select ACPI_REDUCED_HARDWARE_ONLY if ACPI
        select ACPI_MCFG if ACPI
        select ACPI_SPCR_TABLE if ACPI
 +      select ACPI_PPTT if ACPI
        select ARCH_CLOCKSOURCE_DATA
        select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEVMEM_IS_ALLOWED
        select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
        select ARCH_HAS_ELF_RANDOMIZE
 +      select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
        select ARCH_HAS_KCOV
        select ARCH_HAS_MEMBARRIER_SYNC_CORE
 +      select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_SET_MEMORY
        select ARCH_HAS_SG_CHAIN
        select ARCH_HAS_STRICT_KERNEL_RWX
        select HAVE_CONTEXT_TRACKING
        select HAVE_DEBUG_BUGVERBOSE
        select HAVE_DEBUG_KMEMLEAK
 -      select HAVE_DMA_API_DEBUG
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select IRQ_FORCED_THREADING
        select MODULES_USE_ELF_RELA
        select MULTI_IRQ_HANDLER
 +      select NEED_DMA_MAP_STATE
 +      select NEED_SG_DMA_LENGTH
        select NO_BOOTMEM
        select OF
        select OF_EARLY_FLATTREE
        select POWER_SUPPLY
        select REFCOUNT_FULL
        select SPARSE_IRQ
 +      select SWIOTLB
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
        help
  config 64BIT
        def_bool y
  
 -config ARCH_PHYS_ADDR_T_64BIT
 -      def_bool y
 -
  config MMU
        def_bool y
  
@@@ -239,9 -237,24 +239,9 @@@ config ZONE_DMA3
  config HAVE_GENERIC_GUP
        def_bool y
  
 -config ARCH_DMA_ADDR_T_64BIT
 -      def_bool y
 -
 -config NEED_DMA_MAP_STATE
 -      def_bool y
 -
 -config NEED_SG_DMA_LENGTH
 -      def_bool y
 -
  config SMP
        def_bool y
  
 -config SWIOTLB
 -      def_bool y
 -
 -config IOMMU_HELPER
 -      def_bool SWIOTLB
 -
  config KERNEL_MODE_NEON
        def_bool y
  
@@@ -925,15 -938,6 +925,15 @@@ config HARDEN_EL2_VECTOR
  
          If unsure, say Y.
  
 +config ARM64_SSBD
 +      bool "Speculative Store Bypass Disable" if EXPERT
 +      default y
 +      help
 +        This enables mitigation of the bypassing of previous stores
 +        by speculative loads.
 +
 +        If unsure, say Y.
 +
  menuconfig ARMV8_DEPRECATED
        bool "Emulate deprecated/obsolete ARMv8 instructions"
        depends on COMPAT
@@@ -1045,7 -1049,6 +1045,7 @@@ config ARM64_PA
  
  config ARM64_LSE_ATOMICS
        bool "Atomic instructions"
 +      default y
        help
          As part of the Large System Extensions, ARMv8.1 introduces new
          atomic instructions that are designed specifically to scale in
          Say Y here to make use of these instructions for the in-kernel
          atomic routines. This incurs a small overhead on CPUs that do
          not support these instructions and requires the kernel to be
 -        built with binutils >= 2.25.
 +        built with binutils >= 2.25 in order for the new instructions
 +        to be used.
  
  config ARM64_VHE
        bool "Enable support for Virtualization Host Extensions (VHE)"
@@@ -1128,6 -1130,7 +1128,7 @@@ endmen
  config ARM64_SVE
        bool "ARM Scalable Vector Extension support"
        default y
+       depends on !KVM || ARM64_VHE
        help
          The Scalable Vector Extension (SVE) is an extension to the AArch64
          execution state which complements and extends the SIMD functionality
          booting the kernel.  If unsure and you are not observing these
          symptoms, you should assume that it is safe to say Y.
  
+         CPUs that support SVE are architecturally required to support the
+         Virtualization Host Extensions (VHE), so the kernel makes no
+         provision for supporting SVE alongside KVM without VHE enabled.
+         Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
+         KVM in the same kernel image.
  config ARM64_MODULE_PLTS
        bool
        select HAVE_MOD_ARCH_SPECIFIC
diff --combined arch/arm64/include/asm/cpufeature.h
index 55bc1f073bfbe4b8905cb43da8cb21a509fad686,0a6b7133195e1f0f3a07adec705929334f29e46f..1717ba1db35ddb935720c20ec46c318d59ca9b83
@@@ -11,9 -11,7 +11,7 @@@
  
  #include <asm/cpucaps.h>
  #include <asm/cputype.h>
- #include <asm/fpsimd.h>
  #include <asm/hwcap.h>
- #include <asm/sigcontext.h>
  #include <asm/sysreg.h>
  
  /*
@@@ -510,55 -508,6 +508,28 @@@ static inline bool system_supports_sve(
                cpus_have_const_cap(ARM64_SVE);
  }
  
- /*
-  * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
-  * vector length.
-  *
-  * Use only if SVE is present.
-  * This function clobbers the SVE vector length.
-  */
- static inline u64 read_zcr_features(void)
- {
-       u64 zcr;
-       unsigned int vq_max;
-       /*
-        * Set the maximum possible VL, and write zeroes to all other
-        * bits to see if they stick.
-        */
-       sve_kernel_enable(NULL);
-       write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-       zcr = read_sysreg_s(SYS_ZCR_EL1);
-       zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
-       vq_max = sve_vq_from_vl(sve_get_vl());
-       zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-       return zcr;
- }
 +#define ARM64_SSBD_UNKNOWN            -1
 +#define ARM64_SSBD_FORCE_DISABLE      0
 +#define ARM64_SSBD_KERNEL             1
 +#define ARM64_SSBD_FORCE_ENABLE               2
 +#define ARM64_SSBD_MITIGATED          3
 +
 +static inline int arm64_get_ssbd_state(void)
 +{
 +#ifdef CONFIG_ARM64_SSBD
 +      extern int ssbd_state;
 +      return ssbd_state;
 +#else
 +      return ARM64_SSBD_UNKNOWN;
 +#endif
 +}
 +
 +#ifdef CONFIG_ARM64_SSBD
 +void arm64_set_ssbd_mitigation(bool state);
 +#else
 +static inline void arm64_set_ssbd_mitigation(bool state) {}
 +#endif
 +
  #endif /* __ASSEMBLY__ */
  
  #endif
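The five ARM64_SSBD_* values above form a small state machine; later in this diff (arch/arm64/include/asm/kvm_host.h) they are translated one-to-one into KVM_SSBD_* values. A purely illustrative helper, not part of the patch, spelling out the intended meaning of each state:

/* Illustrative only -- the string descriptions are an editorial gloss. */
static const char *ssbd_state_str(int state)
{
	switch (state) {
	case ARM64_SSBD_FORCE_DISABLE:	return "mitigation forced off";
	case ARM64_SSBD_KERNEL:		return "mitigation applied dynamically, per task";
	case ARM64_SSBD_FORCE_ENABLE:	return "mitigation forced on";
	case ARM64_SSBD_MITIGATED:	return "CPU/firmware reports no mitigation needed";
	case ARM64_SSBD_UNKNOWN:
	default:			return "state unknown (e.g. CONFIG_ARM64_SSBD=n)";
	}
}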
diff --combined arch/arm64/include/asm/kvm_asm.h
index 951b2076a5e222036d8fd5eb612ec0302ec0da57,821a7032c0f71d65d5875505cd303b36ef3b1482..102b5a5c47b6cb4a00040e7efb295d357b20c8a3
@@@ -20,9 -20,6 +20,9 @@@
  
  #include <asm/virt.h>
  
 +#define       VCPU_WORKAROUND_2_FLAG_SHIFT    0
 +#define       VCPU_WORKAROUND_2_FLAG          (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
 +
  #define ARM_EXIT_WITH_SERROR_BIT  31
  #define ARM_EXCEPTION_CODE(x)   ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
  #define ARM_SERROR_PENDING(x)   !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
  /* The hyp-stub will return this for any kvm_call_hyp() call */
  #define ARM_EXCEPTION_HYP_GONE          HVC_STUB_ERR
  
- #define KVM_ARM64_DEBUG_DIRTY_SHIFT   0
- #define KVM_ARM64_DEBUG_DIRTY         (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+ #ifndef __ASSEMBLY__
+ #include <linux/mm.h>
  
  /* Translate a kernel address of @sym into its equivalent linear mapping */
  #define kvm_ksym_ref(sym)                                             \
        ({                                                              \
                void *val = &sym;                                       \
                if (!is_kernel_in_hyp_mode())                           \
-                       val = phys_to_virt((u64)&sym - kimage_voffset); \
+                       val = lm_alias(&sym);                           \
                val;                                                    \
         })
  
- #ifndef __ASSEMBLY__
  struct kvm;
  struct kvm_vcpu;
  
@@@ -74,37 -71,14 +74,37 @@@ extern u32 __kvm_get_mdcr_el2(void)
  
  extern u32 __init_stage2_translation(void);
  
 +/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
 +#define __hyp_this_cpu_ptr(sym)                                               \
 +      ({                                                              \
 +              void *__ptr = hyp_symbol_addr(sym);                     \
 +              __ptr += read_sysreg(tpidr_el2);                        \
 +              (typeof(&sym))__ptr;                                    \
 +       })
 +
 +#define __hyp_this_cpu_read(sym)                                      \
 +      ({                                                              \
 +              *__hyp_this_cpu_ptr(sym);                               \
 +       })
 +
  #else /* __ASSEMBLY__ */
  
 -.macro get_host_ctxt reg, tmp
 -      adr_l   \reg, kvm_host_cpu_state
 +.macro hyp_adr_this_cpu reg, sym, tmp
 +      adr_l   \reg, \sym
        mrs     \tmp, tpidr_el2
        add     \reg, \reg, \tmp
  .endm
  
 +.macro hyp_ldr_this_cpu reg, sym, tmp
 +      adr_l   \reg, \sym
 +      mrs     \tmp, tpidr_el2
 +      ldr     \reg,  [\reg, \tmp]
 +.endm
 +
 +.macro get_host_ctxt reg, tmp
 +      hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
 +.endm
 +
  .macro get_vcpu_ptr vcpu, ctxt
        get_host_ctxt \ctxt, \vcpu
        ldr     \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
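The __hyp_this_cpu_{ptr,read} helpers and the hyp_adr/ldr_this_cpu macros added above resolve per-CPU variables from EL2 by adding the per-CPU offset held in tpidr_el2 to the symbol address. They are consumed later in this same merge; a condensed usage sketch of the C side, mirroring __set_guest_arch_workaround_state() in the switch.c hunk below (the assembly twin is the hyp_ldr_this_cpu use in hyp-entry.S):

	/* only issue the SMC when this CPU actually needs the firmware call */
	if (__hyp_this_cpu_read(arm64_ssbd_callback_required))
		arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);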
diff --combined arch/arm64/include/asm/kvm_host.h
index 95d8a0e15b5fbcede8b98a708e1ea6559eb7aada,c923d3e17ba3ce511239961749f64ad5b80cc1e5..fda9a8ca48bef71b0d4a76be1a45295af1211dd6
@@@ -30,6 -30,7 +30,7 @@@
  #include <asm/kvm.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_mmio.h>
+ #include <asm/thread_info.h>
  
  #define __KVM_HAVE_ARCH_INTC_INITIALIZED
  
@@@ -216,11 -217,8 +217,11 @@@ struct kvm_vcpu_arch 
        /* Exception Information */
        struct kvm_vcpu_fault_info fault;
  
-       /* Guest debug state */
-       u64 debug_flags;
 +      /* State of various workarounds, see kvm_asm.h for bit assignment */
 +      u64 workaround_flags;
 +
+       /* Miscellaneous vcpu state flags */
+       u64 flags;
  
        /*
         * We maintain more than a single set of debug registers to support
  
        /* Pointer to host CPU context */
        kvm_cpu_context_t *host_cpu_context;
+       struct thread_info *host_thread_info;   /* hyp VA */
+       struct user_fpsimd_state *host_fpsimd_state;    /* hyp VA */
        struct {
                /* {Break,watch}point registers */
                struct kvm_guest_debug_arch regs;
        bool sysregs_loaded_on_cpu;
  };
  
+ /* vcpu_arch flags field values: */
+ #define KVM_ARM64_DEBUG_DIRTY         (1 << 0)
+ #define KVM_ARM64_FP_ENABLED          (1 << 1) /* guest FP regs loaded */
+ #define KVM_ARM64_FP_HOST             (1 << 2) /* host FP regs loaded */
+ #define KVM_ARM64_HOST_SVE_IN_USE     (1 << 3) /* backup for host TIF_SVE */
  #define vcpu_gp_regs(v)               (&(v)->arch.ctxt.gp_regs)
  
  /*
@@@ -397,6 -405,19 +408,19 @@@ static inline void __cpu_init_hyp_mode(
        kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
  }
  
+ static inline bool kvm_arch_check_sve_has_vhe(void)
+ {
+       /*
+        * The Arm architecture specifies that implementation of SVE
+        * requires VHE also to be implemented.  The KVM code for arm64
+        * relies on this when SVE is present:
+        */
+       if (system_supports_sve())
+               return has_vhe();
+       else
+               return true;
+ }
  static inline void kvm_arch_hardware_unsetup(void) {}
  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
@@@ -423,15 -444,18 +447,18 @@@ static inline void __cpu_init_stage2(vo
                  "PARange is %d bits, unsupported configuration!", parange);
  }
  
- /*
-  * All host FP/SIMD state is restored on guest exit, so nothing needs
-  * doing here except in the SVE case:
- */
- static inline void kvm_fpsimd_flush_cpu_state(void)
+ /* Guest/host FPSIMD coordination helpers */
+ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+ #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
+ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
  {
-       if (system_supports_sve())
-               sve_flush_cpu_state();
+       return kvm_arch_vcpu_run_map_fp(vcpu);
  }
+ #endif
  
  static inline void kvm_arm_vhe_guest_enter(void)
  {
@@@ -455,30 -479,11 +482,34 @@@ static inline bool kvm_arm_harden_branc
        return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
  }
  
 +#define KVM_SSBD_UNKNOWN              -1
 +#define KVM_SSBD_FORCE_DISABLE                0
 +#define KVM_SSBD_KERNEL               1
 +#define KVM_SSBD_FORCE_ENABLE         2
 +#define KVM_SSBD_MITIGATED            3
 +
 +static inline int kvm_arm_have_ssbd(void)
 +{
 +      switch (arm64_get_ssbd_state()) {
 +      case ARM64_SSBD_FORCE_DISABLE:
 +              return KVM_SSBD_FORCE_DISABLE;
 +      case ARM64_SSBD_KERNEL:
 +              return KVM_SSBD_KERNEL;
 +      case ARM64_SSBD_FORCE_ENABLE:
 +              return KVM_SSBD_FORCE_ENABLE;
 +      case ARM64_SSBD_MITIGATED:
 +              return KVM_SSBD_MITIGATED;
 +      case ARM64_SSBD_UNKNOWN:
 +      default:
 +              return KVM_SSBD_UNKNOWN;
 +      }
 +}
 +
  void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
  void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
  
+ #define __KVM_HAVE_ARCH_VM_ALLOC
+ struct kvm *kvm_arch_alloc_vm(void);
+ void kvm_arch_free_vm(struct kvm *kvm);
  #endif /* __ARM64_KVM_HOST_H__ */
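kvm_arch_check_sve_has_vhe() above encodes the rule already stated in the Kconfig help text earlier in this diff: an SVE implementation is architecturally required to implement VHE, and KVM relies on that. A sketch of the caller side; the real check sits in KVM's init path in virt/kvm/arm/arm.c (listed among the 25 files but not excerpted), and the message wording here is illustrative:

static int sketch_sve_vhe_check(void)
{
	if (!kvm_arch_check_sve_has_vhe()) {
		kvm_pr_unimpl("SVE present but VHE missing; cannot initialise KVM\n");
		return -ENODEV;
	}
	return 0;
}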
diff --combined arch/arm64/include/asm/processor.h
index 65ab83e8926e794d04dbeaea14b3ba9dff9ad73b,c99e657fdd57ba7128d08e11d6bcad0fe3597399..a73ae1e492007e53ffdf7d1c94d15cea33ea25ff
@@@ -35,8 -35,6 +35,8 @@@
  #ifdef __KERNEL__
  
  #include <linux/build_bug.h>
 +#include <linux/cache.h>
 +#include <linux/init.h>
  #include <linux/stddef.h>
  #include <linux/string.h>
  
@@@ -158,7 -156,9 +158,9 @@@ static inline void arch_thread_struct_w
  /* Sync TPIDR_EL0 back to thread_struct for current */
  void tls_preserve_current_state(void);
  
- #define INIT_THREAD  {        }
+ #define INIT_THREAD {                         \
+       .fpsimd_cpu = NR_CPUS,                  \
+ }
  
  static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
  {
@@@ -246,9 -246,17 +248,20 @@@ void cpu_enable_pan(const struct arm64_
  void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
  void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
  
 +extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 +extern void __init minsigstksz_setup(void);
 +
+ /*
+  * Not at the top of the file due to a direct #include cycle between
+  * <asm/fpsimd.h> and <asm/processor.h>.  Deferring this #include
+  * ensures that contents of processor.h are visible to fpsimd.h even if
+  * processor.h is included first.
+  *
+  * These prctl helpers are the only things in this file that require
+  * fpsimd.h.  The core code expects them to be in this header.
+  */
+ #include <asm/fpsimd.h>
  /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
  #define SVE_SET_VL(arg)       sve_set_current_vl(arg)
  #define SVE_GET_VL()  sve_get_current_vl()
diff --combined arch/arm64/include/asm/thread_info.h
index cbcf11b5e6377f0b87a59921d7154c1d64fccf39,af271f9a6c9f2c07d6e27b3bfcb9d09e16bf26a6..cb2c10a8f0a8517edc4460809f9e3d5c5e6be178
@@@ -45,12 -45,6 +45,6 @@@ struct thread_info 
        int                     preempt_count;  /* 0 => preemptable, <0 => bug */
  };
  
- #define INIT_THREAD_INFO(tsk)                                         \
- {                                                                     \
-       .preempt_count  = INIT_PREEMPT_COUNT,                           \
-       .addr_limit     = KERNEL_DS,                                    \
- }
  #define thread_saved_pc(tsk)  \
        ((unsigned long)(tsk->thread.cpu_context.pc))
  #define thread_saved_sp(tsk)  \
@@@ -94,7 -88,6 +88,7 @@@ void arch_release_task_struct(struct ta
  #define TIF_32BIT             22      /* 32bit process */
  #define TIF_SVE                       23      /* Scalable Vector Extension in use */
  #define TIF_SVE_VL_INHERIT    24      /* Inherit sve_vl_onexec across exec */
 +#define TIF_SSBD              25      /* Wants SSB mitigation */
  
  #define _TIF_SIGPENDING               (1 << TIF_SIGPENDING)
  #define _TIF_NEED_RESCHED     (1 << TIF_NEED_RESCHED)
                                 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
                                 _TIF_NOHZ)
  
+ #define INIT_THREAD_INFO(tsk)                                         \
+ {                                                                     \
+       .flags          = _TIF_FOREIGN_FPSTATE,                         \
+       .preempt_count  = INIT_PREEMPT_COUNT,                           \
+       .addr_limit     = KERNEL_DS,                                    \
+ }
  #endif /* __KERNEL__ */
  #endif /* __ASM_THREAD_INFO_H */
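INIT_THREAD_INFO now starts every thread with TIF_FOREIGN_FPSTATE set, meaning "the FPSIMD registers of this CPU do not hold this task's state". The consumer of that flag is the ret-to-user work loop (do_notify_resume() in arch/arm64/kernel/signal.c, shown here from memory in simplified form rather than from this diff), which reloads the registers before the first return to userspace:

	if (thread_flags & _TIF_FOREIGN_FPSTATE)
		fpsimd_restore_current_state();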
diff --combined arch/arm64/kernel/fpsimd.c
index 3b527ae46e492b773a98a2a091a608bcec3dc119,7074c4cd0e0e1ca1533c4c8aa251be6419a45812..84c68b14f1b2f140c97556fd491aada7e06f1410
  #include <linux/percpu.h>
  #include <linux/prctl.h>
  #include <linux/preempt.h>
 -#include <linux/prctl.h>
  #include <linux/ptrace.h>
  #include <linux/sched/signal.h>
  #include <linux/sched/task_stack.h>
  #include <linux/signal.h>
  #include <linux/slab.h>
+ #include <linux/stddef.h>
  #include <linux/sysctl.h>
  
  #include <asm/esr.h>
  #include <asm/fpsimd.h>
  #include <asm/cpufeature.h>
  #include <asm/cputype.h>
+ #include <asm/processor.h>
  #include <asm/simd.h>
  #include <asm/sigcontext.h>
  #include <asm/sysreg.h>
   */
  struct fpsimd_last_state_struct {
        struct user_fpsimd_state *st;
-       bool sve_in_use;
  };
  
  static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@@ -128,7 -130,7 +129,7 @@@ static int sve_default_vl = -1
  #ifdef CONFIG_ARM64_SVE
  
  /* Maximum supported vector length across all CPUs (initially poisoned) */
 -int __ro_after_init sve_max_vl = -1;
 +int __ro_after_init sve_max_vl = SVE_VL_MIN;
  /* Set of available vector lengths, as vq_to_bit(vq): */
  static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
  static void __percpu *efi_sve_state;
@@@ -158,19 -160,6 +159,6 @@@ static void sve_free(struct task_struc
        __sve_free(task);
  }
  
- /* Offset of FFR in the SVE register dump */
- static size_t sve_ffr_offset(int vl)
- {
-       return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
- }
- static void *sve_pffr(struct task_struct *task)
- {
-       return (char *)task->thread.sve_state +
-               sve_ffr_offset(task->thread.sve_vl);
- }
  static void change_cpacr(u64 val, u64 mask)
  {
        u64 cpacr = read_sysreg(CPACR_EL1);
@@@ -251,31 -240,24 +239,24 @@@ static void task_fpsimd_load(void
        WARN_ON(!in_softirq() && !irqs_disabled());
  
        if (system_supports_sve() && test_thread_flag(TIF_SVE))
-               sve_load_state(sve_pffr(current),
+               sve_load_state(sve_pffr(&current->thread),
                               &current->thread.uw.fpsimd_state.fpsr,
                               sve_vq_from_vl(current->thread.sve_vl) - 1);
        else
                fpsimd_load_state(&current->thread.uw.fpsimd_state);
-       if (system_supports_sve()) {
-               /* Toggle SVE trapping for userspace if needed */
-               if (test_thread_flag(TIF_SVE))
-                       sve_user_enable();
-               else
-                       sve_user_disable();
-               /* Serialised by exception return to user */
-       }
  }
  
  /*
-  * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
-  * with respect to the CPU registers.
+  * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
+  * date with respect to the CPU registers.
   *
   * Softirqs (and preemption) must be disabled.
   */
- static void task_fpsimd_save(void)
+ void fpsimd_save(void)
  {
+       struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
+       /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
        WARN_ON(!in_softirq() && !irqs_disabled());
  
        if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
                                return;
                        }
  
-                       sve_save_state(sve_pffr(current),
-                                      &current->thread.uw.fpsimd_state.fpsr);
+                       sve_save_state(sve_pffr(&current->thread), &st->fpsr);
                } else
-                       fpsimd_save_state(&current->thread.uw.fpsimd_state);
+                       fpsimd_save_state(st);
        }
  }
  
@@@ -359,13 -340,22 +339,13 @@@ static int sve_proc_do_default_vl(struc
                return ret;
  
        /* Writing -1 has the special meaning "set to max": */
 -      if (vl == -1) {
 -              /* Fail safe if sve_max_vl wasn't initialised */
 -              if (WARN_ON(!sve_vl_valid(sve_max_vl)))
 -                      vl = SVE_VL_MIN;
 -              else
 -                      vl = sve_max_vl;
 -
 -              goto chosen;
 -      }
 +      if (vl == -1)
 +              vl = sve_max_vl;
  
        if (!sve_vl_valid(vl))
                return -EINVAL;
  
 -      vl = find_supported_vector_length(vl);
 -chosen:
 -      sve_default_vl = vl;
 +      sve_default_vl = find_supported_vector_length(vl);
        return 0;
  }
  
@@@ -588,7 -578,7 +568,7 @@@ int sve_set_vector_length(struct task_s
        if (task == current) {
                local_bh_disable();
  
-               task_fpsimd_save();
+               fpsimd_save();
                set_thread_flag(TIF_FOREIGN_FPSTATE);
        }
  
        task->thread.sve_vl = vl;
  
  out:
-       if (flags & PR_SVE_VL_INHERIT)
-               set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
-       else
-               clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+       update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
+                              flags & PR_SVE_VL_INHERIT);
  
        return 0;
  }
@@@ -755,6 -743,33 +733,33 @@@ void sve_kernel_enable(const struct arm
        isb();
  }
  
+ /*
+  * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+  * vector length.
+  *
+  * Use only if SVE is present.
+  * This function clobbers the SVE vector length.
+  */
+ u64 read_zcr_features(void)
+ {
+       u64 zcr;
+       unsigned int vq_max;
+       /*
+        * Set the maximum possible VL, and write zeroes to all other
+        * bits to see if they stick.
+        */
+       sve_kernel_enable(NULL);
+       write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+       zcr = read_sysreg_s(SYS_ZCR_EL1);
+       zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+       vq_max = sve_vq_from_vl(sve_get_vl());
+       zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+       return zcr;
+ }
  void __init sve_setup(void)
  {
        u64 zcr;
@@@ -829,7 -844,7 +834,7 @@@ asmlinkage void do_sve_acc(unsigned in
  
        local_bh_disable();
  
-       task_fpsimd_save();
+       fpsimd_save();
        fpsimd_to_sve(current);
  
        /* Force ret_to_user to reload the registers: */
@@@ -872,7 -887,7 +877,7 @@@ asmlinkage void do_fpsimd_exc(unsigned 
                        si_code = FPE_FLTRES;
        }
  
 -      memset(&info, 0, sizeof(info));
 +      clear_siginfo(&info);
        info.si_signo = SIGFPE;
        info.si_code = si_code;
        info.si_addr = (void __user *)instruction_pointer(regs);
  
  void fpsimd_thread_switch(struct task_struct *next)
  {
+       bool wrong_task, wrong_cpu;
        if (!system_supports_fpsimd())
                return;
+       /* Save unsaved fpsimd state, if any: */
+       fpsimd_save();
        /*
-        * Save the current FPSIMD state to memory, but only if whatever is in
-        * the registers is in fact the most recent userland FPSIMD state of
-        * 'current'.
+        * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+        * state.  For kernel threads, FPSIMD registers are never loaded
+        * and wrong_task and wrong_cpu will always be true.
         */
-       if (current->mm)
-               task_fpsimd_save();
+       wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+                                       &next->thread.uw.fpsimd_state;
+       wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
  
-       if (next->mm) {
-               /*
-                * If we are switching to a task whose most recent userland
-                * FPSIMD state is already in the registers of *this* cpu,
-                * we can skip loading the state from memory. Otherwise, set
-                * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
-                * upon the next return to userland.
-                */
-               if (__this_cpu_read(fpsimd_last_state.st) ==
-                       &next->thread.uw.fpsimd_state
-                   && next->thread.fpsimd_cpu == smp_processor_id())
-                       clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
-               else
-                       set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
-       }
+       update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+                              wrong_task || wrong_cpu);
  }
  
  void fpsimd_flush_thread(void)
@@@ -972,7 -981,7 +971,7 @@@ void fpsimd_preserve_current_state(void
                return;
  
        local_bh_disable();
-       task_fpsimd_save();
+       fpsimd_save();
        local_bh_enable();
  }
  
@@@ -992,14 -1001,33 +991,33 @@@ void fpsimd_signal_preserve_current_sta
   * Associate current's FPSIMD context with this cpu
   * Preemption must be disabled when calling this function.
   */
- static void fpsimd_bind_to_cpu(void)
+ void fpsimd_bind_task_to_cpu(void)
  {
        struct fpsimd_last_state_struct *last =
                this_cpu_ptr(&fpsimd_last_state);
  
        last->st = &current->thread.uw.fpsimd_state;
-       last->sve_in_use = test_thread_flag(TIF_SVE);
        current->thread.fpsimd_cpu = smp_processor_id();
+       if (system_supports_sve()) {
+               /* Toggle SVE trapping for userspace if needed */
+               if (test_thread_flag(TIF_SVE))
+                       sve_user_enable();
+               else
+                       sve_user_disable();
+               /* Serialised by exception return to user */
+       }
+ }
+ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
+ {
+       struct fpsimd_last_state_struct *last =
+               this_cpu_ptr(&fpsimd_last_state);
+       WARN_ON(!in_softirq() && !irqs_disabled());
+       last->st = st;
  }
  
  /*
@@@ -1016,7 -1044,7 +1034,7 @@@ void fpsimd_restore_current_state(void
  
        if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
                task_fpsimd_load();
-               fpsimd_bind_to_cpu();
+               fpsimd_bind_task_to_cpu();
        }
  
        local_bh_enable();
@@@ -1039,9 -1067,9 +1057,9 @@@ void fpsimd_update_current_state(struc
                fpsimd_to_sve(current);
  
        task_fpsimd_load();
+       fpsimd_bind_task_to_cpu();
  
-       if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
-               fpsimd_bind_to_cpu();
+       clear_thread_flag(TIF_FOREIGN_FPSTATE);
  
        local_bh_enable();
  }
@@@ -1054,29 -1082,12 +1072,12 @@@ void fpsimd_flush_task_state(struct tas
        t->thread.fpsimd_cpu = NR_CPUS;
  }
  
- static inline void fpsimd_flush_cpu_state(void)
+ void fpsimd_flush_cpu_state(void)
  {
        __this_cpu_write(fpsimd_last_state.st, NULL);
+       set_thread_flag(TIF_FOREIGN_FPSTATE);
  }
  
- /*
-  * Invalidate any task SVE state currently held in this CPU's regs.
-  *
-  * This is used to prevent the kernel from trying to reuse SVE register data
-  * that is detroyed by KVM guest enter/exit.  This function should go away when
-  * KVM SVE support is implemented.  Don't use it for anything else.
-  */
- #ifdef CONFIG_ARM64_SVE
- void sve_flush_cpu_state(void)
- {
-       struct fpsimd_last_state_struct const *last =
-               this_cpu_ptr(&fpsimd_last_state);
-       if (last->st && last->sve_in_use)
-               fpsimd_flush_cpu_state();
- }
- #endif /* CONFIG_ARM64_SVE */
  #ifdef CONFIG_KERNEL_MODE_NEON
  
  DEFINE_PER_CPU(bool, kernel_neon_busy);
@@@ -1110,11 -1121,8 +1111,8 @@@ void kernel_neon_begin(void
  
        __this_cpu_write(kernel_neon_busy, true);
  
-       /* Save unsaved task fpsimd state, if any: */
-       if (current->mm) {
-               task_fpsimd_save();
-               set_thread_flag(TIF_FOREIGN_FPSTATE);
-       }
+       /* Save unsaved fpsimd state, if any: */
+       fpsimd_save();
  
        /* Invalidate any task state remaining in the fpsimd regs: */
        fpsimd_flush_cpu_state();
@@@ -1236,13 -1244,10 +1234,10 @@@ static int fpsimd_cpu_pm_notifier(struc
  {
        switch (cmd) {
        case CPU_PM_ENTER:
-               if (current->mm)
-                       task_fpsimd_save();
+               fpsimd_save();
                fpsimd_flush_cpu_state();
                break;
        case CPU_PM_EXIT:
-               if (current->mm)
-                       set_thread_flag(TIF_FOREIGN_FPSTATE);
                break;
        case CPU_PM_ENTER_FAILED:
        default:
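The rewritten fpsimd_thread_switch() above replaces the old mm-based checks with two explicit ownership conditions. A standalone restatement of that test, with the per-CPU and per-task fields passed in as plain parameters (the real code reads fpsimd_last_state.st, thread.uw.fpsimd_state and thread.fpsimd_cpu directly):

/*
 * The registers on this CPU belong to 'next' only if (a) this CPU last
 * loaded next's user FPSIMD state and (b) next has not since run anywhere
 * else.  If either condition fails, TIF_FOREIGN_FPSTATE must be set so the
 * state is reloaded from memory before returning to userspace.
 */
static bool fpsimd_regs_are_stale(const struct user_fpsimd_state *cpu_last_st,
				  const struct user_fpsimd_state *next_st,
				  unsigned int next_last_cpu,
				  unsigned int this_cpu)
{
	bool wrong_task = cpu_last_st != next_st;
	bool wrong_cpu = next_last_cpu != this_cpu;

	return wrong_task || wrong_cpu;
}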
diff --combined arch/arm64/kernel/ptrace.c
index bd732644c2f6af1bbe5ac00d8aaaa041c621080e,78889c4546d7a7d72e94890e260e1288a6de0a12..5c338ce5a7fa13e1bccf7f264f7e6db24ca63cc2
@@@ -44,6 -44,7 +44,7 @@@
  #include <asm/compat.h>
  #include <asm/cpufeature.h>
  #include <asm/debug-monitors.h>
+ #include <asm/fpsimd.h>
  #include <asm/pgtable.h>
  #include <asm/stacktrace.h>
  #include <asm/syscall.h>
@@@ -766,6 -767,9 +767,6 @@@ static void sve_init_header_from_task(s
        vq = sve_vq_from_vl(header->vl);
  
        header->max_vl = sve_max_vl;
 -      if (WARN_ON(!sve_vl_valid(sve_max_vl)))
 -              header->max_vl = header->vl;
 -
        header->size = SVE_PT_SIZE(vq, header->flags);
        header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
                                      SVE_PT_REGS_SVE);
@@@ -1043,6 -1047,8 +1044,6 @@@ static const struct user_regset_view us
  };
  
  #ifdef CONFIG_COMPAT
 -#include <linux/compat.h>
 -
  enum compat_regset {
        REGSET_COMPAT_GPR,
        REGSET_COMPAT_VFP,
diff --combined arch/arm64/kvm/hyp/hyp-entry.S
index 05d8369790321e48f895eb4e5b802c7b3fdaf16a,753b9d213651af6b2b56ee66709436c9ea23d2db..24b4fbafe3e4ac9f9c30aaa2da04c16a798bf9ff
@@@ -106,68 -106,13 +106,49 @@@ el1_hvc_guest
         */
        ldr     x1, [sp]                                // Guest's x0
        eor     w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
 +      cbz     w1, wa_epilogue
 +
 +      /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
 +      eor     w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
 +                        ARM_SMCCC_ARCH_WORKAROUND_2)
        cbnz    w1, el1_trap
 -      mov     x0, x1
 +
 +#ifdef CONFIG_ARM64_SSBD
 +alternative_cb        arm64_enable_wa2_handling
 +      b       wa2_end
 +alternative_cb_end
 +      get_vcpu_ptr    x2, x0
 +      ldr     x0, [x2, #VCPU_WORKAROUND_FLAGS]
 +
 +      // Sanitize the argument and update the guest flags
 +      ldr     x1, [sp, #8]                    // Guest's x1
 +      clz     w1, w1                          // Murphy's device:
 +      lsr     w1, w1, #5                      // w1 = !!w1 without using
 +      eor     w1, w1, #1                      // the flags...
 +      bfi     x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
 +      str     x0, [x2, #VCPU_WORKAROUND_FLAGS]
 +
 +      /* Check that we actually need to perform the call */
 +      hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
 +      cbz     x0, wa2_end
 +
 +      mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_2
 +      smc     #0
 +
 +      /* Don't leak data from the SMC call */
 +      mov     x3, xzr
 +wa2_end:
 +      mov     x2, xzr
 +      mov     x1, xzr
 +#endif
 +
 +wa_epilogue:
 +      mov     x0, xzr
        add     sp, sp, #16
        eret
  
  el1_trap:
        get_vcpu_ptr    x1, x0
-       mrs             x0, esr_el2
-       lsr             x0, x0, #ESR_ELx_EC_SHIFT
-       /*
-        * x0: ESR_EC
-        * x1: vcpu pointer
-        */
-       /*
-        * We trap the first access to the FP/SIMD to save the host context
-        * and restore the guest context lazily.
-        * If FP/SIMD is not implemented, handle the trap and inject an
-        * undefined instruction exception to the guest.
-        */
- alternative_if_not ARM64_HAS_NO_FPSIMD
-       cmp     x0, #ESR_ELx_EC_FP_ASIMD
-       b.eq    __fpsimd_guest_restore
- alternative_else_nop_endif
        mov     x0, #ARM_EXCEPTION_TRAP
        b       __guest_exit
  
diff --combined arch/arm64/kvm/hyp/switch.c
index c50cedc447f1ab33e2eda5682d6b72fda272514d,2d45bd719a5dfba78a81e36bf25cc4b72c42541b..d496ef579859627edd1ba98c1233d9584cd407e3
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
 +#include <linux/arm-smccc.h>
  #include <linux/types.h>
  #include <linux/jump_label.h>
  #include <uapi/linux/psci.h>
  
  #include <kvm/arm_psci.h>
  
+ #include <asm/cpufeature.h>
  #include <asm/kvm_asm.h>
  #include <asm/kvm_emulate.h>
+ #include <asm/kvm_host.h>
  #include <asm/kvm_hyp.h>
  #include <asm/kvm_mmu.h>
  #include <asm/fpsimd.h>
  #include <asm/debug-monitors.h>
+ #include <asm/processor.h>
+ #include <asm/thread_info.h>
  
- static bool __hyp_text __fpsimd_enabled_nvhe(void)
+ /* Check whether the FP regs were dirtied while in the host-side run loop: */
+ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
  {
-       return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
- }
+       if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+               vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+                                     KVM_ARM64_FP_HOST);
  
- static bool fpsimd_enabled_vhe(void)
- {
-       return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+       return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
  }
  
  /* Save the 32-bit only FPSIMD system register state */
@@@ -93,7 -96,10 +97,10 @@@ static void activate_traps_vhe(struct k
  
        val = read_sysreg(cpacr_el1);
        val |= CPACR_EL1_TTA;
-       val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+       val &= ~CPACR_EL1_ZEN;
+       if (!update_fp_enabled(vcpu))
+               val &= ~CPACR_EL1_FPEN;
        write_sysreg(val, cpacr_el1);
  
        write_sysreg(kvm_get_hyp_vector(), vbar_el1);
@@@ -106,7 -112,10 +113,10 @@@ static void __hyp_text __activate_traps
        __activate_traps_common(vcpu);
  
        val = CPTR_EL2_DEFAULT;
-       val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+       val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
+       if (!update_fp_enabled(vcpu))
+               val |= CPTR_EL2_TFP;
        write_sysreg(val, cptr_el2);
  }
  
@@@ -319,6 -328,50 +329,50 @@@ static bool __hyp_text __skip_instr(str
        }
  }
  
+ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+ {
+       struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+       if (has_vhe())
+               write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
+                            cpacr_el1);
+       else
+               write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+                            cptr_el2);
+       isb();
+       if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+               /*
+                * In the SVE case, VHE is assumed: it is enforced by
+                * Kconfig and kvm_arch_init().
+                */
+               if (system_supports_sve() &&
+                   (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+                       struct thread_struct *thread = container_of(
+                               host_fpsimd,
+                               struct thread_struct, uw.fpsimd_state);
+                       sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+               } else {
+                       __fpsimd_save_state(host_fpsimd);
+               }
+               vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+       }
+       __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+       /* Skip restoring fpexc32 for AArch64 guests */
+       if (!(read_sysreg(hcr_el2) & HCR_RW))
+               write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+                            fpexc32_el2);
+       vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+       return true;
+ }
  /*
   * Return true when we were able to fixup the guest exit and should return to
   * the guest, false when we should restore the host state and return to the
@@@ -335,11 -388,23 +389,23 @@@ static bool __hyp_text fixup_guest_exit
         * same PC once the SError has been injected, and replay the
         * trapping instruction.
         */
-       if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+       if (*exit_code != ARM_EXCEPTION_TRAP)
+               goto exit;
+       /*
+        * We trap the first access to the FP/SIMD to save the host context
+        * and restore the guest context lazily.
+        * If FP/SIMD is not implemented, handle the trap and inject an
+        * undefined instruction exception to the guest.
+        */
+       if (system_supports_fpsimd() &&
+           kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
+               return __hyp_switch_fpsimd(vcpu);
+       if (!__populate_fault_info(vcpu))
                return true;
  
-       if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
-           *exit_code == ARM_EXCEPTION_TRAP) {
+       if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
                bool valid;
  
                valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
                if (valid) {
                        int ret = __vgic_v2_perform_cpuif_access(vcpu);
  
-                       if (ret == 1) {
-                               if (__skip_instr(vcpu))
-                                       return true;
-                               else
-                                       *exit_code = ARM_EXCEPTION_TRAP;
-                       }
+                       if (ret ==  1 && __skip_instr(vcpu))
+                               return true;
  
                        if (ret == -1) {
                                /* Promote an illegal access to an
                                        *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
                                *exit_code = ARM_EXCEPTION_EL1_SERROR;
                        }
+                       goto exit;
                }
        }
  
        if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
-           *exit_code == ARM_EXCEPTION_TRAP &&
            (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
             kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
                int ret = __vgic_v3_perform_cpuif_access(vcpu);
  
-               if (ret == 1) {
-                       if (__skip_instr(vcpu))
-                               return true;
-                       else
-                               *exit_code = ARM_EXCEPTION_TRAP;
-               }
+               if (ret == 1 && __skip_instr(vcpu))
+                       return true;
        }
  
+ exit:
        /* Return to the host kernel and handle the exit */
        return false;
  }
  
 +static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
 +{
 +      if (!cpus_have_const_cap(ARM64_SSBD))
 +              return false;
 +
 +      return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
 +}
 +
 +static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
 +{
 +#ifdef CONFIG_ARM64_SSBD
 +      /*
 +       * The host runs with the workaround always present. If the
 +       * guest wants it disabled, so be it...
 +       */
 +      if (__needs_ssbd_off(vcpu) &&
 +          __hyp_this_cpu_read(arm64_ssbd_callback_required))
 +              arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
 +#endif
 +}
 +
 +static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
 +{
 +#ifdef CONFIG_ARM64_SSBD
 +      /*
 +       * If the guest has disabled the workaround, bring it back on.
 +       */
 +      if (__needs_ssbd_off(vcpu) &&
 +          __hyp_this_cpu_read(arm64_ssbd_callback_required))
 +              arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
 +#endif
 +}
 +
  /* Switch to the guest for VHE systems running in EL2 */
  int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
  {
        struct kvm_cpu_context *host_ctxt;
        struct kvm_cpu_context *guest_ctxt;
-       bool fp_enabled;
        u64 exit_code;
  
        host_ctxt = vcpu->arch.host_cpu_context;
        sysreg_restore_guest_state_vhe(guest_ctxt);
        __debug_switch_to_guest(vcpu);
  
 +      __set_guest_arch_workaround_state(vcpu);
 +
        do {
                /* Jump in the fire! */
                exit_code = __guest_enter(vcpu, host_ctxt);
                /* And we're baaack! */
        } while (fixup_guest_exit(vcpu, &exit_code));
  
-       fp_enabled = fpsimd_enabled_vhe();
 +      __set_host_arch_workaround_state(vcpu);
 +
        sysreg_save_guest_state_vhe(guest_ctxt);
  
        __deactivate_traps(vcpu);
  
        sysreg_restore_host_state_vhe(host_ctxt);
  
-       if (fp_enabled) {
-               __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
-               __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+       if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
-       }
  
        __debug_switch_to_host(vcpu);
  
@@@ -478,7 -494,6 +532,6 @@@ int __hyp_text __kvm_vcpu_run_nvhe(stru
  {
        struct kvm_cpu_context *host_ctxt;
        struct kvm_cpu_context *guest_ctxt;
-       bool fp_enabled;
        u64 exit_code;
  
        vcpu = kern_hyp_va(vcpu);
        __sysreg_restore_state_nvhe(guest_ctxt);
        __debug_switch_to_guest(vcpu);
  
 +      __set_guest_arch_workaround_state(vcpu);
 +
        do {
                /* Jump in the fire! */
                exit_code = __guest_enter(vcpu, host_ctxt);
                /* And we're baaack! */
        } while (fixup_guest_exit(vcpu, &exit_code));
  
-       fp_enabled = __fpsimd_enabled_nvhe();
 +      __set_host_arch_workaround_state(vcpu);
 +
        __sysreg_save_state_nvhe(guest_ctxt);
        __sysreg32_save_state(vcpu);
        __timer_disable_traps(vcpu);
  
        __sysreg_restore_state_nvhe(host_ctxt);
  
-       if (fp_enabled) {
-               __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
-               __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+       if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
-       }
  
        /*
         * This must come after restoring the host sysregs, since a non-VHE
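The two trap-configuration hunks above implement the host side of the lazy FP/SIMD switch: on each guest entry, FP/SIMD accesses by the guest are left trapping unless update_fp_enabled() reports that the guest's registers are already live and were not invalidated by TIF_FOREIGN_FPSTATE in the host run loop. Condensed here into a single sketch; the real code is split across activate_traps_vhe() and __activate_traps_nvhe() above:

	if (has_vhe()) {
		u64 val = read_sysreg(cpacr_el1);

		val |= CPACR_EL1_TTA;
		val &= ~CPACR_EL1_ZEN;			/* always trap guest SVE */
		if (!update_fp_enabled(vcpu))
			val &= ~CPACR_EL1_FPEN;		/* trap guest FP/SIMD too */
		write_sysreg(val, cpacr_el1);
	} else {
		u64 val = CPTR_EL2_DEFAULT | CPTR_EL2_TTA | CPTR_EL2_TZ;

		if (!update_fp_enabled(vcpu))
			val |= CPTR_EL2_TFP;		/* trap guest FP/SIMD */
		write_sysreg(val, cptr_el2);
	}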
diff --combined arch/mips/kvm/mips.c
index 0f725e9cee8f69230ca7ddff5f6023c30294395c,03e0e0f189cc0c9a319f00395d6a53de57fea4f0..7cd76f93a438ab00d7085b3aaa93a5294c494c55
@@@ -45,7 -45,7 +45,7 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "cache",        VCPU_STAT(cache_exits),        KVM_STAT_VCPU },
        { "signal",       VCPU_STAT(signal_exits),       KVM_STAT_VCPU },
        { "interrupt",    VCPU_STAT(int_exits),          KVM_STAT_VCPU },
 -      { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
 +      { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
        { "tlbmod",       VCPU_STAT(tlbmod_exits),       KVM_STAT_VCPU },
        { "tlbmiss_ld",   VCPU_STAT(tlbmiss_ld_exits),   KVM_STAT_VCPU },
        { "tlbmiss_st",   VCPU_STAT(tlbmiss_st_exits),   KVM_STAT_VCPU },
@@@ -1076,7 -1076,7 +1076,7 @@@ int kvm_arch_vcpu_ioctl_set_fpu(struct 
        return -ENOIOCTLCMD;
  }
  
- int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
  }
diff --combined arch/powerpc/kvm/book3s_hv.c
index cb6d2313b19f482ec9ee0dc089008cc9feb9ee1b,67d7de1470ccb4253493ac5d4fcbc781747bc7d0..69895597736ab60a63ce840269e9889dd0bf24b0
@@@ -2441,7 -2441,6 +2441,7 @@@ static void init_vcore_to_run(struct kv
        vc->in_guest = 0;
        vc->napping_threads = 0;
        vc->conferring_threads = 0;
 +      vc->tb_offset_applied = 0;
  }
  
  static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
@@@ -2912,12 -2911,8 +2912,12 @@@ static noinline void kvmppc_run_core(st
  
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
  
 +      this_cpu_disable_ftrace();
 +
        trap = __kvmppc_vcore_entry();
  
 +      this_cpu_enable_ftrace();
 +
        srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
  
        trace_hardirqs_off();
@@@ -3955,8 -3950,7 +3955,7 @@@ static int kvmppc_core_init_vm_hv(struc
         */
        snprintf(buf, sizeof(buf), "vm%d", current->pid);
        kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
-       if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
-               kvmppc_mmu_debugfs_init(kvm);
+       kvmppc_mmu_debugfs_init(kvm);
  
        return 0;
  }
diff --combined arch/s390/include/asm/pgtable.h
index 9809694e1389ef836f4d5ea1eb4d9b08be5e1260,c9f155b67660589fe460cd83a2df886cd5503f7d..5ab636089c6052c51cb5ac15046ee0975bcfd026
@@@ -171,6 -171,7 +171,6 @@@ static inline int is_module_addr(void *
  #define _PAGE_WRITE   0x020           /* SW pte write bit */
  #define _PAGE_SPECIAL 0x040           /* SW associated with special page */
  #define _PAGE_UNUSED  0x080           /* SW bit for pgste usage state */
 -#define __HAVE_ARCH_PTE_SPECIAL
  
  #ifdef CONFIG_MEM_SOFT_DIRTY
  #define _PAGE_SOFT_DIRTY 0x002                /* SW pte soft dirty bit */
@@@ -506,10 -507,10 +506,10 @@@ static inline int mm_alloc_pgste(struc
   * faults should no longer be backed by zero pages
   */
  #define mm_forbids_zeropage mm_has_pgste
- static inline int mm_use_skey(struct mm_struct *mm)
+ static inline int mm_uses_skeys(struct mm_struct *mm)
  {
  #ifdef CONFIG_PGSTE
-       if (mm->context.use_skey)
+       if (mm->context.uses_skeys)
                return 1;
  #endif
        return 0;
diff --combined arch/s390/kvm/priv.c
index a3bce0e8434628a1c8d05404149e6573a71bd4ab,e8c62703c76452003a9c30ec8d893cb43437ba9f..eb0eb60c7be6a26677f8ed20509aba88df6da337
@@@ -26,6 -26,7 +26,6 @@@
  #include <asm/gmap.h>
  #include <asm/io.h>
  #include <asm/ptrace.h>
 -#include <asm/compat.h>
  #include <asm/sclp.h>
  #include "gaccess.h"
  #include "kvm-s390.h"
@@@ -204,24 -205,28 +204,28 @@@ static int handle_store_cpu_address(str
  
  int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
  {
-       int rc = 0;
+       int rc;
        struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
  
        trace_kvm_s390_skey_related_inst(vcpu);
-       if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
+       /* Already enabled? */
+       if (vcpu->kvm->arch.use_skf &&
+           !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
            !kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
-               return rc;
+               return 0;
  
        rc = s390_enable_skey();
        VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
-       if (!rc) {
-               if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
-                       kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
-               else
-                       sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
-                                            ICTL_RRBE);
-       }
-       return rc;
+       if (rc)
+               return rc;
+       if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+               kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
+       if (!vcpu->kvm->arch.use_skf)
+               sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+       else
+               sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+       return 0;
  }
  
  static int try_handle_skey(struct kvm_vcpu *vcpu)
        rc = kvm_s390_skey_check_enable(vcpu);
        if (rc)
                return rc;
-       if (sclp.has_skey) {
+       if (vcpu->kvm->arch.use_skf) {
                /* with storage-key facility, SIE interprets it for us */
                kvm_s390_retry_instr(vcpu);
                VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
diff --combined arch/x86/include/asm/kvm_host.h
index f4b2588865e9f7ad16696d3e70255a2b794d26b3,0ebe659f28026e6a16fb32f067ac37fcf77414ce..c13cd28d9d1be5abdff8fdf93692d51755c8930c
@@@ -258,7 -258,8 +258,8 @@@ union kvm_mmu_page_role 
                unsigned smep_andnot_wp:1;
                unsigned smap_andnot_wp:1;
                unsigned ad_disabled:1;
-               unsigned :7;
+               unsigned guest_mode:1;
+               unsigned :6;
  
                /*
                 * This is left at the top of the word so that
@@@ -476,6 -477,7 +477,7 @@@ struct kvm_vcpu_hv 
        struct kvm_hyperv_exit exit;
        struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
        DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
+       cpumask_t tlb_lush;
  };
  
  struct kvm_vcpu_arch {
@@@ -924,7 -926,7 +926,7 @@@ struct kvm_x86_ops 
        int (*hardware_setup)(void);               /* __init */
        void (*hardware_unsetup)(void);            /* __exit */
        bool (*cpu_has_accelerated_tpr)(void);
 -      bool (*cpu_has_high_real_mode_segbase)(void);
 +      bool (*has_emulated_msr)(int index);
        void (*cpuid_update)(struct kvm_vcpu *vcpu);
  
        struct kvm *(*vm_alloc)(void);
        void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
        void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
        void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
-       void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+       void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
        void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
        void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
        int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
@@@ -1277,6 -1279,7 +1279,7 @@@ void __kvm_mmu_free_some_pages(struct k
  int kvm_mmu_load(struct kvm_vcpu *vcpu);
  void kvm_mmu_unload(struct kvm_vcpu *vcpu);
  void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
  gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
                           struct x86_exception *exception);
  gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --combined arch/x86/kvm/cpuid.c
index f4f30d0c25c426388fb962fdcd04e76b508755cc,72d8c492d71d423b346e44a0c0b36d94ff049bd0..5720e78b2f7b52fa9a05bc52064d4fb8cac04c6c
@@@ -379,8 -379,7 +379,8 @@@ static inline int __do_cpuid_ent(struc
  
        /* cpuid 0x80000008.ebx */
        const u32 kvm_cpuid_8000_0008_ebx_x86_features =
 -              F(IBPB) | F(IBRS);
 +              F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
 +              F(AMD_SSB_NO);
  
        /* cpuid 0xC0000001.edx */
        const u32 kvm_cpuid_C000_0001_edx_x86_features =
        const u32 kvm_cpuid_7_0_ecx_x86_features =
                F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
-               F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
+               F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+               F(CLDEMOTE);
  
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
                F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
 -              F(ARCH_CAPABILITIES);
 +              F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
  
        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();
                                entry->ecx &= ~F(PKU);
                        entry->edx &= kvm_cpuid_7_0_edx_x86_features;
                        cpuid_mask(&entry->edx, CPUID_7_EDX);
 +                      /*
 +                       * We emulate ARCH_CAPABILITIES in software even
 +                       * if the host doesn't support it.
 +                       */
 +                      entry->edx |= F(ARCH_CAPABILITIES);
                } else {
                        entry->ebx = 0;
                        entry->ecx = 0;
                        g_phys_as = phys_as;
                entry->eax = g_phys_as | (virt_as << 8);
                entry->edx = 0;
 -              /* IBRS and IBPB aren't necessarily present in hardware cpuid */
 -              if (boot_cpu_has(X86_FEATURE_IBPB))
 -                      entry->ebx |= F(IBPB);
 -              if (boot_cpu_has(X86_FEATURE_IBRS))
 -                      entry->ebx |= F(IBRS);
 +              /*
 +               * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
 +               * hardware cpuid
 +               */
 +              if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
 +                      entry->ebx |= F(AMD_IBPB);
 +              if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
 +                      entry->ebx |= F(AMD_IBRS);
 +              if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
 +                      entry->ebx |= F(VIRT_SSBD);
                entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
                cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
 +              /*
 +               * The preference is to use SPEC CTRL MSR instead of the
 +               * VIRT_SPEC MSR.
 +               */
 +              if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
 +                  !boot_cpu_has(X86_FEATURE_AMD_SSBD))
 +                      entry->ebx |= F(VIRT_SSBD);
                break;
        }
        case 0x80000019:
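The cpuid.c hunk above decides which speculation-control bits to report in CPUID 0x80000008 EBX: the AMD_IBPB/AMD_IBRS/VIRT_SSBD bits are synthesized from host capabilities, masked against what KVM supports, and VIRT_SSBD is additionally forced on when the host only has the LS_CFG flavour of SSBD rather than AMD_SSBD. A standalone sketch of that decision follows; the bit positions and the host_caps structure are illustrative stand-ins, not the kernel's F()/X86_FEATURE_* machinery.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative bit positions within CPUID leaf 0x80000008 EBX. */
#define BIT_AMD_IBPB    (1u << 12)
#define BIT_AMD_IBRS    (1u << 14)
#define BIT_VIRT_SSBD   (1u << 25)

struct host_caps {
        bool amd_ibpb, amd_ibrs, virt_ssbd;
        bool ls_cfg_ssbd, amd_ssbd;
};

static unsigned int guest_8000_0008_ebx(const struct host_caps *h,
                                        unsigned int supported)
{
        unsigned int ebx = 0;

        /* These bits may be synthesized even if hardware CPUID lacks them. */
        if (h->amd_ibpb)
                ebx |= BIT_AMD_IBPB;
        if (h->amd_ibrs)
                ebx |= BIT_AMD_IBRS;
        if (h->virt_ssbd)
                ebx |= BIT_VIRT_SSBD;

        ebx &= supported;       /* keep only what the hypervisor supports */

        /*
         * Prefer SPEC_CTRL-based SSBD; advertise VIRT_SSBD as a fallback
         * only when the host has nothing but the LS_CFG mechanism.
         */
        if (h->ls_cfg_ssbd && !h->amd_ssbd)
                ebx |= BIT_VIRT_SSBD;

        return ebx;
}

int main(void)
{
        struct host_caps h = { .ls_cfg_ssbd = true };
        unsigned int mask = BIT_AMD_IBPB | BIT_AMD_IBRS | BIT_VIRT_SSBD;

        printf("guest ebx = %#x\n", guest_8000_0008_ebx(&h, mask));
        return 0;
}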
diff --combined arch/x86/kvm/hyperv.c
index 46ff64da44cab46d637facafffb10ba1a1269a1f,14e0d0ae4e0a8ea86969cd7f93ea029b89a0a070..af8caf965baa291319a7e5825b27ab28f97a24cf
@@@ -1242,6 -1242,121 +1242,121 @@@ int kvm_hv_get_msr_common(struct kvm_vc
                return kvm_hv_get_msr(vcpu, msr, pdata);
  }
  
+ static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+ {
+       int i = 0, j;
+       if (!(valid_bank_mask & BIT_ULL(bank_no)))
+               return -1;
+       for (j = 0; j < bank_no; j++)
+               if (valid_bank_mask & BIT_ULL(j))
+                       i++;
+       return i;
+ }
+ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
+                           u16 rep_cnt, bool ex)
+ {
+       struct kvm *kvm = current_vcpu->kvm;
+       struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+       struct hv_tlb_flush_ex flush_ex;
+       struct hv_tlb_flush flush;
+       struct kvm_vcpu *vcpu;
+       unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
+       unsigned long valid_bank_mask = 0;
+       u64 sparse_banks[64];
+       int sparse_banks_len, i;
+       bool all_cpus;
+       if (!ex) {
+               if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+               trace_kvm_hv_flush_tlb(flush.processor_mask,
+                                      flush.address_space, flush.flags);
+               sparse_banks[0] = flush.processor_mask;
+               all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+       } else {
+               if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
+                                           sizeof(flush_ex))))
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+               trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
+                                         flush_ex.hv_vp_set.format,
+                                         flush_ex.address_space,
+                                         flush_ex.flags);
+               valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
+               all_cpus = flush_ex.hv_vp_set.format !=
+                       HV_GENERIC_SET_SPARSE_4K;
+               sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+                       sizeof(sparse_banks[0]);
+               if (!sparse_banks_len && !all_cpus)
+                       goto ret_success;
+               if (!all_cpus &&
+                   kvm_read_guest(kvm,
+                                  ingpa + offsetof(struct hv_tlb_flush_ex,
+                                                   hv_vp_set.bank_contents),
+                                  sparse_banks,
+                                  sparse_banks_len))
+                       return HV_STATUS_INVALID_HYPERCALL_INPUT;
+       }
+       cpumask_clear(&hv_current->tlb_lush);
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+               int bank = hv->vp_index / 64, sbank = 0;
+               if (!all_cpus) {
+                       /* Banks >64 can't be represented */
+                       if (bank >= 64)
+                               continue;
+                       /* Non-ex hypercalls can only address first 64 vCPUs */
+                       if (!ex && bank)
+                               continue;
+                       if (ex) {
+                               /*
+                                * Check if the bank of this vCPU is in the sparse
+                                * set and get the sparse bank number.
+                                */
+                               sbank = get_sparse_bank_no(valid_bank_mask,
+                                                          bank);
+                               if (sbank < 0)
+                                       continue;
+                       }
+                       if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
+                               continue;
+               }
+               /*
+                * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
+                * can't analyze it here, flush TLB regardless of the specified
+                * address space.
+                */
+               __set_bit(i, vcpu_bitmap);
+       }
+       kvm_make_vcpus_request_mask(kvm,
+                                   KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+                                   vcpu_bitmap, &hv_current->tlb_lush);
+ ret_success:
+       /* We always do full TLB flush, set rep_done = rep_cnt. */
+       return (u64)HV_STATUS_SUCCESS |
+               ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+ }
  bool kvm_hv_hypercall_enabled(struct kvm *kvm)
  {
        return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
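get_sparse_bank_no() in the hunk above maps a VP-set bank number to its index inside the packed bank_contents array by counting how many valid banks precede it. The standalone sketch below does the same arithmetic with a popcount builtin instead of the loop used in the hunk; the function name is made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Index of bank_no within the packed array of valid banks, or -1. */
static int sparse_bank_index(uint64_t valid_bank_mask, int bank_no)
{
        if (!(valid_bank_mask & (1ULL << bank_no)))
                return -1;

        /* Count the valid banks strictly below bank_no (GCC/Clang builtin). */
        return __builtin_popcountll(valid_bank_mask & ((1ULL << bank_no) - 1));
}

int main(void)
{
        /* Banks 1, 3 and 7 are valid, so bank 7 is entry 2 of the array. */
        uint64_t mask = (1ULL << 1) | (1ULL << 3) | (1ULL << 7);

        printf("bank 7 -> index %d\n", sparse_bank_index(mask, 7));     /* 2 */
        printf("bank 2 -> index %d\n", sparse_bank_index(mask, 2));     /* -1 */
        return 0;
}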
@@@ -1260,18 -1375,14 +1375,18 @@@ static void kvm_hv_hypercall_set_result
        }
  }
  
 -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
  {
 -      struct kvm_run *run = vcpu->run;
 -
 -      kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
 +      kvm_hv_hypercall_set_result(vcpu, result);
 +      ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
  }
  
 +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 +{
 +      return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
 +}
 +
  static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
  {
        struct eventfd_ctx *eventfd;
@@@ -1315,7 -1426,7 +1430,7 @@@ int kvm_hv_hypercall(struct kvm_vcpu *v
  {
        u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
        uint16_t code, rep_idx, rep_cnt;
-       bool fast, longmode;
+       bool fast, longmode, rep;
  
        /*
         * hypercall generates UD from non zero cpl and real mode
  #endif
  
        code = param & 0xffff;
-       fast = (param >> 16) & 0x1;
-       rep_cnt = (param >> 32) & 0xfff;
-       rep_idx = (param >> 48) & 0xfff;
+       fast = !!(param & HV_HYPERCALL_FAST_BIT);
+       rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
+       rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
+       rep = !!(rep_cnt || rep_idx);
  
        trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
  
-       /* Hypercall continuation is not supported yet */
-       if (rep_cnt || rep_idx) {
-               ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-               goto out;
-       }
        switch (code) {
        case HVCALL_NOTIFY_LONG_SPIN_WAIT:
+               if (unlikely(rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
                kvm_vcpu_on_spin(vcpu, true);
                break;
        case HVCALL_SIGNAL_EVENT:
+               if (unlikely(rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
                ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
                if (ret != HV_STATUS_INVALID_PORT_ID)
                        break;
                /* maybe userspace knows this conn_id: fall through */
        case HVCALL_POST_MESSAGE:
                /* don't bother userspace if it has no way to handle it */
-               if (!vcpu_to_synic(vcpu)->active) {
-                       ret = HV_STATUS_INVALID_HYPERCALL_CODE;
+               if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
                        break;
                }
                vcpu->run->exit_reason = KVM_EXIT_HYPERV;
                vcpu->arch.complete_userspace_io =
                                kvm_hv_hypercall_complete_userspace;
                return 0;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+               if (unlikely(fast || !rep_cnt || rep_idx)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+               break;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+               if (unlikely(fast || rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+               break;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+               if (unlikely(fast || !rep_cnt || rep_idx)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+               break;
+       case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+               if (unlikely(fast || rep)) {
+                       ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+                       break;
+               }
+               ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+               break;
        default:
                ret = HV_STATUS_INVALID_HYPERCALL_CODE;
                break;
        }
  
- out:
 -      kvm_hv_hypercall_set_result(vcpu, ret);
 -      return 1;
 +      return kvm_hv_hypercall_complete(vcpu, ret);
  }
  
  void kvm_hv_init_vm(struct kvm *kvm)
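kvm_hv_hypercall() in the hunks above unpacks the guest's 64-bit hypercall parameter: the call code sits in the low 16 bits, the fast flag at bit 16, the rep count at bits 43:32 and the rep start index at bits 59:48, and a completed rep call echoes rep_cnt back at the rep-complete offset of the result. The decoder below mirrors that layout; the constant values are read off the hunk, and the struct is purely illustrative.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HV_HYPERCALL_FAST_BIT           (1ULL << 16)
#define HV_HYPERCALL_REP_COMP_OFFSET    32
#define HV_HYPERCALL_REP_START_OFFSET   48

struct hcall {
        unsigned int code, rep_cnt, rep_idx;
        bool fast, rep;
};

static struct hcall decode_hcall(uint64_t param)
{
        struct hcall c;

        c.code    = param & 0xffff;
        c.fast    = !!(param & HV_HYPERCALL_FAST_BIT);
        c.rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
        c.rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
        c.rep     = c.rep_cnt || c.rep_idx;
        return c;
}

/* Result for a rep hypercall completed in one pass: status 0, all reps done. */
static uint64_t rep_complete(unsigned int rep_cnt)
{
        return (uint64_t)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET;
}

int main(void)
{
        struct hcall c = decode_hcall(0x0004000000000002ULL); /* code 2, 4 reps */

        printf("code=%u fast=%d rep_cnt=%u rep_idx=%u result=%#" PRIx64 "\n",
               c.code, (int)c.fast, c.rep_cnt, c.rep_idx,
               rep_complete(c.rep_cnt));
        return 0;
}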
diff --combined arch/x86/kvm/lapic.c
index 3773c462511404bcc94ad69742b16fdfdc26a504,776391cf69a5196ae0fb33a6abbb496fc167f2fa..b5cd8465d44f6cb99a9ae705cf2f44f3c310a1ac
@@@ -1522,23 -1522,11 +1522,23 @@@ static bool set_target_expiration(struc
  
  static void advance_periodic_target_expiration(struct kvm_lapic *apic)
  {
 -      apic->lapic_timer.tscdeadline +=
 -              nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
 +      ktime_t now = ktime_get();
 +      u64 tscl = rdtsc();
 +      ktime_t delta;
 +
 +      /*
 +       * Synchronize both deadlines to the same time source or
 +       * differences in the periods (caused by differences in the
 +       * underlying clocks or numerical approximation errors) will
 +       * cause the two to drift apart over time as the errors
 +       * accumulate.
 +       */
        apic->lapic_timer.target_expiration =
                ktime_add_ns(apic->lapic_timer.target_expiration,
                                apic->lapic_timer.period);
 +      delta = ktime_sub(apic->lapic_timer.target_expiration, now);
 +      apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
 +              nsec_to_cycles(apic->vcpu, delta);
  }
  
  static void start_sw_period(struct kvm_lapic *apic)
@@@ -2002,13 -1990,11 +2002,11 @@@ void kvm_lapic_set_base(struct kvm_vcp
                }
        }
  
-       if ((old_value ^ value) & X2APIC_ENABLE) {
-               if (value & X2APIC_ENABLE) {
-                       kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
-                       kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
-               } else
-                       kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
-       }
+       if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+               kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+       if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+               kvm_x86_ops->set_virtual_apic_mode(vcpu);
  
        apic->base_address = apic->vcpu->arch.apic_base &
                             MSR_IA32_APICBASE_BASE;
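advance_periodic_target_expiration() in the lapic.c hunk above now derives the TSC deadline for each periodic tick from the ktime delta to the new target, instead of adding nsec_to_cycles(period) to the previous deadline, so the hrtimer-based and TSC-based views of the deadline cannot drift apart. A minimal numeric sketch of that recomputation, with nsec_to_cycles approximated as ns * tsc_khz / 10^6 (an assumption for illustration only):

#include <stdint.h>
#include <stdio.h>

struct toy_lapic_timer {
        int64_t  target_expiration_ns;  /* ktime-based target */
        uint64_t tscdeadline;           /* guest TSC deadline */
        int64_t  period_ns;
};

static uint64_t nsec_to_cycles(int64_t ns, uint64_t tsc_khz)
{
        return (uint64_t)ns * tsc_khz / 1000000u;       /* rough approximation */
}

static void advance_period(struct toy_lapic_timer *t, int64_t now_ns,
                           uint64_t guest_tsc_now, uint64_t tsc_khz)
{
        int64_t delta_ns;

        t->target_expiration_ns += t->period_ns;
        delta_ns = t->target_expiration_ns - now_ns;
        /* Derive the TSC deadline from the same delta, not by accumulation. */
        t->tscdeadline = guest_tsc_now + nsec_to_cycles(delta_ns, tsc_khz);
}

int main(void)
{
        struct toy_lapic_timer t = {
                .target_expiration_ns = 1000000,        /* 1 ms after "boot" */
                .period_ns = 250000,                    /* 250 us period */
        };

        /* Pretend the tick fired 10 us late, with a 2.2 GHz guest TSC. */
        advance_period(&t, 1010000, 2222000, 2200000);
        printf("next target = %lld ns, tscdeadline = %llu\n",
               (long long)t.target_expiration_ns,
               (unsigned long long)t.tscdeadline);
        return 0;
}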
diff --combined arch/x86/kvm/mmu.c
index d634f0332c0fad5aec8b7e285b97d7423e064dcc,f440d43c8d5ad864bc64795e6b766575815fa77a..d594690d8b9597a87f4cba26e8c1be5cb2de22de
@@@ -222,7 -222,6 +222,6 @@@ static const u64 shadow_acc_track_saved
  static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
  
  static void mmu_spte_set(u64 *sptep, u64 spte);
- static void mmu_free_roots(struct kvm_vcpu *vcpu);
  
  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
  {
@@@ -3007,7 -3006,6 +3006,7 @@@ static void kvm_send_hwpoison_signal(un
  {
        siginfo_t info;
  
 +      clear_siginfo(&info);
        info.si_signo   = SIGBUS;
        info.si_errno   = 0;
        info.si_code    = BUS_MCEERR_AR;
@@@ -3343,51 -3341,48 +3342,48 @@@ out_unlock
        return RET_PF_RETRY;
  }
  
static void mmu_free_roots(struct kvm_vcpu *vcpu)
+ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
                             struct list_head *invalid_list)
  {
-       int i;
        struct kvm_mmu_page *sp;
-       LIST_HEAD(invalid_list);
  
-       if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+       if (!VALID_PAGE(*root_hpa))
                return;
  
-       if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
-           (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
-            vcpu->arch.mmu.direct_map)) {
-               hpa_t root = vcpu->arch.mmu.root_hpa;
+       sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK);
+       --sp->root_count;
+       if (!sp->root_count && sp->role.invalid)
+               kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
  
-               spin_lock(&vcpu->kvm->mmu_lock);
-               sp = page_header(root);
-               --sp->root_count;
-               if (!sp->root_count && sp->role.invalid) {
-                       kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
-                       kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
-               }
-               spin_unlock(&vcpu->kvm->mmu_lock);
-               vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+       *root_hpa = INVALID_PAGE;
+ }
+ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+ {
+       int i;
+       LIST_HEAD(invalid_list);
+       struct kvm_mmu *mmu = &vcpu->arch.mmu;
+       if (!VALID_PAGE(mmu->root_hpa))
                return;
-       }
  
        spin_lock(&vcpu->kvm->mmu_lock);
-       for (i = 0; i < 4; ++i) {
-               hpa_t root = vcpu->arch.mmu.pae_root[i];
  
-               if (root) {
-                       root &= PT64_BASE_ADDR_MASK;
-                       sp = page_header(root);
-                       --sp->root_count;
-                       if (!sp->root_count && sp->role.invalid)
-                               kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
-                                                        &invalid_list);
-               }
-               vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+       if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+           (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+               mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
+       } else {
+               for (i = 0; i < 4; ++i)
+                       if (mmu->pae_root[i] != 0)
+                               mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
+                                                  &invalid_list);
+               mmu->root_hpa = INVALID_PAGE;
        }
        kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
        spin_unlock(&vcpu->kvm->mmu_lock);
-       vcpu->arch.mmu.root_hpa = INVALID_PAGE;
  }
+ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
  
  static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
  {
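The mmu.c hunk above folds the duplicated root-freeing logic into mmu_free_root_page(), which takes a pointer to the root handle so one helper can both drop the reference and overwrite the slot with INVALID_PAGE, for the single root and for each PAE root. A toy standalone version of that refactoring pattern, with invented types, is sketched below.

#include <stdio.h>

#define INVALID_PAGE 0ULL

struct toy_page {
        unsigned long long hpa;
        int root_count;
};

/* Drop one reference through a pointer to the handle, then invalidate it. */
static void free_root_page(unsigned long long *root_hpa, struct toy_page *sp)
{
        if (*root_hpa == INVALID_PAGE)
                return;
        --sp->root_count;
        if (!sp->root_count)
                printf("page %#llx can now be zapped\n", sp->hpa);
        *root_hpa = INVALID_PAGE;
}

int main(void)
{
        struct toy_page sp0 = { .hpa = 0x1000, .root_count = 1 };
        struct toy_page sp1 = { .hpa = 0x2000, .root_count = 1 };
        unsigned long long root_hpa = sp0.hpa;
        unsigned long long pae_root[4] = { sp1.hpa, 0, 0, 0 };

        /* The same helper serves the single root and the PAE root array. */
        free_root_page(&root_hpa, &sp0);
        for (int i = 0; i < 4; i++)
                if (pae_root[i] != INVALID_PAGE)
                        free_root_page(&pae_root[i], &sp1);
        return 0;
}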
@@@ -3720,7 -3715,6 +3716,6 @@@ static int handle_mmio_page_fault(struc
         */
        return RET_PF_RETRY;
  }
- EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
  
  static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
                                         u32 error_code, gfn_t gfn)
@@@ -3812,6 -3806,14 +3807,14 @@@ static bool try_async_pf(struct kvm_vcp
        struct kvm_memory_slot *slot;
        bool async;
  
+       /*
+        * Don't expose private memslots to L2.
+        */
+       if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+               *pfn = KVM_PFN_NOSLOT;
+               return false;
+       }
        slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
        async = false;
        *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
@@@ -3951,7 -3953,7 +3954,7 @@@ static void nonpaging_init_context(stru
  
  void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
  {
-       mmu_free_roots(vcpu);
+       kvm_mmu_free_roots(vcpu);
  }
  
  static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@@ -4473,6 -4475,7 +4476,7 @@@ static void init_kvm_tdp_mmu(struct kvm
        struct kvm_mmu *context = &vcpu->arch.mmu;
  
        context->base_role.word = 0;
+       context->base_role.guest_mode = is_guest_mode(vcpu);
        context->base_role.smm = is_smm(vcpu);
        context->base_role.ad_disabled = (shadow_accessed_mask == 0);
        context->page_fault = tdp_page_fault;
@@@ -4539,6 -4542,7 +4543,7 @@@ void kvm_init_shadow_mmu(struct kvm_vcp
                = smep && !is_write_protection(vcpu);
        context->base_role.smap_andnot_wp
                = smap && !is_write_protection(vcpu);
+       context->base_role.guest_mode = is_guest_mode(vcpu);
        context->base_role.smm = is_smm(vcpu);
        reset_shadow_zero_bits_mask(vcpu, context);
  }
@@@ -4564,7 -4568,7 +4569,7 @@@ void kvm_init_shadow_ept_mmu(struct kvm
        context->root_hpa = INVALID_PAGE;
        context->direct_map = false;
        context->base_role.ad_disabled = !accessed_dirty;
+       context->base_role.guest_mode = 1;
        update_permission_bitmask(vcpu, context, true);
        update_pkru_bitmask(vcpu, context, true);
        update_last_nonleaf_level(vcpu, context);
@@@ -4664,7 -4668,7 +4669,7 @@@ EXPORT_SYMBOL_GPL(kvm_mmu_load)
  
  void kvm_mmu_unload(struct kvm_vcpu *vcpu)
  {
-       mmu_free_roots(vcpu);
+       kvm_mmu_free_roots(vcpu);
        WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@@ -4825,6 -4829,7 +4830,7 @@@ static void kvm_mmu_pte_write(struct kv
        mask.smep_andnot_wp = 1;
        mask.smap_andnot_wp = 1;
        mask.smm = 1;
+       mask.guest_mode = 1;
        mask.ad_disabled = 1;
  
        /*
diff --combined arch/x86/kvm/svm.c
index 950ec50f77c30b71545fd93a4154875b4d9f0e4e,d9305f1723f572740385a91b252232e00c328d0a..695b0bd02220378493dd3ea3f9edf6959343121f
@@@ -49,7 -49,7 +49,7 @@@
  #include <asm/debugreg.h>
  #include <asm/kvm_para.h>
  #include <asm/irq_remapping.h>
 -#include <asm/nospec-branch.h>
 +#include <asm/spec-ctrl.h>
  
  #include <asm/virtext.h>
  #include "trace.h"
@@@ -213,12 -213,6 +213,12 @@@ struct vcpu_svm 
        } host;
  
        u64 spec_ctrl;
 +      /*
 +       * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
 +       * translated into the appropriate LS_CFG bits on the host to
 +       * perform speculative control.
 +       */
 +      u64 virt_spec_ctrl;
  
        u32 *msrpm;
  
@@@ -1768,7 -1762,10 +1768,10 @@@ static struct page **sev_pin_memory(str
        unsigned long npages, npinned, size;
        unsigned long locked, lock_limit;
        struct page **pages;
-       int first, last;
+       unsigned long first, last;
+       if (ulen == 0 || uaddr + ulen < uaddr)
+               return NULL;
  
        /* Calculate number of pages. */
        first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
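The sev_pin_memory() change above widens first/last to unsigned long and rejects a zero-length range or one whose end wraps around the address space before computing how many pages to pin. The self-contained sketch below reproduces that validation and page arithmetic with PAGE_SHIFT fixed at 12 purely for illustration; the last-page formula is the usual one and is assumed here rather than copied from the hunk.

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))

/* Returns 0 for an invalid range, otherwise the number of pages spanned. */
static unsigned long range_to_npages(unsigned long uaddr, unsigned long ulen)
{
        unsigned long first, last;

        /* Reject empty ranges and ranges that wrap past the top of memory. */
        if (ulen == 0 || uaddr + ulen < uaddr)
                return 0;

        first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
        last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
        return last - first + 1;
}

int main(void)
{
        printf("%lu\n", range_to_npages(0x1fff, 2));       /* crosses a page: 2 */
        printf("%lu\n", range_to_npages(-4096UL, 8192));   /* wraps: 0 */
        printf("%lu\n", range_to_npages(0x1000, 0));       /* empty: 0 */
        return 0;
}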
@@@ -1855,13 -1852,13 +1858,13 @@@ static void __unregister_enc_region_loc
  
  static struct kvm *svm_vm_alloc(void)
  {
-       struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+       struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
        return &kvm_svm->kvm;
  }
  
  static void svm_vm_free(struct kvm *kvm)
  {
-       kfree(to_kvm_svm(kvm));
+       vfree(to_kvm_svm(kvm));
  }
  
  static void sev_vm_destroy(struct kvm *kvm)
@@@ -2066,7 -2063,6 +2069,7 @@@ static void svm_vcpu_reset(struct kvm_v
  
        vcpu->arch.microcode_version = 0x01000065;
        svm->spec_ctrl = 0;
 +      svm->virt_spec_ctrl = 0;
  
        if (!init_event) {
                svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@@ -4115,19 -4111,11 +4118,19 @@@ static int svm_get_msr(struct kvm_vcpu 
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
                        return 1;
  
                msr_info->data = svm->spec_ctrl;
                break;
 +      case MSR_AMD64_VIRT_SPEC_CTRL:
 +              if (!msr_info->host_initiated &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
 +                      return 1;
 +
 +              msr_info->data = svm->virt_spec_ctrl;
 +              break;
        case MSR_F15H_IC_CFG: {
  
                int family, model;
@@@ -4218,12 -4206,11 +4221,12 @@@ static int svm_set_msr(struct kvm_vcpu 
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
                        return 1;
  
                /* The STIBP bit doesn't fault even if it's not advertised */
 -              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
 +              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
                        return 1;
  
                svm->spec_ctrl = data;
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
                        return 1;
  
                if (data & ~PRED_CMD_IBPB)
                        break;
                set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
                break;
 +      case MSR_AMD64_VIRT_SPEC_CTRL:
 +              if (!msr->host_initiated &&
 +                  !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
 +                      return 1;
 +
 +              if (data & ~SPEC_CTRL_SSBD)
 +                      return 1;
 +
 +              svm->virt_spec_ctrl = data;
 +              break;
        case MSR_STAR:
                svm->vmcb->save.star = data;
                break;
@@@ -5062,7 -5039,7 +5065,7 @@@ static void update_cr8_intercept(struc
                set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
  }
  
- static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
  {
        return;
  }
@@@ -5583,7 -5560,8 +5586,7 @@@ static void svm_vcpu_run(struct kvm_vcp
         * is no need to worry about the conditional branch over the wrmsr
         * being speculatively taken.
         */
 -      if (svm->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
 +      x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
  
        asm volatile (
                "push %%" _ASM_BP "; \n\t"
  #endif
                );
  
 +      /* Eliminate branch target predictions from guest mode */
 +      vmexit_fill_RSB();
 +
 +#ifdef CONFIG_X86_64
 +      wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 +#else
 +      loadsegment(fs, svm->host.fs);
 +#ifndef CONFIG_X86_32_LAZY_GS
 +      loadsegment(gs, svm->host.gs);
 +#endif
 +#endif
 +
        /*
         * We do not use IBRS in the kernel. If this vCPU has used the
         * SPEC_CTRL MSR it may have left it on; save the value and
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  
 -      if (svm->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 -
 -      /* Eliminate branch target predictions from guest mode */
 -      vmexit_fill_RSB();
 -
 -#ifdef CONFIG_X86_64
 -      wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 -#else
 -      loadsegment(fs, svm->host.fs);
 -#ifndef CONFIG_X86_32_LAZY_GS
 -      loadsegment(gs, svm->host.gs);
 -#endif
 -#endif
 +      x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
  
        reload_tss(vcpu);
  
@@@ -5810,7 -5789,7 +5813,7 @@@ static bool svm_cpu_has_accelerated_tpr
        return false;
  }
  
 -static bool svm_has_high_real_mode_segbase(void)
 +static bool svm_has_emulated_msr(int index)
  {
        return true;
  }
@@@ -6949,6 -6928,9 +6952,9 @@@ static int svm_register_enc_region(stru
        if (!sev_guest(kvm))
                return -ENOTTY;
  
+       if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
+               return -EINVAL;
        region = kzalloc(sizeof(*region), GFP_KERNEL);
        if (!region)
                return -ENOMEM;
@@@ -7036,7 -7018,7 +7042,7 @@@ static struct kvm_x86_ops svm_x86_ops _
        .hardware_enable = svm_hardware_enable,
        .hardware_disable = svm_hardware_disable,
        .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
 -      .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
 +      .has_emulated_msr = svm_has_emulated_msr,
  
        .vcpu_create = svm_create_vcpu,
        .vcpu_free = svm_free_vcpu,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
-       .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+       .set_virtual_apic_mode = svm_set_virtual_apic_mode,
        .get_enable_apicv = svm_get_enable_apicv,
        .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
        .load_eoi_exitmap = svm_load_eoi_exitmap,
diff --combined arch/x86/kvm/vmx.c
index 40aa29204baf80aee54056dffb69519cc6cb5f89,48989f78be60e6f6f97a3dd2ce2da1441fc3ab47..fc61e25966e470d82a52f915c8be9aca379be614
@@@ -51,7 -51,7 +51,7 @@@
  #include <asm/apic.h>
  #include <asm/irq_remapping.h>
  #include <asm/mmu_context.h>
 -#include <asm/nospec-branch.h>
 +#include <asm/spec-ctrl.h>
  #include <asm/mshyperv.h>
  
  #include "trace.h"
@@@ -242,7 -242,11 +242,11 @@@ struct shared_msr_entry 
   * underlying hardware which will be used to run L2.
   * This structure is packed to ensure that its layout is identical across
   * machines (necessary for live migration).
-  * If there are changes in this struct, VMCS12_REVISION must be changed.
+  *
+  * IMPORTANT: Changing the layout of existing fields in this structure
+  * will break save/restore compatibility with older kvm releases. When
+  * adding new fields, either use space in the reserved padding* arrays
+  * or add the new fields to the end of the structure.
   */
  typedef u64 natural_width;
  struct __packed vmcs12 {
        u64 virtual_apic_page_addr;
        u64 apic_access_addr;
        u64 posted_intr_desc_addr;
-       u64 vm_function_control;
        u64 ept_pointer;
        u64 eoi_exit_bitmap0;
        u64 eoi_exit_bitmap1;
        u64 eoi_exit_bitmap2;
        u64 eoi_exit_bitmap3;
-       u64 eptp_list_address;
        u64 xss_exit_bitmap;
        u64 guest_physical_address;
        u64 vmcs_link_pointer;
-       u64 pml_address;
        u64 guest_ia32_debugctl;
        u64 guest_ia32_pat;
        u64 guest_ia32_efer;
        u64 host_ia32_pat;
        u64 host_ia32_efer;
        u64 host_ia32_perf_global_ctrl;
-       u64 padding64[8]; /* room for future expansion */
+       u64 vmread_bitmap;
+       u64 vmwrite_bitmap;
+       u64 vm_function_control;
+       u64 eptp_list_address;
+       u64 pml_address;
+       u64 padding64[3]; /* room for future expansion */
        /*
         * To allow migration of L1 (complete with its L2 guests) between
         * machines of different natural widths (32 or 64 bit), we cannot have
        u16 guest_ldtr_selector;
        u16 guest_tr_selector;
        u16 guest_intr_status;
-       u16 guest_pml_index;
        u16 host_es_selector;
        u16 host_cs_selector;
        u16 host_ss_selector;
        u16 host_fs_selector;
        u16 host_gs_selector;
        u16 host_tr_selector;
+       u16 guest_pml_index;
  };
  
+ /*
+  * For save/restore compatibility, the vmcs12 field offsets must not change.
+  */
+ #define CHECK_OFFSET(field, loc)                              \
+       BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),       \
+               "Offset of " #field " in struct vmcs12 has changed.")
+ static inline void vmx_check_vmcs12_offsets(void) {
+       CHECK_OFFSET(revision_id, 0);
+       CHECK_OFFSET(abort, 4);
+       CHECK_OFFSET(launch_state, 8);
+       CHECK_OFFSET(io_bitmap_a, 40);
+       CHECK_OFFSET(io_bitmap_b, 48);
+       CHECK_OFFSET(msr_bitmap, 56);
+       CHECK_OFFSET(vm_exit_msr_store_addr, 64);
+       CHECK_OFFSET(vm_exit_msr_load_addr, 72);
+       CHECK_OFFSET(vm_entry_msr_load_addr, 80);
+       CHECK_OFFSET(tsc_offset, 88);
+       CHECK_OFFSET(virtual_apic_page_addr, 96);
+       CHECK_OFFSET(apic_access_addr, 104);
+       CHECK_OFFSET(posted_intr_desc_addr, 112);
+       CHECK_OFFSET(ept_pointer, 120);
+       CHECK_OFFSET(eoi_exit_bitmap0, 128);
+       CHECK_OFFSET(eoi_exit_bitmap1, 136);
+       CHECK_OFFSET(eoi_exit_bitmap2, 144);
+       CHECK_OFFSET(eoi_exit_bitmap3, 152);
+       CHECK_OFFSET(xss_exit_bitmap, 160);
+       CHECK_OFFSET(guest_physical_address, 168);
+       CHECK_OFFSET(vmcs_link_pointer, 176);
+       CHECK_OFFSET(guest_ia32_debugctl, 184);
+       CHECK_OFFSET(guest_ia32_pat, 192);
+       CHECK_OFFSET(guest_ia32_efer, 200);
+       CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
+       CHECK_OFFSET(guest_pdptr0, 216);
+       CHECK_OFFSET(guest_pdptr1, 224);
+       CHECK_OFFSET(guest_pdptr2, 232);
+       CHECK_OFFSET(guest_pdptr3, 240);
+       CHECK_OFFSET(guest_bndcfgs, 248);
+       CHECK_OFFSET(host_ia32_pat, 256);
+       CHECK_OFFSET(host_ia32_efer, 264);
+       CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
+       CHECK_OFFSET(vmread_bitmap, 280);
+       CHECK_OFFSET(vmwrite_bitmap, 288);
+       CHECK_OFFSET(vm_function_control, 296);
+       CHECK_OFFSET(eptp_list_address, 304);
+       CHECK_OFFSET(pml_address, 312);
+       CHECK_OFFSET(cr0_guest_host_mask, 344);
+       CHECK_OFFSET(cr4_guest_host_mask, 352);
+       CHECK_OFFSET(cr0_read_shadow, 360);
+       CHECK_OFFSET(cr4_read_shadow, 368);
+       CHECK_OFFSET(cr3_target_value0, 376);
+       CHECK_OFFSET(cr3_target_value1, 384);
+       CHECK_OFFSET(cr3_target_value2, 392);
+       CHECK_OFFSET(cr3_target_value3, 400);
+       CHECK_OFFSET(exit_qualification, 408);
+       CHECK_OFFSET(guest_linear_address, 416);
+       CHECK_OFFSET(guest_cr0, 424);
+       CHECK_OFFSET(guest_cr3, 432);
+       CHECK_OFFSET(guest_cr4, 440);
+       CHECK_OFFSET(guest_es_base, 448);
+       CHECK_OFFSET(guest_cs_base, 456);
+       CHECK_OFFSET(guest_ss_base, 464);
+       CHECK_OFFSET(guest_ds_base, 472);
+       CHECK_OFFSET(guest_fs_base, 480);
+       CHECK_OFFSET(guest_gs_base, 488);
+       CHECK_OFFSET(guest_ldtr_base, 496);
+       CHECK_OFFSET(guest_tr_base, 504);
+       CHECK_OFFSET(guest_gdtr_base, 512);
+       CHECK_OFFSET(guest_idtr_base, 520);
+       CHECK_OFFSET(guest_dr7, 528);
+       CHECK_OFFSET(guest_rsp, 536);
+       CHECK_OFFSET(guest_rip, 544);
+       CHECK_OFFSET(guest_rflags, 552);
+       CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
+       CHECK_OFFSET(guest_sysenter_esp, 568);
+       CHECK_OFFSET(guest_sysenter_eip, 576);
+       CHECK_OFFSET(host_cr0, 584);
+       CHECK_OFFSET(host_cr3, 592);
+       CHECK_OFFSET(host_cr4, 600);
+       CHECK_OFFSET(host_fs_base, 608);
+       CHECK_OFFSET(host_gs_base, 616);
+       CHECK_OFFSET(host_tr_base, 624);
+       CHECK_OFFSET(host_gdtr_base, 632);
+       CHECK_OFFSET(host_idtr_base, 640);
+       CHECK_OFFSET(host_ia32_sysenter_esp, 648);
+       CHECK_OFFSET(host_ia32_sysenter_eip, 656);
+       CHECK_OFFSET(host_rsp, 664);
+       CHECK_OFFSET(host_rip, 672);
+       CHECK_OFFSET(pin_based_vm_exec_control, 744);
+       CHECK_OFFSET(cpu_based_vm_exec_control, 748);
+       CHECK_OFFSET(exception_bitmap, 752);
+       CHECK_OFFSET(page_fault_error_code_mask, 756);
+       CHECK_OFFSET(page_fault_error_code_match, 760);
+       CHECK_OFFSET(cr3_target_count, 764);
+       CHECK_OFFSET(vm_exit_controls, 768);
+       CHECK_OFFSET(vm_exit_msr_store_count, 772);
+       CHECK_OFFSET(vm_exit_msr_load_count, 776);
+       CHECK_OFFSET(vm_entry_controls, 780);
+       CHECK_OFFSET(vm_entry_msr_load_count, 784);
+       CHECK_OFFSET(vm_entry_intr_info_field, 788);
+       CHECK_OFFSET(vm_entry_exception_error_code, 792);
+       CHECK_OFFSET(vm_entry_instruction_len, 796);
+       CHECK_OFFSET(tpr_threshold, 800);
+       CHECK_OFFSET(secondary_vm_exec_control, 804);
+       CHECK_OFFSET(vm_instruction_error, 808);
+       CHECK_OFFSET(vm_exit_reason, 812);
+       CHECK_OFFSET(vm_exit_intr_info, 816);
+       CHECK_OFFSET(vm_exit_intr_error_code, 820);
+       CHECK_OFFSET(idt_vectoring_info_field, 824);
+       CHECK_OFFSET(idt_vectoring_error_code, 828);
+       CHECK_OFFSET(vm_exit_instruction_len, 832);
+       CHECK_OFFSET(vmx_instruction_info, 836);
+       CHECK_OFFSET(guest_es_limit, 840);
+       CHECK_OFFSET(guest_cs_limit, 844);
+       CHECK_OFFSET(guest_ss_limit, 848);
+       CHECK_OFFSET(guest_ds_limit, 852);
+       CHECK_OFFSET(guest_fs_limit, 856);
+       CHECK_OFFSET(guest_gs_limit, 860);
+       CHECK_OFFSET(guest_ldtr_limit, 864);
+       CHECK_OFFSET(guest_tr_limit, 868);
+       CHECK_OFFSET(guest_gdtr_limit, 872);
+       CHECK_OFFSET(guest_idtr_limit, 876);
+       CHECK_OFFSET(guest_es_ar_bytes, 880);
+       CHECK_OFFSET(guest_cs_ar_bytes, 884);
+       CHECK_OFFSET(guest_ss_ar_bytes, 888);
+       CHECK_OFFSET(guest_ds_ar_bytes, 892);
+       CHECK_OFFSET(guest_fs_ar_bytes, 896);
+       CHECK_OFFSET(guest_gs_ar_bytes, 900);
+       CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
+       CHECK_OFFSET(guest_tr_ar_bytes, 908);
+       CHECK_OFFSET(guest_interruptibility_info, 912);
+       CHECK_OFFSET(guest_activity_state, 916);
+       CHECK_OFFSET(guest_sysenter_cs, 920);
+       CHECK_OFFSET(host_ia32_sysenter_cs, 924);
+       CHECK_OFFSET(vmx_preemption_timer_value, 928);
+       CHECK_OFFSET(virtual_processor_id, 960);
+       CHECK_OFFSET(posted_intr_nv, 962);
+       CHECK_OFFSET(guest_es_selector, 964);
+       CHECK_OFFSET(guest_cs_selector, 966);
+       CHECK_OFFSET(guest_ss_selector, 968);
+       CHECK_OFFSET(guest_ds_selector, 970);
+       CHECK_OFFSET(guest_fs_selector, 972);
+       CHECK_OFFSET(guest_gs_selector, 974);
+       CHECK_OFFSET(guest_ldtr_selector, 976);
+       CHECK_OFFSET(guest_tr_selector, 978);
+       CHECK_OFFSET(guest_intr_status, 980);
+       CHECK_OFFSET(host_es_selector, 982);
+       CHECK_OFFSET(host_cs_selector, 984);
+       CHECK_OFFSET(host_ss_selector, 986);
+       CHECK_OFFSET(host_ds_selector, 988);
+       CHECK_OFFSET(host_fs_selector, 990);
+       CHECK_OFFSET(host_gs_selector, 992);
+       CHECK_OFFSET(host_tr_selector, 994);
+       CHECK_OFFSET(guest_pml_index, 996);
+ }
  /*
   * VMCS12_REVISION is an arbitrary id that should be changed if the content or
   * layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
   * VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
+  *
+  * IMPORTANT: Changing this value will break save/restore compatibility with
+  * older kvm releases.
   */
  #define VMCS12_REVISION 0x11e57ed0
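The CHECK_OFFSET() table above turns every vmcs12 field offset into a build-time assertion, so reordering the structure breaks the compile instead of silently breaking save/restore of nested state. The same technique in portable C11 looks like the sketch below; the struct and offsets are invented and have nothing to do with the real vmcs12 layout.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct wire_header {
        uint32_t version;       /* must stay at offset 0 */
        uint32_t flags;         /* must stay at offset 4 */
        uint64_t payload_len;   /* must stay at offset 8 */
};

/* Fail the build, not the migration, if the layout ever changes. */
#define CHECK_OFFSET(field, loc) \
        static_assert(offsetof(struct wire_header, field) == (loc), \
                      "offset of " #field " changed")

CHECK_OFFSET(version, 0);
CHECK_OFFSET(flags, 4);
CHECK_OFFSET(payload_len, 8);

int main(void)
{
        return 0;
}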
  
@@@ -481,7 -646,8 +646,8 @@@ struct nested_vmx 
        bool sync_shadow_vmcs;
        bool dirty_vmcs12;
  
-       bool change_vmcs01_virtual_x2apic_mode;
+       bool change_vmcs01_virtual_apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
  
@@@ -761,6 -927,7 +927,7 @@@ static const unsigned short vmcs_field_
        FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
        FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
        FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
+       FIELD64(PML_ADDRESS, pml_address),
        FIELD64(TSC_OFFSET, tsc_offset),
        FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
        FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
        FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
        FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
        FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
+       FIELD64(VMREAD_BITMAP, vmread_bitmap),
+       FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
        FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
        FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
        FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
-       FIELD64(PML_ADDRESS, pml_address),
        FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
        FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
        FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
@@@ -1089,6 -1257,16 +1257,16 @@@ static inline u16 evmcs_read16(unsigne
        return *(u16 *)((char *)current_evmcs + offset);
  }
  
+ static inline void evmcs_touch_msr_bitmap(void)
+ {
+       if (unlikely(!current_evmcs))
+               return;
+       if (current_evmcs->hv_enlightenments_control.msr_bitmap)
+               current_evmcs->hv_clean_fields &=
+                       ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ }
  static void evmcs_load(u64 phys_addr)
  {
        struct hv_vp_assist_page *vp_ap =
@@@ -1173,6 -1351,7 +1351,7 @@@ static inline u32 evmcs_read32(unsigne
  static inline u16 evmcs_read16(unsigned long field) { return 0; }
  static inline void evmcs_load(u64 phys_addr) {}
  static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+ static inline void evmcs_touch_msr_bitmap(void) {}
  #endif /* IS_ENABLED(CONFIG_HYPERV) */
  
  static inline bool is_exception_n(u32 intr_info, u8 vector)
@@@ -1393,6 -1572,11 +1572,11 @@@ static inline bool cpu_has_vmx_invept_g
        return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
  }
  
+ static inline bool cpu_has_vmx_invvpid_individual_addr(void)
+ {
+       return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
+ }
  static inline bool cpu_has_vmx_invvpid_single(void)
  {
        return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
@@@ -1510,6 -1694,17 +1694,17 @@@ static inline unsigned nested_cpu_vmx_m
        return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
  }
  
+ /*
+  * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
+  * to modify any valid field of the VMCS, or are the VM-exit
+  * information fields read-only?
+  */
+ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
+ {
+       return to_vmx(vcpu)->nested.msrs.misc_low &
+               MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
+ }
  static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
  {
        return vmcs12->cpu_based_vm_exec_control & bit;
@@@ -3127,6 -3322,7 +3322,7 @@@ static void nested_vmx_setup_ctls_msrs(
                msrs->misc_high);
        msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
        msrs->misc_low |=
+               MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
                VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
                VMX_MISC_ACTIVITY_HLT;
        msrs->misc_high = 0;
@@@ -3300,6 -3496,15 +3496,15 @@@ static int vmx_restore_vmx_misc(struct 
  
        vmx->nested.msrs.misc_low = data;
        vmx->nested.msrs.misc_high = data >> 32;
+       /*
+        * If L1 has read-only VM-exit information fields, use the
+        * less permissive vmx_vmwrite_bitmap to specify write
+        * permissions for the shadow VMCS.
+        */
+       if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
        return 0;
  }
  
@@@ -3354,6 -3559,13 +3559,13 @@@ static int vmx_set_vmx_msr(struct kvm_v
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
+       /*
+        * Don't allow changes to the VMX capability MSRs while the vCPU
+        * is in VMX operation.
+        */
+       if (vmx->nested.vmxon)
+               return -EBUSY;
        switch (msr_index) {
        case MSR_IA32_VMX_BASIC:
                return vmx_restore_vmx_basic(vmx, data);
@@@ -3529,6 -3741,7 +3741,6 @@@ static int vmx_get_msr(struct kvm_vcpu 
                return kvm_get_msr_common(vcpu, msr_info);
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                        return 1;
  
@@@ -3647,11 -3860,12 +3859,11 @@@ static int vmx_set_msr(struct kvm_vcpu 
                break;
        case MSR_IA32_SPEC_CTRL:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                        return 1;
  
                /* The STIBP bit doesn't fault even if it's not advertised */
 -              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
 +              if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
                        return 1;
  
                vmx->spec_ctrl = data;
                break;
        case MSR_IA32_PRED_CMD:
                if (!msr_info->host_initiated &&
 -                  !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
                    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
                        return 1;
  
@@@ -4216,6 -4431,14 +4428,14 @@@ static int alloc_loaded_vmcs(struct loa
                if (!loaded_vmcs->msr_bitmap)
                        goto out_vmcs;
                memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+               if (static_branch_unlikely(&enable_evmcs) &&
+                   (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+                       struct hv_enlightened_vmcs *evmcs =
+                               (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
+                       evmcs->hv_enlightenments_control.msr_bitmap = 1;
+               }
        }
        return 0;
  
@@@ -5329,6 -5552,9 +5549,9 @@@ static void __always_inline vmx_disable
        if (!cpu_has_vmx_msr_bitmap())
                return;
  
+       if (static_branch_unlikely(&enable_evmcs))
+               evmcs_touch_msr_bitmap();
        /*
         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
         * have the write-low and read-high bitmap offsets the wrong way round.
@@@ -5364,6 -5590,9 +5587,9 @@@ static void __always_inline vmx_enable_
        if (!cpu_has_vmx_msr_bitmap())
                return;
  
+       if (static_branch_unlikely(&enable_evmcs))
+               evmcs_touch_msr_bitmap();
        /*
         * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
         * have the write-low and read-high bitmap offsets the wrong way round.
@@@ -5946,8 -6175,14 +6172,14 @@@ static void vmx_vcpu_setup(struct vcpu_
        int i;
  
        if (enable_shadow_vmcs) {
+               /*
+                * At vCPU creation, "VMWRITE to any supported field
+                * in the VMCS" is supported, so use the more
+                * permissive vmx_vmread_bitmap to specify both read
+                * and write permissions for the shadow VMCS.
+                */
                vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
-               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+               vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
        }
        if (cpu_has_vmx_msr_bitmap())
                vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
@@@ -7588,8 -7823,7 +7820,7 @@@ static int nested_vmx_get_vmptr(struct 
                        vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
                return 1;
  
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
-                               sizeof(*vmpointer), &e)) {
+       if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@@ -7670,6 -7904,12 +7901,12 @@@ static int handle_vmon(struct kvm_vcpu 
                return 1;
        }
  
+       /* CPL=0 must be checked manually. */
+       if (vmx_get_cpl(vcpu)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
        if (vmx->nested.vmxon) {
                nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
                return kvm_skip_emulated_instruction(vcpu);
   */
  static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
  {
+       if (vmx_get_cpl(vcpu)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 0;
+       }
        if (!to_vmx(vcpu)->nested.vmxon) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 0;
@@@ -7928,23 -8173,42 +8170,42 @@@ static inline int vmcs12_write_any(stru
  
  }
  
+ /*
+  * Copy the writable VMCS shadow fields back to the VMCS12, in case
+  * they have been modified by the L1 guest. Note that the "read-only"
+  * VM-exit information fields are actually writable if the vCPU is
+  * configured to support "VMWRITE to any supported field in the VMCS."
+  */
  static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
  {
-       int i;
+       const u16 *fields[] = {
+               shadow_read_write_fields,
+               shadow_read_only_fields
+       };
+       const int max_fields[] = {
+               max_shadow_read_write_fields,
+               max_shadow_read_only_fields
+       };
+       int i, q;
        unsigned long field;
        u64 field_value;
        struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
-       const u16 *fields = shadow_read_write_fields;
-       const int num_fields = max_shadow_read_write_fields;
  
        preempt_disable();
  
        vmcs_load(shadow_vmcs);
  
-       for (i = 0; i < num_fields; i++) {
-               field = fields[i];
-               field_value = __vmcs_readl(field);
-               vmcs12_write_any(&vmx->vcpu, field, field_value);
+       for (q = 0; q < ARRAY_SIZE(fields); q++) {
+               for (i = 0; i < max_fields[q]; i++) {
+                       field = fields[q][i];
+                       field_value = __vmcs_readl(field);
+                       vmcs12_write_any(&vmx->vcpu, field, field_value);
+               }
+               /*
+                * Skip the VM-exit information fields if they are read-only.
+                */
+               if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+                       break;
        }
  
        vmcs_clear(shadow_vmcs);
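copy_shadow_to_vmcs12() above now walks two field tables: the read/write fields always, and the nominally read-only VM-exit information fields only when the vCPU is allowed to VMWRITE any field. The condensed sketch below shows the table-of-tables loop with the early break; the field numbers and names are illustrative.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static const unsigned short rw_fields[] = { 0x6800, 0x6802 };
static const unsigned short ro_fields[] = { 0x4402, 0x4406 };

static void sync_shadow_fields(bool vmwrite_any_field)
{
        const unsigned short *tables[] = { rw_fields, ro_fields };
        const size_t counts[] = {
                sizeof(rw_fields) / sizeof(rw_fields[0]),
                sizeof(ro_fields) / sizeof(ro_fields[0]),
        };

        for (size_t q = 0; q < 2; q++) {
                for (size_t i = 0; i < counts[q]; i++)
                        printf("sync field %#x\n", (unsigned)tables[q][i]);
                /* Skip the read-only table unless those fields are writable. */
                if (!vmwrite_any_field)
                        break;
        }
}

int main(void)
{
        sync_shadow_fields(false);      /* only the read/write table is synced */
        return 0;
}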
@@@ -8029,9 -8293,9 +8290,9 @@@ static int handle_vmread(struct kvm_vcp
                if (get_vmx_mem_address(vcpu, exit_qualification,
                                vmx_instruction_info, true, &gva))
                        return 1;
-               /* _system ok, as hardware has verified cpl=0 */
-               kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
-                            &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
+               /* _system ok, nested_vmx_check_permission has verified cpl=0 */
+               kvm_write_guest_virt_system(vcpu, gva, &field_value,
+                                           (is_long_mode(vcpu) ? 8 : 4), NULL);
        }
  
        nested_vmx_succeed(vcpu);
@@@ -8069,8 -8333,8 +8330,8 @@@ static int handle_vmwrite(struct kvm_vc
                if (get_vmx_mem_address(vcpu, exit_qualification,
                                vmx_instruction_info, false, &gva))
                        return 1;
-               if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
-                          &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+               if (kvm_read_guest_virt(vcpu, gva, &field_value,
+                                       (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
                        kvm_inject_page_fault(vcpu, &e);
                        return 1;
                }
  
  
        field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
-       if (vmcs_field_readonly(field)) {
+       /*
+        * If the vCPU supports "VMWRITE to any supported field in the
+        * VMCS," then the "read-only" fields are actually read/write.
+        */
+       if (vmcs_field_readonly(field) &&
+           !nested_cpu_has_vmwrite_any_field(vcpu)) {
                nested_vmx_failValid(vcpu,
                        VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
                return kvm_skip_emulated_instruction(vcpu);
@@@ -8189,10 -8458,10 +8455,10 @@@ static int handle_vmptrst(struct kvm_vc
        if (get_vmx_mem_address(vcpu, exit_qualification,
                        vmx_instruction_info, true, &vmcs_gva))
                return 1;
-       /* ok to use *_system, as hardware has verified cpl=0 */
-       if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
-                                (void *)&to_vmx(vcpu)->nested.current_vmptr,
-                                sizeof(u64), &e)) {
+       /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
+       if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
+                                       (void *)&to_vmx(vcpu)->nested.current_vmptr,
+                                       sizeof(u64), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@@ -8239,8 -8508,7 +8505,7 @@@ static int handle_invept(struct kvm_vcp
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-                               sizeof(operand), &e)) {
+       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
@@@ -8304,8 -8572,7 +8569,7 @@@ static int handle_invvpid(struct kvm_vc
        if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
                        vmx_instruction_info, false, &gva))
                return 1;
-       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
-                               sizeof(operand), &e)) {
+       if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
                kvm_inject_page_fault(vcpu, &e);
                return 1;
        }
  
        switch (type) {
        case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
-               if (is_noncanonical_address(operand.gla, vcpu)) {
+               if (!operand.vpid ||
+                   is_noncanonical_address(operand.gla, vcpu)) {
                        nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
                }
-               /* fall through */
+               if (cpu_has_vmx_invvpid_individual_addr() &&
+                   vmx->nested.vpid02) {
+                       __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
+                               vmx->nested.vpid02, operand.gla);
+               } else
+                       __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+               break;
        case VMX_VPID_EXTENT_SINGLE_CONTEXT:
        case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
                if (!operand.vpid) {
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                        return kvm_skip_emulated_instruction(vcpu);
                }
+               __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
                break;
        case VMX_VPID_EXTENT_ALL_CONTEXT:
+               __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
                break;
        default:
                WARN_ON_ONCE(1);
                return kvm_skip_emulated_instruction(vcpu);
        }
  
-       __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
        nested_vmx_succeed(vcpu);
  
        return kvm_skip_emulated_instruction(vcpu);
@@@ -8842,11 -9117,13 +9114,13 @@@ static bool nested_vmx_exit_reflected(s
        case EXIT_REASON_TPR_BELOW_THRESHOLD:
                return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
        case EXIT_REASON_APIC_ACCESS:
-               return nested_cpu_has2(vmcs12,
-                       SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
        case EXIT_REASON_APIC_WRITE:
        case EXIT_REASON_EOI_INDUCED:
-               /* apic_write and eoi_induced should exit unconditionally. */
+               /*
+                * The controls for "virtualize APIC accesses," "APIC-
+                * register virtualization," and "virtual-interrupt
+                * delivery" only come from vmcs12.
+                */
                return true;
        case EXIT_REASON_EPT_VIOLATION:
                /*
@@@ -9253,31 -9530,43 +9527,43 @@@ static void update_cr8_intercept(struc
        vmcs_write32(TPR_THRESHOLD, irr);
  }
  
- static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
  {
        u32 sec_exec_control;
  
+       if (!lapic_in_kernel(vcpu))
+               return;
        /* Postpone execution until vmcs01 is the current VMCS. */
        if (is_guest_mode(vcpu)) {
-               to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+               to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
                return;
        }
  
-       if (!cpu_has_vmx_virtualize_x2apic_mode())
-               return;
        if (!cpu_need_tpr_shadow(vcpu))
                return;
  
        sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+       sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                             SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
  
-       if (set) {
-               sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-               sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
-       } else {
-               sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
-               sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
-               vmx_flush_tlb(vcpu, true);
+       switch (kvm_get_apic_mode(vcpu)) {
+       case LAPIC_MODE_INVALID:
+               WARN_ONCE(true, "Invalid local APIC state");
+       case LAPIC_MODE_DISABLED:
+               break;
+       case LAPIC_MODE_XAPIC:
+               if (flexpriority_enabled) {
+                       sec_exec_control |=
+                               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+                       vmx_flush_tlb(vcpu, true);
+               }
+               break;
+       case LAPIC_MODE_X2APIC:
+               if (cpu_has_vmx_virtualize_x2apic_mode())
+                       sec_exec_control |=
+                               SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+               break;
        }
        vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
  
  
  static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
  {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-       /*
-        * Currently we do not handle the nested case where L2 has an
-        * APIC access page of its own; that page is still pinned.
-        * Hence, we skip the case where the VCPU is in guest mode _and_
-        * L1 prepared an APIC access page for L2.
-        *
-        * For the case where L1 and L2 share the same APIC access page
-        * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
-        * in the vmcs12), this function will only update either the vmcs01
-        * or the vmcs02.  If the former, the vmcs02 will be updated by
-        * prepare_vmcs02.  If the latter, the vmcs01 will be updated in
-        * the next L2->L1 exit.
-        */
-       if (!is_guest_mode(vcpu) ||
-           !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
-                            SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+       if (!is_guest_mode(vcpu)) {
                vmcs_write64(APIC_ACCESS_ADDR, hpa);
                vmx_flush_tlb(vcpu, true);
        }
@@@ -9485,21 -9757,9 +9754,21 @@@ static void vmx_handle_external_intr(st
  }
  STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
  
 -static bool vmx_has_high_real_mode_segbase(void)
 +static bool vmx_has_emulated_msr(int index)
  {
 -      return enable_unrestricted_guest || emulate_invalid_guest_state;
 +      switch (index) {
 +      case MSR_IA32_SMBASE:
 +              /*
 +               * We cannot do SMM unless we can run the guest in big
 +               * real mode.
 +               */
 +              return enable_unrestricted_guest || emulate_invalid_guest_state;
 +      case MSR_AMD64_VIRT_SPEC_CTRL:
 +              /* This is AMD only.  */
 +              return false;
 +      default:
 +              return true;
 +      }
  }
  
  static bool vmx_mpx_supported(void)
@@@ -9731,7 -9991,8 +10000,7 @@@ static void __noclone vmx_vcpu_run(stru
         * is no need to worry about the conditional branch over the wrmsr
         * being speculatively taken.
         */
 -      if (vmx->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 +      x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
  
        vmx->__launched = vmx->loaded_vmcs->launched;
  
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  
 -      if (vmx->spec_ctrl)
 -              native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 +      x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
  
        /* Eliminate branch target predictions from guest mode */
        vmexit_fill_RSB();
@@@ -9943,13 -10205,13 +10212,13 @@@ STACK_FRAME_NON_STANDARD(vmx_vcpu_run)
  
  static struct kvm *vmx_vm_alloc(void)
  {
-       struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
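+       /*
+        * struct kvm_vmx embeds struct kvm, which is large; vzalloc avoids
+        * requiring physically contiguous pages for it.
+        */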
+       struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
        return &kvm_vmx->kvm;
  }
  
  static void vmx_vm_free(struct kvm *kvm)
  {
-       kfree(to_kvm_vmx(kvm));
+       vfree(to_kvm_vmx(kvm));
  }
  
  static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
@@@ -10387,11 -10649,6 +10656,6 @@@ static void nested_get_vmcs12_pages(str
                        vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
                                        SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
                }
-       } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
-                  cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
-               vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
-                             SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
-               kvm_vcpu_reload_apic_access_page(vcpu);
        }
  
        if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
@@@ -10871,8 -11128,7 +11135,7 @@@ static int nested_vmx_load_cr3(struct k
        return 0;
  }
  
- static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-                              bool from_vmentry)
+ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
  
   * is assigned to entry_failure_code on failure.
   */
  static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
-                         bool from_vmentry, u32 *entry_failure_code)
+                         u32 *entry_failure_code)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        u32 exec_control, vmcs12_exec_ctrl;
  
        if (vmx->nested.dirty_vmcs12) {
-               prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+               prepare_vmcs02_full(vcpu, vmcs12);
                vmx->nested.dirty_vmcs12 = false;
        }
  
         * HOST_FS_BASE, HOST_GS_BASE.
         */
  
-       if (from_vmentry &&
+       if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
                kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
                vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
                kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
                vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
        }
-       if (from_vmentry) {
+       if (vmx->nested.nested_run_pending) {
                vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                             vmcs12->vm_entry_intr_info_field);
                vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
                        ~VM_ENTRY_IA32E_MODE) |
                (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
  
-       if (from_vmentry &&
+       if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
                vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
                vcpu->arch.pat = vmcs12->guest_ia32_pat;
                if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
                        if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
                                vmx->nested.last_vpid = vmcs12->virtual_processor_id;
-                               __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
+                               __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
                        }
                } else {
                        vmx_flush_tlb(vcpu, true);
        vmx_set_cr4(vcpu, vmcs12->guest_cr4);
        vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
  
-       if (from_vmentry &&
+       if (vmx->nested.nested_run_pending &&
            (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
                vcpu->arch.efer = vmcs12->guest_ia32_efer;
        else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@@ -11418,7 -11674,7 +11681,7 @@@ static int check_vmentry_postreqs(struc
        return 0;
  }
  
- static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
  {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
                vcpu->arch.tsc_offset += vmcs12->tsc_offset;
  
        r = EXIT_REASON_INVALID_STATE;
-       if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+       if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
                goto fail;
  
        nested_get_vmcs12_pages(vcpu, vmcs12);
@@@ -11540,20 -11796,22 +11803,22 @@@ static int nested_vmx_run(struct kvm_vc
         * the nested entry.
         */
  
-       ret = enter_vmx_non_root_mode(vcpu, true);
-       if (ret)
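+       /*
+        * Flag that a VMLAUNCH/VMRESUME is in flight; prepare_vmcs02()
+        * and friends key off this instead of a from_vmentry argument.
+        */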
+       vmx->nested.nested_run_pending = 1;
+       ret = enter_vmx_non_root_mode(vcpu);
+       if (ret) {
+               vmx->nested.nested_run_pending = 0;
                return ret;
+       }
  
        /*
         * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
         * by event injection, halt vcpu.
         */
        if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
-           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
+           !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
+               vmx->nested.nested_run_pending = 0;
                return kvm_vcpu_halt(vcpu);
-       vmx->nested.nested_run_pending = 1;
+       }
        return 1;
  
  out:
@@@ -11925,12 -12183,20 +12190,20 @@@ static void load_vmcs12_host_state(stru
  
        load_vmcs12_mmu_host_state(vcpu, vmcs12);
  
-       if (enable_vpid) {
-               /*
-                * Trivially support vpid by letting L2s share their parent
-                * L1's vpid. TODO: move to a more elaborate solution, giving
-                * each L2 its own vpid and exposing the vpid feature to L1.
-                */
+       /*
+        * If vmcs01 doesn't use VPID, the CPU flushes the TLB on every
+        * VMEntry/VMExit, so no flush is needed here.
+        *
+        * If vmcs12 uses VPID, TLB entries populated by L2 are
+        * tagged with vmx->nested.vpid02 while L1 entries are tagged
+        * with vmx->vpid, so again no flush is needed.
+        *
+        * Therefore, flush the TLB only when vmcs01 uses VPID and
+        * vmcs12 does not, as in that case L1 and L2 TLB entries
+        * are both tagged with vmx->vpid.
+        */
+       if (enable_vpid &&
+           !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
                vmx_flush_tlb(vcpu, true);
        }
  
@@@ -12069,10 -12335,9 +12342,9 @@@ static void nested_vmx_vmexit(struct kv
        if (kvm_has_tsc_control)
                decache_tsc_multiplier(vmx);
  
-       if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
-               vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
-               vmx_set_virtual_x2apic_mode(vcpu,
-                               vcpu->arch.apic_base & X2APIC_ENABLE);
+       if (vmx->nested.change_vmcs01_virtual_apic_mode) {
+               vmx->nested.change_vmcs01_virtual_apic_mode = false;
+               vmx_set_virtual_apic_mode(vcpu);
        } else if (!nested_cpu_has_ept(vmcs12) &&
                   nested_cpu_has2(vmcs12,
                                   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@@ -12236,7 -12501,7 +12508,7 @@@ static inline int u64_shl_div_u64(u64 a
  static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
  {
        struct vcpu_vmx *vmx;
-       u64 tscl, guest_tscl, delta_tsc;
+       u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
  
        if (kvm_mwait_in_guest(vcpu->kvm))
                return -EOPNOTSUPP;
        tscl = rdtsc();
        guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
        delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
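+       /*
+        * Expire the preemption timer early by lapic_timer_advance_ns so
+        * the tscdeadline interrupt is not delivered to the guest late.
+        */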
+       lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+       if (delta_tsc > lapic_timer_advance_cycles)
+               delta_tsc -= lapic_timer_advance_cycles;
+       else
+               delta_tsc = 0;
  
        /* Convert to host delta tsc if tsc scaling is enabled */
        if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
@@@ -12615,7 -12886,7 +12893,7 @@@ static int vmx_pre_leave_smm(struct kvm
  
        if (vmx->nested.smm.guest_mode) {
                vcpu->arch.hflags &= ~HF_SMM_MASK;
-               ret = enter_vmx_non_root_mode(vcpu, false);
+               ret = enter_vmx_non_root_mode(vcpu);
                vcpu->arch.hflags |= HF_SMM_MASK;
                if (ret)
                        return ret;
@@@ -12639,7 -12910,7 +12917,7 @@@ static struct kvm_x86_ops vmx_x86_ops _
        .hardware_enable = hardware_enable,
        .hardware_disable = hardware_disable,
        .cpu_has_accelerated_tpr = report_flexpriority,
 -      .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
 +      .has_emulated_msr = vmx_has_emulated_msr,
  
        .vm_init = vmx_vm_init,
        .vm_alloc = vmx_vm_alloc,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
-       .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+       .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
        .set_apic_access_page_addr = vmx_set_apic_access_page_addr,
        .get_enable_apicv = vmx_get_enable_apicv,
        .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
@@@ -12812,6 -13083,7 +13090,7 @@@ static int __init vmx_init(void
        rcu_assign_pointer(crash_vmclear_loaded_vmcss,
                           crash_vmclear_local_loaded_vmcss);
  #endif
+       vmx_check_vmcs12_offsets();
  
        return 0;
  }
diff --combined arch/x86/kvm/x86.c
index 71e7cda6d01430bca8ef226238589ab0e830d6c9,06dd4cdb2ca8a8fa21bf24957a34344e11ec65e2..cc8c8be1e92db9d309acd9367af4cf08218aba4c
@@@ -138,6 -138,7 +138,7 @@@ module_param(tsc_tolerance_ppm, uint, S
  /* lapic timer advance (tscdeadline mode only) in nanoseconds */
  unsigned int __read_mostly lapic_timer_advance_ns = 0;
  module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+ EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
  
  static bool __read_mostly vector_hashing = true;
  module_param(vector_hashing, bool, S_IRUGO);
@@@ -318,23 -319,27 +319,27 @@@ u64 kvm_get_apic_base(struct kvm_vcpu *
  }
  EXPORT_SYMBOL_GPL(kvm_get_apic_base);
  
+ enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
+ {
+       return kvm_apic_mode(kvm_get_apic_base(vcpu));
+ }
+ EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
  int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
-       u64 old_state = vcpu->arch.apic_base &
-               (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
-       u64 new_state = msr_info->data &
-               (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+       enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
+       enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
        u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
                (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
  
-       if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
-               return 1;
-       if (!msr_info->host_initiated &&
-           ((new_state == MSR_IA32_APICBASE_ENABLE &&
-             old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
-            (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
-             old_state == 0)))
+       if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
                return 1;
+       if (!msr_info->host_initiated) {
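+               /*
+                * Per the SDM, switching from x2APIC straight back to xAPIC
+                * and enabling x2APIC while the APIC is disabled are invalid.
+                */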
+               if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
+                       return 1;
+               if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
+                       return 1;
+       }
  
        kvm_lapic_set_base(vcpu, msr_info->data);
        return 0;
@@@ -856,7 -861,7 +861,7 @@@ int kvm_set_cr3(struct kvm_vcpu *vcpu, 
        }
  
        if (is_long_mode(vcpu) &&
-           (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+           (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
                return 1;
        else if (is_pae(vcpu) && is_paging(vcpu) &&
                   !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
@@@ -1061,7 -1066,6 +1066,7 @@@ static u32 emulated_msrs[] = 
        MSR_SMI_COUNT,
        MSR_PLATFORM_INFO,
        MSR_MISC_FEATURES_ENABLES,
 +      MSR_AMD64_VIRT_SPEC_CTRL,
  };
  
  static unsigned num_emulated_msrs;
@@@ -1761,7 -1765,7 +1766,7 @@@ static int do_monotonic_boot(s64 *t, u6
        return mode;
  }
  
- static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
+ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
  {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        unsigned long seq;
@@@ -1794,7 -1798,7 +1799,7 @@@ static bool kvm_get_time_and_clockread(
  }
  
  /* returns true if host is using TSC based clocksource */
- static bool kvm_get_walltime_and_clockread(struct timespec *ts,
+ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
                                           u64 *tsc_timestamp)
  {
        /* checked again under seqlock below */
@@@ -2868,6 -2872,7 +2873,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_HYPERV_SYNIC2:
        case KVM_CAP_HYPERV_VP_INDEX:
        case KVM_CAP_HYPERV_EVENTFD:
+       case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
                r = KVM_CLOCK_TSC_STABLE;
                break;
        case KVM_CAP_X86_DISABLE_EXITS:
-               r |=  KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+               r |=  KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
                if (kvm_can_mwait_in_guest())
                        r |= KVM_X86_DISABLE_EXITS_MWAIT;
                break;
                 * fringe case that is not enabled except via specific settings
                 * of the module parameters.
                 */
 -              r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
 +              r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
                break;
        case KVM_CAP_VAPIC:
                r = !kvm_x86_ops->cpu_has_accelerated_tpr();
@@@ -3962,7 -3967,7 +3968,7 @@@ out_nofree
        return r;
  }
  
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
  }
@@@ -4248,7 -4253,7 +4254,7 @@@ split_irqchip_unlock
                if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
                        kvm_can_mwait_in_guest())
                        kvm->arch.mwait_in_guest = true;
-               if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+               if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
                        kvm->arch.hlt_in_guest = true;
                if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
                        kvm->arch.pause_in_guest = true;
@@@ -4607,8 -4612,14 +4613,8 @@@ static void kvm_init_msr_list(void
        num_msrs_to_save = j;
  
        for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
 -              switch (emulated_msrs[i]) {
 -              case MSR_IA32_SMBASE:
 -                      if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
 -                              continue;
 -                      break;
 -              default:
 -                      break;
 -              }
 +              if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
 +                      continue;
  
                if (j < i)
                        emulated_msrs[j] = emulated_msrs[i];
@@@ -4787,11 -4798,10 +4793,10 @@@ static int kvm_fetch_guest_virt(struct 
        return X86EMUL_CONTINUE;
  }
  
- int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
                               gva_t addr, void *val, unsigned int bytes,
                               struct x86_exception *exception)
  {
-       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
  
        return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
  }
  EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
  
- static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-                                     gva_t addr, void *val, unsigned int bytes,
-                                     struct x86_exception *exception)
+ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
+                            gva_t addr, void *val, unsigned int bytes,
+                            struct x86_exception *exception, bool system)
  {
        struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
+       u32 access = 0;
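+       /*
+        * "system" reads (e.g. descriptor-table accesses) are implicit
+        * supervisor accesses and never carry PFERR_USER_MASK.
+        */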
+       if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+               access |= PFERR_USER_MASK;
+       return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
  }
  
  static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
        return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
  }
  
- int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
-                                      gva_t addr, void *val,
-                                      unsigned int bytes,
-                                      struct x86_exception *exception)
+ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+                                     struct kvm_vcpu *vcpu, u32 access,
+                                     struct x86_exception *exception)
  {
-       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
        void *data = val;
        int r = X86EMUL_CONTINUE;
  
        while (bytes) {
                gpa_t gpa =  vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
-                                                            PFERR_WRITE_MASK,
+                                                            access,
                                                             exception);
                unsigned offset = addr & (PAGE_SIZE-1);
                unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
  out:
        return r;
  }
+ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
+                             unsigned int bytes, struct x86_exception *exception,
+                             bool system)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       u32 access = PFERR_WRITE_MASK;
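+       /* Only non-"system" writes performed at CPL 3 are user accesses. */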
+       if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+               access |= PFERR_USER_MASK;
+       return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+                                          access, exception);
+ }
+ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
+                               unsigned int bytes, struct x86_exception *exception)
+ {
+       return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+                                          PFERR_WRITE_MASK, exception);
+ }
  EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
  
  int handle_ud(struct kvm_vcpu *vcpu)
        struct x86_exception e;
  
        if (force_emulation_prefix &&
-           kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
-                               kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+           kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
+                               sig, sizeof(sig), &e) == 0 &&
            memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
                kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
                emul_type = 0;
@@@ -5600,8 -5634,8 +5629,8 @@@ static int emulator_pre_leave_smm(struc
  static const struct x86_emulate_ops emulate_ops = {
        .read_gpr            = emulator_read_gpr,
        .write_gpr           = emulator_write_gpr,
-       .read_std            = kvm_read_guest_virt_system,
-       .write_std           = kvm_write_guest_virt_system,
+       .read_std            = emulator_read_std,
+       .write_std           = emulator_write_std,
        .read_phys           = kvm_read_guest_phys_system,
        .fetch               = kvm_fetch_guest_virt,
        .read_emulated       = emulator_read_emulated,
@@@ -6617,7 -6651,7 +6646,7 @@@ static int kvm_pv_clock_pairing(struct 
                                unsigned long clock_type)
  {
        struct kvm_clock_pairing clock_pairing;
-       struct timespec ts;
+       struct timespec64 ts;
        u64 cycle;
        int ret;
  
@@@ -6671,8 -6705,11 +6700,8 @@@ int kvm_emulate_hypercall(struct kvm_vc
        unsigned long nr, a0, a1, a2, a3, ret;
        int op_64_bit;
  
 -      if (kvm_hv_hypercall_enabled(vcpu->kvm)) {
 -              if (!kvm_hv_hypercall(vcpu))
 -                      return 0;
 -              goto out;
 -      }
 +      if (kvm_hv_hypercall_enabled(vcpu->kvm))
 +              return kvm_hv_hypercall(vcpu);
  
        nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
        a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
  
        if (kvm_x86_ops->get_cpl(vcpu) != 0) {
                ret = -KVM_EPERM;
 -              goto out_error;
 +              goto out;
        }
  
        switch (nr) {
                ret = -KVM_ENOSYS;
                break;
        }
 -out_error:
 +out:
        if (!op_64_bit)
                ret = (u32)ret;
        kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
  
 -out:
        ++vcpu->stat.hypercalls;
        return kvm_skip_emulated_instruction(vcpu);
  }
@@@ -7976,7 -8014,6 +8005,7 @@@ static int __set_sregs(struct kvm_vcpu 
  {
        struct msr_data apic_base_msr;
        int mmu_reset_needed = 0;
 +      int cpuid_update_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
        int ret = -EINVAL;
        vcpu->arch.cr0 = sregs->cr0;
  
        mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 +      cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
 +                              (X86_CR4_OSXSAVE | X86_CR4_PKE));
        kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
 -      if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
 +      if (cpuid_update_needed)
                kvm_update_cpuid(vcpu);
  
        idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --combined include/linux/sched.h
index 3aa4fcb74e761dfda361f17d09593ecd9c361646,ff289ae6b7870f2bc9cc4acd61e6768acfc1604e..16e4d984fe51948d092cb093538264a10b2f4135
@@@ -27,7 -27,6 +27,7 @@@
  #include <linux/signal_types.h>
  #include <linux/mm_types_task.h>
  #include <linux/task_io_accounting.h>
 +#include <linux/rseq.h>
  
  /* task_struct member predeclarations (sorted alphabetically): */
  struct audit_context;
@@@ -1048,17 -1047,6 +1048,17 @@@ struct task_struct 
        unsigned long                   numa_pages_migrated;
  #endif /* CONFIG_NUMA_BALANCING */
  
 +#ifdef CONFIG_RSEQ
 +      struct rseq __user *rseq;
 +      u32 rseq_len;
 +      u32 rseq_sig;
 +      /*
 +       * RmW on rseq_event_mask must be performed atomically
 +       * with respect to preemption.
 +       */
 +      unsigned long rseq_event_mask;
 +#endif
 +
        struct tlbflush_unmap_batch     tlb_ubc;
  
        struct rcu_head                 rcu;
@@@ -1445,8 -1433,7 +1445,8 @@@ static inline bool is_percpu_thread(voi
  #define PFA_NO_NEW_PRIVS              0       /* May not gain new privileges. */
  #define PFA_SPREAD_PAGE                       1       /* Spread page cache over cpuset */
  #define PFA_SPREAD_SLAB                       2       /* Spread some slab caches over cpuset */
 -
 +#define PFA_SPEC_SSB_DISABLE          3       /* Speculative Store Bypass disabled */
 +#define PFA_SPEC_SSB_FORCE_DISABLE    4       /* Speculative Store Bypass force disabled*/
  
  #define TASK_PFA_TEST(name, func)                                     \
        static inline bool task_##func(struct task_struct *p)           \
@@@ -1471,13 -1458,6 +1471,13 @@@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab
  TASK_PFA_SET(SPREAD_SLAB, spread_slab)
  TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
  
 +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
 +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
 +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 +
 +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 +
  static inline void
  current_restore_flags(unsigned long orig_flags, unsigned long flags)
  {
@@@ -1524,7 -1504,6 +1524,7 @@@ static inline int task_nice(const struc
  extern int can_nice(const struct task_struct *p, const int nice);
  extern int task_curr(const struct task_struct *p);
  extern int idle_cpu(int cpu);
 +extern int available_idle_cpu(int cpu);
  extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
  extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
  extern int sched_setattr(struct task_struct *, const struct sched_attr *);
@@@ -1639,6 -1618,12 +1639,12 @@@ static inline void clear_tsk_thread_fla
        clear_ti_thread_flag(task_thread_info(tsk), flag);
  }
  
+ static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
+                                         bool value)
+ {
+       update_ti_thread_flag(task_thread_info(tsk), flag, value);
+ }
  static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
  {
        return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
@@@ -1674,6 -1659,7 +1680,6 @@@ static inline int test_tsk_need_resched
   * explicit rescheduling in places that are safe. The return
   * value indicates whether a reschedule was done in fact.
   * cond_resched_lock() will drop the spinlock before scheduling,
 - * cond_resched_softirq() will enable bhs before scheduling.
   */
  #ifndef CONFIG_PREEMPT
  extern int _cond_resched(void);
@@@ -1693,6 -1679,13 +1699,6 @@@ extern int __cond_resched_lock(spinlock
        __cond_resched_lock(lock);                              \
  })
  
 -extern int __cond_resched_softirq(void);
 -
 -#define cond_resched_softirq() ({                                     \
 -      ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);     \
 -      __cond_resched_softirq();                                       \
 -})
 -
  static inline void cond_resched_rcu(void)
  {
  #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
@@@ -1769,126 -1762,4 +1775,126 @@@ extern long sched_getaffinity(pid_t pid
  #define TASK_SIZE_OF(tsk)     TASK_SIZE
  #endif
  
 +#ifdef CONFIG_RSEQ
 +
 +/*
 + * Map the event mask on the user-space ABI enum rseq_cs_flags
 + * for direct mask checks.
 + */
 +enum rseq_event_mask_bits {
 +      RSEQ_EVENT_PREEMPT_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
 +      RSEQ_EVENT_SIGNAL_BIT   = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
 +      RSEQ_EVENT_MIGRATE_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
 +};
 +
 +enum rseq_event_mask {
 +      RSEQ_EVENT_PREEMPT      = (1U << RSEQ_EVENT_PREEMPT_BIT),
 +      RSEQ_EVENT_SIGNAL       = (1U << RSEQ_EVENT_SIGNAL_BIT),
 +      RSEQ_EVENT_MIGRATE      = (1U << RSEQ_EVENT_MIGRATE_BIT),
 +};
 +
 +static inline void rseq_set_notify_resume(struct task_struct *t)
 +{
 +      if (t->rseq)
 +              set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
 +}
 +
 +void __rseq_handle_notify_resume(struct pt_regs *regs);
 +
 +static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 +{
 +      if (current->rseq)
 +              __rseq_handle_notify_resume(regs);
 +}
 +
 +static inline void rseq_signal_deliver(struct pt_regs *regs)
 +{
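 +      /*
 +       * Disable preemption so the non-atomic __set_bit() below cannot
 +       * race with rseq_preempt() run from the scheduler.
 +       */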
 +      preempt_disable();
 +      __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
 +      preempt_enable();
 +      rseq_handle_notify_resume(regs);
 +}
 +
 +/* rseq_preempt() requires preemption to be disabled. */
 +static inline void rseq_preempt(struct task_struct *t)
 +{
 +      __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
 +      rseq_set_notify_resume(t);
 +}
 +
 +/* rseq_migrate() requires preemption to be disabled. */
 +static inline void rseq_migrate(struct task_struct *t)
 +{
 +      __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
 +      rseq_set_notify_resume(t);
 +}
 +
 +/*
 + * If the parent process has a registered restartable sequences area,
 + * the child inherits it. This only applies when forking a process,
 + * not a thread. If the parent calls fork() in the middle of a
 + * restartable sequence, set the resume notifier to force the child
 + * to retry.
 + */
 +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 +{
 +      if (clone_flags & CLONE_THREAD) {
 +              t->rseq = NULL;
 +              t->rseq_len = 0;
 +              t->rseq_sig = 0;
 +              t->rseq_event_mask = 0;
 +      } else {
 +              t->rseq = current->rseq;
 +              t->rseq_len = current->rseq_len;
 +              t->rseq_sig = current->rseq_sig;
 +              t->rseq_event_mask = current->rseq_event_mask;
 +              rseq_preempt(t);
 +      }
 +}
 +
 +static inline void rseq_execve(struct task_struct *t)
 +{
 +      t->rseq = NULL;
 +      t->rseq_len = 0;
 +      t->rseq_sig = 0;
 +      t->rseq_event_mask = 0;
 +}
 +
 +#else
 +
 +static inline void rseq_set_notify_resume(struct task_struct *t)
 +{
 +}
 +static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 +{
 +}
 +static inline void rseq_signal_deliver(struct pt_regs *regs)
 +{
 +}
 +static inline void rseq_preempt(struct task_struct *t)
 +{
 +}
 +static inline void rseq_migrate(struct task_struct *t)
 +{
 +}
 +static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 +{
 +}
 +static inline void rseq_execve(struct task_struct *t)
 +{
 +}
 +
 +#endif
 +
 +#ifdef CONFIG_DEBUG_RSEQ
 +
 +void rseq_syscall(struct pt_regs *regs);
 +
 +#else
 +
 +static inline void rseq_syscall(struct pt_regs *regs)
 +{
 +}
 +
 +#endif
 +
  #endif
diff --combined virt/kvm/arm/arm.c
index 2d9b4795edb2beecd04588c791735d155cc05741,72be779cffe20ad8b09bd356bd42b65a39acaea2..04e554cae3a2066e5eb6e4d2544efc84a62d88de
@@@ -16,6 -16,7 +16,7 @@@
   * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
   */
  
+ #include <linux/bug.h>
  #include <linux/cpu_pm.h>
  #include <linux/errno.h>
  #include <linux/err.h>
@@@ -41,6 -42,7 +42,7 @@@
  #include <asm/mman.h>
  #include <asm/tlbflush.h>
  #include <asm/cacheflush.h>
+ #include <asm/cpufeature.h>
  #include <asm/virt.h>
  #include <asm/kvm_arm.h>
  #include <asm/kvm_asm.h>
@@@ -163,7 -165,7 +165,7 @@@ int kvm_arch_create_vcpu_debugfs(struc
        return 0;
  }
  
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
  {
        return VM_FAULT_SIGBUS;
  }
@@@ -249,6 -251,21 +251,21 @@@ long kvm_arch_dev_ioctl(struct file *fi
        return -EINVAL;
  }
  
+ struct kvm *kvm_arch_alloc_vm(void)
+ {
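+       /*
+        * With !VHE the kvm struct is mapped into Hyp via the kernel
+        * linear map, so it must be kzalloc'd; VHE can use vmalloc memory.
+        */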
+       if (!has_vhe())
+               return kzalloc(sizeof(struct kvm), GFP_KERNEL);
+       return vzalloc(sizeof(struct kvm));
+ }
+ void kvm_arch_free_vm(struct kvm *kvm)
+ {
+       if (!has_vhe())
+               kfree(kvm);
+       else
+               vfree(kvm);
+ }
  
  struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
  {
@@@ -290,7 -307,6 +307,6 @@@ out
  
  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
  {
-       kvm_vgic_vcpu_early_init(vcpu);
  }
  
  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
@@@ -363,10 -379,12 +379,12 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
        kvm_vgic_load(vcpu);
        kvm_timer_vcpu_load(vcpu);
        kvm_vcpu_load_sysregs(vcpu);
+       kvm_arch_vcpu_load_fp(vcpu);
  }
  
  void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
  {
+       kvm_arch_vcpu_put_fp(vcpu);
        kvm_vcpu_put_sysregs(vcpu);
        kvm_timer_vcpu_put(vcpu);
        kvm_vgic_put(vcpu);
@@@ -678,9 -696,6 +696,6 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
                 */
                preempt_disable();
  
-               /* Flush FP/SIMD state that can't survive guest entry/exit */
-               kvm_fpsimd_flush_cpu_state();
                kvm_pmu_flush_hwstate(vcpu);
  
                local_irq_disable();
                if (static_branch_unlikely(&userspace_irqchip_in_use))
                        kvm_timer_sync_hwstate(vcpu);
  
+               kvm_arch_vcpu_ctxsync_fp(vcpu);
                /*
                 * We may have taken a host interrupt in HYP mode (ie
                 * while executing the guest). This interrupt is still
@@@ -1490,10 -1507,6 +1507,10 @@@ static int init_hyp_mode(void
                }
        }
  
 +      err = hyp_map_aux_data();
 +      if (err)
 +              kvm_err("Cannot map host auxiliary data: %d\n", err);
 +
        return 0;
  
  out_err:
@@@ -1574,6 -1587,11 +1591,11 @@@ int kvm_arch_init(void *opaque
                return -ENODEV;
        }
  
+       if (!kvm_arch_check_sve_has_vhe()) {
+               kvm_pr_unimpl("SVE system without VHE unsupported.  Broken cpu?");
+               return -ENODEV;
+       }
        for_each_online_cpu(cpu) {
                smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
                if (ret < 0) {