#include <linux/types.h>
#include <linux/kvm_types.h>
+#include <asm/cputype.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
+ static inline bool kvm_arch_check_sve_has_vhe(void) { return true; }
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
- /* All host FP/SIMD state is restored on guest exit, so nothing to save: */
- static inline void kvm_fpsimd_flush_cpu_state(void) {}
+ /*
+ * VFP/NEON switching is all done by the hyp switch code, so no need to
+ * coordinate with host context handling for this state:
+ */
+ static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_vhe_guest_enter(void) {}
static inline void kvm_arm_vhe_guest_exit(void) {}
static inline bool kvm_arm_harden_branch_predictor(void)
+{
+ switch(read_cpuid_part()) {
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+ case ARM_CPU_PART_BRAHMA_B15:
+ case ARM_CPU_PART_CORTEX_A12:
+ case ARM_CPU_PART_CORTEX_A15:
+ case ARM_CPU_PART_CORTEX_A17:
+ return true;
+#endif
+ default:
+ return false;
+ }
+}
+
+#define KVM_SSBD_UNKNOWN -1
+#define KVM_SSBD_FORCE_DISABLE 0
+#define KVM_SSBD_KERNEL 1
+#define KVM_SSBD_FORCE_ENABLE 2
+#define KVM_SSBD_MITIGATED 3
+
+static inline int kvm_arm_have_ssbd(void)
{
/* No way to detect it yet, pretend it is not there. */
- return false;
+ return KVM_SSBD_UNKNOWN;
}
static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {}
static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {}
+ #define __KVM_HAVE_ARCH_VM_ALLOC
+ struct kvm *kvm_arch_alloc_vm(void);
+ void kvm_arch_free_vm(struct kvm *kvm);
+
#endif /* __ARM_KVM_HOST_H__ */
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if ACPI
select ACPI_SPCR_TABLE if ACPI
+ select ACPI_PPTT if ACPI
select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
+ select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
select ARCH_HAS_KCOV
select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA
select MULTI_IRQ_HANDLER
+ select NEED_DMA_MAP_STATE
+ select NEED_SG_DMA_LENGTH
select NO_BOOTMEM
select OF
select OF_EARLY_FLATTREE
select POWER_SUPPLY
select REFCOUNT_FULL
select SPARSE_IRQ
+ select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
help
config 64BIT
def_bool y
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool y
-
config MMU
def_bool y
config HAVE_GENERIC_GUP
def_bool y
-config ARCH_DMA_ADDR_T_64BIT
- def_bool y
-
-config NEED_DMA_MAP_STATE
- def_bool y
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config SMP
def_bool y
-config SWIOTLB
- def_bool y
-
-config IOMMU_HELPER
- def_bool SWIOTLB
-
config KERNEL_MODE_NEON
def_bool y
If unsure, say Y.
+config ARM64_SSBD
+ bool "Speculative Store Bypass Disable" if EXPERT
+ default y
+ help
+ This enables mitigation of the bypassing of previous stores
+ by speculative loads.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
config ARM64_LSE_ATOMICS
bool "Atomic instructions"
+ default y
help
As part of the Large System Extensions, ARMv8.1 introduces new
atomic instructions that are designed specifically to scale in
Say Y here to make use of these instructions for the in-kernel
atomic routines. This incurs a small overhead on CPUs that do
not support these instructions and requires the kernel to be
- built with binutils >= 2.25.
+ built with binutils >= 2.25 in order for the new instructions
+ to be used.
config ARM64_VHE
bool "Enable support for Virtualization Host Extensions (VHE)"
config ARM64_SVE
bool "ARM Scalable Vector Extension support"
default y
+ depends on !KVM || ARM64_VHE
help
The Scalable Vector Extension (SVE) is an extension to the AArch64
execution state which complements and extends the SIMD functionality
booting the kernel. If unsure and you are not observing these
symptoms, you should assume that it is safe to say Y.
+ CPUs that support SVE are architecturally required to support the
+ Virtualization Host Extensions (VHE), so the kernel makes no
+ provision for supporting SVE alongside KVM without VHE enabled.
+ Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
+ KVM in the same kernel image.
+
config ARM64_MODULE_PLTS
bool
select HAVE_MOD_ARCH_SPECIFIC
#include <asm/cpucaps.h>
#include <asm/cputype.h>
- #include <asm/fpsimd.h>
#include <asm/hwcap.h>
- #include <asm/sigcontext.h>
#include <asm/sysreg.h>
/*
cpus_have_const_cap(ARM64_SVE);
}
- /*
- * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
- * vector length.
- *
- * Use only if SVE is present.
- * This function clobbers the SVE vector length.
- */
- static inline u64 read_zcr_features(void)
- {
- u64 zcr;
- unsigned int vq_max;
-
- /*
- * Set the maximum possible VL, and write zeroes to all other
- * bits to see if they stick.
- */
- sve_kernel_enable(NULL);
- write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
-
- zcr = read_sysreg_s(SYS_ZCR_EL1);
- zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return zcr;
- }
-
+#define ARM64_SSBD_UNKNOWN -1
+#define ARM64_SSBD_FORCE_DISABLE 0
+#define ARM64_SSBD_KERNEL 1
+#define ARM64_SSBD_FORCE_ENABLE 2
+#define ARM64_SSBD_MITIGATED 3
+
+static inline int arm64_get_ssbd_state(void)
+{
+#ifdef CONFIG_ARM64_SSBD
+ extern int ssbd_state;
+ return ssbd_state;
+#else
+ return ARM64_SSBD_UNKNOWN;
+#endif
+}
+
+#ifdef CONFIG_ARM64_SSBD
+void arm64_set_ssbd_mitigation(bool state);
+#else
+static inline void arm64_set_ssbd_mitigation(bool state) {}
+#endif
+
#endif /* __ASSEMBLY__ */
#endif
#include <asm/virt.h>
+#define VCPU_WORKAROUND_2_FLAG_SHIFT 0
+#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT)
+
#define ARM_EXIT_WITH_SERROR_BIT 31
#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT))
#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT))
/* The hyp-stub will return this for any kvm_call_hyp() call */
#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR
- #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
- #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
+ #ifndef __ASSEMBLY__
+
+ #include <linux/mm.h>
/* Translate a kernel address of @sym into its equivalent linear mapping */
#define kvm_ksym_ref(sym) \
({ \
void *val = &sym; \
if (!is_kernel_in_hyp_mode()) \
- val = phys_to_virt((u64)&sym - kimage_voffset); \
+ val = lm_alias(&sym); \
val; \
})
- #ifndef __ASSEMBLY__
struct kvm;
struct kvm_vcpu;
extern u32 __init_stage2_translation(void);
+/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */
+#define __hyp_this_cpu_ptr(sym) \
+ ({ \
+ void *__ptr = hyp_symbol_addr(sym); \
+ __ptr += read_sysreg(tpidr_el2); \
+ (typeof(&sym))__ptr; \
+ })
+
+#define __hyp_this_cpu_read(sym) \
+ ({ \
+ *__hyp_this_cpu_ptr(sym); \
+ })
+
#else /* __ASSEMBLY__ */
-.macro get_host_ctxt reg, tmp
- adr_l \reg, kvm_host_cpu_state
+.macro hyp_adr_this_cpu reg, sym, tmp
+ adr_l \reg, \sym
mrs \tmp, tpidr_el2
add \reg, \reg, \tmp
.endm
+.macro hyp_ldr_this_cpu reg, sym, tmp
+ adr_l \reg, \sym
+ mrs \tmp, tpidr_el2
+ ldr \reg, [\reg, \tmp]
+.endm
+
+.macro get_host_ctxt reg, tmp
+ hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp
+.endm
+
.macro get_vcpu_ptr vcpu, ctxt
get_host_ctxt \ctxt, \vcpu
ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
+ #include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
/* Exception Information */
struct kvm_vcpu_fault_info fault;
- /* Guest debug state */
- u64 debug_flags;
+ /* State of various workarounds, see kvm_asm.h for bit assignment */
+ u64 workaround_flags;
+
+ /* Miscellaneous vcpu state flags */
+ u64 flags;
/*
* We maintain more than a single set of debug registers to support
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;
+
+ struct thread_info *host_thread_info; /* hyp VA */
+ struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */
+
struct {
/* {Break,watch}point registers */
struct kvm_guest_debug_arch regs;
bool sysregs_loaded_on_cpu;
};
+ /* vcpu_arch flags field values: */
+ #define KVM_ARM64_DEBUG_DIRTY (1 << 0)
+ #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
+ #define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
+ #define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
+
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
/*
kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
}
+ static inline bool kvm_arch_check_sve_has_vhe(void)
+ {
+ /*
+ * The Arm architecture specifies that implementation of SVE
+ * requires VHE also to be implemented. The KVM code for arm64
+ * relies on this when SVE is present:
+ */
+ if (system_supports_sve())
+ return has_vhe();
+ else
+ return true;
+ }
+
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
"PARange is %d bits, unsupported configuration!", parange);
}
- /*
- * All host FP/SIMD state is restored on guest exit, so nothing needs
- * doing here except in the SVE case:
- */
- static inline void kvm_fpsimd_flush_cpu_state(void)
+ /* Guest/host FPSIMD coordination helpers */
+ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
+ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu);
+
+ #ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
+ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
{
- if (system_supports_sve())
- sve_flush_cpu_state();
+ return kvm_arch_vcpu_run_map_fp(vcpu);
}
+ #endif
static inline void kvm_arm_vhe_guest_enter(void)
{
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
}
+#define KVM_SSBD_UNKNOWN -1
+#define KVM_SSBD_FORCE_DISABLE 0
+#define KVM_SSBD_KERNEL 1
+#define KVM_SSBD_FORCE_ENABLE 2
+#define KVM_SSBD_MITIGATED 3
+
+static inline int kvm_arm_have_ssbd(void)
+{
+ switch (arm64_get_ssbd_state()) {
+ case ARM64_SSBD_FORCE_DISABLE:
+ return KVM_SSBD_FORCE_DISABLE;
+ case ARM64_SSBD_KERNEL:
+ return KVM_SSBD_KERNEL;
+ case ARM64_SSBD_FORCE_ENABLE:
+ return KVM_SSBD_FORCE_ENABLE;
+ case ARM64_SSBD_MITIGATED:
+ return KVM_SSBD_MITIGATED;
+ case ARM64_SSBD_UNKNOWN:
+ default:
+ return KVM_SSBD_UNKNOWN;
+ }
+}
+
void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
+ #define __KVM_HAVE_ARCH_VM_ALLOC
+ struct kvm *kvm_arch_alloc_vm(void);
+ void kvm_arch_free_vm(struct kvm *kvm);
+
#endif /* __ARM64_KVM_HOST_H__ */
#ifdef __KERNEL__
#include <linux/build_bug.h>
+#include <linux/cache.h>
+#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/string.h>
/* Sync TPIDR_EL0 back to thread_struct for current */
void tls_preserve_current_state(void);
- #define INIT_THREAD { }
+ #define INIT_THREAD { \
+ .fpsimd_cpu = NR_CPUS, \
+ }
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused);
void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused);
+extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
+extern void __init minsigstksz_setup(void);
+
+ /*
+ * Not at the top of the file due to a direct #include cycle between
+ * <asm/fpsimd.h> and <asm/processor.h>. Deferring this #include
+ * ensures that contents of processor.h are visible to fpsimd.h even if
+ * processor.h is included first.
+ *
+ * These prctl helpers are the only things in this file that require
+ * fpsimd.h. The core code expects them to be in this header.
+ */
+ #include <asm/fpsimd.h>
+
/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
#define SVE_GET_VL() sve_get_current_vl()
int preempt_count; /* 0 => preemptable, <0 => bug */
};
- #define INIT_THREAD_INFO(tsk) \
- { \
- .preempt_count = INIT_PREEMPT_COUNT, \
- .addr_limit = KERNEL_DS, \
- }
-
#define thread_saved_pc(tsk) \
((unsigned long)(tsk->thread.cpu_context.pc))
#define thread_saved_sp(tsk) \
#define TIF_32BIT 22 /* 32bit process */
#define TIF_SVE 23 /* Scalable Vector Extension in use */
#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */
+#define TIF_SSBD 25 /* Wants SSB mitigation */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_NOHZ)
+ #define INIT_THREAD_INFO(tsk) \
+ { \
+ .flags = _TIF_FOREIGN_FPSTATE, \
+ .preempt_count = INIT_PREEMPT_COUNT, \
+ .addr_limit = KERNEL_DS, \
+ }
+
#endif /* __KERNEL__ */
#endif /* __ASM_THREAD_INFO_H */
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/preempt.h>
-#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/slab.h>
+ #include <linux/stddef.h>
#include <linux/sysctl.h>
#include <asm/esr.h>
#include <asm/fpsimd.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
+ #include <asm/processor.h>
#include <asm/simd.h>
#include <asm/sigcontext.h>
#include <asm/sysreg.h>
*/
struct fpsimd_last_state_struct {
struct user_fpsimd_state *st;
- bool sve_in_use;
};
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
#ifdef CONFIG_ARM64_SVE
/* Maximum supported vector length across all CPUs (initially poisoned) */
-int __ro_after_init sve_max_vl = -1;
+int __ro_after_init sve_max_vl = SVE_VL_MIN;
/* Set of available vector lengths, as vq_to_bit(vq): */
static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
static void __percpu *efi_sve_state;
__sve_free(task);
}
-
- /* Offset of FFR in the SVE register dump */
- static size_t sve_ffr_offset(int vl)
- {
- return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET;
- }
-
- static void *sve_pffr(struct task_struct *task)
- {
- return (char *)task->thread.sve_state +
- sve_ffr_offset(task->thread.sve_vl);
- }
-
static void change_cpacr(u64 val, u64 mask)
{
u64 cpacr = read_sysreg(CPACR_EL1);
WARN_ON(!in_softirq() && !irqs_disabled());
if (system_supports_sve() && test_thread_flag(TIF_SVE))
- sve_load_state(sve_pffr(current),
+ sve_load_state(sve_pffr(¤t->thread),
¤t->thread.uw.fpsimd_state.fpsr,
sve_vq_from_vl(current->thread.sve_vl) - 1);
else
fpsimd_load_state(¤t->thread.uw.fpsimd_state);
-
- if (system_supports_sve()) {
- /* Toggle SVE trapping for userspace if needed */
- if (test_thread_flag(TIF_SVE))
- sve_user_enable();
- else
- sve_user_disable();
-
- /* Serialised by exception return to user */
- }
}
/*
- * Ensure current's FPSIMD/SVE storage in thread_struct is up to date
- * with respect to the CPU registers.
+ * Ensure FPSIMD/SVE storage in memory for the loaded context is up to
+ * date with respect to the CPU registers.
*
* Softirqs (and preemption) must be disabled.
*/
- static void task_fpsimd_save(void)
+ void fpsimd_save(void)
{
+ struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st);
+ /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
+
WARN_ON(!in_softirq() && !irqs_disabled());
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
return;
}
- sve_save_state(sve_pffr(current),
- ¤t->thread.uw.fpsimd_state.fpsr);
+ sve_save_state(sve_pffr(¤t->thread), &st->fpsr);
} else
- fpsimd_save_state(¤t->thread.uw.fpsimd_state);
+ fpsimd_save_state(st);
}
}
return ret;
/* Writing -1 has the special meaning "set to max": */
- if (vl == -1) {
- /* Fail safe if sve_max_vl wasn't initialised */
- if (WARN_ON(!sve_vl_valid(sve_max_vl)))
- vl = SVE_VL_MIN;
- else
- vl = sve_max_vl;
-
- goto chosen;
- }
+ if (vl == -1)
+ vl = sve_max_vl;
if (!sve_vl_valid(vl))
return -EINVAL;
- vl = find_supported_vector_length(vl);
-chosen:
- sve_default_vl = vl;
+ sve_default_vl = find_supported_vector_length(vl);
return 0;
}
if (task == current) {
local_bh_disable();
- task_fpsimd_save();
+ fpsimd_save();
set_thread_flag(TIF_FOREIGN_FPSTATE);
}
task->thread.sve_vl = vl;
out:
- if (flags & PR_SVE_VL_INHERIT)
- set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
- else
- clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT);
+ update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT,
+ flags & PR_SVE_VL_INHERIT);
return 0;
}
isb();
}
+ /*
+ * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
+ * vector length.
+ *
+ * Use only if SVE is present.
+ * This function clobbers the SVE vector length.
+ */
+ u64 read_zcr_features(void)
+ {
+ u64 zcr;
+ unsigned int vq_max;
+
+ /*
+ * Set the maximum possible VL, and write zeroes to all other
+ * bits to see if they stick.
+ */
+ sve_kernel_enable(NULL);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
+
+ zcr = read_sysreg_s(SYS_ZCR_EL1);
+ zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
+ vq_max = sve_vq_from_vl(sve_get_vl());
+ zcr |= vq_max - 1; /* set LEN field to maximum effective value */
+
+ return zcr;
+ }
+
void __init sve_setup(void)
{
u64 zcr;
local_bh_disable();
- task_fpsimd_save();
+ fpsimd_save();
fpsimd_to_sve(current);
/* Force ret_to_user to reload the registers: */
si_code = FPE_FLTRES;
}
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = SIGFPE;
info.si_code = si_code;
info.si_addr = (void __user *)instruction_pointer(regs);
void fpsimd_thread_switch(struct task_struct *next)
{
+ bool wrong_task, wrong_cpu;
+
if (!system_supports_fpsimd())
return;
+
+ /* Save unsaved fpsimd state, if any: */
+ fpsimd_save();
+
/*
- * Save the current FPSIMD state to memory, but only if whatever is in
- * the registers is in fact the most recent userland FPSIMD state of
- * 'current'.
+ * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
+ * state. For kernel threads, FPSIMD registers are never loaded
+ * and wrong_task and wrong_cpu will always be true.
*/
- if (current->mm)
- task_fpsimd_save();
+ wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
+ &next->thread.uw.fpsimd_state;
+ wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();
- if (next->mm) {
- /*
- * If we are switching to a task whose most recent userland
- * FPSIMD state is already in the registers of *this* cpu,
- * we can skip loading the state from memory. Otherwise, set
- * the TIF_FOREIGN_FPSTATE flag so the state will be loaded
- * upon the next return to userland.
- */
- if (__this_cpu_read(fpsimd_last_state.st) ==
- &next->thread.uw.fpsimd_state
- && next->thread.fpsimd_cpu == smp_processor_id())
- clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
- else
- set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE);
- }
+ update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
+ wrong_task || wrong_cpu);
}
void fpsimd_flush_thread(void)
return;
local_bh_disable();
- task_fpsimd_save();
+ fpsimd_save();
local_bh_enable();
}
* Associate current's FPSIMD context with this cpu
* Preemption must be disabled when calling this function.
*/
- static void fpsimd_bind_to_cpu(void)
+ void fpsimd_bind_task_to_cpu(void)
{
struct fpsimd_last_state_struct *last =
this_cpu_ptr(&fpsimd_last_state);
last->st = ¤t->thread.uw.fpsimd_state;
- last->sve_in_use = test_thread_flag(TIF_SVE);
current->thread.fpsimd_cpu = smp_processor_id();
+
+ if (system_supports_sve()) {
+ /* Toggle SVE trapping for userspace if needed */
+ if (test_thread_flag(TIF_SVE))
+ sve_user_enable();
+ else
+ sve_user_disable();
+
+ /* Serialised by exception return to user */
+ }
+ }
+
+ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st)
+ {
+ struct fpsimd_last_state_struct *last =
+ this_cpu_ptr(&fpsimd_last_state);
+
+ WARN_ON(!in_softirq() && !irqs_disabled());
+
+ last->st = st;
}
/*
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
task_fpsimd_load();
- fpsimd_bind_to_cpu();
+ fpsimd_bind_task_to_cpu();
}
local_bh_enable();
fpsimd_to_sve(current);
task_fpsimd_load();
+ fpsimd_bind_task_to_cpu();
- if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE))
- fpsimd_bind_to_cpu();
+ clear_thread_flag(TIF_FOREIGN_FPSTATE);
local_bh_enable();
}
t->thread.fpsimd_cpu = NR_CPUS;
}
- static inline void fpsimd_flush_cpu_state(void)
+ void fpsimd_flush_cpu_state(void)
{
__this_cpu_write(fpsimd_last_state.st, NULL);
+ set_thread_flag(TIF_FOREIGN_FPSTATE);
}
- /*
- * Invalidate any task SVE state currently held in this CPU's regs.
- *
- * This is used to prevent the kernel from trying to reuse SVE register data
- * that is detroyed by KVM guest enter/exit. This function should go away when
- * KVM SVE support is implemented. Don't use it for anything else.
- */
- #ifdef CONFIG_ARM64_SVE
- void sve_flush_cpu_state(void)
- {
- struct fpsimd_last_state_struct const *last =
- this_cpu_ptr(&fpsimd_last_state);
-
- if (last->st && last->sve_in_use)
- fpsimd_flush_cpu_state();
- }
- #endif /* CONFIG_ARM64_SVE */
-
#ifdef CONFIG_KERNEL_MODE_NEON
DEFINE_PER_CPU(bool, kernel_neon_busy);
__this_cpu_write(kernel_neon_busy, true);
- /* Save unsaved task fpsimd state, if any: */
- if (current->mm) {
- task_fpsimd_save();
- set_thread_flag(TIF_FOREIGN_FPSTATE);
- }
+ /* Save unsaved fpsimd state, if any: */
+ fpsimd_save();
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
{
switch (cmd) {
case CPU_PM_ENTER:
- if (current->mm)
- task_fpsimd_save();
+ fpsimd_save();
fpsimd_flush_cpu_state();
break;
case CPU_PM_EXIT:
- if (current->mm)
- set_thread_flag(TIF_FOREIGN_FPSTATE);
break;
case CPU_PM_ENTER_FAILED:
default:
#include <asm/compat.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
+ #include <asm/fpsimd.h>
#include <asm/pgtable.h>
#include <asm/stacktrace.h>
#include <asm/syscall.h>
vq = sve_vq_from_vl(header->vl);
header->max_vl = sve_max_vl;
- if (WARN_ON(!sve_vl_valid(sve_max_vl)))
- header->max_vl = header->vl;
-
header->size = SVE_PT_SIZE(vq, header->flags);
header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl),
SVE_PT_REGS_SVE);
};
#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
enum compat_regset {
REGSET_COMPAT_GPR,
REGSET_COMPAT_VFP,
*/
ldr x1, [sp] // Guest's x0
eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1
+ cbz w1, wa_epilogue
+
+ /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
+ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \
+ ARM_SMCCC_ARCH_WORKAROUND_2)
cbnz w1, el1_trap
- mov x0, x1
+
+#ifdef CONFIG_ARM64_SSBD
+alternative_cb arm64_enable_wa2_handling
+ b wa2_end
+alternative_cb_end
+ get_vcpu_ptr x2, x0
+ ldr x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+ // Sanitize the argument and update the guest flags
+ ldr x1, [sp, #8] // Guest's x1
+ clz w1, w1 // Murphy's device:
+ lsr w1, w1, #5 // w1 = !!w1 without using
+ eor w1, w1, #1 // the flags...
+ bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1
+ str x0, [x2, #VCPU_WORKAROUND_FLAGS]
+
+ /* Check that we actually need to perform the call */
+ hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2
+ cbz x0, wa2_end
+
+ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2
+ smc #0
+
+ /* Don't leak data from the SMC call */
+ mov x3, xzr
+wa2_end:
+ mov x2, xzr
+ mov x1, xzr
+#endif
+
+wa_epilogue:
+ mov x0, xzr
add sp, sp, #16
eret
el1_trap:
get_vcpu_ptr x1, x0
-
- mrs x0, esr_el2
- lsr x0, x0, #ESR_ELx_EC_SHIFT
- /*
- * x0: ESR_EC
- * x1: vcpu pointer
- */
-
- /*
- * We trap the first access to the FP/SIMD to save the host context
- * and restore the guest context lazily.
- * If FP/SIMD is not implemented, handle the trap and inject an
- * undefined instruction exception to the guest.
- */
- alternative_if_not ARM64_HAS_NO_FPSIMD
- cmp x0, #ESR_ELx_EC_FP_ASIMD
- b.eq __fpsimd_guest_restore
- alternative_else_nop_endif
-
mov x0, #ARM_EXCEPTION_TRAP
b __guest_exit
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm-smccc.h>
#include <linux/types.h>
#include <linux/jump_label.h>
#include <uapi/linux/psci.h>
#include <kvm/arm_psci.h>
+ #include <asm/cpufeature.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
+ #include <asm/kvm_host.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
+ #include <asm/processor.h>
+ #include <asm/thread_info.h>
- static bool __hyp_text __fpsimd_enabled_nvhe(void)
+ /* Check whether the FP regs were dirtied while in the host-side run loop: */
+ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
{
- return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
- }
+ if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_FP_HOST);
- static bool fpsimd_enabled_vhe(void)
- {
- return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN);
+ return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
}
/* Save the 32-bit only FPSIMD system register state */
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
- val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
+ val &= ~CPACR_EL1_ZEN;
+ if (!update_fp_enabled(vcpu))
+ val &= ~CPACR_EL1_FPEN;
+
write_sysreg(val, cpacr_el1);
write_sysreg(kvm_get_hyp_vector(), vbar_el1);
__activate_traps_common(vcpu);
val = CPTR_EL2_DEFAULT;
- val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ;
+ val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
+ if (!update_fp_enabled(vcpu))
+ val |= CPTR_EL2_TFP;
+
write_sysreg(val, cptr_el2);
}
}
}
+ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+ {
+ struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+
+ if (has_vhe())
+ write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
+ cpacr_el1);
+ else
+ write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+ cptr_el2);
+
+ isb();
+
+ if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+ /*
+ * In the SVE case, VHE is assumed: it is enforced by
+ * Kconfig and kvm_arch_init().
+ */
+ if (system_supports_sve() &&
+ (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+ struct thread_struct *thread = container_of(
+ host_fpsimd,
+ struct thread_struct, uw.fpsimd_state);
+
+ sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+ } else {
+ __fpsimd_save_state(host_fpsimd);
+ }
+
+ vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+ }
+
+ __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+
+ /* Skip restoring fpexc32 for AArch64 guests */
+ if (!(read_sysreg(hcr_el2) & HCR_RW))
+ write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+ fpexc32_el2);
+
+ vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+ return true;
+ }
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
* same PC once the SError has been injected, and replay the
* trapping instruction.
*/
- if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
+ if (*exit_code != ARM_EXCEPTION_TRAP)
+ goto exit;
+
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ */
+ if (system_supports_fpsimd() &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
+ return __hyp_switch_fpsimd(vcpu);
+
+ if (!__populate_fault_info(vcpu))
return true;
- if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
- *exit_code == ARM_EXCEPTION_TRAP) {
+ if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
bool valid;
valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
if (valid) {
int ret = __vgic_v2_perform_cpuif_access(vcpu);
- if (ret == 1) {
- if (__skip_instr(vcpu))
- return true;
- else
- *exit_code = ARM_EXCEPTION_TRAP;
- }
+ if (ret == 1 && __skip_instr(vcpu))
+ return true;
if (ret == -1) {
/* Promote an illegal access to an
*vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
*exit_code = ARM_EXCEPTION_EL1_SERROR;
}
+
+ goto exit;
}
}
if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
- *exit_code == ARM_EXCEPTION_TRAP &&
(kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
int ret = __vgic_v3_perform_cpuif_access(vcpu);
- if (ret == 1) {
- if (__skip_instr(vcpu))
- return true;
- else
- *exit_code = ARM_EXCEPTION_TRAP;
- }
+ if (ret == 1 && __skip_instr(vcpu))
+ return true;
}
+ exit:
/* Return to the host kernel and handle the exit */
return false;
}
+static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+ if (!cpus_have_const_cap(ARM64_SSBD))
+ return false;
+
+ return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * The host runs with the workaround always present. If the
+ * guest wants it disabled, so be it...
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+ /*
+ * If the guest has disabled the workaround, bring it back on.
+ */
+ if (__needs_ssbd_off(vcpu) &&
+ __hyp_this_cpu_read(arm64_ssbd_callback_required))
+ arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
+
/* Switch to the guest for VHE systems running in EL2 */
int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
u64 exit_code;
host_ctxt = vcpu->arch.host_cpu_context;
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
+ __set_guest_arch_workaround_state(vcpu);
+
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
- fp_enabled = fpsimd_enabled_vhe();
-
+ __set_host_arch_workaround_state(vcpu);
+
sysreg_save_guest_state_vhe(guest_ctxt);
__deactivate_traps(vcpu);
sysreg_restore_host_state_vhe(host_ctxt);
- if (fp_enabled) {
- __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
- __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
- }
__debug_switch_to_host(vcpu);
{
struct kvm_cpu_context *host_ctxt;
struct kvm_cpu_context *guest_ctxt;
- bool fp_enabled;
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
+ __set_guest_arch_workaround_state(vcpu);
+
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
} while (fixup_guest_exit(vcpu, &exit_code));
- fp_enabled = __fpsimd_enabled_nvhe();
-
+ __set_host_arch_workaround_state(vcpu);
+
__sysreg_save_state_nvhe(guest_ctxt);
__sysreg32_save_state(vcpu);
__timer_disable_traps(vcpu);
__sysreg_restore_state_nvhe(host_ctxt);
- if (fp_enabled) {
- __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs);
- __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs);
+ if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu);
- }
/*
* This must come after restoring the host sysregs, since a non-VHE
{ "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU },
{ "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU },
{ "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU },
- { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+ { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
{ "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU },
{ "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU },
{ "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU },
return -ENOIOCTLCMD;
}
- int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
vc->in_guest = 0;
vc->napping_threads = 0;
vc->conferring_threads = 0;
+ vc->tb_offset_applied = 0;
}
static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+ this_cpu_disable_ftrace();
+
trap = __kvmppc_vcore_entry();
+ this_cpu_enable_ftrace();
+
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
trace_hardirqs_off();
*/
snprintf(buf, sizeof(buf), "vm%d", current->pid);
kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
- if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
- kvmppc_mmu_debugfs_init(kvm);
+ kvmppc_mmu_debugfs_init(kvm);
return 0;
}
#define _PAGE_WRITE 0x020 /* SW pte write bit */
#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */
-#define __HAVE_ARCH_PTE_SPECIAL
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SOFT_DIRTY 0x002 /* SW pte soft dirty bit */
* faults should no longer be backed by zero pages
*/
#define mm_forbids_zeropage mm_has_pgste
- static inline int mm_use_skey(struct mm_struct *mm)
+ static inline int mm_uses_skeys(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
- if (mm->context.use_skey)
+ if (mm->context.uses_skeys)
return 1;
#endif
return 0;
#include <asm/gmap.h>
#include <asm/io.h>
#include <asm/ptrace.h>
-#include <asm/compat.h>
#include <asm/sclp.h>
#include "gaccess.h"
#include "kvm-s390.h"
int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
{
- int rc = 0;
+ int rc;
struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
trace_kvm_s390_skey_related_inst(vcpu);
- if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
+ /* Already enabled? */
+ if (vcpu->kvm->arch.use_skf &&
+ !(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
!kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
- return rc;
+ return 0;
rc = s390_enable_skey();
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
- if (!rc) {
- if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
- kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
- else
- sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
- ICTL_RRBE);
- }
- return rc;
+ if (rc)
+ return rc;
+
+ if (kvm_s390_test_cpuflags(vcpu, CPUSTAT_KSS))
+ kvm_s390_clear_cpuflags(vcpu, CPUSTAT_KSS);
+ if (!vcpu->kvm->arch.use_skf)
+ sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ else
+ sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ return 0;
}
static int try_handle_skey(struct kvm_vcpu *vcpu)
rc = kvm_s390_skey_check_enable(vcpu);
if (rc)
return rc;
- if (sclp.has_skey) {
+ if (vcpu->kvm->arch.use_skf) {
/* with storage-key facility, SIE interprets it for us */
kvm_s390_retry_instr(vcpu);
VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
unsigned ad_disabled:1;
- unsigned :7;
+ unsigned guest_mode:1;
+ unsigned :6;
/*
* This is left at the top of the word so that
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
+ cpumask_t tlb_lush;
};
struct kvm_vcpu_arch {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
- bool (*cpu_has_high_real_mode_segbase)(void);
+ bool (*has_emulated_msr)(int index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
struct kvm *(*vm_alloc)(void);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
- void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+ void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
- F(IBPB) | F(IBRS);
+ F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
+ F(AMD_SSB_NO);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
const u32 kvm_cpuid_7_0_ecx_x86_features =
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+ F(CLDEMOTE);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
- F(ARCH_CAPABILITIES);
+ F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
+ /*
+ * We emulate ARCH_CAPABILITIES in software even
+ * if the host doesn't support it.
+ */
+ entry->edx |= F(ARCH_CAPABILITIES);
} else {
entry->ebx = 0;
entry->ecx = 0;
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
- /* IBRS and IBPB aren't necessarily present in hardware cpuid */
- if (boot_cpu_has(X86_FEATURE_IBPB))
- entry->ebx |= F(IBPB);
- if (boot_cpu_has(X86_FEATURE_IBRS))
- entry->ebx |= F(IBRS);
+ /*
+ * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
+ * hardware cpuid
+ */
+ if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ entry->ebx |= F(AMD_IBPB);
+ if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+ entry->ebx |= F(AMD_IBRS);
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ /*
+ * The preference is to use SPEC CTRL MSR instead of the
+ * VIRT_SPEC MSR.
+ */
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+ !boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
break;
}
case 0x80000019:
return kvm_hv_get_msr(vcpu, msr, pdata);
}
+ static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+ {
+ int i = 0, j;
+
+ if (!(valid_bank_mask & BIT_ULL(bank_no)))
+ return -1;
+
+ for (j = 0; j < bank_no; j++)
+ if (valid_bank_mask & BIT_ULL(j))
+ i++;
+
+ return i;
+ }
+
+ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
+ u16 rep_cnt, bool ex)
+ {
+ struct kvm *kvm = current_vcpu->kvm;
+ struct kvm_vcpu_hv *hv_current = ¤t_vcpu->arch.hyperv;
+ struct hv_tlb_flush_ex flush_ex;
+ struct hv_tlb_flush flush;
+ struct kvm_vcpu *vcpu;
+ unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
+ unsigned long valid_bank_mask = 0;
+ u64 sparse_banks[64];
+ int sparse_banks_len, i;
+ bool all_cpus;
+
+ if (!ex) {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ trace_kvm_hv_flush_tlb(flush.processor_mask,
+ flush.address_space, flush.flags);
+
+ sparse_banks[0] = flush.processor_mask;
+ all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+ } else {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
+ sizeof(flush_ex))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
+ flush_ex.hv_vp_set.format,
+ flush_ex.address_space,
+ flush_ex.flags);
+
+ valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
+ all_cpus = flush_ex.hv_vp_set.format !=
+ HV_GENERIC_SET_SPARSE_4K;
+
+ sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+ sizeof(sparse_banks[0]);
+
+ if (!sparse_banks_len && !all_cpus)
+ goto ret_success;
+
+ if (!all_cpus &&
+ kvm_read_guest(kvm,
+ ingpa + offsetof(struct hv_tlb_flush_ex,
+ hv_vp_set.bank_contents),
+ sparse_banks,
+ sparse_banks_len))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }
+
+ cpumask_clear(&hv_current->tlb_lush);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+ int bank = hv->vp_index / 64, sbank = 0;
+
+ if (!all_cpus) {
+ /* Banks >64 can't be represented */
+ if (bank >= 64)
+ continue;
+
+ /* Non-ex hypercalls can only address first 64 vCPUs */
+ if (!ex && bank)
+ continue;
+
+ if (ex) {
+ /*
+ * Check is the bank of this vCPU is in sparse
+ * set and get the sparse bank number.
+ */
+ sbank = get_sparse_bank_no(valid_bank_mask,
+ bank);
+
+ if (sbank < 0)
+ continue;
+ }
+
+ if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
+ continue;
+ }
+
+ /*
+ * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
+ * can't analyze it here, flush TLB regardless of the specified
+ * address space.
+ */
+ __set_bit(i, vcpu_bitmap);
+ }
+
+ kvm_make_vcpus_request_mask(kvm,
+ KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+ vcpu_bitmap, &hv_current->tlb_lush);
+
+ ret_success:
+ /* We always do full TLB flush, set rep_done = rep_cnt. */
+ return (u64)HV_STATUS_SUCCESS |
+ ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+ }
+
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
}
}
-static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
- struct kvm_run *run = vcpu->run;
-
- kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
+ kvm_hv_hypercall_set_result(vcpu, result);
+ ++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
}
+static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
+{
+ return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
+}
+
static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
{
struct eventfd_ctx *eventfd;
{
u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
uint16_t code, rep_idx, rep_cnt;
- bool fast, longmode;
+ bool fast, longmode, rep;
/*
* hypercall generates UD from non zero cpl and real mode
#endif
code = param & 0xffff;
- fast = (param >> 16) & 0x1;
- rep_cnt = (param >> 32) & 0xfff;
- rep_idx = (param >> 48) & 0xfff;
+ fast = !!(param & HV_HYPERCALL_FAST_BIT);
+ rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
+ rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
+ rep = !!(rep_cnt || rep_idx);
trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
- /* Hypercall continuation is not supported yet */
- if (rep_cnt || rep_idx) {
- ret = HV_STATUS_INVALID_HYPERCALL_CODE;
- goto out;
- }
-
switch (code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
+ if (unlikely(rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
kvm_vcpu_on_spin(vcpu, true);
break;
case HVCALL_SIGNAL_EVENT:
+ if (unlikely(rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
/* maybe userspace knows this conn_id: fall through */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
- if (!vcpu_to_synic(vcpu)->active) {
- ret = HV_STATUS_INVALID_HYPERCALL_CODE;
+ if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
vcpu->arch.complete_userspace_io =
kvm_hv_hypercall_complete_userspace;
return 0;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+ if (unlikely(fast || !rep_cnt || rep_idx)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+ break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+ if (unlikely(fast || rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+ break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+ if (unlikely(fast || !rep_cnt || rep_idx)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+ break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+ if (unlikely(fast || rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+ break;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
- out:
- kvm_hv_hypercall_set_result(vcpu, ret);
- return 1;
+ return kvm_hv_hypercall_complete(vcpu, ret);
}
void kvm_hv_init_vm(struct kvm *kvm)
static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
- apic->lapic_timer.tscdeadline +=
- nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ ktime_t now = ktime_get();
+ u64 tscl = rdtsc();
+ ktime_t delta;
+
+ /*
+ * Synchronize both deadlines to the same time source or
+ * differences in the periods (caused by differences in the
+ * underlying clocks or numerical approximation errors) will
+ * cause the two to drift apart over time as the errors
+ * accumulate.
+ */
apic->lapic_timer.target_expiration =
ktime_add_ns(apic->lapic_timer.target_expiration,
apic->lapic_timer.period);
+ delta = ktime_sub(apic->lapic_timer.target_expiration, now);
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, delta);
}
static void start_sw_period(struct kvm_lapic *apic)
}
}
- if ((old_value ^ value) & X2APIC_ENABLE) {
- if (value & X2APIC_ENABLE) {
- kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
- kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
- } else
- kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
- }
+ if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+ kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+
+ if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+ kvm_x86_ops->set_virtual_apic_mode(vcpu);
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
static void mmu_spte_set(u64 *sptep, u64 spte);
- static void mmu_free_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
{
siginfo_t info;
+ clear_siginfo(&info);
info.si_signo = SIGBUS;
info.si_errno = 0;
info.si_code = BUS_MCEERR_AR;
return RET_PF_RETRY;
}
-
- static void mmu_free_roots(struct kvm_vcpu *vcpu)
+ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
+ struct list_head *invalid_list)
{
- int i;
struct kvm_mmu_page *sp;
- LIST_HEAD(invalid_list);
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(*root_hpa))
return;
- if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
- (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
- vcpu->arch.mmu.direct_map)) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
+ sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK);
+ --sp->root_count;
+ if (!sp->root_count && sp->role.invalid)
+ kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
- spin_lock(&vcpu->kvm->mmu_lock);
- sp = page_header(root);
- --sp->root_count;
- if (!sp->root_count && sp->role.invalid) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- }
- spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ *root_hpa = INVALID_PAGE;
+ }
+
+ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+ {
+ int i;
+ LIST_HEAD(invalid_list);
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+ if (!VALID_PAGE(mmu->root_hpa))
return;
- }
spin_lock(&vcpu->kvm->mmu_lock);
- for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
- if (root) {
- root &= PT64_BASE_ADDR_MASK;
- sp = page_header(root);
- --sp->root_count;
- if (!sp->root_count && sp->role.invalid)
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
- }
- vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+ (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+ mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
+ } else {
+ for (i = 0; i < 4; ++i)
+ if (mmu->pae_root[i] != 0)
+ mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
+ &invalid_list);
+ mmu->root_hpa = INVALID_PAGE;
}
+
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
}
+ EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
{
*/
return RET_PF_RETRY;
}
- EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
u32 error_code, gfn_t gfn)
struct kvm_memory_slot *slot;
bool async;
+ /*
+ * Don't expose private memslots to L2.
+ */
+ if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ *pfn = KVM_PFN_NOSLOT;
+ return false;
+ }
+
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
async = false;
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
- mmu_free_roots(vcpu);
+ kvm_mmu_free_roots(vcpu);
}
static unsigned long get_cr3(struct kvm_vcpu *vcpu)
struct kvm_mmu *context = &vcpu->arch.mmu;
context->base_role.word = 0;
+ context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
context->base_role.ad_disabled = (shadow_accessed_mask == 0);
context->page_fault = tdp_page_fault;
= smep && !is_write_protection(vcpu);
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
+ context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
reset_shadow_zero_bits_mask(vcpu, context);
}
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
context->base_role.ad_disabled = !accessed_dirty;
-
+ context->base_role.guest_mode = 1;
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
update_last_nonleaf_level(vcpu, context);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
- mmu_free_roots(vcpu);
+ kvm_mmu_free_roots(vcpu);
WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
mask.smep_andnot_wp = 1;
mask.smap_andnot_wp = 1;
mask.smm = 1;
+ mask.guest_mode = 1;
mask.ad_disabled = 1;
/*
#include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"
} host;
u64 spec_ctrl;
+ /*
+ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+ * translated into the appropriate L2_CFG bits on the host to
+ * perform speculative control.
+ */
+ u64 virt_spec_ctrl;
u32 *msrpm;
unsigned long npages, npinned, size;
unsigned long locked, lock_limit;
struct page **pages;
- int first, last;
+ unsigned long first, last;
+
+ if (ulen == 0 || uaddr + ulen < uaddr)
+ return NULL;
/* Calculate number of pages. */
first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
static struct kvm *svm_vm_alloc(void)
{
- struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+ struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
return &kvm_svm->kvm;
}
static void svm_vm_free(struct kvm *kvm)
{
- kfree(to_kvm_svm(kvm));
+ vfree(to_kvm_svm(kvm));
}
static void sev_vm_destroy(struct kvm *kvm)
vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
+ svm->virt_spec_ctrl = 0;
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;
msr_info->data = svm->spec_ctrl;
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ return 1;
+
+ msr_info->data = svm->virt_spec_ctrl;
+ break;
case MSR_F15H_IC_CFG: {
int family, model;
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
svm->spec_ctrl = data;
break;
case MSR_IA32_PRED_CMD:
if (!msr->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBPB))
+ !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB))
return 1;
if (data & ~PRED_CMD_IBPB)
break;
set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD))
+ return 1;
+
+ if (data & ~SPEC_CTRL_SSBD)
+ return 1;
+
+ svm->virt_spec_ctrl = data;
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}
- static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
return;
}
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+ loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+ loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
- loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
-#endif
-#endif
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
reload_tss(vcpu);
return false;
}
-static bool svm_has_high_real_mode_segbase(void)
+static bool svm_has_emulated_msr(int index)
{
return true;
}
if (!sev_guest(kvm))
return -ENOTTY;
+ if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
+ return -EINVAL;
+
region = kzalloc(sizeof(*region), GFP_KERNEL);
if (!region)
return -ENOMEM;
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
- .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
+ .has_emulated_msr = svm_has_emulated_msr,
.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
- .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
+ .set_virtual_apic_mode = svm_set_virtual_apic_mode,
.get_enable_apicv = svm_get_enable_apicv,
.refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = svm_load_eoi_exitmap,
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/mshyperv.h>
#include "trace.h"
* underlying hardware which will be used to run L2.
* This structure is packed to ensure that its layout is identical across
* machines (necessary for live migration).
- * If there are changes in this struct, VMCS12_REVISION must be changed.
+ *
+ * IMPORTANT: Changing the layout of existing fields in this structure
+ * will break save/restore compatibility with older kvm releases. When
+ * adding new fields, either use space in the reserved padding* arrays
+ * or add the new fields to the end of the structure.
*/
typedef u64 natural_width;
struct __packed vmcs12 {
u64 virtual_apic_page_addr;
u64 apic_access_addr;
u64 posted_intr_desc_addr;
- u64 vm_function_control;
u64 ept_pointer;
u64 eoi_exit_bitmap0;
u64 eoi_exit_bitmap1;
u64 eoi_exit_bitmap2;
u64 eoi_exit_bitmap3;
- u64 eptp_list_address;
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
- u64 pml_address;
u64 guest_ia32_debugctl;
u64 guest_ia32_pat;
u64 guest_ia32_efer;
u64 host_ia32_pat;
u64 host_ia32_efer;
u64 host_ia32_perf_global_ctrl;
- u64 padding64[8]; /* room for future expansion */
+ u64 vmread_bitmap;
+ u64 vmwrite_bitmap;
+ u64 vm_function_control;
+ u64 eptp_list_address;
+ u64 pml_address;
+ u64 padding64[3]; /* room for future expansion */
/*
* To allow migration of L1 (complete with its L2 guests) between
* machines of different natural widths (32 or 64 bit), we cannot have
u16 guest_ldtr_selector;
u16 guest_tr_selector;
u16 guest_intr_status;
- u16 guest_pml_index;
u16 host_es_selector;
u16 host_cs_selector;
u16 host_ss_selector;
u16 host_fs_selector;
u16 host_gs_selector;
u16 host_tr_selector;
+ u16 guest_pml_index;
};
+ /*
+ * For save/restore compatibility, the vmcs12 field offsets must not change.
+ */
+ #define CHECK_OFFSET(field, loc) \
+ BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \
+ "Offset of " #field " in struct vmcs12 has changed.")
+
+ static inline void vmx_check_vmcs12_offsets(void) {
+ CHECK_OFFSET(revision_id, 0);
+ CHECK_OFFSET(abort, 4);
+ CHECK_OFFSET(launch_state, 8);
+ CHECK_OFFSET(io_bitmap_a, 40);
+ CHECK_OFFSET(io_bitmap_b, 48);
+ CHECK_OFFSET(msr_bitmap, 56);
+ CHECK_OFFSET(vm_exit_msr_store_addr, 64);
+ CHECK_OFFSET(vm_exit_msr_load_addr, 72);
+ CHECK_OFFSET(vm_entry_msr_load_addr, 80);
+ CHECK_OFFSET(tsc_offset, 88);
+ CHECK_OFFSET(virtual_apic_page_addr, 96);
+ CHECK_OFFSET(apic_access_addr, 104);
+ CHECK_OFFSET(posted_intr_desc_addr, 112);
+ CHECK_OFFSET(ept_pointer, 120);
+ CHECK_OFFSET(eoi_exit_bitmap0, 128);
+ CHECK_OFFSET(eoi_exit_bitmap1, 136);
+ CHECK_OFFSET(eoi_exit_bitmap2, 144);
+ CHECK_OFFSET(eoi_exit_bitmap3, 152);
+ CHECK_OFFSET(xss_exit_bitmap, 160);
+ CHECK_OFFSET(guest_physical_address, 168);
+ CHECK_OFFSET(vmcs_link_pointer, 176);
+ CHECK_OFFSET(guest_ia32_debugctl, 184);
+ CHECK_OFFSET(guest_ia32_pat, 192);
+ CHECK_OFFSET(guest_ia32_efer, 200);
+ CHECK_OFFSET(guest_ia32_perf_global_ctrl, 208);
+ CHECK_OFFSET(guest_pdptr0, 216);
+ CHECK_OFFSET(guest_pdptr1, 224);
+ CHECK_OFFSET(guest_pdptr2, 232);
+ CHECK_OFFSET(guest_pdptr3, 240);
+ CHECK_OFFSET(guest_bndcfgs, 248);
+ CHECK_OFFSET(host_ia32_pat, 256);
+ CHECK_OFFSET(host_ia32_efer, 264);
+ CHECK_OFFSET(host_ia32_perf_global_ctrl, 272);
+ CHECK_OFFSET(vmread_bitmap, 280);
+ CHECK_OFFSET(vmwrite_bitmap, 288);
+ CHECK_OFFSET(vm_function_control, 296);
+ CHECK_OFFSET(eptp_list_address, 304);
+ CHECK_OFFSET(pml_address, 312);
+ CHECK_OFFSET(cr0_guest_host_mask, 344);
+ CHECK_OFFSET(cr4_guest_host_mask, 352);
+ CHECK_OFFSET(cr0_read_shadow, 360);
+ CHECK_OFFSET(cr4_read_shadow, 368);
+ CHECK_OFFSET(cr3_target_value0, 376);
+ CHECK_OFFSET(cr3_target_value1, 384);
+ CHECK_OFFSET(cr3_target_value2, 392);
+ CHECK_OFFSET(cr3_target_value3, 400);
+ CHECK_OFFSET(exit_qualification, 408);
+ CHECK_OFFSET(guest_linear_address, 416);
+ CHECK_OFFSET(guest_cr0, 424);
+ CHECK_OFFSET(guest_cr3, 432);
+ CHECK_OFFSET(guest_cr4, 440);
+ CHECK_OFFSET(guest_es_base, 448);
+ CHECK_OFFSET(guest_cs_base, 456);
+ CHECK_OFFSET(guest_ss_base, 464);
+ CHECK_OFFSET(guest_ds_base, 472);
+ CHECK_OFFSET(guest_fs_base, 480);
+ CHECK_OFFSET(guest_gs_base, 488);
+ CHECK_OFFSET(guest_ldtr_base, 496);
+ CHECK_OFFSET(guest_tr_base, 504);
+ CHECK_OFFSET(guest_gdtr_base, 512);
+ CHECK_OFFSET(guest_idtr_base, 520);
+ CHECK_OFFSET(guest_dr7, 528);
+ CHECK_OFFSET(guest_rsp, 536);
+ CHECK_OFFSET(guest_rip, 544);
+ CHECK_OFFSET(guest_rflags, 552);
+ CHECK_OFFSET(guest_pending_dbg_exceptions, 560);
+ CHECK_OFFSET(guest_sysenter_esp, 568);
+ CHECK_OFFSET(guest_sysenter_eip, 576);
+ CHECK_OFFSET(host_cr0, 584);
+ CHECK_OFFSET(host_cr3, 592);
+ CHECK_OFFSET(host_cr4, 600);
+ CHECK_OFFSET(host_fs_base, 608);
+ CHECK_OFFSET(host_gs_base, 616);
+ CHECK_OFFSET(host_tr_base, 624);
+ CHECK_OFFSET(host_gdtr_base, 632);
+ CHECK_OFFSET(host_idtr_base, 640);
+ CHECK_OFFSET(host_ia32_sysenter_esp, 648);
+ CHECK_OFFSET(host_ia32_sysenter_eip, 656);
+ CHECK_OFFSET(host_rsp, 664);
+ CHECK_OFFSET(host_rip, 672);
+ CHECK_OFFSET(pin_based_vm_exec_control, 744);
+ CHECK_OFFSET(cpu_based_vm_exec_control, 748);
+ CHECK_OFFSET(exception_bitmap, 752);
+ CHECK_OFFSET(page_fault_error_code_mask, 756);
+ CHECK_OFFSET(page_fault_error_code_match, 760);
+ CHECK_OFFSET(cr3_target_count, 764);
+ CHECK_OFFSET(vm_exit_controls, 768);
+ CHECK_OFFSET(vm_exit_msr_store_count, 772);
+ CHECK_OFFSET(vm_exit_msr_load_count, 776);
+ CHECK_OFFSET(vm_entry_controls, 780);
+ CHECK_OFFSET(vm_entry_msr_load_count, 784);
+ CHECK_OFFSET(vm_entry_intr_info_field, 788);
+ CHECK_OFFSET(vm_entry_exception_error_code, 792);
+ CHECK_OFFSET(vm_entry_instruction_len, 796);
+ CHECK_OFFSET(tpr_threshold, 800);
+ CHECK_OFFSET(secondary_vm_exec_control, 804);
+ CHECK_OFFSET(vm_instruction_error, 808);
+ CHECK_OFFSET(vm_exit_reason, 812);
+ CHECK_OFFSET(vm_exit_intr_info, 816);
+ CHECK_OFFSET(vm_exit_intr_error_code, 820);
+ CHECK_OFFSET(idt_vectoring_info_field, 824);
+ CHECK_OFFSET(idt_vectoring_error_code, 828);
+ CHECK_OFFSET(vm_exit_instruction_len, 832);
+ CHECK_OFFSET(vmx_instruction_info, 836);
+ CHECK_OFFSET(guest_es_limit, 840);
+ CHECK_OFFSET(guest_cs_limit, 844);
+ CHECK_OFFSET(guest_ss_limit, 848);
+ CHECK_OFFSET(guest_ds_limit, 852);
+ CHECK_OFFSET(guest_fs_limit, 856);
+ CHECK_OFFSET(guest_gs_limit, 860);
+ CHECK_OFFSET(guest_ldtr_limit, 864);
+ CHECK_OFFSET(guest_tr_limit, 868);
+ CHECK_OFFSET(guest_gdtr_limit, 872);
+ CHECK_OFFSET(guest_idtr_limit, 876);
+ CHECK_OFFSET(guest_es_ar_bytes, 880);
+ CHECK_OFFSET(guest_cs_ar_bytes, 884);
+ CHECK_OFFSET(guest_ss_ar_bytes, 888);
+ CHECK_OFFSET(guest_ds_ar_bytes, 892);
+ CHECK_OFFSET(guest_fs_ar_bytes, 896);
+ CHECK_OFFSET(guest_gs_ar_bytes, 900);
+ CHECK_OFFSET(guest_ldtr_ar_bytes, 904);
+ CHECK_OFFSET(guest_tr_ar_bytes, 908);
+ CHECK_OFFSET(guest_interruptibility_info, 912);
+ CHECK_OFFSET(guest_activity_state, 916);
+ CHECK_OFFSET(guest_sysenter_cs, 920);
+ CHECK_OFFSET(host_ia32_sysenter_cs, 924);
+ CHECK_OFFSET(vmx_preemption_timer_value, 928);
+ CHECK_OFFSET(virtual_processor_id, 960);
+ CHECK_OFFSET(posted_intr_nv, 962);
+ CHECK_OFFSET(guest_es_selector, 964);
+ CHECK_OFFSET(guest_cs_selector, 966);
+ CHECK_OFFSET(guest_ss_selector, 968);
+ CHECK_OFFSET(guest_ds_selector, 970);
+ CHECK_OFFSET(guest_fs_selector, 972);
+ CHECK_OFFSET(guest_gs_selector, 974);
+ CHECK_OFFSET(guest_ldtr_selector, 976);
+ CHECK_OFFSET(guest_tr_selector, 978);
+ CHECK_OFFSET(guest_intr_status, 980);
+ CHECK_OFFSET(host_es_selector, 982);
+ CHECK_OFFSET(host_cs_selector, 984);
+ CHECK_OFFSET(host_ss_selector, 986);
+ CHECK_OFFSET(host_ds_selector, 988);
+ CHECK_OFFSET(host_fs_selector, 990);
+ CHECK_OFFSET(host_gs_selector, 992);
+ CHECK_OFFSET(host_tr_selector, 994);
+ CHECK_OFFSET(guest_pml_index, 996);
+ }
+
/*
* VMCS12_REVISION is an arbitrary id that should be changed if the content or
* layout of struct vmcs12 is changed. MSR_IA32_VMX_BASIC returns this id, and
* VMPTRLD verifies that the VMCS region that L1 is loading contains this id.
+ *
+ * IMPORTANT: Changing this value will break save/restore compatibility with
+ * older kvm releases.
*/
#define VMCS12_REVISION 0x11e57ed0
bool sync_shadow_vmcs;
bool dirty_vmcs12;
- bool change_vmcs01_virtual_x2apic_mode;
+ bool change_vmcs01_virtual_apic_mode;
+
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;
FIELD64(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr),
FIELD64(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr),
FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
+ FIELD64(PML_ADDRESS, pml_address),
FIELD64(TSC_OFFSET, tsc_offset),
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
+ FIELD64(VMREAD_BITMAP, vmread_bitmap),
+ FIELD64(VMWRITE_BITMAP, vmwrite_bitmap),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
- FIELD64(PML_ADDRESS, pml_address),
FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
return *(u16 *)((char *)current_evmcs + offset);
}
+ static inline void evmcs_touch_msr_bitmap(void)
+ {
+ if (unlikely(!current_evmcs))
+ return;
+
+ if (current_evmcs->hv_enlightenments_control.msr_bitmap)
+ current_evmcs->hv_clean_fields &=
+ ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ }
+
static void evmcs_load(u64 phys_addr)
{
struct hv_vp_assist_page *vp_ap =
static inline u16 evmcs_read16(unsigned long field) { return 0; }
static inline void evmcs_load(u64 phys_addr) {}
static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+ static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
static inline bool is_exception_n(u32 intr_info, u8 vector)
return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT;
}
+ static inline bool cpu_has_vmx_invvpid_individual_addr(void)
+ {
+ return vmx_capability.vpid & VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT;
+ }
+
static inline bool cpu_has_vmx_invvpid_single(void)
{
return vmx_capability.vpid & VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT;
return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
}
+ /*
+ * Do the virtual VMX capability MSRs specify that L1 can use VMWRITE
+ * to modify any valid field of the VMCS, or are the VM-exit
+ * information fields read-only?
+ */
+ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
+ {
+ return to_vmx(vcpu)->nested.msrs.misc_low &
+ MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
+ }
+
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
{
return vmcs12->cpu_based_vm_exec_control & bit;
msrs->misc_high);
msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
msrs->misc_low |=
+ MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
VMX_MISC_ACTIVITY_HLT;
msrs->misc_high = 0;
vmx->nested.msrs.misc_low = data;
vmx->nested.msrs.misc_high = data >> 32;
+
+ /*
+ * If L1 has read-only VM-exit information fields, use the
+ * less permissive vmx_vmwrite_bitmap to specify write
+ * permissions for the shadow VMCS.
+ */
+ if (enable_shadow_vmcs && !nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+
return 0;
}
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * Don't allow changes to the VMX capability MSRs while the vCPU
+ * is in VMX operation.
+ */
+ if (vmx->nested.vmxon)
+ return -EBUSY;
+
switch (msr_index) {
case MSR_IA32_VMX_BASIC:
return vmx_restore_vmx_basic(vmx, data);
return kvm_get_msr_common(vcpu, msr_info);
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
vmx->spec_ctrl = data;
break;
case MSR_IA32_PRED_CMD:
if (!msr_info->host_initiated &&
- !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) &&
!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
return 1;
if (!loaded_vmcs->msr_bitmap)
goto out_vmcs;
memset(loaded_vmcs->msr_bitmap, 0xff, PAGE_SIZE);
+
+ if (static_branch_unlikely(&enable_evmcs) &&
+ (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP)) {
+ struct hv_enlightened_vmcs *evmcs =
+ (struct hv_enlightened_vmcs *)loaded_vmcs->vmcs;
+
+ evmcs->hv_enlightenments_control.msr_bitmap = 1;
+ }
}
return 0;
if (!cpu_has_vmx_msr_bitmap())
return;
+ if (static_branch_unlikely(&enable_evmcs))
+ evmcs_touch_msr_bitmap();
+
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
if (!cpu_has_vmx_msr_bitmap())
return;
+ if (static_branch_unlikely(&enable_evmcs))
+ evmcs_touch_msr_bitmap();
+
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
int i;
if (enable_shadow_vmcs) {
+ /*
+ * At vCPU creation, "VMWRITE to any supported field
+ * in the VMCS" is supported, so use the more
+ * permissive vmx_vmread_bitmap to specify both read
+ * and write permissions for the shadow VMCS.
+ */
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
- vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
+ vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmread_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->vmcs01.msr_bitmap));
vmcs_read32(VMX_INSTRUCTION_INFO), false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, vmpointer,
- sizeof(*vmpointer), &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
return 1;
}
+ /* CPL=0 must be checked manually. */
+ if (vmx_get_cpl(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
if (vmx->nested.vmxon) {
nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
return kvm_skip_emulated_instruction(vcpu);
*/
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
{
+ if (vmx_get_cpl(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 0;
+ }
+
if (!to_vmx(vcpu)->nested.vmxon) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 0;
}
+ /*
+ * Copy the writable VMCS shadow fields back to the VMCS12, in case
+ * they have been modified by the L1 guest. Note that the "read-only"
+ * VM-exit information fields are actually writable if the vCPU is
+ * configured to support "VMWRITE to any supported field in the VMCS."
+ */
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
- int i;
+ const u16 *fields[] = {
+ shadow_read_write_fields,
+ shadow_read_only_fields
+ };
+ const int max_fields[] = {
+ max_shadow_read_write_fields,
+ max_shadow_read_only_fields
+ };
+ int i, q;
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
- const u16 *fields = shadow_read_write_fields;
- const int num_fields = max_shadow_read_write_fields;
preempt_disable();
vmcs_load(shadow_vmcs);
- for (i = 0; i < num_fields; i++) {
- field = fields[i];
- field_value = __vmcs_readl(field);
- vmcs12_write_any(&vmx->vcpu, field, field_value);
+ for (q = 0; q < ARRAY_SIZE(fields); q++) {
+ for (i = 0; i < max_fields[q]; i++) {
+ field = fields[q][i];
+ field_value = __vmcs_readl(field);
+ vmcs12_write_any(&vmx->vcpu, field, field_value);
+ }
+ /*
+ * Skip the VM-exit information fields if they are read-only.
+ */
+ if (!nested_cpu_has_vmwrite_any_field(&vmx->vcpu))
+ break;
}
vmcs_clear(shadow_vmcs);
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &gva))
return 1;
- /* _system ok, as hardware has verified cpl=0 */
- kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
- &field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
+ /* _system ok, nested_vmx_check_permission has verified cpl=0 */
+ kvm_write_guest_virt_system(vcpu, gva, &field_value,
+ (is_long_mode(vcpu) ? 8 : 4), NULL);
}
nested_vmx_succeed(vcpu);
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva,
- &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, &field_value,
+ (is_64_bit_mode(vcpu) ? 8 : 4), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
- if (vmcs_field_readonly(field)) {
+ /*
+ * If the vCPU supports "VMWRITE to any supported field in the
+ * VMCS," then the "read-only" fields are actually read/write.
+ */
+ if (vmcs_field_readonly(field) &&
+ !nested_cpu_has_vmwrite_any_field(vcpu)) {
nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
return kvm_skip_emulated_instruction(vcpu);
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &vmcs_gva))
return 1;
- /* ok to use *_system, as hardware has verified cpl=0 */
- if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
- (void *)&to_vmx(vcpu)->nested.current_vmptr,
- sizeof(u64), &e)) {
+ /* *_system ok, nested_vmx_check_permission has verified cpl=0 */
+ if (kvm_write_guest_virt_system(vcpu, vmcs_gva,
+ (void *)&to_vmx(vcpu)->nested.current_vmptr,
+ sizeof(u64), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
- sizeof(operand), &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
return 1;
- if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
- sizeof(operand), &e)) {
+ if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
kvm_inject_page_fault(vcpu, &e);
return 1;
}
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
- if (is_noncanonical_address(operand.gla, vcpu)) {
+ if (!operand.vpid ||
+ is_noncanonical_address(operand.gla, vcpu)) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return kvm_skip_emulated_instruction(vcpu);
}
- /* fall through */
+ if (cpu_has_vmx_invvpid_individual_addr() &&
+ vmx->nested.vpid02) {
+ __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR,
+ vmx->nested.vpid02, operand.gla);
+ } else
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
+ break;
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
if (!operand.vpid) {
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
return kvm_skip_emulated_instruction(vcpu);
}
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
break;
case VMX_VPID_EXTENT_ALL_CONTEXT:
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
break;
default:
WARN_ON_ONCE(1);
return kvm_skip_emulated_instruction(vcpu);
}
- __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
case EXIT_REASON_TPR_BELOW_THRESHOLD:
return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
case EXIT_REASON_APIC_ACCESS:
- return nested_cpu_has2(vmcs12,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
case EXIT_REASON_APIC_WRITE:
case EXIT_REASON_EOI_INDUCED:
- /* apic_write and eoi_induced should exit unconditionally. */
+ /*
+ * The controls for "virtualize APIC accesses," "APIC-
+ * register virtualization," and "virtual-interrupt
+ * delivery" only come from vmcs12.
+ */
return true;
case EXIT_REASON_EPT_VIOLATION:
/*
vmcs_write32(TPR_THRESHOLD, irr);
}
- static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+ static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
u32 sec_exec_control;
+ if (!lapic_in_kernel(vcpu))
+ return;
+
/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
- to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+ to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
return;
}
- if (!cpu_has_vmx_virtualize_x2apic_mode())
- return;
-
if (!cpu_need_tpr_shadow(vcpu))
return;
sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
- if (set) {
- sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
- } else {
- sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
- sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmx_flush_tlb(vcpu, true);
+ switch (kvm_get_apic_mode(vcpu)) {
+ case LAPIC_MODE_INVALID:
+ WARN_ONCE(true, "Invalid local APIC state");
+ case LAPIC_MODE_DISABLED:
+ break;
+ case LAPIC_MODE_XAPIC:
+ if (flexpriority_enabled) {
+ sec_exec_control |=
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmx_flush_tlb(vcpu, true);
+ }
+ break;
+ case LAPIC_MODE_X2APIC:
+ if (cpu_has_vmx_virtualize_x2apic_mode())
+ sec_exec_control |=
+ SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
+ break;
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- /*
- * Currently we do not handle the nested case where L2 has an
- * APIC access page of its own; that page is still pinned.
- * Hence, we skip the case where the VCPU is in guest mode _and_
- * L1 prepared an APIC access page for L2.
- *
- * For the case where L1 and L2 share the same APIC access page
- * (flexpriority=Y but SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES clear
- * in the vmcs12), this function will only update either the vmcs01
- * or the vmcs02. If the former, the vmcs02 will be updated by
- * prepare_vmcs02. If the latter, the vmcs01 will be updated in
- * the next L2->L1 exit.
- */
- if (!is_guest_mode(vcpu) ||
- !nested_cpu_has2(get_vmcs12(&vmx->vcpu),
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ if (!is_guest_mode(vcpu)) {
vmcs_write64(APIC_ACCESS_ADDR, hpa);
vmx_flush_tlb(vcpu, true);
}
}
STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
-static bool vmx_has_high_real_mode_segbase(void)
+static bool vmx_has_emulated_msr(int index)
{
- return enable_unrestricted_guest || emulate_invalid_guest_state;
+ switch (index) {
+ case MSR_IA32_SMBASE:
+ /*
+ * We cannot do SMM unless we can run the guest in big
+ * real mode.
+ */
+ return enable_unrestricted_guest || emulate_invalid_guest_state;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ /* This is AMD only. */
+ return false;
+ default:
+ return true;
+ }
}
static bool vmx_mpx_supported(void)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
vmx->__launched = vmx->loaded_vmcs->launched;
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
static struct kvm *vmx_vm_alloc(void)
{
- struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
+ struct kvm_vmx *kvm_vmx = vzalloc(sizeof(struct kvm_vmx));
return &kvm_vmx->kvm;
}
static void vmx_vm_free(struct kvm *kvm)
{
- kfree(to_kvm_vmx(kvm));
+ vfree(to_kvm_vmx(kvm));
}
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
}
- } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) &&
- cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
- vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
- kvm_vcpu_reload_apic_access_page(vcpu);
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
return 0;
}
- static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- bool from_vmentry)
+ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
* is assigned to entry_failure_code on failure.
*/
static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
- bool from_vmentry, u32 *entry_failure_code)
+ u32 *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control, vmcs12_exec_ctrl;
if (vmx->nested.dirty_vmcs12) {
- prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+ prepare_vmcs02_full(vcpu, vmcs12);
vmx->nested.dirty_vmcs12 = false;
}
* HOST_FS_BASE, HOST_GS_BASE.
*/
- if (from_vmentry &&
+ if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
}
- if (from_vmentry) {
+ if (vmx->nested.nested_run_pending) {
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
vmcs12->vm_entry_intr_info_field);
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
~VM_ENTRY_IA32E_MODE) |
(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
- if (from_vmentry &&
+ if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
vcpu->arch.pat = vmcs12->guest_ia32_pat;
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) {
if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true);
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true);
}
} else {
vmx_flush_tlb(vcpu, true);
vmx_set_cr4(vcpu, vmcs12->guest_cr4);
vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
- if (from_vmentry &&
+ if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
vcpu->arch.efer = vmcs12->guest_ia32_efer;
else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
return 0;
}
- static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
vcpu->arch.tsc_offset += vmcs12->tsc_offset;
r = EXIT_REASON_INVALID_STATE;
- if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual))
+ if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
goto fail;
nested_get_vmcs12_pages(vcpu, vmcs12);
* the nested entry.
*/
- ret = enter_vmx_non_root_mode(vcpu, true);
- if (ret)
+ vmx->nested.nested_run_pending = 1;
+ ret = enter_vmx_non_root_mode(vcpu);
+ if (ret) {
+ vmx->nested.nested_run_pending = 0;
return ret;
+ }
/*
* If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
* by event injection, halt vcpu.
*/
if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
- !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) {
+ vmx->nested.nested_run_pending = 0;
return kvm_vcpu_halt(vcpu);
-
- vmx->nested.nested_run_pending = 1;
-
+ }
return 1;
out:
load_vmcs12_mmu_host_state(vcpu, vmcs12);
- if (enable_vpid) {
- /*
- * Trivially support vpid by letting L2s share their parent
- * L1's vpid. TODO: move to a more elaborate solution, giving
- * each L2 its own vpid and exposing the vpid feature to L1.
- */
+ /*
+ * If vmcs01 don't use VPID, CPU flushes TLB on every
+ * VMEntry/VMExit. Thus, no need to flush TLB.
+ *
+ * If vmcs12 uses VPID, TLB entries populated by L2 are
+ * tagged with vmx->nested.vpid02 while L1 entries are tagged
+ * with vmx->vpid. Thus, no need to flush TLB.
+ *
+ * Therefore, flush TLB only in case vmcs01 uses VPID and
+ * vmcs12 don't use VPID as in this case L1 & L2 TLB entries
+ * are both tagged with vmx->vpid.
+ */
+ if (enable_vpid &&
+ !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) {
vmx_flush_tlb(vcpu, true);
}
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
- if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
- vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
- vmx_set_virtual_x2apic_mode(vcpu,
- vcpu->arch.apic_base & X2APIC_ENABLE);
+ if (vmx->nested.change_vmcs01_virtual_apic_mode) {
+ vmx->nested.change_vmcs01_virtual_apic_mode = false;
+ vmx_set_virtual_apic_mode(vcpu);
} else if (!nested_cpu_has_ept(vmcs12) &&
nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
struct vcpu_vmx *vmx;
- u64 tscl, guest_tscl, delta_tsc;
+ u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
if (kvm_mwait_in_guest(vcpu->kvm))
return -EOPNOTSUPP;
tscl = rdtsc();
guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
+ lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+
+ if (delta_tsc > lapic_timer_advance_cycles)
+ delta_tsc -= lapic_timer_advance_cycles;
+ else
+ delta_tsc = 0;
/* Convert to host delta tsc if tsc scaling is enabled */
if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
if (vmx->nested.smm.guest_mode) {
vcpu->arch.hflags &= ~HF_SMM_MASK;
- ret = enter_vmx_non_root_mode(vcpu, false);
+ ret = enter_vmx_non_root_mode(vcpu);
vcpu->arch.hflags |= HF_SMM_MASK;
if (ret)
return ret;
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
.cpu_has_accelerated_tpr = report_flexpriority,
- .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+ .has_emulated_msr = vmx_has_emulated_msr,
.vm_init = vmx_vm_init,
.vm_alloc = vmx_vm_alloc,
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
- .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
+ .set_virtual_apic_mode = vmx_set_virtual_apic_mode,
.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
.get_enable_apicv = vmx_get_enable_apicv,
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
rcu_assign_pointer(crash_vmclear_loaded_vmcss,
crash_vmclear_local_loaded_vmcss);
#endif
+ vmx_check_vmcs12_offsets();
return 0;
}
/* lapic timer advance (tscdeadline mode only) in nanoseconds */
unsigned int __read_mostly lapic_timer_advance_ns = 0;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
+ EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+ enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
+ {
+ return kvm_apic_mode(kvm_get_apic_base(vcpu));
+ }
+ EXPORT_SYMBOL_GPL(kvm_get_apic_mode);
+
int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
- u64 old_state = vcpu->arch.apic_base &
- (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
- u64 new_state = msr_info->data &
- (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+ enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
+ enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
- if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE)
- return 1;
- if (!msr_info->host_initiated &&
- ((new_state == MSR_IA32_APICBASE_ENABLE &&
- old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
- (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
- old_state == 0)))
+ if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
return 1;
+ if (!msr_info->host_initiated) {
+ if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC)
+ return 1;
+ if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC)
+ return 1;
+ }
kvm_lapic_set_base(vcpu, msr_info->data);
return 0;
}
if (is_long_mode(vcpu) &&
- (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 62)))
+ (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
return 1;
else if (is_pae(vcpu) && is_paging(vcpu) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
MSR_SMI_COUNT,
MSR_PLATFORM_INFO,
MSR_MISC_FEATURES_ENABLES,
+ MSR_AMD64_VIRT_SPEC_CTRL,
};
static unsigned num_emulated_msrs;
return mode;
}
- static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
+ static int do_realtime(struct timespec64 *ts, u64 *tsc_timestamp)
{
struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
unsigned long seq;
}
/* returns true if host is using TSC based clocksource */
- static bool kvm_get_walltime_and_clockread(struct timespec *ts,
+ static bool kvm_get_walltime_and_clockread(struct timespec64 *ts,
u64 *tsc_timestamp)
{
/* checked again under seqlock below */
case KVM_CAP_HYPERV_SYNIC2:
case KVM_CAP_HYPERV_VP_INDEX:
case KVM_CAP_HYPERV_EVENTFD:
+ case KVM_CAP_HYPERV_TLBFLUSH:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_DISABLE_EXITS:
- r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+ r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
if(kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
* fringe case that is not enabled except via specific settings
* of the module parameters.
*/
- r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
return r;
}
- int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
kvm_can_mwait_in_guest())
kvm->arch.mwait_in_guest = true;
- if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT)
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
num_msrs_to_save = j;
for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- switch (emulated_msrs[i]) {
- case MSR_IA32_SMBASE:
- if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
- continue;
- break;
- default:
- break;
- }
+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ continue;
if (j < i)
emulated_msrs[j] = emulated_msrs[i];
return X86EMUL_CONTINUE;
}
- int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
+ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
}
EXPORT_SYMBOL_GPL(kvm_read_guest_virt);
- static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
- gva_t addr, void *val, unsigned int bytes,
- struct x86_exception *exception)
+ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
+ gva_t addr, void *val, unsigned int bytes,
+ struct x86_exception *exception, bool system)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
+ u32 access = 0;
+
+ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+ access |= PFERR_USER_MASK;
+
+ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
}
static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
}
- int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
- gva_t addr, void *val,
- unsigned int bytes,
- struct x86_exception *exception)
+ static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu, u32 access,
+ struct x86_exception *exception)
{
- struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr,
- PFERR_WRITE_MASK,
+ access,
exception);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
out:
return r;
}
+
+ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val,
+ unsigned int bytes, struct x86_exception *exception,
+ bool system)
+ {
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+ u32 access = PFERR_WRITE_MASK;
+
+ if (!system && kvm_x86_ops->get_cpl(vcpu) == 3)
+ access |= PFERR_USER_MASK;
+
+ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+ access, exception);
+ }
+
+ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val,
+ unsigned int bytes, struct x86_exception *exception)
+ {
+ return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
+ PFERR_WRITE_MASK, exception);
+ }
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
int handle_ud(struct kvm_vcpu *vcpu)
struct x86_exception e;
if (force_emulation_prefix &&
- kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
- kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+ kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
+ sig, sizeof(sig), &e) == 0 &&
memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
emul_type = 0;
static const struct x86_emulate_ops emulate_ops = {
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
- .read_std = kvm_read_guest_virt_system,
- .write_std = kvm_write_guest_virt_system,
+ .read_std = emulator_read_std,
+ .write_std = emulator_write_std,
.read_phys = kvm_read_guest_phys_system,
.fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
unsigned long clock_type)
{
struct kvm_clock_pairing clock_pairing;
- struct timespec ts;
+ struct timespec64 ts;
u64 cycle;
int ret;
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit;
- if (kvm_hv_hypercall_enabled(vcpu->kvm)) {
- if (!kvm_hv_hypercall(vcpu))
- return 0;
- goto out;
- }
+ if (kvm_hv_hypercall_enabled(vcpu->kvm))
+ return kvm_hv_hypercall(vcpu);
nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
if (kvm_x86_ops->get_cpl(vcpu) != 0) {
ret = -KVM_EPERM;
- goto out_error;
+ goto out;
}
switch (nr) {
ret = -KVM_ENOSYS;
break;
}
-out_error:
+out:
if (!op_64_bit)
ret = (u32)ret;
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
-out:
++vcpu->stat.hypercalls;
return kvm_skip_emulated_instruction(vcpu);
}
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
+ int cpuid_update_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
int ret = -EINVAL;
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+ (X86_CR4_OSXSAVE | X86_CR4_PKE));
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
- if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+ if (cpuid_update_needed)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
#include <linux/signal_types.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
+#include <linux/rseq.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
unsigned long numa_pages_migrated;
#endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_RSEQ
+ struct rseq __user *rseq;
+ u32 rseq_len;
+ u32 rseq_sig;
+ /*
+ * RmW on rseq_event_mask must be performed atomically
+ * with respect to preemption.
+ */
+ unsigned long rseq_event_mask;
+#endif
+
struct tlbflush_unmap_batch tlb_ubc;
struct rcu_head rcu;
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
clear_ti_thread_flag(task_thread_info(tsk), flag);
}
+ static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
+ bool value)
+ {
+ update_ti_thread_flag(task_thread_info(tsk), flag, value);
+ }
+
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
* explicit rescheduling in places that are safe. The return
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
*/
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
__cond_resched_lock(lock); \
})
-extern int __cond_resched_softirq(void);
-
-#define cond_resched_softirq() ({ \
- ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
- __cond_resched_softirq(); \
-})
-
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
+#ifdef CONFIG_RSEQ
+
+/*
+ * Map the event mask on the user-space ABI enum rseq_cs_flags
+ * for direct mask checks.
+ */
+enum rseq_event_mask_bits {
+ RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
+ RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
+ RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
+};
+
+enum rseq_event_mask {
+ RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
+ RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
+ RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
+};
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+ if (t->rseq)
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+void __rseq_handle_notify_resume(struct pt_regs *regs);
+
+static inline void rseq_handle_notify_resume(struct pt_regs *regs)
+{
+ if (current->rseq)
+ __rseq_handle_notify_resume(regs);
+}
+
+static inline void rseq_signal_deliver(struct pt_regs *regs)
+{
+ preempt_disable();
+ __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask);
+ preempt_enable();
+ rseq_handle_notify_resume(regs);
+}
+
+/* rseq_preempt() requires preemption to be disabled. */
+static inline void rseq_preempt(struct task_struct *t)
+{
+ __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
+ rseq_set_notify_resume(t);
+}
+
+/* rseq_migrate() requires preemption to be disabled. */
+static inline void rseq_migrate(struct task_struct *t)
+{
+ __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
+ rseq_set_notify_resume(t);
+}
+
+/*
+ * If parent process has a registered restartable sequences area, the
+ * child inherits. Only applies when forking a process, not a thread. In
+ * case a parent fork() in the middle of a restartable sequence, set the
+ * resume notifier to force the child to retry.
+ */
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+ if (clone_flags & CLONE_THREAD) {
+ t->rseq = NULL;
+ t->rseq_len = 0;
+ t->rseq_sig = 0;
+ t->rseq_event_mask = 0;
+ } else {
+ t->rseq = current->rseq;
+ t->rseq_len = current->rseq_len;
+ t->rseq_sig = current->rseq_sig;
+ t->rseq_event_mask = current->rseq_event_mask;
+ rseq_preempt(t);
+ }
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+ t->rseq = NULL;
+ t->rseq_len = 0;
+ t->rseq_sig = 0;
+ t->rseq_event_mask = 0;
+}
+
+#else
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+}
+static inline void rseq_handle_notify_resume(struct pt_regs *regs)
+{
+}
+static inline void rseq_signal_deliver(struct pt_regs *regs)
+{
+}
+static inline void rseq_preempt(struct task_struct *t)
+{
+}
+static inline void rseq_migrate(struct task_struct *t)
+{
+}
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+}
+static inline void rseq_execve(struct task_struct *t)
+{
+}
+
+#endif
+
+#ifdef CONFIG_DEBUG_RSEQ
+
+void rseq_syscall(struct pt_regs *regs);
+
+#else
+
+static inline void rseq_syscall(struct pt_regs *regs)
+{
+}
+
+#endif
+
#endif
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
+ #include <linux/bug.h>
#include <linux/cpu_pm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <asm/mman.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+ #include <asm/cpufeature.h>
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
return 0;
}
- int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
return -EINVAL;
}
+ struct kvm *kvm_arch_alloc_vm(void)
+ {
+ if (!has_vhe())
+ return kzalloc(sizeof(struct kvm), GFP_KERNEL);
+
+ return vzalloc(sizeof(struct kvm));
+ }
+
+ void kvm_arch_free_vm(struct kvm *kvm)
+ {
+ if (!has_vhe())
+ kfree(kvm);
+ else
+ vfree(kvm);
+ }
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
- kvm_vgic_vcpu_early_init(vcpu);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
kvm_vcpu_load_sysregs(vcpu);
+ kvm_arch_vcpu_load_fp(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_arch_vcpu_put_fp(vcpu);
kvm_vcpu_put_sysregs(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
*/
preempt_disable();
- /* Flush FP/SIMD state that can't survive guest entry/exit */
- kvm_fpsimd_flush_cpu_state();
-
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
if (static_branch_unlikely(&userspace_irqchip_in_use))
kvm_timer_sync_hwstate(vcpu);
+ kvm_arch_vcpu_ctxsync_fp(vcpu);
+
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
}
}
+ err = hyp_map_aux_data();
+ if (err)
+ kvm_err("Cannot map host auxilary data: %d\n", err);
+
return 0;
out_err:
return -ENODEV;
}
+ if (!kvm_arch_check_sve_has_vhe()) {
+ kvm_pr_unimpl("SVE system without VHE unsupported. Broken cpu?");
+ return -ENODEV;
+ }
+
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1);
if (ret < 0) {