The "pad" and "reserved" fields may be used for future extensions and should be
set to 0s by userspace.
+4.138 KVM_ARM_SET_COUNTER_OFFSET
+--------------------------------
+
+:Capability: KVM_CAP_COUNTER_OFFSET
+:Architectures: arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_counter_offset (in)
+:Returns: 0 on success, < 0 on error
+
+This capability indicates that userspace is able to apply a single VM-wide
+offset to both the virtual and physical counters as viewed by the guest
+using the KVM_ARM_SET_CNT_OFFSET ioctl and the following data structure:
+
+::
+
+ struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+ };
+
+The offset describes a number of counter cycles that are subtracted from
+both virtual and physical counter views (similar to the effects of the
+CNTVOFF_EL2 and CNTPOFF_EL2 system registers, but only global). The offset
+always applies to all vcpus (already created or created after this ioctl)
+for this VM.
+
+It is userspace's responsibility to compute the offset based, for example,
+on previous values of the guest counters.
+
+Any value other than 0 for the "reserved" field may result in an error
+(-EINVAL) being returned. This ioctl can also return -EBUSY if any vcpu
+ioctl is issued concurrently.
+
+Note that using this ioctl results in KVM ignoring subsequent userspace
+writes to the CNTVCT_EL0 and CNTPCT_EL0 registers using the SET_ONE_REG
+interface. No error will be returned, but the resulting offset will not be
+applied.
+
5. The kvm_run structure
========================
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+ __u64 flags;
} hypercall;
- Unused. This was once used for 'hypercall to userspace'. To implement
- such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+
+ It is strongly recommended that userspace use ``KVM_EXIT_IO`` (x86) or
+ ``KVM_EXIT_MMIO`` (all except s390) to implement functionality that
+ requires a guest to interact with host userpace.
.. note:: KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+ For arm64:
+ ----------
+
+ SMCCC exits can be enabled depending on the configuration of the SMCCC
+ filter. See the Documentation/virt/kvm/devices/vm.rst
+ ``KVM_ARM_SMCCC_FILTER`` for more details.
+
+ ``nr`` contains the function ID of the guest's SMCCC call. Userspace is
+ expected to use the ``KVM_GET_ONE_REG`` ioctl to retrieve the call
+ parameters from the vCPU's GPRs.
+
+ Definition of ``flags``:
+ - ``KVM_HYPERCALL_EXIT_SMC``: Indicates that the guest used the SMC
+ conduit to initiate the SMCCC call. If this bit is 0 then the guest
+ used the HVC conduit for the SMCCC call.
+
+ - ``KVM_HYPERCALL_EXIT_16BIT``: Indicates that the guest used a 16bit
+ instruction to initiate the SMCCC call. If this bit is 0 then the
+ guest used a 32bit instruction. An AArch64 guest always has this
+ bit set to 0.
+
+ At the point of exit, PC points to the instruction immediately following
+ the trapping instruction.
+
::
/* KVM_EXIT_TPR_ACCESS */
#include <linux/types.h>
#include <linux/jump_label.h>
#include <linux/kvm_types.h>
+ #include <linux/maple_tree.h>
#include <linux/percpu.h>
#include <linux/psci.h>
#include <asm/arch_gicv3.h>
/* Mandated version of PSCI */
u32 psci_version;
+ /* Protects VM-scoped configuration data */
+ struct mutex config_lock;
+
/*
* If we encounter a data abort without valid instruction syndrome
* information, report this to user space. User space can (and
#define KVM_ARCH_FLAG_EL1_32BIT 4
/* PSCI SYSTEM_SUSPEND enabled for the guest */
#define KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED 5
-
+ /* VM counter offset */
+#define KVM_ARCH_FLAG_VM_COUNTER_OFFSET 6
+ /* Timer PPIs made immutable */
+#define KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE 7
-#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 6
+ /* SMCCC filter initialized for the VM */
++#define KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED 8
unsigned long flags;
/*
/* Hypercall features firmware registers' descriptor */
struct kvm_smccc_features smccc_feat;
+ struct maple_tree smccc_filter;
/*
* For an untrusted host VM, 'pkvm.handle' is used to lookup
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
+ CNTHP_CTL_EL2,
+ CNTHP_CVAL_EL2,
+ CNTHV_CTL_EL2,
+ CNTHV_CVAL_EL2,
NR_SYS_REGS /* Nothing after this line! */
};
/* vcpu power state */
struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
int __init kvm_sys_reg_table_init(void);
+bool lock_all_vcpus(struct kvm *kvm);
+void unlock_all_vcpus(struct kvm *kvm);
+
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len);
long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags);
+int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
+ struct kvm_arm_counter_offset *offset);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
(system_supports_32bit_el0() && \
!static_branch_unlikely(&arm64_mismatched_32bit_el0))
+ #define kvm_vm_has_ran_once(kvm) \
+ (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &(kvm)->arch.flags))
+
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
__u64 reserved[2];
};
+/*
+ * Counter/Timer offset structure. Describe the virtual/physical offset.
+ * To be used with KVM_ARM_SET_COUNTER_OFFSET.
+ */
+struct kvm_arm_counter_offset {
+ __u64 counter_offset;
+ __u64 reserved;
+};
+
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
#endif
};
+ /* Device Control API on vm fd */
+ #define KVM_ARM_VM_SMCCC_CTRL 0
+ #define KVM_ARM_VM_SMCCC_FILTER 0
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_TIMER_IRQ_HVTIMER 2
+#define KVM_ARM_VCPU_TIMER_IRQ_HPTIMER 3
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
/* run->fail_entry.hardware_entry_failure_reason codes. */
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
+ enum kvm_smccc_filter_action {
+ KVM_SMCCC_FILTER_HANDLE = 0,
+ KVM_SMCCC_FILTER_DENY,
+ KVM_SMCCC_FILTER_FWD_TO_USER,
+
+ #ifdef __KERNEL__
+ NR_SMCCC_FILTER_ACTIONS
+ #endif
+ };
+
+ struct kvm_smccc_filter {
+ __u32 base;
+ __u32 nr_functions;
+ __u8 action;
+ __u8 pad[15];
+ };
+
+ /* arm64-specific KVM_EXIT_HYPERCALL flags */
+ #define KVM_HYPERCALL_EXIT_SMC (1U << 0)
+ #define KVM_HYPERCALL_EXIT_16BIT (1U << 1)
+
#endif
#endif /* __ARM_KVM_H__ */
{
int ret;
+ mutex_init(&kvm->arch.config_lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Clue in lockdep that the config_lock must be taken inside kvm->lock */
+ mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->arch.config_lock);
+ mutex_unlock(&kvm->lock);
+#endif
+
ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
return ret;
kvm_vgic_early_init(kvm);
+ kvm_timer_init_vm(kvm);
+
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->max_vcpus = kvm_arm_default_max_vcpus();
kvm_destroy_vcpus(kvm);
kvm_unshare_hyp(kvm, kvm + 1);
+
+ kvm_arm_teardown_hypercalls(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
case KVM_CAP_PTP_KVM:
case KVM_CAP_ARM_SYSTEM_SUSPEND:
+ case KVM_CAP_COUNTER_OFFSET:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
{
int err;
+ spin_lock_init(&vcpu->arch.mp_state_lock);
+
+#ifdef CONFIG_LOCKDEP
+ /* Inform lockdep that the config_lock is acquired after vcpu->mutex */
+ mutex_lock(&vcpu->mutex);
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+ mutex_unlock(&vcpu->mutex);
+#endif
+
/* Force users to call KVM_ARM_VCPU_INIT */
vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->cpu = -1;
}
-void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+static void __kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
+void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_arm_vcpu_power_off(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_STOPPED;
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
}
static void kvm_arm_vcpu_suspend(struct kvm_vcpu *vcpu)
{
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_SUSPENDED;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_SUSPENDED);
kvm_make_request(KVM_REQ_SUSPEND, vcpu);
kvm_vcpu_kick(vcpu);
}
static bool kvm_arm_vcpu_suspended(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state.mp_state == KVM_MP_STATE_SUSPENDED;
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_SUSPENDED;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- *mp_state = vcpu->arch.mp_state;
+ *mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
{
int ret = 0;
+ spin_lock(&vcpu->arch.mp_state_lock);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.mp_state = *mp_state;
+ WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
- kvm_arm_vcpu_power_off(vcpu);
+ __kvm_arm_vcpu_power_off(vcpu);
break;
case KVM_MP_STATE_SUSPENDED:
kvm_arm_vcpu_suspend(vcpu);
ret = -EINVAL;
}
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
return ret;
}
if (kvm_vm_is_protected(kvm))
kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu);
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
set_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
/*
* Handle the "start in power-off" case.
*/
+ spin_lock(&vcpu->arch.mp_state_lock);
+
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- kvm_arm_vcpu_power_off(vcpu);
+ __kvm_arm_vcpu_power_off(vcpu);
else
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
+
+ spin_unlock(&vcpu->arch.mp_state_lock);
return 0;
}
}
}
+ static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+ switch (attr->group) {
+ case KVM_ARM_VM_SMCCC_CTRL:
+ return kvm_vm_smccc_has_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+ }
+
+ static int kvm_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+ switch (attr->group) {
+ case KVM_ARM_VM_SMCCC_CTRL:
+ return kvm_vm_smccc_set_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+ }
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
+ struct kvm_device_attr attr;
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
return -EFAULT;
return kvm_vm_ioctl_mte_copy_tags(kvm, ©_tags);
}
+ case KVM_ARM_SET_COUNTER_OFFSET: {
+ struct kvm_arm_counter_offset offset;
+
+ if (copy_from_user(&offset, argp, sizeof(offset)))
+ return -EFAULT;
+ return kvm_vm_ioctl_set_counter_offset(kvm, &offset);
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_has_attr(kvm, &attr);
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_set_attr(kvm, &attr);
+ }
default:
return -EINVAL;
}
}
+/* unlocks vcpus from @vcpu_lock_idx and smaller */
+static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
+{
+ struct kvm_vcpu *tmp_vcpu;
+
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&tmp_vcpu->mutex);
+ }
+}
+
+void unlock_all_vcpus(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+
+ unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
+}
+
+/* Returns true if all vcpus were locked, false otherwise */
+bool lock_all_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *tmp_vcpu;
+ unsigned long c;
+
+ lockdep_assert_held(&kvm->lock);
+
+ /*
+ * Any time a vcpu is in an ioctl (including running), the
+ * core KVM code tries to grab the vcpu->mutex.
+ *
+ * By grabbing the vcpu->mutex of all VCPUs we ensure that no
+ * other VCPUs can fiddle with the state while we access it.
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
+ if (!mutex_trylock(&tmp_vcpu->mutex)) {
+ unlock_vcpus(kvm, c - 1);
+ return false;
+ }
+ }
+
+ return true;
+}
+
static unsigned long nvhe_percpu_size(void)
{
return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
- cycles = systime_snapshot.cycles;
+ cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset;
break;
default:
return;
val[3] = lower_32_bits(cycles);
}
- static bool kvm_hvc_call_default_allowed(u32 func_id)
+ static bool kvm_smccc_default_allowed(u32 func_id)
{
switch (func_id) {
/*
}
}
- static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id)
+ static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP,
&smccc_feat->vendor_hyp_bmap);
default:
- return kvm_hvc_call_default_allowed(func_id);
+ return false;
}
}
- int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+ #define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID
+ #define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_32, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+ #define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, 0)
+ #define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ 0, ARM_SMCCC_FUNC_MASK)
+
+ static void init_smccc_filter(struct kvm *kvm)
+ {
+ int r;
+
+ mt_init(&kvm->arch.smccc_filter);
+
+ /*
+ * Prevent userspace from handling any SMCCC calls in the architecture
+ * range, avoiding the risk of misrepresenting Spectre mitigation status
+ * to the guest.
+ */
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter,
+ SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END,
+ xa_mk_value(KVM_SMCCC_FILTER_HANDLE),
+ GFP_KERNEL_ACCOUNT);
+ WARN_ON_ONCE(r);
+
+ }
+
+ static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr)
+ {
+ const void *zero_page = page_to_virt(ZERO_PAGE(0));
+ struct kvm_smccc_filter filter;
+ u32 start, end;
+ int r;
+
+ if (copy_from_user(&filter, uaddr, sizeof(filter)))
+ return -EFAULT;
+
+ if (memcmp(filter.pad, zero_page, sizeof(filter.pad)))
+ return -EINVAL;
+
+ start = filter.base;
+ end = start + filter.nr_functions - 1;
+
+ if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS)
+ return -EINVAL;
+
- mutex_lock(&kvm->lock);
++ mutex_lock(&kvm->arch.config_lock);
+
+ if (kvm_vm_has_ran_once(kvm)) {
+ r = -EBUSY;
+ goto out_unlock;
+ }
+
+ r = mtree_insert_range(&kvm->arch.smccc_filter, start, end,
+ xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT);
+ if (r)
+ goto out_unlock;
+
+ set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags);
+
+ out_unlock:
- mutex_unlock(&kvm->lock);
++ mutex_unlock(&kvm->arch.config_lock);
+ return r;
+ }
+
+ static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id)
+ {
+ unsigned long idx = func_id;
+ void *val;
+
+ if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ /*
+ * But where's the error handling, you say?
+ *
+ * mt_find() returns NULL if no entry was found, which just so happens
+ * to match KVM_SMCCC_FILTER_HANDLE.
+ */
+ val = mt_find(&kvm->arch.smccc_filter, &idx, idx);
+ return xa_to_value(val);
+ }
+
+ static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id)
+ {
+ /*
+ * Intervening actions in the SMCCC filter take precedence over the
+ * pseudo-firmware register bitmaps.
+ */
+ u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id);
+ if (action != KVM_SMCCC_FILTER_HANDLE)
+ return action;
+
+ if (kvm_smccc_test_fw_bmap(vcpu, func_id) ||
+ kvm_smccc_default_allowed(func_id))
+ return KVM_SMCCC_FILTER_HANDLE;
+
+ return KVM_SMCCC_FILTER_DENY;
+ }
+
+ static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id)
+ {
+ u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu));
+ struct kvm_run *run = vcpu->run;
+ u64 flags = 0;
+
+ if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64)
+ flags |= KVM_HYPERCALL_EXIT_SMC;
+
+ if (!kvm_vcpu_trap_il_is32bit(vcpu))
+ flags |= KVM_HYPERCALL_EXIT_16BIT;
+
+ run->exit_reason = KVM_EXIT_HYPERCALL;
+ run->hypercall = (typeof(run->hypercall)) {
+ .nr = func_id,
+ .flags = flags,
+ };
+ }
+
+ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu)
{
struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat;
u32 func_id = smccc_get_function(vcpu);
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
u32 feature;
+ u8 action;
gpa_t gpa;
- if (!kvm_hvc_call_allowed(vcpu, func_id))
+ action = kvm_smccc_get_action(vcpu, func_id);
+ switch (action) {
+ case KVM_SMCCC_FILTER_HANDLE:
+ break;
+ case KVM_SMCCC_FILTER_DENY:
+ goto out;
+ case KVM_SMCCC_FILTER_FWD_TO_USER:
+ kvm_prepare_hypercall_exit(vcpu, func_id);
+ return 0;
+ default:
+ WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action);
goto out;
+ }
switch (func_id) {
case ARM_SMCCC_VERSION_FUNC_ID:
smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES;
smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES;
smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES;
+
+ init_smccc_filter(kvm);
+ }
+
+ void kvm_arm_teardown_hypercalls(struct kvm *kvm)
+ {
+ mtree_destroy(&kvm->arch.smccc_filter);
}
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
if (val & ~fw_reg_features)
return -EINVAL;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.config_lock);
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) &&
- val != *fw_reg_bmap) {
+ if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) {
ret = -EBUSY;
goto out;
}
WRITE_ONCE(*fw_reg_bmap, val);
out:
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
return -EINVAL;
}
+
+ int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+ }
+
+ int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+ {
+ void __user *uaddr = (void __user *)attr->addr;
+
+ switch (attr->attr) {
+ case KVM_ARM_VM_SMCCC_FILTER:
+ return kvm_smccc_set_filter(kvm, uaddr);
+ default:
+ return -ENXIO;
+ }
+ }
struct arm_pmu *arm_pmu;
int ret = -ENXIO;
- mutex_lock(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.config_lock);
mutex_lock(&arm_pmus_lock);
list_for_each_entry(entry, &arm_pmus, entry) {
arm_pmu = entry->arm_pmu;
if (arm_pmu->pmu.type == pmu_id) {
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
+ if (kvm_vm_has_ran_once(kvm) ||
(kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
ret = -EBUSY;
break;
}
mutex_unlock(&arm_pmus_lock);
- mutex_unlock(&kvm->lock);
return ret;
}
{
struct kvm *kvm = vcpu->kvm;
+ lockdep_assert_held(&kvm->arch.config_lock);
+
if (!kvm_vcpu_has_pmu(vcpu))
return -ENODEV;
if (vcpu->arch.pmu.created)
return -EBUSY;
- mutex_lock(&kvm->lock);
if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
- if (!kvm->arch.arm_pmu) {
- mutex_unlock(&kvm->lock);
+ if (!kvm->arch.arm_pmu)
return -ENODEV;
- }
}
- mutex_unlock(&kvm->lock);
switch (attr->attr) {
case KVM_ARM_VCPU_PMU_V3_IRQ: {
filter.action != KVM_PMU_EVENT_DENY))
return -EINVAL;
- if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags))
- mutex_lock(&kvm->lock);
-
- if (kvm_vm_has_ran_once(kvm)) {
- mutex_unlock(&kvm->lock);
++ if (kvm_vm_has_ran_once(kvm))
return -EBUSY;
- }
if (!kvm->arch.pmu_filter) {
kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
- if (!kvm->arch.pmu_filter) {
- mutex_unlock(&kvm->lock);
+ if (!kvm->arch.pmu_filter)
return -ENOMEM;
- }
/*
* The default depends on the first applied filter.
else
bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
- mutex_unlock(&kvm->lock);
-
return 0;
}
case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
struct vcpu_reset_state *reset_state;
struct kvm *kvm = source_vcpu->kvm;
struct kvm_vcpu *vcpu = NULL;
+ int ret = PSCI_RET_SUCCESS;
unsigned long cpu_id;
cpu_id = smccc_get_arg1(source_vcpu);
*/
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
+
+ spin_lock(&vcpu->arch.mp_state_lock);
if (!kvm_arm_vcpu_stopped(vcpu)) {
if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
- return PSCI_RET_ALREADY_ON;
+ ret = PSCI_RET_ALREADY_ON;
else
- return PSCI_RET_INVALID_PARAMS;
+ ret = PSCI_RET_INVALID_PARAMS;
+
+ goto out_unlock;
}
reset_state = &vcpu->arch.reset_state;
*/
reset_state->r0 = smccc_get_arg3(source_vcpu);
- WRITE_ONCE(reset_state->reset, true);
+ reset_state->reset = true;
kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
/*
*/
smp_wmb();
- vcpu->arch.mp_state.mp_state = KVM_MP_STATE_RUNNABLE;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
- return PSCI_RET_SUCCESS;
+out_unlock:
+ spin_unlock(&vcpu->arch.mp_state_lock);
+ return ret;
}
static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
* after this call is handled and before the VCPUs have been
* re-initialized.
*/
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.mp_state.mp_state = KVM_MP_STATE_STOPPED;
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ spin_lock(&tmp->arch.mp_state_lock);
+ WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+ spin_unlock(&tmp->arch.mp_state_lock);
+ }
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
int ret = 1;
kvm_psci_narrow_to_32bit(vcpu);
fallthrough;
case PSCI_0_2_FN64_CPU_ON:
- mutex_lock(&kvm->lock);
val = kvm_psci_vcpu_on(vcpu);
- mutex_unlock(&kvm->lock);
break;
case PSCI_0_2_FN_AFFINITY_INFO:
kvm_psci_narrow_to_32bit(vcpu);
static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = vcpu->kvm;
u32 psci_fn = smccc_get_function(vcpu);
unsigned long val;
val = PSCI_RET_SUCCESS;
break;
case KVM_PSCI_FN_CPU_ON:
- mutex_lock(&kvm->lock);
val = kvm_psci_vcpu_on(vcpu);
- mutex_unlock(&kvm->lock);
break;
default:
val = PSCI_RET_NOT_SUPPORTED;
int kvm_psci_call(struct kvm_vcpu *vcpu)
{
u32 psci_fn = smccc_get_function(vcpu);
+ int version = kvm_psci_version(vcpu);
unsigned long val;
val = kvm_psci_check_allowed_function(vcpu, psci_fn);
return 1;
}
- switch (kvm_psci_version(vcpu)) {
+ switch (version) {
case KVM_ARM_PSCI_1_1:
return kvm_psci_1_x_call(vcpu, 1);
case KVM_ARM_PSCI_1_0:
case KVM_ARM_PSCI_0_1:
return kvm_psci_0_1_call(vcpu);
default:
- return -EINVAL;
+ WARN_ONCE(1, "Unknown PSCI version %d", version);
+ smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+ return 1;
}
}
__u64 nr;
__u64 args[6];
__u64 ret;
- __u32 longmode;
- __u32 pad;
+
+ union {
+ #ifndef __KERNEL__
+ __u32 longmode;
+ #endif
+ __u64 flags;
+ };
} hypercall;
/* KVM_EXIT_TPR_ACCESS */
struct {
#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
+#define KVM_CAP_COUNTER_OFFSET 227
#ifdef KVM_CAP_IRQ_ROUTING
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
+/* Available with KVM_CAP_COUNTER_OFFSET */
+#define KVM_ARM_SET_COUNTER_OFFSET _IOW(KVMIO, 0xb5, struct kvm_arm_counter_offset)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
return (gva >> vm->page_shift) & mask;
}
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
{
- uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
- return entry & mask;
+ uint64_t pte;
+
+ pte = pa & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
+{
+ uint64_t pa;
+
+ pa = pte & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+
+ return pa;
}
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = vm_alloc_page_table(vm) | 3;
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = paddr | 3;
- *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+ *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
struct kvm_vcpu_init default_init = { .target = -1, };
struct kvm_vm *vm = vcpu->vm;
- uint64_t sctlr_el1, tcr_el1;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
if (!init)
init = &default_init;
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
/* Configure output size */
switch (vm->mode) {
case VM_MODE_P52V48_64K:
tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
- vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
}
close(kvm_fd);
}
+ #define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res)
{
- asm volatile("mov w0, %w[function_id]\n"
- "mov x1, %[arg0]\n"
- "mov x2, %[arg1]\n"
- "mov x3, %[arg2]\n"
- "mov x4, %[arg3]\n"
- "mov x5, %[arg4]\n"
- "mov x6, %[arg5]\n"
- "mov x7, %[arg6]\n"
- "hvc #0\n"
- "mov %[res0], x0\n"
- "mov %[res1], x1\n"
- "mov %[res2], x2\n"
- "mov %[res3], x3\n"
- : [res0] "=r"(res->a0), [res1] "=r"(res->a1),
- [res2] "=r"(res->a2), [res3] "=r"(res->a3)
- : [function_id] "r"(function_id), [arg0] "r"(arg0),
- [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3),
- [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)
- : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7");
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+ }
+
+ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+ {
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
}
void kvm_selftest_arch_init(void)