4.97 KVM_X86_SET_MSR_FILTER
----------------------------
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
:Architectures: x86
:Type: vm ioctl
:Parameters: struct kvm_msr_filter
allows user space to deflect and potentially handle various MSR accesses
into user space.
-If a vCPU is in running state while this ioctl is invoked, the vCPU may
-experience inconsistent filtering behavior on MSR accesses.
+Note, invoking this ioctl while a vCPU is running is inherently racy. However,
+KVM does guarantee that vCPUs will see either the previous filter or the new
+filter, e.g. MSRs with identical settings in both the old and new filter will
+have deterministic behavior.
4.98 KVM_CREATE_SPAPR_TCE_64
----------------------------
authentication tag all of which are needed to decrypt the dump at a
later time.
-
-4.126 KVM_X86_SET_MSR_FILTER
-----------------------------
-
-:Capability: KVM_CAP_X86_MSR_FILTER
-:Architectures: x86
-:Type: vm ioctl
-:Parameters: struct kvm_msr_filter
-:Returns: 0 on success, < 0 on error
-
-::
-
- struct kvm_msr_filter_range {
- #define KVM_MSR_FILTER_READ (1 << 0)
- #define KVM_MSR_FILTER_WRITE (1 << 1)
- __u32 flags;
- __u32 nmsrs; /* number of msrs in bitmap */
- __u32 base; /* MSR index the bitmap starts at */
- __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
- };
-
- #define KVM_MSR_FILTER_MAX_RANGES 16
- struct kvm_msr_filter {
- #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
- #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
- __u32 flags;
- struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
- };
-
-flags values for ``struct kvm_msr_filter_range``:
-
-``KVM_MSR_FILTER_READ``
-
- Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
- indicates that a read should immediately fail, while a 1 indicates that
- a read for a particular MSR should be handled regardless of the default
- filter action.
-
-``KVM_MSR_FILTER_WRITE``
-
- Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
- indicates that a write should immediately fail, while a 1 indicates that
- a write for a particular MSR should be handled regardless of the default
- filter action.
-
-``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
-
- Filter both read and write accesses to MSRs using the given bitmap. A 0
- in the bitmap indicates that both reads and writes should immediately fail,
- while a 1 indicates that reads and writes for a particular MSR are not
- filtered by this range.
-
-flags values for ``struct kvm_msr_filter``:
-
-``KVM_MSR_FILTER_DEFAULT_ALLOW``
-
- If no filter range matches an MSR index that is getting accessed, KVM will
- fall back to allowing access to the MSR.
-
-``KVM_MSR_FILTER_DEFAULT_DENY``
-
- If no filter range matches an MSR index that is getting accessed, KVM will
- fall back to rejecting access to the MSR. In this mode, all MSRs that should
- be processed by KVM need to explicitly be marked as allowed in the bitmaps.
-
-This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
-specify whether a certain MSR access should be explicitly filtered for or not.
-
-If this ioctl has never been invoked, MSR accesses are not guarded and the
-default KVM in-kernel emulation behavior is fully preserved.
-
-Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
-filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
-an error.
-
-As soon as the filtering is in place, every MSR access is processed through
-the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
-x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
-and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
-register.
-
-If a bit is within one of the defined ranges, read and write accesses are
-guarded by the bitmap's value for the MSR index if the kind of access
-is included in the ``struct kvm_msr_filter_range`` flags. If no range
-cover this particular access, the behavior is determined by the flags
-field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
-and ``KVM_MSR_FILTER_DEFAULT_DENY``.
-
-Each bitmap range specifies a range of MSRs to potentially allow access on.
-The range goes from MSR index [base .. base+nmsrs]. The flags field
-indicates whether reads, writes or both reads and writes are filtered
-by setting a 1 bit in the bitmap for the corresponding MSR index.
-
-If an MSR access is not permitted through the filtering, it generates a
-#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
-allows user space to deflect and potentially handle various MSR accesses
-into user space.
-
-Note, invoking this ioctl with a vCPU is running is inherently racy. However,
-KVM does guarantee that vCPUs will see either the previous filter or the new
-filter, e.g. MSRs with identical settings in both the old and new filter will
-have deterministic behavior.
-
-4.127 KVM_XEN_HVM_SET_ATTR
+4.126 KVM_XEN_HVM_SET_ATTR
--------------------------
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
(0x40000001). Otherwise, a guest may use the paravirtual features
regardless of what has actually been exposed through the CPUID leaf.
- 8.29 KVM_CAP_DIRTY_LOG_RING
- ---------------------------
+ 8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+ ----------------------------------------------------------
:Architectures: x86
:Parameters: args[0] - size of the dirty log ring
flags of a GFN have the DIRTY bit cleared, meaning that it has harvested
all the dirty GFNs that were available.
+ Note that on weakly ordered architectures, userspace accesses to the
+ ring buffer (and more specifically the 'flags' field) must be ordered,
+ using load-acquire/store-release accessors when available, or any
+ other memory barrier that will ensure this ordering.
+
It's not necessary for userspace to harvest the all dirty GFNs at once.
However it must collect the dirty GFNs in sequence, i.e., the userspace
program cannot skip one dirty GFN to collect the one next to it.
machine will switch to ring-buffer dirty page tracking and further
KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
+ NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
+ should be exposed by weakly ordered architecture, in order to indicate
+ the additional memory ordering requirements imposed on userspace when
+ reading the state of an entry and mutating it from DIRTY to HARVESTED.
+ Architecture with TSO-like ordering (such as x86) are allowed to
+ expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+ to userspace.
+
8.30 KVM_CAP_XEN_HVM
--------------------
N: sun50i
ARM/Amlogic Meson SoC CLOCK FRAMEWORK
S: Maintained
F: sound/soc/meson/
ARM/Amlogic Meson SoC support
F: arch/arm/mach-orion5x/ts78xx-*
ARM/OXNAS platform support
S: Maintained
F: drivers/gpu/drm/sun4i/
DRM DRIVERS FOR AMLOGIC SOCS
S: Supported
DRM DRIVERS FOR BRIDGE CHIPS
F: drivers/dma/hisi_dma.c
HISILICON GPIO DRIVER
-M: Luo Jiaxing <luojiaxing@huawei.com>
+M: Jay Fang <f.fangjian@huawei.com>
S: Maintained
F: drivers/gpio/gpio-hisi.c
F: drivers/crypto/hisilicon/zip/
HISILICON ROCE DRIVER
S: Maintained
F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
F: Documentation/devicetree/bindings/serio/
F: Documentation/input/
F: drivers/input/
+F: include/dt-bindings/input/
F: include/linux/input.h
F: include/linux/input/
F: include/uapi/linux/input-event-codes.h
ITE IT66121 HDMI BRIDGE DRIVER
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
F: arch/arm64/include/asm/kvm*
F: kernel/module/kdb.c
KHADAS MCU MFD DRIVER
S: Maintained
F: Documentation/devicetree/bindings/mfd/khadas,mcu.yaml
F: drivers/watchdog/menz69_wdt.c
MESON AO CEC DRIVER FOR AMLOGIC SOCS
S: Supported
F: drivers/media/cec/platform/meson/ao-cec.c
MESON GE2D DRIVER FOR AMLOGIC SOCS
S: Supported
F: drivers/mtd/nand/raw/meson_*
MESON VIDEO DECODER DRIVER FOR AMLOGIC SOCS
S: Supported
S: Supported
+F: Documentation/devicetree/bindings/clock/microchip,mpfs.yaml
+F: Documentation/devicetree/bindings/gpio/microchip,mpfs-gpio.yaml
+F: Documentation/devicetree/bindings/i2c/microchip,corei2c.yaml
+F: Documentation/devicetree/bindings/mailbox/microchip,mpfs-mailbox.yaml
+F: Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml
+F: Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml
+F: Documentation/devicetree/bindings/soc/microchip/microchip,mpfs-sys-controller.yaml
+F: Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml
+F: Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml
F: arch/riscv/boot/dts/microchip/
F: drivers/char/hw_random/mpfs-rng.c
F: drivers/clk/microchip/clk-mpfs.c
+F: drivers/i2c/busses/i2c-microchip-core.c
F: drivers/mailbox/mailbox-mpfs.c
F: drivers/pci/controller/pcie-microchip-host.c
F: drivers/rtc/rtc-mpfs.c
S: Maintained
F: drivers/infiniband/ulp/rtrs/
+RUNTIME VERIFICATION (RV)
+S: Maintained
+F: Documentation/trace/rv/
+F: include/linux/rv.h
+F: include/rv/
+F: kernel/trace/rv/
+F: tools/verification/
+
RXRPC SOCKETS (AF_RXRPC)
F: include/linux/trace*.h
F: include/trace/
F: kernel/trace/
+F: scripts/tracing/
F: tools/testing/selftests/ftrace/
TRACING MMIO ACCESSES (MMIOTRACE)
S: Maintained
+F: Documentation/block/ublk.rst
F: drivers/block/ublk_drv.c
F: include/uapi/linux/ublk_cmd.h
S: Maintained
-F: Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
+F: Documentation/devicetree/bindings/gpio/xlnx,gpio-xilinx.yaml
F: Documentation/devicetree/bindings/gpio/gpio-zynq.yaml
F: drivers/gpio/gpio-xilinx.c
F: drivers/gpio/gpio-zynq.c
*/
#if VA_BITS > 48
mrs_s x0, SYS_ID_AA64MMFR2_EL1
- tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
+ tst x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT
mov x0, #VA_BITS
mov x25, #VA_BITS_MIN
csel x25, x25, x0, eq
SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
+#ifdef CONFIG_RELOCATABLE
add x5, x5, x23 // add KASLR displacement
+#endif
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
*/
SYM_FUNC_START(__enable_mmu)
mrs x3, ID_AA64MMFR0_EL1
- ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
- cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+ ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN
b.lt __no_granule_support
- cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+ cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX
b.gt __no_granule_support
phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0
b.ne 2f
mrs_s x0, SYS_ID_AA64MMFR2_EL1
- and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ and x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
cbnz x0, 2f
update_early_cpu_boot_status \
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
preempt_disable();
vgic_v4_load(vcpu);
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
- kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size);
+ kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
return pkvm_drop_host_privileges();
}
if (!arg)
return -EINVAL;
+ if (strcmp(arg, "none") == 0) {
+ kvm_mode = KVM_MODE_NONE;
+ return 0;
+ }
+
+ if (!is_hyp_mode_available()) {
+ pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n");
+ return 0;
+ }
+
if (strcmp(arg, "protected") == 0) {
if (!is_kernel_in_hyp_mode())
kvm_mode = KVM_MODE_PROTECTED;
return 0;
}
- if (strcmp(arg, "none") == 0) {
- kvm_mode = KVM_MODE_NONE;
- return 0;
- }
-
return -EINVAL;
}
early_param("kvm-arm.mode", early_kvm_mode_cfg);
# SPDX-License-Identifier: GPL-2.0-only
+ /aarch64/aarch32_id_regs
/aarch64/arch_timer
/aarch64/debug-exceptions
/aarch64/get-reg-list
/x86_64/max_vcpuid_cap_test
/x86_64/mmio_warning_test
/x86_64/monitor_mwait_test
+/x86_64/nested_exceptions_test
/x86_64/nx_huge_pages_test
/x86_64/platform_info_test
/x86_64/pmu_event_filter_test
LIBKVM += lib/sparsebit.c
LIBKVM += lib/test_util.c
+LIBKVM_STRING += lib/string_override.c
+
LIBKVM_x86_64 += lib/x86_64/apic.c
LIBKVM_x86_64 += lib/x86_64/handlers.S
LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
+TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
+ TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+# Compile the string overrides as freestanding to prevent the compiler from
+# generating self-referential code, e.g. without "freestanding" the compiler may
+# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
+
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
return _open_kvm_dev_path_or_exit(O_RDONLY);
}
+static bool get_module_param_bool(const char *module_name, const char *param)
+{
+ const int path_size = 128;
+ char path[path_size];
+ char value;
+ ssize_t r;
+ int fd;
+
+ r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
+ module_name, param);
+ TEST_ASSERT(r < path_size,
+ "Failed to construct sysfs path in %d bytes.", path_size);
+
+ fd = open_path_or_exit(path, O_RDONLY);
+
+ r = read(fd, &value, 1);
+ TEST_ASSERT(r == 1, "read(%s) failed", path);
+
+ r = close(fd);
+ TEST_ASSERT(!r, "close(%s) failed", path);
+
+ if (value == 'Y')
+ return true;
+ else if (value == 'N')
+ return false;
+
+ TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
+}
+
+bool get_kvm_intel_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_intel", param);
+}
+
+bool get_kvm_amd_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_amd", param);
+}
+
/*
* Capability
*
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
- vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
+ if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
+ else
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
vm->dirty_ring_size = ring_size;
}
int ret = -EINTR;
int idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvm_arch_vcpu_runnable(vcpu)) {
- kvm_make_request(KVM_REQ_UNHALT, vcpu);
+ if (kvm_arch_vcpu_runnable(vcpu))
goto out;
- }
if (kvm_cpu_has_pending_timer(vcpu))
goto out;
if (signal_pending(current))
case KVM_CAP_NR_MEMSLOTS:
return KVM_USER_MEM_SLOTS;
case KVM_CAP_DIRTY_LOG_RING:
- #ifdef CONFIG_HAVE_KVM_DIRTY_RING
+ #ifdef CONFIG_HAVE_KVM_DIRTY_RING_TSO
+ return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
+ #else
+ return 0;
+ #endif
+ case KVM_CAP_DIRTY_LOG_RING_ACQ_REL:
+ #ifdef CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL
return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
#else
return 0;
return 0;
}
case KVM_CAP_DIRTY_LOG_RING:
+ case KVM_CAP_DIRTY_LOG_RING_ACQ_REL:
return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
default:
return kvm_vm_ioctl_enable_cap(kvm, cap);
r = kvm_async_pf_init();
if (r)
- goto out_free_5;
+ goto out_free_4;
kvm_chardev_ops.owner = module;
out_unreg:
kvm_async_pf_deinit();
-out_free_5:
+out_free_4:
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
-out_free_4:
kmem_cache_destroy(kvm_vcpu_cache);
out_free_3:
unregister_reboot_notifier(&kvm_reboot_notifier);