select DMA_DIRECT_REMAP
select EDAC_SUPPORT
select FRAME_POINTER
+ select FUNCTION_ALIGNMENT_4B
+ select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS
select GENERIC_ALLOCATOR
select GENERIC_ARCH_TOPOLOGY
select GENERIC_CLOCKEVENTS_BROADCAST
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
- if $(cc-option,-fpatchable-function-entry=2)
+ select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
+ if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
if DYNAMIC_FTRACE_WITH_ARGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
If unsure, say Y.
+config ARM64_ERRATUM_2645198
+ bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption"
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A715 erratum 2645198.
+
+	  If a Cortex-A715 CPU sees a page mapping permission change from executable
+	  to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the
+	  next instruction abort caused by a permission fault.
+
+	  Only user space transitions mappings from executable to non-executable, via
+	  the mprotect() system call. Work around the problem by doing a
+	  break-before-make TLB invalidation for all changes to executable user-space
+	  mappings.
+
+ If unsure, say Y.
+
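For illustration only (not part of the patch): "break-before-make" means the old executable entry is cleared and its TLB entry invalidated before the new non-executable entry is written. A minimal sketch using the generic pgtable helpers, with a hypothetical helper name; the real workaround is the ptep_modify_prot_start() change further down:

	#include <linux/mm.h>

	/* Illustrative only: replace a live user PTE using break-before-make. */
	static pte_t bbm_replace_pte(struct vm_area_struct *vma, unsigned long addr,
				     pte_t *ptep, pte_t newpte)
	{
		pte_t old = ptep_clear_flush(vma, addr, ptep);	/* clear entry + TLBI */

		set_pte_at(vma->vm_mm, addr, ptep, newpte);	/* install the new entry */
		return old;
	}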
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
help
Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
+ # include/linux/mmzone.h requires the following to be true:
+ #
+ # MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS
+ #
+ # so the maximum value of MAX_ORDER is SECTION_SIZE_BITS + 1 - PAGE_SHIFT:
+ #
+ #      | SECTION_SIZE_BITS | PAGE_SHIFT | max MAX_ORDER | default MAX_ORDER |
+ # -----+-------------------+------------+---------------+-------------------+
+ #  4K  |        27         |     12     |      16       |        11         |
+ #  16K |        27         |     14     |      14       |        12         |
+ #  64K |        29         |     16     |      14       |        14         |
config ARCH_FORCE_MAX_ORDER
- int
+ int "Maximum zone order" if ARM64_4K_PAGES || ARM64_16K_PAGES
default "14" if ARM64_64K_PAGES
+ range 12 14 if ARM64_16K_PAGES
default "12" if ARM64_16K_PAGES
+ range 11 16 if ARM64_4K_PAGES
default "11"
help
The kernel memory allocator divides physically contiguous memory
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
- We make sure that we can allocate upto a HugePage size for each configuration.
+ We make sure that we can allocate up to a HugePage size for each configuration.
Hence we have :
MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
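As a worked check of that formula (each arm64 translation level resolves PAGE_SHIFT - 3 bits, so PMD_SHIFT = 2 * PAGE_SHIFT - 3):

	16K pages: PMD_SHIFT = 25, PAGE_SHIFT = 14  =>  (25 - 14) + 1 = 12 = PAGE_SHIFT - 2
	64K pages: PMD_SHIFT = 29, PAGE_SHIFT = 16  =>  (29 - 16) + 1 = 14 = PAGE_SHIFT - 2
	 4K pages: PMD_SHIFT = 21, PAGE_SHIFT = 12  =>  (21 - 12) + 1 = 10; the default of 11
	           already covers the 2 MiB PMD hugepage, so it is left at the generic value.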
bool "Use pointer authentication for kernel"
default y
depends on ARM64_PTR_AUTH
- depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
+ depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_ARMV8_3
# Modern compilers insert a .note.gnu.property section note for PAC
# which is only understood by binutils starting with version 2.33.1.
depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
# GCC 7, 8
def_bool $(cc-option,-msign-return-address=all)
- config AS_HAS_PAC
+ config AS_HAS_ARMV8_3
def_bool $(cc-option,-Wa$(comma)-march=armv8.3-a)
config AS_HAS_CFI_NEGATE_RA_STATE
This enables support for the Bitmain SoC Family.
config ARCH_EXYNOS
- bool "ARMv8 based Samsung Exynos SoC family"
+ bool "Samsung Exynos SoC family"
select COMMON_CLK_SAMSUNG
select CLKSRC_EXYNOS_MCT
select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
This enables support for ARMv8 based Samsung Exynos SoC family.
config ARCH_SPARX5
- bool "ARMv8 based Microchip Sparx5 SoC family"
+ bool "Microchip Sparx5 SoC family"
select PINCTRL
select DW_APB_TIMER_OF
help
select PINCTRL_ARMADA_CP110
select PINCTRL_AC5
help
- This enables support for Marvell EBU familly, including:
+ This enables support for Marvell EBU family, including:
- Armada 3700 SoC Family
- Armada 7K SoC Family
- Armada 8K SoC Family
if ARCH_NXP
config ARCH_LAYERSCAPE
- bool "ARMv8 based Freescale Layerscape SoC family"
+ bool "Freescale Layerscape SoC family"
select EDAC_SUPPORT
help
This enables support for the Freescale Layerscape SoC family.
config ARCH_MXC
- bool "ARMv8 based NXP i.MX SoC family"
+ bool "NXP i.MX SoC family"
select ARM64_ERRATUM_843419
select ARM64_ERRATUM_845719 if COMPAT
select IMX_GPCV2
This enables support for the NVIDIA Tegra SoC family.
config ARCH_TESLA_FSD
- bool "ARMv8 based Tesla platform"
+ bool "Tesla platform"
depends on ARCH_EXYNOS
help
Support for ARMv8 based Tesla platforms.
include/generated/asm-offsets.h))
endif
- ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
- # make sure to pass the newest target architecture to -march.
- asm-arch := armv8.2-a
- endif
-
- # Ensure that if the compiler supports branch protection we default it
- # off, this will be overridden if we are using branch protection.
- branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
-
- ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
- branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all
- # We enable additional protection for leaf functions as there is some
- # narrow potential for ROP protection benefits and no substantial
- # performance impact has been observed.
- PACRET-y := pac-ret+leaf
-
- # Using a shadow call stack in leaf functions is too costly, so avoid PAC there
- # as well when we may be patching PAC into SCS
- PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret
-
ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
- branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti
+ KBUILD_CFLAGS += -mbranch-protection=pac-ret+bti
+ else ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
+ ifeq ($(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET),y)
+ KBUILD_CFLAGS += -mbranch-protection=pac-ret
+ else
+ KBUILD_CFLAGS += -msign-return-address=non-leaf
+ endif
else
- branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y)
- endif
- # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
- # compiler to generate them and consequently to break the single image contract
- # we pass it only to the assembler. This option is utilized only in case of non
- # integrated assemblers.
- ifeq ($(CONFIG_AS_HAS_PAC), y)
- asm-arch := armv8.3-a
- endif
- endif
-
- KBUILD_CFLAGS += $(branch-prot-flags-y)
-
- ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
- # make sure to pass the newest target architecture to -march.
- asm-arch := armv8.4-a
+ KBUILD_CFLAGS += $(call cc-option,-mbranch-protection=none)
endif
+ # Tell the assembler to support instructions from the latest target
+ # architecture.
+ #
+ # For non-integrated assemblers we'll pass this on the command line, and for
+ # integrated assemblers we'll define ARM64_ASM_ARCH and ARM64_ASM_PREAMBLE for
+ # inline usage.
+ #
+ # We cannot pass the same arch flag to the compiler as this would allow it to
+ # freely generate instructions which are not supported by earlier architecture
+ # versions, which would prevent a single kernel image from working on earlier
+ # hardware.
ifeq ($(CONFIG_AS_HAS_ARMV8_5), y)
- # make sure to pass the newest target architecture to -march.
- asm-arch := armv8.5-a
+ asm-arch := armv8.5-a
+ else ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
+ asm-arch := armv8.4-a
+ else ifeq ($(CONFIG_AS_HAS_ARMV8_3), y)
+ asm-arch := armv8.3-a
+ else ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
+ asm-arch := armv8.2-a
endif
ifdef asm-arch
CHECKFLAGS += -D__aarch64__
- ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
+ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS),y)
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2
+ else ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
endif
endif
endif
+include $(srctree)/scripts/Makefile.defconf
+
+PHONY += virtconfig
+virtconfig:
+ $(call merge_into_defconfig_override,defconfig,virt)
+
define archhelp
echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
})
extern spinlock_t efi_rt_lock;
+extern u64 *efi_rt_stack_top;
efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+/*
+ * efi_rt_stack_top[-1] contains the value the stack pointer had before
+ * switching to the EFI runtime stack.
+ */
+#define current_in_efi() \
+ (!preemptible() && efi_rt_stack_top != NULL && \
+ on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1))
+
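For illustration (not from the patch): a hypothetical diagnostic helper showing how current_in_efi() and the saved stack pointer are meant to be consumed by an exception/unwind path:

	#include <linux/printk.h>
	#include <asm/efi.h>

	/* Hypothetical helper: report when a fault interrupted EFI runtime code. */
	static void report_efi_context(void)
	{
		if (!current_in_efi())
			return;

		/* SP the task had when it entered __efi_rt_asm_wrapper() */
		pr_warn("exception in EFI runtime services, caller SP: 0x%lx\n",
			(unsigned long)READ_ONCE(efi_rt_stack_top[-1]));
	}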
#define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
/*
#define EFI_ALLOC_ALIGN SZ_64K
#define EFI_ALLOC_LIMIT ((1UL << 48) - 1)
+ extern unsigned long primary_entry_offset(void);
+
/*
* On ARM systems, virtually remapped UEFI runtime services are set up in two
* distinct stages:
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
+#define ESR_ELx_FSC_SEA_TTW0 (0x14)
+#define ESR_ELx_FSC_SEA_TTW1 (0x15)
+#define ESR_ELx_FSC_SEA_TTW2 (0x16)
+#define ESR_ELx_FSC_SEA_TTW3 (0x17)
+#define ESR_ELx_FSC_SECC (0x18)
+#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
+#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
+#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
+#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
/* ISS field definitions for Data Aborts */
#define ESR_ELx_ISV_SHIFT (24)
#define ESR_ELx_SME_ISS_ILL 1
#define ESR_ELx_SME_ISS_SM_DISABLED 2
#define ESR_ELx_SME_ISS_ZA_DISABLED 3
+ #define ESR_ELx_SME_ISS_ZT_DISABLED 4
#ifndef __ASSEMBLY__
#include <asm/types.h>
}
extern void __sync_icache_dcache(pte_t pteval);
+ bool pgattr_change_is_safe(u64 old, u64 new);
/*
* PTE bits configuration in the presence of hardware Dirty Bit Management
* PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
*/
- static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
pte_t pte)
{
pte_t old_pte;
VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
"%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
__func__, pte_val(old_pte), pte_val(pte));
+ VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
+ "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
+ __func__, pte_val(old_pte), pte_val(pte));
}
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
mte_sync_tags(old_pte, pte);
}
- __check_racy_pte_update(mm, ptep, pte);
+ __check_safe_pte_update(mm, ptep, pte);
set_pte(ptep, pte);
}
#define pud_leaf(pud) (pud_present(pud) && !pud_table(pud))
#define pud_valid(pud) pte_valid(pud_pte(pud))
#define pud_user(pud) pte_user(pud_pte(pud))
-
+#define pud_user_exec(pud) pte_user_exec(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
#else
#define pud_page_paddr(pud) ({ BUILD_BUG(); 0; })
+#define pud_user_exec(pud) pud_user(pud) /* Always 0 with folding */
/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
#define pmd_set_fixmap(addr) NULL
static inline bool pmd_user_accessible_page(pmd_t pmd)
{
- return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
+ return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
}
static inline bool pud_user_accessible_page(pud_t pud)
{
- return pud_leaf(pud) && pud_user(pud);
+ return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud));
}
#endif
}
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+#define ptep_modify_prot_start ptep_modify_prot_start
+extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define ptep_modify_prot_commit ptep_modify_prot_commit
+extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
/*
* TIF_SME controls whether a task can use SME without trapping while
* in userspace, when TIF_SME is set then we must have storage
- * alocated in sve_state and za_state to store the contents of both ZA
+	 * allocated in sve_state and sme_state to store the contents of both ZA
* and the SVE registers for both streaming and non-streaming modes.
*
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
WARN_ON(!system_supports_fpsimd());
WARN_ON(!have_cpu_fpsimd_context());
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
switch (current->thread.fp_type) {
case FP_STATE_FPSIMD:
/* Stop tracking SVE for this task until next use. */
write_sysreg_s(current->thread.svcr, SYS_SVCR);
if (thread_za_enabled(&current->thread))
- za_load_state(current->thread.za_state);
+ sme_load_state(current->thread.sme_state,
+ system_supports_sme2());
if (thread_sm_enabled(&current->thread))
restore_ffr = system_supports_fa64();
*svcr = read_sysreg_s(SYS_SVCR);
if (*svcr & SVCR_ZA_MASK)
- za_save_state(last->za_state);
+ sme_save_state(last->sme_state,
+ system_supports_sme2());
/* If we are in streaming mode override regular SVE. */
if (*svcr & SVCR_SM_MASK) {
#ifdef CONFIG_ARM64_SME
/*
- * Ensure that task->thread.za_state is allocated and sufficiently large.
+ * Ensure that task->thread.sme_state is allocated and sufficiently large.
*
* This function should be used only in preparation for replacing
- * task->thread.za_state with new data. The memory is always zeroed
+ * task->thread.sme_state with new data. The memory is always zeroed
* here to prevent stale data from showing through: this is done in
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
void sme_alloc(struct task_struct *task)
{
- if (task->thread.za_state) {
- memset(task->thread.za_state, 0, za_state_size(task));
+ if (task->thread.sme_state) {
+ memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
/* This could potentially be up to 64K. */
- task->thread.za_state =
- kzalloc(za_state_size(task), GFP_KERNEL);
+ task->thread.sme_state =
+ kzalloc(sme_state_size(task), GFP_KERNEL);
}
static void sme_free(struct task_struct *task)
{
- kfree(task->thread.za_state);
- task->thread.za_state = NULL;
+ kfree(task->thread.sme_state);
+ task->thread.sme_state = NULL;
}
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
isb();
}
+ /*
+ * This must be called after sme_kernel_enable(), we rely on the
+ * feature table being sorted to ensure this.
+ */
+ void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+ {
+ /* Allow use of ZT0 */
+ write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
+ SYS_SMCR_EL1);
+ }
+
/*
* This must be called after sme_kernel_enable(), we rely on the
* feature table being sorted to ensure this.
unsigned int vq_max;
sme_kernel_enable(NULL);
- sme_smstart_sm();
/*
* Set the maximum possible VL.
smcr = read_sysreg_s(SYS_SMCR_EL1);
smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
+ vq_max = sve_vq_from_vl(sme_get_vl());
smcr |= vq_max - 1; /* set LEN field to maximum effective value */
- sme_smstop_sm();
-
return smcr;
}
sve_alloc(current, false);
sme_alloc(current);
- if (!current->thread.sve_state || !current->thread.za_state) {
+ if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
}
void fpsimd_flush_thread(void)
{
void *sve_state = NULL;
- void *za_state = NULL;
+ void *sme_state = NULL;
if (!system_supports_fpsimd())
return;
clear_thread_flag(TIF_SME);
/* Defer kfree() while in atomic context */
- za_state = current->thread.za_state;
- current->thread.za_state = NULL;
+ sme_state = current->thread.sme_state;
+ current->thread.sme_state = NULL;
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
put_cpu_fpsimd_context();
kfree(sve_state);
- kfree(za_state);
+ kfree(sme_state);
}
/*
WARN_ON(!system_supports_fpsimd());
last->st = &current->thread.uw.fpsimd_state;
last->sve_state = current->thread.sve_state;
- last->za_state = current->thread.za_state;
+ last->sme_state = current->thread.sme_state;
last->sve_vl = task_get_sve_vl(current);
last->sme_vl = task_get_sme_vl(current);
last->svcr = &current->thread.svcr;
unsigned long tls[2];
tls[0] = target->thread.uw.tp_value;
- if (system_supports_sme())
+ if (system_supports_tpidr2())
tls[1] = target->thread.tpidr2_el0;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, tls, 0, count);
return ret;
target->thread.uw.tp_value = tls[0];
- if (system_supports_sme())
+ if (system_supports_tpidr2())
target->thread.tpidr2_el0 = tls[1];
return ret;
if (thread_za_enabled(&target->thread)) {
start = end;
end = ZA_PT_SIZE(vq);
- membuf_write(&to, target->thread.za_state, end - start);
+ membuf_write(&to, target->thread.sme_state, end - start);
}
/* Zero any trailing padding */
/* Allocate/reinit ZA storage */
sme_alloc(target);
- if (!target->thread.za_state) {
+ if (!target->thread.sme_state) {
ret = -ENOMEM;
goto out;
}
start = ZA_PT_ZA_OFFSET;
end = ZA_PT_SIZE(vq);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.za_state,
+ target->thread.sme_state,
start, end);
if (ret)
goto out;
return ret;
}
+ static int zt_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+ {
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ /*
+ * If PSTATE.ZA is not set then ZT will be zeroed when it is
+ * enabled so report the current register value as zero.
+ */
+ if (thread_za_enabled(&target->thread))
+ membuf_write(&to, thread_zt_state(&target->thread),
+ ZT_SIG_REG_BYTES);
+ else
+ membuf_zero(&to, ZT_SIG_REG_BYTES);
+
+ return 0;
+ }
+
+ static int zt_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+ {
+ int ret;
+
+ if (!system_supports_sme2())
+ return -EINVAL;
+
+ if (!thread_za_enabled(&target->thread)) {
+ sme_alloc(target);
+ if (!target->thread.sme_state)
+ return -ENOMEM;
+ }
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ thread_zt_state(&target->thread),
+ 0, ZT_SIG_REG_BYTES);
+ if (ret == 0)
+ target->thread.svcr |= SVCR_ZA_MASK;
+
+ return ret;
+ }
+
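A hedged user-space sketch of exercising the new regset (assumes NT_ARM_ZT and the 64-byte ZT0 size from the matching UAPI headers, plus a stopped tracee; not part of the patch):

	#include <elf.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <sys/uio.h>

	static long read_zt0(pid_t pid, unsigned char zt0[64])
	{
		struct iovec iov = { .iov_base = zt0, .iov_len = 64 };

		/* zt_get() above returns -EINVAL when the system lacks SME2. */
		return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
	}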
#endif /* CONFIG_ARM64_SME */
#ifdef CONFIG_ARM64_PTR_AUTH
#ifdef CONFIG_ARM64_SVE
REGSET_SVE,
#endif
-#ifdef CONFIG_ARM64_SVE
+#ifdef CONFIG_ARM64_SME
REGSET_SSVE,
REGSET_ZA,
+ REGSET_ZT,
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
REGSET_PAC_MASK,
.regset_get = za_get,
.set = za_set,
},
+ [REGSET_ZT] = { /* SME ZT */
+ .core_note_type = NT_ARM_ZT,
+ .n = 1,
+ .size = ZT_SIG_REG_BYTES,
+ .align = sizeof(u64),
+ .regset_get = zt_get,
+ .set = zt_set,
+ },
#endif
#ifdef CONFIG_ARM64_PTR_AUTH
[REGSET_PAC_MASK] = {
unsigned long fpsimd_offset;
unsigned long esr_offset;
unsigned long sve_offset;
+ unsigned long tpidr2_offset;
unsigned long za_offset;
+ unsigned long zt_offset;
unsigned long extra_offset;
unsigned long end_offset;
};
return base + offset;
}
+ struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+ u32 fpsimd_size;
+ struct sve_context __user *sve;
+ u32 sve_size;
+ struct tpidr2_context __user *tpidr2;
+ u32 tpidr2_size;
+ struct za_context __user *za;
+ u32 za_size;
+ struct zt_context __user *zt;
+ u32 zt_size;
+ };
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct user_fpsimd_state const *fpsimd =
return err ? -EFAULT : 0;
}
- static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
+ static int restore_fpsimd_context(struct user_ctxs *user)
{
struct user_fpsimd_state fpsimd;
- __u32 magic, size;
int err = 0;
- /* check the magic/size information */
- __get_user_error(magic, &ctx->head.magic, err);
- __get_user_error(size, &ctx->head.size, err);
- if (err)
- return -EFAULT;
- if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context))
+ /* check the size information */
+ if (user->fpsimd_size != sizeof(struct fpsimd_context))
return -EINVAL;
/* copy the FP and status/control registers */
- err = __copy_from_user(fpsimd.vregs, ctx->vregs,
+ err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
sizeof(fpsimd.vregs));
- __get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
- __get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
+ __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
+ __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
clear_thread_flag(TIF_SVE);
current->thread.fp_type = FP_STATE_FPSIMD;
}
- struct user_ctxs {
- struct fpsimd_context __user *fpsimd;
- struct sve_context __user *sve;
- struct za_context __user *za;
- };
-
#ifdef CONFIG_ARM64_SVE
static int preserve_sve_context(struct sve_context __user *ctx)
static int restore_sve_fpsimd_context(struct user_ctxs *user)
{
- int err;
+ int err = 0;
unsigned int vl, vq;
struct user_fpsimd_state fpsimd;
- struct sve_context sve;
+ u16 user_vl, flags;
- if (__copy_from_user(&sve, user->sve, sizeof(sve)))
- return -EFAULT;
+ if (user->sve_size < sizeof(*user->sve))
+ return -EINVAL;
+
+ __get_user_error(user_vl, &(user->sve->vl), err);
+ __get_user_error(flags, &(user->sve->flags), err);
+ if (err)
+ return err;
- if (sve.flags & SVE_SIG_FLAG_SM) {
+ if (flags & SVE_SIG_FLAG_SM) {
if (!system_supports_sme())
return -EINVAL;
vl = task_get_sme_vl(current);
} else {
- if (!system_supports_sve())
+ /*
+		 * An SME-only system uses SVE for streaming mode, so it can
+		 * have an SVE-formatted context with a zero VL and no
+		 * payload data.
+ */
+ if (!system_supports_sve() && !system_supports_sme())
return -EINVAL;
vl = task_get_sve_vl(current);
}
- if (sve.vl != vl)
+ if (user_vl != vl)
return -EINVAL;
- if (sve.head.size <= sizeof(*user->sve)) {
+ if (user->sve_size == sizeof(*user->sve)) {
clear_thread_flag(TIF_SVE);
current->thread.svcr &= ~SVCR_SM_MASK;
current->thread.fp_type = FP_STATE_FPSIMD;
goto fpsimd_only;
}
- vq = sve_vq_from_vl(sve.vl);
+ vq = sve_vq_from_vl(vl);
- if (sve.head.size < SVE_SIG_CONTEXT_SIZE(vq))
+ if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
/*
if (err)
return -EFAULT;
- if (sve.flags & SVE_SIG_FLAG_SM)
+ if (flags & SVE_SIG_FLAG_SM)
current->thread.svcr |= SVCR_SM_MASK;
else
set_thread_flag(TIF_SVE);
#ifdef CONFIG_ARM64_SME
+ static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
+ {
+ int err = 0;
+
+ current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+
+ __put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
+ __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+ __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+
+ return err;
+ }
+
+ static int restore_tpidr2_context(struct user_ctxs *user)
+ {
+ u64 tpidr2_el0;
+ int err = 0;
+
+ if (user->tpidr2_size != sizeof(*user->tpidr2))
+ return -EINVAL;
+
+ __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err);
+ if (!err)
+ current->thread.tpidr2_el0 = tpidr2_el0;
+
+ return err;
+ }
+
static int preserve_za_context(struct za_context __user *ctx)
{
int err = 0;
* fpsimd_signal_preserve_current_state().
*/
err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
- current->thread.za_state,
+ current->thread.sme_state,
ZA_SIG_REGS_SIZE(vq));
}
static int restore_za_context(struct user_ctxs *user)
{
- int err;
+ int err = 0;
unsigned int vq;
- struct za_context za;
+ u16 user_vl;
- if (__copy_from_user(&za, user->za, sizeof(za)))
- return -EFAULT;
+ if (user->za_size < sizeof(*user->za))
+ return -EINVAL;
- if (za.vl != task_get_sme_vl(current))
+ __get_user_error(user_vl, &(user->za->vl), err);
+ if (err)
+ return err;
+
+ if (user_vl != task_get_sme_vl(current))
return -EINVAL;
- if (za.head.size <= sizeof(*user->za)) {
+ if (user->za_size == sizeof(*user->za)) {
current->thread.svcr &= ~SVCR_ZA_MASK;
return 0;
}
- vq = sve_vq_from_vl(za.vl);
+ vq = sve_vq_from_vl(user_vl);
- if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq))
+ if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
return -EINVAL;
/*
* Careful: we are about __copy_from_user() directly into
- * thread.za_state with preemption enabled, so protection is
+ * thread.sme_state with preemption enabled, so protection is
* needed to prevent a racing context switch from writing stale
* registers back over the new data.
*/
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
sme_alloc(current);
- if (!current->thread.za_state) {
+ if (!current->thread.sme_state) {
current->thread.svcr &= ~SVCR_ZA_MASK;
clear_thread_flag(TIF_SME);
return -ENOMEM;
}
- err = __copy_from_user(current->thread.za_state,
+ err = __copy_from_user(current->thread.sme_state,
(char __user const *)user->za +
ZA_SIG_REGS_OFFSET,
ZA_SIG_REGS_SIZE(vq));
return 0;
}
+
+ static int preserve_zt_context(struct zt_context __user *ctx)
+ {
+ int err = 0;
+ u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+
+	if (WARN_ON(!thread_za_enabled(&current->thread)))
+ return -EINVAL;
+
+ memset(reserved, 0, sizeof(reserved));
+
+ __put_user_error(ZT_MAGIC, &ctx->head.magic, err);
+ __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
+ &ctx->head.size, err);
+ __put_user_error(1, &ctx->nregs, err);
+ BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+ err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+
+ /*
+ * This assumes that the ZT state has already been saved to
+ * the task struct by calling the function
+ * fpsimd_signal_preserve_current_state().
+ */
+ err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
+			      thread_zt_state(&current->thread),
+ ZT_SIG_REGS_SIZE(1));
+
+ return err ? -EFAULT : 0;
+ }
+
+ static int restore_zt_context(struct user_ctxs *user)
+ {
+ int err;
+ u16 nregs;
+
+ /* ZA must be restored first for this check to be valid */
+	if (!thread_za_enabled(&current->thread))
+ return -EINVAL;
+
+ if (user->zt_size != ZT_SIG_CONTEXT_SIZE(1))
+ return -EINVAL;
+
+ if (__copy_from_user(&nregs, &(user->zt->nregs), sizeof(nregs)))
+ return -EFAULT;
+
+ if (nregs != 1)
+ return -EINVAL;
+
+ /*
+ * Careful: we are about __copy_from_user() directly into
+ * thread.zt_state with preemption enabled, so protection is
+ * needed to prevent a racing context switch from writing stale
+ * registers back over the new data.
+ */
+
+ fpsimd_flush_task_state(current);
+ /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
+
+	err = __copy_from_user(thread_zt_state(&current->thread),
+ (char __user const *)user->zt +
+ ZT_SIG_REGS_OFFSET,
+ ZT_SIG_REGS_SIZE(1));
+ if (err)
+ return -EFAULT;
+
+ return 0;
+ }
+
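A hedged user-space sketch of locating the new record in a delivered signal frame (assumes ZT_MAGIC and struct zt_context from the matching UAPI <asm/sigcontext.h>; EXTRA_MAGIC chaining is ignored for brevity):

	#include <asm/sigcontext.h>
	#include <ucontext.h>

	/* Hypothetical helper: walk the context records until ZT_MAGIC. */
	static struct zt_context *find_zt_context(ucontext_t *uc)
	{
		struct _aarch64_ctx *head =
			(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

		while (head->magic && head->size) {
			if (head->magic == ZT_MAGIC)
				return (struct zt_context *)head;
			head = (struct _aarch64_ctx *)((char *)head + head->size);
		}
		return NULL;
	}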
#else /* ! CONFIG_ARM64_SME */
/* Turn any non-optimised out attempts to use these into a link error: */
+ extern int preserve_tpidr2_context(void __user *ctx);
+ extern int restore_tpidr2_context(struct user_ctxs *user);
extern int preserve_za_context(void __user *ctx);
extern int restore_za_context(struct user_ctxs *user);
+ extern int preserve_zt_context(void __user *ctx);
+ extern int restore_zt_context(struct user_ctxs *user);
#endif /* ! CONFIG_ARM64_SME */
user->fpsimd = NULL;
user->sve = NULL;
+ user->tpidr2 = NULL;
user->za = NULL;
+ user->zt = NULL;
if (!IS_ALIGNED((unsigned long)base, 16))
goto invalid;
if (user->fpsimd)
goto invalid;
- if (size < sizeof(*user->fpsimd))
- goto invalid;
-
user->fpsimd = (struct fpsimd_context __user *)head;
+ user->fpsimd_size = size;
break;
case ESR_MAGIC:
if (user->sve)
goto invalid;
- if (size < sizeof(*user->sve))
+ user->sve = (struct sve_context __user *)head;
+ user->sve_size = size;
+ break;
+
+ case TPIDR2_MAGIC:
+ if (!system_supports_sme())
goto invalid;
- user->sve = (struct sve_context __user *)head;
+ if (user->tpidr2)
+ goto invalid;
+
+ user->tpidr2 = (struct tpidr2_context __user *)head;
+ user->tpidr2_size = size;
break;
case ZA_MAGIC:
if (user->za)
goto invalid;
- if (size < sizeof(*user->za))
+ user->za = (struct za_context __user *)head;
+ user->za_size = size;
+ break;
+
+ case ZT_MAGIC:
+ if (!system_supports_sme2())
goto invalid;
- user->za = (struct za_context __user *)head;
+ if (user->zt)
+ goto invalid;
+
+ user->zt = (struct zt_context __user *)head;
+ user->zt_size = size;
break;
case EXTRA_MAGIC:
if (user.sve)
err = restore_sve_fpsimd_context(&user);
else
- err = restore_fpsimd_context(user.fpsimd);
+ err = restore_fpsimd_context(&user);
}
+ if (err == 0 && system_supports_sme() && user.tpidr2)
+ err = restore_tpidr2_context(&user);
+
if (err == 0 && system_supports_sme() && user.za)
err = restore_za_context(&user);
+ if (err == 0 && system_supports_sme2() && user.zt)
+ err = restore_zt_context(&user);
+
return err;
}
return err;
}
- if (system_supports_sve()) {
+ if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
if (add_all || test_thread_flag(TIF_SVE) ||
else
vl = task_get_sme_vl(current);
+ err = sigframe_alloc(user, &user->tpidr2_offset,
+ sizeof(struct tpidr2_context));
+ if (err)
+ return err;
+
if (thread_za_enabled(&current->thread))
vq = sve_vq_from_vl(vl);
return err;
}
+ if (system_supports_sme2()) {
+		if (add_all || thread_za_enabled(&current->thread)) {
+ err = sigframe_alloc(user, &user->zt_offset,
+ ZT_SIG_CONTEXT_SIZE(1));
+ if (err)
+ return err;
+ }
+ }
+
return sigframe_alloc_end(user);
}
err |= preserve_sve_context(sve_ctx);
}
+ /* TPIDR2 if supported */
+ if (system_supports_sme() && err == 0) {
+ struct tpidr2_context __user *tpidr2_ctx =
+ apply_user_offset(user, user->tpidr2_offset);
+ err |= preserve_tpidr2_context(tpidr2_ctx);
+ }
+
/* ZA state if present */
if (system_supports_sme() && err == 0 && user->za_offset) {
struct za_context __user *za_ctx =
err |= preserve_za_context(za_ctx);
}
+ /* ZT state if present */
+ if (system_supports_sme2() && err == 0 && user->zt_offset) {
+ struct zt_context __user *zt_ctx =
+ apply_user_offset(user, user->zt_offset);
+ err |= preserve_zt_context(zt_ctx);
+ }
+
if (err == 0 && user->extra_offset) {
char __user *sfp = (char __user *)user->sigframe;
char __user *userp =
#include <linux/syscalls.h>
#include <linux/mm_types.h>
#include <linux/kasan.h>
+#include <linux/ubsan.h>
#include <linux/cfi.h>
#include <asm/atomic.h>
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
- else {
- p += sprintf(p, "bad PC value");
- break;
- }
+ else
+ p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
printk("%sCode: %s\n", lvl, str);
};
#endif
+#ifdef CONFIG_UBSAN_TRAP
+static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+{
+ die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
+ return DBG_HOOK_HANDLED;
+}
+
+static struct break_hook ubsan_break_hook = {
+ .fn = ubsan_handler,
+ .imm = UBSAN_BRK_IMM,
+ .mask = UBSAN_BRK_MASK,
+};
+#endif
#define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
#ifdef CONFIG_KASAN_SW_TAGS
if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
+#endif
+#ifdef CONFIG_UBSAN_TRAP
+ if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
+ return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
}
register_kernel_break_hook(&fault_break_hook);
#ifdef CONFIG_KASAN_SW_TAGS
register_kernel_break_hook(&kasan_break_hook);
+#endif
+#ifdef CONFIG_UBSAN_TRAP
+ register_kernel_break_hook(&ubsan_break_hook);
#endif
debug_traps_init();
}
#ifdef CONFIG_HIBERNATION
#define HIBERNATE_TEXT \
+ ALIGN_FUNCTION(); \
__hibernate_exit_text_start = .; \
*(.hibernate_exit.text) \
__hibernate_exit_text_end = .;
#ifdef CONFIG_KEXEC_CORE
#define KEXEC_TEXT \
+ ALIGN_FUNCTION(); \
__relocate_new_kernel_start = .; \
*(.kexec_relocate.text) \
__relocate_new_kernel_end = .;
ENTRY_TEXT
TEXT_TEXT
SCHED_TEXT
- CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
HYPERVISOR_TEXT
- IDMAP_TEXT
*(.gnu.warning)
. = ALIGN(16);
*(.got) /* Global offset table */
TRAMP_TEXT
HIBERNATE_TEXT
KEXEC_TEXT
+ IDMAP_TEXT
. = ALIGN(PAGE_SIZE);
}
#ifdef CONFIG_HIBERNATION
ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K,
"Hibernate exit text is bigger than 4 KiB")
+ ASSERT(__hibernate_exit_text_start == swsusp_arch_suspend_exit,
+ "Hibernate exit text does not start with swsusp_arch_suspend_exit")
#endif
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K,
"kexec relocation code is bigger than 4 KiB")
ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
+ ASSERT(__relocate_new_kernel_start == arm64_relocate_new_kernel,
+ "kexec control page does not start with arm64_relocate_new_kernel")
#endif
return phys;
}
- static bool pgattr_change_is_safe(u64 old, u64 new)
+ bool pgattr_change_is_safe(u64 old, u64 new)
{
/*
* The following mapping attributes may be updated in live
pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
/* creating or taking down mappings is always safe */
- if (old == 0 || new == 0)
+ if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
return true;
+ /* A live entry's pfn should not change */
+ if (pte_pfn(__pte(old)) != pte_pfn(__pte(new)))
+ return false;
+
/* live contiguous mappings may not be manipulated at all */
if ((old | new) & PTE_CONT)
return false;
}
early_initcall(prevent_bootmem_remove_init);
#endif
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+{
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
+ /*
+ * Break-before-make (BBM) is required for all user space mappings
+ * when the permission changes from executable to non-executable
+		 * in cases where the CPU is affected by erratum #2645198.
+ */
+ if (pte_user_exec(READ_ONCE(*ptep)))
+ return ptep_clear_flush(vma, addr, ptep);
+ }
+ return ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ set_pte_at(vma->vm_mm, addr, ptep, pte);
+}
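For reference, these hooks are driven by the generic mprotect path as a start/modify/commit transaction; a condensed sketch (hypothetical helper, simplified from mm/mprotect.c):

	#include <linux/mm.h>

	static void change_one_pte(struct vm_area_struct *vma, unsigned long addr,
				   pte_t *pte, pgprot_t newprot)
	{
		pte_t oldpte = ptep_modify_prot_start(vma, addr, pte);	/* BBM happens here */
		pte_t ptent = pte_modify(oldpte, newprot);

		/* dirty/soft-dirty fixups omitted */
		ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
	}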
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
HAS_GENERIC_AUTH_IMP_DEF
- HAS_IRQ_PRIO_MASKING
+ HAS_GIC_CPUIF_SYSREGS
+ HAS_GIC_PRIO_MASKING
+ HAS_GIC_PRIO_RELAXED_SYNC
HAS_LDAPR
HAS_LSE_ATOMICS
HAS_NO_FPSIMD
HAS_RNG
HAS_SB
HAS_STAGE2_FWB
- HAS_SYSREG_GIC_CPUIF
HAS_TIDCP1
HAS_TLB_RANGE
HAS_VIRT_HOST_EXTN
MTE_ASYMM
SME
SME_FA64
+ SME2
SPECTRE_V2
SPECTRE_V3A
SPECTRE_V4
WORKAROUND_2064142
WORKAROUND_2077057
WORKAROUND_2457168
+WORKAROUND_2645198
WORKAROUND_2658417
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
const u8 *type1_family = efi_get_smbios_string(1, family);
/*
- * Ampere Altra machines crash in SetTime() if SetVirtualAddressMap()
- * has not been called prior.
+ * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if
+ * SetVirtualAddressMap() has not been called prior.
*/
- if (!type1_family || strcmp(type1_family, "Altra"))
+ if (!type1_family || (
+ strcmp(type1_family, "eMAG") &&
+ strcmp(type1_family, "Altra") &&
+ strcmp(type1_family, "Altra Max")))
return false;
efi_warn("Working around broken SetVirtualAddressMap()\n");
return EFI_SUCCESS;
}
+ #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
+ #define DCTYPE "civac"
+ #else
+ #define DCTYPE "cvau"
+ #endif
+
void efi_cache_sync_image(unsigned long image_base,
unsigned long alloc_size,
unsigned long code_size)
u64 lsize = 4 << cpuid_feature_extract_unsigned_field(ctr,
CTR_EL0_DminLine_SHIFT);
- do {
- asm("dc civac, %0" :: "r"(image_base));
- image_base += lsize;
- alloc_size -= lsize;
- } while (alloc_size >= lsize);
+ /* only perform the cache maintenance if needed for I/D coherency */
+ if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
+ do {
+ asm("dc " DCTYPE ", %0" :: "r"(image_base));
+ image_base += lsize;
+ code_size -= lsize;
+ } while (code_size >= lsize);
+ }
asm("ic ialluis");
dsb(ish);
isb();
}
+
+ unsigned long __weak primary_entry_offset(void)
+ {
+ /*
+ * By default, we can invoke the kernel via the branch instruction in
+ * the image header, so offset #0. This will be overridden by the EFI
+ * stub build that is linked into the core kernel, as in that case, the
+ * image header may not have been loaded into memory, or may be mapped
+ * with non-executable permissions.
+ */
+ return 0;
+ }
+
+ void __noreturn efi_enter_kernel(unsigned long entrypoint,
+ unsigned long fdt_addr,
+ unsigned long fdt_size)
+ {
+ void (* __noreturn enter_kernel)(u64, u64, u64, u64);
+
+ enter_kernel = (void *)entrypoint + primary_entry_offset();
+ enter_kernel(fdt_addr, 0, 0, 0);
+ }
*/
static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
- /*
- * Global static key controlling whether an update to PMR allowing more
- * interrupts requires to be propagated to the redistributor (DSB SY).
- * And this needs to be exported for modules to be able to enable
- * interrupts...
- */
- DEFINE_STATIC_KEY_FALSE(gic_pmr_sync);
- EXPORT_SYMBOL(gic_pmr_sync);
-
DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
EXPORT_SYMBOL(gic_nonsecure_priorities);
gic_starting_cpu, NULL);
/* Register all 8 non-secure SGIs */
- base_sgi = __irq_domain_alloc_irqs(gic_data.domain, -1, 8,
- NUMA_NO_NODE, &sgi_fwspec,
- false, NULL);
+ base_sgi = irq_domain_alloc_irqs(gic_data.domain, 8, NUMA_NO_NODE, &sgi_fwspec);
if (WARN_ON(base_sgi <= 0))
return;
for (i = 0; i < gic_data.ppi_nr; i++)
refcount_set(&ppi_nmi_refs[i], 0);
- /*
- * Linux itself doesn't use 1:N distribution, so has no need to
- * set PMHE. The only reason to have it set is if EL3 requires it
- * (and we can't change it).
- */
- if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK)
- static_branch_enable(&gic_pmr_sync);
-
pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n",
- static_branch_unlikely(&gic_pmr_sync) ? "forced" : "relaxed");
+ gic_has_relaxed_pmr_sync() ? "relaxed" : "forced");
/*
* How priority values are used by the GIC depends on two things:
static void gic_check_cpu_features(void)
{
- WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_SYSREG_GIC_CPUIF),
+ WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GIC_CPUIF_SYSREGS),
TAINT_CPU_OUT_OF_SPEC,
"GICv3 system registers enabled, broken firmware!\n");
}
"irqchip/arm/gic:starting",
gic_starting_cpu, NULL);
- base_sgi = __irq_domain_alloc_irqs(gic_data[0].domain, -1, 8,
- NUMA_NO_NODE, &sgi_fwspec,
- false, NULL);
+ base_sgi = irq_domain_alloc_irqs(gic_data[0].domain, 8, NUMA_NO_NODE, &sgi_fwspec);
if (WARN_ON(base_sgi <= 0))
return;
hw->dn++;
continue;
}
- hw->dtcs_used |= arm_cmn_node_to_xp(cmn, dn)->dtc;
hw->num_dns++;
if (bynodeid)
break;
nodeid, nid.x, nid.y, nid.port, nid.dev, type);
return -EINVAL;
}
+ /*
+ * Keep assuming non-cycles events count in all DTC domains; turns out
+ * it's hard to make a worthwhile optimisation around this, short of
+ * going all-in with domain-local counter allocation as well.
+ */
+ hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
return arm_cmn_validate_group(cmn, event);
}
dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx);
dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
+ writeq_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
for (i = 0; i < 4; i++) {
dtm->wp_event[i] = -1;
writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i));
# define __assume_aligned(a, ...)
#endif
- /*
- * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
- * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
- */
- #define __cold __attribute__((__cold__))
-
/*
* Note the long name.
*
*
* clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size
*/
+#if __has_attribute(__pass_dynamic_object_size__)
+# define __pass_dynamic_object_size(type) __attribute__((__pass_dynamic_object_size__(type)))
+#else
+# define __pass_dynamic_object_size(type)
+#endif
#if __has_attribute(__pass_object_size__)
# define __pass_object_size(type) __attribute__((__pass_object_size__(type)))
#else
/* Attributes */
#include <linux/compiler_attributes.h>
+ #if CONFIG_FUNCTION_ALIGNMENT > 0
+ #define __function_aligned __aligned(CONFIG_FUNCTION_ALIGNMENT)
+ #else
+ #define __function_aligned
+ #endif
+
+ /*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
+ *
+ * When -falign-functions=N is in use, we must avoid the cold attribute as
+ * contemporary versions of GCC drop the alignment for cold functions. Worse,
+ * GCC can implicitly mark callees of cold functions as cold themselves, so
+ * it's not sufficient to add __function_aligned here as that will not ensure
+ * that callees are correctly aligned.
+ *
+ * See:
+ *
+ * https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9
+ */
+ #if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0)
+ #define __cold __attribute__((__cold__))
+ #else
+ #define __cold
+ #endif
+
/* Builtins */
/*
#endif
/* Section for code which can't be instrumented at all */
-#define noinstr \
- noinline notrace __attribute((__section__(".noinstr.text"))) \
+#define __noinstr_section(section) \
+ noinline notrace __attribute((__section__(section))) \
__no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
__no_sanitize_memory
+#define noinstr __noinstr_section(".noinstr.text")
+
+/*
+ * The __cpuidle section is used twofold:
+ *
+ * 1) the original use -- identifying if a CPU is 'stuck' in idle state based
+ *    on its instruction pointer. See cpu_in_idle().
+ *
+ * 2) suppressing instrumentation around where cpuidle disables RCU; where the
+ * function isn't strictly required for #1, this is interchangeable with
+ * noinstr.
+ */
+#define __cpuidle __noinstr_section(".cpuidle.text")
+
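A hedged usage sketch (my_idle_wait() is made up, not an existing driver): a function annotated __cpuidle lands in .cpuidle.text, so cpu_in_idle() recognises it and instrumentation stays out of the RCU-idle window:

	#include <linux/sched.h>

	static __cpuidle void my_idle_wait(void)
	{
		while (!need_resched())
			cpu_relax();	/* stand-in for the real low-power wait */
	}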
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
struct task_struct *tsk = current;
int group_dead;
+ WARN_ON(irqs_disabled());
+
synchronize_group_exit(tsk, code);
WARN_ON(tsk->plug);
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
+ if (unlikely(irqs_disabled())) {
+ pr_info("note: %s[%d] exited with irqs disabled\n",
+ current->comm, task_pid_nr(current));
+ local_irq_enable();
+ }
if (unlikely(in_atomic())) {
pr_info("note: %s[%d] exited with preempt_count %d\n",
current->comm, task_pid_nr(current),
}
EXPORT_SYMBOL(thread_group_exited);
- __weak void abort(void)
+ /*
+ * This needs to be __function_aligned as GCC implicitly makes any
+ * implementation of abort() cold and drops alignment specified by
+ * -falign-functions=N.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c11
+ */
+ __weak __function_aligned void abort(void)
{
BUG();
config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
bool
+ config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+ bool
+
config HAVE_DYNAMIC_FTRACE_WITH_ARGS
bool
help
depends on DYNAMIC_FTRACE_WITH_REGS
depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ config DYNAMIC_FTRACE_WITH_CALL_OPS
+ def_bool y
+ depends on HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+
config DYNAMIC_FTRACE_WITH_ARGS
def_bool y
depends on DYNAMIC_FTRACE
default y
help
The ring buffer has its own internal recursion. Although when
- recursion happens it wont cause harm because of the protection,
- but it does cause an unwanted overhead. Enabling this option will
+ recursion happens it won't cause harm because of the protection,
+ but it does cause unwanted overhead. Enabling this option will
place where recursion was detected into the ftrace "recursed_functions"
file.
The test runs for 10 seconds. This will slow your boot time
by at least 10 more seconds.
- At the end of the test, statics and more checks are done.
- It will output the stats of each per cpu buffer. What
+ At the end of the test, statistics and more checks are done.
+	  It will output the stats of each per cpu buffer: what
was written, the sizes, what was read, what was lost, and
other similar details.
void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+ * Stub used to invoke the list ops without requiring a separate trampoline.
+ */
+ const struct ftrace_ops ftrace_list_ops = {
+ .func = ftrace_ops_list_func,
+ .flags = FTRACE_OPS_FL_STUB,
+ };
+
+ static void ftrace_ops_nop_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op,
+ struct ftrace_regs *fregs)
+ {
+ /* do nothing */
+ }
+
+ /*
+ * Stub used when a call site is disabled. May be called transiently by threads
+ * which have made it into ftrace_caller but haven't yet recovered the ops at
+ * the point the call site is disabled.
+ */
+ const struct ftrace_ops ftrace_nop_ops = {
+ .func = ftrace_ops_nop_func,
+ .flags = FTRACE_OPS_FL_STUB,
+ };
+ #endif
+
static inline void ftrace_ops_init(struct ftrace_ops *ops)
{
#ifdef CONFIG_DYNAMIC_FTRACE
call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
}
+/**
+ * ftrace_free_filter - remove all filters for an ftrace_ops
+ * @ops: the ops to remove the filters from
+ */
void ftrace_free_filter(struct ftrace_ops *ops)
{
ftrace_ops_init(ops);
free_ftrace_hash(ops->func_hash->filter_hash);
free_ftrace_hash(ops->func_hash->notrace_hash);
}
+EXPORT_SYMBOL_GPL(ftrace_free_filter);
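A hedged sketch of the ownership rule being documented here, in module form (the target symbol and module are arbitrary; error handling trimmed). A single ops attached to a function with a directly callable ->func is also the case the new FTRACE_FL_CALL_OPS handling below is aimed at:

	#include <linux/ftrace.h>
	#include <linux/module.h>
	#include <linux/string.h>

	static unsigned long my_hits;
	static char my_sym[] = "schedule_idle";	/* arbitrary traceable symbol */

	static void my_callback(unsigned long ip, unsigned long parent_ip,
				struct ftrace_ops *op, struct ftrace_regs *fregs)
	{
		my_hits++;			/* keep the callback trivial */
	}

	static struct ftrace_ops my_ops = {
		.func = my_callback,
	};

	static int __init my_init(void)
	{
		/* Allocates filter hash memory owned by my_ops. */
		int ret = ftrace_set_filter(&my_ops, my_sym, strlen(my_sym), 1);

		if (ret)
			return ret;

		ret = register_ftrace_function(&my_ops);
		if (ret)
			ftrace_free_filter(&my_ops);
		return ret;
	}

	static void __exit my_exit(void)
	{
		unregister_ftrace_function(&my_ops);
		ftrace_free_filter(&my_ops);	/* safe now: ops is no longer live */
	}

	module_init(my_init);
	module_exit(my_exit);
	MODULE_LICENSE("GPL");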
static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
{
* if rec count is zero.
*/
}
+
+ /*
+ * If the rec has a single associated ops, and ops->func can be
+ * called directly, allow the call site to call via the ops.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) &&
+ ftrace_rec_count(rec) == 1 &&
+ ftrace_ops_get_func(ops) == ops->func)
+ rec->flags |= FTRACE_FL_CALL_OPS;
+ else
+ rec->flags &= ~FTRACE_FL_CALL_OPS;
+
count++;
/* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
struct ftrace_ops *ops = NULL;
pr_info("ftrace record flags: %lx\n", rec->flags);
- pr_cont(" (%ld)%s", ftrace_rec_count(rec),
- rec->flags & FTRACE_FL_REGS ? " R" : " ");
+ pr_cont(" (%ld)%s%s", ftrace_rec_count(rec),
+ rec->flags & FTRACE_FL_REGS ? " R" : " ",
+ rec->flags & FTRACE_FL_CALL_OPS ? " O" : " ");
if (rec->flags & FTRACE_FL_TRAMP_EN) {
ops = ftrace_find_tramp_ops_any(rec);
if (ops) {
* want the direct enabled (it will be done via the
* direct helper). But if DIRECT_EN is set, and
* the count is not one, we need to clear it.
+ *
*/
if (ftrace_rec_count(rec) == 1) {
if (!(rec->flags & FTRACE_FL_DIRECT) !=
} else if (rec->flags & FTRACE_FL_DIRECT_EN) {
flag |= FTRACE_FL_DIRECT;
}
+
+ /*
+ * Ops calls are special, as count matters.
+ * As with direct calls, they must only be enabled when count
+ * is one, otherwise they'll be handled via the list ops.
+ */
+ if (ftrace_rec_count(rec) == 1) {
+ if (!(rec->flags & FTRACE_FL_CALL_OPS) !=
+ !(rec->flags & FTRACE_FL_CALL_OPS_EN))
+ flag |= FTRACE_FL_CALL_OPS;
+ } else if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ flag |= FTRACE_FL_CALL_OPS;
+ }
}
/* If the state of this record hasn't changed, then do nothing */
rec->flags &= ~FTRACE_FL_DIRECT_EN;
}
}
+
+ if (flag & FTRACE_FL_CALL_OPS) {
+ if (ftrace_rec_count(rec) == 1) {
+ if (rec->flags & FTRACE_FL_CALL_OPS)
+ rec->flags |= FTRACE_FL_CALL_OPS_EN;
+ else
+ rec->flags &= ~FTRACE_FL_CALL_OPS_EN;
+ } else {
+ /*
+ * Can only call directly if there's
+ * only one set of associated ops.
+ */
+ rec->flags &= ~FTRACE_FL_CALL_OPS_EN;
+ }
+ }
}
/*
* and REGS states. The _EN flags must be disabled though.
*/
rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
- FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN);
+ FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN |
+ FTRACE_FL_CALL_OPS_EN);
}
ftrace_bug_type = FTRACE_BUG_NOP;
return NULL;
}
+ struct ftrace_ops *
+ ftrace_find_unique_ops(struct dyn_ftrace *rec)
+ {
+ struct ftrace_ops *op, *found = NULL;
+ unsigned long ip = rec->ip;
+
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+
+ if (hash_contains_ip(ip, op->func_hash)) {
+ if (found)
+ return NULL;
+ found = op;
+ }
+
+ } while_for_each_ftrace_op(op);
+
+ return found;
+ }
+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
/* Protected by rcu_tasks for reading, and direct_mutex for writing */
static struct ftrace_hash *direct_functions = EMPTY_HASH;
if (iter->flags & FTRACE_ITER_ENABLED) {
struct ftrace_ops *ops;
- seq_printf(m, " (%ld)%s%s%s",
+ seq_printf(m, " (%ld)%s%s%s%s",
ftrace_rec_count(rec),
rec->flags & FTRACE_FL_REGS ? " R" : " ",
rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ",
- rec->flags & FTRACE_FL_DIRECT ? " D" : " ");
+ rec->flags & FTRACE_FL_DIRECT ? " D" : " ",
+ rec->flags & FTRACE_FL_CALL_OPS ? " O" : " ");
if (rec->flags & FTRACE_FL_TRAMP_EN) {
ops = ftrace_find_tramp_ops_any(rec);
if (ops) {
} else {
add_trampoline_func(m, NULL, rec);
}
+ if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+ ops = ftrace_find_unique_ops(rec);
+ if (ops) {
+ seq_printf(m, "\tops: %pS (%pS)",
+ ops, ops->func);
+ } else {
+ seq_puts(m, "\tops: ERROR!");
+ }
+ }
if (rec->flags & FTRACE_FL_DIRECT) {
unsigned long direct;
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ip is NULL, it fails to update filter.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
int remove, int reset)
*
* Filters denote which functions should be enabled when tracing is enabled
* If @ips array or any ip specified within is NULL , it fails to update filter.
- */
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
+ */
int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips,
unsigned int cnt, int remove, int reset)
{
*
* Filters denote which functions should be enabled when tracing is enabled.
* If @buf is NULL and reset is set, all functions will be enabled for tracing.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset)
* Notrace Filters denote which functions should not be enabled when tracing
* is enabled. If @buf is NULL and reset is set, all functions will be enabled
* for tracing.
+ *
+ * This can allocate memory which must be freed before @ops can be freed,
+ * either by removing each filtered addr or by using
+ * ftrace_free_filter(@ops).
*/
int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
int len, int reset)