Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64...
author Linus Torvalds <[email protected]>
Tue, 21 Feb 2023 23:27:48 +0000 (15:27 -0800)
committer Linus Torvalds <[email protected]>
Tue, 21 Feb 2023 23:27:48 +0000 (15:27 -0800)
Pull arm64 updates from Catalin Marinas:

 - Support for arm64 SME 2 and 2.1. SME2 introduces a new 512-bit
   architectural register (ZT0, for the look-up table feature) that
   Linux needs to save/restore

 - Include TPIDR2 in the signal context and add the corresponding
   kselftests

 - Perf updates: Arm SPEv1.2 support, HiSilicon uncore PMU updates, ACPI
   support for the Marvell DDR and TAD PMU drivers, reset DTM_PMU_CONFIG
   (ARM CMN) at probe time

 - Support for DYNAMIC_FTRACE_WITH_CALL_OPS on arm64

 - Permit EFI boot with MMU and caches on. Instead of cleaning the
   entire loaded kernel image to the PoC and disabling the MMU and
   caches before branching to the kernel bare metal entry point, leave
   the MMU and caches enabled and rely on EFI's cacheable 1:1 mapping of
   all of system RAM to populate the initial page tables

 - Expose the AArch32 (compat) ELF_HWCAP features to userspace in an
   arm64 kernel (the arm32 kernel only defines the values)

 - Harden the arm64 shadow call stack pointer handling: stash the shadow
   stack pointer in the task struct on interrupt, load it directly from
   this structure

 - Signal handling cleanups to remove redundant validation of size
   information and avoid reading the same data from userspace twice

 - Refactor the hwcap macros to make use of the automatically generated
   ID register definitions. This should make adding new hwcaps less
   error-prone

 - Further arm64 sysreg conversion and some fixes

 - arm64 kselftest fixes and improvements

 - Pointer authentication cleanups: don't sign leaf functions, unify
   asm-arch manipulation

 - Pseudo-NMI code generation optimisations

 - Minor fixes for SME and TPIDR2 handling

 - Miscellaneous updates: ARCH_FORCE_MAX_ORDER is now selectable, replace
   strtobool() with kstrtobool() in the cpufeature.c code, apply the
   dynamic shadow call stack patching in two passes, catch pfn changes in
   set_pte_at() that lack the required break-before-make sequence, and
   attempt to dump all instructions on unhandled kernel faults

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (130 commits)
  arm64: fix .idmap.text assertion for large kernels
  kselftest/arm64: Don't require FA64 for streaming SVE+ZA tests
  kselftest/arm64: Copy whole EXTRA context
  arm64: kprobes: Drop ID map text from kprobes blacklist
  perf: arm_spe: Print the version of SPE detected
  perf: arm_spe: Add support for SPEv1.2 inverted event filtering
  perf: Add perf_event_attr::config3
  arm64/sme: Fix __finalise_el2 SMEver check
  drivers/perf: fsl_imx8_ddr_perf: Remove set-but-not-used variable
  arm64/signal: Only read new data when parsing the ZT context
  arm64/signal: Only read new data when parsing the ZA context
  arm64/signal: Only read new data when parsing the SVE context
  arm64/signal: Avoid rereading context frame sizes
  arm64/signal: Make interface for restore_fpsimd_context() consistent
  arm64/signal: Remove redundant size validation from parse_user_sigframe()
  arm64/signal: Don't redundantly verify FPSIMD magic
  arm64/cpufeature: Use helper macros to specify hwcaps
  arm64/cpufeature: Always use symbolic name for feature value in hwcaps
  arm64/sysreg: Initial unsigned annotations for ID registers
  arm64/sysreg: Initial annotation of signed ID registers
  ...

22 files changed:
arch/arm64/Kconfig
arch/arm64/Kconfig.platforms
arch/arm64/Makefile
arch/arm64/include/asm/efi.h
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/signal.c
arch/arm64/kernel/traps.c
arch/arm64/kernel/vmlinux.lds.S
arch/arm64/mm/mmu.c
arch/arm64/tools/cpucaps
drivers/firmware/efi/libstub/arm64.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/perf/arm-cmn.c
include/linux/compiler_attributes.h
include/linux/compiler_types.h
kernel/exit.c
kernel/trace/Kconfig
kernel/trace/ftrace.c

diff --combined arch/arm64/Kconfig
index c5ccca26a40870f54d96ed33535beee25bfc153a,619ab046744a42efc9c4e61966295f4ed2fe902a..27b2592698b0ae4748e09907adc3ed5f1bf41b2f
@@@ -123,6 -123,8 +123,8 @@@ config ARM6
        select DMA_DIRECT_REMAP
        select EDAC_SUPPORT
        select FRAME_POINTER
+       select FUNCTION_ALIGNMENT_4B
+       select FUNCTION_ALIGNMENT_8B if DYNAMIC_FTRACE_WITH_CALL_OPS
        select GENERIC_ALLOCATOR
        select GENERIC_ARCH_TOPOLOGY
        select GENERIC_CLOCKEVENTS_BROADCAST
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
 -      select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
 -              if $(cc-option,-fpatchable-function-entry=2)
+       select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \
+               if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG)
        select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
                if DYNAMIC_FTRACE_WITH_ARGS
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@@ -970,22 -976,6 +974,22 @@@ config ARM64_ERRATUM_245716
  
          If unsure, say Y.
  
 +config ARM64_ERRATUM_2645198
 +      bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption"
 +      default y
 +      help
 +        This option adds the workaround for ARM Cortex-A715 erratum 2645198.
 +
 +        If a Cortex-A715 cpu sees a page mapping permissions change from executable
 +        to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the
 +        next instruction abort caused by permission fault.
 +
 +        Only user-space does executable to non-executable permission transition via
 +        mprotect() system call. Workaround the problem by doing a break-before-make
 +        TLB invalidation, for all changes to executable user space mappings.
 +
 +        If unsure, say Y.
 +
  config CAVIUM_ERRATUM_22375
        bool "Cavium erratum 22375, 24313"
        default y
@@@ -1470,10 -1460,23 +1474,23 @@@ config XE
        help
          Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64.
  
+ # include/linux/mmzone.h requires the following to be true:
+ #
+ #   MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS
+ #
+ # so the maximum value of MAX_ORDER is SECTION_SIZE_BITS + 1 - PAGE_SHIFT:
+ #
+ #     | SECTION_SIZE_BITS |  PAGE_SHIFT  |  max MAX_ORDER  |  default MAX_ORDER |
+ # ----+-------------------+--------------+-----------------+--------------------+
+ # 4K  |       27          |      12      |       16        |         11         |
+ # 16K |       27          |      14      |       14        |         12         |
+ # 64K |       29          |      16      |       14        |         14         |
  config ARCH_FORCE_MAX_ORDER
-       int
+       int "Maximum zone order" if ARM64_4K_PAGES || ARM64_16K_PAGES
        default "14" if ARM64_64K_PAGES
+       range 12 14 if ARM64_16K_PAGES
        default "12" if ARM64_16K_PAGES
+       range 11 16 if ARM64_4K_PAGES
        default "11"
        help
          The kernel memory allocator divides physically contiguous memory
          This config option is actually maximum order plus one. For example,
          a value of 11 means that the largest free memory block is 2^10 pages.
  
-         We make sure that we can allocate upto a HugePage size for each configuration.
+         We make sure that we can allocate up to a HugePage size for each configuration.
          Hence we have :
                MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
  
@@@ -1832,7 -1835,7 +1849,7 @@@ config ARM64_PTR_AUTH_KERNE
        bool "Use pointer authentication for kernel"
        default y
        depends on ARM64_PTR_AUTH
-       depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
+       depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_ARMV8_3
        # Modern compilers insert a .note.gnu.property section note for PAC
        # which is only understood by binutils starting with version 2.33.1.
        depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
@@@ -1857,7 -1860,7 +1874,7 @@@ config CC_HAS_SIGN_RETURN_ADDRES
        # GCC 7, 8
        def_bool $(cc-option,-msign-return-address=all)
  
- config AS_HAS_PAC
+ config AS_HAS_ARMV8_3
        def_bool $(cc-option,-Wa$(comma)-march=armv8.3-a)
  
  config AS_HAS_CFI_NEGATE_RA_STATE
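
The ARCH_FORCE_MAX_ORDER bounds in the hunk above all follow from the
mmzone.h constraint quoted in the comment. A standalone sketch of that
arithmetic for the 4K page case (constants hard-coded here purely for
illustration, not taken from kernel headers):

    /* Sketch: the mmzone.h constraint for 4K pages, with the constants
     * from the table above written out by hand. */
    #define PAGE_SHIFT         12   /* 4K pages */
    #define SECTION_SIZE_BITS  27
    #define MAX_ORDER          16   /* the "max MAX_ORDER" column */

    _Static_assert(MAX_ORDER - 1 + PAGE_SHIFT <= SECTION_SIZE_BITS,
                   "largest buddy block must fit in a sparsemem section");
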
index 165e544aa7f951d4a3b0cc3a40af1425624fed92,333d0af650d2bafa8e88e898838d3a0225cee548..89a0b13b058d6281412545565e5f65235506b1aa
@@@ -95,7 -95,7 +95,7 @@@ config ARCH_BITMAI
          This enables support for the Bitmain SoC Family.
  
  config ARCH_EXYNOS
 -      bool "ARMv8 based Samsung Exynos SoC family"
 +      bool "Samsung Exynos SoC family"
        select COMMON_CLK_SAMSUNG
        select CLKSRC_EXYNOS_MCT
        select EXYNOS_PM_DOMAINS if PM_GENERIC_DOMAINS
          This enables support for ARMv8 based Samsung Exynos SoC family.
  
  config ARCH_SPARX5
 -      bool "ARMv8 based Microchip Sparx5 SoC family"
 +      bool "Microchip Sparx5 SoC family"
        select PINCTRL
        select DW_APB_TIMER_OF
        help
@@@ -187,7 -187,7 +187,7 @@@ config ARCH_MVEB
        select PINCTRL_ARMADA_CP110
        select PINCTRL_AC5
        help
-         This enables support for Marvell EBU familly, including:
+         This enables support for Marvell EBU family, including:
           - Armada 3700 SoC Family
           - Armada 7K SoC Family
           - Armada 8K SoC Family
@@@ -199,13 -199,13 +199,13 @@@ menuconfig ARCH_NX
  if ARCH_NXP
  
  config ARCH_LAYERSCAPE
 -      bool "ARMv8 based Freescale Layerscape SoC family"
 +      bool "Freescale Layerscape SoC family"
        select EDAC_SUPPORT
        help
          This enables support for the Freescale Layerscape SoC family.
  
  config ARCH_MXC
 -      bool "ARMv8 based NXP i.MX SoC family"
 +      bool "NXP i.MX SoC family"
        select ARM64_ERRATUM_843419
        select ARM64_ERRATUM_845719 if COMPAT
        select IMX_GPCV2
@@@ -296,7 -296,7 +296,7 @@@ config ARCH_TEGR
          This enables support for the NVIDIA Tegra SoC family.
  
  config ARCH_TESLA_FSD
 -      bool "ARMv8 based Tesla platform"
 +      bool "Tesla platform"
        depends on ARCH_EXYNOS
        help
          Support for ARMv8 based Tesla platforms.
diff --combined arch/arm64/Makefile
index d7dfe00df7d2de29fa899ab761561e716137d435,c33b5da95b4a03f2701a79ad4efbd5ae9aba19b0..2d49aea0ff67a8f3323e5403f29e69561d436355
@@@ -63,50 -63,37 +63,37 @@@ stack_protector_prepare: prepare
                                        include/generated/asm-offsets.h))
  endif
  
- ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
- # make sure to pass the newest target architecture to -march.
- asm-arch := armv8.2-a
- endif
- # Ensure that if the compiler supports branch protection we default it
- # off, this will be overridden if we are using branch protection.
- branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
- ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
- branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all
- # We enable additional protection for leaf functions as there is some
- # narrow potential for ROP protection benefits and no substantial
- # performance impact has been observed.
- PACRET-y := pac-ret+leaf
- # Using a shadow call stack in leaf functions is too costly, so avoid PAC there
- # as well when we may be patching PAC into SCS
- PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret
  ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
- branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti
+   KBUILD_CFLAGS += -mbranch-protection=pac-ret+bti
+ else ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
+   ifeq ($(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET),y)
+     KBUILD_CFLAGS += -mbranch-protection=pac-ret
+   else
+     KBUILD_CFLAGS += -msign-return-address=non-leaf
+   endif
  else
- branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y)
- endif
- # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
- # compiler to generate them and consequently to break the single image contract
- # we pass it only to the assembler. This option is utilized only in case of non
- # integrated assemblers.
- ifeq ($(CONFIG_AS_HAS_PAC), y)
- asm-arch := armv8.3-a
- endif
- endif
- KBUILD_CFLAGS += $(branch-prot-flags-y)
- ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
- # make sure to pass the newest target architecture to -march.
- asm-arch := armv8.4-a
+   KBUILD_CFLAGS += $(call cc-option,-mbranch-protection=none)
  endif
  
+ # Tell the assembler to support instructions from the latest target
+ # architecture.
+ #
+ # For non-integrated assemblers we'll pass this on the command line, and for
+ # integrated assemblers we'll define ARM64_ASM_ARCH and ARM64_ASM_PREAMBLE for
+ # inline usage.
+ #
+ # We cannot pass the same arch flag to the compiler as this would allow it to
+ # freely generate instructions which are not supported by earlier architecture
+ # versions, which would prevent a single kernel image from working on earlier
+ # hardware.
  ifeq ($(CONFIG_AS_HAS_ARMV8_5), y)
- # make sure to pass the newest target architecture to -march.
- asm-arch := armv8.5-a
+   asm-arch := armv8.5-a
+ else ifeq ($(CONFIG_AS_HAS_ARMV8_4), y)
+   asm-arch := armv8.4-a
+ else ifeq ($(CONFIG_AS_HAS_ARMV8_3), y)
+   asm-arch := armv8.3-a
+ else ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
+   asm-arch := armv8.2-a
  endif
  
  ifdef asm-arch
@@@ -139,7 -126,10 +126,10 @@@ endi
  
  CHECKFLAGS    += -D__aarch64__
  
- ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
+ ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS),y)
+   KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+   CC_FLAGS_FTRACE := -fpatchable-function-entry=4,2
+ else ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
    KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
    CC_FLAGS_FTRACE := -fpatchable-function-entry=2
  endif
@@@ -215,12 -205,6 +205,12 @@@ ifdef CONFIG_COMPAT_VDS
  endif
  endif
  
 +include $(srctree)/scripts/Makefile.defconf
 +
 +PHONY += virtconfig
 +virtconfig:
 +      $(call merge_into_defconfig_override,defconfig,virt)
 +
  define archhelp
    echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
    echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
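
The -fpatchable-function-entry=4,2 setting selected above for
DYNAMIC_FTRACE_WITH_CALL_OPS reserves four NOPs per function, two of them
placed before the function's entry label. A minimal standalone illustration
using the equivalent per-function attribute (not part of this patch; the
attribute is available in recent GCC and Clang):

    /* Equivalent of building one function with -fpatchable-function-entry=4,2:
     * four NOPs are reserved, two before the entry label and two at it,
     * giving ftrace room to patch in a literal/branch pair at runtime. */
    __attribute__((patchable_function_entry(4, 2)))
    void traced_function(void)
    {
            /* function body; the NOP padding is emitted by the compiler */
    }
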
index de4ff90785b2c96a15ad08484dedbda289951fae,0f0e729b40efc9ab72ac51844b8095d4018acf9e..acaa39f6381a084bef5c9ddc2cb249edfa7ef095
@@@ -48,17 -48,8 +48,17 @@@ int efi_set_mapping_permissions(struct 
  })
  
  extern spinlock_t efi_rt_lock;
 +extern u64 *efi_rt_stack_top;
  efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
  
 +/*
 + * efi_rt_stack_top[-1] contains the value the stack pointer had before
 + * switching to the EFI runtime stack.
 + */
 +#define current_in_efi()                                              \
 +      (!preemptible() && efi_rt_stack_top != NULL &&                  \
 +       on_task_stack(current, READ_ONCE(efi_rt_stack_top[-1]), 1))
 +
  #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
  
  /*
@@@ -114,6 -105,8 +114,8 @@@ static inline unsigned long efi_get_kim
  #define EFI_ALLOC_ALIGN               SZ_64K
  #define EFI_ALLOC_LIMIT               ((1UL << 48) - 1)
  
+ extern unsigned long primary_entry_offset(void);
  /*
   * On ARM systems, virtually remapped UEFI runtime services are set up in two
   * distinct stages:
index 206de10524e338c9406d57641170f374527dd178,5f3271e9d5dfb26373d4b80e0ae85ee66887822a..c9f15b9e3c711da2530c43aca0f80ddac0705271
  #define ESR_ELx_FSC_ACCESS    (0x08)
  #define ESR_ELx_FSC_FAULT     (0x04)
  #define ESR_ELx_FSC_PERM      (0x0C)
 +#define ESR_ELx_FSC_SEA_TTW0  (0x14)
 +#define ESR_ELx_FSC_SEA_TTW1  (0x15)
 +#define ESR_ELx_FSC_SEA_TTW2  (0x16)
 +#define ESR_ELx_FSC_SEA_TTW3  (0x17)
 +#define ESR_ELx_FSC_SECC      (0x18)
 +#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
 +#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
 +#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
 +#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
  
  /* ISS field definitions for Data Aborts */
  #define ESR_ELx_ISV_SHIFT     (24)
  #define ESR_ELx_SME_ISS_ILL           1
  #define ESR_ELx_SME_ISS_SM_DISABLED   2
  #define ESR_ELx_SME_ISS_ZA_DISABLED   3
+ #define ESR_ELx_SME_ISS_ZT_DISABLED   4
  
  #ifndef __ASSEMBLY__
  #include <asm/types.h>
index 65e78999c75d76ea8e1443baec542d5f80c871d9,832c9c8fb58f16480ce04537be5a2dfc27d0bac3..27455bfd64bc3b72d7b8462764cfdfc483158cb0
@@@ -275,6 -275,7 +275,7 @@@ static inline void set_pte(pte_t *ptep
  }
  
  extern void __sync_icache_dcache(pte_t pteval);
+ bool pgattr_change_is_safe(u64 old, u64 new);
  
  /*
   * PTE bits configuration in the presence of hardware Dirty Bit Management
   *   PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
   */
  
- static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep,
+ static inline void __check_safe_pte_update(struct mm_struct *mm, pte_t *ptep,
                                           pte_t pte)
  {
        pte_t old_pte;
        VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
                     "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
                     __func__, pte_val(old_pte), pte_val(pte));
+       VM_WARN_ONCE(!pgattr_change_is_safe(pte_val(old_pte), pte_val(pte)),
+                    "%s: unsafe attribute change: 0x%016llx -> 0x%016llx",
+                    __func__, pte_val(old_pte), pte_val(pte));
  }
  
  static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
                        mte_sync_tags(old_pte, pte);
        }
  
-       __check_racy_pte_update(mm, ptep, pte);
+       __check_safe_pte_update(mm, ptep, pte);
  
        set_pte(ptep, pte);
  }
@@@ -681,7 -685,7 +685,7 @@@ static inline unsigned long pmd_page_va
  #define pud_leaf(pud)         (pud_present(pud) && !pud_table(pud))
  #define pud_valid(pud)                pte_valid(pud_pte(pud))
  #define pud_user(pud)         pte_user(pud_pte(pud))
 -
 +#define pud_user_exec(pud)    pte_user_exec(pud_pte(pud))
  
  static inline void set_pud(pud_t *pudp, pud_t pud)
  {
@@@ -730,7 -734,6 +734,7 @@@ static inline pmd_t *pud_pgtable(pud_t 
  #else
  
  #define pud_page_paddr(pud)   ({ BUILD_BUG(); 0; })
 +#define pud_user_exec(pud)    pud_user(pud) /* Always 0 with folding */
  
  /* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
  #define pmd_set_fixmap(addr)          NULL
@@@ -863,12 -866,12 +867,12 @@@ static inline bool pte_user_accessible_
  
  static inline bool pmd_user_accessible_page(pmd_t pmd)
  {
 -      return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
 +      return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd));
  }
  
  static inline bool pud_user_accessible_page(pud_t pud)
  {
 -      return pud_leaf(pud) && pud_user(pud);
 +      return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud));
  }
  #endif
  
@@@ -1094,15 -1097,6 +1098,15 @@@ static inline bool pud_sect_supported(v
  }
  
  
 +#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 +#define ptep_modify_prot_start ptep_modify_prot_start
 +extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
 +                                  unsigned long addr, pte_t *ptep);
 +
 +#define ptep_modify_prot_commit ptep_modify_prot_commit
 +extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
 +                                  unsigned long addr, pte_t *ptep,
 +                                  pte_t old_pte, pte_t new_pte);
  #endif /* !__ASSEMBLY__ */
  
  #endif /* __ASM_PGTABLE_H */
index b6ef1af0122ebde4984b504e8a14578fe6996ffb,c11cb445ffcad6113b4327a49a121138916a3686..692dfefbe0ed2597458e251b2b0da2a1ad50f5ba
@@@ -299,7 -299,7 +299,7 @@@ void task_set_vl_onexec(struct task_str
  /*
   * TIF_SME controls whether a task can use SME without trapping while
   * in userspace, when TIF_SME is set then we must have storage
-  * alocated in sve_state and za_state to store the contents of both ZA
+  * alocated in sve_state and sme_state to store the contents of both ZA
   * and the SVE registers for both streaming and non-streaming modes.
   *
   * If both SVCR.ZA and SVCR.SM are disabled then at any point we
@@@ -385,7 -385,7 +385,7 @@@ static void task_fpsimd_load(void
        WARN_ON(!system_supports_fpsimd());
        WARN_ON(!have_cpu_fpsimd_context());
  
 -      if (system_supports_sve()) {
 +      if (system_supports_sve() || system_supports_sme()) {
                switch (current->thread.fp_type) {
                case FP_STATE_FPSIMD:
                        /* Stop tracking SVE for this task until next use. */
                write_sysreg_s(current->thread.svcr, SYS_SVCR);
  
                if (thread_za_enabled(&current->thread))
-                       za_load_state(current->thread.za_state);
+                       sme_load_state(current->thread.sme_state,
+                                      system_supports_sme2());
  
                if (thread_sm_enabled(&current->thread))
                        restore_ffr = system_supports_fa64();
@@@ -490,7 -491,8 +491,8 @@@ static void fpsimd_save(void
                *svcr = read_sysreg_s(SYS_SVCR);
  
                if (*svcr & SVCR_ZA_MASK)
-                       za_save_state(last->za_state);
+                       sme_save_state(last->sme_state,
+                                      system_supports_sme2());
  
                /* If we are in streaming mode override regular SVE. */
                if (*svcr & SVCR_SM_MASK) {
@@@ -1257,30 -1259,30 +1259,30 @@@ void fpsimd_release_task(struct task_st
  #ifdef CONFIG_ARM64_SME
  
  /*
-  * Ensure that task->thread.za_state is allocated and sufficiently large.
+  * Ensure that task->thread.sme_state is allocated and sufficiently large.
   *
   * This function should be used only in preparation for replacing
-  * task->thread.za_state with new data.  The memory is always zeroed
+  * task->thread.sme_state with new data.  The memory is always zeroed
   * here to prevent stale data from showing through: this is done in
   * the interest of testability and predictability, the architecture
   * guarantees that when ZA is enabled it will be zeroed.
   */
  void sme_alloc(struct task_struct *task)
  {
-       if (task->thread.za_state) {
-               memset(task->thread.za_state, 0, za_state_size(task));
+       if (task->thread.sme_state) {
+               memset(task->thread.sme_state, 0, sme_state_size(task));
                return;
        }
  
        /* This could potentially be up to 64K. */
-       task->thread.za_state =
-               kzalloc(za_state_size(task), GFP_KERNEL);
+       task->thread.sme_state =
+               kzalloc(sme_state_size(task), GFP_KERNEL);
  }
  
  static void sme_free(struct task_struct *task)
  {
-       kfree(task->thread.za_state);
-       task->thread.za_state = NULL;
+       kfree(task->thread.sme_state);
+       task->thread.sme_state = NULL;
  }
  
  void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
        isb();
  }
  
+ /*
+  * This must be called after sme_kernel_enable(), we rely on the
+  * feature table being sorted to ensure this.
+  */
+ void sme2_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
+ {
+       /* Allow use of ZT0 */
+       write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_EZT0_MASK,
+                      SYS_SMCR_EL1);
+ }
  /*
   * This must be called after sme_kernel_enable(), we rely on the
   * feature table being sorted to ensure this.
@@@ -1322,7 -1335,6 +1335,6 @@@ u64 read_smcr_features(void
        unsigned int vq_max;
  
        sme_kernel_enable(NULL);
-       sme_smstart_sm();
  
        /*
         * Set the maximum possible VL.
  
        smcr = read_sysreg_s(SYS_SMCR_EL1);
        smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
-       vq_max = sve_vq_from_vl(sve_get_vl());
+       vq_max = sve_vq_from_vl(sme_get_vl());
        smcr |= vq_max - 1; /* set LEN field to maximum effective value */
  
-       sme_smstop_sm();
        return smcr;
  }
  
@@@ -1488,7 -1498,7 +1498,7 @@@ void do_sme_acc(unsigned long esr, stru
  
        sve_alloc(current, false);
        sme_alloc(current);
-       if (!current->thread.sve_state || !current->thread.za_state) {
+       if (!current->thread.sve_state || !current->thread.sme_state) {
                force_sig(SIGKILL);
                return;
        }
@@@ -1609,7 -1619,7 +1619,7 @@@ static void fpsimd_flush_thread_vl(enu
  void fpsimd_flush_thread(void)
  {
        void *sve_state = NULL;
-       void *za_state = NULL;
+       void *sme_state = NULL;
  
        if (!system_supports_fpsimd())
                return;
                clear_thread_flag(TIF_SME);
  
                /* Defer kfree() while in atomic context */
-               za_state = current->thread.za_state;
-               current->thread.za_state = NULL;
+               sme_state = current->thread.sme_state;
+               current->thread.sme_state = NULL;
  
                fpsimd_flush_thread_vl(ARM64_VEC_SME);
                current->thread.svcr = 0;
  
        put_cpu_fpsimd_context();
        kfree(sve_state);
-       kfree(za_state);
+       kfree(sme_state);
  }
  
  /*
@@@ -1711,7 -1721,7 +1721,7 @@@ static void fpsimd_bind_task_to_cpu(voi
        WARN_ON(!system_supports_fpsimd());
        last->st = &current->thread.uw.fpsimd_state;
        last->sve_state = current->thread.sve_state;
-       last->za_state = current->thread.za_state;
+       last->sme_state = current->thread.sme_state;
        last->sve_vl = task_get_sve_vl(current);
        last->sme_vl = task_get_sme_vl(current);
        last->svcr = &current->thread.svcr;
index 0c321ad23cd3a48d60a52f3a0a1209c4b7218ac2,143a971d7511cf74eb070b61c3d3b4da1270e38d..d7f4f0d1ae120248b0a483c71245fb991b4925d2
@@@ -683,7 -683,7 +683,7 @@@ static int tls_set(struct task_struct *
        unsigned long tls[2];
  
        tls[0] = target->thread.uw.tp_value;
-       if (system_supports_sme())
+       if (system_supports_tpidr2())
                tls[1] = target->thread.tpidr2_el0;
  
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, tls, 0, count);
                return ret;
  
        target->thread.uw.tp_value = tls[0];
-       if (system_supports_sme())
+       if (system_supports_tpidr2())
                target->thread.tpidr2_el0 = tls[1];
  
        return ret;
@@@ -1045,7 -1045,7 +1045,7 @@@ static int za_get(struct task_struct *t
        if (thread_za_enabled(&target->thread)) {
                start = end;
                end = ZA_PT_SIZE(vq);
-               membuf_write(&to, target->thread.za_state, end - start);
+               membuf_write(&to, target->thread.sme_state, end - start);
        }
  
        /* Zero any trailing padding */
@@@ -1099,7 -1099,7 +1099,7 @@@ static int za_set(struct task_struct *t
  
        /* Allocate/reinit ZA storage */
        sme_alloc(target);
-       if (!target->thread.za_state) {
+       if (!target->thread.sme_state) {
                ret = -ENOMEM;
                goto out;
        }
        start = ZA_PT_ZA_OFFSET;
        end = ZA_PT_SIZE(vq);
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                target->thread.za_state,
+                                target->thread.sme_state,
                                 start, end);
        if (ret)
                goto out;
        return ret;
  }
  
+ static int zt_get(struct task_struct *target,
+                 const struct user_regset *regset,
+                 struct membuf to)
+ {
+       if (!system_supports_sme2())
+               return -EINVAL;
+       /*
+        * If PSTATE.ZA is not set then ZT will be zeroed when it is
+        * enabled so report the current register value as zero.
+        */
+       if (thread_za_enabled(&target->thread))
+               membuf_write(&to, thread_zt_state(&target->thread),
+                            ZT_SIG_REG_BYTES);
+       else
+               membuf_zero(&to, ZT_SIG_REG_BYTES);
+       return 0;
+ }
+ static int zt_set(struct task_struct *target,
+                 const struct user_regset *regset,
+                 unsigned int pos, unsigned int count,
+                 const void *kbuf, const void __user *ubuf)
+ {
+       int ret;
+       if (!system_supports_sme2())
+               return -EINVAL;
+       if (!thread_za_enabled(&target->thread)) {
+               sme_alloc(target);
+               if (!target->thread.sme_state)
+                       return -ENOMEM;
+       }
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                thread_zt_state(&target->thread),
+                                0, ZT_SIG_REG_BYTES);
+       if (ret == 0)
+               target->thread.svcr |= SVCR_ZA_MASK;
+       return ret;
+ }
  #endif /* CONFIG_ARM64_SME */
  
  #ifdef CONFIG_ARM64_PTR_AUTH
@@@ -1357,9 -1402,10 +1402,10 @@@ enum aarch64_regset 
  #ifdef CONFIG_ARM64_SVE
        REGSET_SVE,
  #endif
 -#ifdef CONFIG_ARM64_SVE
 +#ifdef CONFIG_ARM64_SME
        REGSET_SSVE,
        REGSET_ZA,
+       REGSET_ZT,
  #endif
  #ifdef CONFIG_ARM64_PTR_AUTH
        REGSET_PAC_MASK,
@@@ -1467,6 -1513,14 +1513,14 @@@ static const struct user_regset aarch64
                .regset_get = za_get,
                .set = za_set,
        },
+       [REGSET_ZT] = { /* SME ZT */
+               .core_note_type = NT_ARM_ZT,
+               .n = 1,
+               .size = ZT_SIG_REG_BYTES,
+               .align = sizeof(u64),
+               .regset_get = zt_get,
+               .set = zt_set,
+       },
  #endif
  #ifdef CONFIG_ARM64_PTR_AUTH
        [REGSET_PAC_MASK] = {
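
The NT_ARM_ZT regset registered above can be retrieved with the usual
PTRACE_GETREGSET pattern. A hedged userspace sketch (it assumes NT_ARM_ZT is
visible through the installed UAPI headers and that the tracee is already
stopped; error handling is trimmed):

    /* Sketch: read ZT0 from a stopped tracee via the new NT_ARM_ZT regset.
     * Per zt_get() above, this fails with EINVAL when SME2 is absent and
     * reads as zero while PSTATE.ZA is disabled. */
    #include <stdio.h>
    #include <stdint.h>
    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <sys/uio.h>
    #include <linux/elf.h>

    int read_zt0(pid_t pid)
    {
            uint8_t zt0[64];        /* ZT0 is a single 512-bit register */
            struct iovec iov = { .iov_base = zt0, .iov_len = sizeof(zt0) };

            if (ptrace(PTRACE_GETREGSET, pid, (void *)NT_ARM_ZT, &iov) != 0)
                    return -1;

            printf("ZT0 bytes 0..3: %02x %02x %02x %02x\n",
                   zt0[0], zt0[1], zt0[2], zt0[3]);
            return 0;
    }
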
index be279fd482480de2a3b2dfff80a37300907555d8,d7b5ed8a9b7fd3e1fbe5da1d76969e0ea9234388..06a02707f4882eb66c1a496ba89c618e863d564f
@@@ -56,7 -56,9 +56,9 @@@ struct rt_sigframe_user_layout 
        unsigned long fpsimd_offset;
        unsigned long esr_offset;
        unsigned long sve_offset;
+       unsigned long tpidr2_offset;
        unsigned long za_offset;
+       unsigned long zt_offset;
        unsigned long extra_offset;
        unsigned long end_offset;
  };
@@@ -168,6 -170,19 +170,19 @@@ static void __user *apply_user_offset
        return base + offset;
  }
  
+ struct user_ctxs {
+       struct fpsimd_context __user *fpsimd;
+       u32 fpsimd_size;
+       struct sve_context __user *sve;
+       u32 sve_size;
+       struct tpidr2_context __user *tpidr2;
+       u32 tpidr2_size;
+       struct za_context __user *za;
+       u32 za_size;
+       struct zt_context __user *zt;
+       u32 zt_size;
+ };
  static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
  {
        struct user_fpsimd_state const *fpsimd =
        return err ? -EFAULT : 0;
  }
  
- static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
+ static int restore_fpsimd_context(struct user_ctxs *user)
  {
        struct user_fpsimd_state fpsimd;
-       __u32 magic, size;
        int err = 0;
  
-       /* check the magic/size information */
-       __get_user_error(magic, &ctx->head.magic, err);
-       __get_user_error(size, &ctx->head.size, err);
-       if (err)
-               return -EFAULT;
-       if (magic != FPSIMD_MAGIC || size != sizeof(struct fpsimd_context))
+       /* check the size information */
+       if (user->fpsimd_size != sizeof(struct fpsimd_context))
                return -EINVAL;
  
        /* copy the FP and status/control registers */
-       err = __copy_from_user(fpsimd.vregs, ctx->vregs,
+       err = __copy_from_user(fpsimd.vregs, &(user->fpsimd->vregs),
                               sizeof(fpsimd.vregs));
-       __get_user_error(fpsimd.fpsr, &ctx->fpsr, err);
-       __get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
+       __get_user_error(fpsimd.fpsr, &(user->fpsimd->fpsr), err);
+       __get_user_error(fpsimd.fpcr, &(user->fpsimd->fpcr), err);
  
        clear_thread_flag(TIF_SVE);
        current->thread.fp_type = FP_STATE_FPSIMD;
  }
  
  
- struct user_ctxs {
-       struct fpsimd_context __user *fpsimd;
-       struct sve_context __user *sve;
-       struct za_context __user *za;
- };
  #ifdef CONFIG_ARM64_SVE
  
  static int preserve_sve_context(struct sve_context __user *ctx)
  
  static int restore_sve_fpsimd_context(struct user_ctxs *user)
  {
-       int err;
+       int err = 0;
        unsigned int vl, vq;
        struct user_fpsimd_state fpsimd;
-       struct sve_context sve;
+       u16 user_vl, flags;
  
-       if (__copy_from_user(&sve, user->sve, sizeof(sve)))
-               return -EFAULT;
+       if (user->sve_size < sizeof(*user->sve))
+               return -EINVAL;
+       __get_user_error(user_vl, &(user->sve->vl), err);
+       __get_user_error(flags, &(user->sve->flags), err);
+       if (err)
+               return err;
  
-       if (sve.flags & SVE_SIG_FLAG_SM) {
+       if (flags & SVE_SIG_FLAG_SM) {
                if (!system_supports_sme())
                        return -EINVAL;
  
                vl = task_get_sme_vl(current);
        } else {
 -              if (!system_supports_sve())
 +              /*
 +               * A SME only system use SVE for streaming mode so can
 +               * have a SVE formatted context with a zero VL and no
 +               * payload data.
 +               */
 +              if (!system_supports_sve() && !system_supports_sme())
                        return -EINVAL;
  
                vl = task_get_sve_vl(current);
        }
  
-       if (sve.vl != vl)
+       if (user_vl != vl)
                return -EINVAL;
  
-       if (sve.head.size <= sizeof(*user->sve)) {
+       if (user->sve_size == sizeof(*user->sve)) {
                clear_thread_flag(TIF_SVE);
                current->thread.svcr &= ~SVCR_SM_MASK;
                current->thread.fp_type = FP_STATE_FPSIMD;
                goto fpsimd_only;
        }
  
-       vq = sve_vq_from_vl(sve.vl);
+       vq = sve_vq_from_vl(vl);
  
-       if (sve.head.size < SVE_SIG_CONTEXT_SIZE(vq))
+       if (user->sve_size < SVE_SIG_CONTEXT_SIZE(vq))
                return -EINVAL;
  
        /*
        if (err)
                return -EFAULT;
  
-       if (sve.flags & SVE_SIG_FLAG_SM)
+       if (flags & SVE_SIG_FLAG_SM)
                current->thread.svcr |= SVCR_SM_MASK;
        else
                set_thread_flag(TIF_SVE);
@@@ -366,6 -370,34 +375,34 @@@ extern int preserve_sve_context(void __
  
  #ifdef CONFIG_ARM64_SME
  
+ static int preserve_tpidr2_context(struct tpidr2_context __user *ctx)
+ {
+       int err = 0;
+       current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
+       __put_user_error(TPIDR2_MAGIC, &ctx->head.magic, err);
+       __put_user_error(sizeof(*ctx), &ctx->head.size, err);
+       __put_user_error(current->thread.tpidr2_el0, &ctx->tpidr2, err);
+       return err;
+ }
+ static int restore_tpidr2_context(struct user_ctxs *user)
+ {
+       u64 tpidr2_el0;
+       int err = 0;
+       if (user->tpidr2_size != sizeof(*user->tpidr2))
+               return -EINVAL;
+       __get_user_error(tpidr2_el0, &user->tpidr2->tpidr2, err);
+       if (!err)
+               current->thread.tpidr2_el0 = tpidr2_el0;
+       return err;
+ }
  static int preserve_za_context(struct za_context __user *ctx)
  {
        int err = 0;
                 * fpsimd_signal_preserve_current_state().
                 */
                err |= __copy_to_user((char __user *)ctx + ZA_SIG_REGS_OFFSET,
-                                     current->thread.za_state,
+                                     current->thread.sme_state,
                                      ZA_SIG_REGS_SIZE(vq));
        }
  
  
  static int restore_za_context(struct user_ctxs *user)
  {
-       int err;
+       int err = 0;
        unsigned int vq;
-       struct za_context za;
+       u16 user_vl;
  
-       if (__copy_from_user(&za, user->za, sizeof(za)))
-               return -EFAULT;
+       if (user->za_size < sizeof(*user->za))
+               return -EINVAL;
  
-       if (za.vl != task_get_sme_vl(current))
+       __get_user_error(user_vl, &(user->za->vl), err);
+       if (err)
+               return err;
+       if (user_vl != task_get_sme_vl(current))
                return -EINVAL;
  
-       if (za.head.size <= sizeof(*user->za)) {
+       if (user->za_size == sizeof(*user->za)) {
                current->thread.svcr &= ~SVCR_ZA_MASK;
                return 0;
        }
  
-       vq = sve_vq_from_vl(za.vl);
+       vq = sve_vq_from_vl(user_vl);
  
-       if (za.head.size < ZA_SIG_CONTEXT_SIZE(vq))
+       if (user->za_size < ZA_SIG_CONTEXT_SIZE(vq))
                return -EINVAL;
  
        /*
         * Careful: we are about __copy_from_user() directly into
-        * thread.za_state with preemption enabled, so protection is
+        * thread.sme_state with preemption enabled, so protection is
         * needed to prevent a racing context switch from writing stale
         * registers back over the new data.
         */
        /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
  
        sme_alloc(current);
-       if (!current->thread.za_state) {
+       if (!current->thread.sme_state) {
                current->thread.svcr &= ~SVCR_ZA_MASK;
                clear_thread_flag(TIF_SME);
                return -ENOMEM;
        }
  
-       err = __copy_from_user(current->thread.za_state,
+       err = __copy_from_user(current->thread.sme_state,
                               (char __user const *)user->za +
                                        ZA_SIG_REGS_OFFSET,
                               ZA_SIG_REGS_SIZE(vq));
  
        return 0;
  }
+ static int preserve_zt_context(struct zt_context __user *ctx)
+ {
+       int err = 0;
+       u16 reserved[ARRAY_SIZE(ctx->__reserved)];
+       if (WARN_ON(!thread_za_enabled(&current->thread)))
+               return -EINVAL;
+       memset(reserved, 0, sizeof(reserved));
+       __put_user_error(ZT_MAGIC, &ctx->head.magic, err);
+       __put_user_error(round_up(ZT_SIG_CONTEXT_SIZE(1), 16),
+                        &ctx->head.size, err);
+       __put_user_error(1, &ctx->nregs, err);
+       BUILD_BUG_ON(sizeof(ctx->__reserved) != sizeof(reserved));
+       err |= __copy_to_user(&ctx->__reserved, reserved, sizeof(reserved));
+       /*
+        * This assumes that the ZT state has already been saved to
+        * the task struct by calling the function
+        * fpsimd_signal_preserve_current_state().
+        */
+       err |= __copy_to_user((char __user *)ctx + ZT_SIG_REGS_OFFSET,
+                             thread_zt_state(&current->thread),
+                             ZT_SIG_REGS_SIZE(1));
+       return err ? -EFAULT : 0;
+ }
+ static int restore_zt_context(struct user_ctxs *user)
+ {
+       int err;
+       u16 nregs;
+       /* ZA must be restored first for this check to be valid */
+       if (!thread_za_enabled(&current->thread))
+               return -EINVAL;
+       if (user->zt_size != ZT_SIG_CONTEXT_SIZE(1))
+               return -EINVAL;
+       if (__copy_from_user(&nregs, &(user->zt->nregs), sizeof(nregs)))
+               return -EFAULT;
+       if (nregs != 1)
+               return -EINVAL;
+       /*
+        * Careful: we are about __copy_from_user() directly into
+        * thread.zt_state with preemption enabled, so protection is
+        * needed to prevent a racing context switch from writing stale
+        * registers back over the new data.
+        */
+       fpsimd_flush_task_state(current);
+       /* From now, fpsimd_thread_switch() won't touch ZT in thread state */
+       err = __copy_from_user(thread_zt_state(&current->thread),
+                              (char __user const *)user->zt +
+                                       ZT_SIG_REGS_OFFSET,
+                              ZT_SIG_REGS_SIZE(1));
+       if (err)
+               return -EFAULT;
+       return 0;
+ }
  #else /* ! CONFIG_ARM64_SME */
  
  /* Turn any non-optimised out attempts to use these into a link error: */
+ extern int preserve_tpidr2_context(void __user *ctx);
+ extern int restore_tpidr2_context(struct user_ctxs *user);
  extern int preserve_za_context(void __user *ctx);
  extern int restore_za_context(struct user_ctxs *user);
+ extern int preserve_zt_context(void __user *ctx);
+ extern int restore_zt_context(struct user_ctxs *user);
  
  #endif /* ! CONFIG_ARM64_SME */
  
@@@ -473,7 -581,9 +586,9 @@@ static int parse_user_sigframe(struct u
  
        user->fpsimd = NULL;
        user->sve = NULL;
+       user->tpidr2 = NULL;
        user->za = NULL;
+       user->zt = NULL;
  
        if (!IS_ALIGNED((unsigned long)base, 16))
                goto invalid;
                        if (user->fpsimd)
                                goto invalid;
  
-                       if (size < sizeof(*user->fpsimd))
-                               goto invalid;
                        user->fpsimd = (struct fpsimd_context __user *)head;
+                       user->fpsimd_size = size;
                        break;
  
                case ESR_MAGIC:
                        if (user->sve)
                                goto invalid;
  
-                       if (size < sizeof(*user->sve))
+                       user->sve = (struct sve_context __user *)head;
+                       user->sve_size = size;
+                       break;
+               case TPIDR2_MAGIC:
+                       if (!system_supports_sme())
                                goto invalid;
  
-                       user->sve = (struct sve_context __user *)head;
+                       if (user->tpidr2)
+                               goto invalid;
+                       user->tpidr2 = (struct tpidr2_context __user *)head;
+                       user->tpidr2_size = size;
                        break;
  
                case ZA_MAGIC:
                        if (user->za)
                                goto invalid;
  
-                       if (size < sizeof(*user->za))
+                       user->za = (struct za_context __user *)head;
+                       user->za_size = size;
+                       break;
+               case ZT_MAGIC:
+                       if (!system_supports_sme2())
                                goto invalid;
  
-                       user->za = (struct za_context __user *)head;
+                       if (user->zt)
+                               goto invalid;
+                       user->zt = (struct zt_context __user *)head;
+                       user->zt_size = size;
                        break;
  
                case EXTRA_MAGIC:
@@@ -668,12 -794,18 +799,18 @@@ static int restore_sigframe(struct pt_r
                if (user.sve)
                        err = restore_sve_fpsimd_context(&user);
                else
-                       err = restore_fpsimd_context(user.fpsimd);
+                       err = restore_fpsimd_context(&user);
        }
  
+       if (err == 0 && system_supports_sme() && user.tpidr2)
+               err = restore_tpidr2_context(&user);
        if (err == 0 && system_supports_sme() && user.za)
                err = restore_za_context(&user);
  
+       if (err == 0 && system_supports_sme2() && user.zt)
+               err = restore_zt_context(&user);
        return err;
  }
  
@@@ -737,7 -869,7 +874,7 @@@ static int setup_sigframe_layout(struc
                        return err;
        }
  
 -      if (system_supports_sve()) {
 +      if (system_supports_sve() || system_supports_sme()) {
                unsigned int vq = 0;
  
                if (add_all || test_thread_flag(TIF_SVE) ||
                else
                        vl = task_get_sme_vl(current);
  
+               err = sigframe_alloc(user, &user->tpidr2_offset,
+                                    sizeof(struct tpidr2_context));
+               if (err)
+                       return err;
                if (thread_za_enabled(&current->thread))
                        vq = sve_vq_from_vl(vl);
  
                        return err;
        }
  
+       if (system_supports_sme2()) {
+               if (add_all || thread_za_enabled(&current->thread)) {
+                       err = sigframe_alloc(user, &user->zt_offset,
+                                            ZT_SIG_CONTEXT_SIZE(1));
+                       if (err)
+                               return err;
+               }
+       }
        return sigframe_alloc_end(user);
  }
  
@@@ -822,6 -968,13 +973,13 @@@ static int setup_sigframe(struct rt_sig
                err |= preserve_sve_context(sve_ctx);
        }
  
+       /* TPIDR2 if supported */
+       if (system_supports_sme() && err == 0) {
+               struct tpidr2_context __user *tpidr2_ctx =
+                       apply_user_offset(user, user->tpidr2_offset);
+               err |= preserve_tpidr2_context(tpidr2_ctx);
+       }
        /* ZA state if present */
        if (system_supports_sme() && err == 0 && user->za_offset) {
                struct za_context __user *za_ctx =
                err |= preserve_za_context(za_ctx);
        }
  
+       /* ZT state if present */
+       if (system_supports_sme2() && err == 0 && user->zt_offset) {
+               struct zt_context __user *zt_ctx =
+                       apply_user_offset(user, user->zt_offset);
+               err |= preserve_zt_context(zt_ctx);
+       }
        if (err == 0 && user->extra_offset) {
                char __user *sfp = (char __user *)user->sigframe;
                char __user *userp =
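
From userspace, the new TPIDR2 (and ZT) records appear in the signal frame
alongside the existing ones and can be located by the normal walk over the
__reserved area. A hedged sketch (it assumes asm/sigcontext.h from this
kernel, which defines TPIDR2_MAGIC and struct tpidr2_context, and it does not
follow the EXTRA_MAGIC indirection):

    /* Sketch: find the TPIDR2 record in a delivered signal frame. */
    #include <stdint.h>
    #include <ucontext.h>
    #include <asm/sigcontext.h>

    static uint64_t find_tpidr2(ucontext_t *uc)
    {
            struct _aarch64_ctx *head =
                    (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;

            while (head->magic) {           /* list ends at a zero record */
                    if (head->magic == TPIDR2_MAGIC)
                            return ((struct tpidr2_context *)head)->tpidr2;
                    head = (struct _aarch64_ctx *)((char *)head + head->size);
            }
            return 0;                       /* no record, e.g. no SME */
    }
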
index 87f42eb1c950ed87b4e12798cfc0ddc7eaeb069f,0ccc063daccb8f3c217a237d65ccb84bf5dff3c3..a6dd3e90755cf6b5a0fcdb15580bccea89ebf97e
@@@ -26,7 -26,6 +26,7 @@@
  #include <linux/syscalls.h>
  #include <linux/mm_types.h>
  #include <linux/kasan.h>
 +#include <linux/ubsan.h>
  #include <linux/cfi.h>
  
  #include <asm/atomic.h>
@@@ -163,10 -162,8 +163,8 @@@ static void dump_kernel_instr(const cha
  
                if (!bad)
                        p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
-               else {
-                       p += sprintf(p, "bad PC value");
-                       break;
-               }
+               else
+                       p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
        }
  
        printk("%sCode: %s\n", lvl, str);
@@@ -1075,19 -1072,6 +1073,19 @@@ static struct break_hook kasan_break_ho
  };
  #endif
  
 +#ifdef CONFIG_UBSAN_TRAP
 +static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
 +{
 +      die(report_ubsan_failure(regs, esr & UBSAN_BRK_MASK), regs, esr);
 +      return DBG_HOOK_HANDLED;
 +}
 +
 +static struct break_hook ubsan_break_hook = {
 +      .fn     = ubsan_handler,
 +      .imm    = UBSAN_BRK_IMM,
 +      .mask   = UBSAN_BRK_MASK,
 +};
 +#endif
  
  #define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
  
@@@ -1105,10 -1089,6 +1103,10 @@@ int __init early_brk64(unsigned long ad
  #ifdef CONFIG_KASAN_SW_TAGS
        if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
                return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
 +#endif
 +#ifdef CONFIG_UBSAN_TRAP
 +      if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
 +              return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
  #endif
        return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
  }
@@@ -1122,9 -1102,6 +1120,9 @@@ void __init trap_init(void
        register_kernel_break_hook(&fault_break_hook);
  #ifdef CONFIG_KASAN_SW_TAGS
        register_kernel_break_hook(&kasan_break_hook);
 +#endif
 +#ifdef CONFIG_UBSAN_TRAP
 +      register_kernel_break_hook(&ubsan_break_hook);
  #endif
        debug_traps_init();
  }
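
The ubsan_break_hook added above decodes the BRK instructions that the
compiler emits for CONFIG_UBSAN_TRAP kernels. The same trap-instead-of-report
behaviour can be reproduced in a standalone userspace build (a rough example;
the exact flag spelling varies between compiler versions, -fsanitize-trap=...
is the current Clang form):

    /* Built with
     *   clang -fsanitize=signed-integer-overflow -fsanitize-trap=undefined
     * the overflow below compiles to a BRK whose immediate identifies the
     * failed check, rather than to a call into the UBSAN runtime. */
    #include <limits.h>

    static int increment(int x)
    {
            return x + 1;           /* undefined behaviour when x == INT_MAX */
    }

    int main(void)
    {
            return increment(INT_MAX);      /* dies with SIGTRAP */
    }
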
index 2777214cbf1a4d76e823cffa6b37a051fdaf9af2,1a43df27a20461cac2384b7572a9a60ad2cb6f8f..b9202c2ee18e02d827febeb75b24be4430a7b956
@@@ -93,6 -93,7 +93,7 @@@ jiffies = jiffies_64
  
  #ifdef CONFIG_HIBERNATION
  #define HIBERNATE_TEXT                                        \
+       ALIGN_FUNCTION();                               \
        __hibernate_exit_text_start = .;                \
        *(.hibernate_exit.text)                         \
        __hibernate_exit_text_end = .;
  
  #ifdef CONFIG_KEXEC_CORE
  #define KEXEC_TEXT                                    \
+       ALIGN_FUNCTION();                               \
        __relocate_new_kernel_start = .;                \
        *(.kexec_relocate.text)                         \
        __relocate_new_kernel_end = .;
@@@ -175,10 -177,10 +177,9 @@@ SECTION
                        ENTRY_TEXT
                        TEXT_TEXT
                        SCHED_TEXT
 -                      CPUIDLE_TEXT
                        LOCK_TEXT
                        KPROBES_TEXT
                        HYPERVISOR_TEXT
-                       IDMAP_TEXT
                        *(.gnu.warning)
                . = ALIGN(16);
                *(.got)                 /* Global offset table          */
                TRAMP_TEXT
                HIBERNATE_TEXT
                KEXEC_TEXT
+               IDMAP_TEXT
                . = ALIGN(PAGE_SIZE);
        }
  
@@@ -354,6 -357,8 +356,8 @@@ ASSERT(__idmap_text_end - (__idmap_text
  #ifdef CONFIG_HIBERNATION
  ASSERT(__hibernate_exit_text_end - __hibernate_exit_text_start <= SZ_4K,
         "Hibernate exit text is bigger than 4 KiB")
+ ASSERT(__hibernate_exit_text_start == swsusp_arch_suspend_exit,
+        "Hibernate exit text does not start with swsusp_arch_suspend_exit")
  #endif
  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
  ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE,
@@@ -380,4 -385,6 +384,6 @@@ ASSERT(swapper_pg_dir - tramp_pg_dir =
  ASSERT(__relocate_new_kernel_end - __relocate_new_kernel_start <= SZ_4K,
         "kexec relocation code is bigger than 4 KiB")
  ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken")
+ ASSERT(__relocate_new_kernel_start == arm64_relocate_new_kernel,
+        "kexec control page does not start with arm64_relocate_new_kernel")
  #endif
diff --combined arch/arm64/mm/mmu.c
index d77c9f56b7b435d43f906445ecaef3554530c693,34d5f7c4c64e2f43c08441a0a5a029f5e2d2c4c9..6f9d8898a02516f6999f889b4fd23210aeaa0898
@@@ -133,7 -133,7 +133,7 @@@ static phys_addr_t __init early_pgtable
        return phys;
  }
  
- static bool pgattr_change_is_safe(u64 old, u64 new)
+ bool pgattr_change_is_safe(u64 old, u64 new)
  {
        /*
         * The following mapping attributes may be updated in live
        pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE | PTE_NG;
  
        /* creating or taking down mappings is always safe */
-       if (old == 0 || new == 0)
+       if (!pte_valid(__pte(old)) || !pte_valid(__pte(new)))
                return true;
  
+       /* A live entry's pfn should not change */
+       if (pte_pfn(__pte(old)) != pte_pfn(__pte(new)))
+               return false;
        /* live contiguous mappings may not be manipulated at all */
        if ((old | new) & PTE_CONT)
                return false;
@@@ -1630,24 -1634,3 +1634,24 @@@ static int __init prevent_bootmem_remov
  }
  early_initcall(prevent_bootmem_remove_init);
  #endif
 +
 +pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
 +{
 +      if (IS_ENABLED(CONFIG_ARM64_ERRATUM_2645198) &&
 +          cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
 +              /*
 +               * Break-before-make (BBM) is required for all user space mappings
 +               * when the permission changes from executable to non-executable
 +               * in cases where cpu is affected with errata #2645198.
 +               */
 +              if (pte_user_exec(READ_ONCE(*ptep)))
 +                      return ptep_clear_flush(vma, addr, ptep);
 +      }
 +      return ptep_get_and_clear(vma->vm_mm, addr, ptep);
 +}
 +
 +void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
 +                           pte_t old_pte, pte_t pte)
 +{
 +      set_pte_at(vma->vm_mm, addr, ptep, pte);
 +}
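
For context, the two hooks defined above slot into the generic
break-before-make transaction their callers (mm/mprotect.c and friends)
already perform, roughly:

    /* Caller-side sequence the workaround relies on: "break" (clear the PTE,
     * with a TLB flush on affected CPUs), rebuild the entry, then "make"
     * (install it). newprot is the caller's new pgprot_t. */
    pte_t old_pte = ptep_modify_prot_start(vma, addr, ptep);
    pte_t new_pte = pte_modify(old_pte, newprot);
    ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
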
diff --combined arch/arm64/tools/cpucaps
index dfeb2c51e2573dc07d58be19e58cb89d568051ab,b98f71120588bbdf5f7523a8d683d49cac669a6e..10dcfa13390a856a9b7c07dc1afd0dde546b1598
@@@ -28,7 -28,9 +28,9 @@@ HAS_GENERIC_AUT
  HAS_GENERIC_AUTH_ARCH_QARMA3
  HAS_GENERIC_AUTH_ARCH_QARMA5
  HAS_GENERIC_AUTH_IMP_DEF
- HAS_IRQ_PRIO_MASKING
+ HAS_GIC_CPUIF_SYSREGS
+ HAS_GIC_PRIO_MASKING
+ HAS_GIC_PRIO_RELAXED_SYNC
  HAS_LDAPR
  HAS_LSE_ATOMICS
  HAS_NO_FPSIMD
@@@ -38,7 -40,6 +40,6 @@@ HAS_RAS_EXT
  HAS_RNG
  HAS_SB
  HAS_STAGE2_FWB
- HAS_SYSREG_GIC_CPUIF
  HAS_TIDCP1
  HAS_TLB_RANGE
  HAS_VIRT_HOST_EXTN
@@@ -50,6 -51,7 +51,7 @@@ MT
  MTE_ASYMM
  SME
  SME_FA64
+ SME2
  SPECTRE_V2
  SPECTRE_V3A
  SPECTRE_V4
@@@ -71,7 -73,6 +73,7 @@@ WORKAROUND_203892
  WORKAROUND_2064142
  WORKAROUND_2077057
  WORKAROUND_2457168
 +WORKAROUND_2645198
  WORKAROUND_2658417
  WORKAROUND_TRBE_OVERWRITE_FILL_MODE
  WORKAROUND_TSB_FLUSH_FAILURE
index 4501652e11ab642000e4fe26dd56603234ee3c34,f5da4fbccd860ab11b9bc817f2f8d41e8a12792d..3997702663727e30948d9083bad6e70e1498af02
@@@ -19,13 -19,10 +19,13 @@@ static bool system_needs_vamap(void
        const u8 *type1_family = efi_get_smbios_string(1, family);
  
        /*
 -       * Ampere Altra machines crash in SetTime() if SetVirtualAddressMap()
 -       * has not been called prior.
 +       * Ampere eMAG, Altra, and Altra Max machines crash in SetTime() if
 +       * SetVirtualAddressMap() has not been called prior.
         */
 -      if (!type1_family || strcmp(type1_family, "Altra"))
 +      if (!type1_family || (
 +          strcmp(type1_family, "eMAG") &&
 +          strcmp(type1_family, "Altra") &&
 +          strcmp(type1_family, "Altra Max")))
                return false;
  
        efi_warn("Working around broken SetVirtualAddressMap()\n");
@@@ -59,6 -56,12 +59,12 @@@ efi_status_t check_platform_features(vo
        return EFI_SUCCESS;
  }
  
+ #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
+ #define DCTYPE        "civac"
+ #else
+ #define DCTYPE        "cvau"
+ #endif
  void efi_cache_sync_image(unsigned long image_base,
                          unsigned long alloc_size,
                          unsigned long code_size)
        u64 lsize = 4 << cpuid_feature_extract_unsigned_field(ctr,
                                                CTR_EL0_DminLine_SHIFT);
  
-       do {
-               asm("dc civac, %0" :: "r"(image_base));
-               image_base += lsize;
-               alloc_size -= lsize;
-       } while (alloc_size >= lsize);
+       /* only perform the cache maintenance if needed for I/D coherency */
+       if (!(ctr & BIT(CTR_EL0_IDC_SHIFT))) {
+               do {
+                       asm("dc " DCTYPE ", %0" :: "r"(image_base));
+                       image_base += lsize;
+                       code_size -= lsize;
+               } while (code_size >= lsize);
+       }
  
        asm("ic ialluis");
        dsb(ish);
        isb();
  }
+ unsigned long __weak primary_entry_offset(void)
+ {
+       /*
+        * By default, we can invoke the kernel via the branch instruction in
+        * the image header, so offset #0. This will be overridden by the EFI
+        * stub build that is linked into the core kernel, as in that case, the
+        * image header may not have been loaded into memory, or may be mapped
+        * with non-executable permissions.
+        */
+        return 0;
+ }
+ void __noreturn efi_enter_kernel(unsigned long entrypoint,
+                                unsigned long fdt_addr,
+                                unsigned long fdt_size)
+ {
+       void (* __noreturn enter_kernel)(u64, u64, u64, u64);
+       enter_kernel = (void *)entrypoint + primary_entry_offset();
+       enter_kernel(fdt_addr, 0, 0, 0);
+ }
index bb57ab8bff6a391a2c88286f350b299d0017a9f4,3779836737c89aa0c70fe88347bf66eafc265970..fd134e1f481a294bc3b565930459083335ce7e87
@@@ -89,15 -89,6 +89,6 @@@ static DEFINE_STATIC_KEY_TRUE(supports_
   */
  static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
  
- /*
-  * Global static key controlling whether an update to PMR allowing more
-  * interrupts requires to be propagated to the redistributor (DSB SY).
-  * And this needs to be exported for modules to be able to enable
-  * interrupts...
-  */
- DEFINE_STATIC_KEY_FALSE(gic_pmr_sync);
- EXPORT_SYMBOL(gic_pmr_sync);
  DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
  EXPORT_SYMBOL(gic_nonsecure_priorities);
  
@@@ -1310,7 -1301,9 +1301,7 @@@ static void __init gic_smp_init(void
                                  gic_starting_cpu, NULL);
  
        /* Register all 8 non-secure SGIs */
 -      base_sgi = __irq_domain_alloc_irqs(gic_data.domain, -1, 8,
 -                                         NUMA_NO_NODE, &sgi_fwspec,
 -                                         false, NULL);
 +      base_sgi = irq_domain_alloc_irqs(gic_data.domain, 8, NUMA_NO_NODE, &sgi_fwspec);
        if (WARN_ON(base_sgi <= 0))
                return;
  
@@@ -1766,16 -1759,8 +1757,8 @@@ static void gic_enable_nmi_support(void
        for (i = 0; i < gic_data.ppi_nr; i++)
                refcount_set(&ppi_nmi_refs[i], 0);
  
-       /*
-        * Linux itself doesn't use 1:N distribution, so has no need to
-        * set PMHE. The only reason to have it set is if EL3 requires it
-        * (and we can't change it).
-        */
-       if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK)
-               static_branch_enable(&gic_pmr_sync);
        pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n",
-               static_branch_unlikely(&gic_pmr_sync) ? "forced" : "relaxed");
+               gic_has_relaxed_pmr_sync() ? "relaxed" : "forced");
  
        /*
         * How priority values are used by the GIC depends on two things:
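With the exported gic_pmr_sync static key gone, the driver now reports "relaxed" vs "forced" via gic_has_relaxed_pmr_sync(). One plausible shape for that helper, assuming it is backed by an arm64 cpucap (the capability name below is an assumption for illustration):

static inline bool gic_has_relaxed_pmr_sync(void)
{
	/* assumed cpucap name; set when no DSB is required after a PMR write */
	return cpus_have_cap(ARM64_HAS_GIC_PRIO_RELAXED_SYNC);
}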
diff --combined drivers/irqchip/irq-gic.c
index 4fa4d8ac76d9b234a3145d5044cc456251386eed,6ae697a3800d5c779c704ab3f0a1262398ab3028..95e3d2a71db64a998898219cc21e237144c84e0f
@@@ -54,7 -54,7 +54,7 @@@
  
  static void gic_check_cpu_features(void)
  {
-       WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_SYSREG_GIC_CPUIF),
+       WARN_TAINT_ONCE(this_cpu_has_cap(ARM64_HAS_GIC_CPUIF_SYSREGS),
                        TAINT_CPU_OUT_OF_SPEC,
                        "GICv3 system registers enabled, broken firmware!\n");
  }
@@@ -868,7 -868,9 +868,7 @@@ static __init void gic_smp_init(void
                                  "irqchip/arm/gic:starting",
                                  gic_starting_cpu, NULL);
  
 -      base_sgi = __irq_domain_alloc_irqs(gic_data[0].domain, -1, 8,
 -                                         NUMA_NO_NODE, &sgi_fwspec,
 -                                         false, NULL);
 +      base_sgi = irq_domain_alloc_irqs(gic_data[0].domain, 8, NUMA_NO_NODE, &sgi_fwspec);
        if (WARN_ON(base_sgi <= 0))
                return;
  
diff --combined drivers/perf/arm-cmn.c
index 1deb61b22bc76767aaac41165a6426cbee8cf2f7,e220714954b09b2c0d1e56a0a3d34ef87c5ee9ee..c9689861be3fad8c1a09ebf536d0c2f91ed3a36a
@@@ -1576,6 -1576,7 +1576,6 @@@ static int arm_cmn_event_init(struct pe
                        hw->dn++;
                        continue;
                }
 -              hw->dtcs_used |= arm_cmn_node_to_xp(cmn, dn)->dtc;
                hw->num_dns++;
                if (bynodeid)
                        break;
                        nodeid, nid.x, nid.y, nid.port, nid.dev, type);
                return -EINVAL;
        }
 +      /*
 +       * Keep assuming non-cycles events count in all DTC domains; turns out
 +       * it's hard to make a worthwhile optimisation around this, short of
 +       * going all-in with domain-local counter allocation as well.
 +       */
 +      hw->dtcs_used = (1U << cmn->num_dtcs) - 1;
  
        return arm_cmn_validate_group(cmn, event);
  }
@@@ -1870,6 -1865,7 +1870,7 @@@ static void arm_cmn_init_dtm(struct arm
  
        dtm->base = xp->pmu_base + CMN_DTM_OFFSET(idx);
        dtm->pmu_config_low = CMN_DTM_PMU_CONFIG_PMU_EN;
+       writeq_relaxed(dtm->pmu_config_low, dtm->base + CMN_DTM_PMU_CONFIG);
        for (i = 0; i < 4; i++) {
                dtm->wp_event[i] = -1;
                writeq_relaxed(0, dtm->base + CMN_DTM_WPn_MASK(i));
diff --combined include/linux/compiler_attributes.h
index 56467f86a27cba237507780161ba706a397aca25,b83126452c65184a1ff939a1a74a6436e846518e..4a3bd114a24facea243d3c7cf1db815f55890ab7
  # define __assume_aligned(a, ...)
  #endif
  
- /*
-  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
-  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
-  */
- #define __cold                          __attribute__((__cold__))
  /*
   * Note the long name.
   *
   *
   * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size
   */
 +#if __has_attribute(__pass_dynamic_object_size__)
 +# define __pass_dynamic_object_size(type)     __attribute__((__pass_dynamic_object_size__(type)))
 +#else
 +# define __pass_dynamic_object_size(type)
 +#endif
  #if __has_attribute(__pass_object_size__)
  # define __pass_object_size(type)     __attribute__((__pass_object_size__(type)))
  #else
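__pass_dynamic_object_size mirrors __pass_object_size but lets clang forward __builtin_dynamic_object_size() information from each call site, covering allocations whose size is only known at run time. A hypothetical annotated declaration (illustrative only, not taken from fortify-string.h):

/* hypothetical fortified helper; the name is illustrative only */
extern void *my_fortified_memset(void * const p __pass_dynamic_object_size(0),
				 int c, __kernel_size_t n);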
diff --combined include/linux/compiler_types.h
index dea5bf5bd09c4dcf650effe0c6e3d71d8a2b63a4,aab34e30128e938872f6ff614c1db8a74fdf22c3..690c7c826fbfd738298f7596d59952c66167f61b
@@@ -79,6 -79,33 +79,33 @@@ static inline void __chk_io_ptr(const v
  /* Attributes */
  #include <linux/compiler_attributes.h>
  
+ #if CONFIG_FUNCTION_ALIGNMENT > 0
+ #define __function_aligned            __aligned(CONFIG_FUNCTION_ALIGNMENT)
+ #else
+ #define __function_aligned
+ #endif
+ /*
+  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
+  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
+  *
+  * When -falign-functions=N is in use, we must avoid the cold attribute as
+  * contemporary versions of GCC drop the alignment for cold functions. Worse,
+  * GCC can implicitly mark callees of cold functions as cold themselves, so
+  * it's not sufficient to add __function_aligned here as that will not ensure
+  * that callees are correctly aligned.
+  *
+  * See:
+  *
+  *   https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N
+  *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9
+  */
+ #if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0)
+ #define __cold                                __attribute__((__cold__))
+ #else
+ #define __cold
+ #endif
  /* Builtins */
  
  /*
@@@ -232,25 -259,11 +259,25 @@@ struct ftrace_likely_data 
  #endif
  
  /* Section for code which can't be instrumented at all */
 -#define noinstr                                                               \
 -      noinline notrace __attribute((__section__(".noinstr.text")))    \
 +#define __noinstr_section(section)                                    \
 +      noinline notrace __attribute((__section__(section)))            \
        __no_kcsan __no_sanitize_address __no_profile __no_sanitize_coverage \
        __no_sanitize_memory
  
 +#define noinstr __noinstr_section(".noinstr.text")
 +
 +/*
 + * The __cpuidle section is used twofold:
 + *
 + *  1) the original use -- identifying if a CPU is 'stuck' in idle state based
 + *     on its instruction pointer. See cpu_in_idle().
 + *
 + *  2) suppressing instrumentation around where cpuidle disables RCU; where the
 + *     function isn't strictly required for #1, this is interchangeable with
 + *     noinstr.
 + */
 +#define __cpuidle __noinstr_section(".cpuidle.text")
 +
  #endif /* __KERNEL__ */
  
  #endif /* __ASSEMBLY__ */
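The reworked noinstr and new __cpuidle definitions share __noinstr_section(). A sketch of how an idle-state enter callback might carry the annotation (the driver-side names here are illustrative, not an existing driver):

static __cpuidle int my_idle_enter(struct cpuidle_device *dev,
				   struct cpuidle_driver *drv, int index)
{
	/*
	 * Placed in .cpuidle.text: no tracing or sanitizer instrumentation
	 * in the window where RCU is not watching. The architecture's
	 * wait-for-interrupt would go here.
	 */
	return index;
}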
diff --combined kernel/exit.c
index bccfa4218356e7ec91190201ebecb6f83dbea6be,c8e0375705f48a9c6d80d30bf79e20f71778c0ce..f2afdb0add7c5173956940c2a49342807c6407e7
@@@ -807,8 -807,6 +807,8 @@@ void __noreturn do_exit(long code
        struct task_struct *tsk = current;
        int group_dead;
  
 +      WARN_ON(irqs_disabled());
 +
        synchronize_group_exit(tsk, code);
  
        WARN_ON(tsk->plug);
@@@ -940,11 -938,6 +940,11 @@@ void __noreturn make_task_dead(int sign
        if (unlikely(!tsk->pid))
                panic("Attempted to kill the idle task!");
  
 +      if (unlikely(irqs_disabled())) {
 +              pr_info("note: %s[%d] exited with irqs disabled\n",
 +                      current->comm, task_pid_nr(current));
 +              local_irq_enable();
 +      }
        if (unlikely(in_atomic())) {
                pr_info("note: %s[%d] exited with preempt_count %d\n",
                        current->comm, task_pid_nr(current),
@@@ -1905,7 -1898,14 +1905,14 @@@ bool thread_group_exited(struct pid *pi
  }
  EXPORT_SYMBOL(thread_group_exited);
  
- __weak void abort(void)
+ /*
+  * This needs to be __function_aligned as GCC implicitly makes any
+  * implementation of abort() cold and drops alignment specified by
+  * -falign-functions=N.
+  *
+  * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c11
+  */
+ __weak __function_aligned void abort(void)
  {
        BUG();
  
diff --combined kernel/trace/Kconfig
index d7043043f59c8d2d31ce81794e6279b48ea7aaf6,5df427a2321df77116212c383e7f48c0d8589573..caf32389faf35c526f728616cb1ed974588ef90b
@@@ -42,6 -42,9 +42,9 @@@ config HAVE_DYNAMIC_FTRACE_WITH_REG
  config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
        bool
  
+ config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+       bool
  config HAVE_DYNAMIC_FTRACE_WITH_ARGS
        bool
        help
@@@ -257,6 -260,10 +260,10 @@@ config DYNAMIC_FTRACE_WITH_DIRECT_CALL
        depends on DYNAMIC_FTRACE_WITH_REGS
        depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  
+ config DYNAMIC_FTRACE_WITH_CALL_OPS
+       def_bool y
+       depends on HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
  config DYNAMIC_FTRACE_WITH_ARGS
        def_bool y
        depends on DYNAMIC_FTRACE
@@@ -933,8 -940,8 +940,8 @@@ config RING_BUFFER_RECORD_RECURSIO
        default y
        help
          The ring buffer has its own internal recursion. Although when
 -        recursion happens it wont cause harm because of the protection,
 -        but it does cause an unwanted overhead. Enabling this option will
 +        recursion happens it won't cause harm because of the protection,
 +        but it does cause unwanted overhead. Enabling this option will
          place where recursion was detected into the ftrace "recursed_functions"
          file.
  
@@@ -1017,8 -1024,8 +1024,8 @@@ config RING_BUFFER_STARTUP_TES
         The test runs for 10 seconds. This will slow your boot time
         by at least 10 more seconds.
  
 -       At the end of the test, statics and more checks are done.
 -       It will output the stats of each per cpu buffer. What
 +       At the end of the test, statistics and more checks are done.
 +       It will output the stats of each per cpu buffer: What
         was written, the sizes, what was read, what was lost, and
         other similar details.
  
diff --combined kernel/trace/ftrace.c
index 750aa3f08b25ae8b75da06113b8200c4b873ebd4,e634b80f49d17fcfd26712ecf703cc5d2c1b8f11..51896b610414fa2b23fd53e9453b1ec8da8895c4
@@@ -125,6 -125,33 +125,33 @@@ struct ftrace_ops global_ops
  void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
                          struct ftrace_ops *op, struct ftrace_regs *fregs);
  
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /*
+  * Stub used to invoke the list ops without requiring a separate trampoline.
+  */
+ const struct ftrace_ops ftrace_list_ops = {
+       .func   = ftrace_ops_list_func,
+       .flags  = FTRACE_OPS_FL_STUB,
+ };
+ static void ftrace_ops_nop_func(unsigned long ip, unsigned long parent_ip,
+                               struct ftrace_ops *op,
+                               struct ftrace_regs *fregs)
+ {
+       /* do nothing */
+ }
+ /*
+  * Stub used when a call site is disabled. May be called transiently by threads
+  * which have made it into ftrace_caller but haven't yet recovered the ops at
+  * the point the call site is disabled.
+  */
+ const struct ftrace_ops ftrace_nop_ops = {
+       .func   = ftrace_ops_nop_func,
+       .flags  = FTRACE_OPS_FL_STUB,
+ };
+ #endif
  static inline void ftrace_ops_init(struct ftrace_ops *ops)
  {
  #ifdef CONFIG_DYNAMIC_FTRACE
@@@ -1248,17 -1275,12 +1275,17 @@@ static void free_ftrace_hash_rcu(struc
        call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
  }
  
 +/**
 + * ftrace_free_filter - remove all filters for an ftrace_ops
 + * @ops: the ops to remove the filters from
 + */
  void ftrace_free_filter(struct ftrace_ops *ops)
  {
        ftrace_ops_init(ops);
        free_ftrace_hash(ops->func_hash->filter_hash);
        free_ftrace_hash(ops->func_hash->notrace_hash);
  }
 +EXPORT_SYMBOL_GPL(ftrace_free_filter);
  
  static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
  {
@@@ -1819,6 -1841,18 +1846,18 @@@ static bool __ftrace_hash_rec_update(st
                         * if rec count is zero.
                         */
                }
+               /*
+                * If the rec has a single associated ops, and ops->func can be
+                * called directly, allow the call site to call via the ops.
+                */
+               if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) &&
+                   ftrace_rec_count(rec) == 1 &&
+                   ftrace_ops_get_func(ops) == ops->func)
+                       rec->flags |= FTRACE_FL_CALL_OPS;
+               else
+                       rec->flags &= ~FTRACE_FL_CALL_OPS;
                count++;
  
                /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
@@@ -2113,8 -2147,9 +2152,9 @@@ void ftrace_bug(int failed, struct dyn_
                struct ftrace_ops *ops = NULL;
  
                pr_info("ftrace record flags: %lx\n", rec->flags);
-               pr_cont(" (%ld)%s", ftrace_rec_count(rec),
-                       rec->flags & FTRACE_FL_REGS ? " R" : "  ");
+               pr_cont(" (%ld)%s%s", ftrace_rec_count(rec),
+                       rec->flags & FTRACE_FL_REGS ? " R" : "  ",
+                       rec->flags & FTRACE_FL_CALL_OPS ? " O" : "  ");
                if (rec->flags & FTRACE_FL_TRAMP_EN) {
                        ops = ftrace_find_tramp_ops_any(rec);
                        if (ops) {
@@@ -2182,6 -2217,7 +2222,7 @@@ static int ftrace_check_record(struct d
                 * want the direct enabled (it will be done via the
                 * direct helper). But if DIRECT_EN is set, and
                 * the count is not one, we need to clear it.
+                *
                 */
                if (ftrace_rec_count(rec) == 1) {
                        if (!(rec->flags & FTRACE_FL_DIRECT) !=
                } else if (rec->flags & FTRACE_FL_DIRECT_EN) {
                        flag |= FTRACE_FL_DIRECT;
                }
+               /*
+                * Ops calls are special, as count matters.
+                * As with direct calls, they must only be enabled when count
+                * is one, otherwise they'll be handled via the list ops.
+                */
+               if (ftrace_rec_count(rec) == 1) {
+                       if (!(rec->flags & FTRACE_FL_CALL_OPS) !=
+                           !(rec->flags & FTRACE_FL_CALL_OPS_EN))
+                               flag |= FTRACE_FL_CALL_OPS;
+               } else if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+                       flag |= FTRACE_FL_CALL_OPS;
+               }
        }
  
        /* If the state of this record hasn't changed, then do nothing */
                                        rec->flags &= ~FTRACE_FL_DIRECT_EN;
                                }
                        }
+                       if (flag & FTRACE_FL_CALL_OPS) {
+                               if (ftrace_rec_count(rec) == 1) {
+                                       if (rec->flags & FTRACE_FL_CALL_OPS)
+                                               rec->flags |= FTRACE_FL_CALL_OPS_EN;
+                                       else
+                                               rec->flags &= ~FTRACE_FL_CALL_OPS_EN;
+                               } else {
+                                       /*
+                                        * Can only call directly if there's
+                                        * only one set of associated ops.
+                                        */
+                                       rec->flags &= ~FTRACE_FL_CALL_OPS_EN;
+                               }
+                       }
                }
  
                /*
                         * and REGS states. The _EN flags must be disabled though.
                         */
                        rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
-                                       FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN);
+                                       FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN |
+                                       FTRACE_FL_CALL_OPS_EN);
        }
  
        ftrace_bug_type = FTRACE_BUG_NOP;
@@@ -2436,6 -2501,25 +2506,25 @@@ ftrace_find_tramp_ops_new(struct dyn_ft
        return NULL;
  }
  
+ struct ftrace_ops *
+ ftrace_find_unique_ops(struct dyn_ftrace *rec)
+ {
+       struct ftrace_ops *op, *found = NULL;
+       unsigned long ip = rec->ip;
+       do_for_each_ftrace_op(op, ftrace_ops_list) {
+               if (hash_contains_ip(ip, op->func_hash)) {
+                       if (found)
+                               return NULL;
+                       found = op;
+               }
+       } while_for_each_ftrace_op(op);
+       return found;
+ }
  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
  /* Protected by rcu_tasks for reading, and direct_mutex for writing */
  static struct ftrace_hash *direct_functions = EMPTY_HASH;
@@@ -3785,11 -3869,12 +3874,12 @@@ static int t_show(struct seq_file *m, v
        if (iter->flags & FTRACE_ITER_ENABLED) {
                struct ftrace_ops *ops;
  
-               seq_printf(m, " (%ld)%s%s%s",
+               seq_printf(m, " (%ld)%s%s%s%s",
                           ftrace_rec_count(rec),
                           rec->flags & FTRACE_FL_REGS ? " R" : "  ",
                           rec->flags & FTRACE_FL_IPMODIFY ? " I" : "  ",
-                          rec->flags & FTRACE_FL_DIRECT ? " D" : "  ");
+                          rec->flags & FTRACE_FL_DIRECT ? " D" : "  ",
+                          rec->flags & FTRACE_FL_CALL_OPS ? " O" : "  ");
                if (rec->flags & FTRACE_FL_TRAMP_EN) {
                        ops = ftrace_find_tramp_ops_any(rec);
                        if (ops) {
                } else {
                        add_trampoline_func(m, NULL, rec);
                }
+               if (rec->flags & FTRACE_FL_CALL_OPS_EN) {
+                       ops = ftrace_find_unique_ops(rec);
+                       if (ops) {
+                               seq_printf(m, "\tops: %pS (%pS)",
+                                          ops, ops->func);
+                       } else {
+                               seq_puts(m, "\tops: ERROR!");
+                       }
+               }
                if (rec->flags & FTRACE_FL_DIRECT) {
                        unsigned long direct;
  
@@@ -5844,10 -5938,6 +5943,10 @@@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_
   *
   * Filters denote which functions should be enabled when tracing is enabled
   * If @ip is NULL, it fails to update filter.
 + *
 + * This can allocate memory which must be freed before @ops can be freed,
 + * either by removing each filtered addr or by using
 + * ftrace_free_filter(@ops).
   */
  int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
                         int remove, int reset)
@@@ -5867,11 -5957,7 +5966,11 @@@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ip)
   *
   * Filters denote which functions should be enabled when tracing is enabled
   * If @ips array or any ip specified within is NULL , it fails to update filter.
 - */
 + *
 + * This can allocate memory which must be freed before @ops can be freed,
 + * either by removing each filtered addr or by using
 + * ftrace_free_filter(@ops).
 +*/
  int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips,
                          unsigned int cnt, int remove, int reset)
  {
@@@ -5913,10 -5999,6 +6012,10 @@@ ftrace_set_regex(struct ftrace_ops *ops
   *
   * Filters denote which functions should be enabled when tracing is enabled.
   * If @buf is NULL and reset is set, all functions will be enabled for tracing.
 + *
 + * This can allocate memory which must be freed before @ops can be freed,
 + * either by removing each filtered addr or by using
 + * ftrace_free_filter(@ops).
   */
  int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf,
                       int len, int reset)
@@@ -5936,10 -6018,6 +6035,10 @@@ EXPORT_SYMBOL_GPL(ftrace_set_filter)
   * Notrace Filters denote which functions should not be enabled when tracing
   * is enabled. If @buf is NULL and reset is set, all functions will be enabled
   * for tracing.
 + *
 + * This can allocate memory which must be freed before @ops can be freed,
 + * either by removing each filtered addr or by using
 + * ftrace_free_filter(@ops).
   */
  int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
                        int len, int reset)
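The kernel-doc added above stresses that filters set on an ftrace_ops allocate memory that must be released before the ops itself goes away. A minimal module-style sketch of that lifecycle (the traced symbol and callback names are placeholders):

#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/string.h>

static void my_trace_cb(unsigned long ip, unsigned long parent_ip,
			struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	/* runs on entry to every filtered function */
}

static struct ftrace_ops my_ops = {
	.func	= my_trace_cb,
};

static int __init my_init(void)
{
	int ret;

	/* allocates filter hash memory owned by my_ops */
	ret = ftrace_set_filter(&my_ops, "vfs_read", strlen("vfs_read"), 1);
	if (ret)
		return ret;

	ret = register_ftrace_function(&my_ops);
	if (ret)
		ftrace_free_filter(&my_ops);	/* free the filter on failure */
	return ret;
}

static void __exit my_exit(void)
{
	unregister_ftrace_function(&my_ops);
	ftrace_free_filter(&my_ops);		/* must happen before my_ops is freed */
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");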