From: Linus Torvalds Date: Thu, 31 Aug 2023 19:20:12 +0000 (-0700) Subject: Merge tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git... X-Git-Tag: v6.6-rc1~117 X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/df57721f9a63e8a1fb9b9b2e70de4aa4c7e0cd2e Merge tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 shadow stack support from Dave Hansen: "This is the long awaited x86 shadow stack support, part of Intel's Control-flow Enforcement Technology (CET). CET consists of two related security features: shadow stacks and indirect branch tracking. This series implements just the shadow stack part of this feature, and just for userspace. The main use case for shadow stack is providing protection against return oriented programming attacks. It works by maintaining a secondary (shadow) stack using a special memory type that has protections against modification. When executing a CALL instruction, the processor pushes the return address to both the normal stack and to the special permission shadow stack. Upon RET, the processor pops the shadow stack copy and compares it to the normal stack copy. For more information, refer to the links below for the earlier versions of this patch set" Link: https://lore.kernel.org/lkml/20220130211838.8382-1-rick.p.edgecombe@intel.com/ Link: https://lore.kernel.org/lkml/20230613001108.3040476-1-rick.p.edgecombe@intel.com/ * tag 'x86_shstk_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (47 commits) x86/shstk: Change order of __user in type x86/ibt: Convert IBT selftest to asm x86/shstk: Don't retry vm_munmap() on -EINTR x86/kbuild: Fix Documentation/ reference x86/shstk: Move arch detail comment out of core mm x86/shstk: Add ARCH_SHSTK_STATUS x86/shstk: Add ARCH_SHSTK_UNLOCK x86: Add PTRACE interface for shadow stack selftests/x86: Add shadow stack test x86/cpufeatures: Enable CET CR4 bit for shadow stack x86/shstk: Wire in shadow stack interface x86: Expose thread features in /proc/$PID/status x86/shstk: Support WRSS for userspace x86/shstk: Introduce map_shadow_stack syscall x86/shstk: Check that signal frame is shadow stack mem x86/shstk: Check that SSP is aligned on sigreturn x86/shstk: Handle signals for shadow stack x86/shstk: Introduce routines modifying shstk x86/shstk: Handle thread shadow stack x86/shstk: Add user-mode shadow stack support ... --- df57721f9a63e8a1fb9b9b2e70de4aa4c7e0cd2e diff --cc arch/arm64/mm/trans_pgd.c index e9ad391fc8ea,a01493f3a06f..7b14df3c6477 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@@ -41,9 -40,8 +41,9 @@@ static void _copy_pte(pte_t *dst_ptep, * read only (code, rodata). Clear the RDONLY bit from * the temporary mappings we use during restore. */ - set_pte(dst_ptep, pte_mkwrite(pte)); + set_pte(dst_ptep, pte_mkwrite_novma(pte)); - } else if (debug_pagealloc_enabled() && !pte_none(pte)) { + } else if ((debug_pagealloc_enabled() || + is_kfence_address((void *)addr)) && !pte_none(pte)) { /* * debug_pagealloc will removed the PTE_VALID bit if * the page isn't in use by the resume kernel. It may have diff --cc arch/s390/Kconfig index 661b6de69c27,91514d535c46..ae29e4392664 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@@ -127,7 -127,8 +127,8 @@@ config S39 select ARCH_WANTS_NO_INSTR select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION - select ARCH_WANT_OPTIMIZE_VMEMMAP + select ARCH_WANT_KERNEL_PMD_MKWRITE + select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DMA_OPS if PCI diff --cc arch/x86/entry/syscalls/syscall_64.tbl index 814768249eae,38db4b1c291a..1d6eee30eceb --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@@ -373,7 -373,7 +373,8 @@@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat -452 64 map_shadow_stack sys_map_shadow_stack +452 common fchmodat2 sys_fchmodat2 ++453 64 map_shadow_stack sys_map_shadow_stack # # Due to a historical design error, certain syscalls are numbered differently diff --cc arch/x86/include/asm/cpufeatures.h index 7b4ecbf78d8b,d7215c8b7923..2061ed1c398f --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@@ -307,11 -308,8 +307,12 @@@ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ + #define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ diff --cc arch/x86/include/asm/pgtable_types.h index a6deb67cfbb2,002f19e4e1f5..0b748ee16b3d --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@@ -125,12 -145,11 +145,12 @@@ * instance, and is *not* included in this mask since * pte_modify() does modify it. */ - #define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ - _PAGE_UFFD_WP) -#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY_BITS | \ - _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \ - _PAGE_UFFD_WP) -#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) ++#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ ++ _PAGE_SPECIAL | _PAGE_ACCESSED | \ ++ _PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \ ++ _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP) +#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT) +#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE) /* * The cache modes defined here are used to translate between pure SW usage diff --cc mm/memory.c index 405a483d2fd1,36289f33327e..6c264d2f969c --- a/mm/memory.c +++ b/mm/memory.c @@@ -4842,9 -4809,9 +4843,9 @@@ out_map pte = pte_modify(old_pte, vma->vm_page_prot); pte = pte_mkyoung(pte); if (writable) - pte = pte_mkwrite(pte); + pte = pte_mkwrite(pte, vma); ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte); - update_mmu_cache(vma, vmf->address, vmf->pte); + update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); pte_unmap_unlock(vmf->pte, vmf->ptl); goto out; } diff --cc mm/mmap.c index 514ced13c65c,11dcf50cb933..b56a7f0c9f85 --- a/mm/mmap.c +++ b/mm/mmap.c @@@ -1182,13 -1189,14 +1182,13 @@@ static inline bool file_mmap_ok(struct */ unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long pgoff, - unsigned long *populate, struct list_head *uf) + unsigned long flags, vm_flags_t vm_flags, + unsigned long pgoff, unsigned long *populate, + struct list_head *uf) { struct mm_struct *mm = current->mm; - vm_flags_t vm_flags; int pkey = 0; - validate_mm(mm); *populate = 0; if (!len)