From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 31 Jan 2025 18:39:07 +0000 (-0800)
Subject: Merge tag 'x86-mm-2025-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
X-Git-Tag: v6.14-rc1~20
X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/c545cd3276cd611a5d53ac5e18e98a37f013694c?hp=-c

Merge tag 'x86-mm-2025-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:

 - The biggest changes are the TLB flushing scalability optimizations,
   to update the mm_cpumask lazily and related changes.

   This feature has both a track record and a continued risk of
   performance regressions, so it was already delayed by a cycle - but
   it's all 100% perfect now™ (Rik van Riel)

 - Also miscellaneous fixes and cleanups. (Gautam Somani, Kirill
   Shutemov, Sebastian Andrzej Siewior)

* tag 'x86-mm-2025-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove unnecessary include of <linux/extable.h>
  x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
  x86/mm/selftests: Fix typo in lam.c
  x86/mm/tlb: Only trim the mm_cpumask once a second
  x86/mm/tlb: Also remove local CPU from mm_cpumask if stale
  x86/mm/tlb: Add tracepoint for TLB flush IPI to stale CPU
  x86/mm/tlb: Update mm_cpumask lazily
---
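The centerpiece of the series is lazy mm_cpumask maintenance: instead of
a CPU removing itself from an mm's cpumask when it switches away, a
stale CPU that later receives a flush IPI for an mm it no longer runs
clears itself from the mask and skips the flush. The sketch below shows
only that receive-side idea; the handler name and its shape are assumed
for illustration, not the merged flush_tlb_func() in arch/x86/mm/tlb.c:

/*
 * Sketch only: flush_tlb_info handling, tlbstate bookkeeping and
 * statistics are all simplified relative to the real handler.
 */
static void flush_tlb_func_sketch(void *info)
{
	struct flush_tlb_info *f = info;
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	if (f->mm && f->mm != loaded_mm) {
		/*
		 * The lazy scheme left this CPU in the target mm's
		 * cpumask although it has switched away. Drop out of
		 * the mask so later flushes skip this CPU, and record
		 * the event with the new TLB_REMOTE_WRONG_CPU reason.
		 */
		cpumask_clear_cpu(smp_processor_id(), mm_cpumask(f->mm));
		trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0);
		return;
	}

	/* ... otherwise perform the requested TLB flush ... */
}

With lazy updates the mask can only be too wide, never too narrow, so
correctness is preserved: the worst case is a spurious IPI to a stale
CPU, and the once-a-second mm_cpumask trimming keeps even that bounded.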
c545cd3276cd611a5d53ac5e18e98a37f013694c
diff --combined arch/x86/mm/fault.c
index ac52255fab01,ef12ff3db903..296d294142c8
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -7,7 -7,6 +7,6 @@@
  #include <linux/sched.h>		/* test_thread_flag(), ...	*/
  #include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
  #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
- #include <linux/extable.h>		/* search_exception_tables	*/
  #include <linux/memblock.h>		/* max_low_pfn			*/
  #include <linux/kfence.h>		/* kfence_handle_page_fault	*/
  #include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
@@@ -678,7 -677,7 +677,7 @@@ page_fault_oops(struct pt_regs *regs, u
  		      ASM_CALL_ARG3, , [arg1] "r" (regs),
  		      [arg2] "r" (address), [arg3] "r" (&info));

- 	unreachable();
+ 	BUG();
  }
  #endif

diff --combined include/linux/mm_types.h
index 5f1b2dc788e2,1371893e44ca..6b27db7f9496
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@@ -438,16 -438,13 +438,16 @@@ FOLIO_MATCH(compound_head, _head_2a)
   * struct ptdesc - Memory descriptor for page tables.
   * @__page_flags: Same as page flags. Powerpc only.
   * @pt_rcu_head: For freeing page table pages.
-  * @pt_list: List of used page tables. Used for s390 and x86.
+  * @pt_list: List of used page tables. Used for s390 gmap shadow pages
+  *           (which are not linked into the user page tables) and x86
+  *           pgds.
   * @_pt_pad_1: Padding that aliases with page's compound head.
   * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs.
   * @__page_mapping: Aliases with page->mapping. Unused for page tables.
   * @pt_index: Used for s390 gmap.
   * @pt_mm: Used for x86 pgds.
   * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
+  * @pt_share_count: Used for HugeTLB PMD page table share count.
   * @_pt_pad_2: Padding to ensure proper alignment.
   * @ptl: Lock for the page table.
   * @__page_type: Same as page->page_type. Unused for page tables.
@@@ -474,9 -471,6 +474,9 @@@ struct ptdesc
  			pgoff_t pt_index;
  			struct mm_struct *pt_mm;
  			atomic_t pt_frag_refcount;
 +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
 +			atomic_t pt_share_count;
 +#endif
  		};
  		union {
@@@ -522,32 -516,6 +522,32 @@@ static_assert(sizeof(struct ptdesc) <=
  		const struct page *:	(const struct ptdesc *)(p),	\
  		struct page *:		(struct ptdesc *)(p)))

 +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
 +static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
 +{
 +	atomic_set(&ptdesc->pt_share_count, 0);
 +}
 +
 +static inline void ptdesc_pmd_pts_inc(struct ptdesc *ptdesc)
 +{
 +	atomic_inc(&ptdesc->pt_share_count);
 +}
 +
 +static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc)
 +{
 +	atomic_dec(&ptdesc->pt_share_count);
 +}
 +
 +static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc)
 +{
 +	return atomic_read(&ptdesc->pt_share_count);
 +}
 +#else
 +static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
 +{
 +}
 +#endif
 +
  /*
   * Used for sizing the vmemmap region on some architectures
   */
@@@ -729,7 -697,7 +729,7 @@@ struct vm_area_struct
  	 * counter reuse can only lead to occasional unnecessary use of the
  	 * slowpath.
  	 */
- 	int vm_lock_seq;
+ 	unsigned int vm_lock_seq;
  	/* Unstable RCU readers are allowed to read this. */
  	struct vma_lock *vm_lock;
  #endif
@@@ -923,9 -891,6 +923,9 @@@ struct mm_struct
  	 * Roughly speaking, incrementing the sequence number is
  	 * equivalent to releasing locks on VMAs; reading the sequence
  	 * number can be part of taking a read lock on a VMA.
 +	 * Incremented every time mmap_lock is write-locked/unlocked.
 +	 * Initialized to 0, therefore odd values indicate mmap_lock
 +	 * is write-locked and even values that it's released.
  	 *
  	 * Can be modified under write mmap_lock using RELEASE
  	 * semantics.
@@@ -934,7 -899,7 +934,7 @@@
  	 * Can be read with ACQUIRE semantics if not holding write
  	 * mmap_lock.
  	 */
- 	int mm_lock_seq;
+ 	seqcount_t mm_lock_seq;
  #endif
@@@ -1406,6 -1371,7 +1406,7 @@@ enum tlb_flush_reason
  	TLB_LOCAL_SHOOTDOWN,
  	TLB_LOCAL_MM_SHOOTDOWN,
  	TLB_REMOTE_SEND_IPI,
+ 	TLB_REMOTE_WRONG_CPU,
  	NR_TLB_FLUSH_REASONS,
  };
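
The new pt_share_count field and the ptdesc_pmd_pts_*() helpers give
hugetlb PMD sharing a dedicated sharer count on the ptdesc. A
hypothetical pairing of the helpers (the function names here are
illustrative; the real call sites are the PMD sharing paths in
mm/hugetlb.c):

/* Illustrative only: locking is assumed to be handled by the caller. */
static void example_pmd_table_alloc(struct ptdesc *ptdesc)
{
	/* freshly allocated PMD page table: no sharers yet */
	ptdesc_pmd_pts_init(ptdesc);
}

static void example_pmd_table_share(struct ptdesc *ptdesc)
{
	/* another mapping now goes through this PMD page table */
	ptdesc_pmd_pts_inc(ptdesc);
}

static bool example_pmd_table_unshare(struct ptdesc *ptdesc)
{
	ptdesc_pmd_pts_dec(ptdesc);
	/* the table may only be torn down once no sharers remain */
	return ptdesc_pmd_pts_count(ptdesc) == 0;
}

Note that the #else branch stubs out only ptdesc_pmd_pts_init(), so
common code can initialize a ptdesc unconditionally while the
inc/dec/count paths compile away together with their
CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING-only callers.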
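
The mm_lock_seq conversion from a plain int to a seqcount_t pairs the
odd/even semantics documented in the new comment with the stock
<linux/seqlock.h> read API. A sketch of a speculative reader under that
assumption (the mm layer wraps this in its own helpers rather than
open-coding it like this):

	unsigned int seq;

	do {
		/* stalls while the count is odd, i.e. mmap_lock is write-locked */
		seq = read_seqcount_begin(&mm->mm_lock_seq);

		/* ... speculative, mmap_lock-free VMA lookup goes here ... */

		/* a changed count means a writer took mmap_lock: retry */
	} while (read_seqcount_retry(&mm->mm_lock_seq, seq));

The vm_lock_seq switch to unsigned int in the same diff presumably
follows from this: it caches a snapshot of a counter that is now
unsigned and wraps, so a signed type no longer matches.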