From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 31 Jan 2025 18:39:07 +0000 (-0800)
Subject: Merge tag 'x86-mm-2025-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
X-Git-Tag: v6.14-rc1~20
X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/c545cd3276cd611a5d53ac5e18e98a37f013694c?hp=-c

Merge tag 'x86-mm-2025-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:

 - The biggest changes are the TLB flushing scalability optimizations,
   to update the mm_cpumask lazily and related changes.

   This feature has both a track record and a continued risk of
   performance regressions, so it was already delayed by a cycle - but
   it's all 100% perfect now™ (Rik van Riel)

 - Also miscellaneous fixes and cleanups. (Gautam Somani, Kirill
   Shutemov, Sebastian Andrzej Siewior)

* tag 'x86-mm-2025-01-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove unnecessary include of <linux/extable.h>
  x86/mtrr: Rename mtrr_overwrite_state() to guest_force_mtrr_state()
  x86/mm/selftests: Fix typo in lam.c
  x86/mm/tlb: Only trim the mm_cpumask once a second
  x86/mm/tlb: Also remove local CPU from mm_cpumask if stale
  x86/mm/tlb: Add tracepoint for TLB flush IPI to stale CPU
  x86/mm/tlb: Update mm_cpumask lazily
---
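The centerpiece of the series is lazy mm_cpumask maintenance: instead of
a CPU removing itself from an mm's cpumask when it switches away, a
stale CPU that later receives a flush IPI for an mm it no longer runs
clears itself from the mask and skips the flush. The sketch below shows
only that receive-side idea; the handler name and its shape are assumed
for illustration, not the merged flush_tlb_func() in arch/x86/mm/tlb.c:

/*
 * Sketch only: flush_tlb_info handling, tlbstate bookkeeping and
 * statistics are all simplified relative to the real handler.
 */
static void flush_tlb_func_sketch(void *info)
{
	struct flush_tlb_info *f = info;
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	if (f->mm && f->mm != loaded_mm) {
		/*
		 * The lazy scheme left this CPU in the target mm's
		 * cpumask although it has switched away. Drop out of
		 * the mask so later flushes skip this CPU, and record
		 * the event with the new TLB_REMOTE_WRONG_CPU reason.
		 */
		cpumask_clear_cpu(smp_processor_id(), mm_cpumask(f->mm));
		trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0);
		return;
	}

	/* ... otherwise perform the requested TLB flush ... */
}

With lazy updates the mask can only be too wide, never too narrow, so
correctness is preserved: the worst case is a spurious IPI to a stale
CPU, and the once-a-second mm_cpumask trimming keeps even that bounded.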
c545cd3276cd611a5d53ac5e18e98a37f013694c
diff --combined arch/x86/mm/fault.c
index ac52255fab01,ef12ff3db903..296d294142c8
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -7,7 -7,6 +7,6 @@@
  #include <linux/sched.h>		/* test_thread_flag(), ...	*/
  #include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
  #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
- #include <linux/extable.h>		/* search_exception_tables	*/
  #include <linux/memblock.h>		/* max_low_pfn			*/
  #include <linux/kfence.h>		/* kfence_handle_page_fault	*/
  #include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
@@@ -678,7 -677,7 +677,7 @@@ page_fault_oops(struct pt_regs *regs, u
  		      ASM_CALL_ARG3, , [arg1] "r" (regs),
  		      [arg2] "r" (address), [arg3] "r" (&info));

- 	unreachable();
+ 	BUG();
  }
  #endif

diff --combined include/linux/mm_types.h
index 5f1b2dc788e2,1371893e44ca..6b27db7f9496
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@@ -438,16 -438,13 +438,16 @@@ FOLIO_MATCH(compound_head, _head_2a)
   * struct ptdesc - Memory descriptor for page tables.
   * @__page_flags: Same as page flags. Powerpc only.
   * @pt_rcu_head: For freeing page table pages.
-  * @pt_list: List of used page tables. Used for s390 and x86.
+  * @pt_list: List of used page tables. Used for s390 gmap shadow pages
+  *           (which are not linked into the user page tables) and x86
+  *           pgds.
   * @_pt_pad_1: Padding that aliases with page's compound head.
   * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs.
   * @__page_mapping: Aliases with page->mapping. Unused for page tables.
   * @pt_index: Used for s390 gmap.
   * @pt_mm: Used for x86 pgds.
   * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
+  * @pt_share_count: Used for HugeTLB PMD page table share count.
   * @_pt_pad_2: Padding to ensure proper alignment.
   * @ptl: Lock for the page table.
   * @__page_type: Same as page->page_type. Unused for page tables.
@@@ -474,9 -471,6 +474,9 @@@ struct ptdesc
  			pgoff_t pt_index;
  			struct mm_struct *pt_mm;
  			atomic_t pt_frag_refcount;
 +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
 +			atomic_t pt_share_count;
 +#endif
  		};
  		union {
@@@ -522,32 -516,6 +522,32 @@@ static_assert(sizeof(struct ptdesc) <=
  		const struct page *:	(const struct ptdesc *)(p),	\
  		struct page *:		(struct ptdesc *)(p)))

 +#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
 +static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
 +{
 +	atomic_set(&ptdesc->pt_share_count, 0);
 +}
 +
 +static inline void ptdesc_pmd_pts_inc(struct ptdesc *ptdesc)
 +{
 +	atomic_inc(&ptdesc->pt_share_count);
 +}
 +
 +static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc)
 +{
 +	atomic_dec(&ptdesc->pt_share_count);
 +}
 +
 +static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc)
 +{
 +	return atomic_read(&ptdesc->pt_share_count);
 +}
 +#else
 +static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
 +{
 +}
 +#endif
 +
  /*
   * Used for sizing the vmemmap region on some architectures
   */
@@@ -729,7 -697,7 +729,7 @@@ struct vm_area_struct
  	 * counter reuse can only lead to occasional unnecessary use of the
  	 * slowpath.
  	 */
- 	int vm_lock_seq;
+ 	unsigned int vm_lock_seq;
  	/* Unstable RCU readers are allowed to read this. */
  	struct vma_lock *vm_lock;
  #endif
@@@ -923,9 -891,6 +923,9 @@@ struct mm_struct
  	 * Roughly speaking, incrementing the sequence number is
  	 * equivalent to releasing locks on VMAs; reading the sequence
  	 * number can be part of taking a read lock on a VMA.
 +	 * Incremented every time mmap_lock is write-locked/unlocked.
 +	 * Initialized to 0, therefore odd values indicate mmap_lock
 +	 * is write-locked and even values that it's released.
  	 *
  	 * Can be modified under write mmap_lock using RELEASE
  	 * semantics.
@@@ -934,7 -899,7 +934,7 @@@
  	 * Can be read with ACQUIRE semantics if not holding write
  	 * mmap_lock.
  	 */
- 	int mm_lock_seq;
+ 	seqcount_t mm_lock_seq;
  #endif
@@@ -1406,6 -1371,7 +1406,7 @@@ enum tlb_flush_reason
  	TLB_LOCAL_SHOOTDOWN,
  	TLB_LOCAL_MM_SHOOTDOWN,
  	TLB_REMOTE_SEND_IPI,
+ 	TLB_REMOTE_WRONG_CPU,
  	NR_TLB_FLUSH_REASONS,
  };
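
The new pt_share_count field and the ptdesc_pmd_pts_*() helpers give
hugetlb PMD sharing a dedicated sharer count on the ptdesc. A
hypothetical pairing of the helpers (the function names here are
illustrative; the real call sites are the PMD sharing paths in
mm/hugetlb.c):

/* Illustrative only: locking is assumed to be handled by the caller. */
static void example_pmd_table_alloc(struct ptdesc *ptdesc)
{
	/* freshly allocated PMD page table: no sharers yet */
	ptdesc_pmd_pts_init(ptdesc);
}

static void example_pmd_table_share(struct ptdesc *ptdesc)
{
	/* another mapping now goes through this PMD page table */
	ptdesc_pmd_pts_inc(ptdesc);
}

static bool example_pmd_table_unshare(struct ptdesc *ptdesc)
{
	ptdesc_pmd_pts_dec(ptdesc);
	/* the table may only be torn down once no sharers remain */
	return ptdesc_pmd_pts_count(ptdesc) == 0;
}

Note that the #else branch stubs out only ptdesc_pmd_pts_init(), so
common code can initialize a ptdesc unconditionally while the
inc/dec/count paths compile away together with their
CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING-only callers.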
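
The mm_lock_seq conversion from a plain int to a seqcount_t pairs the
odd/even semantics documented in the new comment with the stock
<linux/seqlock.h> read API. A sketch of a speculative reader under that
assumption (the mm layer wraps this in its own helpers rather than
open-coding it like this):

	unsigned int seq;

	do {
		/* stalls while the count is odd, i.e. mmap_lock is write-locked */
		seq = read_seqcount_begin(&mm->mm_lock_seq);

		/* ... speculative, mmap_lock-free VMA lookup goes here ... */

		/* a changed count means a writer took mmap_lock: retry */
	} while (read_seqcount_retry(&mm->mm_lock_seq, seq));

The vm_lock_seq switch to unsigned int in the same diff presumably
follows from this: it caches a snapshot of a counter that is now
unsigned and wraps, so a signed type no longer matches.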