Git Repo - linux.git/commitdiff
Merge branch 'per_signal_struct_coredumps-for-v5.16' of git://git.kernel.org/pub...
author    Linus Torvalds <[email protected]>  Wed, 3 Nov 2021 19:15:29 +0000 (12:15 -0700)
committer Linus Torvalds <[email protected]>  Wed, 3 Nov 2021 19:15:29 +0000 (12:15 -0700)
Pull per signal_struct coredumps from Eric Biederman:
 "Current coredumps are mixed up with the exit code, the signal handling
  code, and the ptrace code making coredumps much more complicated than
  necessary and difficult to follow.

  This series of changes starts with ptrace_stop and cleans it up,
  making it easier to follow what is happening in ptrace_stop. It then
  cleans up the exec interactions with coredumps, and then the coredump
  interactions with exit. Finally, the coredump interactions with the
  signal handling code are cleaned up.

  The first and last changes are bug fixes for minor bugs.

  I believe the fact that vfork followed by execve can kill the process
  that called vfork if exec fails is sufficient justification to change
  the userspace-visible behavior.

  In previous discussions, some of these changes were organized
  differently and individually appeared to make the code base worse. As
  currently written, I believe they all stand on their own as cleanups
  and bug fixes.

  This means that even if the worst should happen and the last change
  needs to be reverted for some unimaginable reason, the code base will
  still be improved.

  If the worst does not happen, there are more cleanups that can be
  made. Signals that generate coredumps can easily become eligible for
  short-circuit delivery in complete_signal. The entire rendezvous for
  generating a coredump can move into get_signal. The function
  force_sig_info_to_task can be written in a way that does not modify
  the signal handling state of the target task (because coredumps are
  eligible for short-circuit delivery). Many of these future cleanups
  can be done another way, but nothing so cleanly as if coredumps become
  per signal_struct"

* 'per_signal_struct_coredumps-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  coredump: Limit coredumps to a single thread group
  coredump:  Don't perform any cleanups before dumping core
  exit: Factor coredump_exit_mm out of exit_mm
  exec: Check for a pending fatal signal instead of core_state
  ptrace: Remove the unnecessary arguments from arch_ptrace_stop
  signal: Remove the bogus sigkill_pending in ptrace_stop
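
A quick orientation before the diff: the central change in this series is where the coredump rendezvous state lives. The compressed C sketch below shows only the members that appear in the diff further down; the stand-in typedefs exist just so the fragment is self-contained outside the kernel tree, and the _before/_after struct names are illustrative, not real kernel identifiers.

    /* Stand-in types so this sketch compiles outside the kernel tree. */
    typedef struct { int counter; } atomic_t;
    struct completion { unsigned int done; };
    struct task_struct;                     /* used only via pointer here */

    /*
     * The rendezvous bookkeeping itself is unchanged by the series; the
     * diff below only shows it being removed from mm_types.h (its new
     * home is not part of this excerpt).
     */
    struct core_thread {
        struct task_struct *task;
        struct core_thread *next;           /* chain of threads that have checked in */
    };

    struct core_state {
        atomic_t nr_threads;                /* threads still to check in */
        struct core_thread dumper;          /* list head; .task is the core-inducing thread */
        struct completion startup;          /* completed once all threads have checked in */
    };

    /* Before: one rendezvous per address space, reached via mm->core_state. */
    struct mm_struct_before {
        /* ...many fields elided... */
        struct core_state *core_state;      /* removed by this series */
    };

    /*
     * After: one rendezvous per thread group, reached via
     * tsk->signal->core_state and checked under siglock
     * (see coredump_task_exit() in the kernel/exit.c hunk below).
     */
    struct signal_struct_after {
        /* ...many fields elided... */
        struct core_state *core_state;
    };

Keying the rendezvous by thread group rather than by mm is what allows "coredump: Limit coredumps to a single thread group" to stop pulling in threads of other processes that merely share the mm.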

arch/ia64/include/asm/ptrace.h
fs/binfmt_elf.c
fs/proc/array.c
include/linux/mm_types.h
include/linux/sched.h
kernel/exit.c
kernel/fork.c
kernel/signal.c
mm/debug.c
mm/oom_kill.c

index 8a2d0f72b324bc4d096d7f22ef07b83596dfd0ec,f15504f75f1062e184080eaee1e23ead7c8f9509..a10a498eede1bac7958af7efe7cb29e083069126
   * the canonical representation by adding to instruction pointer.
   */
  # define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
 +# define instruction_pointer_set(regs, val)   \
 +({                                            \
 +      ia64_psr(regs)->ri = (val & 0xf);       \
 +      regs->cr_iip = (val & ~0xfULL);         \
 +})
  
  static inline unsigned long user_stack_pointer(struct pt_regs *regs)
  {
@@@ -134,9 -129,9 +134,9 @@@ static inline long regs_return_value(st
    extern void ia64_decrement_ip (struct pt_regs *pt);
  
    extern void ia64_ptrace_stop(void);
-   #define arch_ptrace_stop(code, info) \
+   #define arch_ptrace_stop() \
        ia64_ptrace_stop()
-   #define arch_ptrace_stop_needed(code, info) \
+   #define arch_ptrace_stop_needed() \
        (!test_thread_flag(TIF_RESTORE_RSE))
  
    extern void ptrace_attach_sync_user_rbs (struct task_struct *);
diff --combined fs/binfmt_elf.c
index a813b70f594e69ee043781a61de130a636ef9c6a,796e5327ee7d06b31c9ed040183b57ba27408e74..fa582748be412f7d132d96af01d79a51961e28cb
@@@ -630,7 -630,7 +630,7 @@@ static unsigned long load_elf_interp(st
  
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 -                              elf_type |= MAP_FIXED_NOREPLACE;
 +                              elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;
  
@@@ -1834,7 -1834,7 +1834,7 @@@ static int fill_note_info(struct elfhd
        /*
         * Allocate a structure for each thread.
         */
-       for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+       for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
                t = kzalloc(offsetof(struct elf_thread_core_info,
                                     notes[info->thread_notes]),
                            GFP_KERNEL);
@@@ -2024,7 -2024,7 +2024,7 @@@ static int fill_note_info(struct elfhd
        if (!elf_note_info_init(info))
                return 0;
  
-       for (ct = current->mm->core_state->dumper.next;
+       for (ct = current->signal->core_state->dumper.next;
                                        ct; ct = ct->next) {
                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
                if (!ets)
diff --combined fs/proc/array.c
index 77cf4187adecc359475324f1c37a204d61d74467,520c51be1e572aeda3e13a52ae18009366c8a956..ff869a66b34e390fb58f7487c696f6d5c44e78b8
@@@ -408,9 -408,9 +408,9 @@@ static void task_cpus_allowed(struct se
                   cpumask_pr_args(&task->cpus_mask));
  }
  
- static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
+ static inline void task_core_dumping(struct seq_file *m, struct task_struct *task)
  {
-       seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+       seq_put_decimal_ull(m, "CoreDumping:\t", !!task->signal->core_state);
        seq_putc(m, '\n');
  }
  
@@@ -436,7 -436,7 +436,7 @@@ int proc_pid_status(struct seq_file *m
  
        if (mm) {
                task_mem(m, mm);
-               task_core_dumping(m, mm);
+               task_core_dumping(m, task);
                task_thp_status(m, mm);
                mmput(mm);
        }
@@@ -541,7 -541,7 +541,7 @@@ static int do_task_stat(struct seq_fil
        }
  
        if (permitted && (!whole || num_threads < 2))
 -              wchan = get_wchan(task);
 +              wchan = !task_is_running(task);
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
         *
         * This works with older implementations of procps as well.
         */
 -      if (wchan)
 -              seq_puts(m, " 1");
 -      else
 -              seq_puts(m, " 0");
 +      seq_put_decimal_ull(m, " ", wchan);
  
        seq_put_decimal_ull(m, " ", 0);
        seq_put_decimal_ull(m, " ", 0);
diff --combined include/linux/mm_types.h
index 8f3131477ec69550410209c3fc3f626dd1b3f3fe,1039f6ae922c91edd54877fb7579b9ddd5b8404a..f7326c8704bb3bdd8970821af831430dfc4e89a6
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/completion.h>
  #include <linux/cpumask.h>
  #include <linux/uprobes.h>
 +#include <linux/rcupdate.h>
  #include <linux/page-flags-layout.h>
  #include <linux/workqueue.h>
  #include <linux/seqlock.h>
@@@ -105,7 -104,18 +105,7 @@@ struct page 
                        struct page_pool *pp;
                        unsigned long _pp_mapping_pad;
                        unsigned long dma_addr;
 -                      union {
 -                              /**
 -                               * dma_addr_upper: might require a 64-bit
 -                               * value on 32-bit architectures.
 -                               */
 -                              unsigned long dma_addr_upper;
 -                              /**
 -                               * For frag page support, not supported in
 -                               * 32-bit architectures with 64-bit DMA.
 -                               */
 -                              atomic_long_t pp_frag_count;
 -                      };
 +                      atomic_long_t pp_frag_count;
                };
                struct {        /* slab, slob and slub */
                        union {
  #endif
  } _struct_page_alignment;
  
 +/**
 + * struct folio - Represents a contiguous set of bytes.
 + * @flags: Identical to the page flags.
 + * @lru: Least Recently Used list; tracks how recently this folio was used.
 + * @mapping: The file this page belongs to, or refers to the anon_vma for
 + *    anonymous memory.
 + * @index: Offset within the file, in units of pages.  For anonymous memory,
 + *    this is the index from the beginning of the mmap.
 + * @private: Filesystem per-folio data (see folio_attach_private()).
 + *    Used for swp_entry_t if folio_test_swapcache().
 + * @_mapcount: Do not access this member directly.  Use folio_mapcount() to
 + *    find out how many times this folio is mapped by userspace.
 + * @_refcount: Do not access this member directly.  Use folio_ref_count()
 + *    to find how many references there are to this folio.
 + * @memcg_data: Memory Control Group data.
 + *
 + * A folio is a physically, virtually and logically contiguous set
 + * of bytes.  It is a power-of-two in size, and it is aligned to that
 + * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
 + * in the page cache, it is at a file offset which is a multiple of that
 + * power-of-two.  It may be mapped into userspace at an address which is
 + * at an arbitrary page offset, but its kernel virtual address is aligned
 + * to its size.
 + */
 +struct folio {
 +      /* private: don't document the anon union */
 +      union {
 +              struct {
 +      /* public: */
 +                      unsigned long flags;
 +                      struct list_head lru;
 +                      struct address_space *mapping;
 +                      pgoff_t index;
 +                      void *private;
 +                      atomic_t _mapcount;
 +                      atomic_t _refcount;
 +#ifdef CONFIG_MEMCG
 +                      unsigned long memcg_data;
 +#endif
 +      /* private: the union with struct page is transitional */
 +              };
 +              struct page page;
 +      };
 +};
 +
 +static_assert(sizeof(struct page) == sizeof(struct folio));
 +#define FOLIO_MATCH(pg, fl)                                           \
 +      static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
 +FOLIO_MATCH(flags, flags);
 +FOLIO_MATCH(lru, lru);
 +FOLIO_MATCH(compound_head, lru);
 +FOLIO_MATCH(index, index);
 +FOLIO_MATCH(private, private);
 +FOLIO_MATCH(_mapcount, _mapcount);
 +FOLIO_MATCH(_refcount, _refcount);
 +#ifdef CONFIG_MEMCG
 +FOLIO_MATCH(memcg_data, memcg_data);
 +#endif
 +#undef FOLIO_MATCH
 +
 +static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 +{
 +      struct page *tail = &folio->page + 1;
 +      return &tail->compound_mapcount;
 +}
 +
  static inline atomic_t *compound_mapcount_ptr(struct page *page)
  {
        return &page[1].compound_mapcount;
@@@ -313,12 -257,6 +313,12 @@@ static inline atomic_t *compound_pincou
  #define PAGE_FRAG_CACHE_MAX_SIZE      __ALIGN_MASK(32768, ~PAGE_MASK)
  #define PAGE_FRAG_CACHE_MAX_ORDER     get_order(PAGE_FRAG_CACHE_MAX_SIZE)
  
 +/*
 + * page_private can be used on tail pages.  However, PagePrivate is only
 + * checked by the VM on the head page.  So page_private on the tail pages
 + * should be used for data that's ancillary to the head page (eg attaching
 + * buffer heads to tail pages after attaching buffer heads to the head page)
 + */
  #define page_private(page)            ((page)->private)
  
  static inline void set_page_private(struct page *page, unsigned long private)
        page->private = private;
  }
  
 +static inline void *folio_get_private(struct folio *folio)
 +{
 +      return folio->private;
 +}
 +
  struct page_frag_cache {
        void * va;
  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
@@@ -454,17 -387,6 +454,6 @@@ struct vm_area_struct 
        struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
  } __randomize_layout;
  
- struct core_thread {
-       struct task_struct *task;
-       struct core_thread *next;
- };
- struct core_state {
-       atomic_t nr_threads;
-       struct core_thread dumper;
-       struct completion startup;
- };
  struct kioctx_table;
  struct mm_struct {
        struct {
  
                unsigned long flags; /* Must use atomic bitops to access */
  
-               struct core_state *core_state; /* coredumping support */
  #ifdef CONFIG_AIO
                spinlock_t                      ioctx_lock;
                struct kioctx_table __rcu       *ioctx_table;
                bool tlb_flush_batched;
  #endif
                struct uprobes_state uprobes_state;
 +#ifdef CONFIG_PREEMPT_RT
 +              struct rcu_head delayed_drop;
 +#endif
  #ifdef CONFIG_HUGETLB_PAGE
                atomic_long_t hugetlb_usage;
  #endif
diff --combined include/linux/sched.h
index 6f6f8f340a0fba45c25afcec0fcb673e3b079118,f3741f23935eb2eb58ea0a3aaea3d92f84b5a23b..78c351e35fec6361973c86c1a1337080af7a688a
@@@ -503,8 -503,6 +503,8 @@@ struct sched_statistics 
  
        u64                             block_start;
        u64                             block_max;
 +      s64                             sum_block_runtime;
 +
        u64                             exec_max;
        u64                             slice_max;
  
        u64                             nr_wakeups_passive;
        u64                             nr_wakeups_idle;
  #endif
 -};
 +} ____cacheline_aligned;
  
  struct sched_entity {
        /* For load-balancing: */
  
        u64                             nr_migrations;
  
 -      struct sched_statistics         statistics;
 -
  #ifdef CONFIG_FAIR_GROUP_SCHED
        int                             depth;
        struct sched_entity             *parent;
@@@ -750,6 -750,10 +750,6 @@@ struct task_struct 
  #ifdef CONFIG_SMP
        int                             on_cpu;
        struct __call_single_node       wake_entry;
 -#ifdef CONFIG_THREAD_INFO_IN_TASK
 -      /* Current CPU: */
 -      unsigned int                    cpu;
 -#endif
        unsigned int                    wakee_flips;
        unsigned long                   wakee_flip_decay_ts;
        struct task_struct              *last_wakee;
        int                             normal_prio;
        unsigned int                    rt_priority;
  
 -      const struct sched_class        *sched_class;
        struct sched_entity             se;
        struct sched_rt_entity          rt;
        struct sched_dl_entity          dl;
 +      const struct sched_class        *sched_class;
  
  #ifdef CONFIG_SCHED_CORE
        struct rb_node                  core_node;
        struct uclamp_se                uclamp[UCLAMP_CNT];
  #endif
  
 +      struct sched_statistics         stats;
 +
  #ifdef CONFIG_PREEMPT_NOTIFIERS
        /* List of struct preempt_notifier: */
        struct hlist_head               preempt_notifiers;
        /* Stacked block device info: */
        struct bio_list                 *bio_list;
  
 -#ifdef CONFIG_BLOCK
        /* Stack plugging: */
        struct blk_plug                 *plug;
 -#endif
  
        /* VM state: */
        struct reclaim_state            *reclaim_state;
                                        mce_whole_page : 1,
                                        __mce_reserved : 62;
        struct callback_head            mce_kill_me;
 +      int                             mce_count;
  #endif
  
  #ifdef CONFIG_KRETPROBES
@@@ -1661,6 -1664,7 +1661,7 @@@ extern struct pid *cad_pid
  #define PF_VCPU                       0x00000001      /* I'm a virtual CPU */
  #define PF_IDLE                       0x00000002      /* I am an IDLE thread */
  #define PF_EXITING            0x00000004      /* Getting shut down */
+ #define PF_POSTCOREDUMP               0x00000008      /* Coredumps should ignore this task */
  #define PF_IO_WORKER          0x00000010      /* Task is an IO worker */
  #define PF_WQ_WORKER          0x00000020      /* I'm a workqueue worker */
  #define PF_FORKNOEXEC         0x00000040      /* Forked but didn't exec */
  #define tsk_used_math(p)                      ((p)->flags & PF_USED_MATH)
  #define used_math()                           tsk_used_math(current)
  
 -static inline bool is_percpu_thread(void)
 +static __always_inline bool is_percpu_thread(void)
  {
  #ifdef CONFIG_SMP
        return (current->flags & PF_NO_SETAFFINITY) &&
@@@ -1882,7 -1886,10 +1883,7 @@@ extern struct thread_info init_thread_i
  extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
  
  #ifdef CONFIG_THREAD_INFO_IN_TASK
 -static inline struct thread_info *task_thread_info(struct task_struct *task)
 -{
 -      return &task->thread_info;
 -}
 +# define task_thread_info(task)       (&(task)->thread_info)
  #elif !defined(__HAVE_THREAD_FUNCTIONS)
  # define task_thread_info(task)       ((struct thread_info *)(task)->stack)
  #endif
@@@ -2032,7 -2039,7 +2033,7 @@@ static inline int _cond_resched(void) 
  #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
  
  #define cond_resched() ({                     \
 -      ___might_sleep(__FILE__, __LINE__, 0);  \
 +      __might_resched(__FILE__, __LINE__, 0); \
        _cond_resched();                        \
  })
  
@@@ -2040,38 -2047,19 +2041,38 @@@ extern int __cond_resched_lock(spinlock
  extern int __cond_resched_rwlock_read(rwlock_t *lock);
  extern int __cond_resched_rwlock_write(rwlock_t *lock);
  
 -#define cond_resched_lock(lock) ({                            \
 -      ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
 -      __cond_resched_lock(lock);                              \
 +#define MIGHT_RESCHED_RCU_SHIFT               8
 +#define MIGHT_RESCHED_PREEMPT_MASK    ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
 +
 +#ifndef CONFIG_PREEMPT_RT
 +/*
 + * Non RT kernels have an elevated preempt count due to the held lock,
 + * but are not allowed to be inside a RCU read side critical section
 + */
 +# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET
 +#else
 +/*
 + * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 + * cond_resched*lock() has to take that into account because it checks for
 + * preempt_count() and rcu_preempt_depth().
 + */
 +# define PREEMPT_LOCK_RESCHED_OFFSETS \
 +      (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
 +#endif
 +
 +#define cond_resched_lock(lock) ({                                            \
 +      __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
 +      __cond_resched_lock(lock);                                              \
  })
  
 -#define cond_resched_rwlock_read(lock) ({                     \
 -      __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
 -      __cond_resched_rwlock_read(lock);                       \
 +#define cond_resched_rwlock_read(lock) ({                                     \
 +      __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
 +      __cond_resched_rwlock_read(lock);                                       \
  })
  
 -#define cond_resched_rwlock_write(lock) ({                    \
 -      __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
 -      __cond_resched_rwlock_write(lock);                      \
 +#define cond_resched_rwlock_write(lock) ({                                    \
 +      __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
 +      __cond_resched_rwlock_write(lock);                                      \
  })
  
  static inline void cond_resched_rcu(void)
@@@ -2126,7 -2114,11 +2127,7 @@@ static __always_inline bool need_resche
  
  static inline unsigned int task_cpu(const struct task_struct *p)
  {
 -#ifdef CONFIG_THREAD_INFO_IN_TASK
 -      return READ_ONCE(p->cpu);
 -#else
        return READ_ONCE(task_thread_info(p)->cpu);
 -#endif
  }
  
  extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
@@@ -2145,7 -2137,6 +2146,7 @@@ static inline void set_task_cpu(struct 
  #endif /* CONFIG_SMP */
  
  extern bool sched_task_on_rq(struct task_struct *p);
 +extern unsigned long get_wchan(struct task_struct *p);
  
  /*
   * In order to reduce various lock holder preemption latencies provide an
diff --combined kernel/exit.c
index 50f1692c732d14945a334195b45e4e3fbede30b7,2b355e926c1350ded8355f6a964d82b014b3e171..f702a6a63686ea7c463ea5b73a7f5b665bb2d91e
@@@ -48,6 -48,7 +48,6 @@@
  #include <linux/pipe_fs_i.h>
  #include <linux/audit.h> /* for audit_free() */
  #include <linux/resource.h>
 -#include <linux/blkdev.h>
  #include <linux/task_io_accounting_ops.h>
  #include <linux/tracehook.h>
  #include <linux/fs_struct.h>
@@@ -63,7 -64,6 +63,7 @@@
  #include <linux/rcuwait.h>
  #include <linux/compat.h>
  #include <linux/io_uring.h>
 +#include <linux/kprobes.h>
  
  #include <linux/uaccess.h>
  #include <asm/unistd.h>
@@@ -168,7 -168,6 +168,7 @@@ static void delayed_put_task_struct(str
  {
        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
  
 +      kprobe_flush_task(tsk);
        perf_event_delayed_put(tsk);
        trace_sched_process_free(tsk);
        put_task_struct(tsk);
@@@ -340,6 -339,46 +340,46 @@@ kill_orphaned_pgrp(struct task_struct *
        }
  }
  
+ static void coredump_task_exit(struct task_struct *tsk)
+ {
+       struct core_state *core_state;
+       /*
+        * Serialize with any possible pending coredump.
+        * We must hold siglock around checking core_state
+        * and setting PF_POSTCOREDUMP.  The core-inducing thread
+        * will increment ->nr_threads for each thread in the
+        * group without PF_POSTCOREDUMP set.
+        */
+       spin_lock_irq(&tsk->sighand->siglock);
+       tsk->flags |= PF_POSTCOREDUMP;
+       core_state = tsk->signal->core_state;
+       spin_unlock_irq(&tsk->sighand->siglock);
+       if (core_state) {
+               struct core_thread self;
+               self.task = current;
+               if (self.task->flags & PF_SIGNALED)
+                       self.next = xchg(&core_state->dumper.next, &self);
+               else
+                       self.task = NULL;
+               /*
+                * Implies mb(), the result of xchg() must be visible
+                * to core_state->dumper.
+                */
+               if (atomic_dec_and_test(&core_state->nr_threads))
+                       complete(&core_state->startup);
+               for (;;) {
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       if (!self.task) /* see coredump_finish() */
+                               break;
+                       freezable_schedule();
+               }
+               __set_current_state(TASK_RUNNING);
+       }
+ }
  #ifdef CONFIG_MEMCG
  /*
   * A task is exiting.   If it owned this mm, find a new owner for the mm.
@@@ -435,47 -474,12 +475,12 @@@ assign_new_owner
  static void exit_mm(void)
  {
        struct mm_struct *mm = current->mm;
-       struct core_state *core_state;
  
        exit_mm_release(current, mm);
        if (!mm)
                return;
        sync_mm_rss(mm);
-       /*
-        * Serialize with any possible pending coredump.
-        * We must hold mmap_lock around checking core_state
-        * and clearing tsk->mm.  The core-inducing thread
-        * will increment ->nr_threads for each thread in the
-        * group with ->mm != NULL.
-        */
        mmap_read_lock(mm);
-       core_state = mm->core_state;
-       if (core_state) {
-               struct core_thread self;
-               mmap_read_unlock(mm);
-               self.task = current;
-               if (self.task->flags & PF_SIGNALED)
-                       self.next = xchg(&core_state->dumper.next, &self);
-               else
-                       self.task = NULL;
-               /*
-                * Implies mb(), the result of xchg() must be visible
-                * to core_state->dumper.
-                */
-               if (atomic_dec_and_test(&core_state->nr_threads))
-                       complete(&core_state->startup);
-               for (;;) {
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       if (!self.task) /* see coredump_finish() */
-                               break;
-                       freezable_schedule();
-               }
-               __set_current_state(TASK_RUNNING);
-               mmap_read_lock(mm);
-       }
        mmgrab(mm);
        BUG_ON(mm != current->active_mm);
        /* more a memory barrier than a real lock */
@@@ -763,6 -767,7 +768,7 @@@ void __noreturn do_exit(long code
        profile_task_exit(tsk);
        kcov_task_exit(tsk);
  
+       coredump_task_exit(tsk);
        ptrace_event(PTRACE_EVENT_EXIT, code);
  
        validate_creds_for_do_exit(tsk);
diff --combined kernel/fork.c
index 8e9feeef555e7f0650f07d84fa51f214c3c98b14,c8adb76982f7a61293a362bf9062fa15ea605b49..3f112b11a9ad1453cdfa5f88f8e3d70ac9089213
@@@ -76,6 -76,7 +76,6 @@@
  #include <linux/taskstats_kern.h>
  #include <linux/random.h>
  #include <linux/tty.h>
 -#include <linux/blkdev.h>
  #include <linux/fs_struct.h>
  #include <linux/magic.h>
  #include <linux/perf_event.h>
@@@ -1043,7 -1044,6 +1043,6 @@@ static struct mm_struct *mm_init(struc
        seqcount_init(&mm->write_protect_seq);
        mmap_init_lock(mm);
        INIT_LIST_HEAD(&mm->mmlist);
-       mm->core_state = NULL;
        mm_pgtables_bytes_init(mm);
        mm->map_count = 0;
        mm->locked_vm = 0;
@@@ -1391,8 -1391,7 +1390,7 @@@ static void mm_release(struct task_stru
         * purposes.
         */
        if (tsk->clear_child_tid) {
-               if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
-                   atomic_read(&mm->mm_users) > 1) {
+               if (atomic_read(&mm->mm_users) > 1) {
                        /*
                         * We don't check the error code - if userspace has
                         * not set up a proper pointer then tough luck.
@@@ -2404,7 -2403,7 +2402,7 @@@ static __latent_entropy struct task_str
        write_unlock_irq(&tasklist_lock);
  
        proc_fork_connector(p);
 -      sched_post_fork(p);
 +      sched_post_fork(p, args);
        cgroup_post_fork(p, args);
        perf_event_fork(p);
  
diff --combined kernel/signal.c
index e99aff33ff140c3cc6995a0fa89e4065e6275f03,b0db80acc6ef1113cd2dba50ddb018ae84569e63..6f3476dc787325c136a8a79e58e9963b1b57bc67
@@@ -426,10 -426,22 +426,10 @@@ __sigqueue_alloc(int sig, struct task_s
         */
        rcu_read_lock();
        ucounts = task_ucounts(t);
 -      sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 -      switch (sigpending) {
 -      case 1:
 -              if (likely(get_ucounts(ucounts)))
 -                      break;
 -              fallthrough;
 -      case LONG_MAX:
 -              /*
 -               * we need to decrease the ucount in the userns tree on any
 -               * failure to avoid counts leaking.
 -               */
 -              dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 -              rcu_read_unlock();
 -              return NULL;
 -      }
 +      sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
        rcu_read_unlock();
 +      if (!sigpending)
 +              return NULL;
  
        if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
                q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
        }
  
        if (unlikely(q == NULL)) {
 -              if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
 -                      put_ucounts(ucounts);
 +              dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
        } else {
                INIT_LIST_HEAD(&q->list);
                q->flags = sigqueue_flags;
@@@ -451,8 -464,8 +451,8 @@@ static void __sigqueue_free(struct sigq
  {
        if (q->flags & SIGQUEUE_PREALLOC)
                return;
 -      if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
 -              put_ucounts(q->ucounts);
 +      if (q->ucounts) {
 +              dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
                q->ucounts = NULL;
        }
        kmem_cache_free(sigqueue_cachep, q);
@@@ -2145,40 -2158,6 +2145,6 @@@ static void do_notify_parent_cldstop(st
        spin_unlock_irqrestore(&sighand->siglock, flags);
  }
  
- static inline bool may_ptrace_stop(void)
- {
-       if (!likely(current->ptrace))
-               return false;
-       /*
-        * Are we in the middle of do_coredump?
-        * If so and our tracer is also part of the coredump stopping
-        * is a deadlock situation, and pointless because our tracer
-        * is dead so don't allow us to stop.
-        * If SIGKILL was already sent before the caller unlocked
-        * ->siglock we must see ->core_state != NULL. Otherwise it
-        * is safe to enter schedule().
-        *
-        * This is almost outdated, a task with the pending SIGKILL can't
-        * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
-        * after SIGKILL was already dequeued.
-        */
-       if (unlikely(current->mm->core_state) &&
-           unlikely(current->mm == current->parent->mm))
-               return false;
-       return true;
- }
- /*
-  * Return non-zero if there is a SIGKILL that should be waking us up.
-  * Called with the siglock held.
-  */
- static bool sigkill_pending(struct task_struct *tsk)
- {
-       return sigismember(&tsk->pending.signal, SIGKILL) ||
-              sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
- }
  /*
   * This must be called with current->sighand->siglock held.
   *
@@@ -2196,7 -2175,7 +2162,7 @@@ static void ptrace_stop(int exit_code, 
  {
        bool gstop_done = false;
  
-       if (arch_ptrace_stop_needed(exit_code, info)) {
+       if (arch_ptrace_stop_needed()) {
                /*
                 * The arch code has something special to do before a
                 * ptrace stop.  This is allowed to block, e.g. for faults
                 * calling arch_ptrace_stop, so we must release it now.
                 * To preserve proper semantics, we must do this before
                 * any signal bookkeeping like checking group_stop_count.
-                * Meanwhile, a SIGKILL could come in before we retake the
-                * siglock.  That must prevent us from sleeping in TASK_TRACED.
-                * So after regaining the lock, we must check for SIGKILL.
                 */
                spin_unlock_irq(&current->sighand->siglock);
-               arch_ptrace_stop(exit_code, info);
+               arch_ptrace_stop();
                spin_lock_irq(&current->sighand->siglock);
-               if (sigkill_pending(current))
-                       return;
        }
  
+       /*
+        * schedule() will not sleep if there is a pending signal that
+        * can awaken the task.
+        */
        set_special_state(TASK_TRACED);
  
        /*
  
        spin_unlock_irq(&current->sighand->siglock);
        read_lock(&tasklist_lock);
-       if (may_ptrace_stop()) {
+       if (likely(current->ptrace)) {
                /*
                 * Notify parents of the stop.
                 *
@@@ -4138,29 -4116,11 +4103,29 @@@ int do_sigaction(int sig, struct k_siga
        return 0;
  }
  
 +#ifdef CONFIG_DYNAMIC_SIGFRAME
 +static inline void sigaltstack_lock(void)
 +      __acquires(&current->sighand->siglock)
 +{
 +      spin_lock_irq(&current->sighand->siglock);
 +}
 +
 +static inline void sigaltstack_unlock(void)
 +      __releases(&current->sighand->siglock)
 +{
 +      spin_unlock_irq(&current->sighand->siglock);
 +}
 +#else
 +static inline void sigaltstack_lock(void) { }
 +static inline void sigaltstack_unlock(void) { }
 +#endif
 +
  static int
  do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
                size_t min_ss_size)
  {
        struct task_struct *t = current;
 +      int ret = 0;
  
        if (oss) {
                memset(oss, 0, sizeof(stack_t));
                                ss_mode != 0))
                        return -EINVAL;
  
 +              sigaltstack_lock();
                if (ss_mode == SS_DISABLE) {
                        ss_size = 0;
                        ss_sp = NULL;
                } else {
                        if (unlikely(ss_size < min_ss_size))
 -                              return -ENOMEM;
 +                              ret = -ENOMEM;
 +                      if (!sigaltstack_size_valid(ss_size))
 +                              ret = -ENOMEM;
                }
 -
 -              t->sas_ss_sp = (unsigned long) ss_sp;
 -              t->sas_ss_size = ss_size;
 -              t->sas_ss_flags = ss_flags;
 +              if (!ret) {
 +                      t->sas_ss_sp = (unsigned long) ss_sp;
 +                      t->sas_ss_size = ss_size;
 +                      t->sas_ss_flags = ss_flags;
 +              }
 +              sigaltstack_unlock();
        }
 -      return 0;
 +      return ret;
  }
  
  SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
diff --combined mm/debug.c
index 714be101dec9b1c115d53578ab66c0208981777e,aa5fe4d5c4b4db29598163cd3b46a770f860be0b..d0020fc5820271eced62eebf3be2fc73c0b77e96
@@@ -24,9 -24,7 +24,9 @@@ const char *migrate_reason_names[MR_TYP
        "syscall_or_cpuset",
        "mempolicy_mbind",
        "numa_misplaced",
 -      "cma",
 +      "contig_range",
 +      "longterm_pin",
 +      "demotion",
  };
  
  const struct trace_print_flags pageflag_names[] = {
@@@ -162,7 -160,7 +162,7 @@@ static void __dump_page(struct page *pa
  out_mapping:
        BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
  
 -      pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags,
 +      pr_warn("%sflags: %pGp%s\n", type, &head->flags,
                page_cma ? " CMA" : "");
        print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
                        sizeof(unsigned long), page,
@@@ -216,7 -214,7 +216,7 @@@ void dump_mm(const struct mm_struct *mm
                "start_code %lx end_code %lx start_data %lx end_data %lx\n"
                "start_brk %lx brk %lx start_stack %lx\n"
                "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
-               "binfmt %px flags %lx core_state %px\n"
+               "binfmt %px flags %lx\n"
  #ifdef CONFIG_AIO
                "ioctx_table %px\n"
  #endif
                mm->start_code, mm->end_code, mm->start_data, mm->end_data,
                mm->start_brk, mm->brk, mm->start_stack,
                mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
-               mm->binfmt, mm->flags, mm->core_state,
+               mm->binfmt, mm->flags,
  #ifdef CONFIG_AIO
                mm->ioctx_table,
  #endif
diff --combined mm/oom_kill.c
index 989f35a2bbb1deafe9d47789e74a9f13d2e72831,7877c755ab373b6e80aa7b5d2a5357bdabbc9dc2..50b984d048ce5349439d4307e92310d91cef9eab
@@@ -787,9 -787,9 +787,9 @@@ static inline bool __task_will_free_mem
        struct signal_struct *sig = task->signal;
  
        /*
-        * A coredumping process may sleep for an extended period in exit_mm(),
-        * so the oom killer cannot assume that the process will promptly exit
-        * and release memory.
+        * A coredumping process may sleep for an extended period in
+        * coredump_task_exit(), so the oom killer cannot assume that
+        * the process will promptly exit and release memory.
         */
        if (sig->flags & SIGNAL_GROUP_COREDUMP)
                return false;
@@@ -1150,7 -1150,7 +1150,7 @@@ SYSCALL_DEFINE2(process_mrelease, int, 
        struct task_struct *task;
        struct task_struct *p;
        unsigned int f_flags;
 -      bool reap = true;
 +      bool reap = false;
        struct pid *pid;
        long ret = 0;
  
                goto put_task;
        }
  
 -      mm = p->mm;
 -      mmgrab(mm);
 -
 -      /* If the work has been done already, just exit with success */
 -      if (test_bit(MMF_OOM_SKIP, &mm->flags))
 -              reap = false;
 -      else if (!task_will_free_mem(p)) {
 -              reap = false;
 -              ret = -EINVAL;
 +      if (mmget_not_zero(p->mm)) {
 +              mm = p->mm;
 +              if (task_will_free_mem(p))
 +                      reap = true;
 +              else {
 +                      /* Error only if the work has not been done already */
 +                      if (!test_bit(MMF_OOM_SKIP, &mm->flags))
 +                              ret = -EINVAL;
 +              }
        }
        task_unlock(p);
  
        mmap_read_unlock(mm);
  
  drop_mm:
 -      mmdrop(mm);
 +      if (mm)
 +              mmput(mm);
  put_task:
        put_task_struct(task);
  put_pid: