Git Repo - linux.git/commitdiff
Merge branch 'per_signal_struct_coredumps-for-v5.16' of git://git.kernel.org/pub...
author    Linus Torvalds <[email protected]>  Wed, 3 Nov 2021 19:15:29 +0000 (12:15 -0700)
committer Linus Torvalds <[email protected]>  Wed, 3 Nov 2021 19:15:29 +0000 (12:15 -0700)
Pull per signal_struct coredumps from Eric Biederman:
 "Current coredumps are mixed up with the exit code, the signal handling
  code, and the ptrace code making coredumps much more complicated than
  necessary and difficult to follow.

  This series of changes starts with ptrace_stop and cleans it up,
  making it easier to follow what is happening in ptrace_stop. It then
  cleans up the exec interactions with coredumps, and then the coredump
  interactions with exit. Finally, the coredump interactions with the
  signal handling code are cleaned up.

  The first and last changes are bug fixes for minor bugs.

  I believe the fact that vfork followed by execve can kill the process
  that called vfork if exec fails is sufficient justification to change
  the userspace-visible behavior.

  In previous discussions, some of these changes were organized
  differently and individually appeared to make the code base worse. As
  currently written, I believe they all stand on their own as cleanups
  and bug fixes.

  This means that even if the worst should happen and the last change
  needs to be reverted for some unimaginable reason, the code base will
  still be improved.

  If the worst does not happen, there are more cleanups that can be
  made. Signals that generate coredumps can easily become eligible for
  short-circuit delivery in complete_signal. The entire rendezvous for
  generating a coredump can move into get_signal. The function
  force_sig_info_to_task can be written in a way that does not modify
  the signal handling state of the target task (because coredumps are
  eligible for short-circuit delivery). Many of these future cleanups
  can be done another way, but nothing so cleanly as if coredumps become
  per signal_struct"

* 'per_signal_struct_coredumps-for-v5.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  coredump: Limit coredumps to a single thread group
  coredump:  Don't perform any cleanups before dumping core
  exit: Factor coredump_exit_mm out of exit_mm
  exec: Check for a pending fatal signal instead of core_state
  ptrace: Remove the unnecessary arguments from arch_ptrace_stop
  signal: Remove the bogus sigkill_pending in ptrace_stop
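
A quick orientation before the diff: the central change in this series is where the coredump rendezvous state lives. The compressed C sketch below shows only the members that appear in the diff further down; the stand-in typedefs exist just so the fragment is self-contained outside the kernel tree, and the _before/_after struct names are illustrative, not real kernel identifiers.

    /* Stand-in types so this sketch compiles outside the kernel tree. */
    typedef struct { int counter; } atomic_t;
    struct completion { unsigned int done; };
    struct task_struct;                     /* used only via pointer here */

    /*
     * The rendezvous bookkeeping itself is unchanged by the series; the
     * diff below only shows it being removed from mm_types.h (its new
     * home is not part of this excerpt).
     */
    struct core_thread {
        struct task_struct *task;
        struct core_thread *next;           /* chain of threads that have checked in */
    };

    struct core_state {
        atomic_t nr_threads;                /* threads still to check in */
        struct core_thread dumper;          /* list head; .task is the core-inducing thread */
        struct completion startup;          /* completed once all threads have checked in */
    };

    /* Before: one rendezvous per address space, reached via mm->core_state. */
    struct mm_struct_before {
        /* ...many fields elided... */
        struct core_state *core_state;      /* removed by this series */
    };

    /*
     * After: one rendezvous per thread group, reached via
     * tsk->signal->core_state and checked under siglock
     * (see coredump_task_exit() in the kernel/exit.c hunk below).
     */
    struct signal_struct_after {
        /* ...many fields elided... */
        struct core_state *core_state;
    };

Keying the rendezvous by thread group rather than by mm is what allows "coredump: Limit coredumps to a single thread group" to stop pulling in threads of other processes that merely share the mm.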

arch/ia64/include/asm/ptrace.h
fs/binfmt_elf.c
fs/proc/array.c
include/linux/mm_types.h
include/linux/sched.h
kernel/exit.c
kernel/fork.c
kernel/signal.c
mm/debug.c
mm/oom_kill.c

index 8a2d0f72b324bc4d096d7f22ef07b83596dfd0ec,f15504f75f1062e184080eaee1e23ead7c8f9509..a10a498eede1bac7958af7efe7cb29e083069126
   * the canonical representation by adding to instruction pointer.
   */
  # define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
 +# define instruction_pointer_set(regs, val)   \
 +({                                            \
 +      ia64_psr(regs)->ri = (val & 0xf);       \
 +      regs->cr_iip = (val & ~0xfULL);         \
 +})
  
  static inline unsigned long user_stack_pointer(struct pt_regs *regs)
  {
@@@ -134,9 -129,9 +134,9 @@@ static inline long regs_return_value(st
    extern void ia64_decrement_ip (struct pt_regs *pt);
  
    extern void ia64_ptrace_stop(void);
-   #define arch_ptrace_stop(code, info) \
+   #define arch_ptrace_stop() \
        ia64_ptrace_stop()
-   #define arch_ptrace_stop_needed(code, info) \
+   #define arch_ptrace_stop_needed() \
        (!test_thread_flag(TIF_RESTORE_RSE))
  
    extern void ptrace_attach_sync_user_rbs (struct task_struct *);
diff --combined fs/binfmt_elf.c
index a813b70f594e69ee043781a61de130a636ef9c6a,796e5327ee7d06b31c9ed040183b57ba27408e74..fa582748be412f7d132d96af01d79a51961e28cb
@@@ -630,7 -630,7 +630,7 @@@ static unsigned long load_elf_interp(st
  
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
 -                              elf_type |= MAP_FIXED_NOREPLACE;
 +                              elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;
  
@@@ -1834,7 -1834,7 +1834,7 @@@ static int fill_note_info(struct elfhd
        /*
         * Allocate a structure for each thread.
         */
-       for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+       for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
                t = kzalloc(offsetof(struct elf_thread_core_info,
                                     notes[info->thread_notes]),
                            GFP_KERNEL);
@@@ -2024,7 -2024,7 +2024,7 @@@ static int fill_note_info(struct elfhd
        if (!elf_note_info_init(info))
                return 0;
  
-       for (ct = current->mm->core_state->dumper.next;
+       for (ct = current->signal->core_state->dumper.next;
                                        ct; ct = ct->next) {
                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
                if (!ets)
diff --combined fs/proc/array.c
index 77cf4187adecc359475324f1c37a204d61d74467,520c51be1e572aeda3e13a52ae18009366c8a956..ff869a66b34e390fb58f7487c696f6d5c44e78b8
@@@ -408,9 -408,9 +408,9 @@@ static void task_cpus_allowed(struct se
                   cpumask_pr_args(&task->cpus_mask));
  }
  
- static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
+ static inline void task_core_dumping(struct seq_file *m, struct task_struct *task)
  {
-       seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+       seq_put_decimal_ull(m, "CoreDumping:\t", !!task->signal->core_state);
        seq_putc(m, '\n');
  }
  
@@@ -436,7 -436,7 +436,7 @@@ int proc_pid_status(struct seq_file *m
  
        if (mm) {
                task_mem(m, mm);
-               task_core_dumping(m, mm);
+               task_core_dumping(m, task);
                task_thp_status(m, mm);
                mmput(mm);
        }
@@@ -541,7 -541,7 +541,7 @@@ static int do_task_stat(struct seq_fil
        }
  
        if (permitted && (!whole || num_threads < 2))
 -              wchan = get_wchan(task);
 +              wchan = !task_is_running(task);
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
         *
         * This works with older implementations of procps as well.
         */
 -      if (wchan)
 -              seq_puts(m, " 1");
 -      else
 -              seq_puts(m, " 0");
 +      seq_put_decimal_ull(m, " ", wchan);
  
        seq_put_decimal_ull(m, " ", 0);
        seq_put_decimal_ull(m, " ", 0);
diff --combined include/linux/mm_types.h
index 8f3131477ec69550410209c3fc3f626dd1b3f3fe,1039f6ae922c91edd54877fb7579b9ddd5b8404a..f7326c8704bb3bdd8970821af831430dfc4e89a6
@@@ -12,7 -12,6 +12,7 @@@
  #include <linux/completion.h>
  #include <linux/cpumask.h>
  #include <linux/uprobes.h>
 +#include <linux/rcupdate.h>
  #include <linux/page-flags-layout.h>
  #include <linux/workqueue.h>
  #include <linux/seqlock.h>
@@@ -105,7 -104,18 +105,7 @@@ struct page 
                        struct page_pool *pp;
                        unsigned long _pp_mapping_pad;
                        unsigned long dma_addr;
 -                      union {
 -                              /**
 -                               * dma_addr_upper: might require a 64-bit
 -                               * value on 32-bit architectures.
 -                               */
 -                              unsigned long dma_addr_upper;
 -                              /**
 -                               * For frag page support, not supported in
 -                               * 32-bit architectures with 64-bit DMA.
 -                               */
 -                              atomic_long_t pp_frag_count;
 -                      };
 +                      atomic_long_t pp_frag_count;
                };
                struct {        /* slab, slob and slub */
                        union {
  #endif
  } _struct_page_alignment;
  
 +/**
 + * struct folio - Represents a contiguous set of bytes.
 + * @flags: Identical to the page flags.
 + * @lru: Least Recently Used list; tracks how recently this folio was used.
 + * @mapping: The file this page belongs to, or refers to the anon_vma for
 + *    anonymous memory.
 + * @index: Offset within the file, in units of pages.  For anonymous memory,
 + *    this is the index from the beginning of the mmap.
 + * @private: Filesystem per-folio data (see folio_attach_private()).
 + *    Used for swp_entry_t if folio_test_swapcache().
 + * @_mapcount: Do not access this member directly.  Use folio_mapcount() to
 + *    find out how many times this folio is mapped by userspace.
 + * @_refcount: Do not access this member directly.  Use folio_ref_count()
 + *    to find how many references there are to this folio.
 + * @memcg_data: Memory Control Group data.
 + *
 + * A folio is a physically, virtually and logically contiguous set
 + * of bytes.  It is a power-of-two in size, and it is aligned to that
 + * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
 + * in the page cache, it is at a file offset which is a multiple of that
 + * power-of-two.  It may be mapped into userspace at an address which is
 + * at an arbitrary page offset, but its kernel virtual address is aligned
 + * to its size.
 + */
 +struct folio {
 +      /* private: don't document the anon union */
 +      union {
 +              struct {
 +      /* public: */
 +                      unsigned long flags;
 +                      struct list_head lru;
 +                      struct address_space *mapping;
 +                      pgoff_t index;
 +                      void *private;
 +                      atomic_t _mapcount;
 +                      atomic_t _refcount;
 +#ifdef CONFIG_MEMCG
 +                      unsigned long memcg_data;
 +#endif
 +      /* private: the union with struct page is transitional */
 +              };
 +              struct page page;
 +      };
 +};
 +
 +static_assert(sizeof(struct page) == sizeof(struct folio));
 +#define FOLIO_MATCH(pg, fl)                                           \
 +      static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
 +FOLIO_MATCH(flags, flags);
 +FOLIO_MATCH(lru, lru);
 +FOLIO_MATCH(compound_head, lru);
 +FOLIO_MATCH(index, index);
 +FOLIO_MATCH(private, private);
 +FOLIO_MATCH(_mapcount, _mapcount);
 +FOLIO_MATCH(_refcount, _refcount);
 +#ifdef CONFIG_MEMCG
 +FOLIO_MATCH(memcg_data, memcg_data);
 +#endif
 +#undef FOLIO_MATCH
 +
 +static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 +{
 +      struct page *tail = &folio->page + 1;
 +      return &tail->compound_mapcount;
 +}
 +
  static inline atomic_t *compound_mapcount_ptr(struct page *page)
  {
        return &page[1].compound_mapcount;
@@@ -313,12 -257,6 +313,12 @@@ static inline atomic_t *compound_pincou
  #define PAGE_FRAG_CACHE_MAX_SIZE      __ALIGN_MASK(32768, ~PAGE_MASK)
  #define PAGE_FRAG_CACHE_MAX_ORDER     get_order(PAGE_FRAG_CACHE_MAX_SIZE)
  
 +/*
 + * page_private can be used on tail pages.  However, PagePrivate is only
 + * checked by the VM on the head page.  So page_private on the tail pages
 + * should be used for data that's ancillary to the head page (eg attaching
 + * buffer heads to tail pages after attaching buffer heads to the head page)
 + */
  #define page_private(page)            ((page)->private)
  
  static inline void set_page_private(struct page *page, unsigned long private)
        page->private = private;
  }
  
 +static inline void *folio_get_private(struct folio *folio)
 +{
 +      return folio->private;
 +}
 +
  struct page_frag_cache {
        void * va;
  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
@@@ -454,17 -387,6 +454,6 @@@ struct vm_area_struct 
        struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
  } __randomize_layout;
  
- struct core_thread {
-       struct task_struct *task;
-       struct core_thread *next;
- };
- struct core_state {
-       atomic_t nr_threads;
-       struct core_thread dumper;
-       struct completion startup;
- };
  struct kioctx_table;
  struct mm_struct {
        struct {
  
                unsigned long flags; /* Must use atomic bitops to access */
  
-               struct core_state *core_state; /* coredumping support */
  #ifdef CONFIG_AIO
                spinlock_t                      ioctx_lock;
                struct kioctx_table __rcu       *ioctx_table;
                bool tlb_flush_batched;
  #endif
                struct uprobes_state uprobes_state;
 +#ifdef CONFIG_PREEMPT_RT
 +              struct rcu_head delayed_drop;
 +#endif
  #ifdef CONFIG_HUGETLB_PAGE
                atomic_long_t hugetlb_usage;
  #endif
diff --combined include/linux/sched.h
index 6f6f8f340a0fba45c25afcec0fcb673e3b079118,f3741f23935eb2eb58ea0a3aaea3d92f84b5a23b..78c351e35fec6361973c86c1a1337080af7a688a
@@@ -503,8 -503,6 +503,8 @@@ struct sched_statistics 
  
        u64                             block_start;
        u64                             block_max;
 +      s64                             sum_block_runtime;
 +
        u64                             exec_max;
        u64                             slice_max;
  
        u64                             nr_wakeups_passive;
        u64                             nr_wakeups_idle;
  #endif
 -};
 +} ____cacheline_aligned;
  
  struct sched_entity {
        /* For load-balancing: */
  
        u64                             nr_migrations;
  
 -      struct sched_statistics         statistics;
 -
  #ifdef CONFIG_FAIR_GROUP_SCHED
        int                             depth;
        struct sched_entity             *parent;
@@@ -750,6 -750,10 +750,6 @@@ struct task_struct 
  #ifdef CONFIG_SMP
        int                             on_cpu;
        struct __call_single_node       wake_entry;
 -#ifdef CONFIG_THREAD_INFO_IN_TASK
 -      /* Current CPU: */
 -      unsigned int                    cpu;
 -#endif
        unsigned int                    wakee_flips;
        unsigned long                   wakee_flip_decay_ts;
        struct task_struct              *last_wakee;
        int                             normal_prio;
        unsigned int                    rt_priority;
  
 -      const struct sched_class        *sched_class;
        struct sched_entity             se;
        struct sched_rt_entity          rt;
        struct sched_dl_entity          dl;
 +      const struct sched_class        *sched_class;
  
  #ifdef CONFIG_SCHED_CORE
        struct rb_node                  core_node;
        struct uclamp_se                uclamp[UCLAMP_CNT];
  #endif
  
 +      struct sched_statistics         stats;
 +
  #ifdef CONFIG_PREEMPT_NOTIFIERS
        /* List of struct preempt_notifier: */
        struct hlist_head               preempt_notifiers;
        /* Stacked block device info: */
        struct bio_list                 *bio_list;
  
 -#ifdef CONFIG_BLOCK
        /* Stack plugging: */
        struct blk_plug                 *plug;
 -#endif
  
        /* VM state: */
        struct reclaim_state            *reclaim_state;
                                        mce_whole_page : 1,
                                        __mce_reserved : 62;
        struct callback_head            mce_kill_me;
 +      int                             mce_count;
  #endif
  
  #ifdef CONFIG_KRETPROBES
@@@ -1661,6 -1664,7 +1661,7 @@@ extern struct pid *cad_pid
  #define PF_VCPU                       0x00000001      /* I'm a virtual CPU */
  #define PF_IDLE                       0x00000002      /* I am an IDLE thread */
  #define PF_EXITING            0x00000004      /* Getting shut down */
+ #define PF_POSTCOREDUMP               0x00000008      /* Coredumps should ignore this task */
  #define PF_IO_WORKER          0x00000010      /* Task is an IO worker */
  #define PF_WQ_WORKER          0x00000020      /* I'm a workqueue worker */
  #define PF_FORKNOEXEC         0x00000040      /* Forked but didn't exec */
  #define tsk_used_math(p)                      ((p)->flags & PF_USED_MATH)
  #define used_math()                           tsk_used_math(current)
  
 -static inline bool is_percpu_thread(void)
 +static __always_inline bool is_percpu_thread(void)
  {
  #ifdef CONFIG_SMP
        return (current->flags & PF_NO_SETAFFINITY) &&
@@@ -1882,7 -1886,10 +1883,7 @@@ extern struct thread_info init_thread_i
  extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
  
  #ifdef CONFIG_THREAD_INFO_IN_TASK
 -static inline struct thread_info *task_thread_info(struct task_struct *task)
 -{
 -      return &task->thread_info;
 -}
 +# define task_thread_info(task)       (&(task)->thread_info)
  #elif !defined(__HAVE_THREAD_FUNCTIONS)
  # define task_thread_info(task)       ((struct thread_info *)(task)->stack)
  #endif
@@@ -2032,7 -2039,7 +2033,7 @@@ static inline int _cond_resched(void) 
  #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
  
  #define cond_resched() ({                     \
 -      ___might_sleep(__FILE__, __LINE__, 0);  \
 +      __might_resched(__FILE__, __LINE__, 0); \
        _cond_resched();                        \
  })
  
@@@ -2040,38 -2047,19 +2041,38 @@@ extern int __cond_resched_lock(spinlock
  extern int __cond_resched_rwlock_read(rwlock_t *lock);
  extern int __cond_resched_rwlock_write(rwlock_t *lock);
  
 -#define cond_resched_lock(lock) ({                            \
 -      ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
 -      __cond_resched_lock(lock);                              \
 +#define MIGHT_RESCHED_RCU_SHIFT               8
 +#define MIGHT_RESCHED_PREEMPT_MASK    ((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
 +
 +#ifndef CONFIG_PREEMPT_RT
 +/*
 + * Non RT kernels have an elevated preempt count due to the held lock,
 + * but are not allowed to be inside a RCU read side critical section
 + */
 +# define PREEMPT_LOCK_RESCHED_OFFSETS PREEMPT_LOCK_OFFSET
 +#else
 +/*
 + * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 + * cond_resched*lock() has to take that into account because it checks for
 + * preempt_count() and rcu_preempt_depth().
 + */
 +# define PREEMPT_LOCK_RESCHED_OFFSETS \
 +      (PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
 +#endif
 +
 +#define cond_resched_lock(lock) ({                                            \
 +      __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
 +      __cond_resched_lock(lock);                                              \
  })
  
 -#define cond_resched_rwlock_read(lock) ({                     \
 -      __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
 -      __cond_resched_rwlock_read(lock);                       \
 +#define cond_resched_rwlock_read(lock) ({                                     \
 +      __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
 +      __cond_resched_rwlock_read(lock);                                       \
  })
  
 -#define cond_resched_rwlock_write(lock) ({                    \
 -      __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
 -      __cond_resched_rwlock_write(lock);                      \
 +#define cond_resched_rwlock_write(lock) ({                                    \
 +      __might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);      \
 +      __cond_resched_rwlock_write(lock);                                      \
  })
  
  static inline void cond_resched_rcu(void)
@@@ -2126,7 -2114,11 +2127,7 @@@ static __always_inline bool need_resche
  
  static inline unsigned int task_cpu(const struct task_struct *p)
  {
 -#ifdef CONFIG_THREAD_INFO_IN_TASK
 -      return READ_ONCE(p->cpu);
 -#else
        return READ_ONCE(task_thread_info(p)->cpu);
 -#endif
  }
  
  extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
@@@ -2145,7 -2137,6 +2146,7 @@@ static inline void set_task_cpu(struct 
  #endif /* CONFIG_SMP */
  
  extern bool sched_task_on_rq(struct task_struct *p);
 +extern unsigned long get_wchan(struct task_struct *p);
  
  /*
   * In order to reduce various lock holder preemption latencies provide an
diff --combined kernel/exit.c
index 50f1692c732d14945a334195b45e4e3fbede30b7,2b355e926c1350ded8355f6a964d82b014b3e171..f702a6a63686ea7c463ea5b73a7f5b665bb2d91e
@@@ -48,6 -48,7 +48,6 @@@
  #include <linux/pipe_fs_i.h>
  #include <linux/audit.h> /* for audit_free() */
  #include <linux/resource.h>
 -#include <linux/blkdev.h>
  #include <linux/task_io_accounting_ops.h>
  #include <linux/tracehook.h>
  #include <linux/fs_struct.h>
@@@ -63,7 -64,6 +63,7 @@@
  #include <linux/rcuwait.h>
  #include <linux/compat.h>
  #include <linux/io_uring.h>
 +#include <linux/kprobes.h>
  
  #include <linux/uaccess.h>
  #include <asm/unistd.h>
@@@ -168,7 -168,6 +168,7 @@@ static void delayed_put_task_struct(str
  {
        struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
  
 +      kprobe_flush_task(tsk);
        perf_event_delayed_put(tsk);
        trace_sched_process_free(tsk);
        put_task_struct(tsk);
@@@ -340,6 -339,46 +340,46 @@@ kill_orphaned_pgrp(struct task_struct *
        }
  }
  
+ static void coredump_task_exit(struct task_struct *tsk)
+ {
+       struct core_state *core_state;
+       /*
+        * Serialize with any possible pending coredump.
+        * We must hold siglock around checking core_state
+        * and setting PF_POSTCOREDUMP.  The core-inducing thread
+        * will increment ->nr_threads for each thread in the
+        * group without PF_POSTCOREDUMP set.
+        */
+       spin_lock_irq(&tsk->sighand->siglock);
+       tsk->flags |= PF_POSTCOREDUMP;
+       core_state = tsk->signal->core_state;
+       spin_unlock_irq(&tsk->sighand->siglock);
+       if (core_state) {
+               struct core_thread self;
+               self.task = current;
+               if (self.task->flags & PF_SIGNALED)
+                       self.next = xchg(&core_state->dumper.next, &self);
+               else
+                       self.task = NULL;
+               /*
+                * Implies mb(), the result of xchg() must be visible
+                * to core_state->dumper.
+                */
+               if (atomic_dec_and_test(&core_state->nr_threads))
+                       complete(&core_state->startup);
+               for (;;) {
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       if (!self.task) /* see coredump_finish() */
+                               break;
+                       freezable_schedule();
+               }
+               __set_current_state(TASK_RUNNING);
+       }
+ }
  #ifdef CONFIG_MEMCG
  /*
   * A task is exiting.   If it owned this mm, find a new owner for the mm.
@@@ -435,47 -474,12 +475,12 @@@ assign_new_owner
  static void exit_mm(void)
  {
        struct mm_struct *mm = current->mm;
-       struct core_state *core_state;
  
        exit_mm_release(current, mm);
        if (!mm)
                return;
        sync_mm_rss(mm);
-       /*
-        * Serialize with any possible pending coredump.
-        * We must hold mmap_lock around checking core_state
-        * and clearing tsk->mm.  The core-inducing thread
-        * will increment ->nr_threads for each thread in the
-        * group with ->mm != NULL.
-        */
        mmap_read_lock(mm);
-       core_state = mm->core_state;
-       if (core_state) {
-               struct core_thread self;
-               mmap_read_unlock(mm);
-               self.task = current;
-               if (self.task->flags & PF_SIGNALED)
-                       self.next = xchg(&core_state->dumper.next, &self);
-               else
-                       self.task = NULL;
-               /*
-                * Implies mb(), the result of xchg() must be visible
-                * to core_state->dumper.
-                */
-               if (atomic_dec_and_test(&core_state->nr_threads))
-                       complete(&core_state->startup);
-               for (;;) {
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       if (!self.task) /* see coredump_finish() */
-                               break;
-                       freezable_schedule();
-               }
-               __set_current_state(TASK_RUNNING);
-               mmap_read_lock(mm);
-       }
        mmgrab(mm);
        BUG_ON(mm != current->active_mm);
        /* more a memory barrier than a real lock */
@@@ -763,6 -767,7 +768,7 @@@ void __noreturn do_exit(long code
        profile_task_exit(tsk);
        kcov_task_exit(tsk);
  
+       coredump_task_exit(tsk);
        ptrace_event(PTRACE_EVENT_EXIT, code);
  
        validate_creds_for_do_exit(tsk);
diff --combined kernel/fork.c
index 8e9feeef555e7f0650f07d84fa51f214c3c98b14,c8adb76982f7a61293a362bf9062fa15ea605b49..3f112b11a9ad1453cdfa5f88f8e3d70ac9089213
@@@ -76,6 -76,7 +76,6 @@@
  #include <linux/taskstats_kern.h>
  #include <linux/random.h>
  #include <linux/tty.h>
 -#include <linux/blkdev.h>
  #include <linux/fs_struct.h>
  #include <linux/magic.h>
  #include <linux/perf_event.h>
@@@ -1043,7 -1044,6 +1043,6 @@@ static struct mm_struct *mm_init(struc
        seqcount_init(&mm->write_protect_seq);
        mmap_init_lock(mm);
        INIT_LIST_HEAD(&mm->mmlist);
-       mm->core_state = NULL;
        mm_pgtables_bytes_init(mm);
        mm->map_count = 0;
        mm->locked_vm = 0;
@@@ -1391,8 -1391,7 +1390,7 @@@ static void mm_release(struct task_stru
         * purposes.
         */
        if (tsk->clear_child_tid) {
-               if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
-                   atomic_read(&mm->mm_users) > 1) {
+               if (atomic_read(&mm->mm_users) > 1) {
                        /*
                         * We don't check the error code - if userspace has
                         * not set up a proper pointer then tough luck.
@@@ -2404,7 -2403,7 +2402,7 @@@ static __latent_entropy struct task_str
        write_unlock_irq(&tasklist_lock);
  
        proc_fork_connector(p);
 -      sched_post_fork(p);
 +      sched_post_fork(p, args);
        cgroup_post_fork(p, args);
        perf_event_fork(p);
  
diff --combined kernel/signal.c
index e99aff33ff140c3cc6995a0fa89e4065e6275f03,b0db80acc6ef1113cd2dba50ddb018ae84569e63..6f3476dc787325c136a8a79e58e9963b1b57bc67
@@@ -426,10 -426,22 +426,10 @@@ __sigqueue_alloc(int sig, struct task_s
         */
        rcu_read_lock();
        ucounts = task_ucounts(t);
 -      sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 -      switch (sigpending) {
 -      case 1:
 -              if (likely(get_ucounts(ucounts)))
 -                      break;
 -              fallthrough;
 -      case LONG_MAX:
 -              /*
 -               * we need to decrease the ucount in the userns tree on any
 -               * failure to avoid counts leaking.
 -               */
 -              dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 -              rcu_read_unlock();
 -              return NULL;
 -      }
 +      sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
        rcu_read_unlock();
 +      if (!sigpending)
 +              return NULL;
  
        if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
                q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
        }
  
        if (unlikely(q == NULL)) {
 -              if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
 -                      put_ucounts(ucounts);
 +              dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
        } else {
                INIT_LIST_HEAD(&q->list);
                q->flags = sigqueue_flags;
@@@ -451,8 -464,8 +451,8 @@@ static void __sigqueue_free(struct sigq
  {
        if (q->flags & SIGQUEUE_PREALLOC)
                return;
 -      if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
 -              put_ucounts(q->ucounts);
 +      if (q->ucounts) {
 +              dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
                q->ucounts = NULL;
        }
        kmem_cache_free(sigqueue_cachep, q);
@@@ -2145,40 -2158,6 +2145,6 @@@ static void do_notify_parent_cldstop(st
        spin_unlock_irqrestore(&sighand->siglock, flags);
  }
  
- static inline bool may_ptrace_stop(void)
- {
-       if (!likely(current->ptrace))
-               return false;
-       /*
-        * Are we in the middle of do_coredump?
-        * If so and our tracer is also part of the coredump stopping
-        * is a deadlock situation, and pointless because our tracer
-        * is dead so don't allow us to stop.
-        * If SIGKILL was already sent before the caller unlocked
-        * ->siglock we must see ->core_state != NULL. Otherwise it
-        * is safe to enter schedule().
-        *
-        * This is almost outdated, a task with the pending SIGKILL can't
-        * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
-        * after SIGKILL was already dequeued.
-        */
-       if (unlikely(current->mm->core_state) &&
-           unlikely(current->mm == current->parent->mm))
-               return false;
-       return true;
- }
- /*
-  * Return non-zero if there is a SIGKILL that should be waking us up.
-  * Called with the siglock held.
-  */
- static bool sigkill_pending(struct task_struct *tsk)
- {
-       return sigismember(&tsk->pending.signal, SIGKILL) ||
-              sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
- }
  /*
   * This must be called with current->sighand->siglock held.
   *
@@@ -2196,7 -2175,7 +2162,7 @@@ static void ptrace_stop(int exit_code, 
  {
        bool gstop_done = false;
  
-       if (arch_ptrace_stop_needed(exit_code, info)) {
+       if (arch_ptrace_stop_needed()) {
                /*
                 * The arch code has something special to do before a
                 * ptrace stop.  This is allowed to block, e.g. for faults
                 * calling arch_ptrace_stop, so we must release it now.
                 * To preserve proper semantics, we must do this before
                 * any signal bookkeeping like checking group_stop_count.
-                * Meanwhile, a SIGKILL could come in before we retake the
-                * siglock.  That must prevent us from sleeping in TASK_TRACED.
-                * So after regaining the lock, we must check for SIGKILL.
                 */
                spin_unlock_irq(&current->sighand->siglock);
-               arch_ptrace_stop(exit_code, info);
+               arch_ptrace_stop();
                spin_lock_irq(&current->sighand->siglock);
-               if (sigkill_pending(current))
-                       return;
        }
  
+       /*
+        * schedule() will not sleep if there is a pending signal that
+        * can awaken the task.
+        */
        set_special_state(TASK_TRACED);
  
        /*
  
        spin_unlock_irq(&current->sighand->siglock);
        read_lock(&tasklist_lock);
-       if (may_ptrace_stop()) {
+       if (likely(current->ptrace)) {
                /*
                 * Notify parents of the stop.
                 *
@@@ -4138,29 -4116,11 +4103,29 @@@ int do_sigaction(int sig, struct k_siga
        return 0;
  }
  
 +#ifdef CONFIG_DYNAMIC_SIGFRAME
 +static inline void sigaltstack_lock(void)
 +      __acquires(&current->sighand->siglock)
 +{
 +      spin_lock_irq(&current->sighand->siglock);
 +}
 +
 +static inline void sigaltstack_unlock(void)
 +      __releases(&current->sighand->siglock)
 +{
 +      spin_unlock_irq(&current->sighand->siglock);
 +}
 +#else
 +static inline void sigaltstack_lock(void) { }
 +static inline void sigaltstack_unlock(void) { }
 +#endif
 +
  static int
  do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
                size_t min_ss_size)
  {
        struct task_struct *t = current;
 +      int ret = 0;
  
        if (oss) {
                memset(oss, 0, sizeof(stack_t));
                                ss_mode != 0))
                        return -EINVAL;
  
 +              sigaltstack_lock();
                if (ss_mode == SS_DISABLE) {
                        ss_size = 0;
                        ss_sp = NULL;
                } else {
                        if (unlikely(ss_size < min_ss_size))
 -                              return -ENOMEM;
 +                              ret = -ENOMEM;
 +                      if (!sigaltstack_size_valid(ss_size))
 +                              ret = -ENOMEM;
                }
 -
 -              t->sas_ss_sp = (unsigned long) ss_sp;
 -              t->sas_ss_size = ss_size;
 -              t->sas_ss_flags = ss_flags;
 +              if (!ret) {
 +                      t->sas_ss_sp = (unsigned long) ss_sp;
 +                      t->sas_ss_size = ss_size;
 +                      t->sas_ss_flags = ss_flags;
 +              }
 +              sigaltstack_unlock();
        }
 -      return 0;
 +      return ret;
  }
  
  SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
diff --combined mm/debug.c
index 714be101dec9b1c115d53578ab66c0208981777e,aa5fe4d5c4b4db29598163cd3b46a770f860be0b..d0020fc5820271eced62eebf3be2fc73c0b77e96
@@@ -24,9 -24,7 +24,9 @@@ const char *migrate_reason_names[MR_TYP
        "syscall_or_cpuset",
        "mempolicy_mbind",
        "numa_misplaced",
 -      "cma",
 +      "contig_range",
 +      "longterm_pin",
 +      "demotion",
  };
  
  const struct trace_print_flags pageflag_names[] = {
@@@ -162,7 -160,7 +162,7 @@@ static void __dump_page(struct page *pa
  out_mapping:
        BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
  
 -      pr_warn("%sflags: %#lx(%pGp)%s\n", type, head->flags, &head->flags,
 +      pr_warn("%sflags: %pGp%s\n", type, &head->flags,
                page_cma ? " CMA" : "");
        print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
                        sizeof(unsigned long), page,
@@@ -216,7 -214,7 +216,7 @@@ void dump_mm(const struct mm_struct *mm
                "start_code %lx end_code %lx start_data %lx end_data %lx\n"
                "start_brk %lx brk %lx start_stack %lx\n"
                "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
-               "binfmt %px flags %lx core_state %px\n"
+               "binfmt %px flags %lx\n"
  #ifdef CONFIG_AIO
                "ioctx_table %px\n"
  #endif
                mm->start_code, mm->end_code, mm->start_data, mm->end_data,
                mm->start_brk, mm->brk, mm->start_stack,
                mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
-               mm->binfmt, mm->flags, mm->core_state,
+               mm->binfmt, mm->flags,
  #ifdef CONFIG_AIO
                mm->ioctx_table,
  #endif
diff --combined mm/oom_kill.c
index 989f35a2bbb1deafe9d47789e74a9f13d2e72831,7877c755ab373b6e80aa7b5d2a5357bdabbc9dc2..50b984d048ce5349439d4307e92310d91cef9eab
@@@ -787,9 -787,9 +787,9 @@@ static inline bool __task_will_free_mem
        struct signal_struct *sig = task->signal;
  
        /*
-        * A coredumping process may sleep for an extended period in exit_mm(),
-        * so the oom killer cannot assume that the process will promptly exit
-        * and release memory.
+        * A coredumping process may sleep for an extended period in
+        * coredump_task_exit(), so the oom killer cannot assume that
+        * the process will promptly exit and release memory.
         */
        if (sig->flags & SIGNAL_GROUP_COREDUMP)
                return false;
@@@ -1150,7 -1150,7 +1150,7 @@@ SYSCALL_DEFINE2(process_mrelease, int, 
        struct task_struct *task;
        struct task_struct *p;
        unsigned int f_flags;
 -      bool reap = true;
 +      bool reap = false;
        struct pid *pid;
        long ret = 0;
  
                goto put_task;
        }
  
 -      mm = p->mm;
 -      mmgrab(mm);
 -
 -      /* If the work has been done already, just exit with success */
 -      if (test_bit(MMF_OOM_SKIP, &mm->flags))
 -              reap = false;
 -      else if (!task_will_free_mem(p)) {
 -              reap = false;
 -              ret = -EINVAL;
 +      if (mmget_not_zero(p->mm)) {
 +              mm = p->mm;
 +              if (task_will_free_mem(p))
 +                      reap = true;
 +              else {
 +                      /* Error only if the work has not been done already */
 +                      if (!test_bit(MMF_OOM_SKIP, &mm->flags))
 +                              ret = -EINVAL;
 +              }
        }
        task_unlock(p);
  
        mmap_read_unlock(mm);
  
  drop_mm:
 -      mmdrop(mm);
 +      if (mm)
 +              mmput(mm);
  put_task:
        put_task_struct(task);
  put_pid: