#include <linux/string.h>
#include <linux/wait.h>
#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/uaccess.h>
#include <linux/list.h>
#include <linux/completion.h>
#include <linux/file.h>
+ #include <linux/magic.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
{
- return (struct autofs_sb_info *)(sb->s_fs_info);
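+ /* Only trust s_fs_info when the super block really is an autofs mount */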
+ return sb->s_magic != AUTOFS_SUPER_MAGIC ?
+ NULL : (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when);
+ struct autofs_sb_info *sbi, unsigned int how);
int autofs_expire_multi(struct super_block *, struct vfsmount *,
struct autofs_sb_info *, int __user *);
- struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
- struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
/* Device node initialization */
struct list_head ptrace_entry;
/* PID/PID hash table linkage. */
- struct pid_link pids[PIDTYPE_MAX];
+ struct pid *thread_pid;
+ struct hlist_node pid_links[PIDTYPE_MAX];
struct list_head thread_group;
struct list_head thread_node;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
unsigned long last_switch_count;
+ unsigned long last_switch_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
static inline struct pid *task_pid(struct task_struct *task)
{
- return task->pids[PIDTYPE_PID].pid;
-}
-
-static inline struct pid *task_tgid(struct task_struct *task)
-{
- return task->group_leader->pids[PIDTYPE_PID].pid;
-}
-
-/*
- * Without tasklist or RCU lock it is not safe to dereference
- * the result of task_pgrp/task_session even if task == current,
- * we can race with another thread doing sys_setsid/sys_setpgid.
- */
-static inline struct pid *task_pgrp(struct task_struct *task)
-{
- return task->group_leader->pids[PIDTYPE_PGID].pid;
-}
-
-static inline struct pid *task_session(struct task_struct *task)
-{
- return task->group_leader->pids[PIDTYPE_SID].pid;
+ return task->thread_pid;
}
/*
*/
static inline int pid_alive(const struct task_struct *p)
{
- return p->pids[PIDTYPE_PID].pid != NULL;
+ return p->thread_pid != NULL;
}
static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
{
- return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
}
static inline pid_t task_tgid_vnr(struct task_struct *tsk)
{
- return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
+ return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
}
static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
bool checking_timer;
};
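+/*
+ * Collects signals sent to a whole group of processes while a fork is in
+ * progress, so the new child receives them as well.
+ */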
+struct multiprocess_signals {
+ sigset_t signal;
+ struct hlist_node node;
+};
+
/*
* NOTE! "signal_struct" does not have its own
* locking, because a shared signal_struct always
/* shared signal handling: */
struct sigpending shared_pending;
+ /* For collecting multiprocess signals during fork */
+ struct hlist_head multiprocess;
+
/* thread group exit support */
int group_exit_code;
/* overloaded:
#endif
- struct pid *leader_pid;
+ /* PID/PID hash table linkage. */
+ struct pid *pids[PIDTYPE_MAX];
#ifdef CONFIG_NO_HZ_FULL
atomic_t tick_dep_mask;
int force_sig_ptrace_errno_trap(int errno, void __user *addr);
extern int send_sig_info(int, struct siginfo *, struct task_struct *);
- extern int force_sigsegv(int, struct task_struct *);
+ extern void force_sigsegv(int sig, struct task_struct *p);
extern int force_sig_info(int, struct siginfo *, struct task_struct *);
extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
extern int zap_other_threads(struct task_struct *p);
extern struct sigqueue *sigqueue_alloc(void);
extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
+extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
static inline int restart_syscall(void)
*/
extern void recalc_sigpending_and_wake(struct task_struct *t);
extern void recalc_sigpending(void);
+extern void calculate_sigpending(void);
extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
}
+void task_join_group_stop(struct task_struct *task);
+
#ifdef TIF_RESTORE_SIGMASK
/*
* Legacy restore_sigmask accessors. These are inefficient on
typedef int (*proc_visitor)(struct task_struct *p, void *data);
void walk_process_tree(struct task_struct *top, proc_visitor, void *);
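+/*
+ * Return the struct pid of @task for @type: the per-thread pid for
+ * PIDTYPE_PID, otherwise the process-wide pid stored in signal_struct.
+ */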
+static inline
+struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
+{
+ struct pid *pid;
+ if (type == PIDTYPE_PID)
+ pid = task_pid(task);
+ else
+ pid = task->signal->pids[type];
+ return pid;
+}
+
+static inline struct pid *task_tgid(struct task_struct *task)
+{
+ return task->signal->pids[PIDTYPE_TGID];
+}
+
+/*
+ * Without tasklist or RCU lock it is not safe to dereference
+ * the result of task_pgrp/task_session even if task == current,
+ * we can race with another thread doing sys_setsid/sys_setpgid.
+ */
+static inline struct pid *task_pgrp(struct task_struct *task)
+{
+ return task->signal->pids[PIDTYPE_PGID];
+}
+
+static inline struct pid *task_session(struct task_struct *task)
+{
+ return task->signal->pids[PIDTYPE_SID];
+}
+
static inline int get_nr_threads(struct task_struct *tsk)
{
return tsk->signal->nr_threads;
*/
static inline bool has_group_leader_pid(struct task_struct *p)
{
- return task_pid(p) == p->signal->leader_pid;
+ return task_pid(p) == task_tgid(p);
}
static inline
struct timespec;
struct pt_regs;
+enum pid_type;
extern int next_signal(struct sigpending *pending, sigset_t *mask);
extern int do_send_sig_info(int sig, struct siginfo *info,
- struct task_struct *p, bool group);
-extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
+ struct task_struct *p, enum pid_type type);
+extern int group_send_sig_info(int sig, struct siginfo *info,
+ struct task_struct *p, enum pid_type type);
extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
extern int sigprocmask(int, sigset_t *, sigset_t *);
extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;
- extern int get_signal(struct ksignal *ksig);
+ extern bool get_signal(struct ksignal *ksig);
extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
extern void exit_signals(struct task_struct *tsk);
extern void kernel_sigaction(int, __sighandler_t);
extern struct kmem_cache *sighand_cachep;
- int unhandled_signal(struct task_struct *tsk, int sig);
+ extern bool unhandled_signal(struct task_struct *tsk, int sig);
/*
* In POSIX a signal is sent either to a specific thread (Linux task)
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
- struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ struct vm_area_struct *vma;
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (vma)
vma_init(vma, mm);
return vma;
tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ tsk->last_switch_time = 0;
#endif
tsk->mm = NULL;
return -ENOMEM;
atomic_set(&sig->count, 1);
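+ /* Take siglock so the child gets a consistent snapshot of the signal handlers */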
+ spin_lock_irq(&current->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(&current->sighand->siglock);
return 0;
}
init_waitqueue_head(&sig->wait_chldexit);
sig->curr_target = tsk;
init_sigpending(&sig->shared_pending);
+ INIT_HLIST_HEAD(&sig->multiprocess);
seqlock_init(&sig->stats_lock);
prev_cputime_init(&sig->prev_cputime);
static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
#endif
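+/* Initialize the per-type hash-table linkage before any pids are attached. */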
+static inline void init_task_pid_links(struct task_struct *task)
+{
+ enum pid_type type;
+
+ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+ INIT_HLIST_NODE(&task->pid_links[type]);
+ }
+}
+
static inline void
init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
{
- task->pids[type].pid = pid;
+ if (type == PIDTYPE_PID)
+ task->thread_pid = pid;
+ else
+ task->signal->pids[type] = pid;
}
static inline void rcu_copy_process(struct task_struct *p)
{
int retval;
struct task_struct *p;
+ struct multiprocess_signals delayed;
/*
* Don't allow sharing the root directory with processes in a different
return ERR_PTR(-EINVAL);
}
+ /*
+ * Force any signals received before this point to be delivered
+ * before the fork happens. Collect up signals sent to multiple
+ * processes that happen during the fork and delay them so that
+ * they appear to happen after the fork.
+ */
+ sigemptyset(&delayed.signal);
+ INIT_HLIST_NODE(&delayed.node);
+
+ spin_lock_irq(&current->sighand->siglock);
+ if (!(clone_flags & CLONE_THREAD))
+ hlist_add_head(&delayed.node, &current->signal->multiprocess);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+ retval = -ERESTARTNOINTR;
+ if (signal_pending(current))
+ goto fork_out;
+
retval = -ENOMEM;
p = dup_task_struct(current, node);
if (!p)
rseq_fork(p, clone_flags);
- /*
- * Process group and session signals need to be delivered to just the
- * parent before the fork or both the parent and the child after the
- * fork. Restart if a signal comes in before we add the new process to
- * it's process group.
- * A fatal signal pending means that current will exit, so the new
- * thread can't slip out of an OOM kill (or normal SIGKILL).
- */
- recalc_sigpending();
- if (signal_pending(current)) {
- retval = -ERESTARTNOINTR;
- goto bad_fork_cancel_cgroup;
- }
+ /* Don't start children in a dying pid namespace */
if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
retval = -ENOMEM;
goto bad_fork_cancel_cgroup;
}
+ /* Let kill terminate clone/fork in the middle */
+ if (fatal_signal_pending(current)) {
+ retval = -EINTR;
+ goto bad_fork_cancel_cgroup;
+ }
+
+ init_task_pid_links(p);
if (likely(p->pid)) {
ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
init_task_pid(p, PIDTYPE_PID, pid);
if (thread_group_leader(p)) {
+ init_task_pid(p, PIDTYPE_TGID, pid);
init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
init_task_pid(p, PIDTYPE_SID, task_session(current));
ns_of_pid(pid)->child_reaper = p;
p->signal->flags |= SIGNAL_UNKILLABLE;
}
-
- p->signal->leader_pid = pid;
+ p->signal->shared_pending.signal = delayed.signal;
p->signal->tty = tty_kref_get(current->signal->tty);
/*
* Inherit has_child_subreaper flag under the same
p->real_parent->signal->is_child_subreaper;
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
+ attach_pid(p, PIDTYPE_TGID);
attach_pid(p, PIDTYPE_PGID);
attach_pid(p, PIDTYPE_SID);
__this_cpu_inc(process_counts);
current->signal->nr_threads++;
atomic_inc(&current->signal->live);
atomic_inc(&current->signal->sigcnt);
+ task_join_group_stop(p);
list_add_tail_rcu(&p->thread_group,
&p->group_leader->thread_group);
list_add_tail_rcu(&p->thread_node,
attach_pid(p, PIDTYPE_PID);
nr_threads++;
}
-
total_forks++;
+ hlist_del_init(&delayed.node);
spin_unlock(&current->sighand->siglock);
syscall_tracepoint_update(p);
write_unlock_irq(&tasklist_lock);
put_task_stack(p);
free_task(p);
fork_out:
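+ /* delayed.node may or may not be on the multiprocess list here; hlist_del_init() handles both */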
+ spin_lock_irq(&current->sighand->siglock);
+ hlist_del_init(&delayed.node);
+ spin_unlock_irq(&current->sighand->siglock);
return ERR_PTR(retval);
}
-static inline void init_idle_pids(struct pid_link *links)
+static inline void init_idle_pids(struct task_struct *idle)
{
enum pid_type type;
for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
- INIT_HLIST_NODE(&links[type].node); /* not really needed */
- links[type].pid = &init_struct_pid;
+ INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
+ init_task_pid(idle, type, &init_struct_pid);
}
}
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
- init_idle_pids(task->pids);
+ init_idle_pids(task);
init_idle(task, cpu);
}
return t->sighand->action[sig - 1].sa.sa_handler;
}
- static int sig_handler_ignored(void __user *handler, int sig)
+ static inline bool sig_handler_ignored(void __user *handler, int sig)
{
/* Is it explicitly or implicitly ignored? */
return handler == SIG_IGN ||
- (handler == SIG_DFL && sig_kernel_ignore(sig));
+ (handler == SIG_DFL && sig_kernel_ignore(sig));
}
- static int sig_task_ignored(struct task_struct *t, int sig, bool force)
+ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
{
void __user *handler;
if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
handler == SIG_DFL && !(force && sig_kernel_only(sig)))
- return 1;
+ return true;
return sig_handler_ignored(handler, sig);
}
- static int sig_ignored(struct task_struct *t, int sig, bool force)
+ static bool sig_ignored(struct task_struct *t, int sig, bool force)
{
/*
* Blocked signals are never ignored, since the
* unblocked.
*/
if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
- return 0;
+ return false;
/*
* Tracers may want to know about even ignored signal unless it
* by SIGNAL_UNKILLABLE task.
*/
if (t->ptrace && sig != SIGKILL)
- return 0;
+ return false;
return sig_task_ignored(t, sig, force);
}
* Re-calculate pending state from the set of locally pending
* signals, globally pending signals, and blocked signals.
*/
- static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
{
unsigned long ready;
long i;
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
- static int recalc_sigpending_tsk(struct task_struct *t)
+ static bool recalc_sigpending_tsk(struct task_struct *t)
{
if ((t->jobctl & JOBCTL_PENDING_MASK) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
- return 1;
+ return true;
}
+
/*
* We must never clear the flag in another thread, or in current
* when it's possible the current syscall is returning -ERESTART*.
* So we don't clear it here, and only callers who know they should do.
*/
- return 0;
+ return false;
}
/*
}
+void calculate_sigpending(void)
+{
+ /* Have any signals or users of TIF_SIGPENDING been delayed
+ * until after fork?
+ */
+ spin_lock_irq(&current->sighand->siglock);
+ set_tsk_thread_flag(current, TIF_SIGPENDING);
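+ /* Let recalc_sigpending() decide whether TIF_SIGPENDING should stay set */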
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+}
+
/* Given the mask, find the first available signal that should be serviced. */
#define SYNCHRONOUS_MASK \
return false;
}
+void task_join_group_stop(struct task_struct *task)
+{
+ /* Have the new thread join an on-going signal group stop */
+ unsigned long jobctl = current->jobctl;
+ if (jobctl & JOBCTL_STOP_PENDING) {
+ struct signal_struct *sig = current->signal;
+ unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
+ unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
+ if (task_set_jobctl_pending(task, signr | gstop)) {
+ sig->group_stop_count++;
+ }
+ }
+}
+
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
}
}
- int unhandled_signal(struct task_struct *tsk, int sig)
+ bool unhandled_signal(struct task_struct *tsk, int sig)
{
void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
if (is_global_init(tsk))
- return 1;
+ return true;
+
if (handler != SIG_IGN && handler != SIG_DFL)
- return 0;
+ return false;
+
/* if ptraced, let the tracer determine */
return !tsk->ptrace;
}
*
* All callers must be holding the siglock.
*/
- static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
+ static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
{
struct sigqueue *q, *n;
sigset_t m;
sigandsets(&m, mask, &s->signal);
if (sigisemptyset(&m))
- return 0;
+ return;
sigandnsets(&s->signal, &s->signal, mask);
list_for_each_entry_safe(q, n, &s->list, list) {
__sigqueue_free(q);
}
}
- return 1;
}
static inline int is_si_special(const struct siginfo *info)
/*
* called with RCU read lock from check_kill_permission()
*/
- static int kill_ok_by_cred(struct task_struct *t)
+ static bool kill_ok_by_cred(struct task_struct *t)
{
const struct cred *cred = current_cred();
const struct cred *tcred = __task_cred(t);
- if (uid_eq(cred->euid, tcred->suid) ||
- uid_eq(cred->euid, tcred->uid) ||
- uid_eq(cred->uid, tcred->suid) ||
- uid_eq(cred->uid, tcred->uid))
- return 1;
-
- if (ns_capable(tcred->user_ns, CAP_KILL))
- return 1;
-
- return 0;
+ return uid_eq(cred->euid, tcred->suid) ||
+ uid_eq(cred->euid, tcred->uid) ||
+ uid_eq(cred->uid, tcred->suid) ||
+ uid_eq(cred->uid, tcred->uid) ||
+ ns_capable(tcred->user_ns, CAP_KILL);
}
/*
* as soon as they're available, so putting the signal on the shared queue
* will be equivalent to sending it to one such thread.
*/
- static inline int wants_signal(int sig, struct task_struct *p)
+ static inline bool wants_signal(int sig, struct task_struct *p)
{
if (sigismember(&p->blocked, sig))
- return 0;
+ return false;
+
if (p->flags & PF_EXITING)
- return 0;
+ return false;
+
if (sig == SIGKILL)
- return 1;
+ return true;
+
if (task_is_stopped_or_traced(p))
- return 0;
+ return false;
+
return task_curr(p) || !signal_pending(p);
}
-static void complete_signal(int sig, struct task_struct *p, int group)
+static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
{
struct signal_struct *signal = p->signal;
struct task_struct *t;
*/
if (wants_signal(sig, p))
t = p;
- else if (!group || thread_group_empty(p))
+ else if ((type == PIDTYPE_PID) || thread_group_empty(p))
/*
* There is just one thread and it does not need to be woken.
* It will dequeue unblocked signals before it runs again.
return;
}
- static inline int legacy_queue(struct sigpending *signals, int sig)
+ static inline bool legacy_queue(struct sigpending *signals, int sig)
{
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
#endif
static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
- int group, int from_ancestor_ns)
+ enum pid_type type, int from_ancestor_ns)
{
struct sigpending *pending;
struct sigqueue *q;
from_ancestor_ns || (info == SEND_SIG_FORCED)))
goto ret;
- pending = group ? &t->signal->shared_pending : &t->pending;
+ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
/*
* Short-circuit ignored signals and support queuing
* exactly one non-rt signal, so that we can get more
out_set:
signalfd_notify(t, sig);
sigaddset(&pending->signal, sig);
- complete_signal(sig, t, group);
+
+ /* Let multiprocess signals appear after on-going forks */
+ if (type > PIDTYPE_TGID) {
+ struct multiprocess_signals *delayed;
+ hlist_for_each_entry(delayed, &t->signal->multiprocess, node) {
+ sigset_t *signal = &delayed->signal;
+ /* Can't queue both a stop and a continue signal */
+ if (sig == SIGCONT)
+ sigdelsetmask(signal, SIG_KERNEL_STOP_MASK);
+ else if (sig_kernel_stop(sig))
+ sigdelset(signal, SIGCONT);
+ sigaddset(signal, sig);
+ }
+ }
+
+ complete_signal(sig, t, type);
ret:
- trace_signal_generate(sig, info, t, group, result);
+ trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result);
return ret;
}
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
- int group)
+ enum pid_type type)
{
int from_ancestor_ns = 0;
!task_pid_nr_ns(current, task_active_pid_ns(t));
#endif
- return __send_signal(sig, info, t, group, from_ancestor_ns);
+ return __send_signal(sig, info, t, type, from_ancestor_ns);
}
static void print_fatal_signal(int signr)
int
__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
- return send_signal(sig, info, p, 1);
+ return send_signal(sig, info, p, PIDTYPE_TGID);
}
static int
specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
{
- return send_signal(sig, info, t, 0);
+ return send_signal(sig, info, t, PIDTYPE_PID);
}
int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
- bool group)
+ enum pid_type type)
{
unsigned long flags;
int ret = -ESRCH;
if (lock_task_sighand(p, &flags)) {
- ret = send_signal(sig, info, p, group);
+ ret = send_signal(sig, info, p, type);
unlock_task_sighand(p, &flags);
}
/*
* send signal info to all the members of a group
*/
-int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+ enum pid_type type)
{
int ret;
rcu_read_unlock();
if (!ret && sig)
- ret = do_send_sig_info(sig, info, p, true);
+ ret = do_send_sig_info(sig, info, p, type);
return ret;
}
success = 0;
retval = -ESRCH;
do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
- int err = group_send_sig_info(sig, info, p);
+ int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID);
success |= !err;
retval = err;
} while_each_pid_task(pgrp, PIDTYPE_PGID, p);
rcu_read_lock();
p = pid_task(pid, PIDTYPE_PID);
if (p)
- error = group_send_sig_info(sig, info, p);
+ error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
rcu_read_unlock();
if (likely(!p || error != -ESRCH))
return error;
return error;
}
- static int kill_as_cred_perm(const struct cred *cred,
- struct task_struct *target)
+ static inline bool kill_as_cred_perm(const struct cred *cred,
+ struct task_struct *target)
{
const struct cred *pcred = __task_cred(target);
- if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) &&
- !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid))
- return 0;
- return 1;
+
+ return uid_eq(cred->euid, pcred->suid) ||
+ uid_eq(cred->euid, pcred->uid) ||
+ uid_eq(cred->uid, pcred->suid) ||
+ uid_eq(cred->uid, pcred->uid);
}
/* like kill_pid_info(), but doesn't use uid/euid of "current" */
if (sig) {
if (lock_task_sighand(p, &flags)) {
- ret = __send_signal(sig, info, p, 1, 0);
+ ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0);
unlock_task_sighand(p, &flags);
} else
ret = -ESRCH;
for_each_process(p) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
- int err = group_send_sig_info(sig, info, p);
+ int err = group_send_sig_info(sig, info, p,
+ PIDTYPE_MAX);
++count;
if (err != -EPERM)
retval = err;
if (!valid_signal(sig))
return -EINVAL;
- return do_send_sig_info(sig, info, p, false);
+ return do_send_sig_info(sig, info, p, PIDTYPE_PID);
}
#define __si_special(priv) \
return send_sig_info(sig, __si_special(priv), p);
}
- void
- force_sig(int sig, struct task_struct *p)
+ void force_sig(int sig, struct task_struct *p)
{
force_sig_info(sig, SEND_SIG_PRIV, p);
}
* the problem was already a SIGSEGV, we'll want to
* make sure we don't even try to deliver the signal..
*/
- int
- force_sigsegv(int sig, struct task_struct *p)
+ void force_sigsegv(int sig, struct task_struct *p)
{
if (sig == SIGSEGV) {
unsigned long flags;
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
force_sig(SIGSEGV, p);
- return 0;
}
int force_sig_fault(int sig, int code, void __user *addr
__sigqueue_free(q);
}
-int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
+int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
{
int sig = q->info.si_signo;
struct sigpending *pending;
+ struct task_struct *t;
unsigned long flags;
int ret, result;
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
ret = -1;
- if (!likely(lock_task_sighand(t, &flags)))
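+ /* Resolve the target task from the pid under RCU; it may already be gone */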
+ rcu_read_lock();
+ t = pid_task(pid, type);
+ if (!t || !likely(lock_task_sighand(t, &flags)))
goto ret;
ret = 1; /* the signal is ignored */
q->info.si_overrun = 0;
signalfd_notify(t, sig);
- pending = group ? &t->signal->shared_pending : &t->pending;
+ pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
list_add_tail(&q->list, &pending->list);
sigaddset(&pending->signal, sig);
- complete_signal(sig, t, group);
+ complete_signal(sig, t, type);
result = TRACE_SIGNAL_DELIVERED;
out:
- trace_signal_generate(sig, &q->info, t, group, result);
+ trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result);
unlock_task_sighand(t, &flags);
ret:
+ rcu_read_unlock();
return ret;
}
spin_unlock_irqrestore(&sighand->siglock, flags);
}
- static inline int may_ptrace_stop(void)
+ static inline bool may_ptrace_stop(void)
{
if (!likely(current->ptrace))
- return 0;
+ return false;
/*
* Are we in the middle of do_coredump?
* If so and our tracer is also part of the coredump stopping
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
* Return non-zero if there is a SIGKILL that should be waking us up.
* Called with the siglock held.
*/
- static int sigkill_pending(struct task_struct *tsk)
+ static bool sigkill_pending(struct task_struct *tsk)
{
- return sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
+ return sigismember(&tsk->pending.signal, SIGKILL) ||
+ sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
}
/*
return signr;
}
- int get_signal(struct ksignal *ksig)
+ bool get_signal(struct ksignal *ksig)
{
struct sighand_struct *sighand = current->sighand;
struct signal_struct *signal = current->signal;
task_work_run();
if (unlikely(uprobe_deny_signal()))
- return 0;
+ return false;
/*
* Do this once, we can't return to user-mode if freezing() == T.
}
#endif
- static int do_sigpending(sigset_t *set)
+ static void do_sigpending(sigset_t *set)
{
spin_lock_irq(&current->sighand->siglock);
sigorsets(set, &current->pending.signal,
/* Outside the lock because only this thread touches it. */
sigandsets(set, &current->blocked, set);
- return 0;
}
/**
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sigsetsize))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sigsetsize))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
compat_size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err)
- err = put_compat_sigset(uset, &set, sigsetsize);
- return err;
+ do_sigpending(&set);
+
+ return put_compat_sigset(uset, &set, sigsetsize);
}
#endif
* probe. No signal is actually delivered.
*/
if (!error && sig) {
- error = do_send_sig_info(sig, info, p, false);
+ error = do_send_sig_info(sig, info, p, PIDTYPE_PID);
/*
* If lock_task_sighand() failed we pretend the task
* dies after receiving the signal. The window is tiny,
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset)
{
sigset_t set;
- int err;
if (sizeof(old_sigset_t) > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t)))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sizeof(old_sigset_t)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
{
sigset_t set;
- int err = do_sigpending(&set);
- if (!err)
- err = put_user(set.sig[0], set32);
- return err;
+
+ do_sigpending(&set);
+
+ return put_user(set.sig[0], set32);
}
#endif
size_t, sigsetsize)
{
struct k_sigaction new_sa, old_sa;
- int ret = -EINVAL;
+ int ret;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(sigset_t))
- goto out;
+ return -EINVAL;
- if (act) {
- if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
- return -EFAULT;
- }
+ if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
+ return -EFAULT;
ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+ if (ret)
+ return ret;
- if (!ret && oact) {
- if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
- return -EFAULT;
- }
- out:
- return ret;
+ if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
"the deadlock.\n");
return;
}
- ret = send_signal(sig, SEND_SIG_PRIV, t, false);
+ ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
spin_unlock(&t->sighand->siglock);
if (ret)
kdb_printf("Fail to deliver Signal %d to process %d.\n",
struct task_struct *p;
struct task_struct *task;
- pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
+ pr_info("Tasks state (memory values in pages):\n");
+ pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
rcu_read_lock();
for_each_process(p) {
if (oom_unkillable_task(p, memcg, nodemask))
continue;
}
- pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
+ pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
task->pid, from_kuid(&init_user_ns, task_uid(task)),
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
mm_pgtables_bytes(task->mm),
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
- void __oom_reap_task_mm(struct mm_struct *mm)
+ bool __oom_reap_task_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ bool ret = true;
/*
* Tell all users of get_user/copy_from_user etc... that the content
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, mm, start, end);
- mmu_notifier_invalidate_range_start(mm, start, end);
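+ /*
+ * If a notifier would have to block, skip this vma and report partial
+ * progress so the caller retries later.
+ */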
+ if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+ ret = false;
+ continue;
+ }
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
+
+ return ret;
}
+ /*
+ * Reaps the address space of the given task.
+ *
+ * Returns true on success and false if none or part of the address space
+ * has been reclaimed and the caller should retry later.
+ */
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;
- /*
- * We have to make sure to not race with the victim exit path
- * and cause premature new oom victim selection:
- * oom_reap_task_mm exit_mm
- * mmget_not_zero
- * mmput
- * atomic_dec_and_test
- * exit_oom_victim
- * [...]
- * out_of_memory
- * select_bad_process
- * # no TIF_MEMDIE task selects new victim
- * unmap_page_range # frees some memory
- */
- mutex_lock(&oom_lock);
-
if (!down_read_trylock(&mm->mmap_sem)) {
- ret = false;
trace_skip_task_reaping(tsk->pid);
- goto unlock_oom;
- }
-
- /*
- * If the mm has invalidate_{start,end}() notifiers that could block,
- * sleep to give the oom victim some more time.
- * TODO: we really want to get rid of this ugly hack and make sure that
- * notifiers cannot block for unbounded amount of time
- */
- if (mm_has_blockable_invalidate_notifiers(mm)) {
- up_read(&mm->mmap_sem);
- schedule_timeout_idle(HZ);
- goto unlock_oom;
+ return false;
}
/*
* down_write();up_write() cycle in exit_mmap().
*/
if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
- up_read(&mm->mmap_sem);
trace_skip_task_reaping(tsk->pid);
- goto unlock_oom;
+ goto out_unlock;
}
trace_start_task_reaping(tsk->pid);
- __oom_reap_task_mm(mm);
+ /* failed to reap part of the address space. Try again later */
+ ret = __oom_reap_task_mm(mm);
+ if (!ret)
+ goto out_finish;
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)));
+ out_finish:
+ trace_finish_task_reaping(tsk->pid);
+ out_unlock:
up_read(&mm->mmap_sem);
- trace_finish_task_reaping(tsk->pid);
- unlock_oom:
- mutex_unlock(&oom_lock);
return ret;
}
return ret;
}
- static void oom_kill_process(struct oom_control *oc, const char *message)
+ static void __oom_kill_process(struct task_struct *victim)
{
- struct task_struct *p = oc->chosen;
- unsigned int points = oc->chosen_points;
- struct task_struct *victim = p;
- struct task_struct *child;
- struct task_struct *t;
+ struct task_struct *p;
struct mm_struct *mm;
- unsigned int victim_points = 0;
- static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
bool can_oom_reap = true;
- /*
- * If the task is already exiting, don't alarm the sysadmin or kill
- * its children or threads, just give it access to memory reserves
- * so it can die quickly
- */
- task_lock(p);
- if (task_will_free_mem(p)) {
- mark_oom_victim(p);
- wake_oom_reaper(p);
- task_unlock(p);
- put_task_struct(p);
- return;
- }
- task_unlock(p);
-
- if (__ratelimit(&oom_rs))
- dump_header(oc, p);
-
- pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
- message, task_pid_nr(p), p->comm, points);
-
- /*
- * If any of p's children has a different mm and is eligible for kill,
- * the one with the highest oom_badness() score is sacrificed for its
- * parent. This attempts to lose the minimal amount of work done while
- * still freeing memory.
- */
- read_lock(&tasklist_lock);
- for_each_thread(p, t) {
- list_for_each_entry(child, &t->children, sibling) {
- unsigned int child_points;
-
- if (process_shares_mm(child, p->mm))
- continue;
- /*
- * oom_badness() returns 0 if the thread is unkillable
- */
- child_points = oom_badness(child,
- oc->memcg, oc->nodemask, oc->totalpages);
- if (child_points > victim_points) {
- put_task_struct(victim);
- victim = child;
- victim_points = child_points;
- get_task_struct(victim);
- }
- }
- }
- read_unlock(&tasklist_lock);
-
p = find_lock_task_mm(victim);
if (!p) {
put_task_struct(victim);
* in order to prevent the OOM victim from depleting the memory
* reserves from the user space under its control.
*/
- do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
+ do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
mark_oom_victim(victim);
pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
*/
if (unlikely(p->flags & PF_KTHREAD))
continue;
- do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
+ do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
}
rcu_read_unlock();
}
#undef K
+ /*
+ * Kill the provided task unless it is secured by setting
+ * oom_score_adj to OOM_SCORE_ADJ_MIN.
+ */
+ static int oom_kill_memcg_member(struct task_struct *task, void *unused)
+ {
+ if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+ get_task_struct(task);
+ __oom_kill_process(task);
+ }
+ return 0;
+ }
+
+ static void oom_kill_process(struct oom_control *oc, const char *message)
+ {
+ struct task_struct *p = oc->chosen;
+ unsigned int points = oc->chosen_points;
+ struct task_struct *victim = p;
+ struct task_struct *child;
+ struct task_struct *t;
+ struct mem_cgroup *oom_group;
+ unsigned int victim_points = 0;
+ static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ /*
+ * If the task is already exiting, don't alarm the sysadmin or kill
+ * its children or threads, just give it access to memory reserves
+ * so it can die quickly
+ */
+ task_lock(p);
+ if (task_will_free_mem(p)) {
+ mark_oom_victim(p);
+ wake_oom_reaper(p);
+ task_unlock(p);
+ put_task_struct(p);
+ return;
+ }
+ task_unlock(p);
+
+ if (__ratelimit(&oom_rs))
+ dump_header(oc, p);
+
+ pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
+ message, task_pid_nr(p), p->comm, points);
+
+ /*
+ * If any of p's children has a different mm and is eligible for kill,
+ * the one with the highest oom_badness() score is sacrificed for its
+ * parent. This attempts to lose the minimal amount of work done while
+ * still freeing memory.
+ */
+ read_lock(&tasklist_lock);
+ for_each_thread(p, t) {
+ list_for_each_entry(child, &t->children, sibling) {
+ unsigned int child_points;
+
+ if (process_shares_mm(child, p->mm))
+ continue;
+ /*
+ * oom_badness() returns 0 if the thread is unkillable
+ */
+ child_points = oom_badness(child,
+ oc->memcg, oc->nodemask, oc->totalpages);
+ if (child_points > victim_points) {
+ put_task_struct(victim);
+ victim = child;
+ victim_points = child_points;
+ get_task_struct(victim);
+ }
+ }
+ }
+ read_unlock(&tasklist_lock);
+
+ /*
+ * Do we need to kill the entire memory cgroup?
+ * Or even one of the ancestor memory cgroups?
+ * Check this out before killing the victim task.
+ */
+ oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
+
+ __oom_kill_process(victim);
+
+ /*
+ * If necessary, kill all tasks in the selected memory cgroup.
+ */
+ if (oom_group) {
+ mem_cgroup_print_oom_group(oom_group);
+ mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
+ mem_cgroup_put(oom_group);
+ }
+ }
+
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
- __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end, bool blockable)
{
+ return 0;
}
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
srcu_read_unlock(&kvm->srcu, idx);
}
- static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
+ int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
spin_unlock(&kvm->mmu_lock);
- kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
srcu_read_unlock(&kvm->srcu, idx);
+
+ return ret;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
if (arg)
goto out;
oldpid = rcu_access_pointer(vcpu->pid);
- if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
+ if (unlikely(oldpid != task_pid(current))) {
/* The thread running this VCPU changed. */
struct pid *newpid;