Git Repo - linux.git/commitdiff
Merge branch 'akpm' (patches from Andrew)
author Linus Torvalds <[email protected]>
Wed, 22 Aug 2018 19:34:08 +0000 (12:34 -0700)
committer Linus Torvalds <[email protected]>
Wed, 22 Aug 2018 19:34:08 +0000 (12:34 -0700)
Merge more updates from Andrew Morton:

 - the rest of MM

 - procfs updates

 - various misc things

 - more y2038 fixes

 - get_maintainer updates

 - lib/ updates

 - checkpatch updates

 - various epoll updates

 - autofs updates

 - hfsplus

 - some reiserfs work

 - fatfs updates

 - signal.c cleanups

 - ipc/ updates

* emailed patches from Andrew Morton <[email protected]>: (166 commits)
  ipc/util.c: update return value of ipc_getref from int to bool
  ipc/util.c: further variable name cleanups
  ipc: simplify ipc initialization
  ipc: get rid of ids->tables_initialized hack
  lib/rhashtable: guarantee initial hashtable allocation
  lib/rhashtable: simplify bucket_table_alloc()
  ipc: drop ipc_lock()
  ipc/util.c: correct comment in ipc_obtain_object_check
  ipc: rename ipcctl_pre_down_nolock()
  ipc/util.c: use ipc_rcu_putref() for failures in ipc_addid()
  ipc: reorganize initialization of kern_ipc_perm.seq
  ipc: compute kern_ipc_perm.id under the ipc lock
  init/Kconfig: remove EXPERT from CHECKPOINT_RESTORE
  fs/sysv/inode.c: use ktime_get_real_seconds() for superblock stamp
  adfs: use timespec64 for time conversion
  kernel/sysctl.c: fix typos in comments
  drivers/rapidio/devices/rio_mport_cdev.c: remove redundant pointer md
  fork: don't copy inconsistent signal handler state to child
  signal: make get_signal() return bool
  signal: make sigkill_pending() return bool
  ...

fs/autofs/autofs_i.h
include/linux/sched.h
include/linux/sched/signal.h
include/linux/signal.h
kernel/fork.c
kernel/signal.c
mm/oom_kill.c
virt/kvm/kvm_main.c

diff --combined fs/autofs/autofs_i.h
index 50281228985049563292bc549d0fd7101f06abe1,633986a6a93a202f7295bad9fc2e99bfd6ce780f..9f9cadbfbd7a34f8005fafb0960b3c60e4848c60
@@@ -18,7 -18,6 +18,7 @@@
  #include <linux/string.h>
  #include <linux/wait.h>
  #include <linux/sched.h>
 +#include <linux/sched/signal.h>
  #include <linux/mount.h>
  #include <linux/namei.h>
  #include <linux/uaccess.h>
@@@ -27,6 -26,7 +27,7 @@@
  #include <linux/list.h>
  #include <linux/completion.h>
  #include <linux/file.h>
+ #include <linux/magic.h>
  
  /* This is the range of ioctl() numbers we claim as ours */
  #define AUTOFS_IOC_FIRST     AUTOFS_IOC_READY
@@@ -125,7 -125,8 +126,8 @@@ struct autofs_sb_info 
  
  static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
  {
-       return (struct autofs_sb_info *)(sb->s_fs_info);
+       return sb->s_magic != AUTOFS_SUPER_MAGIC ?
+               NULL : (struct autofs_sb_info *)(sb->s_fs_info);
  }
  
  static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
@@@ -152,15 -153,9 +154,9 @@@ int autofs_expire_run(struct super_bloc
                      struct autofs_sb_info *,
                      struct autofs_packet_expire __user *);
  int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
-                          struct autofs_sb_info *sbi, int when);
+                          struct autofs_sb_info *sbi, unsigned int how);
  int autofs_expire_multi(struct super_block *, struct vfsmount *,
                        struct autofs_sb_info *, int __user *);
- struct dentry *autofs_expire_direct(struct super_block *sb,
-                                   struct vfsmount *mnt,
-                                   struct autofs_sb_info *sbi, int how);
- struct dentry *autofs_expire_indirect(struct super_block *sb,
-                                     struct vfsmount *mnt,
-                                     struct autofs_sb_info *sbi, int how);
  
  /* Device node initialization */
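
The autofs_sbi() hunk above adds a magic-number check so the helper returns NULL when the superblock does not belong to autofs, instead of blindly casting s_fs_info. A minimal sketch of that guard pattern, in user space and with invented names:

/*
 * Minimal user-space model of that guard: only trust the private
 * pointer when the magic number says the superblock is really ours.
 * The names and the magic values are illustrative stand-ins.
 */
#include <stdio.h>

#define AUTOFS_MAGIC_MODEL 0x0187UL

struct sb_model {
        unsigned long s_magic;
        void *s_fs_info;
};

struct autofs_info_model {
        int version;
};

static struct autofs_info_model *autofs_sbi_model(struct sb_model *sb)
{
        return sb->s_magic != AUTOFS_MAGIC_MODEL ?
                NULL : (struct autofs_info_model *)sb->s_fs_info;
}

int main(void)
{
        struct autofs_info_model info = { 5 };
        struct sb_model ours  = { AUTOFS_MAGIC_MODEL, &info };
        struct sb_model other = { 0xEF53UL, NULL };   /* some other filesystem */

        printf("ours=%p other=%p\n",
               (void *)autofs_sbi_model(&ours), (void *)autofs_sbi_model(&other));
        return 0;
}
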
  
diff --combined include/linux/sched.h
index 00de3e950dd486679a8cd4bcb4fa7b4fd7aeac8a,58eb3a2bc6954cebec6f05113568842e32690bf7..977cb57d7bc9e7183e6ca628e4f75d236ddf3951
@@@ -779,8 -779,7 +779,8 @@@ struct task_struct 
        struct list_head                ptrace_entry;
  
        /* PID/PID hash table linkage. */
 -      struct pid_link                 pids[PIDTYPE_MAX];
 +      struct pid                      *thread_pid;
 +      struct hlist_node               pid_links[PIDTYPE_MAX];
        struct list_head                thread_group;
        struct list_head                thread_node;
  
  #endif
  #ifdef CONFIG_DETECT_HUNG_TASK
        unsigned long                   last_switch_count;
+       unsigned long                   last_switch_time;
  #endif
        /* Filesystem information: */
        struct fs_struct                *fs;
  
  static inline struct pid *task_pid(struct task_struct *task)
  {
 -      return task->pids[PIDTYPE_PID].pid;
 -}
 -
 -static inline struct pid *task_tgid(struct task_struct *task)
 -{
 -      return task->group_leader->pids[PIDTYPE_PID].pid;
 -}
 -
 -/*
 - * Without tasklist or RCU lock it is not safe to dereference
 - * the result of task_pgrp/task_session even if task == current,
 - * we can race with another thread doing sys_setsid/sys_setpgid.
 - */
 -static inline struct pid *task_pgrp(struct task_struct *task)
 -{
 -      return task->group_leader->pids[PIDTYPE_PGID].pid;
 -}
 -
 -static inline struct pid *task_session(struct task_struct *task)
 -{
 -      return task->group_leader->pids[PIDTYPE_SID].pid;
 +      return task->thread_pid;
  }
  
  /*
@@@ -1259,7 -1279,7 +1260,7 @@@ static inline pid_t task_tgid_nr(struc
   */
  static inline int pid_alive(const struct task_struct *p)
  {
 -      return p->pids[PIDTYPE_PID].pid != NULL;
 +      return p->thread_pid != NULL;
  }
  
  static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
@@@ -1285,12 -1305,12 +1286,12 @@@ static inline pid_t task_session_vnr(st
  
  static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
  {
 -      return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns);
 +      return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
  }
  
  static inline pid_t task_tgid_vnr(struct task_struct *tsk)
  {
 -      return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL);
 +      return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
  }
  
  static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
diff --combined include/linux/sched/signal.h
index 4e9b77fb702de0604e04687a7aef9d107eed1726,e138ac16c6504a0da8ff1ad3fb06ef1bbb35e7f8..1be35729c2c5b9c0bb915653d7778e0acbdfd29b
@@@ -69,11 -69,6 +69,11 @@@ struct thread_group_cputimer 
        bool checking_timer;
  };
  
 +struct multiprocess_signals {
 +      sigset_t signal;
 +      struct hlist_node node;
 +};
 +
  /*
   * NOTE! "signal_struct" does not have its own
   * locking, because a shared signal_struct always
@@@ -95,9 -90,6 +95,9 @@@ struct signal_struct 
        /* shared signal handling: */
        struct sigpending       shared_pending;
  
 +      /* For collecting multiprocess signals during fork */
 +      struct hlist_head       multiprocess;
 +
        /* thread group exit support */
        int                     group_exit_code;
        /* overloaded:
  
  #endif
  
 -      struct pid *leader_pid;
 +      /* PID/PID hash table linkage. */
 +      struct pid *pids[PIDTYPE_MAX];
  
  #ifdef CONFIG_NO_HZ_FULL
        atomic_t tick_dep_mask;
@@@ -323,7 -314,7 +323,7 @@@ int force_sig_pkuerr(void __user *addr
  int force_sig_ptrace_errno_trap(int errno, void __user *addr);
  
  extern int send_sig_info(int, struct siginfo *, struct task_struct *);
- extern int force_sigsegv(int, struct task_struct *);
+ extern void force_sigsegv(int sig, struct task_struct *p);
  extern int force_sig_info(int, struct siginfo *, struct task_struct *);
  extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
  extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
@@@ -338,7 -329,7 +338,7 @@@ extern int send_sig(int, struct task_st
  extern int zap_other_threads(struct task_struct *p);
  extern struct sigqueue *sigqueue_alloc(void);
  extern void sigqueue_free(struct sigqueue *);
 -extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
 +extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
  extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
  
  static inline int restart_syscall(void)
@@@ -380,7 -371,6 +380,7 @@@ static inline int signal_pending_state(
   */
  extern void recalc_sigpending_and_wake(struct task_struct *t);
  extern void recalc_sigpending(void);
 +extern void calculate_sigpending(void);
  
  extern void signal_wake_up_state(struct task_struct *t, unsigned int state);
  
@@@ -393,8 -383,6 +393,8 @@@ static inline void ptrace_signal_wake_u
        signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
  }
  
 +void task_join_group_stop(struct task_struct *task);
 +
  #ifdef TIF_RESTORE_SIGMASK
  /*
   * Legacy restore_sigmask accessors.  These are inefficient on
@@@ -568,37 -556,6 +568,37 @@@ extern bool current_is_single_threaded(
  typedef int (*proc_visitor)(struct task_struct *p, void *data);
  void walk_process_tree(struct task_struct *top, proc_visitor, void *);
  
 +static inline
 +struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
 +{
 +      struct pid *pid;
 +      if (type == PIDTYPE_PID)
 +              pid = task_pid(task);
 +      else
 +              pid = task->signal->pids[type];
 +      return pid;
 +}
 +
 +static inline struct pid *task_tgid(struct task_struct *task)
 +{
 +      return task->signal->pids[PIDTYPE_TGID];
 +}
 +
 +/*
 + * Without tasklist or RCU lock it is not safe to dereference
 + * the result of task_pgrp/task_session even if task == current,
 + * we can race with another thread doing sys_setsid/sys_setpgid.
 + */
 +static inline struct pid *task_pgrp(struct task_struct *task)
 +{
 +      return task->signal->pids[PIDTYPE_PGID];
 +}
 +
 +static inline struct pid *task_session(struct task_struct *task)
 +{
 +      return task->signal->pids[PIDTYPE_SID];
 +}
 +
  static inline int get_nr_threads(struct task_struct *tsk)
  {
        return tsk->signal->nr_threads;
@@@ -617,7 -574,7 +617,7 @@@ static inline bool thread_group_leader(
   */
  static inline bool has_group_leader_pid(struct task_struct *p)
  {
 -      return task_pid(p) == p->signal->leader_pid;
 +      return task_pid(p) == task_tgid(p);
  }
  
  static inline
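
Taken together, the sched.h and sched/signal.h hunks above split the old pids[PIDTYPE_MAX] array: the per-thread PID moves to task_struct (thread_pid) while the group-wide TGID/PGID/SID pids move to the shared signal_struct, with task_pid_type() choosing between them. A minimal user-space model of that split, with simplified structs that are not the kernel's:

/*
 * A minimal user-space model of the PID linkage introduced above,
 * assuming only what the hunks show: PIDTYPE_PID lives on the task
 * (thread_pid) while TGID/PGID/SID live on the shared signal_struct,
 * and task_pid_type() picks between them.  Illustrative code, not the
 * kernel's.
 */
#include <stdio.h>

enum pid_type { PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX };

struct pid { int nr; };

struct signal_struct_model {
        struct pid *pids[PIDTYPE_MAX];   /* shared by every thread in the group */
};

struct task_struct_model {
        struct pid *thread_pid;          /* per-thread, replaces pids[PIDTYPE_PID] */
        struct signal_struct_model *signal;
};

static struct pid *task_pid_type(struct task_struct_model *task, enum pid_type type)
{
        return type == PIDTYPE_PID ? task->thread_pid : task->signal->pids[type];
}

int main(void)
{
        struct pid tid = { 1001 }, tgid = { 1000 };
        struct signal_struct_model sig = { { NULL, &tgid, &tgid, &tgid } };
        struct task_struct_model t = { &tid, &sig };

        printf("pid=%d tgid=%d\n",
               task_pid_type(&t, PIDTYPE_PID)->nr,
               task_pid_type(&t, PIDTYPE_TGID)->nr);
        return 0;
}
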
diff --combined include/linux/signal.h
index fe125b0335f7dba28ac0344b6527d86505da605e,e3d9e0640e6ec44332b4a6c4eacd158fc3d35259..3d4cd5db30a966b015a89cccc78b9fc2301bb895
@@@ -254,20 -254,18 +254,20 @@@ static inline int valid_signal(unsigne
  
  struct timespec;
  struct pt_regs;
 +enum pid_type;
  
  extern int next_signal(struct sigpending *pending, sigset_t *mask);
  extern int do_send_sig_info(int sig, struct siginfo *info,
 -                              struct task_struct *p, bool group);
 -extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 +                              struct task_struct *p, enum pid_type type);
 +extern int group_send_sig_info(int sig, struct siginfo *info,
 +                             struct task_struct *p, enum pid_type type);
  extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
  extern int sigprocmask(int, sigset_t *, sigset_t *);
  extern void set_current_blocked(sigset_t *);
  extern void __set_current_blocked(const sigset_t *);
  extern int show_unhandled_signals;
  
- extern int get_signal(struct ksignal *ksig);
+ extern bool get_signal(struct ksignal *ksig);
  extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
  extern void exit_signals(struct task_struct *tsk);
  extern void kernel_sigaction(int, __sighandler_t);
@@@ -289,7 -287,7 +289,7 @@@ static inline void disallow_signal(int 
  
  extern struct kmem_cache *sighand_cachep;
  
- extern int unhandled_signal(struct task_struct *tsk, int sig);
+ extern bool unhandled_signal(struct task_struct *tsk, int sig);
  
  /*
   * In POSIX a signal is sent either to a specific thread (Linux task)
diff --combined kernel/fork.c
index ff5037be7771709d12cf75362feb2431a765c9d8,23eb960c701d017169ea010e4a68f1454134a18e..d896e9ca38b0cccc5de00a0564ecd22179fa2c06
@@@ -310,8 -310,9 +310,9 @@@ static struct kmem_cache *mm_cachep
  
  struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
  {
-       struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+       struct vm_area_struct *vma;
  
+       vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
        if (vma)
                vma_init(vma, mm);
        return vma;
@@@ -1301,6 -1302,7 +1302,7 @@@ static int copy_mm(unsigned long clone_
        tsk->nvcsw = tsk->nivcsw = 0;
  #ifdef CONFIG_DETECT_HUNG_TASK
        tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+       tsk->last_switch_time = 0;
  #endif
  
        tsk->mm = NULL;
@@@ -1425,7 -1427,9 +1427,9 @@@ static int copy_sighand(unsigned long c
                return -ENOMEM;
  
        atomic_set(&sig->count, 1);
+       spin_lock_irq(&current->sighand->siglock);
        memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+       spin_unlock_irq(&current->sighand->siglock);
        return 0;
  }
  
@@@ -1487,7 -1491,6 +1491,7 @@@ static int copy_signal(unsigned long cl
        init_waitqueue_head(&sig->wait_chldexit);
        sig->curr_target = tsk;
        init_sigpending(&sig->shared_pending);
 +      INIT_HLIST_HEAD(&sig->multiprocess);
        seqlock_init(&sig->stats_lock);
        prev_cputime_init(&sig->prev_cputime);
  
@@@ -1581,22 -1584,10 +1585,22 @@@ static void posix_cpu_timers_init(struc
  static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
  #endif
  
 +static inline void init_task_pid_links(struct task_struct *task)
 +{
 +      enum pid_type type;
 +
 +      for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
 +              INIT_HLIST_NODE(&task->pid_links[type]);
 +      }
 +}
 +
  static inline void
  init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
  {
 -       task->pids[type].pid = pid;
 +      if (type == PIDTYPE_PID)
 +              task->thread_pid = pid;
 +      else
 +              task->signal->pids[type] = pid;
  }
  
  static inline void rcu_copy_process(struct task_struct *p)
@@@ -1634,7 -1625,6 +1638,7 @@@ static __latent_entropy struct task_str
  {
        int retval;
        struct task_struct *p;
 +      struct multiprocess_signals delayed;
  
        /*
         * Don't allow sharing the root directory with processes in a different
                        return ERR_PTR(-EINVAL);
        }
  
 +      /*
 +       * Force any signals received before this point to be delivered
 +       * before the fork happens.  Collect up signals sent to multiple
 +       * processes that happen during the fork and delay them so that
 +       * they appear to happen after the fork.
 +       */
 +      sigemptyset(&delayed.signal);
 +      INIT_HLIST_NODE(&delayed.node);
 +
 +      spin_lock_irq(&current->sighand->siglock);
 +      if (!(clone_flags & CLONE_THREAD))
 +              hlist_add_head(&delayed.node, &current->signal->multiprocess);
 +      recalc_sigpending();
 +      spin_unlock_irq(&current->sighand->siglock);
 +      retval = -ERESTARTNOINTR;
 +      if (signal_pending(current))
 +              goto fork_out;
 +
        retval = -ENOMEM;
        p = dup_task_struct(current, node);
        if (!p)
  
        rseq_fork(p, clone_flags);
  
 -      /*
 -       * Process group and session signals need to be delivered to just the
 -       * parent before the fork or both the parent and the child after the
 -       * fork. Restart if a signal comes in before we add the new process to
 -       * it's process group.
 -       * A fatal signal pending means that current will exit, so the new
 -       * thread can't slip out of an OOM kill (or normal SIGKILL).
 -      */
 -      recalc_sigpending();
 -      if (signal_pending(current)) {
 -              retval = -ERESTARTNOINTR;
 -              goto bad_fork_cancel_cgroup;
 -      }
 +      /* Don't start children in a dying pid namespace */
        if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
                retval = -ENOMEM;
                goto bad_fork_cancel_cgroup;
        }
  
 +      /* Let kill terminate clone/fork in the middle */
 +      if (fatal_signal_pending(current)) {
 +              retval = -EINTR;
 +              goto bad_fork_cancel_cgroup;
 +      }
 +
 +
 +      init_task_pid_links(p);
        if (likely(p->pid)) {
                ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
  
                init_task_pid(p, PIDTYPE_PID, pid);
                if (thread_group_leader(p)) {
 +                      init_task_pid(p, PIDTYPE_TGID, pid);
                        init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
                        init_task_pid(p, PIDTYPE_SID, task_session(current));
  
                                ns_of_pid(pid)->child_reaper = p;
                                p->signal->flags |= SIGNAL_UNKILLABLE;
                        }
 -
 -                      p->signal->leader_pid = pid;
 +                      p->signal->shared_pending.signal = delayed.signal;
                        p->signal->tty = tty_kref_get(current->signal->tty);
                        /*
                         * Inherit has_child_subreaper flag under the same
                                                         p->real_parent->signal->is_child_subreaper;
                        list_add_tail(&p->sibling, &p->real_parent->children);
                        list_add_tail_rcu(&p->tasks, &init_task.tasks);
 +                      attach_pid(p, PIDTYPE_TGID);
                        attach_pid(p, PIDTYPE_PGID);
                        attach_pid(p, PIDTYPE_SID);
                        __this_cpu_inc(process_counts);
                        current->signal->nr_threads++;
                        atomic_inc(&current->signal->live);
                        atomic_inc(&current->signal->sigcnt);
 +                      task_join_group_stop(p);
                        list_add_tail_rcu(&p->thread_group,
                                          &p->group_leader->thread_group);
                        list_add_tail_rcu(&p->thread_node,
                attach_pid(p, PIDTYPE_PID);
                nr_threads++;
        }
 -
        total_forks++;
 +      hlist_del_init(&delayed.node);
        spin_unlock(&current->sighand->siglock);
        syscall_tracepoint_update(p);
        write_unlock_irq(&tasklist_lock);
@@@ -2094,19 -2068,16 +2098,19 @@@ bad_fork_free
        put_task_stack(p);
        free_task(p);
  fork_out:
 +      spin_lock_irq(&current->sighand->siglock);
 +      hlist_del_init(&delayed.node);
 +      spin_unlock_irq(&current->sighand->siglock);
        return ERR_PTR(retval);
  }
  
 -static inline void init_idle_pids(struct pid_link *links)
 +static inline void init_idle_pids(struct task_struct *idle)
  {
        enum pid_type type;
  
        for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
 -              INIT_HLIST_NODE(&links[type].node); /* not really needed */
 -              links[type].pid = &init_struct_pid;
 +              INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */
 +              init_task_pid(idle, type, &init_struct_pid);
        }
  }
  
@@@ -2116,7 -2087,7 +2120,7 @@@ struct task_struct *fork_idle(int cpu
        task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
                            cpu_to_node(cpu));
        if (!IS_ERR(task)) {
 -              init_idle_pids(task->pids);
 +              init_idle_pids(task);
                init_idle(task, cpu);
        }
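
The copy_process() rework above replaces the old restart-on-pending-signal logic: before the child becomes visible the parent registers a collector on signal->multiprocess, group-directed signals that arrive during the fork are recorded into it, and the child's shared_pending is seeded from the collected set so such signals appear to arrive after the fork. A rough user-space model of that flow, with the siglock and hlist details elided and all names illustrative:

/*
 * Rough user-space model of the delayed-signal flow, assuming a sigset
 * can be treated as a plain bitmask; the siglock and hlist handling of
 * the real code are elided and all names are illustrative.
 */
#include <stdio.h>

struct mp_signals {
        unsigned long signal;            /* stands in for the collected sigset_t */
        struct mp_signals *next;         /* stands in for the hlist node */
};

struct signal_model {
        unsigned long shared_pending;
        struct mp_signals *multiprocess; /* collectors registered by in-flight forks */
};

/* A group-directed signal is also recorded in every in-flight fork's collector. */
static void send_group_signal(struct signal_model *sig, int signr)
{
        struct mp_signals *d;

        sig->shared_pending |= 1UL << signr;
        for (d = sig->multiprocess; d; d = d->next)
                d->signal |= 1UL << signr;
}

int main(void)
{
        struct signal_model parent = { 0, NULL };
        struct mp_signals delayed = { 0, NULL };
        unsigned long child_shared_pending;

        /* copy_process(): register a collector before the child is visible */
        delayed.next = parent.multiprocess;
        parent.multiprocess = &delayed;

        send_group_signal(&parent, 10);          /* arrives mid-fork */

        /* late in copy_process(): seed the child and drop the collector */
        child_shared_pending = delayed.signal;
        parent.multiprocess = delayed.next;

        printf("child shared_pending: %#lx\n", child_shared_pending);
        return 0;
}
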
  
diff --combined kernel/signal.c
index cfa9d10e731ab663c7c932f6ea9257e349101d5e,786bacc60649d26448b9eabb2fba0bfd1cf83934..5843c541fda9c30bb8e4165facc07a3df114f57b
@@@ -65,14 -65,14 +65,14 @@@ static void __user *sig_handler(struct 
        return t->sighand->action[sig - 1].sa.sa_handler;
  }
  
- static int sig_handler_ignored(void __user *handler, int sig)
+ static inline bool sig_handler_ignored(void __user *handler, int sig)
  {
        /* Is it explicitly or implicitly ignored? */
        return handler == SIG_IGN ||
-               (handler == SIG_DFL && sig_kernel_ignore(sig));
+              (handler == SIG_DFL && sig_kernel_ignore(sig));
  }
  
- static int sig_task_ignored(struct task_struct *t, int sig, bool force)
+ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
  {
        void __user *handler;
  
  
        if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
            handler == SIG_DFL && !(force && sig_kernel_only(sig)))
-               return 1;
+               return true;
  
        return sig_handler_ignored(handler, sig);
  }
  
- static int sig_ignored(struct task_struct *t, int sig, bool force)
+ static bool sig_ignored(struct task_struct *t, int sig, bool force)
  {
        /*
         * Blocked signals are never ignored, since the
@@@ -93,7 -93,7 +93,7 @@@
         * unblocked.
         */
        if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
-               return 0;
+               return false;
  
        /*
         * Tracers may want to know about even ignored signal unless it
         * by SIGNAL_UNKILLABLE task.
         */
        if (t->ptrace && sig != SIGKILL)
-               return 0;
+               return false;
  
        return sig_task_ignored(t, sig, force);
  }
   * Re-calculate pending state from the set of locally pending
   * signals, globally pending signals, and blocked signals.
   */
- static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
  {
        unsigned long ready;
        long i;
  
  #define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
  
- static int recalc_sigpending_tsk(struct task_struct *t)
+ static bool recalc_sigpending_tsk(struct task_struct *t)
  {
        if ((t->jobctl & JOBCTL_PENDING_MASK) ||
            PENDING(&t->pending, &t->blocked) ||
            PENDING(&t->signal->shared_pending, &t->blocked)) {
                set_tsk_thread_flag(t, TIF_SIGPENDING);
-               return 1;
+               return true;
        }
        /*
         * We must never clear the flag in another thread, or in current
         * when it's possible the current syscall is returning -ERESTART*.
         * So we don't clear it here, and only callers who know they should do.
         */
-       return 0;
+       return false;
  }
  
  /*
@@@ -172,17 -173,6 +173,17 @@@ void recalc_sigpending(void
  
  }
  
 +void calculate_sigpending(void)
 +{
 +      /* Have any signals or users of TIF_SIGPENDING been delayed
 +       * until after fork?
 +       */
 +      spin_lock_irq(&current->sighand->siglock);
 +      set_tsk_thread_flag(current, TIF_SIGPENDING);
 +      recalc_sigpending();
 +      spin_unlock_irq(&current->sighand->siglock);
 +}
 +
  /* Given the mask, find the first available signal that should be serviced. */
  
  #define SYNCHRONOUS_MASK \
@@@ -373,20 -363,6 +374,20 @@@ static bool task_participate_group_stop
        return false;
  }
  
 +void task_join_group_stop(struct task_struct *task)
 +{
 +      /* Have the new thread join an on-going signal group stop */
 +      unsigned long jobctl = current->jobctl;
 +      if (jobctl & JOBCTL_STOP_PENDING) {
 +              struct signal_struct *sig = current->signal;
 +              unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
 +              unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
 +              if (task_set_jobctl_pending(task, signr | gstop)) {
 +                      sig->group_stop_count++;
 +              }
 +      }
 +}
 +
  /*
   * allocate a new signal queue record
   * - this may be called without locks if and only if t == current, otherwise an
@@@ -529,13 -505,15 +530,15 @@@ flush_signal_handlers(struct task_struc
        }
  }
  
- int unhandled_signal(struct task_struct *tsk, int sig)
+ bool unhandled_signal(struct task_struct *tsk, int sig)
  {
        void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
        if (is_global_init(tsk))
-               return 1;
+               return true;
        if (handler != SIG_IGN && handler != SIG_DFL)
-               return 0;
+               return false;
        /* if ptraced, let the tracer determine */
        return !tsk->ptrace;
  }
@@@ -709,14 -687,14 +712,14 @@@ void signal_wake_up_state(struct task_s
   *
   * All callers must be holding the siglock.
   */
- static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
+ static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
  {
        struct sigqueue *q, *n;
        sigset_t m;
  
        sigandsets(&m, mask, &s->signal);
        if (sigisemptyset(&m))
-               return 0;
+               return;
  
        sigandnsets(&s->signal, &s->signal, mask);
        list_for_each_entry_safe(q, n, &s->list, list) {
                        __sigqueue_free(q);
                }
        }
-       return 1;
  }
  
  static inline int is_si_special(const struct siginfo *info)
@@@ -742,21 -719,16 +744,16 @@@ static inline bool si_fromuser(const st
  /*
   * called with RCU read lock from check_kill_permission()
   */
- static int kill_ok_by_cred(struct task_struct *t)
+ static bool kill_ok_by_cred(struct task_struct *t)
  {
        const struct cred *cred = current_cred();
        const struct cred *tcred = __task_cred(t);
  
-       if (uid_eq(cred->euid, tcred->suid) ||
-           uid_eq(cred->euid, tcred->uid)  ||
-           uid_eq(cred->uid,  tcred->suid) ||
-           uid_eq(cred->uid,  tcred->uid))
-               return 1;
-       if (ns_capable(tcred->user_ns, CAP_KILL))
-               return 1;
-       return 0;
+       return uid_eq(cred->euid, tcred->suid) ||
+              uid_eq(cred->euid, tcred->uid) ||
+              uid_eq(cred->uid, tcred->suid) ||
+              uid_eq(cred->uid, tcred->uid) ||
+              ns_capable(tcred->user_ns, CAP_KILL);
  }
  
  /*
@@@ -907,20 -879,24 +904,24 @@@ static bool prepare_signal(int sig, str
   * as soon as they're available, so putting the signal on the shared queue
   * will be equivalent to sending it to one such thread.
   */
- static inline int wants_signal(int sig, struct task_struct *p)
+ static inline bool wants_signal(int sig, struct task_struct *p)
  {
        if (sigismember(&p->blocked, sig))
-               return 0;
+               return false;
        if (p->flags & PF_EXITING)
-               return 0;
+               return false;
        if (sig == SIGKILL)
-               return 1;
+               return true;
        if (task_is_stopped_or_traced(p))
-               return 0;
+               return false;
        return task_curr(p) || !signal_pending(p);
  }
  
 -static void complete_signal(int sig, struct task_struct *p, int group)
 +static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
  {
        struct signal_struct *signal = p->signal;
        struct task_struct *t;
         */
        if (wants_signal(sig, p))
                t = p;
 -      else if (!group || thread_group_empty(p))
 +      else if ((type == PIDTYPE_PID) || thread_group_empty(p))
                /*
                 * There is just one thread and it does not need to be woken.
                 * It will dequeue unblocked signals before it runs again.
        return;
  }
  
- static inline int legacy_queue(struct sigpending *signals, int sig)
+ static inline bool legacy_queue(struct sigpending *signals, int sig)
  {
        return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
  }
@@@ -1023,7 -999,7 +1024,7 @@@ static inline void userns_fixup_signal_
  #endif
  
  static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 -                      int group, int from_ancestor_ns)
 +                      enum pid_type type, int from_ancestor_ns)
  {
        struct sigpending *pending;
        struct sigqueue *q;
                        from_ancestor_ns || (info == SEND_SIG_FORCED)))
                goto ret;
  
 -      pending = group ? &t->signal->shared_pending : &t->pending;
 +      pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
        /*
         * Short-circuit ignored signals and support queuing
         * exactly one non-rt signal, so that we can get more
  out_set:
        signalfd_notify(t, sig);
        sigaddset(&pending->signal, sig);
 -      complete_signal(sig, t, group);
 +
 +      /* Let multiprocess signals appear after on-going forks */
 +      if (type > PIDTYPE_TGID) {
 +              struct multiprocess_signals *delayed;
 +              hlist_for_each_entry(delayed, &t->signal->multiprocess, node) {
 +                      sigset_t *signal = &delayed->signal;
 +                      /* Can't queue both a stop and a continue signal */
 +                      if (sig == SIGCONT)
 +                              sigdelsetmask(signal, SIG_KERNEL_STOP_MASK);
 +                      else if (sig_kernel_stop(sig))
 +                              sigdelset(signal, SIGCONT);
 +                      sigaddset(signal, sig);
 +              }
 +      }
 +
 +      complete_signal(sig, t, type);
  ret:
 -      trace_signal_generate(sig, info, t, group, result);
 +      trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result);
        return ret;
  }
  
  static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
 -                      int group)
 +                      enum pid_type type)
  {
        int from_ancestor_ns = 0;
  
                           !task_pid_nr_ns(current, task_active_pid_ns(t));
  #endif
  
 -      return __send_signal(sig, info, t, group, from_ancestor_ns);
 +      return __send_signal(sig, info, t, type, from_ancestor_ns);
  }
  
  static void print_fatal_signal(int signr)
@@@ -1191,23 -1152,23 +1192,23 @@@ __setup("print-fatal-signals=", setup_p
  int
  __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
  {
 -      return send_signal(sig, info, p, 1);
 +      return send_signal(sig, info, p, PIDTYPE_TGID);
  }
  
  static int
  specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
  {
 -      return send_signal(sig, info, t, 0);
 +      return send_signal(sig, info, t, PIDTYPE_PID);
  }
  
  int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
 -                      bool group)
 +                      enum pid_type type)
  {
        unsigned long flags;
        int ret = -ESRCH;
  
        if (lock_task_sighand(p, &flags)) {
 -              ret = send_signal(sig, info, p, group);
 +              ret = send_signal(sig, info, p, type);
                unlock_task_sighand(p, &flags);
        }
  
@@@ -1314,8 -1275,7 +1315,8 @@@ struct sighand_struct *__lock_task_sigh
  /*
   * send signal info to all the members of a group
   */
 -int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 +int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
 +                      enum pid_type type)
  {
        int ret;
  
        rcu_read_unlock();
  
        if (!ret && sig)
 -              ret = do_send_sig_info(sig, info, p, true);
 +              ret = do_send_sig_info(sig, info, p, type);
  
        return ret;
  }
@@@ -1342,7 -1302,7 +1343,7 @@@ int __kill_pgrp_info(int sig, struct si
        success = 0;
        retval = -ESRCH;
        do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
 -              int err = group_send_sig_info(sig, info, p);
 +              int err = group_send_sig_info(sig, info, p, PIDTYPE_PGID);
                success |= !err;
                retval = err;
        } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
@@@ -1358,7 -1318,7 +1359,7 @@@ int kill_pid_info(int sig, struct sigin
                rcu_read_lock();
                p = pid_task(pid, PIDTYPE_PID);
                if (p)
 -                      error = group_send_sig_info(sig, info, p);
 +                      error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
                rcu_read_unlock();
                if (likely(!p || error != -ESRCH))
                        return error;
@@@ -1380,14 -1340,15 +1381,15 @@@ static int kill_proc_info(int sig, stru
        return error;
  }
  
- static int kill_as_cred_perm(const struct cred *cred,
-                            struct task_struct *target)
+ static inline bool kill_as_cred_perm(const struct cred *cred,
+                                    struct task_struct *target)
  {
        const struct cred *pcred = __task_cred(target);
-       if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) &&
-           !uid_eq(cred->uid,  pcred->suid) && !uid_eq(cred->uid,  pcred->uid))
-               return 0;
-       return 1;
+       return uid_eq(cred->euid, pcred->suid) ||
+              uid_eq(cred->euid, pcred->uid) ||
+              uid_eq(cred->uid, pcred->suid) ||
+              uid_eq(cred->uid, pcred->uid);
  }
  
  /* like kill_pid_info(), but doesn't use uid/euid of "current" */
@@@ -1417,7 -1378,7 +1419,7 @@@ int kill_pid_info_as_cred(int sig, stru
  
        if (sig) {
                if (lock_task_sighand(p, &flags)) {
 -                      ret = __send_signal(sig, info, p, 1, 0);
 +                      ret = __send_signal(sig, info, p, PIDTYPE_TGID, 0);
                        unlock_task_sighand(p, &flags);
                } else
                        ret = -ESRCH;
@@@ -1461,8 -1422,7 +1463,8 @@@ static int kill_something_info(int sig
                for_each_process(p) {
                        if (task_pid_vnr(p) > 1 &&
                                        !same_thread_group(p, current)) {
 -                              int err = group_send_sig_info(sig, info, p);
 +                              int err = group_send_sig_info(sig, info, p,
 +                                                            PIDTYPE_MAX);
                                ++count;
                                if (err != -EPERM)
                                        retval = err;
@@@ -1488,7 -1448,7 +1490,7 @@@ int send_sig_info(int sig, struct sigin
        if (!valid_signal(sig))
                return -EINVAL;
  
 -      return do_send_sig_info(sig, info, p, false);
 +      return do_send_sig_info(sig, info, p, PIDTYPE_PID);
  }
  
  #define __si_special(priv) \
@@@ -1500,8 -1460,7 +1502,7 @@@ send_sig(int sig, struct task_struct *p
        return send_sig_info(sig, __si_special(priv), p);
  }
  
- void
- force_sig(int sig, struct task_struct *p)
+ void force_sig(int sig, struct task_struct *p)
  {
        force_sig_info(sig, SEND_SIG_PRIV, p);
  }
   * the problem was already a SIGSEGV, we'll want to
   * make sure we don't even try to deliver the signal..
   */
- int
- force_sigsegv(int sig, struct task_struct *p)
+ void force_sigsegv(int sig, struct task_struct *p)
  {
        if (sig == SIGSEGV) {
                unsigned long flags;
                spin_unlock_irqrestore(&p->sighand->siglock, flags);
        }
        force_sig(SIGSEGV, p);
-       return 0;
  }
  
  int force_sig_fault(int sig, int code, void __user *addr
@@@ -1706,20 -1663,17 +1705,20 @@@ void sigqueue_free(struct sigqueue *q
                __sigqueue_free(q);
  }
  
 -int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
 +int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
  {
        int sig = q->info.si_signo;
        struct sigpending *pending;
 +      struct task_struct *t;
        unsigned long flags;
        int ret, result;
  
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
  
        ret = -1;
 -      if (!likely(lock_task_sighand(t, &flags)))
 +      rcu_read_lock();
 +      t = pid_task(pid, type);
 +      if (!t || !likely(lock_task_sighand(t, &flags)))
                goto ret;
  
        ret = 1; /* the signal is ignored */
        q->info.si_overrun = 0;
  
        signalfd_notify(t, sig);
 -      pending = group ? &t->signal->shared_pending : &t->pending;
 +      pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
        list_add_tail(&q->list, &pending->list);
        sigaddset(&pending->signal, sig);
 -      complete_signal(sig, t, group);
 +      complete_signal(sig, t, type);
        result = TRACE_SIGNAL_DELIVERED;
  out:
 -      trace_signal_generate(sig, &q->info, t, group, result);
 +      trace_signal_generate(sig, &q->info, t, type != PIDTYPE_PID, result);
        unlock_task_sighand(t, &flags);
  ret:
 +      rcu_read_unlock();
        return ret;
  }
  
@@@ -1923,10 -1876,10 +1922,10 @@@ static void do_notify_parent_cldstop(st
        spin_unlock_irqrestore(&sighand->siglock, flags);
  }
  
- static inline int may_ptrace_stop(void)
+ static inline bool may_ptrace_stop(void)
  {
        if (!likely(current->ptrace))
-               return 0;
+               return false;
        /*
         * Are we in the middle of do_coredump?
         * If so and our tracer is also part of the coredump stopping
         */
        if (unlikely(current->mm->core_state) &&
            unlikely(current->mm == current->parent->mm))
-               return 0;
+               return false;
  
-       return 1;
+       return true;
  }
  
  /*
   * Return non-zero if there is a SIGKILL that should be waking us up.
   * Called with the siglock held.
   */
- static int sigkill_pending(struct task_struct *tsk)
+ static bool sigkill_pending(struct task_struct *tsk)
  {
-       return  sigismember(&tsk->pending.signal, SIGKILL) ||
-               sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
+       return sigismember(&tsk->pending.signal, SIGKILL) ||
+              sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
  }
  
  /*
@@@ -2334,7 -2287,7 +2333,7 @@@ static int ptrace_signal(int signr, sig
        return signr;
  }
  
- int get_signal(struct ksignal *ksig)
+ bool get_signal(struct ksignal *ksig)
  {
        struct sighand_struct *sighand = current->sighand;
        struct signal_struct *signal = current->signal;
                task_work_run();
  
        if (unlikely(uprobe_deny_signal()))
-               return 0;
+               return false;
  
        /*
         * Do this once, we can't return to user-mode if freezing() == T.
@@@ -2801,7 -2754,7 +2800,7 @@@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, 
  }
  #endif
  
- static int do_sigpending(sigset_t *set)
+ static void do_sigpending(sigset_t *set)
  {
        spin_lock_irq(&current->sighand->siglock);
        sigorsets(set, &current->pending.signal,
  
        /* Outside the lock because only this thread touches it.  */
        sigandsets(set, &current->blocked, set);
-       return 0;
  }
  
  /**
  SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
  {
        sigset_t set;
-       int err;
  
        if (sigsetsize > sizeof(*uset))
                return -EINVAL;
  
-       err = do_sigpending(&set);
-       if (!err && copy_to_user(uset, &set, sigsetsize))
-               err = -EFAULT;
-       return err;
+       do_sigpending(&set);
+       if (copy_to_user(uset, &set, sigsetsize))
+               return -EFAULT;
+       return 0;
  }
  
  #ifdef CONFIG_COMPAT
@@@ -2838,15 -2791,13 +2837,13 @@@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, c
                compat_size_t, sigsetsize)
  {
        sigset_t set;
-       int err;
  
        if (sigsetsize > sizeof(*uset))
                return -EINVAL;
  
-       err = do_sigpending(&set);
-       if (!err)
-               err = put_compat_sigset(uset, &set, sigsetsize);
-       return err;
+       do_sigpending(&set);
+       return put_compat_sigset(uset, &set, sigsetsize);
  }
  #endif
  
@@@ -3239,7 -3190,7 +3236,7 @@@ do_send_specific(pid_t tgid, pid_t pid
                 * probe.  No signal is actually delivered.
                 */
                if (!error && sig) {
 -                      error = do_send_sig_info(sig, info, p, false);
 +                      error = do_send_sig_info(sig, info, p, PIDTYPE_PID);
                        /*
                         * If lock_task_sighand() failed we pretend the task
                         * dies after receiving the signal. The window is tiny,
@@@ -3608,25 -3559,26 +3605,26 @@@ int __compat_save_altstack(compat_stack
  SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset)
  {
        sigset_t set;
-       int err;
  
        if (sizeof(old_sigset_t) > sizeof(*uset))
                return -EINVAL;
  
-       err = do_sigpending(&set);
-       if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t)))
-               err = -EFAULT;
-       return err;
+       do_sigpending(&set);
+       if (copy_to_user(uset, &set, sizeof(old_sigset_t)))
+               return -EFAULT;
+       return 0;
  }
  
  #ifdef CONFIG_COMPAT
  COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
  {
        sigset_t set;
-       int err = do_sigpending(&set);
-       if (!err)
-               err = put_user(set.sig[0], set32);
-       return err;
+       do_sigpending(&set);
+       return put_user(set.sig[0], set32);
  }
  #endif
  
@@@ -3697,25 -3649,23 +3695,23 @@@ SYSCALL_DEFINE4(rt_sigaction, int, sig
                size_t, sigsetsize)
  {
        struct k_sigaction new_sa, old_sa;
-       int ret = -EINVAL;
+       int ret;
  
        /* XXX: Don't preclude handling different sized sigset_t's.  */
        if (sigsetsize != sizeof(sigset_t))
-               goto out;
+               return -EINVAL;
  
-       if (act) {
-               if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
-                       return -EFAULT;
-       }
+       if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
+               return -EFAULT;
  
        ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+       if (ret)
+               return ret;
  
-       if (!ret && oact) {
-               if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
-                       return -EFAULT;
-       }
- out:
-       return ret;
+       if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
+               return -EFAULT;
+       return 0;
  }
  #ifdef CONFIG_COMPAT
  COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
@@@ -4006,7 -3956,7 +4002,7 @@@ void kdb_send_sig(struct task_struct *t
                           "the deadlock.\n");
                return;
        }
 -      ret = send_signal(sig, SEND_SIG_PRIV, t, false);
 +      ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID);
        spin_unlock(&t->sighand->siglock);
        if (ret)
                kdb_printf("Fail to deliver Signal %d to process %d.\n",
diff --combined mm/oom_kill.c
index 7c74dcc2d26d668cc471ff3ec98db71d47577fa2,0e10b864e0742da9f545130dbd913e1e8c7a8efb..b5b25e4dcbbb707e5d7ad0ae588a88ca2dc49e88
@@@ -400,7 -400,8 +400,8 @@@ static void dump_tasks(struct mem_cgrou
        struct task_struct *p;
        struct task_struct *task;
  
-       pr_info("[ pid ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
+       pr_info("Tasks state (memory values in pages):\n");
+       pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
        rcu_read_lock();
        for_each_process(p) {
                if (oom_unkillable_task(p, memcg, nodemask))
                        continue;
                }
  
-               pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
+               pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu         %5hd %s\n",
                        task->pid, from_kuid(&init_user_ns, task_uid(task)),
                        task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
                        mm_pgtables_bytes(task->mm),
@@@ -487,9 -488,10 +488,10 @@@ static DECLARE_WAIT_QUEUE_HEAD(oom_reap
  static struct task_struct *oom_reaper_list;
  static DEFINE_SPINLOCK(oom_reaper_lock);
  
- void __oom_reap_task_mm(struct mm_struct *mm)
+ bool __oom_reap_task_mm(struct mm_struct *mm)
  {
        struct vm_area_struct *vma;
+       bool ret = true;
  
        /*
         * Tell all users of get_user/copy_from_user etc... that the content
                        struct mmu_gather tlb;
  
                        tlb_gather_mmu(&tlb, mm, start, end);
-                       mmu_notifier_invalidate_range_start(mm, start, end);
+                       if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+                               ret = false;
+                               continue;
+                       }
                        unmap_page_range(&tlb, vma, start, end, NULL);
                        mmu_notifier_invalidate_range_end(mm, start, end);
                        tlb_finish_mmu(&tlb, start, end);
                }
        }
+       return ret;
  }
  
+ /*
+  * Reaps the address space of the given task.
+  *
+  * Returns true on success and false if none or part of the address space
+  * has been reclaimed and the caller should retry later.
+  */
  static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
  {
        bool ret = true;
  
-       /*
-        * We have to make sure to not race with the victim exit path
-        * and cause premature new oom victim selection:
-        * oom_reap_task_mm             exit_mm
-        *   mmget_not_zero
-        *                                mmput
-        *                                  atomic_dec_and_test
-        *                                exit_oom_victim
-        *                              [...]
-        *                              out_of_memory
-        *                                select_bad_process
-        *                                  # no TIF_MEMDIE task selects new victim
-        *  unmap_page_range # frees some memory
-        */
-       mutex_lock(&oom_lock);
        if (!down_read_trylock(&mm->mmap_sem)) {
-               ret = false;
                trace_skip_task_reaping(tsk->pid);
-               goto unlock_oom;
-       }
-       /*
-        * If the mm has invalidate_{start,end}() notifiers that could block,
-        * sleep to give the oom victim some more time.
-        * TODO: we really want to get rid of this ugly hack and make sure that
-        * notifiers cannot block for unbounded amount of time
-        */
-       if (mm_has_blockable_invalidate_notifiers(mm)) {
-               up_read(&mm->mmap_sem);
-               schedule_timeout_idle(HZ);
-               goto unlock_oom;
+               return false;
        }
  
        /*
         * down_write();up_write() cycle in exit_mmap().
         */
        if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
-               up_read(&mm->mmap_sem);
                trace_skip_task_reaping(tsk->pid);
-               goto unlock_oom;
+               goto out_unlock;
        }
  
        trace_start_task_reaping(tsk->pid);
  
-       __oom_reap_task_mm(mm);
+       /* failed to reap part of the address space. Try again later */
+       ret = __oom_reap_task_mm(mm);
+       if (!ret)
+               goto out_finish;
  
        pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                        task_pid_nr(tsk), tsk->comm,
                        K(get_mm_counter(mm, MM_ANONPAGES)),
                        K(get_mm_counter(mm, MM_FILEPAGES)),
                        K(get_mm_counter(mm, MM_SHMEMPAGES)));
+ out_finish:
+       trace_finish_task_reaping(tsk->pid);
+ out_unlock:
        up_read(&mm->mmap_sem);
  
-       trace_finish_task_reaping(tsk->pid);
- unlock_oom:
-       mutex_unlock(&oom_lock);
        return ret;
  }
  
@@@ -843,68 -829,12 +829,12 @@@ static bool task_will_free_mem(struct t
        return ret;
  }
  
- static void oom_kill_process(struct oom_control *oc, const char *message)
+ static void __oom_kill_process(struct task_struct *victim)
  {
-       struct task_struct *p = oc->chosen;
-       unsigned int points = oc->chosen_points;
-       struct task_struct *victim = p;
-       struct task_struct *child;
-       struct task_struct *t;
+       struct task_struct *p;
        struct mm_struct *mm;
-       unsigned int victim_points = 0;
-       static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
-                                             DEFAULT_RATELIMIT_BURST);
        bool can_oom_reap = true;
  
-       /*
-        * If the task is already exiting, don't alarm the sysadmin or kill
-        * its children or threads, just give it access to memory reserves
-        * so it can die quickly
-        */
-       task_lock(p);
-       if (task_will_free_mem(p)) {
-               mark_oom_victim(p);
-               wake_oom_reaper(p);
-               task_unlock(p);
-               put_task_struct(p);
-               return;
-       }
-       task_unlock(p);
-       if (__ratelimit(&oom_rs))
-               dump_header(oc, p);
-       pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
-               message, task_pid_nr(p), p->comm, points);
-       /*
-        * If any of p's children has a different mm and is eligible for kill,
-        * the one with the highest oom_badness() score is sacrificed for its
-        * parent.  This attempts to lose the minimal amount of work done while
-        * still freeing memory.
-        */
-       read_lock(&tasklist_lock);
-       for_each_thread(p, t) {
-               list_for_each_entry(child, &t->children, sibling) {
-                       unsigned int child_points;
-                       if (process_shares_mm(child, p->mm))
-                               continue;
-                       /*
-                        * oom_badness() returns 0 if the thread is unkillable
-                        */
-                       child_points = oom_badness(child,
-                               oc->memcg, oc->nodemask, oc->totalpages);
-                       if (child_points > victim_points) {
-                               put_task_struct(victim);
-                               victim = child;
-                               victim_points = child_points;
-                               get_task_struct(victim);
-                       }
-               }
-       }
-       read_unlock(&tasklist_lock);
        p = find_lock_task_mm(victim);
        if (!p) {
                put_task_struct(victim);
         * in order to prevent the OOM victim from depleting the memory
         * reserves from the user space under its control.
         */
 -      do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
 +      do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
        mark_oom_victim(victim);
        pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
                task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
                 */
                if (unlikely(p->flags & PF_KTHREAD))
                        continue;
 -              do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
 +              do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
        }
        rcu_read_unlock();
  
  }
  #undef K
  
+ /*
+  * Kill provided task unless it's secured by setting
+  * oom_score_adj to OOM_SCORE_ADJ_MIN.
+  */
+ static int oom_kill_memcg_member(struct task_struct *task, void *unused)
+ {
+       if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+               get_task_struct(task);
+               __oom_kill_process(task);
+       }
+       return 0;
+ }
+ static void oom_kill_process(struct oom_control *oc, const char *message)
+ {
+       struct task_struct *p = oc->chosen;
+       unsigned int points = oc->chosen_points;
+       struct task_struct *victim = p;
+       struct task_struct *child;
+       struct task_struct *t;
+       struct mem_cgroup *oom_group;
+       unsigned int victim_points = 0;
+       static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+                                             DEFAULT_RATELIMIT_BURST);
+       /*
+        * If the task is already exiting, don't alarm the sysadmin or kill
+        * its children or threads, just give it access to memory reserves
+        * so it can die quickly
+        */
+       task_lock(p);
+       if (task_will_free_mem(p)) {
+               mark_oom_victim(p);
+               wake_oom_reaper(p);
+               task_unlock(p);
+               put_task_struct(p);
+               return;
+       }
+       task_unlock(p);
+       if (__ratelimit(&oom_rs))
+               dump_header(oc, p);
+       pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
+               message, task_pid_nr(p), p->comm, points);
+       /*
+        * If any of p's children has a different mm and is eligible for kill,
+        * the one with the highest oom_badness() score is sacrificed for its
+        * parent.  This attempts to lose the minimal amount of work done while
+        * still freeing memory.
+        */
+       read_lock(&tasklist_lock);
+       for_each_thread(p, t) {
+               list_for_each_entry(child, &t->children, sibling) {
+                       unsigned int child_points;
+                       if (process_shares_mm(child, p->mm))
+                               continue;
+                       /*
+                        * oom_badness() returns 0 if the thread is unkillable
+                        */
+                       child_points = oom_badness(child,
+                               oc->memcg, oc->nodemask, oc->totalpages);
+                       if (child_points > victim_points) {
+                               put_task_struct(victim);
+                               victim = child;
+                               victim_points = child_points;
+                               get_task_struct(victim);
+                       }
+               }
+       }
+       read_unlock(&tasklist_lock);
+       /*
+        * Do we need to kill the entire memory cgroup?
+        * Or even one of the ancestor memory cgroups?
+        * Check this out before killing the victim task.
+        */
+       oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
+       __oom_kill_process(victim);
+       /*
+        * If necessary, kill all tasks in the selected memory cgroup.
+        */
+       if (oom_group) {
+               mem_cgroup_print_oom_group(oom_group);
+               mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
+               mem_cgroup_put(oom_group);
+       }
+ }
  /*
   * Determines whether the kernel must panic because of the panic_on_oom sysctl.
   */
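
The oom_kill.c rework above splits the kill itself into __oom_kill_process() and, when the victim belongs to a memory cgroup that must be treated as an indivisible OOM group, walks every member via mem_cgroup_scan_tasks() and kills all tasks except those protected by OOM_SCORE_ADJ_MIN (oom_kill_memcg_member). A small user-space sketch of that scan-and-filter pattern, not the kernel implementation:

/*
 * User-space sketch of that scan-and-filter pattern; the names, the
 * scan helper and the task list are all made up for illustration.
 */
#include <stdio.h>

#define OOM_SCORE_ADJ_MIN (-1000)

struct task_model {
        const char *comm;
        int oom_score_adj;
};

static void kill_one(struct task_model *t)
{
        printf("killing %s\n", t->comm);
}

/* Mirrors the role of oom_kill_memcg_member(): skip protected tasks. */
static int kill_memcg_member(struct task_model *t, void *unused)
{
        (void)unused;
        if (t->oom_score_adj != OOM_SCORE_ADJ_MIN)
                kill_one(t);
        return 0;
}

/* Mirrors the role of mem_cgroup_scan_tasks(): apply fn to each member. */
static void scan_tasks(struct task_model *tasks, int n,
                       int (*fn)(struct task_model *, void *), void *arg)
{
        int i;

        for (i = 0; i < n && !fn(&tasks[i], arg); i++)
                ;
}

int main(void)
{
        struct task_model group[] = {
                { "worker", 0 },
                { "agent",  OOM_SCORE_ADJ_MIN },   /* protected member */
        };

        scan_tasks(group, 2, kill_memcg_member, NULL);
        return 0;
}
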
diff --combined virt/kvm/kvm_main.c
index 0df592c4f09f2da37ffbce22c24c9228f5a7f3bd,0116b449b99346752e542b52dc70ceb5c469e0b7..f986e31fa68cceaf8b16385e9869dc5e2bf55e4a
@@@ -140,9 -140,10 +140,10 @@@ static void kvm_uevent_notify_change(un
  static unsigned long long kvm_createvm_count;
  static unsigned long long kvm_active_vms;
  
- __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
-               unsigned long start, unsigned long end)
+ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+               unsigned long start, unsigned long end, bool blockable)
  {
+       return 0;
  }
  
  bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
@@@ -360,13 -361,15 +361,15 @@@ static void kvm_mmu_notifier_change_pte
        srcu_read_unlock(&kvm->srcu, idx);
  }
  
- static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
                                                    struct mm_struct *mm,
                                                    unsigned long start,
-                                                   unsigned long end)
+                                                   unsigned long end,
+                                                   bool blockable)
  {
        struct kvm *kvm = mmu_notifier_to_kvm(mn);
        int need_tlb_flush = 0, idx;
+       int ret;
  
        idx = srcu_read_lock(&kvm->srcu);
        spin_lock(&kvm->mmu_lock);
  
        spin_unlock(&kvm->mmu_lock);
  
-       kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+       ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
  
        srcu_read_unlock(&kvm->srcu, idx);
+       return ret;
  }
  
  static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@@ -2568,7 -2573,7 +2573,7 @@@ static long kvm_vcpu_ioctl(struct file 
                if (arg)
                        goto out;
                oldpid = rcu_access_pointer(vcpu->pid);
 -              if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
 +              if (unlikely(oldpid != task_pid(current))) {
                        /* The thread running this VCPU changed. */
                        struct pid *newpid;
  