Merge branch 'for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
author	Linus Torvalds <[email protected]>	Wed, 2 Sep 2015 15:04:23 +0000 (08:04 -0700)
committer	Linus Torvalds <[email protected]>	Wed, 2 Sep 2015 15:04:23 +0000 (08:04 -0700)
Pull cgroup updates from Tejun Heo:

 - a new PIDs controller is added.  It turns out that PIDs are actually
   an independent resource from kmem due to the limited PID space.

 - more core preparations for the v2 interface.  Once the cpu side
   interface is settled, it should be ready for lifting the devel mask.
   for-4.3-unified-base was temporarily branched so that other trees
   (block) can pull cgroup core changes that blkcg changes depend on.

 - a non-critical idr_preload usage bug fix.

* 'for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: pids: fix invalid get/put usage
  cgroup: introduce cgroup_subsys->legacy_name
  cgroup: don't print subsystems for the default hierarchy
  cgroup: make cftype->private a unsigned long
  cgroup: export cgrp_dfl_root
  cgroup: define controller file conventions
  cgroup: fix idr_preload usage
  cgroup: add documentation for the PIDs controller
  cgroup: implement the PIDs subsystem
  cgroup: allow a cgroup subsystem to reject a fork
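
The last three entries above add a can_fork/cancel_fork/fork hook triple to
struct cgroup_subsys.  As a rough, hypothetical sketch of a controller using
those hooks (the "demo" names are invented; the callback signatures follow the
call sites in the kernel/cgroup.c hunks below, and a real controller would also
need an entry in linux/cgroup_subsys.h plus css_alloc/css_free callbacks):

    /* Hypothetical controller skeleton -- not the in-tree PIDs controller. */
    #include <linux/cgroup.h>

    /* Runs from cgroup_can_fork() before the child is visible; a non-zero
     * return aborts the fork with that error code.  State for the later
     * callbacks can be stashed through @priv_p. */
    static int demo_can_fork(struct task_struct *task, void **priv_p)
    {
        /* e.g. try to charge a per-cgroup counter here */
        return 0;       /* or -EAGAIN to reject the fork */
    }

    /* Runs from cgroup_cancel_fork() if copy_process() fails after
     * demo_can_fork() has already succeeded: undo the charge. */
    static void demo_cancel_fork(struct task_struct *task, void *priv)
    {
    }

    /* Runs from cgroup_post_fork() once the child is on the task list. */
    static void demo_fork(struct task_struct *task, void *priv)
    {
    }

    struct cgroup_subsys demo_cgrp_subsys = {
        .can_fork    = demo_can_fork,
        .cancel_fork = demo_cancel_fork,
        .fork        = demo_fork,
    };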

init/Kconfig
kernel/cgroup.c
kernel/fork.c
kernel/sched/core.c

diff --combined init/Kconfig
index ba1e6eaf4c36e72bdf29d0b683b9c118fb0b38db,2184b34cbf73f9e1e79bf19222938ab8a4161506..bb9b4dd55889f0605b07ddfc73cc105d72b18908
@@@ -538,6 -538,15 +538,6 @@@ config RCU_STALL_COMMO
  config CONTEXT_TRACKING
         bool
  
 -config RCU_USER_QS
 -      bool
 -      help
 -        This option sets hooks on kernel / userspace boundaries and
 -        puts RCU in extended quiescent state when the CPU runs in
 -        userspace. It means that when a CPU runs in userspace, it is
 -        excluded from the global RCU state machine and thus doesn't
 -        try to keep the timer tick on for RCU.
 -
  config CONTEXT_TRACKING_FORCE
        bool "Force context tracking"
        depends on CONTEXT_TRACKING
@@@ -698,7 -707,6 +698,7 @@@ config RCU_BOOST_DELA
  config RCU_NOCB_CPU
        bool "Offload RCU callback processing from boot-selected CPUs"
        depends on TREE_RCU || PREEMPT_RCU
 +      depends on RCU_EXPERT || NO_HZ_FULL
        default n
        help
          Use this option to reduce OS jitter for aggressive HPC or
@@@ -947,6 -955,22 +947,22 @@@ config CGROUP_FREEZE
          Provides a way to freeze and unfreeze all tasks in a
          cgroup.
  
+ config CGROUP_PIDS
+       bool "PIDs cgroup subsystem"
+       help
+         Provides enforcement of process number limits in the scope of a
+         cgroup. Any attempt to fork more processes than is allowed in the
+         cgroup will fail. PIDs are fundamentally a global resource because it
+         is fairly trivial to reach PID exhaustion before you reach even a
+         conservative kmemcg limit. As a result, it is possible to grind a
+         system to a halt without being limited by other cgroup policies. The
+         PIDs cgroup subsystem is designed to stop this from happening.
+ 
+         It should be noted that organisational operations (such as attaching
+         to a cgroup hierarchy) will *not* be blocked by the PIDs subsystem,
+         since the PIDs limit only affects a process's ability to fork, not to
+         attach to a cgroup.
+ 
  config CGROUP_DEVICE
        bool "Device controller for cgroups"
        help
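
Once CGROUP_PIDS is enabled, the controller is driven entirely through
cgroupfs; pids.max and pids.current are the knobs described by the
documentation patch in this series.  A minimal userspace sketch, assuming the
legacy hierarchy is mounted at /sys/fs/cgroup/pids (the mount point and the
"demo" group name are assumptions, not part of this diff):

    /* Cap a cgroup at 16 tasks and move the current process into it. */
    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/stat.h>
    #include <unistd.h>

    static void write_file(const char *path, const char *val)
    {
        FILE *f = fopen(path, "w");

        if (!f || fputs(val, f) == EOF || fclose(f) == EOF) {
            perror(path);
            exit(1);
        }
    }

    int main(void)
    {
        char buf[32];

        if (mkdir("/sys/fs/cgroup/pids/demo", 0755) && errno != EEXIST) {
            perror("mkdir");
            return 1;
        }
        write_file("/sys/fs/cgroup/pids/demo/pids.max", "16");

        /* From here on, fork() beyond the limit fails inside the group. */
        snprintf(buf, sizeof(buf), "%d", getpid());
        write_file("/sys/fs/cgroup/pids/demo/cgroup.procs", buf);
        return 0;
    }

Writing "max" to pids.max lifts the limit again, and pids.current reports the
number of tasks currently charged to the group.
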
diff --combined kernel/cgroup.c
index b89f3168411bc1e9f4f96a42fe5732f3e36c1d41,4ec1b7ee5de8c71ead9916a048ff5b23356db8a2..f3f5cd5e2c0d9ccd8b954e9191cd9169d53c32d7
@@@ -107,8 -107,8 +107,8 @@@ static DEFINE_SPINLOCK(release_agent_pa
  struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
  
  #define cgroup_assert_mutex_or_rcu_locked()                           \
 -      rcu_lockdep_assert(rcu_read_lock_held() ||                      \
 -                         lockdep_is_held(&cgroup_mutex),              \
 +      RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&                       \
 +                         !lockdep_is_held(&cgroup_mutex),             \
                           "cgroup_mutex or RCU read lock required");
  
  /*
@@@ -145,6 -145,7 +145,7 @@@ static const char *cgroup_subsys_name[
   * part of that cgroup.
   */
  struct cgroup_root cgrp_dfl_root;
+ EXPORT_SYMBOL_GPL(cgrp_dfl_root);
  
  /*
   * The default hierarchy always exists but is hidden until mounted for the
@@@ -186,6 -187,9 +187,9 @@@ static u64 css_serial_nr_next = 1
  static unsigned long have_fork_callback __read_mostly;
  static unsigned long have_exit_callback __read_mostly;
  
+ /* Ditto for the can_fork callback. */
+ static unsigned long have_canfork_callback __read_mostly;
  static struct cftype cgroup_dfl_base_files[];
  static struct cftype cgroup_legacy_base_files[];
  
@@@ -207,7 -211,7 +211,7 @@@ static int cgroup_idr_alloc(struct idr 
  
        idr_preload(gfp_mask);
        spin_lock_bh(&cgroup_idr_lock);
-       ret = idr_alloc(idr, ptr, start, end, gfp_mask);
+       ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_WAIT);
        spin_unlock_bh(&cgroup_idr_lock);
        idr_preload_end();
        return ret;
@@@ -1027,10 -1031,13 +1031,13 @@@ static const struct file_operations pro
  static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
                              char *buf)
  {
+       struct cgroup_subsys *ss = cft->ss;
        if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
            !(cgrp->root->flags & CGRP_ROOT_NOPREFIX))
                snprintf(buf, CGROUP_FILE_NAME_MAX, "%s.%s",
-                        cft->ss->name, cft->name);
+                        cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
+                        cft->name);
        else
                strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
        return buf;
@@@ -1332,9 -1339,10 +1339,10 @@@ static int cgroup_show_options(struct s
        struct cgroup_subsys *ss;
        int ssid;
  
-       for_each_subsys(ss, ssid)
-               if (root->subsys_mask & (1 << ssid))
-                       seq_printf(seq, ",%s", ss->name);
+       if (root != &cgrp_dfl_root)
+               for_each_subsys(ss, ssid)
+                       if (root->subsys_mask & (1 << ssid))
+                               seq_printf(seq, ",%s", ss->legacy_name);
        if (root->flags & CGRP_ROOT_NOPREFIX)
                seq_puts(seq, ",noprefix");
        if (root->flags & CGRP_ROOT_XATTR)
@@@ -1447,7 -1455,7 +1455,7 @@@ static int parse_cgroupfs_options(char 
                }
  
                for_each_subsys(ss, i) {
-                       if (strcmp(token, ss->name))
+                       if (strcmp(token, ss->legacy_name))
                                continue;
                        if (ss->disabled)
                                continue;
@@@ -1666,7 -1674,7 +1674,7 @@@ static int cgroup_setup_root(struct cgr
  
        lockdep_assert_held(&cgroup_mutex);
  
-       ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_NOWAIT);
+       ret = cgroup_idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
        if (ret < 0)
                goto out;
        root_cgrp->id = ret;
@@@ -4579,7 -4587,7 +4587,7 @@@ static int create_css(struct cgroup *cg
        if (err)
                goto err_free_css;
  
-       err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_NOWAIT);
+       err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
        if (err < 0)
                goto err_free_percpu_ref;
        css->id = err;
@@@ -4656,7 -4664,7 +4664,7 @@@ static int cgroup_mkdir(struct kernfs_n
         * Temporarily set the pointer to NULL, so idr_find() won't return
         * a half-baked cgroup.
         */
-       cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_NOWAIT);
+       cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
        if (cgrp->id < 0) {
                ret = -ENOMEM;
                goto out_cancel_ref;
@@@ -4955,6 -4963,7 +4963,7 @@@ static void __init cgroup_init_subsys(s
  
        have_fork_callback |= (bool)ss->fork << ss->id;
        have_exit_callback |= (bool)ss->exit << ss->id;
+       have_canfork_callback |= (bool)ss->can_fork << ss->id;
  
        /* At system boot, before all subsystems have been
         * registered, no tasks have been forked, so we don't
@@@ -4993,6 -5002,8 +5002,8 @@@ int __init cgroup_init_early(void
  
                ss->id = i;
                ss->name = cgroup_subsys_name[i];
+               if (!ss->legacy_name)
+                       ss->legacy_name = cgroup_subsys_name[i];
  
                if (ss->early_init)
                        cgroup_init_subsys(ss, true);
@@@ -5136,9 -5147,11 +5147,11 @@@ int proc_cgroup_show(struct seq_file *m
                        continue;
  
                seq_printf(m, "%d:", root->hierarchy_id);
-               for_each_subsys(ss, ssid)
-                       if (root->subsys_mask & (1 << ssid))
-                               seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
+               if (root != &cgrp_dfl_root)
+                       for_each_subsys(ss, ssid)
+                               if (root->subsys_mask & (1 << ssid))
+                                       seq_printf(m, "%s%s", count++ ? "," : "",
+                                                  ss->legacy_name);
                if (strlen(root->name))
                        seq_printf(m, "%sname=%s", count ? "," : "",
                                   root->name);
@@@ -5178,7 -5191,7 +5191,7 @@@ static int proc_cgroupstats_show(struc
  
        for_each_subsys(ss, i)
                seq_printf(m, "%s\t%d\t%d\t%d\n",
-                          ss->name, ss->root->hierarchy_id,
+                          ss->legacy_name, ss->root->hierarchy_id,
                           atomic_read(&ss->root->nr_cgrps), !ss->disabled);
  
        mutex_unlock(&cgroup_mutex);
@@@ -5197,6 -5210,19 +5210,19 @@@ static const struct file_operations pro
        .release = single_release,
  };
  
+ static void **subsys_canfork_priv_p(void *ss_priv[CGROUP_CANFORK_COUNT], int i)
+ {
+       if (CGROUP_CANFORK_START <= i && i < CGROUP_CANFORK_END)
+               return &ss_priv[i - CGROUP_CANFORK_START];
+       return NULL;
+ }
+ 
+ static void *subsys_canfork_priv(void *ss_priv[CGROUP_CANFORK_COUNT], int i)
+ {
+       void **private = subsys_canfork_priv_p(ss_priv, i);
+       return private ? *private : NULL;
+ }
+ 
  /**
   * cgroup_fork - initialize cgroup related fields during copy_process()
   * @child: pointer to task_struct of forking parent process.
@@@ -5211,6 -5237,57 +5237,57 @@@ void cgroup_fork(struct task_struct *ch
        INIT_LIST_HEAD(&child->cg_list);
  }
  
+ /**
+  * cgroup_can_fork - called on a new task before the process is exposed
+  * @child: the task in question.
+  *
+  * This calls the subsystem can_fork() callbacks. If the can_fork() callback
+  * returns an error, the fork aborts with that error code. This allows for
+  * a cgroup subsystem to conditionally allow or deny new forks.
+  */
+ int cgroup_can_fork(struct task_struct *child,
+                   void *ss_priv[CGROUP_CANFORK_COUNT])
+ {
+       struct cgroup_subsys *ss;
+       int i, j, ret;
+       for_each_subsys_which(ss, i, &have_canfork_callback) {
+               ret = ss->can_fork(child, subsys_canfork_priv_p(ss_priv, i));
+               if (ret)
+                       goto out_revert;
+       }
+       return 0;
+ out_revert:
+       for_each_subsys(ss, j) {
+               if (j >= i)
+                       break;
+               if (ss->cancel_fork)
+                       ss->cancel_fork(child, subsys_canfork_priv(ss_priv, j));
+       }
+       return ret;
+ }
+ 
+ /**
+  * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
+  * @child: the task in question
+  *
+  * This calls the cancel_fork() callbacks if a fork failed *after*
+  * cgroup_can_fork() succeeded.
+  */
+ void cgroup_cancel_fork(struct task_struct *child,
+                       void *ss_priv[CGROUP_CANFORK_COUNT])
+ {
+       struct cgroup_subsys *ss;
+       int i;
+       for_each_subsys(ss, i)
+               if (ss->cancel_fork)
+                       ss->cancel_fork(child, subsys_canfork_priv(ss_priv, i));
+ }
+ 
  /**
   * cgroup_post_fork - called on a new task after adding it to the task list
   * @child: the task in question
   * cgroup_task_iter_start() - to guarantee that the new task ends up on its
   * list.
   */
- void cgroup_post_fork(struct task_struct *child)
+ void cgroup_post_fork(struct task_struct *child,
+                     void *old_ss_priv[CGROUP_CANFORK_COUNT])
  {
        struct cgroup_subsys *ss;
        int i;
         * and addition to css_set.
         */
        for_each_subsys_which(ss, i, &have_fork_callback)
-               ss->fork(child);
+               ss->fork(child, subsys_canfork_priv(old_ss_priv, i));
  }
  
  /**
@@@ -5400,12 -5478,14 +5478,14 @@@ static int __init cgroup_disable(char *
                        continue;
  
                for_each_subsys(ss, i) {
-                       if (!strcmp(token, ss->name)) {
-                               ss->disabled = 1;
-                               printk(KERN_INFO "Disabling %s control group"
-                                       " subsystem\n", ss->name);
-                               break;
-                       }
+                       if (strcmp(token, ss->name) &&
+                           strcmp(token, ss->legacy_name))
+                               continue;
+                       ss->disabled = 1;
+                       printk(KERN_INFO "Disabling %s control group subsystem\n",
+                              ss->name);
+                       break;
                }
        }
        return 1;
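
One detail worth calling out in the hunks above: the fork-path hooks are
dispatched from per-callback bitmasks (have_fork_callback,
have_canfork_callback), and cgroup_can_fork() unwinds with cancel_fork() only
for the subsystems that had already accepted the fork.  A stand-alone,
userspace-compilable sketch of that pattern (the subsystem table and names are
illustrative, not the kernel's actual data structures):

    #include <stdio.h>

    #define NSUBSYS 8

    struct subsys {
        const char *name;
        int (*can_fork)(void);      /* may be NULL */
        void (*cancel_fork)(void);  /* undoes a successful can_fork */
    };

    static int charge_ok(void)   { return 0; }
    static int charge_fail(void) { return -1; }
    static void uncharge(void)   { puts("uncharge"); }

    static struct subsys subsys[NSUBSYS] = {
        { "memory", charge_ok,   uncharge },
        { "pids",   charge_fail, uncharge },
    };

    int main(void)
    {
        unsigned long have_canfork = 0;
        int i, j;

        /* Registration time: mirrors have_canfork_callback |= ... << ss->id */
        for (i = 0; i < NSUBSYS; i++)
            if (subsys[i].can_fork)
                have_canfork |= 1UL << i;

        /* Fork time: mirrors for_each_subsys_which() in cgroup_can_fork() */
        for (i = 0; i < NSUBSYS; i++) {
            if (!(have_canfork & (1UL << i)))
                continue;
            if (subsys[i].can_fork()) {
                /* Unwind only the subsystems that already charged. */
                for (j = 0; j < i; j++)
                    if (subsys[j].cancel_fork && (have_canfork & (1UL << j)))
                        subsys[j].cancel_fork();
                fprintf(stderr, "%s rejected the fork\n", subsys[i].name);
                return 1;
            }
        }
        return 0;
    }
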
diff --combined kernel/fork.c
index 2b1a61cddc1954fc6be6d2c8a69c836e6caca33c,40e3af12c55e5c225dda47349890f3c10e1bf68e..03aa2e6de7a4e90696c003792641d2c3a150cd02
@@@ -287,11 -287,6 +287,11 @@@ static void set_max_threads(unsigned in
        max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
  }
  
 +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 +/* Initialized by the architecture: */
 +int arch_task_struct_size __read_mostly;
 +#endif
 +
  void __init fork_init(void)
  {
  #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
  #endif
        /* create a slab on which task_structs can be allocated */
        task_struct_cachep =
 -              kmem_cache_create("task_struct", sizeof(struct task_struct),
 +              kmem_cache_create("task_struct", arch_task_struct_size,
                        ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
  #endif
  
@@@ -1072,7 -1067,6 +1072,7 @@@ static int copy_sighand(unsigned long c
        rcu_assign_pointer(tsk->sighand, sig);
        if (!sig)
                return -ENOMEM;
 +
        atomic_set(&sig->count, 1);
        memcpy(sig->action, current->sighand->action, sizeof(sig->action));
        return 0;
@@@ -1134,7 -1128,6 +1134,7 @@@ static int copy_signal(unsigned long cl
        init_sigpending(&sig->shared_pending);
        INIT_LIST_HEAD(&sig->posix_timers);
        seqlock_init(&sig->stats_lock);
 +      prev_cputime_init(&sig->prev_cputime);
  
        hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        sig->real_timer.function = it_real_fn;
@@@ -1246,6 -1239,7 +1246,7 @@@ static struct task_struct *copy_process
  {
        int retval;
        struct task_struct *p;
+       void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
  
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
  
        /*
         * If the new process will be in a different pid or user namespace
 -       * do not allow it to share a thread group or signal handlers or
 -       * parent with the forking task.
 +       * do not allow it to share a thread group with the forking task.
         */
 -      if (clone_flags & CLONE_SIGHAND) {
 +      if (clone_flags & CLONE_THREAD) {
                if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
                    (task_active_pid_ns(current) !=
                                current->nsproxy->pid_ns_for_children))
  
        p->utime = p->stime = p->gtime = 0;
        p->utimescaled = p->stimescaled = 0;
 -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 -      p->prev_cputime.utime = p->prev_cputime.stime = 0;
 -#endif
 +      prev_cputime_init(&p->prev_cputime);
 +
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
        seqlock_init(&p->vtime_seqlock);
        p->vtime_snap = 0;
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
  
+       /*
+        * Ensure that the cgroup subsystem policies allow the new process to be
+        * forked. It should be noted that the new process's css_set can be changed
+        * between here and cgroup_post_fork() if an organisation operation is in
+        * progress.
+        */
+       retval = cgroup_can_fork(p, cgrp_ss_priv);
+       if (retval)
+               goto bad_fork_free_pid;
+ 
        /*
         * Make it visible to the rest of the system, but dont wake it up yet.
         * Need tasklist lock for parent etc handling!
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_free_pid;
+               goto bad_fork_cancel_cgroup;
        }
  
        if (likely(p->pid)) {
        write_unlock_irq(&tasklist_lock);
  
        proc_fork_connector(p);
-       cgroup_post_fork(p);
+       cgroup_post_fork(p, cgrp_ss_priv);
        if (clone_flags & CLONE_THREAD)
                threadgroup_change_end(current);
        perf_event_fork(p);
  
        return p;
  
+ bad_fork_cancel_cgroup:
+       cgroup_cancel_fork(p, cgrp_ss_priv);
  bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
@@@ -1871,21 -1879,13 +1884,21 @@@ static int check_unshare_flags(unsigne
                                CLONE_NEWUSER|CLONE_NEWPID))
                return -EINVAL;
        /*
 -       * Not implemented, but pretend it works if there is nothing to
 -       * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
 -       * needs to unshare vm.
 +       * Not implemented, but pretend it works if there is nothing
 +       * to unshare.  Note that unsharing the address space or the
 +       * signal handlers also need to unshare the signal queues (aka
 +       * CLONE_THREAD).
         */
        if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
 -              /* FIXME: get_task_mm() increments ->mm_users */
 -              if (atomic_read(&current->mm->mm_users) > 1)
 +              if (!thread_group_empty(current))
 +                      return -EINVAL;
 +      }
 +      if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
 +              if (atomic_read(&current->sighand->count) > 1)
 +                      return -EINVAL;
 +      }
 +      if (unshare_flags & CLONE_VM) {
 +              if (!current_is_single_threaded())
                        return -EINVAL;
        }
  
@@@ -1949,21 -1949,20 +1962,21 @@@ SYSCALL_DEFINE1(unshare, unsigned long
        int err;
  
        /*
 -       * If unsharing a user namespace must also unshare the thread.
 +       * If unsharing a user namespace, must also unshare the thread group
 +       * and unshare the filesystem root and working directories.
         */
        if (unshare_flags & CLONE_NEWUSER)
                unshare_flags |= CLONE_THREAD | CLONE_FS;
 -      /*
 -       * If unsharing a thread from a thread group, must also unshare vm.
 -       */
 -      if (unshare_flags & CLONE_THREAD)
 -              unshare_flags |= CLONE_VM;
        /*
         * If unsharing vm, must also unshare signal handlers.
         */
        if (unshare_flags & CLONE_VM)
                unshare_flags |= CLONE_SIGHAND;
 +      /*
 +       * If unsharing signal handlers, must also unshare the signal queues.
 +       */
 +      if (unshare_flags & CLONE_SIGHAND)
 +              unshare_flags |= CLONE_THREAD;
        /*
         * If unsharing namespace, must also unshare filesystem information.
         */
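
The comments above spell out the unshare(2) flag implications: CLONE_NEWUSER
pulls in CLONE_THREAD and CLONE_FS, CLONE_VM pulls in CLONE_SIGHAND, and
CLONE_SIGHAND now pulls in CLONE_THREAD, all of which require a single-threaded
caller per the thread_group_empty() check in check_unshare_flags().  A small
userspace illustration of the visible effect (the EINVAL expectation follows
from that check; this program is not part of the patch):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void *idle_thread(void *arg)
    {
        pause();                /* just keep a second thread alive */
        return arg;
    }

    int main(void)
    {
        pthread_t t;

        if (pthread_create(&t, NULL, idle_thread, NULL)) {
            perror("pthread_create");
            return 1;
        }

        /* CLONE_NEWUSER implies CLONE_THREAD (see sys_unshare() above), and
         * unsharing the thread group is refused while other threads exist. */
        if (unshare(CLONE_NEWUSER))
            printf("unshare(CLONE_NEWUSER): %s\n", strerror(errno));
        return 0;
    }
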
diff --combined kernel/sched/core.c
index 8b864ecee0e187c58a7903058ae1c0fa57b4d0c3,d811652fe6f598633ca9a955b7aace4fe5b923c5..d8420c233ff76268cdff1d1add89a05e1b89d9c2
@@@ -1151,45 -1151,15 +1151,45 @@@ static int migration_cpu_stop(void *dat
        return 0;
  }
  
 -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 +/*
 + * sched_class::set_cpus_allowed must do the below, but is not required to
 + * actually call this function.
 + */
 +void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
  {
 -      if (p->sched_class->set_cpus_allowed)
 -              p->sched_class->set_cpus_allowed(p, new_mask);
 -
        cpumask_copy(&p->cpus_allowed, new_mask);
        p->nr_cpus_allowed = cpumask_weight(new_mask);
  }
  
 +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 +{
 +      struct rq *rq = task_rq(p);
 +      bool queued, running;
 +
 +      lockdep_assert_held(&p->pi_lock);
 +
 +      queued = task_on_rq_queued(p);
 +      running = task_current(rq, p);
 +
 +      if (queued) {
 +              /*
 +               * Because __kthread_bind() calls this on blocked tasks without
 +               * holding rq->lock.
 +               */
 +              lockdep_assert_held(&rq->lock);
 +              dequeue_task(rq, p, 0);
 +      }
 +      if (running)
 +              put_prev_task(rq, p);
 +
 +      p->sched_class->set_cpus_allowed(p, new_mask);
 +
 +      if (running)
 +              p->sched_class->set_curr_task(rq);
 +      if (queued)
 +              enqueue_task(rq, p, 0);
 +}
 +
  /*
   * Change a given task's CPU affinity. Migrate the thread to a
   * proper CPU and schedule it away if the CPU it's executing on
   * task must not exit() & deallocate itself prematurely. The
   * call is not atomic; no spinlocks may be held.
   */
 -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 +static int __set_cpus_allowed_ptr(struct task_struct *p,
 +                                const struct cpumask *new_mask, bool check)
  {
        unsigned long flags;
        struct rq *rq;
  
        rq = task_rq_lock(p, &flags);
  
 +      /*
 +       * Must re-check here, to close a race against __kthread_bind(),
 +       * sched_setaffinity() is not guaranteed to observe the flag.
 +       */
 +      if (check && (p->flags & PF_NO_SETAFFINITY)) {
 +              ret = -EINVAL;
 +              goto out;
 +      }
 +
        if (cpumask_equal(&p->cpus_allowed, new_mask))
                goto out;
  
  
        return ret;
  }
 +
 +int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 +{
 +      return __set_cpus_allowed_ptr(p, new_mask, false);
 +}
  EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
  
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
@@@ -1640,15 -1595,6 +1640,15 @@@ static void update_avg(u64 *avg, u64 sa
        s64 diff = sample - *avg;
        *avg += diff >> 3;
  }
 +
 +#else
 +
 +static inline int __set_cpus_allowed_ptr(struct task_struct *p,
 +                                       const struct cpumask *new_mask, bool check)
 +{
 +      return set_cpus_allowed_ptr(p, new_mask);
 +}
 +
  #endif /* CONFIG_SMP */
  
  static void
@@@ -1708,9 -1654,9 +1708,9 @@@ static voi
  ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
  {
        check_preempt_curr(rq, p, wake_flags);
 -      trace_sched_wakeup(p, true);
 -
        p->state = TASK_RUNNING;
 +      trace_sched_wakeup(p);
 +
  #ifdef CONFIG_SMP
        if (p->sched_class->task_woken) {
                /*
@@@ -1928,8 -1874,6 +1928,8 @@@ try_to_wake_up(struct task_struct *p, u
        if (!(p->state & state))
                goto out;
  
 +      trace_sched_waking(p);
 +
        success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
  
@@@ -2005,8 -1949,6 +2005,8 @@@ static void try_to_wake_up_local(struc
        if (!(p->state & TASK_NORMAL))
                goto out;
  
 +      trace_sched_waking(p);
 +
        if (!task_on_rq_queued(p))
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
  
@@@ -2074,6 -2016,9 +2074,6 @@@ static void __sched_fork(unsigned long 
        p->se.prev_sum_exec_runtime     = 0;
        p->se.nr_migrations             = 0;
        p->se.vruntime                  = 0;
 -#ifdef CONFIG_SMP
 -      p->se.avg.decay_count           = 0;
 -#endif
        INIT_LIST_HEAD(&p->se.group_node);
  
  #ifdef CONFIG_SCHEDSTATS
@@@ -2255,8 -2200,8 +2255,8 @@@ unsigned long to_ratio(u64 period, u64 
  #ifdef CONFIG_SMP
  inline struct dl_bw *dl_bw_of(int i)
  {
 -      rcu_lockdep_assert(rcu_read_lock_sched_held(),
 -                         "sched RCU must be held");
 +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
 +                       "sched RCU must be held");
        return &cpu_rq(i)->rd->dl_bw;
  }
  
@@@ -2265,8 -2210,8 +2265,8 @@@ static inline int dl_bw_cpus(int i
        struct root_domain *rd = cpu_rq(i)->rd;
        int cpus = 0;
  
 -      rcu_lockdep_assert(rcu_read_lock_sched_held(),
 -                         "sched RCU must be held");
 +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
 +                       "sched RCU must be held");
        for_each_cpu_and(i, rd->span, cpu_active_mask)
                cpus++;
  
@@@ -2358,11 -2303,11 +2358,11 @@@ void wake_up_new_task(struct task_struc
  #endif
  
        /* Initialize new task's runnable average */
 -      init_task_runnable_average(p);
 +      init_entity_runnable_average(&p->se);
        rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
        p->on_rq = TASK_ON_RQ_QUEUED;
 -      trace_sched_wakeup_new(p, true);
 +      trace_sched_wakeup_new(p);
        check_preempt_curr(rq, p, WF_FORK);
  #ifdef CONFIG_SMP
        if (p->sched_class->task_woken)
@@@ -2524,6 -2469,7 +2524,6 @@@ static struct rq *finish_task_switch(st
         */
        prev_state = prev->state;
        vtime_task_switch(prev);
 -      finish_arch_switch(prev);
        perf_event_task_sched_in(prev, current);
        finish_lock_switch(rq, prev);
        finish_arch_post_lock_switch();
                put_task_struct(prev);
        }
  
 -      tick_nohz_task_switch(current);
 +      tick_nohz_task_switch();
        return rq;
  }
  
@@@ -4394,7 -4340,7 +4394,7 @@@ long sched_setaffinity(pid_t pid, cons
        }
  #endif
  again:
 -      retval = set_cpus_allowed_ptr(p, new_mask);
 +      retval = __set_cpus_allowed_ptr(p, new_mask, true);
  
        if (!retval) {
                cpuset_cpus_allowed(p, cpus_allowed);
@@@ -4546,7 -4492,7 +4546,7 @@@ SYSCALL_DEFINE0(sched_yield
  
  int __sched _cond_resched(void)
  {
 -      if (should_resched()) {
 +      if (should_resched(0)) {
                preempt_schedule_common();
                return 1;
        }
@@@ -4564,7 -4510,7 +4564,7 @@@ EXPORT_SYMBOL(_cond_resched)
   */
  int __cond_resched_lock(spinlock_t *lock)
  {
 -      int resched = should_resched();
 +      int resched = should_resched(PREEMPT_LOCK_OFFSET);
        int ret = 0;
  
        lockdep_assert_held(lock);
@@@ -4586,7 -4532,7 +4586,7 @@@ int __sched __cond_resched_softirq(void
  {
        BUG_ON(!in_softirq());
  
 -      if (should_resched()) {
 +      if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
                local_bh_enable();
                preempt_schedule_common();
                local_bh_disable();
@@@ -4919,8 -4865,7 +4919,8 @@@ void init_idle(struct task_struct *idle
        struct rq *rq = cpu_rq(cpu);
        unsigned long flags;
  
 -      raw_spin_lock_irqsave(&rq->lock, flags);
 +      raw_spin_lock_irqsave(&idle->pi_lock, flags);
 +      raw_spin_lock(&rq->lock);
  
        __sched_fork(0, idle);
        idle->state = TASK_RUNNING;
  #if defined(CONFIG_SMP)
        idle->on_cpu = 1;
  #endif
 -      raw_spin_unlock_irqrestore(&rq->lock, flags);
 +      raw_spin_unlock(&rq->lock);
 +      raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
  
        /* Set the preempt count _outside_ the spinlocks! */
        init_idle_preempt_count(idle, cpu);
@@@ -5367,7 -5311,8 +5367,7 @@@ static void register_sched_domain_sysct
  /* may be called multiple times per register */
  static void unregister_sched_domain_sysctl(void)
  {
 -      if (sd_sysctl_header)
 -              unregister_sysctl_table(sd_sysctl_header);
 +      unregister_sysctl_table(sd_sysctl_header);
        sd_sysctl_header = NULL;
        if (sd_ctl_dir[0].child)
                sd_free_ctl_entry(&sd_ctl_dir[0].child);
@@@ -5488,14 -5433,6 +5488,14 @@@ static int sched_cpu_active(struct noti
        case CPU_STARTING:
                set_cpu_rq_start_time();
                return NOTIFY_OK;
 +      case CPU_ONLINE:
 +              /*
 +               * At this point a starting CPU has marked itself as online via
 +               * set_cpu_online(). But it might not yet have marked itself
 +               * as active, which is essential from here on.
 +               *
 +               * Thus, fall-through and help the starting CPU along.
 +               */
        case CPU_DOWN_FAILED:
                set_cpu_active((long)hcpu, true);
                return NOTIFY_OK;
@@@ -6508,10 -6445,8 +6508,10 @@@ static void init_numa_topology_type(voi
  
        n = sched_max_numa_distance;
  
 -      if (n <= 1)
 +      if (sched_domains_numa_levels <= 1) {
                sched_numa_topology_type = NUMA_DIRECT;
 +              return;
 +      }
  
        for_each_online_node(a) {
                for_each_online_node(b) {
@@@ -8133,7 -8068,7 +8133,7 @@@ static void cpu_cgroup_css_offline(stru
        sched_offline_group(tg);
  }
  
- static void cpu_cgroup_fork(struct task_struct *task)
+ static void cpu_cgroup_fork(struct task_struct *task, void *private)
  {
        sched_move_task(task);
  }