Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <[email protected]>
           Mon, 11 Jan 2016 22:18:38 +0000 (14:18 -0800)
committer  Linus Torvalds <[email protected]>
           Mon, 11 Jan 2016 22:18:38 +0000 (14:18 -0800)
Pull locking updates from Ingo Molnar:
 "So we have a laundry list of locking subsystem changes:

   - continuing barrier API and code improvements

   - futex enhancements

   - atomics API improvements

   - pvqspinlock enhancements: in particular lock stealing and adaptive
     spinning

   - qspinlock micro-enhancements"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op
  futex: Cleanup the goto confusion in requeue_pi()
  futex: Remove pointless put_pi_state calls in requeue()
  futex: Document pi_state refcounting in requeue code
  futex: Rename free_pi_state() to put_pi_state()
  futex: Drop refcount if requeue_pi() acquired the rtmutex
  locking/barriers, arch: Remove ambiguous statement in the smp_store_mb() documentation
  locking/barriers, arch: Use smp barriers in smp_store_release()
  locking/cmpxchg, arch: Remove tas() definitions
  locking/pvqspinlock: Queue node adaptive spinning
  locking/pvqspinlock: Allow limited lock stealing
  locking/pvqspinlock: Collect slowpath lock statistics
  sched/core, locking: Document Program-Order guarantees
  locking, sched: Introduce smp_cond_acquire() and use it
  locking/pvqspinlock, x86: Optimize the PV unlock code path
  locking/qspinlock: Avoid redundant read of next pointer
  locking/qspinlock: Prefetch the next node cacheline
  locking/qspinlock: Use _acquire/_release() versions of cmpxchg() & xchg()
  atomics: Add test for atomic operations with _relaxed variants

Documentation/memory-barriers.txt
kernel/sched/core.c

diff --combined Documentation/memory-barriers.txt
index 85304ebd187cf434d9ef4e5c82d08d3aaeab8c69,c85054dc44608fa98d9f4e3835e236420f7b588c..a61be39c7b516a1e3b081afd1fea02a3f1068187
@@@ -194,7 -194,7 +194,7 @@@ There are some minimal guarantees that 
   (*) On any given CPU, dependent memory accesses will be issued in order, with
       respect to itself.  This means that for:
  
 -      WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
 +      Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
  
      the CPU will issue the following memory operations:
 
 		Q = LOAD P, D = LOAD *Q
 
      and always in that order.  On most systems, smp_read_barrier_depends()
       does nothing, but it is required for DEC Alpha.  The READ_ONCE()
 -     and WRITE_ONCE() are required to prevent compiler mischief.  Please
 -     note that you should normally use something like rcu_dereference()
 -     instead of open-coding smp_read_barrier_depends().
 +     is required to prevent compiler mischief.  Please note that you
 +     should normally use something like rcu_dereference() instead of
 +     open-coding smp_read_barrier_depends().
  
   (*) Overlapping loads and stores within a particular CPU will appear to be
       ordered within that CPU.  This means that for:
@@@ -1673,8 -1673,8 +1673,8 @@@ There are some more advanced barrier fu
   (*) smp_store_mb(var, value)
  
       This assigns the value to the variable and then inserts a full memory
-      barrier after it, depending on the function.  It isn't guaranteed to
-      insert anything more than a compiler barrier in a UP compilation.
+      barrier after it.  It isn't guaranteed to insert anything more than a
+      compiler barrier in a UP compilation.
  
  
   (*) smp_mb__before_atomic();
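
For context, the first memory-barriers.txt hunk above keeps the dependent-load
example written in terms of READ_ONCE() and smp_read_barrier_depends(), while
repeating the advice to use rcu_dereference() instead of open-coding the
barrier. A minimal illustrative sketch of that advice follows; gp and ->data
are made-up names for illustration, not part of this patch:

	struct foo *q;
	int d = 0;

	rcu_read_lock();
	q = rcu_dereference(gp);	/* volatile read + dependency ordering */
	if (q)
		d = READ_ONCE(q->data);	/* dependent load, ordered after the pointer load */
	rcu_read_unlock();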
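
The smp_store_mb() hunk drops the "depending on the function" qualifier: on
SMP the store is always followed by a full barrier. For orientation only, the
generic fallback is commonly of the following shape; architectures may
override it, so treat this as an assumption rather than the definition in this
tree:

	#ifndef smp_store_mb
	#define smp_store_mb(var, value)		\
	do {						\
		WRITE_ONCE(var, value);			\
		smp_mb();				\
	} while (0)
	#endif
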
diff --combined kernel/sched/core.c
index 1ef0d7aeab4776801afae4cab1651dcc4f69378a,91db75018652ca6d7c1274bf7798d4c8ed725745..34cb9f7fc2d28d81fcb857a6617a903f67b0d13e
@@@ -1905,6 -1905,97 +1905,97 @@@ static void ttwu_queue(struct task_stru
        raw_spin_unlock(&rq->lock);
  }
  
+ /*
+  * Notes on Program-Order guarantees on SMP systems.
+  *
+  *  MIGRATION
+  *
+  * The basic program-order guarantee on SMP systems is that when a task [t]
+  * migrates, all its activity on its old cpu [c0] happens-before any subsequent
+  * execution on its new cpu [c1].
+  *
+  * For migration (of runnable tasks) this is provided by the following means:
+  *
+  *  A) UNLOCK of the rq(c0)->lock scheduling out task t
+  *  B) migration for t is required to synchronize *both* rq(c0)->lock and
+  *     rq(c1)->lock (if not at the same time, then in that order).
+  *  C) LOCK of the rq(c1)->lock scheduling in task
+  *
+  * Transitivity guarantees that B happens after A and C after B.
+  * Note: we only require RCpc transitivity.
+  * Note: the cpu doing B need not be c0 or c1
+  *
+  * Example:
+  *
+  *   CPU0            CPU1            CPU2
+  *
+  *   LOCK rq(0)->lock
+  *   sched-out X
+  *   sched-in Y
+  *   UNLOCK rq(0)->lock
+  *
+  *                                   LOCK rq(0)->lock // orders against CPU0
+  *                                   dequeue X
+  *                                   UNLOCK rq(0)->lock
+  *
+  *                                   LOCK rq(1)->lock
+  *                                   enqueue X
+  *                                   UNLOCK rq(1)->lock
+  *
+  *                   LOCK rq(1)->lock // orders against CPU2
+  *                   sched-out Z
+  *                   sched-in X
+  *                   UNLOCK rq(1)->lock
+  *
+  *
+  *  BLOCKING -- aka. SLEEP + WAKEUP
+  *
+  * For blocking we (obviously) need to provide the same guarantee as for
+  * migration. However the means are completely different as there is no lock
+  * chain to provide order. Instead we do:
+  *
+  *   1) smp_store_release(X->on_cpu, 0)
+  *   2) smp_cond_acquire(!X->on_cpu)
+  *
+  * Example:
+  *
+  *   CPU0 (schedule)  CPU1 (try_to_wake_up) CPU2 (schedule)
+  *
+  *   LOCK rq(0)->lock LOCK X->pi_lock
+  *   dequeue X
+  *   sched-out X
+  *   smp_store_release(X->on_cpu, 0);
+  *
+  *                    smp_cond_acquire(!X->on_cpu);
+  *                    X->state = WAKING
+  *                    set_task_cpu(X,2)
+  *
+  *                    LOCK rq(2)->lock
+  *                    enqueue X
+  *                    X->state = RUNNING
+  *                    UNLOCK rq(2)->lock
+  *
+  *                                          LOCK rq(2)->lock // orders against CPU1
+  *                                          sched-out Z
+  *                                          sched-in X
+  *                                          UNLOCK rq(2)->lock
+  *
+  *                    UNLOCK X->pi_lock
+  *   UNLOCK rq(0)->lock
+  *
+  *
+  * However, for wakeups there is a second guarantee we must provide, namely we
+  * must observe the state that led to our wakeup. That is, not only must our
+  * task observe its own prior state, it must also observe the stores prior to
+  * its wakeup.
+  *
+  * This means that any means of doing remote wakeups must order the CPU doing
+  * the wakeup against the CPU the task is going to end up running on. This,
+  * however, is already required for the regular Program-Order guarantee above,
+  * since the waking CPU is the one issuing the ACQUIRE (smp_cond_acquire).
+  *
+  */
  /**
   * try_to_wake_up - wake up a thread
   * @p: the thread to be awakened
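
The BLOCKING half of the comment block added above boils down to a
release/acquire handshake on ->on_cpu. A condensed sketch of that pairing is
shown below; the helper names are hypothetical, and the real code lives in
finish_lock_switch() and try_to_wake_up():

	/* CPU scheduling the task out (roughly finish_lock_switch()): */
	static inline void sketch_sched_out(struct task_struct *prev)
	{
		/* everything prev did on this cpu happens before the release */
		smp_store_release(&prev->on_cpu, 0);		/* step 1 */
	}

	/* CPU performing the wakeup (roughly try_to_wake_up()): */
	static inline void sketch_wake(struct task_struct *p, int new_cpu)
	{
		smp_cond_acquire(!p->on_cpu);			/* step 2: spin + ACQUIRE */
		/* only now is it safe to set_task_cpu(p, new_cpu) and enqueue p */
	}
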
@@@ -1968,19 -2059,13 +2059,13 @@@ try_to_wake_up(struct task_struct *p, u
        /*
         * If the owning (remote) cpu is still in the middle of schedule() with
        * this task as prev, wait until it's done referencing the task.
-        */
-       while (p->on_cpu)
-               cpu_relax();
-       /*
-        * Combined with the control dependency above, we have an effective
-        * smp_load_acquire() without the need for full barriers.
         *
         * Pairs with the smp_store_release() in finish_lock_switch().
         *
         * This ensures that tasks getting woken will be fully ordered against
         * their previous state and preserve Program Order.
         */
-       smp_rmb();
+       smp_cond_acquire(!p->on_cpu);
  
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
@@@ -3109,6 -3194,7 +3194,6 @@@ static void __sched notrace __schedule(
  
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
 -      rcu_note_context_switch();
        prev = rq->curr;
  
        /*
        if (sched_feat(HRTICK))
                hrtick_clear(rq);
  
 +      local_irq_disable();
 +      rcu_note_context_switch();
 +
        /*
         * Make sure that signal_pending_state()->signal_pending() below
         * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
         * done by the caller to avoid the race with signal_wake_up().
         */
        smp_mb__before_spinlock();
 -      raw_spin_lock_irq(&rq->lock);
 +      raw_spin_lock(&rq->lock);
        lockdep_pin_lock(&rq->lock);
  
        rq->clock_skip_update <<= 1; /* promote REQ to ACT */
@@@ -8243,12 -8326,12 +8328,12 @@@ static void cpu_cgroup_fork(struct task
        sched_move_task(task);
  }
  
 -static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 -                               struct cgroup_taskset *tset)
 +static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
  {
        struct task_struct *task;
 +      struct cgroup_subsys_state *css;
  
 -      cgroup_taskset_for_each(task, tset) {
 +      cgroup_taskset_for_each(task, css, tset) {
  #ifdef CONFIG_RT_GROUP_SCHED
                if (!sched_rt_can_attach(css_tg(css), task))
                        return -EINVAL;
        return 0;
  }
  
 -static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
 -                            struct cgroup_taskset *tset)
 +static void cpu_cgroup_attach(struct cgroup_taskset *tset)
  {
        struct task_struct *task;
 +      struct cgroup_subsys_state *css;
  
 -      cgroup_taskset_for_each(task, tset)
 +      cgroup_taskset_for_each(task, css, tset)
                sched_move_task(task);
  }
  