Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author     Linus Torvalds <[email protected]>
           Mon, 11 Jan 2016 22:18:38 +0000 (14:18 -0800)
committer  Linus Torvalds <[email protected]>
           Mon, 11 Jan 2016 22:18:38 +0000 (14:18 -0800)
Pull locking updates from Ingo Molnar:
 "So we have a laundry list of locking subsystem changes:

   - continuing barrier API and code improvements

   - futex enhancements

   - atomics API improvements

   - pvqspinlock enhancements: in particular lock stealing and adaptive
     spinning

   - qspinlock micro-enhancements"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op
  futex: Cleanup the goto confusion in requeue_pi()
  futex: Remove pointless put_pi_state calls in requeue()
  futex: Document pi_state refcounting in requeue code
  futex: Rename free_pi_state() to put_pi_state()
  futex: Drop refcount if requeue_pi() acquired the rtmutex
  locking/barriers, arch: Remove ambiguous statement in the smp_store_mb() documentation
  locking/barriers, arch: Use smp barriers in smp_store_release()
  locking/cmpxchg, arch: Remove tas() definitions
  locking/pvqspinlock: Queue node adaptive spinning
  locking/pvqspinlock: Allow limited lock stealing
  locking/pvqspinlock: Collect slowpath lock statistics
  sched/core, locking: Document Program-Order guarantees
  locking, sched: Introduce smp_cond_acquire() and use it
  locking/pvqspinlock, x86: Optimize the PV unlock code path
  locking/qspinlock: Avoid redundant read of next pointer
  locking/qspinlock: Prefetch the next node cacheline
  locking/qspinlock: Use _acquire/_release() versions of cmpxchg() & xchg()
  atomics: Add test for atomic operations with _relaxed variants

Documentation/memory-barriers.txt
kernel/sched/core.c

diff --combined Documentation/memory-barriers.txt
index 85304ebd187cf434d9ef4e5c82d08d3aaeab8c69,c85054dc44608fa98d9f4e3835e236420f7b588c..a61be39c7b516a1e3b081afd1fea02a3f1068187
@@@ -194,7 -194,7 +194,7 @@@ There are some minimal guarantees that 
   (*) On any given CPU, dependent memory accesses will be issued in order, with
       respect to itself.  This means that for:
  
 -      WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
 +      Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
  
      the CPU will issue the following memory operations:
 
 		Q = LOAD P, D = LOAD *Q
 
      and always in that order.  On most systems, smp_read_barrier_depends()
       does nothing, but it is required for DEC Alpha.  The READ_ONCE()
 -     and WRITE_ONCE() are required to prevent compiler mischief.  Please
 -     note that you should normally use something like rcu_dereference()
 -     instead of open-coding smp_read_barrier_depends().
 +     is required to prevent compiler mischief.  Please note that you
 +     should normally use something like rcu_dereference() instead of
 +     open-coding smp_read_barrier_depends().
  
   (*) Overlapping loads and stores within a particular CPU will appear to be
       ordered within that CPU.  This means that for:
@@@ -1673,8 -1673,8 +1673,8 @@@ There are some more advanced barrier fu
   (*) smp_store_mb(var, value)
  
       This assigns the value to the variable and then inserts a full memory
-      barrier after it, depending on the function.  It isn't guaranteed to
-      insert anything more than a compiler barrier in a UP compilation.
+      barrier after it.  It isn't guaranteed to insert anything more than a
+      compiler barrier in a UP compilation.
  
  
   (*) smp_mb__before_atomic();
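
For context, the first memory-barriers.txt hunk above keeps the dependent-load
example written in terms of READ_ONCE() and smp_read_barrier_depends(), while
repeating the advice to use rcu_dereference() instead of open-coding the
barrier. A minimal illustrative sketch of that advice follows; gp and ->data
are made-up names for illustration, not part of this patch:

	struct foo *q;
	int d = 0;

	rcu_read_lock();
	q = rcu_dereference(gp);	/* volatile read + dependency ordering */
	if (q)
		d = READ_ONCE(q->data);	/* dependent load, ordered after the pointer load */
	rcu_read_unlock();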
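
The smp_store_mb() hunk drops the "depending on the function" qualifier: on
SMP the store is always followed by a full barrier. For orientation only, the
generic fallback is commonly of the following shape; architectures may
override it, so treat this as an assumption rather than the definition in this
tree:

	#ifndef smp_store_mb
	#define smp_store_mb(var, value)		\
	do {						\
		WRITE_ONCE(var, value);			\
		smp_mb();				\
	} while (0)
	#endif
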
diff --combined kernel/sched/core.c
index 1ef0d7aeab4776801afae4cab1651dcc4f69378a,91db75018652ca6d7c1274bf7798d4c8ed725745..34cb9f7fc2d28d81fcb857a6617a903f67b0d13e
@@@ -1905,6 -1905,97 +1905,97 @@@ static void ttwu_queue(struct task_stru
        raw_spin_unlock(&rq->lock);
  }
  
+ /*
+  * Notes on Program-Order guarantees on SMP systems.
+  *
+  *  MIGRATION
+  *
+  * The basic program-order guarantee on SMP systems is that when a task [t]
+  * migrates, all its activity on its old cpu [c0] happens-before any subsequent
+  * execution on its new cpu [c1].
+  *
+  * For migration (of runnable tasks) this is provided by the following means:
+  *
+  *  A) UNLOCK of the rq(c0)->lock scheduling out task t
+  *  B) migration for t is required to synchronize *both* rq(c0)->lock and
+  *     rq(c1)->lock (if not at the same time, then in that order).
+  *  C) LOCK of the rq(c1)->lock scheduling in task
+  *
+  * Transitivity guarantees that B happens after A and C after B.
+  * Note: we only require RCpc transitivity.
+  * Note: the cpu doing B need not be c0 or c1
+  *
+  * Example:
+  *
+  *   CPU0            CPU1            CPU2
+  *
+  *   LOCK rq(0)->lock
+  *   sched-out X
+  *   sched-in Y
+  *   UNLOCK rq(0)->lock
+  *
+  *                                   LOCK rq(0)->lock // orders against CPU0
+  *                                   dequeue X
+  *                                   UNLOCK rq(0)->lock
+  *
+  *                                   LOCK rq(1)->lock
+  *                                   enqueue X
+  *                                   UNLOCK rq(1)->lock
+  *
+  *                   LOCK rq(1)->lock // orders against CPU2
+  *                   sched-out Z
+  *                   sched-in X
+  *                   UNLOCK rq(1)->lock
+  *
+  *
+  *  BLOCKING -- aka. SLEEP + WAKEUP
+  *
+  * For blocking we (obviously) need to provide the same guarantee as for
+  * migration. However the means are completely different as there is no lock
+  * chain to provide order. Instead we do:
+  *
+  *   1) smp_store_release(X->on_cpu, 0)
+  *   2) smp_cond_acquire(!X->on_cpu)
+  *
+  * Example:
+  *
+  *   CPU0 (schedule)  CPU1 (try_to_wake_up) CPU2 (schedule)
+  *
+  *   LOCK rq(0)->lock LOCK X->pi_lock
+  *   dequeue X
+  *   sched-out X
+  *   smp_store_release(X->on_cpu, 0);
+  *
+  *                    smp_cond_acquire(!X->on_cpu);
+  *                    X->state = WAKING
+  *                    set_task_cpu(X,2)
+  *
+  *                    LOCK rq(2)->lock
+  *                    enqueue X
+  *                    X->state = RUNNING
+  *                    UNLOCK rq(2)->lock
+  *
+  *                                          LOCK rq(2)->lock // orders against CPU1
+  *                                          sched-out Z
+  *                                          sched-in X
+  *                                          UNLOCK rq(2)->lock
+  *
+  *                    UNLOCK X->pi_lock
+  *   UNLOCK rq(0)->lock
+  *
+  *
+  * However, for wakeups there is a second guarantee we must provide, namely we
+  * must observe the state that led to our wakeup. That is, not only must our
+  * task observe its own prior state, it must also observe the stores prior to
+  * its wakeup.
+  *
+  * This means that any means of doing remote wakeups must order the CPU doing
+  * the wakeup against the CPU the task is going to end up running on. This,
+  * however, is already required for the regular Program-Order guarantee above,
+  * since the waking CPU is the one issuing the ACQUIRE (smp_cond_acquire).
+  *
+  */
  /**
   * try_to_wake_up - wake up a thread
   * @p: the thread to be awakened
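
The BLOCKING half of the comment block added above boils down to a
release/acquire handshake on ->on_cpu. A condensed sketch of that pairing is
shown below; the helper names are hypothetical, and the real code lives in
finish_lock_switch() and try_to_wake_up():

	/* CPU scheduling the task out (roughly finish_lock_switch()): */
	static inline void sketch_sched_out(struct task_struct *prev)
	{
		/* everything prev did on this cpu happens before the release */
		smp_store_release(&prev->on_cpu, 0);		/* step 1 */
	}

	/* CPU performing the wakeup (roughly try_to_wake_up()): */
	static inline void sketch_wake(struct task_struct *p, int new_cpu)
	{
		smp_cond_acquire(!p->on_cpu);			/* step 2: spin + ACQUIRE */
		/* only now is it safe to set_task_cpu(p, new_cpu) and enqueue p */
	}
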
@@@ -1968,19 -2059,13 +2059,13 @@@ try_to_wake_up(struct task_struct *p, u
        /*
         * If the owning (remote) cpu is still in the middle of schedule() with
        * this task as prev, wait until it's done referencing the task.
-        */
-       while (p->on_cpu)
-               cpu_relax();
-       /*
-        * Combined with the control dependency above, we have an effective
-        * smp_load_acquire() without the need for full barriers.
         *
         * Pairs with the smp_store_release() in finish_lock_switch().
         *
         * This ensures that tasks getting woken will be fully ordered against
         * their previous state and preserve Program Order.
         */
-       smp_rmb();
+       smp_cond_acquire(!p->on_cpu);
  
        p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
@@@ -3109,6 -3194,7 +3194,6 @@@ static void __sched notrace __schedule(
  
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
 -      rcu_note_context_switch();
        prev = rq->curr;
  
        /*
        if (sched_feat(HRTICK))
                hrtick_clear(rq);
  
 +      local_irq_disable();
 +      rcu_note_context_switch();
 +
        /*
         * Make sure that signal_pending_state()->signal_pending() below
         * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
         * done by the caller to avoid the race with signal_wake_up().
         */
        smp_mb__before_spinlock();
 -      raw_spin_lock_irq(&rq->lock);
 +      raw_spin_lock(&rq->lock);
        lockdep_pin_lock(&rq->lock);
  
        rq->clock_skip_update <<= 1; /* promote REQ to ACT */
@@@ -8243,12 -8326,12 +8328,12 @@@ static void cpu_cgroup_fork(struct task
        sched_move_task(task);
  }
  
 -static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
 -                               struct cgroup_taskset *tset)
 +static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
  {
        struct task_struct *task;
 +      struct cgroup_subsys_state *css;
  
 -      cgroup_taskset_for_each(task, tset) {
 +      cgroup_taskset_for_each(task, css, tset) {
  #ifdef CONFIG_RT_GROUP_SCHED
                if (!sched_rt_can_attach(css_tg(css), task))
                        return -EINVAL;
        return 0;
  }
  
 -static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
 -                            struct cgroup_taskset *tset)
 +static void cpu_cgroup_attach(struct cgroup_taskset *tset)
  {
        struct task_struct *task;
 +      struct cgroup_subsys_state *css;
  
 -      cgroup_taskset_for_each(task, tset)
 +      cgroup_taskset_for_each(task, css, tset)
                sched_move_task(task);
  }
  