Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

author Linus Torvalds <[email protected]>

Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)

committer Linus Torvalds <[email protected]>

Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
author Linus Torvalds <[email protected]>
Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
committer Linus Torvalds <[email protected]>
Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
diff --combined Documentation/memory-barriers.txt

index eafa6a53f72cb142ffcec8f42ae604ff459786bc,18fc860df1beeafe93abfb50060957144697a6b4..2ba8461b0631de759fefd2a12918a6c4f4ee7562
--- 1/Documentation/memory-barriers.txt
--- 2/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@@ -194,22 -194,22 +194,22 @@@ There are some minimal guarantees that 
    (*) On any given CPU, dependent memory accesses will be issued in order, with
        respect to itself.  This means that for:
   
- -      ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q);
+ +      WRITE_ONCE(Q, P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
   
        the CPU will issue the following memory operations:
   
         Q = LOAD P, D = LOAD *Q
   
        and always in that order.  On most systems, smp_read_barrier_depends()
- -     does nothing, but it is required for DEC Alpha.  The ACCESS_ONCE()
- -     is required to prevent compiler mischief.  Please note that you
- -     should normally use something like rcu_dereference() instead of
- -     open-coding smp_read_barrier_depends().
+ +     does nothing, but it is required for DEC Alpha.  The READ_ONCE()
+ +     and WRITE_ONCE() are required to prevent compiler mischief.  Please
+ +     note that you should normally use something like rcu_dereference()
+ +     instead of open-coding smp_read_barrier_depends().
   
    (*) Overlapping loads and stores within a particular CPU will appear to be
        ordered within that CPU.  This means that for:
   
- -      a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b;
+ +      a = READ_ONCE(*X); WRITE_ONCE(*X, b);
   
        the CPU will only issue the following sequence of memory operations:
   
@@@ -217,7 -217,7 +217,7 @@@
   
        And for:
   
- -      ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X);
+ +      WRITE_ONCE(*X, c); d = READ_ONCE(*X);
   
        the CPU will only issue:
   
@@@ -228,11 -228,11 +228,11 @@@
   
   And there are a number of things that _must_ or _must_not_ be assumed:
   
- - (*) It _must_not_ be assumed that the compiler will do what you want with
- -     memory references that are not protected by ACCESS_ONCE().  Without
- -     ACCESS_ONCE(), the compiler is within its rights to do all sorts
- -     of "creative" transformations, which are covered in the Compiler
- -     Barrier section.
+ + (*) It _must_not_ be assumed that the compiler will do what you want
+ +     with memory references that are not protected by READ_ONCE() and
+ +     WRITE_ONCE().  Without them, the compiler is within its rights to
+ +     do all sorts of "creative" transformations, which are covered in
+ +     the Compiler Barrier section.
   
    (*) It _must_not_ be assumed that independent loads and stores will be issued
        in the order given.  This means that for:
@@@ -520,8 -520,8 +520,8 @@@ following sequence of events
         { A == 1, B == 2, C = 3, P == &A, Q == &C }
         B = 4;
         <write barrier>
- -      ACCESS_ONCE(P) = &B
- -                            Q = ACCESS_ONCE(P);
+ +      WRITE_ONCE(P, &B);
+ +                            Q = READ_ONCE(P);
                               D = *Q;
   
   There's a clear data dependency here, and it would seem that by the end of the
@@@ -547,8 -547,8 +547,8 @@@ between the address load and the data l
         { A == 1, B == 2, C = 3, P == &A, Q == &C }
         B = 4;
         <write barrier>
- -      ACCESS_ONCE(P) = &B
- -                            Q = ACCESS_ONCE(P);
+ +      WRITE_ONCE(P, &B);
+ +                            Q = READ_ONCE(P);
                               <data dependency barrier>
                               D = *Q;
   
@@@ -574,8 -574,8 +574,8 @@@ access
         { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
         M[1] = 4;
         <write barrier>
- -      ACCESS_ONCE(P) = 1
- -                            Q = ACCESS_ONCE(P);
+ +      WRITE_ONCE(P, 1);
+ +                            Q = READ_ONCE(P);
                               <data dependency barrier>
                               D = M[Q];
   
@@@ -596,10 -596,10 +596,10 @@@ A load-load control dependency require
   simply a data dependency barrier to make it work correctly.  Consider the
   following bit of code:
   
- -      q = ACCESS_ONCE(a);
+ +      q = READ_ONCE(a);
         if (q) {
                 <data dependency barrier>  /* BUG: No data dependency!!! */
- -              p = ACCESS_ONCE(b);
+ +              p = READ_ONCE(b);
         }
   
   This will not have the desired effect because there is no actual data
@@@ -608,10 -608,10 +608,10 @@@ by attempting to predict the outcome i
   the load from b as having happened before the load from a.  In such a
   case what's actually required is:
   
- -      q = ACCESS_ONCE(a);
+ +      q = READ_ONCE(a);
         if (q) {
                 <read barrier>
- -              p = ACCESS_ONCE(b);
+ +              p = READ_ONCE(b);
         }
   
   However, stores are not speculated.  This means that ordering -is- provided
@@@ -619,7 -619,7 +619,7 @@@ for load-store control dependencies, a
   
         q = READ_ONCE_CTRL(a);
         if (q) {
- -              ACCESS_ONCE(b) = p;
+ +              WRITE_ONCE(b, p);
         }
   
   Control dependencies pair normally with other types of barriers.  That
@@@ -647,11 -647,11 +647,11 @@@ branches of the "if" statement as follo
         q = READ_ONCE_CTRL(a);
         if (q) {
                 barrier();
- -              ACCESS_ONCE(b) = p;
+ +              WRITE_ONCE(b, p);
                 do_something();
         } else {
                 barrier();
- -              ACCESS_ONCE(b) = p;
+ +              WRITE_ONCE(b, p);
                 do_something_else();
         }
   
@@@ -660,12 -660,12 +660,12 @@@ optimization levels
   
         q = READ_ONCE_CTRL(a);
         barrier();
- -      ACCESS_ONCE(b) = p;  /* BUG: No ordering vs. load from a!!! */
+ +      WRITE_ONCE(b, p);  /* BUG: No ordering vs. load from a!!! */
         if (q) {
- -              /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+ +              /* WRITE_ONCE(b, p); -- moved up, BUG!!! */
                 do_something();
         } else {
- -              /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+ +              /* WRITE_ONCE(b, p); -- moved up, BUG!!! */
                 do_something_else();
         }
   
@@@ -676,7 -676,7 +676,7 @@@ assembly code even after all compiler o
   Therefore, if you need ordering in this example, you need explicit
   memory barriers, for example, smp_store_release():
   
- -      q = ACCESS_ONCE(a);
+ +      q = READ_ONCE(a);
         if (q) {
                 smp_store_release(&b, p);
                 do_something();
@@@ -690,10 -690,10 +690,10 @@@ ordering is guaranteed only when the st
   
         q = READ_ONCE_CTRL(a);
         if (q) {
- -              ACCESS_ONCE(b) = p;
+ +              WRITE_ONCE(b, p);
                 do_something();
         } else {
- -              ACCESS_ONCE(b) = r;
+ +              WRITE_ONCE(b, r);
                 do_something_else();
         }
   
@@@ -706,10 -706,10 +706,10 @@@ the needed conditional.  For example
   
         q = READ_ONCE_CTRL(a);
         if (q % MAX) {
- -              ACCESS_ONCE(b) = p;
+ +              WRITE_ONCE(b, p);
                 do_something();
         } else {
- -              ACCESS_ONCE(b) = r;
+ +              WRITE_ONCE(b, r);
                 do_something_else();
         }
   
@@@ -718,7 -718,7 +718,7 @@@ equal to zero, in which case the compil
   transform the above code into the following:
   
         q = READ_ONCE_CTRL(a);
- -      ACCESS_ONCE(b) = p;
+ +      WRITE_ONCE(b, p);
         do_something_else();
   
   Given this transformation, the CPU is not required to respect the ordering
@@@ -731,10 -731,10 +731,10 @@@ one, perhaps as follows
         q = READ_ONCE_CTRL(a);
         BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
         if (q % MAX) {
- -              ACCESS_ONCE(b) = p;
+ +              WRITE_ONCE(b, p);
                 do_something();
         } else {
- -              ACCESS_ONCE(b) = r;
+ +              WRITE_ONCE(b, r);
                 do_something_else();
         }
   
@@@ -746,18 -746,18 +746,18 @@@ You must also be careful not to rely to
   evaluation.  Consider this example:
   
         q = READ_ONCE_CTRL(a);
- -      if (a || 1 > 0)
- -              ACCESS_ONCE(b) = 1;
+ +      if (q || 1 > 0)
+ +              WRITE_ONCE(b, 1);
   
   Because the first condition cannot fault and the second condition is
   always true, the compiler can transform this example as follows,
   defeating control dependency:
   
         q = READ_ONCE_CTRL(a);
- -      ACCESS_ONCE(b) = 1;
+ +      WRITE_ONCE(b, 1);
   
   This example underscores the need to ensure that the compiler cannot
- -out-guess your code.  More generally, although ACCESS_ONCE() does force
+ +out-guess your code.  More generally, although READ_ONCE() does force
   the compiler to actually emit code for a given load, it does not force
   the compiler to use the results.
   
@@@ -769,7 -769,7 +769,7 @@@ x and y both being zero
         =======================   =======================
         r1 = READ_ONCE_CTRL(x);   r2 = READ_ONCE_CTRL(y);
         if (r1 > 0)               if (r2 > 0)
- -        ACCESS_ONCE(y) = 1;       ACCESS_ONCE(x) = 1;
+ +        WRITE_ONCE(y, 1);         WRITE_ONCE(x, 1);
   
         assert(!(r1 == 1 && r2 == 1));
   
@@@ -779,7 -779,7 +779,7 @@@ then adding the following CPU would gua
   
         CPU 2
         =====================
- -      ACCESS_ONCE(x) = 2;
+ +      WRITE_ONCE(x, 2);
   
         assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
   
@@@ -798,7 -798,8 +798,7 @@@ In summary
   
     (*) Control dependencies must be headed by READ_ONCE_CTRL().
         Or, as a much less preferable alternative, interpose
- -      be headed by READ_ONCE() or an ACCESS_ONCE() read and must
- -      have smp_read_barrier_depends() between this read and the
+ +      smp_read_barrier_depends() between a READ_ONCE() and the
         control-dependent write.
   
     (*) Control dependencies can order prior loads against later stores.
@@@ -814,16 -815,15 +814,16 @@@
   
     (*) Control dependencies require at least one run-time conditional
         between the prior load and the subsequent store, and this
- -      conditional must involve the prior load.  If the compiler
- -      is able to optimize the conditional away, it will have also
- -      optimized away the ordering.  Careful use of ACCESS_ONCE() can
- -      help to preserve the needed conditional.
+ +      conditional must involve the prior load.  If the compiler is able
+ +      to optimize the conditional away, it will have also optimized
+ +      away the ordering.  Careful use of READ_ONCE_CTRL(), READ_ONCE(),
+ +      and WRITE_ONCE() can help to preserve the needed conditional.
   
     (*) Control dependencies require that the compiler avoid reordering the
- -      dependency into nonexistence.  Careful use of ACCESS_ONCE() or
- -      barrier() can help to preserve your control dependency.  Please
- -      see the Compiler Barrier section for more information.
+ +      dependency into nonexistence.  Careful use of READ_ONCE_CTRL()
+ +      or smp_read_barrier_depends() can help to preserve your control
+ +      dependency.  Please see the Compiler Barrier section for more
+ +      information.
   
     (*) Control dependencies pair normally with other types of barriers.
   
@@@ -848,11 -848,11 +848,11 @@@ barrier, an acquire barrier, a release 
   
         CPU 1                 CPU 2
         ===============       ===============
- -      ACCESS_ONCE(a) = 1;
+ +      WRITE_ONCE(a, 1);
         <write barrier>
- -      ACCESS_ONCE(b) = 2;   x = ACCESS_ONCE(b);
+ +      WRITE_ONCE(b, 2);     x = READ_ONCE(b);
                               <read barrier>
- -                            y = ACCESS_ONCE(a);
+ +                            y = READ_ONCE(a);
   
   Or:
   
@@@ -860,7 -860,7 +860,7 @@@
         ===============       ===============================
         a = 1;
         <write barrier>
- -      ACCESS_ONCE(b) = &a;  x = ACCESS_ONCE(b);
+ +      WRITE_ONCE(b, &a);    x = READ_ONCE(b);
                               <data dependency barrier>
                               y = *x;
   
@@@ -868,11 -868,11 +868,11 @@@ Or even
   
         CPU 1                 CPU 2
         ===============       ===============================
- -      r1 = ACCESS_ONCE(y);
+ +      r1 = READ_ONCE(y);
         <general barrier>
- -      ACCESS_ONCE(y) = 1;   if (r2 = ACCESS_ONCE(x)) {
+ +      WRITE_ONCE(y, 1);     if (r2 = READ_ONCE(x)) {
                                  <implicit control dependency>
- -                               ACCESS_ONCE(y) = 1;
+ +                               WRITE_ONCE(y, 1);
                               }
   
         assert(r1 == 0 || r2 == 0);
@@@ -886,11 -886,11 +886,11 @@@ versa
   
         CPU 1                               CPU 2
         ===================                 ===================
- -      ACCESS_ONCE(a) = 1;  }----   --->{  v = ACCESS_ONCE(c);
- -      ACCESS_ONCE(b) = 2;  }    \ /    {  w = ACCESS_ONCE(d);
+ +      WRITE_ONCE(a, 1);    }----   --->{  v = READ_ONCE(c);
+ +      WRITE_ONCE(b, 2);    }    \ /    {  w = READ_ONCE(d);
         <write barrier>            \        <read barrier>
- -      ACCESS_ONCE(c) = 3;  }    / \    {  x = ACCESS_ONCE(a);
- -      ACCESS_ONCE(d) = 4;  }----   --->{  y = ACCESS_ONCE(b);
+ +      WRITE_ONCE(c, 3);    }    / \    {  x = READ_ONCE(a);
+ +      WRITE_ONCE(d, 4);    }----   --->{  y = READ_ONCE(b);
   
   
   EXAMPLES OF MEMORY BARRIER SEQUENCES
@@@ -1340,10 -1340,10 +1340,10 @@@ compiler from moving the memory accesse
   
         barrier();
   
- -This is a general barrier -- there are no read-read or write-write variants
- -of barrier().  However, ACCESS_ONCE() can be thought of as a weak form
- -for barrier() that affects only the specific accesses flagged by the
- -ACCESS_ONCE().
+ +This is a general barrier -- there are no read-read or write-write
+ +variants of barrier().  However, READ_ONCE() and WRITE_ONCE() can be
+ +thought of as weak forms of barrier() that affect only the specific
+ +accesses flagged by the READ_ONCE() or WRITE_ONCE().
   
   The barrier() function has the following effects:
   
@@@ -1355,10 -1355,9 +1355,10 @@@
    (*) Within a loop, forces the compiler to load the variables used
        in that loop's conditional on each pass through that loop.
   
- -The ACCESS_ONCE() function can prevent any number of optimizations that,
- -while perfectly safe in single-threaded code, can be fatal in concurrent
- -code.  Here are some examples of these sorts of optimizations:
+ +The READ_ONCE() and WRITE_ONCE() functions can prevent any number of
+ +optimizations that, while perfectly safe in single-threaded code, can
+ +be fatal in concurrent code.  Here are some examples of these sorts
+ +of optimizations:
   
    (*) The compiler is within its rights to reorder loads and stores
        to the same variable, and in some cases, the CPU is within its
@@@ -1371,11 -1370,11 +1371,11 @@@
        Might result in an older value of x stored in a[1] than in a[0].
        Prevent both the compiler and the CPU from doing this as follows:
   
- -      a[0] = ACCESS_ONCE(x);
- -      a[1] = ACCESS_ONCE(x);
+ +      a[0] = READ_ONCE(x);
+ +      a[1] = READ_ONCE(x);
   
- -     In short, ACCESS_ONCE() provides cache coherence for accesses from
- -     multiple CPUs to a single variable.
+ +     In short, READ_ONCE() and WRITE_ONCE() provide cache coherence for
+ +     accesses from multiple CPUs to a single variable.
   
    (*) The compiler is within its rights to merge successive loads from
        the same variable.  Such merging can cause the compiler to "optimize"
@@@ -1392,9 -1391,9 +1392,9 @@@
                 for (;;)
                         do_something_with(tmp);
   
- -     Use ACCESS_ONCE() to prevent the compiler from doing this to you:
+ +     Use READ_ONCE() to prevent the compiler from doing this to you:
   
- -      while (tmp = ACCESS_ONCE(a))
+ +      while (tmp = READ_ONCE(a))
                 do_something_with(tmp);
   
    (*) The compiler is within its rights to reload a variable, for example,
@@@ -1416,9 -1415,9 +1416,9 @@@
        a was modified by some other CPU between the "while" statement and
        the call to do_something_with().
   
- -     Again, use ACCESS_ONCE() to prevent the compiler from doing this:
+ +     Again, use READ_ONCE() to prevent the compiler from doing this:
   
- -      while (tmp = ACCESS_ONCE(a))
+ +      while (tmp = READ_ONCE(a))
                 do_something_with(tmp);
   
        Note that if the compiler runs short of registers, it might save
@@@ -1438,21 -1437,21 +1438,21 @@@
   
         do { } while (0);
   
- -     This transformation is a win for single-threaded code because it gets
- -     rid of a load and a branch.  The problem is that the compiler will
- -     carry out its proof assuming that the current CPU is the only one
- -     updating variable 'a'.  If variable 'a' is shared, then the compiler's
- -     proof will be erroneous.  Use ACCESS_ONCE() to tell the compiler
- -     that it doesn't know as much as it thinks it does:
+ +     This transformation is a win for single-threaded code because it
+ +     gets rid of a load and a branch.  The problem is that the compiler
+ +     will carry out its proof assuming that the current CPU is the only
+ +     one updating variable 'a'.  If variable 'a' is shared, then the
+ +     compiler's proof will be erroneous.  Use READ_ONCE() to tell the
+ +     compiler that it doesn't know as much as it thinks it does:
   
- -      while (tmp = ACCESS_ONCE(a))
+ +      while (tmp = READ_ONCE(a))
                 do_something_with(tmp);
   
        But please note that the compiler is also closely watching what you
- -     do with the value after the ACCESS_ONCE().  For example, suppose you
+ +     do with the value after the READ_ONCE().  For example, suppose you
        do the following and MAX is a preprocessor macro with the value 1:
   
- -      while ((tmp = ACCESS_ONCE(a)) % MAX)
+ +      while ((tmp = READ_ONCE(a)) % MAX)
                 do_something_with(tmp);
   
        Then the compiler knows that the result of the "%" operator applied
@@@ -1476,12 -1475,12 +1476,12 @@@
        surprise if some other CPU might have stored to variable 'a' in the
        meantime.
   
- -     Use ACCESS_ONCE() to prevent the compiler from making this sort of
+ +     Use WRITE_ONCE() to prevent the compiler from making this sort of
        wrong guess:
   
- -      ACCESS_ONCE(a) = 0;
+ +      WRITE_ONCE(a, 0);
         /* Code that does not store to variable a. */
- -      ACCESS_ONCE(a) = 0;
+ +      WRITE_ONCE(a, 0);
   
    (*) The compiler is within its rights to reorder memory accesses unless
        you tell it not to.  For example, consider the following interaction
@@@ -1510,43 -1509,40 +1510,43 @@@
         }
   
        If the interrupt occurs between these two statements, then
- -     interrupt_handler() might be passed a garbled msg.  Use ACCESS_ONCE()
+ +     interrupt_handler() might be passed a garbled msg.  Use WRITE_ONCE()
        to prevent this as follows:
   
         void process_level(void)
         {
- -              ACCESS_ONCE(msg) = get_message();
- -              ACCESS_ONCE(flag) = true;
+ +              WRITE_ONCE(msg, get_message());
+ +              WRITE_ONCE(flag, true);
         }
   
         void interrupt_handler(void)
         {
- -              if (ACCESS_ONCE(flag))
- -                      process_message(ACCESS_ONCE(msg));
+ +              if (READ_ONCE(flag))
+ +                      process_message(READ_ONCE(msg));
         }
   
- -     Note that the ACCESS_ONCE() wrappers in interrupt_handler()
- -     are needed if this interrupt handler can itself be interrupted
- -     by something that also accesses 'flag' and 'msg', for example,
- -     a nested interrupt or an NMI.  Otherwise, ACCESS_ONCE() is not
- -     needed in interrupt_handler() other than for documentation purposes.
- -     (Note also that nested interrupts do not typically occur in modern
- -     Linux kernels, in fact, if an interrupt handler returns with
- -     interrupts enabled, you will get a WARN_ONCE() splat.)
- -
- -     You should assume that the compiler can move ACCESS_ONCE() past
- -     code not containing ACCESS_ONCE(), barrier(), or similar primitives.
- -
- -     This effect could also be achieved using barrier(), but ACCESS_ONCE()
- -     is more selective:  With ACCESS_ONCE(), the compiler need only forget
- -     the contents of the indicated memory locations, while with barrier()
- -     the compiler must discard the value of all memory locations that
- -     it has currented cached in any machine registers.  Of course,
- -     the compiler must also respect the order in which the ACCESS_ONCE()s
- -     occur, though the CPU of course need not do so.
+ +     Note that the READ_ONCE() and WRITE_ONCE() wrappers in
+ +     interrupt_handler() are needed if this interrupt handler can itself
+ +     be interrupted by something that also accesses 'flag' and 'msg',
+ +     for example, a nested interrupt or an NMI.  Otherwise, READ_ONCE()
+ +     and WRITE_ONCE() are not needed in interrupt_handler() other than
+ +     for documentation purposes.  (Note also that nested interrupts
+ +     do not typically occur in modern Linux kernels, in fact, if an
+ +     interrupt handler returns with interrupts enabled, you will get a
+ +     WARN_ONCE() splat.)
+ +
+ +     You should assume that the compiler can move READ_ONCE() and
+ +     WRITE_ONCE() past code not containing READ_ONCE(), WRITE_ONCE(),
+ +     barrier(), or similar primitives.
+ +
+ +     This effect could also be achieved using barrier(), but READ_ONCE()
+ +     and WRITE_ONCE() are more selective:  With READ_ONCE() and
+ +     WRITE_ONCE(), the compiler need only forget the contents of the
+ +     indicated memory locations, while with barrier() the compiler must
+ +     discard the value of all memory locations that it has currently
+ +     cached in any machine registers.  Of course, the compiler must also
+ +     respect the order in which the READ_ONCE()s and WRITE_ONCE()s occur,
+ +     though the CPU of course need not do so.
   
    (*) The compiler is within its rights to invent stores to a variable,
        as in the following example:
@@@ -1566,16 -1562,16 +1566,16 @@@
        a branch.  Unfortunately, in concurrent code, this optimization
        could cause some other CPU to see a spurious value of 42 -- even
        if variable 'a' was never zero -- when loading variable 'b'.
- -     Use ACCESS_ONCE() to prevent this as follows:
+ +     Use WRITE_ONCE() to prevent this as follows:
   
         if (a)
- -              ACCESS_ONCE(b) = a;
+ +              WRITE_ONCE(b, a);
         else
- -              ACCESS_ONCE(b) = 42;
+ +              WRITE_ONCE(b, 42);
   
        The compiler can also invent loads.  These are usually less
        damaging, but they can result in cache-line bouncing and thus in
- -     poor performance and scalability.  Use ACCESS_ONCE() to prevent
+ +     poor performance and scalability.  Use READ_ONCE() to prevent
        invented loads.
   
    (*) For aligned memory locations whose size allows them to be accessed
@@@ -1594,9 -1590,9 +1594,9 @@@
        This optimization can therefore be a win in single-threaded code.
        In fact, a recent bug (since fixed) caused GCC to incorrectly use
        this optimization in a volatile store.  In the absence of such bugs,
- -     use of ACCESS_ONCE() prevents store tearing in the following example:
+ +     use of WRITE_ONCE() prevents store tearing in the following example:
   
- -      ACCESS_ONCE(p) = 0x00010002;
+ +      WRITE_ONCE(p, 0x00010002);
   
        Use of packed structures can also result in load and store tearing,
        as in this example:
@@@ -1613,23 -1609,22 +1613,23 @@@
         foo2.b = foo1.b;
         foo2.c = foo1.c;
   
- -     Because there are no ACCESS_ONCE() wrappers and no volatile markings,
- -     the compiler would be well within its rights to implement these three
- -     assignment statements as a pair of 32-bit loads followed by a pair
- -     of 32-bit stores.  This would result in load tearing on 'foo1.b'
- -     and store tearing on 'foo2.b'.  ACCESS_ONCE() again prevents tearing
- -     in this example:
+ +     Because there are no READ_ONCE() or WRITE_ONCE() wrappers and no
+ +     volatile markings, the compiler would be well within its rights to
+ +     implement these three assignment statements as a pair of 32-bit
+ +     loads followed by a pair of 32-bit stores.  This would result in
+ +     load tearing on 'foo1.b' and store tearing on 'foo2.b'.  READ_ONCE()
+ +     and WRITE_ONCE() again prevent tearing in this example:
   
         foo2.a = foo1.a;
- -      ACCESS_ONCE(foo2.b) = ACCESS_ONCE(foo1.b);
+ +      WRITE_ONCE(foo2.b, READ_ONCE(foo1.b));
         foo2.c = foo1.c;
   
- -All that aside, it is never necessary to use ACCESS_ONCE() on a variable
- -that has been marked volatile.  For example, because 'jiffies' is marked
- -volatile, it is never necessary to say ACCESS_ONCE(jiffies).  The reason
- -for this is that ACCESS_ONCE() is implemented as a volatile cast, which
- -has no effect when its argument is already marked volatile.
+ +All that aside, it is never necessary to use READ_ONCE() and
+ +WRITE_ONCE() on a variable that has been marked volatile.  For example,
+ +because 'jiffies' is marked volatile, it is never necessary to
+ +say READ_ONCE(jiffies).  The reason for this is that READ_ONCE() and
+ +WRITE_ONCE() are implemented as volatile casts, which have no effect when
+ +their argument is already marked volatile.
   
   Please note that these compiler barriers have no direct effect on the CPU,
   which may then reorder things however it wishes.
@@@ -1651,15 -1646,14 +1651,15 @@@ The Linux kernel has eight basic CPU me
   All memory barriers except the data dependency barriers imply a compiler
   barrier. Data dependencies do not impose any additional compiler ordering.
   
- -Aside: In the case of data dependencies, the compiler would be expected to
- -issue the loads in the correct order (eg. `a[b]` would have to load the value
- -of b before loading a[b]), however there is no guarantee in the C specification
- -that the compiler may not speculate the value of b (eg. is equal to 1) and load
- -a before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the
- -problem of a compiler reloading b after having loaded a[b], thus having a newer
- -copy of b than a[b]. A consensus has not yet been reached about these problems,
- -however the ACCESS_ONCE macro is a good place to start looking.
+ +Aside: In the case of data dependencies, the compiler would be expected
+ +to issue the loads in the correct order (eg. `a[b]` would have to load
+ +the value of b before loading a[b]), however there is no guarantee in
+ +the C specification that the compiler may not speculate the value of b
+ +(eg. is equal to 1) and load a before b (eg. tmp = a[1]; if (b != 1)
+ +tmp = a[b]; ). There is also the problem of a compiler reloading b after
+ +having loaded a[b], thus having a newer copy of b than a[b]. A consensus
+ +has not yet been reached about these problems, however the READ_ONCE()
+ +macro is a good place to start looking.
   
   SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
   systems because it is assumed that a CPU will appear to be self-consistent,
@@@ -1854,10 -1848,15 +1854,10 @@@ RELEASE are to the same lock variable, 
   another CPU not holding that lock.  In short, an ACQUIRE followed by a
   RELEASE may -not- be assumed to be a full memory barrier.
   
- -Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not
- -imply a full memory barrier.  If it is necessary for a RELEASE-ACQUIRE
- -pair to produce a full barrier, the ACQUIRE can be followed by an
- -smp_mb__after_unlock_lock() invocation.  This will produce a full barrier
- -if either (a) the RELEASE and the ACQUIRE are executed by the same
- -CPU or task, or (b) the RELEASE and ACQUIRE act on the same variable.
- -The smp_mb__after_unlock_lock() primitive is free on many architectures.
- -Without smp_mb__after_unlock_lock(), the CPU's execution of the critical
- -sections corresponding to the RELEASE and the ACQUIRE can cross, so that:
+ +Similarly, the reverse case of a RELEASE followed by an ACQUIRE does
+ +not imply a full memory barrier.  Therefore, the CPU's execution of the
+ +critical sections corresponding to the RELEASE and the ACQUIRE can cross,
+ +so that:
   
         *A = a;
         RELEASE M
@@@ -1895,6 -1894,29 +1895,6 @@@ the RELEASE would simply complete, ther
         a sleep-unlock race, but the locking primitive needs to resolve
         such races properly in any case.
   
- -With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
- -For example, with the following code, the store to *A will always be
- -seen by other CPUs before the store to *B:
- -
- -      *A = a;
- -      RELEASE M
- -      ACQUIRE N
- -      smp_mb__after_unlock_lock();
- -      *B = b;
- -
- -The operations will always occur in one of the following orders:
- -
- -      STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
- -      STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
- -      ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
- -
- -If the RELEASE and ACQUIRE were instead both operating on the same lock
- -variable, only the first of these alternatives can occur.  In addition,
- -the more strongly ordered systems may rule out some of the above orders.
- -But in any case, as noted earlier, the smp_mb__after_unlock_lock()
- -ensures that the store to *A will always be seen as happening before
- -the store to *B.
- -
   Locks and semaphores may not provide any guarantee of ordering on UP compiled
   systems, and so cannot be counted on in such a situation to actually achieve
   anything at all - especially with respect to I/O accesses - unless combined
@@@ -2104,12 -2126,12 +2104,12 @@@ three CPUs; then should the following s
   
         CPU 1                           CPU 2
         =============================== ===============================
- -      ACCESS_ONCE(*A) = a;            ACCESS_ONCE(*E) = e;
+ +      WRITE_ONCE(*A, a);              WRITE_ONCE(*E, e);
         ACQUIRE M                       ACQUIRE Q
- -      ACCESS_ONCE(*B) = b;            ACCESS_ONCE(*F) = f;
- -      ACCESS_ONCE(*C) = c;            ACCESS_ONCE(*G) = g;
+ +      WRITE_ONCE(*B, b);              WRITE_ONCE(*F, f);
+ +      WRITE_ONCE(*C, c);              WRITE_ONCE(*G, g);
         RELEASE M                       RELEASE Q
- -      ACCESS_ONCE(*D) = d;            ACCESS_ONCE(*H) = h;
+ +      WRITE_ONCE(*D, d);              WRITE_ONCE(*H, h);
   
   Then there is no guarantee as to what order CPU 3 will see the accesses to *A
   through *H occur in, other than the constraints imposed by the separate locks
@@@ -2125,6 -2147,40 +2125,6 @@@ But it won't see any of
         *E, *F or *G following RELEASE Q
   
   
- -However, if the following occurs:
- -
- -      CPU 1                           CPU 2
- -      =============================== ===============================
- -      ACCESS_ONCE(*A) = a;
- -      ACQUIRE M                    [1]
- -      ACCESS_ONCE(*B) = b;
- -      ACCESS_ONCE(*C) = c;
- -      RELEASE M            [1]
- -      ACCESS_ONCE(*D) = d;            ACCESS_ONCE(*E) = e;
- -                                      ACQUIRE M                    [2]
- -                                      smp_mb__after_unlock_lock();
- -                                      ACCESS_ONCE(*F) = f;
- -                                      ACCESS_ONCE(*G) = g;
- -                                      RELEASE M            [2]
- -                                      ACCESS_ONCE(*H) = h;
- -
- -CPU 3 might see:
- -
- -      *E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1],
- -              ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D
- -
- -But assuming CPU 1 gets the lock first, CPU 3 won't see any of:
- -
- -      *B, *C, *D, *F, *G or *H preceding ACQUIRE M [1]
- -      *A, *B or *C following RELEASE M [1]
- -      *F, *G or *H preceding ACQUIRE M [2]
- -      *A, *B, *C, *E, *F or *G following RELEASE M [2]
- -
- -Note that the smp_mb__after_unlock_lock() is critically important
- -here: Without it CPU 3 might see some of the above orderings.
- -Without smp_mb__after_unlock_lock(), the accesses are not guaranteed
- -to be seen in order unless CPU 3 holds lock M.
- -
   
   ACQUIRES VS I/O ACCESSES
   ------------------------
@@@ -2327,9 -2383,7 +2327,7 @@@ about the state (old or new) implies a
   explicit lock operations, described later).  These include:
   
         xchg();
-       cmpxchg();
         atomic_xchg();                  atomic_long_xchg();
-       atomic_cmpxchg();               atomic_long_cmpxchg();
         atomic_inc_return();            atomic_long_inc_return();
         atomic_dec_return();            atomic_long_dec_return();
         atomic_add_return();            atomic_long_add_return();
@@@ -2342,7 -2396,9 +2340,9 @@@
         test_and_clear_bit();
         test_and_change_bit();
   
-       /* when succeeds (returns 1) */
+       /* when succeeds */
+       cmpxchg();
+       atomic_cmpxchg();               atomic_long_cmpxchg();
         atomic_add_unless();            atomic_long_add_unless();
   
   These are used for such things as implementing ACQUIRE-class and RELEASE-class
@@@ -2825,11 -2881,11 +2825,11 @@@ A programmer might take it for granted 
   operations in exactly the order specified, so that if the CPU is, for example,
   given the following piece of code to execute:
   
- -      a = ACCESS_ONCE(*A);
- -      ACCESS_ONCE(*B) = b;
- -      c = ACCESS_ONCE(*C);
- -      d = ACCESS_ONCE(*D);
- -      ACCESS_ONCE(*E) = e;
+ +      a = READ_ONCE(*A);
+ +      WRITE_ONCE(*B, b);
+ +      c = READ_ONCE(*C);
+ +      d = READ_ONCE(*D);
+ +      WRITE_ONCE(*E, e);
   
   they would then expect that the CPU will complete the memory operation for each
   instruction before moving on to the next one, leading to a definite sequence of
@@@ -2876,12 -2932,12 +2876,12 @@@ However, it is guaranteed that a CPU wi
   _own_ accesses appear to be correctly ordered, without the need for a memory
   barrier.  For instance with the following code:
   
- -      U = ACCESS_ONCE(*A);
- -      ACCESS_ONCE(*A) = V;
- -      ACCESS_ONCE(*A) = W;
- -      X = ACCESS_ONCE(*A);
- -      ACCESS_ONCE(*A) = Y;
- -      Z = ACCESS_ONCE(*A);
+ +      U = READ_ONCE(*A);
+ +      WRITE_ONCE(*A, V);
+ +      WRITE_ONCE(*A, W);
+ +      X = READ_ONCE(*A);
+ +      WRITE_ONCE(*A, Y);
+ +      Z = READ_ONCE(*A);
   
   and assuming no intervention by an external influence, it can be assumed that
   the final result will appear to be:
@@@ -2897,14 -2953,13 +2897,14 @@@ accesses
         U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
   
   in that order, but, without intervention, the sequence may have almost any
- -combination of elements combined or discarded, provided the program's view of
- -the world remains consistent.  Note that ACCESS_ONCE() is -not- optional
- -in the above example, as there are architectures where a given CPU might
- -reorder successive loads to the same location.  On such architectures,
- -ACCESS_ONCE() does whatever is necessary to prevent this, for example, on
- -Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the
- -special ld.acq and st.rel instructions that prevent such reordering.
+ +combination of elements combined or discarded, provided the program's view
+ +of the world remains consistent.  Note that READ_ONCE() and WRITE_ONCE()
+ +are -not- optional in the above example, as there are architectures
+ +where a given CPU might reorder successive loads to the same location.
+ +On such architectures, READ_ONCE() and WRITE_ONCE() do whatever is
+ +necessary to prevent this, for example, on Itanium the volatile casts
+ +used by READ_ONCE() and WRITE_ONCE() cause GCC to emit the special ld.acq
+ +and st.rel instructions (respectively) that prevent such reordering.
   
   The compiler may also combine, discard or defer elements of the sequence before
   the CPU even sees them.
@@@ -2918,14 -2973,13 +2918,14 @@@ may be reduced to
   
         *A = W;
   
- -since, without either a write barrier or an ACCESS_ONCE(), it can be
+ +since, without either a write barrier or a WRITE_ONCE(), it can be
   assumed that the effect of the storage of V to *A is lost.  Similarly:
   
         *A = Y;
         Z = *A;
   
- -may, without a memory barrier or an ACCESS_ONCE(), be reduced to:
+ +may, without a memory barrier or READ_ONCE() and WRITE_ONCE(), be
+ +reduced to:
   
         *A = Y;
         Z = Y;
diff --combined arch/arc/include/asm/atomic.h

index 87d18ae53115596f7b64a56a4a07a572d54c3cbd,d8a85e706fba332f82b84febf040ec46205b941f..c3ecda023e3a52a0aa0bd04cb5ba7b69c6f856a6
--- 1/arch/arc/include/asm/atomic.h
--- 2/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@@ -23,60 -23,33 +23,60 @@@
   
   #define atomic_set(v, i) (((v)->counter) = (i))
   
- -#ifdef CONFIG_ISA_ARCV2
- -#define PREFETCHW     "       prefetchw   [%1]        \n"
- -#else
- -#define PREFETCHW
+ +#ifdef CONFIG_ARC_STAR_9000923308
+ +
+ +#define SCOND_FAIL_RETRY_VAR_DEF                                              \
+ +      unsigned int delay = 1, tmp;                                            \
+ +
+ +#define SCOND_FAIL_RETRY_ASM                                                  \
+ +      "       bz      4f                      \n"                             \
+ +      "   ; --- scond fail delay ---          \n"                             \
+ +      "       mov     %[tmp], %[delay]        \n"     /* tmp = delay */       \
+ +      "2:     brne.d  %[tmp], 0, 2b           \n"     /* while (tmp != 0) */  \
+ +      "       sub     %[tmp], %[tmp], 1       \n"     /* tmp-- */             \
+ +      "       rol     %[delay], %[delay]      \n"     /* delay *= 2 */        \
+ +      "       b       1b                      \n"     /* start over */        \
+ +      "4: ; --- success ---                   \n"                             \
+ +
+ +#define SCOND_FAIL_RETRY_VARS                                                 \
+ +        ,[delay] "+&r" (delay),[tmp] "=&r"    (tmp)                           \
+ +
+ +#else /* !CONFIG_ARC_STAR_9000923308 */
+ +
+ +#define SCOND_FAIL_RETRY_VAR_DEF
+ +
+ +#define SCOND_FAIL_RETRY_ASM                                                  \
+ +      "       bnz     1b                      \n"                             \
+ +
+ +#define SCOND_FAIL_RETRY_VARS
+ +
   #endif
   
   #define ATOMIC_OP(op, c_op, asm_op)                                   \
   static inline void atomic_##op(int i, atomic_t *v)                    \
   {                                                                     \
- -      unsigned int temp;                                              \
+ +      unsigned int val;                                               \
+ +      SCOND_FAIL_RETRY_VAR_DEF                                        \
                                                                         \
         __asm__ __volatile__(                                           \
- -      "1:                             \n"                             \
- -      PREFETCHW                                                       \
- -      "       llock   %0, [%1]        \n"                             \
- -      "       " #asm_op " %0, %0, %2  \n"                             \
- -      "       scond   %0, [%1]        \n"                             \
- -      "       bnz     1b              \n"                             \
- -      : "=&r"(temp)   /* Early clobber, to prevent reg reuse */       \
- -      : "r"(&v->counter), "ir"(i)                                     \
+ +      "1:     llock   %[val], [%[ctr]]                \n"             \
+ +      "       " #asm_op " %[val], %[val], %[i]        \n"             \
+ +      "       scond   %[val], [%[ctr]]                \n"             \
+ +      "                                               \n"             \
+ +      SCOND_FAIL_RETRY_ASM                                            \
+ +                                                                      \
+ +      : [val] "=&r"   (val) /* Early clobber to prevent reg reuse */  \
+ +        SCOND_FAIL_RETRY_VARS                                         \
+ +      : [ctr] "r"     (&v->counter), /* Not "m": llock only supports reg direct addr mode */  \
+ +        [i]   "ir"    (i)                                             \
         : "cc");                                                        \
   }                                                                     \
   
   #define ATOMIC_OP_RETURN(op, c_op, asm_op)                            \
   static inline int atomic_##op##_return(int i, atomic_t *v)            \
   {                                                                     \
- -      unsigned int temp;                                              \
+ +      unsigned int val;                                               \
+ +      SCOND_FAIL_RETRY_VAR_DEF                                        \
                                                                         \
         /*                                                              \
          * Explicit full memory barrier needed before/after as          \
@@@ -85,21 -58,19 +85,21 @@@
         smp_mb();                                                       \
                                                                         \
         __asm__ __volatile__(                                           \
- -      "1:                             \n"                             \
- -      PREFETCHW                                                       \
- -      "       llock   %0, [%1]        \n"                             \
- -      "       " #asm_op " %0, %0, %2  \n"                             \
- -      "       scond   %0, [%1]        \n"                             \
- -      "       bnz     1b              \n"                             \
- -      : "=&r"(temp)                                                   \
- -      : "r"(&v->counter), "ir"(i)                                     \
+ +      "1:     llock   %[val], [%[ctr]]                \n"             \
+ +      "       " #asm_op " %[val], %[val], %[i]        \n"             \
+ +      "       scond   %[val], [%[ctr]]                \n"             \
+ +      "                                               \n"             \
+ +      SCOND_FAIL_RETRY_ASM                                            \
+ +                                                                      \
+ +      : [val] "=&r"   (val)                                           \
+ +        SCOND_FAIL_RETRY_VARS                                         \
+ +      : [ctr] "r"     (&v->counter),                                  \
+ +        [i]   "ir"    (i)                                             \
         : "cc");                                                        \
                                                                         \
         smp_mb();                                                       \
                                                                         \
- -      return temp;                                                    \
+ +      return val;                                                     \
   }
   
   #else /* !CONFIG_ARC_HAS_LLSC */
@@@ -172,16 -143,17 +172,20 @@@ static inline int atomic_##op##_return(
   
   ATOMIC_OPS(add, +=, add)
   ATOMIC_OPS(sub, -=, sub)
- ATOMIC_OP(and, &=, and)
   
- #define atomic_clear_mask(mask, v) atomic_and(~(mask), (v))
+ #define atomic_andnot atomic_andnot
+ 
+ ATOMIC_OP(and, &=, and)
+ ATOMIC_OP(andnot, &= ~, bic)
+ ATOMIC_OP(or, |=, or)
+ ATOMIC_OP(xor, ^=, xor)
   
   #undef ATOMIC_OPS
   #undef ATOMIC_OP_RETURN
   #undef ATOMIC_OP
+ +#undef SCOND_FAIL_RETRY_VAR_DEF
+ +#undef SCOND_FAIL_RETRY_ASM
+ +#undef SCOND_FAIL_RETRY_VARS
   
   /**
    * __atomic_add_unless - add unless the number is a given value
diff --combined arch/s390/kernel/jump_label.c

index c9dac2139f59efcfb101200e96bfd0f143f5e761,a83d2248fea9cf1fea3523ae79fdac97e089b8c4..083b05f5f5ab6f9b48d4028c2e8ec402e2e01af9
--- 1/arch/s390/kernel/jump_label.c
--- 2/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@@ -44,9 -44,12 +44,9 @@@ static void jump_label_bug(struct jump_
         unsigned char *ipn = (unsigned char *)new;
   
         pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
- -      pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
- -               ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
- -      pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
- -               ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
- -      pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
- -               ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
+ +      pr_emerg("Found:    %6ph\n", ipc);
+ +      pr_emerg("Expected: %6ph\n", ipe);
+ +      pr_emerg("New:      %6ph\n", ipn);
         panic("Corrupted kernel text");
   }
   
@@@ -61,7 -64,7 +61,7 @@@ static void __jump_label_transform(stru
   {
         struct insn old, new;
   
-       if (type == JUMP_LABEL_ENABLE) {
+       if (type == JUMP_LABEL_JMP) {
                 jump_label_make_nop(entry, &old);
                 jump_label_make_branch(entry, &new);
         } else {
diff --combined arch/s390/kernel/time.c

index 52524b9083c36aa869dfa01de51a20834f18893a,f5a0bd778ace1e2f9a07068f9816368fcc148402..017c3a9bfc280e2475bbeed6902ac1cf3ed8c569
--- 1/arch/s390/kernel/time.c
--- 2/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@@ -58,9 -58,6 +58,9 @@@ EXPORT_SYMBOL_GPL(sched_clock_base_cc)
   
   static DEFINE_PER_CPU(struct clock_event_device, comparators);
   
+ +ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
+ +EXPORT_SYMBOL(s390_epoch_delta_notifier);
+ +
   /*
    * Scheduler clock - returns current time in nanosec units.
    */
@@@ -120,6 -117,11 +120,6 @@@ static int s390_next_event(unsigned lon
         return 0;
   }
   
- -static void s390_set_mode(enum clock_event_mode mode,
- -                        struct clock_event_device *evt)
- -{
- -}
- -
   /*
    * Set up lowcore and control register of the current cpu to
    * enable TOD clock and clock comparator interrupts.
@@@ -143,6 -145,7 +143,6 @@@ void init_cpu_timer(void
         cd->rating              = 400;
         cd->cpumask             = cpumask_of(cpu);
         cd->set_next_event      = s390_next_event;
- -      cd->set_mode            = s390_set_mode;
   
         clockevents_register_device(cd);
   
@@@ -378,7 -381,7 +378,7 @@@ static void disable_sync_clock(void *du
          * increase the "sequence" counter to avoid the race of an
          * etr event and the complete recovery against get_sync_clock.
          */
-       atomic_clear_mask(0x80000000, sw_ptr);
+       atomic_andnot(0x80000000, sw_ptr);
         atomic_inc(sw_ptr);
   }
   
@@@ -389,7 -392,7 +389,7 @@@
   static void enable_sync_clock(void)
   {
         atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
-       atomic_set_mask(0x80000000, sw_ptr);
+       atomic_or(0x80000000, sw_ptr);
   }
   
   /*
@@@ -749,7 -752,7 +749,7 @@@ static void clock_sync_cpu(struct clock
   static int etr_sync_clock(void *data)
   {
         static int first;
- -      unsigned long long clock, old_clock, delay, delta;
+ +      unsigned long long clock, old_clock, clock_delta, delay, delta;
         struct clock_sync_data *etr_sync;
         struct etr_aib *sync_port, *aib;
         int port;
@@@ -786,9 -789,6 +786,9 @@@
                 delay = (unsigned long long)
                         (aib->edf2.etv - sync_port->edf2.etv) << 32;
                 delta = adjust_time(old_clock, clock, delay);
+ +              clock_delta = clock - old_clock;
+ +              atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0,
+ +                                         &clock_delta);
                 etr_sync->fixup_cc = delta;
                 fixup_clock_comparator(delta);
                 /* Verify that the clock is properly set. */
@@@ -1526,7 -1526,7 +1526,7 @@@ void stp_island_check(void
   static int stp_sync_clock(void *data)
   {
         static int first;
- -      unsigned long long old_clock, delta;
+ +      unsigned long long old_clock, delta, new_clock, clock_delta;
         struct clock_sync_data *stp_sync;
         int rc;
   
@@@ -1551,11 -1551,7 +1551,11 @@@
                 old_clock = get_tod_clock();
                 rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
                 if (rc == 0) {
- -                      delta = adjust_time(old_clock, get_tod_clock(), 0);
+ +                      new_clock = get_tod_clock();
+ +                      delta = adjust_time(old_clock, new_clock, 0);
+ +                      clock_delta = new_clock - old_clock;
+ +                      atomic_notifier_call_chain(&s390_epoch_delta_notifier,
+ +                                                 0, &clock_delta);
                         fixup_clock_comparator(delta);
                         rc = chsc_sstpi(stp_page, &stp_info,
                                         sizeof(struct stp_sstpi));
diff --combined arch/s390/kvm/interrupt.c

index b277d50dcf76a409072832059438e2f10ca3bc63,57309e9cdd8072d726c15b7869b48bd06e7ba852..5c2c169395c3ca61fe9e906baab078cf2966dba2
--- 1/arch/s390/kvm/interrupt.c
--- 2/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@@ -30,6 -30,7 +30,6 @@@
   #define IOINT_SCHID_MASK 0x0000ffff
   #define IOINT_SSID_MASK 0x00030000
   #define IOINT_CSSID_MASK 0x03fc0000
- -#define IOINT_AI_MASK 0x04000000
   #define PFAULT_INIT 0x0600
   #define PFAULT_DONE 0x0680
   #define VIRTIO_PARAM 0x0d00
@@@ -71,13 -72,9 +71,13 @@@ static int ckc_interrupts_enabled(struc
   
   static int ckc_irq_pending(struct kvm_vcpu *vcpu)
   {
+ +      preempt_disable();
         if (!(vcpu->arch.sie_block->ckc <
- -            get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+ +            get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
+ +              preempt_enable();
                 return 0;
+ +      }
+ +      preempt_enable();
         return ckc_interrupts_enabled(vcpu);
   }
   
@@@ -173,20 -170,20 +173,20 @@@ static unsigned long deliverable_irqs(s
   
   static void __set_cpu_idle(struct kvm_vcpu *vcpu)
   {
-       atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
         set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
   }
   
   static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
   {
-       atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+       atomic_andnot(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
         clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
   }
   
   static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
   {
-       atomic_clear_mask(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
-                         &vcpu->arch.sie_block->cpuflags);
+       atomic_andnot(CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+                   &vcpu->arch.sie_block->cpuflags);
         vcpu->arch.sie_block->lctl = 0x0000;
         vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
   
@@@ -199,7 -196,7 +199,7 @@@
   
   static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
   {
-       atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(flag, &vcpu->arch.sie_block->cpuflags);
   }
   
   static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
@@@ -314,8 -311,8 +314,8 @@@ static int __must_check __deliver_pfaul
         li->irq.ext.ext_params2 = 0;
         spin_unlock(&li->lock);
   
- -      VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
- -                 0, ext.ext_params2);
+ +      VCPU_EVENT(vcpu, 4, "deliver: pfault init token 0x%llx",
+ +                 ext.ext_params2);
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
                                          KVM_S390_INT_PFAULT_INIT,
                                          0, ext.ext_params2);
@@@ -371,7 -368,7 +371,7 @@@ static int __must_check __deliver_machi
         spin_unlock(&fi->lock);
   
         if (deliver) {
- -              VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+ +              VCPU_EVENT(vcpu, 3, "deliver: machine check mcic 0x%llx",
                            mchk.mcic);
                 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
                                                  KVM_S390_MCHK,
@@@ -406,7 -403,7 +406,7 @@@ static int __must_check __deliver_resta
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
         int rc;
   
- -      VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+ +      VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
         vcpu->stat.deliver_restart_signal++;
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
   
@@@ -430,6 -427,7 +430,6 @@@ static int __must_check __deliver_set_p
         clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
         spin_unlock(&li->lock);
   
- -      VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
         vcpu->stat.deliver_prefix_signal++;
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
                                          KVM_S390_SIGP_SET_PREFIX,
@@@ -452,7 -450,7 +452,7 @@@ static int __must_check __deliver_emerg
                 clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
         spin_unlock(&li->lock);
   
- -      VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+ +      VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp emerg");
         vcpu->stat.deliver_emergency_signal++;
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
                                          cpu_addr, 0);
@@@ -479,7 -477,7 +479,7 @@@ static int __must_check __deliver_exter
         clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
         spin_unlock(&li->lock);
   
- -      VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+ +      VCPU_EVENT(vcpu, 4, "%s", "deliver: sigp ext call");
         vcpu->stat.deliver_external_call++;
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
                                          KVM_S390_INT_EXTERNAL_CALL,
@@@ -508,7 -506,7 +508,7 @@@ static int __must_check __deliver_prog(
         memset(&li->irq.pgm, 0, sizeof(pgm_info));
         spin_unlock(&li->lock);
   
- -      VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+ +      VCPU_EVENT(vcpu, 3, "deliver: program irq code 0x%x, ilc:%d",
                    pgm_info.code, ilc);
         vcpu->stat.deliver_program_int++;
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
@@@ -624,7 -622,7 +624,7 @@@ static int __must_check __deliver_servi
         clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
         spin_unlock(&fi->lock);
   
- -      VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+ +      VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
                    ext.ext_params);
         vcpu->stat.deliver_service_signal++;
         trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
@@@ -653,6 -651,9 +653,6 @@@ static int __must_check __deliver_pfaul
                                         struct kvm_s390_interrupt_info,
                                         list);
         if (inti) {
- -              trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
- -                              KVM_S390_INT_PFAULT_DONE, 0,
- -                              inti->ext.ext_params2);
                 list_del(&inti->list);
                 fi->counters[FIRQ_CNTR_PFAULT] -= 1;
         }
@@@ -661,12 -662,6 +661,12 @@@
         spin_unlock(&fi->lock);
   
         if (inti) {
+ +              trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ +                                               KVM_S390_INT_PFAULT_DONE, 0,
+ +                                               inti->ext.ext_params2);
+ +              VCPU_EVENT(vcpu, 4, "deliver: pfault done token 0x%llx",
+ +                         inti->ext.ext_params2);
+ +
                 rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
                                 (u16 *)__LC_EXT_INT_CODE);
                 rc |= put_guest_lc(vcpu, PFAULT_DONE,
@@@ -696,7 -691,7 +696,7 @@@ static int __must_check __deliver_virti
                                         list);
         if (inti) {
                 VCPU_EVENT(vcpu, 4,
- -                         "interrupt: virtio parm:%x,parm64:%llx",
+ +                         "deliver: virtio parm: 0x%x,parm64: 0x%llx",
                            inti->ext.ext_params, inti->ext.ext_params2);
                 vcpu->stat.deliver_virtio_interrupt++;
                 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
@@@ -746,7 -741,7 +746,7 @@@ static int __must_check __deliver_io(st
                                         struct kvm_s390_interrupt_info,
                                         list);
         if (inti) {
- -              VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+ +              VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
                 vcpu->stat.deliver_io_int++;
                 trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
                                 inti->type,
@@@ -860,9 -855,7 +860,9 @@@ int kvm_s390_handle_wait(struct kvm_vcp
                 goto no_timer;
         }
   
+ +      preempt_disable();
         now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+ +      preempt_enable();
         sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
   
         /* underflow */
@@@ -871,7 -864,7 +871,7 @@@
   
         __set_cpu_idle(vcpu);
         hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
- -      VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
+ +      VCPU_EVENT(vcpu, 4, "enabled wait via clock comparator: %llu ns", sltime);
   no_timer:
         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
         kvm_vcpu_block(vcpu);
@@@ -901,9 -894,7 +901,9 @@@ enum hrtimer_restart kvm_s390_idle_wake
         u64 now, sltime;
   
         vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
+ +      preempt_disable();
         now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
+ +      preempt_enable();
         sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
   
         /*
@@@ -928,7 -919,7 +928,7 @@@ void kvm_s390_clear_local_irqs(struct k
         spin_unlock(&li->lock);
   
         /* clear pending external calls set by sigp interpretation facility */
-       atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
+       atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags);
         vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0;
   }
   
@@@ -977,10 -968,6 +977,10 @@@ static int __inject_prog(struct kvm_vcp
   {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
   
+ +      VCPU_EVENT(vcpu, 3, "inject: program irq code 0x%x", irq->u.pgm.code);
+ +      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+ +                                 irq->u.pgm.code, 0);
+ +
         li->irq.pgm = irq->u.pgm;
         set_bit(IRQ_PEND_PROG, &li->pending_irqs);
         return 0;
@@@ -991,6 -978,9 +991,6 @@@ int kvm_s390_inject_program_int(struct 
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
         struct kvm_s390_irq irq;
   
- -      VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
- -      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
- -                                 0, 1);
         spin_lock(&li->lock);
         irq.u.pgm.code = code;
         __inject_prog(vcpu, &irq);
@@@ -1006,6 -996,10 +1006,6 @@@ int kvm_s390_inject_prog_irq(struct kvm
         struct kvm_s390_irq irq;
         int rc;
   
- -      VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
- -                 pgm_info->code);
- -      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
- -                                 pgm_info->code, 0, 1);
         spin_lock(&li->lock);
         irq.u.pgm = *pgm_info;
         rc = __inject_prog(vcpu, &irq);
@@@ -1018,15 -1012,15 +1018,15 @@@ static int __inject_pfault_init(struct 
   {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
   
- -      VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
- -                 irq->u.ext.ext_params, irq->u.ext.ext_params2);
+ +      VCPU_EVENT(vcpu, 4, "inject: pfault init parameter block at 0x%llx",
+ +                 irq->u.ext.ext_params2);
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
                                    irq->u.ext.ext_params,
- -                                 irq->u.ext.ext_params2, 2);
+ +                                 irq->u.ext.ext_params2);
   
         li->irq.ext = irq->u.ext;
         set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
         return 0;
   }
   
@@@ -1041,7 -1035,7 +1041,7 @@@ static int __inject_extcall_sigpif(stru
                 /* another external call is pending */
                 return -EBUSY;
         }
-       atomic_set_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
         return 0;
   }
   
@@@ -1051,10 -1045,10 +1051,10 @@@ static int __inject_extcall(struct kvm_
         struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
         uint16_t src_id = irq->u.extcall.code;
   
- -      VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+ +      VCPU_EVENT(vcpu, 4, "inject: external call source-cpu:%u",
                    src_id);
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
- -                                 src_id, 0, 2);
+ +                                 src_id, 0);
   
         /* sending vcpu invalid */
         if (src_id >= KVM_MAX_VCPUS ||
@@@ -1067,7 -1061,7 +1067,7 @@@
         if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
                 return -EBUSY;
         *extcall = irq->u.extcall;
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
         return 0;
   }
   
@@@ -1076,10 -1070,10 +1076,10 @@@ static int __inject_set_prefix(struct k
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
         struct kvm_s390_prefix_info *prefix = &li->irq.prefix;
   
- -      VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+ +      VCPU_EVENT(vcpu, 3, "inject: set prefix to %x",
                    irq->u.prefix.address);
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
- -                                 irq->u.prefix.address, 0, 2);
+ +                                 irq->u.prefix.address, 0);
   
         if (!is_vcpu_stopped(vcpu))
                 return -EBUSY;
@@@ -1096,7 -1090,7 +1096,7 @@@ static int __inject_sigp_stop(struct kv
         struct kvm_s390_stop_info *stop = &li->irq.stop;
         int rc = 0;
   
- -      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+ +      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0);
   
         if (irq->u.stop.flags & ~KVM_S390_STOP_SUPP_FLAGS)
                 return -EINVAL;
@@@ -1120,8 -1114,8 +1120,8 @@@ static int __inject_sigp_restart(struc
   {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
   
- -      VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
- -      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+ +      VCPU_EVENT(vcpu, 3, "%s", "inject: restart int");
+ +      trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
   
         set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
         return 0;
@@@ -1132,14 -1126,14 +1132,14 @@@ static int __inject_sigp_emergency(stru
   {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
   
- -      VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+ +      VCPU_EVENT(vcpu, 4, "inject: emergency from cpu %u",
                    irq->u.emerg.code);
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
- -                                 irq->u.emerg.code, 0, 2);
+ +                                 irq->u.emerg.code, 0);
   
         set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
         set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
         return 0;
   }
   
@@@ -1148,10 -1142,10 +1148,10 @@@ static int __inject_mchk(struct kvm_vcp
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
         struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
   
- -      VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+ +      VCPU_EVENT(vcpu, 3, "inject: machine check mcic 0x%llx",
                    irq->u.mchk.mcic);
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
- -                                 irq->u.mchk.mcic, 2);
+ +                                 irq->u.mchk.mcic);
   
         /*
          * Because repressible machine checks can be indicated along with
@@@ -1178,12 -1172,12 +1178,12 @@@ static int __inject_ckc(struct kvm_vcp
   {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
   
- -      VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+ +      VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external");
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
- -                                 0, 0, 2);
+ +                                 0, 0);
   
         set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
         return 0;
   }
   
@@@ -1191,12 -1185,12 +1191,12 @@@ static int __inject_cpu_timer(struct kv
   {
         struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
   
- -      VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+ +      VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external");
         trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
- -                                 0, 0, 2);
+ +                                 0, 0);
   
         set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
-       atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+       atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
         return 0;
   }
   
@@@ -1375,13 -1369,13 +1375,13 @@@ static void __floating_irq_kick(struct 
         spin_lock(&li->lock);
         switch (type) {
         case KVM_S390_MCHK:
-               atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+               atomic_or(CPUSTAT_STOP_INT, li->cpuflags);
                 break;
         case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+               atomic_or(CPUSTAT_IO_INT, li->cpuflags);
                 break;
         default:
-               atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+               atomic_or(CPUSTAT_EXT_INT, li->cpuflags);
                 break;
         }
         spin_unlock(&li->lock);
@@@ -1441,20 -1435,20 +1441,20 @@@ int kvm_s390_inject_vm(struct kvm *kvm
                 inti->ext.ext_params2 = s390int->parm64;
                 break;
         case KVM_S390_INT_SERVICE:
- -              VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+ +              VM_EVENT(kvm, 4, "inject: sclp parm:%x", s390int->parm);
                 inti->ext.ext_params = s390int->parm;
                 break;
         case KVM_S390_INT_PFAULT_DONE:
                 inti->ext.ext_params2 = s390int->parm64;
                 break;
         case KVM_S390_MCHK:
- -              VM_EVENT(kvm, 5, "inject: machine check parm64:%llx",
+ +              VM_EVENT(kvm, 3, "inject: machine check mcic 0x%llx",
                          s390int->parm64);
                 inti->mchk.cr14 = s390int->parm; /* upper bits are not used */
                 inti->mchk.mcic = s390int->parm64;
                 break;
         case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- -              if (inti->type & IOINT_AI_MASK)
+ +              if (inti->type & KVM_S390_INT_IO_AI_MASK)
                         VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
                 else
                         VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
@@@ -1541,6 -1535,8 +1541,6 @@@ static int do_inject_vcpu(struct kvm_vc
   
         switch (irq->type) {
         case KVM_S390_PROGRAM_INT:
- -              VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
- -                         irq->u.pgm.code);
                 rc = __inject_prog(vcpu, irq);
                 break;
         case KVM_S390_SIGP_SET_PREFIX:
diff --combined arch/s390/kvm/kvm-s390.c

index 98df53c013439836773e39f21396157eca28bdca,b73302fb05079f63a7fdadd190267b6677728270..c91eb941b444ee7cad8c5a9ea2523495e71e8f2d
--- 1/arch/s390/kvm/kvm-s390.c
--- 2/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@@ -28,7 -28,6 +28,7 @@@
   #include <linux/vmalloc.h>
   #include <asm/asm-offsets.h>
   #include <asm/lowcore.h>
+ +#include <asm/etr.h>
   #include <asm/pgtable.h>
   #include <asm/nmi.h>
   #include <asm/switch_to.h>
@@@ -109,9 -108,6 +109,9 @@@ struct kvm_stats_debugfs_item debugfs_e
         { "diagnose_10", VCPU_STAT(diagnose_10) },
         { "diagnose_44", VCPU_STAT(diagnose_44) },
         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
+ +      { "diagnose_258", VCPU_STAT(diagnose_258) },
+ +      { "diagnose_308", VCPU_STAT(diagnose_308) },
+ +      { "diagnose_500", VCPU_STAT(diagnose_500) },
         { NULL }
   };
   
@@@ -128,7 -124,6 +128,7 @@@ unsigned long kvm_s390_fac_list_mask_si
   }
   
   static struct gmap_notifier gmap_notifier;
+ +debug_info_t *kvm_s390_dbf;
   
   /* Section: not file related */
   int kvm_arch_hardware_enable(void)
@@@ -139,69 -134,24 +139,69 @@@
   
   static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
   
+ +/*
+ + * This callback is executed during stop_machine(). All CPUs are therefore
+ + * temporarily stopped. In order not to change guest behavior, we have to
+ + * disable preemption whenever we touch the epoch of kvm and the VCPUs,
+ + * so a CPU won't be stopped while calculating with the epoch.
+ + */
+ +static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
+ +                        void *v)
+ +{
+ +      struct kvm *kvm;
+ +      struct kvm_vcpu *vcpu;
+ +      int i;
+ +      unsigned long long *delta = v;
+ +
+ +      list_for_each_entry(kvm, &vm_list, vm_list) {
+ +              kvm->arch.epoch -= *delta;
+ +              kvm_for_each_vcpu(i, vcpu, kvm) {
+ +                      vcpu->arch.sie_block->epoch -= *delta;
+ +              }
+ +      }
+ +      return NOTIFY_OK;
+ +}
+ +
+ +static struct notifier_block kvm_clock_notifier = {
+ +      .notifier_call = kvm_clock_sync,
+ +};
+ +
   int kvm_arch_hardware_setup(void)
   {
         gmap_notifier.notifier_call = kvm_gmap_notifier;
         gmap_register_ipte_notifier(&gmap_notifier);
+ +      atomic_notifier_chain_register(&s390_epoch_delta_notifier,
+ +                                     &kvm_clock_notifier);
         return 0;
   }
   
   void kvm_arch_hardware_unsetup(void)
   {
         gmap_unregister_ipte_notifier(&gmap_notifier);
+ +      atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
+ +                                       &kvm_clock_notifier);
   }
   
   int kvm_arch_init(void *opaque)
   {
+ +      kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
+ +      if (!kvm_s390_dbf)
+ +              return -ENOMEM;
+ +
+ +      if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
+ +              debug_unregister(kvm_s390_dbf);
+ +              return -ENOMEM;
+ +      }
+ +
         /* Register floating interrupt controller interface. */
         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
   }
   
+ +void kvm_arch_exit(void)
+ +{
+ +      debug_unregister(kvm_s390_dbf);
+ +}
+ +
   /* Section: device related */
   long kvm_arch_dev_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
@@@ -331,12 -281,10 +331,12 @@@ static int kvm_vm_ioctl_enable_cap(stru
   
         switch (cap->cap) {
         case KVM_CAP_S390_IRQCHIP:
+ +              VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
                 kvm->arch.use_irqchip = 1;
                 r = 0;
                 break;
         case KVM_CAP_S390_USER_SIGP:
+ +              VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
                 kvm->arch.user_sigp = 1;
                 r = 0;
                 break;
@@@ -347,11 -295,8 +347,11 @@@
                         r = 0;
                 } else
                         r = -EINVAL;
+ +              VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
+ +                       r ? "(not available)" : "(success)");
                 break;
         case KVM_CAP_S390_USER_STSI:
+ +              VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
                 kvm->arch.user_stsi = 1;
                 r = 0;
                 break;
@@@ -369,8 -314,6 +369,8 @@@ static int kvm_s390_get_mem_control(str
         switch (attr->attr) {
         case KVM_S390_VM_MEM_LIMIT_SIZE:
                 ret = 0;
+ +              VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
+ +                       kvm->arch.gmap->asce_end);
                 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
                         ret = -EFAULT;
                 break;
@@@ -387,13 -330,7 +387,13 @@@ static int kvm_s390_set_mem_control(str
         unsigned int idx;
         switch (attr->attr) {
         case KVM_S390_VM_MEM_ENABLE_CMMA:
+ +              /* enable CMMA only for z10 and later (EDAT_1) */
+ +              ret = -EINVAL;
+ +              if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+ +                      break;
+ +
                 ret = -EBUSY;
+ +              VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
                 mutex_lock(&kvm->lock);
                 if (atomic_read(&kvm->online_vcpus) == 0) {
                         kvm->arch.use_cmma = 1;
@@@ -402,11 -339,6 +402,11 @@@
                 mutex_unlock(&kvm->lock);
                 break;
         case KVM_S390_VM_MEM_CLR_CMMA:
+ +              ret = -EINVAL;
+ +              if (!kvm->arch.use_cmma)
+ +                      break;
+ +
+ +              VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
                 mutex_lock(&kvm->lock);
                 idx = srcu_read_lock(&kvm->srcu);
                 s390_reset_cmma(kvm->arch.gmap->mm);
@@@ -442,7 -374,6 +442,7 @@@
                         }
                 }
                 mutex_unlock(&kvm->lock);
+ +              VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
                 break;
         }
         default:
@@@ -469,26 -400,22 +469,26 @@@ static int kvm_s390_vm_set_crypto(struc
                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
                 kvm->arch.crypto.aes_kw = 1;
+ +              VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
                 break;
         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
                 get_random_bytes(
                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
                 kvm->arch.crypto.dea_kw = 1;
+ +              VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
                 break;
         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
                 kvm->arch.crypto.aes_kw = 0;
                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+ +              VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
                 break;
         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
                 kvm->arch.crypto.dea_kw = 0;
                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
+ +              VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
                 break;
         default:
                 mutex_unlock(&kvm->lock);
@@@ -513,7 -440,6 +513,7 @@@ static int kvm_s390_set_tod_high(struc
   
         if (gtod_high != 0)
                 return -EINVAL;
+ +      VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
   
         return 0;
   }
@@@ -533,15 -459,12 +533,15 @@@ static int kvm_s390_set_tod_low(struct 
                 return r;
   
         mutex_lock(&kvm->lock);
+ +      preempt_disable();
         kvm->arch.epoch = gtod - host_tod;
         kvm_s390_vcpu_block_all(kvm);
         kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
                 cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
         kvm_s390_vcpu_unblock_all(kvm);
+ +      preempt_enable();
         mutex_unlock(&kvm->lock);
+ +      VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
         return 0;
   }
   
@@@ -573,7 -496,6 +573,7 @@@ static int kvm_s390_get_tod_high(struc
         if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                          sizeof(gtod_high)))
                 return -EFAULT;
+ +      VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
   
         return 0;
   }
@@@ -587,12 -509,9 +587,12 @@@ static int kvm_s390_get_tod_low(struct 
         if (r)
                 return r;
   
+ +      preempt_disable();
         gtod = host_tod + kvm->arch.epoch;
+ +      preempt_enable();
         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                 return -EFAULT;
+ +      VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
   
         return 0;
   }
@@@ -902,9 -821,7 +902,9 @@@ static long kvm_s390_set_skeys(struct k
         }
   
         /* Enable storage key handling for the guest */
- -      s390_enable_skey();
+ +      r = s390_enable_skey();
+ +      if (r)
+ +              goto out;
   
         for (i = 0; i < args->count; i++) {
                 hva = gfn_to_hva(kvm, args->start_gfn + i);
@@@ -962,7 -879,8 +962,7 @@@ long kvm_arch_vm_ioctl(struct file *fil
                 if (kvm->arch.use_irqchip) {
                         /* Set up dummy routing. */
                         memset(&routing, 0, sizeof(routing));
- -                      kvm_set_irq_routing(kvm, &routing, 0, 0);
- -                      r = 0;
+ +                      r = kvm_set_irq_routing(kvm, &routing, 0, 0);
                 }
                 break;
         }
@@@ -1125,7 -1043,7 +1125,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
   
         sprintf(debug_name, "kvm-%u", current->pid);
   
- -      kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+ +      kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
         if (!kvm->arch.dbf)
                 goto out_err;
   
@@@ -1168,7 -1086,7 +1168,7 @@@
         mutex_init(&kvm->arch.ipte_mutex);
   
         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
- -      VM_EVENT(kvm, 3, "%s", "vm created");
+ +      VM_EVENT(kvm, 3, "vm created with type %lu", type);
   
         if (type & KVM_VM_S390_UCONTROL) {
                 kvm->arch.gmap = NULL;
@@@ -1185,7 -1103,6 +1185,7 @@@
         kvm->arch.epoch = 0;
   
         spin_lock_init(&kvm->arch.start_stop_lock);
+ +      KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
   
         return 0;
   out_err:
@@@ -1193,7 -1110,6 +1193,7 @@@
         free_page((unsigned long)kvm->arch.model.fac);
         debug_unregister(kvm->arch.dbf);
         free_page((unsigned long)(kvm->arch.sca));
+ +      KVM_EVENT(3, "creation of vm failed: %d", rc);
         return rc;
   }
   
@@@ -1215,7 -1131,7 +1215,7 @@@ void kvm_arch_vcpu_destroy(struct kvm_v
         if (kvm_is_ucontrol(vcpu->kvm))
                 gmap_free(vcpu->arch.gmap);
   
- -      if (kvm_s390_cmma_enabled(vcpu->kvm))
+ +      if (vcpu->kvm->arch.use_cmma)
                 kvm_s390_vcpu_unsetup_cmma(vcpu);
         free_page((unsigned long)(vcpu->arch.sie_block));
   
@@@ -1250,7 -1166,6 +1250,7 @@@ void kvm_arch_destroy_vm(struct kvm *kv
                 gmap_free(kvm->arch.gmap);
         kvm_s390_destroy_adapters(kvm);
         kvm_s390_clear_float_irqs(kvm);
+ +      KVM_EVENT(3, "vm 0x%p destroyed", kvm);
   }
   
   /* Section: vcpu related */
@@@ -1283,79 -1198,43 +1283,79 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
         return 0;
   }
   
+ +/*
+ + * Backs up the current FP/VX register save area on a particular
+ + * destination.  Used to switch between different register save
+ + * areas.
+ + */
+ +static inline void save_fpu_to(struct fpu *dst)
+ +{
+ +      dst->fpc = current->thread.fpu.fpc;
+ +      dst->flags = current->thread.fpu.flags;
+ +      dst->regs = current->thread.fpu.regs;
+ +}
+ +
+ +/*
+ + * Switches the FP/VX register save area from which to lazy
+ + * restore register contents.
+ + */
+ +static inline void load_fpu_from(struct fpu *from)
+ +{
+ +      current->thread.fpu.fpc = from->fpc;
+ +      current->thread.fpu.flags = from->flags;
+ +      current->thread.fpu.regs = from->regs;
+ +}
+ +
   void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
   {
- -      save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
- -      if (test_kvm_facility(vcpu->kvm, 129))
- -              save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
- -      else
- -              save_fp_regs(vcpu->arch.host_fpregs.fprs);
- -      save_access_regs(vcpu->arch.host_acrs);
+ +      /* Save host register state */
+ +      save_fpu_regs();
+ +      save_fpu_to(&vcpu->arch.host_fpregs);
+ +
         if (test_kvm_facility(vcpu->kvm, 129)) {
- -              restore_fp_ctl(&vcpu->run->s.regs.fpc);
- -              restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
- -      } else {
- -              restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- -              restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
- -      }
+ +              current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+ +              current->thread.fpu.flags = FPU_USE_VX;
+ +              /*
+ +               * Use the register save area in the SIE-control block
+ +               * for register restore and save in kvm_arch_vcpu_put()
+ +               */
+ +              current->thread.fpu.vxrs =
+ +                      (__vector128 *)&vcpu->run->s.regs.vrs;
+ +              /* Always enable the vector extension for KVM */
+ +              __ctl_set_vx();
+ +      } else
+ +              load_fpu_from(&vcpu->arch.guest_fpregs);
+ +
+ +      if (test_fp_ctl(current->thread.fpu.fpc))
+ +              /* User space provided an invalid FPC, let's clear it */
+ +              current->thread.fpu.fpc = 0;
+ +
+ +      save_access_regs(vcpu->arch.host_acrs);
         restore_access_regs(vcpu->run->s.regs.acrs);
         gmap_enable(vcpu->arch.gmap);
-       atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
   }
   
   void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
   {
-       atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+       atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
         gmap_disable(vcpu->arch.gmap);
- -      if (test_kvm_facility(vcpu->kvm, 129)) {
- -              save_fp_ctl(&vcpu->run->s.regs.fpc);
- -              save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
- -      } else {
- -              save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- -              save_fp_regs(vcpu->arch.guest_fpregs.fprs);
- -      }
- -      save_access_regs(vcpu->run->s.regs.acrs);
- -      restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
+ +
+ +      save_fpu_regs();
+ +
         if (test_kvm_facility(vcpu->kvm, 129))
- -              restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+ +              /*
+ +               * kvm_arch_vcpu_load() set up the register save area to
+ +               * the &vcpu->run->s.regs.vrs and, thus, the vector registers
+ +               * are already saved.  Only the floating-point control must be
+ +               * copied.
+ +               */
+ +              vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
         else
- -              restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+ +              save_fpu_to(&vcpu->arch.guest_fpregs);
+ +      load_fpu_from(&vcpu->arch.host_fpregs);
+ +
+ +      save_access_regs(vcpu->run->s.regs.acrs);
         restore_access_regs(vcpu->arch.host_acrs);
   }
   
@@@ -1385,9 -1264,7 +1385,9 @@@ static void kvm_s390_vcpu_initial_reset
   void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
   {
         mutex_lock(&vcpu->kvm->lock);
+ +      preempt_disable();
         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
+ +      preempt_enable();
         mutex_unlock(&vcpu->kvm->lock);
         if (!kvm_is_ucontrol(vcpu->kvm))
                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
@@@ -1443,9 -1320,9 +1443,9 @@@ int kvm_arch_vcpu_setup(struct kvm_vcp
                                                     CPUSTAT_STOPPED);
   
         if (test_kvm_facility(vcpu->kvm, 78))
-               atomic_set_mask(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
+               atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
         else if (test_kvm_facility(vcpu->kvm, 8))
-               atomic_set_mask(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
+               atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
   
         kvm_s390_vcpu_setup_model(vcpu);
   
@@@ -1465,7 -1342,7 +1465,7 @@@
         }
         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
   
- -      if (kvm_s390_cmma_enabled(vcpu->kvm)) {
+ +      if (vcpu->kvm->arch.use_cmma) {
                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
                 if (rc)
                         return rc;
@@@ -1500,6 -1377,7 +1500,6 @@@ struct kvm_vcpu *kvm_arch_vcpu_create(s
   
         vcpu->arch.sie_block = &sie_page->sie_block;
         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
- -      vcpu->arch.host_vregs = &sie_page->vregs;
   
         vcpu->arch.sie_block->icpua = id;
         if (!kvm_is_ucontrol(kvm)) {
@@@ -1521,19 -1399,6 +1521,19 @@@
         vcpu->arch.local_int.wq = &vcpu->wq;
         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
   
+ +      /*
+ +       * Allocate a save area for floating-point registers.  If the vector
+ +       * extension is available, register contents are saved in the SIE
+ +       * control block.  The allocated save area is still required in
+ +       * particular places, for example, in kvm_s390_vcpu_store_status().
+ +       */
+ +      vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
+ +                                             GFP_KERNEL);
+ +      if (!vcpu->arch.guest_fpregs.fprs) {
+ +              rc = -ENOMEM;
+ +              goto out_free_sie_block;
+ +      }
+ +
         rc = kvm_vcpu_init(vcpu, kvm, id);
         if (rc)
                 goto out_free_sie_block;
@@@ -1557,24 -1422,24 +1557,24 @@@ int kvm_arch_vcpu_runnable(struct kvm_v
   
   void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
   {
-       atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+       atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
         exit_sie(vcpu);
   }
   
   void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
   {
-       atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
+       atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
   }
   
   static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
   {
-       atomic_set_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+       atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
         exit_sie(vcpu);
   }
   
   static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
   {
-       atomic_clear_mask(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+       atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
   }
   
   /*
@@@ -1583,7 -1448,7 +1583,7 @@@
    * return immediately. */
   void exit_sie(struct kvm_vcpu *vcpu)
   {
-       atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
                 cpu_relax();
   }
@@@ -1756,16 -1621,16 +1756,16 @@@ int kvm_arch_vcpu_ioctl_set_fpu(struct 
   {
         if (test_fp_ctl(fpu->fpc))
                 return -EINVAL;
- -      memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+ +      memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
         vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- -      restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- -      restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ +      save_fpu_regs();
+ +      load_fpu_from(&vcpu->arch.guest_fpregs);
         return 0;
   }
   
   int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
   {
- -      memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+ +      memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
         fpu->fpc = vcpu->arch.guest_fpregs.fpc;
         return 0;
   }
@@@ -1807,19 -1672,19 +1807,19 @@@ int kvm_arch_vcpu_ioctl_set_guest_debug
         if (dbg->control & KVM_GUESTDBG_ENABLE) {
                 vcpu->guest_debug = dbg->control;
                 /* enforce guest PER */
-               atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+               atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
   
                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
                         rc = kvm_s390_import_bp_data(vcpu, dbg);
         } else {
-               atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+               atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
                 vcpu->arch.guestdbg.last_bp = 0;
         }
   
         if (rc) {
                 vcpu->guest_debug = 0;
                 kvm_s390_clear_bp_data(vcpu);
-               atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
+               atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
         }
   
         return rc;
@@@ -1858,6 -1723,18 +1858,6 @@@ int kvm_arch_vcpu_ioctl_set_mpstate(str
         return rc;
   }
   
- -bool kvm_s390_cmma_enabled(struct kvm *kvm)
- -{
- -      if (!MACHINE_IS_LPAR)
- -              return false;
- -      /* only enable for z10 and later */
- -      if (!MACHINE_HAS_EDAT1)
- -              return false;
- -      if (!kvm->arch.use_cmma)
- -              return false;
- -      return true;
- -}
- -
   static bool ibs_enabled(struct kvm_vcpu *vcpu)
   {
         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
@@@ -1865,10 -1742,10 +1865,10 @@@
   
   static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
   {
- -      if (!vcpu->requests)
- -              return 0;
   retry:
         kvm_s390_vcpu_request_handled(vcpu);
+ +      if (!vcpu->requests)
+ +              return 0;
         /*
          * We use MMU_RELOAD just to re-arm the ipte notifier for the
          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@@ -1894,7 -1771,7 +1894,7 @@@
         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
                 if (!ibs_enabled(vcpu)) {
                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
-                       atomic_set_mask(CPUSTAT_IBS,
+                       atomic_or(CPUSTAT_IBS,
                                         &vcpu->arch.sie_block->cpuflags);
                 }
                 goto retry;
@@@ -1903,7 -1780,7 +1903,7 @@@
         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
                 if (ibs_enabled(vcpu)) {
                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
-                       atomic_clear_mask(CPUSTAT_IBS,
+                       atomic_andnot(CPUSTAT_IBS,
                                           &vcpu->arch.sie_block->cpuflags);
                 }
                 goto retry;
@@@ -2316,21 -2193,8 +2316,21 @@@ int kvm_s390_vcpu_store_status(struct k
          * copying in vcpu load/put. Lets update our copies before we save
          * it into the save area
          */
- -      save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- -      save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ +      save_fpu_regs();
+ +      if (test_kvm_facility(vcpu->kvm, 129)) {
+ +              /*
+ +               * If the vector extension is available, the vector registers
+ +               * which overlaps with floating-point registers are saved in
+ +               * the SIE-control block.  Hence, extract the floating-point
+ +               * registers and the FPC value and store them in the
+ +               * guest_fpregs structure.
+ +               */
+ +              WARN_ON(!is_vx_task(current));    /* XXX remove later */
+ +              vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
+ +              convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
+ +                               current->thread.fpu.vxrs);
+ +      } else
+ +              save_fpu_to(&vcpu->arch.guest_fpregs);
         save_access_regs(vcpu->run->s.regs.acrs);
   
         return kvm_s390_store_status_unloaded(vcpu, addr);
@@@ -2357,13 -2221,10 +2357,13 @@@ int kvm_s390_vcpu_store_adtl_status(str
   
         /*
          * The guest VXRS are in the host VXRs due to the lazy
- -       * copying in vcpu load/put. Let's update our copies before we save
- -       * it into the save area.
+ +       * copying in vcpu load/put. We can simply call save_fpu_regs()
+ +       * to save the current register state because we are in the
+ +       * middle of a load/put cycle.
+ +       *
+ +       * Let's update our copies before we save it into the save area.
          */
- -      save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+ +      save_fpu_regs();
   
         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
   }
@@@ -2419,7 -2280,7 +2419,7 @@@ void kvm_s390_vcpu_start(struct kvm_vcp
                 __disable_ibs_on_all_vcpus(vcpu->kvm);
         }
   
-       atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+       atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
         /*
          * Another VCPU might have used IBS while we were offline.
          * Let's play safe and flush the VCPU at startup.
@@@ -2445,7 -2306,7 +2445,7 @@@ void kvm_s390_vcpu_stop(struct kvm_vcp
         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
         kvm_s390_clear_stop_irq(vcpu);
   
-       atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+       atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
         __disable_ibs_on_vcpu(vcpu);
   
         for (i = 0; i < online_vcpus; i++) {
@@@ -2479,7 -2340,6 +2479,7 @@@ static int kvm_vcpu_ioctl_enable_cap(st
         case KVM_CAP_S390_CSS_SUPPORT:
                 if (!vcpu->kvm->arch.css_support) {
                         vcpu->kvm->arch.css_support = 1;
+ +                      VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
                         trace_kvm_s390_enable_css(vcpu->kvm);
                 }
                 r = 0;
diff --combined arch/s390/lib/uaccess.c

index 0d002a746bec157b9bd38a5006564bba26bb8b04,93cb1d09493dd68729ffdb3fa12be34ab1cd776b..ae4de559e3a04288c6be111de684b3355f35109b
--- 1/arch/s390/lib/uaccess.c
--- 2/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@@ -15,7 -15,7 +15,7 @@@
   #include <asm/mmu_context.h>
   #include <asm/facility.h>
   
- static struct static_key have_mvcos = STATIC_KEY_INIT_FALSE;
+ static DEFINE_STATIC_KEY_FALSE(have_mvcos);
   
   static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr,
                                                  unsigned long size)
@@@ -104,7 -104,7 +104,7 @@@ static inline unsigned long copy_from_u
   
   unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
   {
-       if (static_key_false(&have_mvcos))
+       if (static_branch_likely(&have_mvcos))
                 return copy_from_user_mvcos(to, from, n);
         return copy_from_user_mvcp(to, from, n);
   }
@@@ -177,7 -177,7 +177,7 @@@ static inline unsigned long copy_to_use
   
   unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
   {
-       if (static_key_false(&have_mvcos))
+       if (static_branch_likely(&have_mvcos))
                 return copy_to_user_mvcos(to, from, n);
         return copy_to_user_mvcs(to, from, n);
   }
@@@ -240,7 -240,7 +240,7 @@@ static inline unsigned long copy_in_use
   
   unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n)
   {
-       if (static_key_false(&have_mvcos))
+       if (static_branch_likely(&have_mvcos))
                 return copy_in_user_mvcos(to, from, n);
         return copy_in_user_mvc(to, from, n);
   }
@@@ -312,7 -312,7 +312,7 @@@ static inline unsigned long clear_user_
   
   unsigned long __clear_user(void __user *to, unsigned long size)
   {
-       if (static_key_false(&have_mvcos))
+       if (static_branch_likely(&have_mvcos))
                         return clear_user_mvcos(to, size);
         return clear_user_xc(to, size);
   }
@@@ -370,10 -370,23 +370,10 @@@ long __strncpy_from_user(char *dst, con
   }
   EXPORT_SYMBOL(__strncpy_from_user);
   
- -/*
- - * The "old" uaccess variant without mvcos can be enforced with the
- - * uaccess_primary kernel parameter. This is mainly for debugging purposes.
- - */
- -static int uaccess_primary __initdata;
- -
- -static int __init parse_uaccess_pt(char *__unused)
- -{
- -      uaccess_primary = 1;
- -      return 0;
- -}
- -early_param("uaccess_primary", parse_uaccess_pt);
- -
   static int __init uaccess_init(void)
   {
- -      if (!uaccess_primary && test_facility(27))
+ +      if (test_facility(27))
-               static_key_slow_inc(&have_mvcos);
+               static_branch_enable(&have_mvcos);
         return 0;
   }
   early_initcall(uaccess_init);
diff --combined arch/sparc/lib/ksyms.c

index 8069ce12f20b13d514160cec8db0c0d88b64b27e,bb600599726830a0e8465f8ab901147b52e2daec..8eb454cfe05c9f17a3d084306b085f6dde095ee3
--- 1/arch/sparc/lib/ksyms.c
--- 2/arch/sparc/lib/ksyms.c
+++ b/arch/sparc/lib/ksyms.c
@@@ -111,6 -111,9 +111,9 @@@ EXPORT_SYMBOL(atomic64_##op##_return)
   
   ATOMIC_OPS(add)
   ATOMIC_OPS(sub)
+ ATOMIC_OP(and)
+ ATOMIC_OP(or)
+ ATOMIC_OP(xor)
   
   #undef ATOMIC_OPS
   #undef ATOMIC_OP_RETURN
@@@ -135,6 -138,10 +138,6 @@@ EXPORT_SYMBOL(copy_user_page)
   void VISenter(void);
   EXPORT_SYMBOL(VISenter);
   
- -/* CRYPTO code needs this */
- -void VISenterhalf(void);
- -EXPORT_SYMBOL(VISenterhalf);
- -
   extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
   extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
                 unsigned long *);
diff --combined arch/x86/kernel/tsc.c

index 79055cf2c497e8219ed9956eeb732b9234495593,b9cfd462f7e7b530458bde0c3595a8d97d862e67..c8d52cb4cb6e8b9ee9d81cfc9c0fa3603284ce0e
--- 1/arch/x86/kernel/tsc.c
--- 2/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@@ -38,7 -38,7 +38,7 @@@ static int __read_mostly tsc_unstable
      erroneous rdtsc usage on !cpu_has_tsc processors */
   static int __read_mostly tsc_disabled = -1;
   
- static struct static_key __use_tsc = STATIC_KEY_INIT;
+ static DEFINE_STATIC_KEY_FALSE(__use_tsc);
   
   int tsc_clocksource_reliable;
   
@@@ -274,7 -274,12 +274,12 @@@ done
    */
   u64 native_sched_clock(void)
   {
-       u64 tsc_now;
+       if (static_branch_likely(&__use_tsc)) {
+               u64 tsc_now = rdtsc();
+ 
+               /* return the value in ns */
+               return cycles_2_ns(tsc_now);
+       }
   
         /*
          * Fall back to jiffies if there's no TSC available:
@@@ -284,26 -289,11 +289,19 @@@
          *   very important for it to be as fast as the platform
          *   can achieve it. )
          */
-       if (!static_key_false(&__use_tsc)) {
-               /* No locking but a rare wrong value is not a big deal: */
-               return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
-       }
- 
-       /* read the Time Stamp Counter: */
-       tsc_now = rdtsc();
   
-       /* return the value in ns */
-       return cycles_2_ns(tsc_now);
+       /* No locking but a rare wrong value is not a big deal: */
+       return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
   }
   
+ +/*
+ + * Generate a sched_clock if you already have a TSC value.
+ + */
+ +u64 native_sched_clock_from_tsc(u64 tsc)
+ +{
+ +      return cycles_2_ns(tsc);
+ +}
+ +
   /* We need to define a real function for sched_clock, to override the
      weak default version */
   #ifdef CONFIG_PARAVIRT
@@@ -1212,7 -1202,7 +1210,7 @@@ void __init tsc_init(void
         /* now allow native_sched_clock() to use rdtsc */
   
         tsc_disabled = 0;
-       static_key_slow_inc(&__use_tsc);
+       static_branch_enable(&__use_tsc);
   
         if (!no_sched_irq_time)
                 enable_sched_clock_irqtime();
diff --combined arch/xtensa/include/asm/atomic.h

index ebcd1f6fc8cb9f64c64f2fa332fb573fd32ef8d7,e0be67936990277cad0e580cdf9714871893fb45..93795d04730387c5207a54807ecb6fe0fc571c50
--- 1/arch/xtensa/include/asm/atomic.h
--- 2/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@@ -29,7 -29,7 +29,7 @@@
    *
    * Locking interrupts looks like this:
    *
- - *    rsil a15, LOCKLEVEL
+ + *    rsil a15, TOPLEVEL
    *    <code>
    *    wsr  a15, PS
    *    rsync
@@@ -106,7 -106,7 +106,7 @@@ static inline void atomic_##op(int i, a
         unsigned int vval;                                              \
                                                                         \
         __asm__ __volatile__(                                           \
- -                      "       rsil    a15, "__stringify(LOCKLEVEL)"\n"\
+ +                      "       rsil    a15, "__stringify(TOPLEVEL)"\n"\
                         "       l32i    %0, %2, 0\n"                    \
                         "       " #op " %0, %0, %1\n"                   \
                         "       s32i    %0, %2, 0\n"                    \
@@@ -124,7 -124,7 +124,7 @@@ static inline int atomic_##op##_return(
         unsigned int vval;                                              \
                                                                         \
         __asm__ __volatile__(                                           \
- -                      "       rsil    a15,"__stringify(LOCKLEVEL)"\n" \
+ +                      "       rsil    a15,"__stringify(TOPLEVEL)"\n"  \
                         "       l32i    %0, %2, 0\n"                    \
                         "       " #op " %0, %0, %1\n"                   \
                         "       s32i    %0, %2, 0\n"                    \
@@@ -145,6 -145,10 +145,10 @@@
   ATOMIC_OPS(add)
   ATOMIC_OPS(sub)
   
+ ATOMIC_OP(and)
+ ATOMIC_OP(or)
+ ATOMIC_OP(xor)
+ 
   #undef ATOMIC_OPS
   #undef ATOMIC_OP_RETURN
   #undef ATOMIC_OP
@@@ -250,75 -254,6 +254,6 @@@ static __inline__ int __atomic_add_unle
         return c;
   }
   
- 
- static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
- {
- #if XCHAL_HAVE_S32C1I
-       unsigned long tmp;
-       int result;
- 
-       __asm__ __volatile__(
-                       "1:     l32i    %1, %3, 0\n"
-                       "       wsr     %1, scompare1\n"
-                       "       and     %0, %1, %2\n"
-                       "       s32c1i  %0, %3, 0\n"
-                       "       bne     %0, %1, 1b\n"
-                       : "=&a" (result), "=&a" (tmp)
-                       : "a" (~mask), "a" (v)
-                       : "memory"
-                       );
- #else
-       unsigned int all_f = -1;
-       unsigned int vval;
- 
-       __asm__ __volatile__(
-                       "       rsil    a15,"__stringify(TOPLEVEL)"\n"
-                       "       l32i    %0, %2, 0\n"
-                       "       xor     %1, %4, %3\n"
-                       "       and     %0, %0, %4\n"
-                       "       s32i    %0, %2, 0\n"
-                       "       wsr     a15, ps\n"
-                       "       rsync\n"
-                       : "=&a" (vval), "=a" (mask)
-                       : "a" (v), "a" (all_f), "1" (mask)
-                       : "a15", "memory"
-                       );
- #endif
- }
- 
- static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
- {
- #if XCHAL_HAVE_S32C1I
-       unsigned long tmp;
-       int result;
- 
-       __asm__ __volatile__(
-                       "1:     l32i    %1, %3, 0\n"
-                       "       wsr     %1, scompare1\n"
-                       "       or      %0, %1, %2\n"
-                       "       s32c1i  %0, %3, 0\n"
-                       "       bne     %0, %1, 1b\n"
-                       : "=&a" (result), "=&a" (tmp)
-                       : "a" (mask), "a" (v)
-                       : "memory"
-                       );
- #else
-       unsigned int vval;
- 
-       __asm__ __volatile__(
-                       "       rsil    a15,"__stringify(TOPLEVEL)"\n"
-                       "       l32i    %0, %2, 0\n"
-                       "       or      %0, %0, %1\n"
-                       "       s32i    %0, %2, 0\n"
-                       "       wsr     a15, ps\n"
-                       "       rsync\n"
-                       : "=&a" (vval)
-                       : "a" (mask), "a" (v)
-                       : "a15", "memory"
-                       );
- #endif
- }
- 
   #endif /* __KERNEL__ */
   
   #endif /* _XTENSA_ATOMIC_H */
diff --combined drivers/s390/scsi/zfcp_fsf.c

index 4ac73e047c114bb3d27aa5fab7ca939297fb4c43,27b976aa1818fdec54948e4f71b18ed2a945c804..522a633c866a8b1e464ec857f179365e68bb6530
--- 1/drivers/s390/scsi/zfcp_fsf.c
--- 2/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@@ -114,7 -114,7 +114,7 @@@ static void zfcp_fsf_link_down_info_eva
         if (atomic_read(&adapter->status) & ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED)
                 return;
   
-       atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
+       atomic_or(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, &adapter->status);
   
         zfcp_scsi_schedule_rports_block(adapter);
   
@@@ -204,7 -204,7 +204,7 @@@ static void zfcp_fsf_status_read_link_d
                 break;
         case FSF_STATUS_READ_SUB_FIRMWARE_UPDATE:
                 zfcp_fsf_link_down_info_eval(req, NULL);
- -      };
+ +      }
   }
   
   static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req)
@@@ -345,7 -345,7 +345,7 @@@ static void zfcp_fsf_protstatus_eval(st
                 zfcp_erp_adapter_shutdown(adapter, 0, "fspse_3");
                 break;
         case FSF_PROT_HOST_CONNECTION_INITIALIZING:
-               atomic_set_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
+               atomic_or(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
                                 &adapter->status);
                 break;
         case FSF_PROT_DUPLICATE_REQUEST_ID:
@@@ -554,7 -554,7 +554,7 @@@ static void zfcp_fsf_exchange_config_da
                         zfcp_erp_adapter_shutdown(adapter, 0, "fsecdh1");
                         return;
                 }
-               atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+               atomic_or(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
                                 &adapter->status);
                 break;
         case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE:
@@@ -567,7 -567,7 +567,7 @@@
   
                 /* avoids adapter shutdown to be able to recognize
                  * events such as LINK UP */
-               atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+               atomic_or(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
                                 &adapter->status);
                 zfcp_fsf_link_down_info_eval(req,
                         &qtcb->header.fsf_status_qual.link_down_info);
@@@ -1394,9 -1394,9 +1394,9 @@@ static void zfcp_fsf_open_port_handler(
                 break;
         case FSF_GOOD:
                 port->handle = header->port_handle;
-               atomic_set_mask(ZFCP_STATUS_COMMON_OPEN |
+               atomic_or(ZFCP_STATUS_COMMON_OPEN |
                                 ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
-               atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_BOXED,
+               atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_BOXED,
                                   &port->status);
                 /* check whether D_ID has changed during open */
                 /*
@@@ -1677,10 -1677,10 +1677,10 @@@ static void zfcp_fsf_close_physical_por
         case FSF_PORT_BOXED:
                 /* can't use generic zfcp_erp_modify_port_status because
                  * ZFCP_STATUS_COMMON_OPEN must not be reset for the port */
-               atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
+               atomic_andnot(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
                 shost_for_each_device(sdev, port->adapter->scsi_host)
                         if (sdev_to_zfcp(sdev)->port == port)
-                               atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
+                               atomic_andnot(ZFCP_STATUS_COMMON_OPEN,
                                                   &sdev_to_zfcp(sdev)->status);
                 zfcp_erp_set_port_status(port, ZFCP_STATUS_COMMON_ACCESS_BOXED);
                 zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED,
@@@ -1700,10 -1700,10 +1700,10 @@@
                 /* can't use generic zfcp_erp_modify_port_status because
                  * ZFCP_STATUS_COMMON_OPEN must not be reset for the port
                  */
-               atomic_clear_mask(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
+               atomic_andnot(ZFCP_STATUS_PORT_PHYS_OPEN, &port->status);
                 shost_for_each_device(sdev, port->adapter->scsi_host)
                         if (sdev_to_zfcp(sdev)->port == port)
-                               atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN,
+                               atomic_andnot(ZFCP_STATUS_COMMON_OPEN,
                                                   &sdev_to_zfcp(sdev)->status);
                 break;
         }
@@@ -1766,7 -1766,7 +1766,7 @@@ static void zfcp_fsf_open_lun_handler(s
   
         zfcp_sdev = sdev_to_zfcp(sdev);
   
-       atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED |
+       atomic_andnot(ZFCP_STATUS_COMMON_ACCESS_DENIED |
                           ZFCP_STATUS_COMMON_ACCESS_BOXED,
                           &zfcp_sdev->status);
   
@@@ -1822,7 -1822,7 +1822,7 @@@
   
         case FSF_GOOD:
                 zfcp_sdev->lun_handle = header->lun_handle;
-               atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
+               atomic_or(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
                 break;
         }
   }
@@@ -1913,7 -1913,7 +1913,7 @@@ static void zfcp_fsf_close_lun_handler(
                 }
                 break;
         case FSF_GOOD:
-               atomic_clear_mask(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
+               atomic_andnot(ZFCP_STATUS_COMMON_OPEN, &zfcp_sdev->status);
                 break;
         }
   }
diff --combined kernel/sched/core.c

index d8420c233ff76268cdff1d1add89a05e1b89d9c2,66ae8baf42fe60ab965a23e1332389dc56587a9e..3595403921bd5be10c3e5e591bf04916e654423d
--- 1/kernel/sched/core.c
--- 2/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@@ -164,14 -164,12 +164,12 @@@ struct static_key sched_feat_keys[__SCH
   
   static void sched_feat_disable(int i)
   {
-       if (static_key_enabled(&sched_feat_keys[i]))
-               static_key_slow_dec(&sched_feat_keys[i]);
+       static_key_disable(&sched_feat_keys[i]);
   }
   
   static void sched_feat_enable(int i)
   {
-       if (!static_key_enabled(&sched_feat_keys[i]))
-               static_key_slow_inc(&sched_feat_keys[i]);
+       static_key_enable(&sched_feat_keys[i]);
   }
   #else
   static void sched_feat_disable(int i) { };
@@@ -1151,45 -1149,15 +1149,45 @@@ static int migration_cpu_stop(void *dat
         return 0;
   }
   
- -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ +/*
+ + * sched_class::set_cpus_allowed must do the below, but is not required to
+ + * actually call this function.
+ + */
+ +void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
   {
- -      if (p->sched_class->set_cpus_allowed)
- -              p->sched_class->set_cpus_allowed(p, new_mask);
- -
         cpumask_copy(&p->cpus_allowed, new_mask);
         p->nr_cpus_allowed = cpumask_weight(new_mask);
   }
   
+ +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ +{
+ +      struct rq *rq = task_rq(p);
+ +      bool queued, running;
+ +
+ +      lockdep_assert_held(&p->pi_lock);
+ +
+ +      queued = task_on_rq_queued(p);
+ +      running = task_current(rq, p);
+ +
+ +      if (queued) {
+ +              /*
+ +               * Because __kthread_bind() calls this on blocked tasks without
+ +               * holding rq->lock.
+ +               */
+ +              lockdep_assert_held(&rq->lock);
+ +              dequeue_task(rq, p, 0);
+ +      }
+ +      if (running)
+ +              put_prev_task(rq, p);
+ +
+ +      p->sched_class->set_cpus_allowed(p, new_mask);
+ +
+ +      if (running)
+ +              p->sched_class->set_curr_task(rq);
+ +      if (queued)
+ +              enqueue_task(rq, p, 0);
+ +}
+ +
   /*
    * Change a given task's CPU affinity. Migrate the thread to a
    * proper CPU and schedule it away if the CPU it's executing on
@@@ -1199,8 -1167,7 +1197,8 @@@
    * task must not exit() & deallocate itself prematurely. The
    * call is not atomic; no spinlocks may be held.
    */
- -int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ +static int __set_cpus_allowed_ptr(struct task_struct *p,
+ +                                const struct cpumask *new_mask, bool check)
   {
         unsigned long flags;
         struct rq *rq;
@@@ -1209,15 -1176,6 +1207,15 @@@
   
         rq = task_rq_lock(p, &flags);
   
+ +      /*
+ +       * Must re-check here, to close a race against __kthread_bind(),
+ +       * sched_setaffinity() is not guaranteed to observe the flag.
+ +       */
+ +      if (check && (p->flags & PF_NO_SETAFFINITY)) {
+ +              ret = -EINVAL;
+ +              goto out;
+ +      }
+ +
         if (cpumask_equal(&p->cpus_allowed, new_mask))
                 goto out;
   
@@@ -1254,11 -1212,6 +1252,11 @@@ out
   
         return ret;
   }
+ +
+ +int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ +{
+ +      return __set_cpus_allowed_ptr(p, new_mask, false);
+ +}
   EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
   
   void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
@@@ -1640,15 -1593,6 +1638,15 @@@ static void update_avg(u64 *avg, u64 sa
         s64 diff = sample - *avg;
         *avg += diff >> 3;
   }
+ +
+ +#else
+ +
+ +static inline int __set_cpus_allowed_ptr(struct task_struct *p,
+ +                                       const struct cpumask *new_mask, bool check)
+ +{
+ +      return set_cpus_allowed_ptr(p, new_mask);
+ +}
+ +
   #endif /* CONFIG_SMP */
   
   static void
@@@ -1708,9 -1652,9 +1706,9 @@@ static voi
   ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
   {
         check_preempt_curr(rq, p, wake_flags);
- -      trace_sched_wakeup(p, true);
- -
         p->state = TASK_RUNNING;
+ +      trace_sched_wakeup(p);
+ +
   #ifdef CONFIG_SMP
         if (p->sched_class->task_woken) {
                 /*
@@@ -1928,8 -1872,6 +1926,8 @@@ try_to_wake_up(struct task_struct *p, u
         if (!(p->state & state))
                 goto out;
   
+ +      trace_sched_waking(p);
+ +
         success = 1; /* we're going to change ->state */
         cpu = task_cpu(p);
   
@@@ -2005,8 -1947,6 +2003,8 @@@ static void try_to_wake_up_local(struc
         if (!(p->state & TASK_NORMAL))
                 goto out;
   
+ +      trace_sched_waking(p);
+ +
         if (!task_on_rq_queued(p))
                 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
   
@@@ -2074,6 -2014,9 +2072,6 @@@ static void __sched_fork(unsigned long 
         p->se.prev_sum_exec_runtime     = 0;
         p->se.nr_migrations             = 0;
         p->se.vruntime                  = 0;
- -#ifdef CONFIG_SMP
- -      p->se.avg.decay_count           = 0;
- -#endif
         INIT_LIST_HEAD(&p->se.group_node);
   
   #ifdef CONFIG_SCHEDSTATS
@@@ -2255,8 -2198,8 +2253,8 @@@ unsigned long to_ratio(u64 period, u64 
   #ifdef CONFIG_SMP
   inline struct dl_bw *dl_bw_of(int i)
   {
- -      rcu_lockdep_assert(rcu_read_lock_sched_held(),
- -                         "sched RCU must be held");
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ +                       "sched RCU must be held");
         return &cpu_rq(i)->rd->dl_bw;
   }
   
@@@ -2265,8 -2208,8 +2263,8 @@@ static inline int dl_bw_cpus(int i
         struct root_domain *rd = cpu_rq(i)->rd;
         int cpus = 0;
   
- -      rcu_lockdep_assert(rcu_read_lock_sched_held(),
- -                         "sched RCU must be held");
+ +      RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
+ +                       "sched RCU must be held");
         for_each_cpu_and(i, rd->span, cpu_active_mask)
                 cpus++;
   
@@@ -2358,11 -2301,11 +2356,11 @@@ void wake_up_new_task(struct task_struc
   #endif
   
         /* Initialize new task's runnable average */
- -      init_task_runnable_average(p);
+ +      init_entity_runnable_average(&p->se);
         rq = __task_rq_lock(p);
         activate_task(rq, p, 0);
         p->on_rq = TASK_ON_RQ_QUEUED;
- -      trace_sched_wakeup_new(p, true);
+ +      trace_sched_wakeup_new(p);
         check_preempt_curr(rq, p, WF_FORK);
   #ifdef CONFIG_SMP
         if (p->sched_class->task_woken)
@@@ -2524,6 -2467,7 +2522,6 @@@ static struct rq *finish_task_switch(st
          */
         prev_state = prev->state;
         vtime_task_switch(prev);
- -      finish_arch_switch(prev);
         perf_event_task_sched_in(prev, current);
         finish_lock_switch(rq, prev);
         finish_arch_post_lock_switch();
@@@ -2543,7 -2487,7 +2541,7 @@@
                 put_task_struct(prev);
         }
   
- -      tick_nohz_task_switch(current);
+ +      tick_nohz_task_switch();
         return rq;
   }
   
@@@ -4394,7 -4338,7 +4392,7 @@@ long sched_setaffinity(pid_t pid, cons
         }
   #endif
   again:
- -      retval = set_cpus_allowed_ptr(p, new_mask);
+ +      retval = __set_cpus_allowed_ptr(p, new_mask, true);
   
         if (!retval) {
                 cpuset_cpus_allowed(p, cpus_allowed);
@@@ -4546,7 -4490,7 +4544,7 @@@ SYSCALL_DEFINE0(sched_yield
   
   int __sched _cond_resched(void)
   {
- -      if (should_resched()) {
+ +      if (should_resched(0)) {
                 preempt_schedule_common();
                 return 1;
         }
@@@ -4564,7 -4508,7 +4562,7 @@@ EXPORT_SYMBOL(_cond_resched)
    */
   int __cond_resched_lock(spinlock_t *lock)
   {
- -      int resched = should_resched();
+ +      int resched = should_resched(PREEMPT_LOCK_OFFSET);
         int ret = 0;
   
         lockdep_assert_held(lock);
@@@ -4586,7 -4530,7 +4584,7 @@@ int __sched __cond_resched_softirq(void
   {
         BUG_ON(!in_softirq());
   
- -      if (should_resched()) {
+ +      if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
                 local_bh_enable();
                 preempt_schedule_common();
                 local_bh_disable();
@@@ -4919,8 -4863,7 +4917,8 @@@ void init_idle(struct task_struct *idle
         struct rq *rq = cpu_rq(cpu);
         unsigned long flags;
   
- -      raw_spin_lock_irqsave(&rq->lock, flags);
+ +      raw_spin_lock_irqsave(&idle->pi_lock, flags);
+ +      raw_spin_lock(&rq->lock);
   
         __sched_fork(0, idle);
         idle->state = TASK_RUNNING;
@@@ -4946,8 -4889,7 +4944,8 @@@
   #if defined(CONFIG_SMP)
         idle->on_cpu = 1;
   #endif
- -      raw_spin_unlock_irqrestore(&rq->lock, flags);
+ +      raw_spin_unlock(&rq->lock);
+ +      raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
   
         /* Set the preempt count _outside_ the spinlocks! */
         init_idle_preempt_count(idle, cpu);
@@@ -5367,7 -5309,8 +5365,7 @@@ static void register_sched_domain_sysct
   /* may be called multiple times per register */
   static void unregister_sched_domain_sysctl(void)
   {
- -      if (sd_sysctl_header)
- -              unregister_sysctl_table(sd_sysctl_header);
+ +      unregister_sysctl_table(sd_sysctl_header);
         sd_sysctl_header = NULL;
         if (sd_ctl_dir[0].child)
                 sd_free_ctl_entry(&sd_ctl_dir[0].child);
@@@ -5488,14 -5431,6 +5486,14 @@@ static int sched_cpu_active(struct noti
         case CPU_STARTING:
                 set_cpu_rq_start_time();
                 return NOTIFY_OK;
+ +      case CPU_ONLINE:
+ +              /*
+ +               * At this point a starting CPU has marked itself as online via
+ +               * set_cpu_online(). But it might not yet have marked itself
+ +               * as active, which is essential from here on.
+ +               *
+ +               * Thus, fall-through and help the starting CPU along.
+ +               */
         case CPU_DOWN_FAILED:
                 set_cpu_active((long)hcpu, true);
                 return NOTIFY_OK;
@@@ -6508,10 -6443,8 +6506,10 @@@ static void init_numa_topology_type(voi
   
         n = sched_max_numa_distance;
   
- -      if (n <= 1)
+ +      if (sched_domains_numa_levels <= 1) {
                 sched_numa_topology_type = NUMA_DIRECT;
+ +              return;
+ +      }
   
         for_each_online_node(a) {
                 for_each_online_node(b) {
@@@ -8133,7 -8066,7 +8131,7 @@@ static void cpu_cgroup_css_offline(stru
         sched_offline_group(tg);
   }
   
- -static void cpu_cgroup_fork(struct task_struct *task)
+ +static void cpu_cgroup_fork(struct task_struct *task, void *private)
   {
         sched_move_task(task);
   }
diff --combined lib/Kconfig.debug

index 3e0b662cae09611b5628c474cc58e448c3cefdc7,0d859305c55659aeb149ca2ac3e520abdbbe32fe..ab76b99adc857fb38c2e4677b6ab34d6a01fa786
--- 1/lib/Kconfig.debug
--- 2/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -916,12 -916,6 +916,6 @@@ config DEBUG_RT_MUTEXE
          This allows rt mutex semantics violations and rt mutex related
          deadlocks (lockups) to be detected and reported automatically.
   
- config RT_MUTEX_TESTER
-       bool "Built-in scriptable tester for rt-mutexes"
-       depends on DEBUG_KERNEL && RT_MUTEXES && BROKEN
-       help
-         This option enables a rt-mutex tester.
- 
   config DEBUG_SPINLOCK
         bool "Spinlock and rw-lock debugging: basic checks"
         depends on DEBUG_KERNEL
@@@ -1353,6 -1347,20 +1347,6 @@@ config RCU_CPU_STALL_TIMEOU
           RCU grace period persists, additional CPU stall warnings are
           printed at more widely spaced intervals.
   
- -config RCU_CPU_STALL_INFO
- -      bool "Print additional diagnostics on RCU CPU stall"
- -      depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL
- -      default y
- -      help
- -        For each stalled CPU that is aware of the current RCU grace
- -        period, print out additional per-CPU diagnostic information
- -        regarding scheduling-clock ticks, idle state, and,
- -        for RCU_FAST_NO_HZ kernels, idle-entry state.
- -
- -        Say N if you are unsure.
- -
- -        Say Y if you want to enable such diagnostics.
- -
   config RCU_TRACE
         bool "Enable tracing for RCU"
         depends on DEBUG_KERNEL
@@@ -1365,7 -1373,7 +1359,7 @@@
           Say N if you are unsure.
   
   config RCU_EQS_DEBUG
- -      bool "Use this when adding any sort of NO_HZ support to your arch"
+ +      bool "Provide debugging asserts for adding NO_HZ support to an arch"
         depends on DEBUG_KERNEL
         help
           This option provides consistency checks in RCU's handling of
@@@ -1528,6 -1536,13 +1522,13 @@@ config FAIL_MMC_REQUES
           and to test how the mmc host driver handles retries from
           the block device.
   
+ config FAIL_FUTEX
+       bool "Fault-injection capability for futexes"
+       select DEBUG_FS
+       depends on FAULT_INJECTION && FUTEX
+       help
+         Provide fault-injection capability for futexes.
+ 
   config FAULT_INJECTION_DEBUG_FS
         bool "Debugfs entries for fault-injection capabilities"
         depends on FAULT_INJECTION && SYSFS && DEBUG_FS
@@@ -1826,6 -1841,15 +1827,15 @@@ config MEMTES
                 memtest=17, mean do 17 test patterns.
           If you are unsure how to answer this question, answer N.
   
+ config TEST_STATIC_KEYS
+       tristate "Test static keys"
+       default n
+       depends on m
+       help
+         Test the static key interfaces.
+ 
+         If unsure, say N.
+ 
   source "samples/Kconfig"
   
   source "lib/Kconfig.kgdb"
diff --combined lib/Makefile

index f2610061bfa4dfc19d681efb43f476310b7c60d4,9f2fc71a14a31cd633ab86711163ffd1f44b5b3a..f01c558bf80db603abcb53868f0e777134208a60
--- 1/lib/Makefile
--- 2/lib/Makefile
+++ b/lib/Makefile
@@@ -39,6 -39,8 +39,8 @@@ obj-$(CONFIG_TEST_KSTRTOX) += test-kstr
   obj-$(CONFIG_TEST_LKM) += test_module.o
   obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o
   obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
+ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
+ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
   
   ifeq ($(CONFIG_DEBUG_KOBJECT),y)
   CFLAGS_kobject.o += -DDEBUG
@@@ -138,6 -140,8 +140,6 @@@ obj-$(CONFIG_GENERIC_ATOMIC64) += atomi
   
   obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
   
- -obj-$(CONFIG_AVERAGE) += average.o
- -
   obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
   
   obj-$(CONFIG_CORDIC) += cordic.o
@@@ -158,7 -162,6 +160,7 @@@ obj-$(CONFIG_GENERIC_STRNLEN_USER) += s
   
   obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
   
+ +obj-$(CONFIG_SG_SPLIT) += sg_split.o
   obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
   
   libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
author	Linus Torvalds <[email protected]>
	Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
committer	Linus Torvalds <[email protected]>
	Thu, 3 Sep 2015 22:46:07 +0000 (15:46 -0700)
		1	2
Documentation/memory-barriers.txt	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arc/include/asm/atomic.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/jump_label.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/time.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kvm/interrupt.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kvm/kvm-s390.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/lib/uaccess.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/lib/ksyms.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/tsc.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/xtensa/include/asm/atomic.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/s390/scsi/zfcp_fsf.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sched/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Kconfig.debug	patch \|	diff1 \|	diff2 \|	blob \| history
lib/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history