Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-09-07-1' into...

[qemu.git] / docs / devel / atomics.txt
diff --git a/docs/devel/atomics.txt b/docs/devel/atomics.txt

index 3ef5d85b1be2100bcc8c7d5af2e0bf8b236d018e..a4db3a4aaadb943337a56e166cd8556cbc3db099 100644 (file)
--- a/docs/devel/atomics.txt
+++ b/docs/devel/atomics.txt
@@ -63,11 +63,23 @@ operations:
      typeof(*ptr) atomic_fetch_sub(ptr, val)
      typeof(*ptr) atomic_fetch_and(ptr, val)
      typeof(*ptr) atomic_fetch_or(ptr, val)
+    typeof(*ptr) atomic_fetch_xor(ptr, val)
+    typeof(*ptr) atomic_fetch_inc_nonzero(ptr)
      typeof(*ptr) atomic_xchg(ptr, val)
      typeof(*ptr) atomic_cmpxchg(ptr, old, new)
  
  all of which return the old value of *ptr.  These operations are
-polymorphic; they operate on any type that is as wide as an int.
+polymorphic; they operate on any type that is as wide as a pointer or smaller.
+
+Similar operations return the new value of *ptr:
+
+    typeof(*ptr) atomic_inc_fetch(ptr)
+    typeof(*ptr) atomic_dec_fetch(ptr)
+    typeof(*ptr) atomic_add_fetch(ptr, val)
+    typeof(*ptr) atomic_sub_fetch(ptr, val)
+    typeof(*ptr) atomic_and_fetch(ptr, val)
+    typeof(*ptr) atomic_or_fetch(ptr, val)
+    typeof(*ptr) atomic_xor_fetch(ptr, val)
  
  Sequentially consistent loads and stores can be done using:
  
@@ -110,20 +122,30 @@ In general, if the algorithm you are writing includes both writes
  and reads on the same side, it is generally simpler to use sequentially
  consistent primitives.
  
-When using this model, variables are accessed with atomic_read() and
-atomic_set(), and restrictions to the ordering of accesses is enforced
+When using this model, variables are accessed with:
+
+- atomic_read() and atomic_set(); these prevent the compiler from
+  optimizing accesses out of existence and creating unsolicited
+  accesses, but do not otherwise impose any ordering on loads and
+  stores: both the compiler and the processor are free to reorder
+  them.
+
+- atomic_load_acquire(), which guarantees the LOAD to appear to
+  happen, with respect to the other components of the system,
+  before all the LOAD or STORE operations specified afterwards.
+  Operations coming before atomic_load_acquire() can still be
+  reordered after it.
+
+- atomic_store_release(), which guarantees the STORE to appear to
+  happen, with respect to the other components of the system,
+  after all the LOAD or STORE operations specified before.
+  Operations coming after atomic_store_release() can still be
+  reordered before it.
+
+Restrictions on the ordering of accesses can also be specified
  using the memory barrier macros: smp_rmb(), smp_wmb(), smp_mb(),
  smp_mb_acquire(), smp_mb_release(), smp_read_barrier_depends().
  
-atomic_read() and atomic_set() prevents the compiler from using
-optimizations that might otherwise optimize accesses out of existence
-on the one hand, or that might create unsolicited accesses on the other.
-In general this should not have any effect, because the same compiler
-barriers are already implied by memory barriers.  However, it is useful
-to do so, because it tells readers which variables are shared with
-other threads, and which are local to the current thread or protected
-by other, more mundane means.
-
  Memory barriers control the order of references to shared memory.
  They come in six kinds:
  
@@ -220,7 +242,7 @@ make atomic_mb_set() the more expensive operation.
  
  There are two common cases in which atomic_mb_read and atomic_mb_set
  generate too many memory barriers, and thus it can be useful to manually
-place barriers instead:
+place barriers, or use atomic_load_acquire/atomic_store_release instead:
  
  - when a data structure has one thread that is always a writer
    and one thread that is always a reader, manual placement of
@@ -231,18 +253,15 @@ place barriers instead:
       thread 1                                thread 1
       -------------------------               ------------------------
       (other writes)
-                                             smp_mb_release()
-     atomic_mb_set(&a, x)                    atomic_set(&a, x)
-                                             smp_wmb()
-     atomic_mb_set(&b, y)                    atomic_set(&b, y)
+     atomic_mb_set(&a, x)                    atomic_store_release(&a, x)
+     atomic_mb_set(&b, y)                    atomic_store_release(&b, y)
  
                                         =>
       thread 2                                thread 2
       -------------------------               ------------------------
-     y = atomic_mb_read(&b)                  y = atomic_read(&b)
-                                             smp_rmb()
-     x = atomic_mb_read(&a)                  x = atomic_read(&a)
-                                             smp_mb_acquire()
+     y = atomic_mb_read(&b)                  y = atomic_load_acquire(&b)
+     x = atomic_mb_read(&a)                  x = atomic_load_acquire(&a)
+     (other reads)
  
    Note that the barrier between the stores in thread 1, and between
    the loads in thread 2, has been optimized here to a write or a
@@ -264,7 +283,6 @@ place barriers instead:
                                               smp_mb_acquire();
  
    Similarly, atomic_mb_set() can be transformed as follows:
-  smp_mb():
  
                                               smp_mb_release();
       for (i = 0; i < 10; i++)          =>    for (i = 0; i < 10; i++)
@@ -272,6 +290,8 @@ place barriers instead:
                                               smp_mb();
  
  
+  The other thread can still use atomic_mb_read()/atomic_mb_set().
+
  The two tricks can be combined.  In this case, splitting a loop in
  two lets you hoist the barriers out of the loops _and_ eliminate the
  expensive smp_mb():
@@ -284,8 +304,6 @@ expensive smp_mb():
                                                 atomic_set(&a[i], false);
                                               smp_mb();
  
-  The other thread can still use atomic_mb_read()/atomic_mb_set()
-
  
  Memory barrier pairing
  ----------------------
@@ -374,10 +392,7 @@ and memory barriers, and the equivalents in QEMU:
    note that smp_store_mb() is a little weaker than atomic_mb_set().
    atomic_mb_read() compiles to the same instructions as Linux's
    smp_load_acquire(), but this should be treated as an implementation
-  detail.  QEMU does have atomic_load_acquire() and atomic_store_release()
-  macros, but for now they are only used within atomic.h.  This may
-  change in the future.
-
+  detail.
  
  SOURCES
  =======