/*
 * Simple interface for atomic operations.
 *
 * Copyright (C) 2013 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#ifndef __QEMU_ATOMIC_H
#define __QEMU_ATOMIC_H 1

#include "qemu/compiler.h"

/* For C11 atomic ops */

/* Compiler barrier */
#define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })

#ifndef __ATOMIC_RELAXED

/*
 * We use the GCC builtin if it's available, as that can use mfence on
 * 32-bit as well, e.g. if built with -march=pentium-m.  However, on
 * i386 the spec is buggy, and the implementation followed it until
 * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
 */
#if defined(__i386__) || defined(__x86_64__)
#if !QEMU_GNUC_PREREQ(4, 4)
#if defined __x86_64__
#define smp_mb()    ({ asm volatile("mfence" ::: "memory"); (void)0; })
#else
#define smp_mb()    ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
#endif
#endif
#endif

#ifdef __alpha__
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)

/*
 * Because of the strongly ordered storage model, wmb() and rmb() are nops
 * here (a compiler barrier only).  QEMU doesn't do accesses to write-combining
 * memory or non-temporal load/stores from C code.
 */
#define smp_wmb()   barrier()
#define smp_rmb()   barrier()

/*
 * __sync_lock_test_and_set() is documented to be an acquire barrier only,
 * but it is a full barrier at the hardware level.  Add a compiler barrier
 * to make it a full barrier also at the compiler level.
 */
#define atomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))

/*
 * Load/store with Java volatile semantics.
 */
#define atomic_mb_set(ptr, i)  ((void)atomic_xchg(ptr, i))

#elif defined(_ARCH_PPC)

/*
 * We use an eieio() for wmb() on powerpc.  This assumes we don't
 * need to order cacheable and non-cacheable stores with respect to
 * each other.
 *
 * smp_mb has the same problem as on x86 for not-very-new GCC
 * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
 */
#define smp_wmb()   ({ asm volatile("eieio" ::: "memory"); (void)0; })
#if defined(__powerpc64__)
#define smp_rmb()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#else
#define smp_rmb()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#endif
#define smp_mb()    ({ asm volatile("sync" ::: "memory"); (void)0; })

#endif /* _ARCH_PPC */

#endif /* C11 atomics */

/*
 * For (host) platforms we don't have explicit barrier definitions
 * for, we use the gcc __sync_synchronize() primitive to generate a
 * full barrier.  This should be safe on all platforms, though it may
 * be overkill for smp_wmb() and smp_rmb().
 */
#ifndef smp_mb
#define smp_mb()    __sync_synchronize()
#endif
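
/*
 * Illustrative sketch (not part of the original interface): smp_mb() is the
 * only barrier here that orders a store before a later load (StoreLoad).
 * The flag array and check_peer() below are hypothetical, Dekker-style code:
 * each side stores its own flag, issues smp_mb(), then loads the other's
 * flag, so at least one of two concurrent callers is guaranteed to observe
 * the peer.
 *
 *     static int flag[2];
 *
 *     static int check_peer(int self)
 *     {
 *         atomic_set(&flag[self], 1);
 *         smp_mb();                          // order flag[self] store before
 *                                            // the flag[!self] load
 *         return atomic_read(&flag[!self]);  // 0 means the peer has not yet
 *                                            // passed its own smp_mb()
 *     }
 */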

#ifndef smp_wmb
#ifdef __ATOMIC_RELEASE
/* __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics.  If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */
#define smp_wmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); })
#else
#define smp_wmb()   __sync_synchronize()
#endif
#endif

#ifndef smp_rmb
#ifdef __ATOMIC_ACQUIRE
#define smp_rmb()   ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); })
#else
#define smp_rmb()   __sync_synchronize()
#endif
#endif
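
/*
 * Illustrative sketch (not part of the original interface): smp_wmb() in a
 * producer pairs with smp_rmb() in the consumer.  The msg/msg_ready names
 * are hypothetical.
 *
 *     static int msg;
 *     static int msg_ready;
 *
 *     static void producer(void)
 *     {
 *         msg = 42;
 *         smp_wmb();                   // order the msg store before the
 *         atomic_set(&msg_ready, 1);   // msg_ready store
 *     }
 *
 *     static int consumer(void)
 *     {
 *         if (atomic_read(&msg_ready)) {
 *             smp_rmb();               // order the msg_ready load before
 *             return msg;              // the msg load; guaranteed to see 42
 *         }
 *         return -1;
 *     }
 */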

#ifndef smp_read_barrier_depends
#ifdef __ATOMIC_CONSUME
#define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); })
#else
#define smp_read_barrier_depends()   barrier()
#endif
#endif

#ifndef atomic_read
#define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
#endif

#ifndef atomic_set
#define atomic_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))
#endif

/**
 * atomic_rcu_read - reads an RCU-protected pointer to a local variable
 * into an RCU read-side critical section. The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
 *
 * Inserts memory barriers on architectures that require them (currently only
 * Alpha) and documents which pointers are protected by RCU.
 *
 * Unless the __ATOMIC_CONSUME memory order is available, atomic_rcu_read also
 * includes a compiler barrier to ensure that value-speculative optimizations
 * (e.g. VSS: Value Speculation Scheduling) do not perform the data read
 * before the pointer read by speculating the value of the pointer. On new
 * enough compilers, atomic_load takes care of this concern about
 * dependency-breaking optimizations.
 *
 * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg().
 */
#ifndef atomic_rcu_read
#ifdef __ATOMIC_CONSUME
#define atomic_rcu_read(ptr)    ({                \
    typeof(*ptr) _val;                            \
    __atomic_load(ptr, &_val, __ATOMIC_CONSUME);  \
    _val;                                         \
})
#else
#define atomic_rcu_read(ptr)    ({                \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_read_barrier_depends();                   \
    _val;                                         \
})
#endif
#endif
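
/*
 * Illustrative sketch (not part of the original interface): a reader copies
 * an RCU-protected pointer once with atomic_rcu_read() and only dereferences
 * that copy inside the read-side critical section.  The global_map pointer,
 * struct map, use() and the rcu_read_lock()/rcu_read_unlock() calls are
 * assumptions made for the example, not definitions provided by this header.
 *
 *     struct map *map;
 *
 *     rcu_read_lock();
 *     map = atomic_rcu_read(&global_map);
 *     if (map) {
 *         use(map->data);   // safe: the copy stays valid until we unlock
 *     }
 *     rcu_read_unlock();
 */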

/**
 * atomic_rcu_set - assigns (publicizes) a pointer to a new data structure
 * meant to be read by RCU read-side critical sections.
 *
 * Documents which pointers will be dereferenced by RCU read-side critical
 * sections and adds the required memory barriers on architectures requiring
 * them. It also makes sure the compiler does not reorder code initializing the
 * data structure before its publication.
 *
 * Should match atomic_rcu_read().
 */
#ifndef atomic_rcu_set
#ifdef __ATOMIC_RELEASE
#define atomic_rcu_set(ptr, i)  do {              \
    typeof(*ptr) _val = (i);                      \
    __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \
} while (0)
#else
#define atomic_rcu_set(ptr, i)  do {              \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
} while (0)
#endif
#endif
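
/*
 * Illustrative sketch (not part of the original interface): a writer fully
 * initializes the new structure before publishing it with atomic_rcu_set(),
 * so readers that observe the new pointer also observe the initialized
 * fields.  global_map, struct map and build_data() are the same hypothetical
 * names used in the atomic_rcu_read() sketch above.
 *
 *     struct map *new_map = g_new0(struct map, 1);
 *
 *     new_map->data = build_data();          // plain initialization first
 *     atomic_rcu_set(&global_map, new_map);  // publication last
 */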

/* These have the same semantics as Java volatile variables.
 * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
 * "1. Issue a StoreStore barrier (wmb) before each volatile store.
 *  2. Issue a StoreLoad barrier after each volatile store.
 *     Note that you could instead issue one before each volatile load, but
 *     this would be slower for typical programs using volatiles in which
 *     reads greatly outnumber writes. Alternatively, if available, you
 *     can implement volatile store as an atomic instruction (for example
 *     XCHG on x86) and omit the barrier. This may be more efficient if
 *     atomic instructions are cheaper than StoreLoad barriers.
 *  3. Issue LoadLoad and LoadStore barriers after each volatile load."
 *
 * If you prefer to think in terms of "pairing" of memory barriers,
 * an atomic_mb_read pairs with an atomic_mb_set.
 *
 * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
 * while an atomic_mb_set is a st.rel followed by a memory barrier.
 *
 * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
 * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
 * Just always use the barriers manually by the rules above.
 */
#ifndef atomic_mb_read
#define atomic_mb_read(ptr)    ({           \
    typeof(*ptr) _val = atomic_read(ptr);   \
    smp_rmb();                              \
    _val;                                   \
})
#endif

#ifndef atomic_mb_set
#define atomic_mb_set(ptr, i)  do {         \
    smp_wmb();                              \
    atomic_set(ptr, i);                     \
    smp_mb();                               \
} while (0)
#endif
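
/*
 * Illustrative sketch (not part of the original interface): atomic_mb_set()
 * in one thread pairs with atomic_mb_read() in another, giving the
 * Java-volatile behaviour described above.  The request/request_pending
 * names, build_request() and process() are hypothetical.
 *
 *     static void *request;
 *     static int request_pending;
 *
 *     // submitter
 *     request = build_request();              // ordinary store
 *     atomic_mb_set(&request_pending, 1);     // wmb before, mb after
 *
 *     // worker
 *     if (atomic_mb_read(&request_pending)) { // load followed by rmb
 *         process(request);                   // sees the completed request
 *     }
 */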

#ifndef atomic_xchg
#if defined(__clang__)
#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
#elif defined(__ATOMIC_SEQ_CST)
#define atomic_xchg(ptr, i)    ({                           \
    typeof(*ptr) _new = (i), _old;                          \
    __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
    _old;                                                   \
})
#else
/* __sync_lock_test_and_set() is documented to be an acquire barrier only.  */
#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif
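
/*
 * Illustrative sketch (not part of the original interface): atomic_xchg()
 * returns the previous value with full-barrier semantics, so it can be used
 * to claim a shared item exactly once.  The pending_job name and run() are
 * hypothetical.
 *
 *     static void *pending_job;
 *
 *     // each worker tries to claim the job; at most one sees it non-NULL
 *     void *job = atomic_xchg(&pending_job, NULL);
 *     if (job) {
 *         run(job);
 *     }
 */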

/* Provide shorter names for GCC atomic builtins.  */
#define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add       __sync_fetch_and_add
#define atomic_fetch_sub       __sync_fetch_and_sub
#define atomic_fetch_and       __sync_fetch_and_and
#define atomic_fetch_or        __sync_fetch_and_or
#define atomic_cmpxchg         __sync_val_compare_and_swap

/* And even shorter names that return void.  */
#define atomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
#define atomic_dec(ptr)        ((void) __sync_fetch_and_add(ptr, -1))
#define atomic_add(ptr, n)     ((void) __sync_fetch_and_add(ptr, n))
#define atomic_sub(ptr, n)     ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n)     ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n)      ((void) __sync_fetch_and_or(ptr, n))
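
/*
 * Illustrative sketch (not part of the original interface): the fetch-and-op
 * wrappers make a simple reference count easy to express.  struct obj, its
 * refcount field and obj_free() are hypothetical.
 *
 *     void obj_ref(struct obj *obj)
 *     {
 *         atomic_inc(&obj->refcount);
 *     }
 *
 *     void obj_unref(struct obj *obj)
 *     {
 *         if (atomic_fetch_dec(&obj->refcount) == 1) {
 *             obj_free(obj);    // the old value was 1: last reference gone
 *         }
 *     }
 */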

#endif /* __QEMU_ATOMIC_H */