/*
 * Simple interface for atomic operations.
 *
 * Copyright (C) 2013 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * See docs/atomics.txt for discussion about the guarantees each
 * atomic primitive is meant to provide.
 */

#ifndef QEMU_ATOMIC_H
#define QEMU_ATOMIC_H

/* Compiler barrier */
#define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })

/* The variable that receives the old value of an atomically-accessed
 * variable must be non-qualified, because atomic builtins return values
 * through a pointer-type argument as in __atomic_load(&var, &old, MODEL).
 *
 * This macro has to handle types smaller than int manually, because of
 * implicit promotion.  int and larger types, as well as pointers, can be
 * converted to a non-qualified type just by applying a binary operator.
 */
#define typeof_strip_qual(expr)                                                    \
  typeof(                                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), bool) ||                          \
        __builtin_types_compatible_p(typeof(expr), const bool) ||                  \
        __builtin_types_compatible_p(typeof(expr), volatile bool) ||               \
        __builtin_types_compatible_p(typeof(expr), const volatile bool),           \
        (bool)1,                                                                   \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), signed char) ||                   \
        __builtin_types_compatible_p(typeof(expr), const signed char) ||           \
        __builtin_types_compatible_p(typeof(expr), volatile signed char) ||        \
        __builtin_types_compatible_p(typeof(expr), const volatile signed char),    \
        (signed char)1,                                                            \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), unsigned char) ||                 \
        __builtin_types_compatible_p(typeof(expr), const unsigned char) ||         \
        __builtin_types_compatible_p(typeof(expr), volatile unsigned char) ||      \
        __builtin_types_compatible_p(typeof(expr), const volatile unsigned char),  \
        (unsigned char)1,                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), signed short) ||                  \
        __builtin_types_compatible_p(typeof(expr), const signed short) ||          \
        __builtin_types_compatible_p(typeof(expr), volatile signed short) ||       \
        __builtin_types_compatible_p(typeof(expr), const volatile signed short),   \
        (signed short)1,                                                           \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), unsigned short) ||                \
        __builtin_types_compatible_p(typeof(expr), const unsigned short) ||        \
        __builtin_types_compatible_p(typeof(expr), volatile unsigned short) ||     \
        __builtin_types_compatible_p(typeof(expr), const volatile unsigned short), \
        (unsigned short)1,                                                         \
      (expr)+0))))))

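/*
 * Illustrative sketch (not part of the original header): given
 *
 *     volatile unsigned short counter;
 *     typeof_strip_qual(counter) old;
 *
 * "old" is declared as a plain unsigned short, with the volatile
 * qualifier stripped but without promotion to int, so the __atomic
 * builtins can legally store their result through &old.
 */
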
#ifdef __ATOMIC_RELAXED
/* For C11 atomic ops */

/* Manual memory barriers
 *
 * __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics.  If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */

#define smp_mb()                     ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); })
#define smp_mb_release()             ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); })
#define smp_mb_acquire()             ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); })

/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here.  Leave it in if
 * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
 */
#if defined(__SANITIZE_THREAD__)
#define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); })
#elif defined(__alpha__)
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#else
#define smp_read_barrier_depends()   barrier()
#endif

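/*
 * Illustrative sketch (variable names invented for the example): the full
 * smp_mb() is what the store-buffering pattern needs; release/acquire
 * fences alone would still allow both threads to read 0:
 *
 *     // thread 1                        // thread 2
 *     atomic_set(&flag1, 1);             atomic_set(&flag2, 1);
 *     smp_mb();                          smp_mb();
 *     r1 = atomic_read(&flag2);          r2 = atomic_read(&flag1);
 *
 * With the smp_mb() calls in place, r1 == 0 && r2 == 0 is impossible.
 */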

/* Weak atomic operations prevent the compiler moving other
 * loads/stores past the atomic operation load/store.  However there is
 * no explicit memory barrier for the processor.
 *
 * The C11 memory model says that variables that are accessed from
 * different threads should at least be done with __ATOMIC_RELAXED
 * primitives or the result is undefined.  Generally this has little to
 * no effect on the generated code but not using the atomic primitives
 * will get flagged by sanitizers as a violation.
 */
#define atomic_read(ptr)                              \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    __atomic_load_n(ptr, __ATOMIC_RELAXED);           \
    })

#define atomic_set(ptr, i)  do {                      \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    __atomic_store_n(ptr, i, __ATOMIC_RELAXED);       \
} while(0)

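/*
 * Illustrative sketch (variable names are invented): a flag polled by
 * another thread should use these relaxed helpers rather than plain
 * loads/stores, so the access is a proper C11 atomic and sanitizers do
 * not report a data race.  No ordering of other memory is implied; use
 * the acquire/release macros below when ordering is needed.
 *
 *     static int request_pending;
 *
 *     atomic_set(&request_pending, 1);       // producer thread
 *     if (atomic_read(&request_pending)) {   // consumer thread
 *         ...
 *     }
 */
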
/* See above: most compilers currently treat consume and acquire the
 * same, but this slows down atomic_rcu_read unnecessarily.
 */
#ifdef __SANITIZE_THREAD__
#define atomic_rcu_read__nocheck(ptr, valptr)           \
    __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
#else
#define atomic_rcu_read__nocheck(ptr, valptr)           \
    __atomic_load(ptr, valptr, __ATOMIC_RELAXED);       \
    smp_read_barrier_depends();
#endif

#define atomic_rcu_read(ptr)                          \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof_strip_qual(*ptr) _val;                     \
    atomic_rcu_read__nocheck(ptr, &_val);             \
    _val;                                             \
    })

#define atomic_rcu_set(ptr, i) do {                   \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);       \
} while(0)

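/*
 * Illustrative sketch of the intended publish/read pattern (the type and
 * variable names here are hypothetical, not from the original header):
 *
 *     struct Cfg { int limit; };
 *     static struct Cfg *active_cfg;
 *
 *     // writer: fully initialize the object, then publish the pointer
 *     struct Cfg *cfg = g_new0(struct Cfg, 1);
 *     cfg->limit = 10;
 *     atomic_rcu_set(&active_cfg, cfg);
 *
 *     // reader, inside an RCU read-side critical section: read the
 *     // pointer once, then dereference the local copy
 *     struct Cfg *c = atomic_rcu_read(&active_cfg);
 *     if (c) {
 *         use_limit(c->limit);    // use_limit() is a placeholder
 *     }
 */
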
#define atomic_load_acquire(ptr)                      \
    ({                                                \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    typeof_strip_qual(*ptr) _val;                     \
    __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);      \
    _val;                                             \
    })

#define atomic_store_release(ptr, i)  do {            \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *)); \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);       \
} while(0)


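/*
 * Illustrative sketch of the usual message-passing pairing of the two
 * macros above (variable names invented for the example):
 *
 *     // producer
 *     payload = compute();                  // ordinary store
 *     atomic_store_release(&ready, true);   // publishes the payload
 *
 *     // consumer
 *     if (atomic_load_acquire(&ready)) {
 *         consume(payload);                 // guaranteed to see the store
 *     }
 */
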
/* All the remaining operations are fully sequentially consistent */

#define atomic_xchg(ptr, i)    ({                           \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
    __atomic_exchange_n(ptr, i, __ATOMIC_SEQ_CST);          \
})

/* Returns the value that was found in *ptr, whether the exchange
 * succeeded or not.
 */
#define atomic_cmpxchg(ptr, old, new)                       \
    ({                                                      \
    QEMU_BUILD_BUG_ON(sizeof(*ptr) > sizeof(void *));       \
    typeof_strip_qual(*ptr) _old = (old);                   \
    __atomic_compare_exchange_n(ptr, &_old, new, false,     \
                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
    _old;                                                   \
})

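/*
 * Illustrative sketch (names invented): because atomic_cmpxchg() returns
 * the value it found, the conventional retry loop compares that return
 * value against the expected one:
 *
 *     int old, new;
 *     do {
 *         old = atomic_read(&counter);
 *         new = old + 1;
 *     } while (atomic_cmpxchg(&counter, old, new) != old);
 */
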
/* Provide shorter names for GCC atomic builtins, return old value */
#define atomic_fetch_inc(ptr)    __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_dec(ptr)    __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)
#define atomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define atomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)

/* And even shorter names that return void.  */
#define atomic_inc(ptr)    ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_dec(ptr)    ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST))
#define atomic_add(ptr, n) ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_sub(ptr, n) ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_and(ptr, n) ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define atomic_or(ptr, n)  ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))

#else /* __ATOMIC_RELAXED */

/*
 * We use the GCC builtin if it's available, as that can use mfence on
 * 32-bit as well, e.g. if built with -march=pentium-m.  However, on
 * i386 the spec is buggy, and the implementation followed it until
 * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
 */
#if defined(__i386__) || defined(__x86_64__)
#if !QEMU_GNUC_PREREQ(4, 4)
#if defined __x86_64__
#define smp_mb()    ({ asm volatile("mfence" ::: "memory"); (void)0; })
#else
#define smp_mb()    ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
#endif
#endif
#endif


#ifdef __alpha__
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#endif

#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)

/*
 * Because of the strongly ordered storage model, wmb() and rmb() are nops
 * here (a compiler barrier only).  QEMU doesn't do accesses to write-combining
 * memory or non-temporal load/stores from C code.
 */
#define smp_mb_release()   barrier()
#define smp_mb_acquire()   barrier()

/*
 * __sync_lock_test_and_set() is documented to be an acquire barrier only,
 * but it is a full barrier at the hardware level.  Add a compiler barrier
 * to make it a full barrier also at the compiler level.
 */
#define atomic_xchg(ptr, i)    (barrier(), __sync_lock_test_and_set(ptr, i))

#elif defined(_ARCH_PPC)

/*
 * We use an eieio() for wmb() on powerpc.  This assumes we don't
 * need to order cacheable and non-cacheable stores with respect to
 * each other.
 *
 * smp_mb has the same problem as on x86 for not-very-new GCC
 * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
 */
#define smp_wmb()          ({ asm volatile("eieio" ::: "memory"); (void)0; })
#if defined(__powerpc64__)
#define smp_mb_release()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#define smp_mb_acquire()   ({ asm volatile("lwsync" ::: "memory"); (void)0; })
#else
#define smp_mb_release()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#define smp_mb_acquire()   ({ asm volatile("sync" ::: "memory"); (void)0; })
#endif
#define smp_mb()           ({ asm volatile("sync" ::: "memory"); (void)0; })

#endif /* _ARCH_PPC */

/*
 * For (host) platforms we don't have explicit barrier definitions
 * for, we use the gcc __sync_synchronize() primitive to generate a
 * full barrier.  This should be safe on all platforms, though it may
 * be overkill for smp_mb_acquire() and smp_mb_release().
 */
#ifndef smp_mb
#define smp_mb()           __sync_synchronize()
#endif

#ifndef smp_mb_acquire
#define smp_mb_acquire()   __sync_synchronize()
#endif

#ifndef smp_mb_release
#define smp_mb_release()   __sync_synchronize()
#endif

#ifndef smp_read_barrier_depends
#define smp_read_barrier_depends()   barrier()
#endif

/* These will only be atomic if the processor does the fetch or store
 * in a single issue memory operation
 */
#define atomic_read(ptr)       (*(__typeof__(*ptr) volatile*) (ptr))
#define atomic_set(ptr, i)     ((*(__typeof__(*ptr) volatile*) (ptr)) = (i))

/**
 * atomic_rcu_read - reads an RCU-protected pointer into a local variable,
 * inside an RCU read-side critical section.  The pointer can later be safely
 * dereferenced within the critical section.
 *
 * This ensures that the pointer copy is invariant throughout the whole
 * critical section.
 *
 * Inserts memory barriers on architectures that require them (currently only
 * Alpha) and documents which pointers are protected by RCU.
 *
 * atomic_rcu_read also includes a compiler barrier to ensure that
 * value-speculative optimizations (e.g. VSS: Value Speculation
 * Scheduling) do not perform the data read before the pointer read
 * by speculating the value of the pointer.
 *
 * Should match atomic_rcu_set(), atomic_xchg(), atomic_cmpxchg().
 */
#define atomic_rcu_read(ptr)    ({                \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_read_barrier_depends();                   \
    _val;                                         \
})

/**
 * atomic_rcu_set - assigns (publicizes) a pointer to a new data structure
 * meant to be read by RCU read-side critical sections.
 *
 * Documents which pointers will be dereferenced by RCU read-side critical
 * sections and adds the required memory barriers on architectures requiring
 * them.  It also makes sure the compiler does not reorder the code that
 * initializes the data structure to after its publication.
 *
 * Should match atomic_rcu_read().
 */
#define atomic_rcu_set(ptr, i)  do {              \
    smp_wmb();                                    \
    atomic_set(ptr, i);                           \
} while (0)

#define atomic_load_acquire(ptr)    ({            \
    typeof(*ptr) _val = atomic_read(ptr);         \
    smp_mb_acquire();                             \
    _val;                                         \
})

#define atomic_store_release(ptr, i)  do {        \
    smp_mb_release();                             \
    atomic_set(ptr, i);                           \
} while (0)

#ifndef atomic_xchg
#if defined(__clang__)
#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
#else
/* __sync_lock_test_and_set() is documented to be an acquire barrier only. */
#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
#endif
#endif

/* Provide shorter names for GCC atomic builtins.  */
#define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
#define atomic_fetch_dec(ptr)  __sync_fetch_and_add(ptr, -1)
#define atomic_fetch_add       __sync_fetch_and_add
#define atomic_fetch_sub       __sync_fetch_and_sub
#define atomic_fetch_and       __sync_fetch_and_and
#define atomic_fetch_or        __sync_fetch_and_or
#define atomic_cmpxchg         __sync_val_compare_and_swap

/* And even shorter names that return void.  */
#define atomic_inc(ptr)        ((void) __sync_fetch_and_add(ptr, 1))
#define atomic_dec(ptr)        ((void) __sync_fetch_and_add(ptr, -1))
#define atomic_add(ptr, n)     ((void) __sync_fetch_and_add(ptr, n))
#define atomic_sub(ptr, n)     ((void) __sync_fetch_and_sub(ptr, n))
#define atomic_and(ptr, n)     ((void) __sync_fetch_and_and(ptr, n))
#define atomic_or(ptr, n)      ((void) __sync_fetch_and_or(ptr, n))

#endif /* __ATOMIC_RELAXED */

#ifndef smp_wmb
#define smp_wmb()   smp_mb_release()
#endif
#ifndef smp_rmb
#define smp_rmb()   smp_mb_acquire()
#endif

/* This is more efficient than a store plus a fence.  */
#if !defined(__SANITIZE_THREAD__)
#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
#define atomic_mb_set(ptr, i)  ((void)atomic_xchg(ptr, i))
#endif
#endif

/* atomic_mb_read/set semantics map to Java volatile variables.  They are
 * less expensive on some platforms (notably POWER) than fully
 * sequentially consistent operations.
 *
 * As long as they are used as paired operations they are safe to
 * use.  See docs/atomics.txt for more discussion.
 */

#ifndef atomic_mb_read
#define atomic_mb_read(ptr)                       \
    atomic_load_acquire(ptr)
#endif

#ifndef atomic_mb_set
#define atomic_mb_set(ptr, i)  do {               \
    atomic_store_release(ptr, i);                 \
    smp_mb();                                     \
} while(0)
#endif
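
/*
 * Illustrative sketch of the pairing mentioned above (variable names are
 * invented for the example):
 *
 *     atomic_mb_set(&msg, m);       // thread A: publish with full ordering
 *
 *     m = atomic_mb_read(&msg);     // thread B: the matching read
 *
 * Used as a pair like this they behave like Java volatile accesses; mixing
 * them with weaker primitives needs the analysis in docs/atomics.txt.
 */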

#endif /* QEMU_ATOMIC_H */