/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014 Regents of the University of California
 */

#ifndef _ASM_RISCV_CMPXCHG_H
#define _ASM_RISCV_CMPXCHG_H

#include <linux/bug.h>

#include <asm/alternative-macros.h>
#include <asm/fence.h>
#include <asm/hwcap.h>
#include <asm/insn-def.h>
#include <asm/cpufeature-macros.h>
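
/*
 * xchg() implementation note (summary of the code below): 8- and 16-bit
 * exchanges either use a single Zabha amoswap.b/amoswap.h when the extension
 * is available, or fall back to an LR/SC loop on the aligned 32-bit word
 * containing the target, masking the new value into place.  32- and 64-bit
 * exchanges always use a plain amoswap AMO.
 */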

#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
			   swap_append, r, p, n) \
({ \
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA)) { \
		__asm__ __volatile__ ( \
			prepend \
			"	amoswap" swap_sfx " %0, %z2, %1\n" \
			swap_append \
			: "=&r" (r), "+A" (*(p)) \
			: "rJ" (n) \
			: "memory"); \
	} else { \
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s; \
		ulong __newx = (ulong)(n) << __s; \
		ulong __retx; \
		ulong __rc; \
	\
		__asm__ __volatile__ ( \
			prepend \
			"0:	lr.w %0, %2\n" \
			"	and  %1, %0, %z4\n" \
			"	or   %1, %1, %z3\n" \
			"	sc.w" sc_sfx " %1, %1, %2\n" \
			"	bnez %1, 0b\n" \
			sc_append \
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" (__newx), "rJ" (~__mask) \
			: "memory"); \
	\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
	} \
})
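
/*
 * Word/doubleword exchange: a single amoswap.w/amoswap.d, with optional
 * fences passed in via @prepend/@append by the ordering variants below.
 */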

#define __arch_xchg(sfx, prepend, append, r, p, n) \
({ \
	__asm__ __volatile__ ( \
		prepend \
		"	amoswap" sfx " %0, %2, %1\n" \
		append \
		: "=r" (r), "+A" (*(p)) \
		: "r" (n) \
		: "memory"); \
})

#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
		   sc_append, swap_append) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(*(__ptr)) __new = (new); \
	__typeof__(*(__ptr)) __ret; \
	\
	switch (sizeof(*__ptr)) { \
	case 1: \
		__arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
				   prepend, sc_append, swap_append, \
				   __ret, __ptr, __new); \
		break; \
	case 2: \
		__arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
				   prepend, sc_append, swap_append, \
				   __ret, __ptr, __new); \
		break; \
	case 4: \
		__arch_xchg(".w" swap_sfx, prepend, swap_append, \
			    __ret, __ptr, __new); \
		break; \
	case 8: \
		__arch_xchg(".d" swap_sfx, prepend, swap_append, \
			    __ret, __ptr, __new); \
		break; \
	default: \
		BUILD_BUG(); \
	} \
	(__typeof__(*(__ptr)))__ret; \
})
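
/*
 * Ordering variants: relaxed passes no fences or AMO ordering bits; acquire
 * appends RISCV_ACQUIRE_BARRIER; release prepends RISCV_RELEASE_BARRIER; the
 * fully ordered arch_xchg() uses .rl plus a trailing full fence on the LR/SC
 * fallback and .aqrl on the AMO path.
 */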

#define arch_xchg_relaxed(ptr, x) \
	_arch_xchg(ptr, x, "", "", "", "", "")

#define arch_xchg_acquire(ptr, x) \
	_arch_xchg(ptr, x, "", "", "", \
		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)

#define arch_xchg_release(ptr, x) \
	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")

#define arch_xchg(ptr, x) \
	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")

#define xchg32(ptr, x) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 4); \
	arch_xchg((ptr), (x)); \
})

#define xchg64(ptr, x) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_xchg((ptr), (x)); \
})

/*
 * Atomic compare and exchange.  Compare OLD with MEM, if identical,
 * store NEW in MEM.  Return the initial value in MEM.  Success is
 * indicated by comparing RETURN with OLD.
 */
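
/*
 * Illustrative only (hypothetical caller and pointer @p, not part of this
 * header): the usual retry loop built on these semantics looks like
 *
 *	for (;;) {
 *		old = READ_ONCE(*p);
 *		if (arch_cmpxchg(p, old, old + 1) == old)
 *			break;
 *	}
 */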

#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, \
			      sc_prepend, sc_append, \
			      cas_prepend, cas_append, \
			      r, p, o, n) \
({ \
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) && \
	    IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
		r = o; \
	\
		__asm__ __volatile__ ( \
			cas_prepend \
			"	amocas" cas_sfx " %0, %z2, %1\n" \
			cas_append \
			: "+&r" (r), "+A" (*(p)) \
			: "rJ" (n) \
			: "memory"); \
	} else { \
		u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
		ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
		ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
			       << __s; \
		ulong __newx = (ulong)(n) << __s; \
		ulong __oldx = (ulong)(o) << __s; \
		ulong __retx; \
		ulong __rc; \
	\
		__asm__ __volatile__ ( \
			sc_prepend \
			"0:	lr.w %0, %2\n" \
			"	and  %1, %0, %z5\n" \
			"	bne  %1, %z3, 1f\n" \
			"	and  %1, %0, %z6\n" \
			"	or   %1, %1, %z4\n" \
			"	sc.w" sc_sfx " %1, %1, %2\n" \
			"	bnez %1, 0b\n" \
			sc_append \
			"1:\n" \
			: "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b)) \
			: "rJ" ((long)__oldx), "rJ" (__newx), \
			  "rJ" (__mask), "rJ" (~__mask) \
			: "memory"); \
	\
		r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
	} \
})
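
/*
 * Word/doubleword compare-and-exchange: a single Zacas amocas when the
 * extension is available, otherwise the classic LR/SC retry loop.  @co is a
 * cast applied to @o so that a 32-bit old value is sign-extended to match
 * the sign-extending lr.w result in the bne comparison.
 */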

#define __arch_cmpxchg(lr_sfx, sc_sfx, cas_sfx, \
		       sc_prepend, sc_append, \
		       cas_prepend, cas_append, \
		       r, p, co, o, n) \
({ \
	if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) && \
	    riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) { \
		r = o; \
	\
		__asm__ __volatile__ ( \
			cas_prepend \
			"	amocas" cas_sfx " %0, %z2, %1\n" \
			cas_append \
			: "+&r" (r), "+A" (*(p)) \
			: "rJ" (n) \
			: "memory"); \
	} else { \
		register unsigned int __rc; \
	\
		__asm__ __volatile__ ( \
			sc_prepend \
			"0:	lr" lr_sfx " %0, %2\n" \
			"	bne  %0, %z3, 1f\n" \
			"	sc" sc_sfx " %1, %z4, %2\n" \
			"	bnez %1, 0b\n" \
			sc_append \
			"1:\n" \
			: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
			: "rJ" (co o), "rJ" (n) \
			: "memory"); \
	} \
})

#define _arch_cmpxchg(ptr, old, new, sc_sfx, cas_sfx, \
		      sc_prepend, sc_append, \
		      cas_prepend, cas_append) \
({ \
	__typeof__(ptr) __ptr = (ptr); \
	__typeof__(*(__ptr)) __old = (old); \
	__typeof__(*(__ptr)) __new = (new); \
	__typeof__(*(__ptr)) __ret; \
	\
	switch (sizeof(*__ptr)) { \
	case 1: \
		__arch_cmpxchg_masked(sc_sfx, ".b" cas_sfx, \
				      sc_prepend, sc_append, \
				      cas_prepend, cas_append, \
				      __ret, __ptr, __old, __new); \
		break; \
	case 2: \
		__arch_cmpxchg_masked(sc_sfx, ".h" cas_sfx, \
				      sc_prepend, sc_append, \
				      cas_prepend, cas_append, \
				      __ret, __ptr, __old, __new); \
		break; \
	case 4: \
		__arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \
			       sc_prepend, sc_append, \
			       cas_prepend, cas_append, \
			       __ret, __ptr, (long), __old, __new); \
		break; \
	case 8: \
		__arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \
			       sc_prepend, sc_append, \
			       cas_prepend, cas_append, \
			       __ret, __ptr, /**/, __old, __new); \
		break; \
	default: \
		BUILD_BUG(); \
	} \
	(__typeof__(*(__ptr)))__ret; \
})

/*
 * These macros are here to improve the readability of the arch_cmpxchg_XXX()
 * macros.
 */
#define SC_SFX(x)	x
#define CAS_SFX(x)	x
#define SC_PREPEND(x)	x
#define SC_APPEND(x)	x
#define CAS_PREPEND(x)	x
#define CAS_APPEND(x)	x
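
/*
 * As with xchg, the variants below only differ in the fences and AMO
 * ordering bits they hand down: relaxed passes none, acquire appends an
 * acquire fence, release prepends a release fence, and the fully ordered
 * arch_cmpxchg() uses sc.rl plus a trailing full fence on the LR/SC path
 * and amocas.aqrl on the CAS path.
 */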

#define arch_cmpxchg_relaxed(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(""), CAS_SFX(""), \
		      SC_PREPEND(""), SC_APPEND(""), \
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_acquire(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(""), CAS_SFX(""), \
		      SC_PREPEND(""), SC_APPEND(RISCV_ACQUIRE_BARRIER), \
		      CAS_PREPEND(""), CAS_APPEND(RISCV_ACQUIRE_BARRIER))

#define arch_cmpxchg_release(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(""), CAS_SFX(""), \
		      SC_PREPEND(RISCV_RELEASE_BARRIER), SC_APPEND(""), \
		      CAS_PREPEND(RISCV_RELEASE_BARRIER), CAS_APPEND(""))

#define arch_cmpxchg(ptr, o, n) \
	_arch_cmpxchg((ptr), (o), (n), \
		      SC_SFX(".rl"), CAS_SFX(".aqrl"), \
		      SC_PREPEND(""), SC_APPEND(RISCV_FULL_BARRIER), \
		      CAS_PREPEND(""), CAS_APPEND(""))

#define arch_cmpxchg_local(ptr, o, n) \
	arch_cmpxchg_relaxed((ptr), (o), (n))

#define arch_cmpxchg64(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg((ptr), (o), (n)); \
})

#define arch_cmpxchg64_local(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

#define arch_cmpxchg64_relaxed(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_relaxed((ptr), (o), (n)); \
})

#define arch_cmpxchg64_acquire(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_acquire((ptr), (o), (n)); \
})

#define arch_cmpxchg64_release(ptr, o, n) \
({ \
	BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
	arch_cmpxchg_release((ptr), (o), (n)); \
})

#if defined(CONFIG_64BIT) && defined(CONFIG_RISCV_ISA_ZACAS)

#define system_has_cmpxchg128()	riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)

union __u128_halves {
	u128 full;
	struct {
		u64 low, high;
	};
};
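
/*
 * amocas.q operates on a 128-bit value held in a pair of adjacent
 * x-registers starting at an even register number, so the old and new
 * halves are pinned to t1/t2 and t3/t4 below.
 */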

#define __arch_cmpxchg128(p, o, n, cas_sfx) \
({ \
	__typeof__(*(p)) __o = (o); \
	union __u128_halves __hn = { .full = (n) }; \
	union __u128_halves __ho = { .full = (__o) }; \
	register unsigned long t1 asm ("t1") = __hn.low; \
	register unsigned long t2 asm ("t2") = __hn.high; \
	register unsigned long t3 asm ("t3") = __ho.low; \
	register unsigned long t4 asm ("t4") = __ho.high; \
	\
	__asm__ __volatile__ ( \
		"	amocas.q" cas_sfx " %0, %z3, %2" \
		: "+&r" (t3), "+&r" (t4), "+A" (*(p)) \
		: "rJ" (t1), "rJ" (t2) \
		: "memory"); \
	\
	((u128)t4 << 64) | t3; \
})

#define arch_cmpxchg128(ptr, o, n) \
	__arch_cmpxchg128((ptr), (o), (n), ".aqrl")

#define arch_cmpxchg128_local(ptr, o, n) \
	__arch_cmpxchg128((ptr), (o), (n), "")

#endif /* CONFIG_64BIT && CONFIG_RISCV_ISA_ZACAS */

#ifdef CONFIG_RISCV_ISA_ZAWRS
/*
 * Despite wrs.nto being "WRS-with-no-timeout", in the absence of changes to
 * @val we expect it to still terminate within a "reasonable" amount of time:
 * for an implementation-specific reason, because of a pending locally-enabled
 * interrupt, or because it has been configured to raise an illegal
 * instruction exception.
 */
static __always_inline void __cmpwait(volatile void *ptr,
				      unsigned long val,
				      int size)
{
	unsigned long tmp;

	asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
			     0, RISCV_ISA_EXT_ZAWRS, 1)
		 : : : : no_zawrs);

	switch (size) {
	case 1:
		fallthrough;
	case 2:
		/* RISC-V doesn't have lr instructions on byte and half-word. */
		goto no_zawrs;
	case 4:
		asm volatile(
		"	lr.w	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u32 *)ptr)
		: "r" (val));
		break;
#if __riscv_xlen == 64
	case 8:
		asm volatile(
		"	lr.d	%0, %1\n"
		"	xor	%0, %0, %2\n"
		"	bnez	%0, 1f\n"
			ZAWRS_WRS_NTO "\n"
		"1:"
		: "=&r" (tmp), "+A" (*(u64 *)ptr)
		: "r" (val));
		break;
#endif
	default:
		BUILD_BUG();
	}

	return;

no_zawrs:
	asm volatile(RISCV_PAUSE : : : "memory");
}
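
/*
 * Wait helper for poll-style loops: stall (via wrs.nto when available,
 * otherwise a pause hint) until *ptr may no longer equal @val.  The wrapper
 * below derives the access width from the pointer type, so callers pass only
 * the pointer and the last observed value.
 */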
#define __cmpwait_relaxed(ptr, val) \
	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
#endif /* CONFIG_RISCV_ISA_ZAWRS */

#endif /* _ASM_RISCV_CMPXCHG_H */