/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <[email protected]>
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef CONFIG_LINUX
#include "qemu/futex.h"

/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...).  It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2

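/* Worked example of the encoding: a count of 3 with the lock held by a
 * single, uncontended thread is stored as
 * 3 * QEMU_LOCKCNT_COUNT_STEP + QEMU_LOCKCNT_STATE_LOCKED = 13
 * (binary 1101): counter 11 in bits 2-31, lock state 01 in bits 0-1.
 */
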
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*.  Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters.  Furthermore, after *waited
 * is set the caller has effectively acquired the lock.  If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free.  */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = qatomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait.  Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path.  Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = qatomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = qatomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

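/* The helper above implements a small state machine; summarizing its
 * transitions as a reading aid:
 *
 *   FREE    -- cmpxchg to new_if_free succeeds --> done, return true
 *   LOCKED  -- cmpxchg to WAITING              --> retry
 *   WAITING -- futex wait, re-read the count   --> retry
 */
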
static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake(&lockcnt->count, 1);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked).  */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us.  This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}

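/* Illustrative reader-side pattern (hypothetical variable names, along
 * the lines of the QemuLockCnt documentation in the QEMU tree):
 *
 *     qemu_lockcnt_inc(&xyz_lockcnt);
 *     if (xyz) {
 *         ...read-only access to xyz...
 *     }
 *     qemu_lockcnt_dec(&xyz_lockcnt);
 */
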
void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = qatomic_cmpxchg(&lockcnt->count, val,
                                  val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock. The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters.  Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

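/* Illustrative writer-side pattern (hypothetical names): a thread that
 * drops the last reference can reclaim the data structure, because a
 * true return guarantees the count stays zero until the unlock:
 *
 *     if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) {
 *         g_free(xyz);
 *         xyz = NULL;
 *         qemu_lockcnt_unlock(&xyz_lockcnt);
 *     }
 */
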
/* If the counter is one, decrement it and return locked.  Otherwise do
 * nothing.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock. The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us.  This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

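/* Note the contrast with qemu_lockcnt_dec_and_lock() above: when the
 * counter is greater than one, qemu_lockcnt_dec_if_lock() returns false
 * without touching the counter at all, so it acts as a "try" variant for
 * callers that have not yet committed to dropping their reference.
 */
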
void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters.  Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

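/* The function below folds the increment and the unlock into one atomic
 * update: the effect is that of qemu_lockcnt_inc() followed by
 * qemu_lockcnt_unlock(), but with a single cmpxchg loop.
 */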
void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = qatomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = qatomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
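/* Fallback for platforms without futexes: the lock is a QemuMutex and
 * lockcnt->count holds the counter value directly, with no state bits
 * folded in.
 */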
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

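/* The counter must stay zero while another thread holds the mutex (see
 * qemu_lockcnt_dec_and_lock below), so an increment from zero has to
 * serialize on the lock instead of using a bare cmpxchg.
 */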
void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = qatomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (qatomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    qatomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = qatomic_read(&lockcnt->count);
    while (val > 1) {
        int old = qatomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false.  */
    int val = qatomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (qatomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    qatomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return qatomic_read(&lockcnt->count);
}
#endif