/*
 * QemuLockCnt implementation
 *
 * Copyright Red Hat, Inc. 2017
 *
 * Author:
 *   Paolo Bonzini <[email protected]>
 */
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "trace.h"

#ifdef CONFIG_LINUX
#include "qemu/futex.h"

/* On Linux, bits 0-1 are a futex-based lock, bits 2-31 are the counter.
 * For the mutex algorithm see Ulrich Drepper's "Futexes Are Tricky" (ok,
 * this is not the most relaxing citation I could make...). It is similar
 * to mutex2 in the paper.
 */

#define QEMU_LOCKCNT_STATE_MASK    3
#define QEMU_LOCKCNT_STATE_FREE    0   /* free, uncontended */
#define QEMU_LOCKCNT_STATE_LOCKED  1   /* locked, uncontended */
#define QEMU_LOCKCNT_STATE_WAITING 2   /* locked, contended */

#define QEMU_LOCKCNT_COUNT_STEP    4
#define QEMU_LOCKCNT_COUNT_SHIFT   2

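/* For example, a lockcnt holding a count of 3 with the lock taken and
 * uncontended is stored as
 * (3 << QEMU_LOCKCNT_COUNT_SHIFT) | QEMU_LOCKCNT_STATE_LOCKED, i.e. 13;
 * if another thread then starts waiting on the lock, the low bits change
 * to QEMU_LOCKCNT_STATE_WAITING and the value becomes 14.
 */
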
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
}

/* *val is the current value of lockcnt->count.
 *
 * If the lock is free, try a cmpxchg from *val to new_if_free; return
 * true and set *val to the old value found by the cmpxchg in
 * lockcnt->count.
 *
 * If the lock is taken, wait for it to be released and return false
 * *without trying again to take the lock*. Again, set *val to the
 * new value of lockcnt->count.
 *
 * If *waited is true on return, new_if_free's bottom two bits must not
 * be QEMU_LOCKCNT_STATE_LOCKED on subsequent calls, because the caller
 * does not know if there are other waiters. Furthermore, after *waited
 * is set the caller has effectively acquired the lock. If it returns
 * with the lock not taken, it must wake another futex waiter.
 */
static bool qemu_lockcnt_cmpxchg_or_wait(QemuLockCnt *lockcnt, int *val,
                                         int new_if_free, bool *waited)
{
    /* Fast path for when the lock is free. */
    if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_FREE) {
        int expected = *val;

        trace_lockcnt_fast_path_attempt(lockcnt, expected, new_if_free);
        *val = atomic_cmpxchg(&lockcnt->count, expected, new_if_free);
        if (*val == expected) {
            trace_lockcnt_fast_path_success(lockcnt, expected, new_if_free);
            *val = new_if_free;
            return true;
        }
    }

    /* The slow path moves from locked to waiting if necessary, then
     * does a futex wait. Both steps can be repeated ad nauseam,
     * only getting out of the loop if we can have another shot at the
     * fast path. Once we can, get out to compute the new destination
     * value for the fast path.
     */
    while ((*val & QEMU_LOCKCNT_STATE_MASK) != QEMU_LOCKCNT_STATE_FREE) {
        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_LOCKED) {
            int expected = *val;
            int new = expected - QEMU_LOCKCNT_STATE_LOCKED + QEMU_LOCKCNT_STATE_WAITING;

            trace_lockcnt_futex_wait_prepare(lockcnt, expected, new);
            *val = atomic_cmpxchg(&lockcnt->count, expected, new);
            if (*val == expected) {
                *val = new;
            }
            continue;
        }

        if ((*val & QEMU_LOCKCNT_STATE_MASK) == QEMU_LOCKCNT_STATE_WAITING) {
            *waited = true;
            trace_lockcnt_futex_wait(lockcnt, *val);
            qemu_futex_wait(&lockcnt->count, *val);
            *val = atomic_read(&lockcnt->count);
            trace_lockcnt_futex_wait_resume(lockcnt, *val);
            continue;
        }

        abort();
    }
    return false;
}

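/* An illustrative sequence for the helper above: suppose lockcnt->count is
 * QEMU_LOCKCNT_STATE_LOCKED, i.e. count 0 with the lock held and
 * uncontended. The fast path does not apply, so the caller cmpxchgs the
 * state to QEMU_LOCKCNT_STATE_WAITING and sleeps in qemu_futex_wait().
 * When the holder releases the lock it clears the state bits and wakes one
 * waiter; the woken thread re-reads the value, sees QEMU_LOCKCNT_STATE_FREE
 * in the low bits, and returns false with *waited set, so the caller retries
 * the fast path and, if it ends up leaving the lock untaken, wakes another
 * waiter itself.
 */
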
static void lockcnt_wake(QemuLockCnt *lockcnt)
{
    trace_lockcnt_futex_wake(lockcnt);
    qemu_futex_wake(&lockcnt->count, 1);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    bool waited = false;

    for (;;) {
        if (val >= QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = atomic_cmpxchg(&lockcnt->count, val, val + QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* The fast path is (0, unlocked)->(1, unlocked). */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, QEMU_LOCKCNT_COUNT_STEP,
                                             &waited)) {
                break;
            }
        }
    }

    /* If we were woken by another thread, we should also wake one because
     * we are effectively releasing the lock that was given to us. This is
     * the case where qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING
     * in the low bits, and qemu_lockcnt_inc_and_unlock would find it and
     * wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
}

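/* A sketch of the typical reader-side usage of qemu_lockcnt_inc() and
 * qemu_lockcnt_dec(), with a hypothetical pointer xyz protected by
 * xyz_lockcnt (the names are only illustrative, not from this file):
 *
 *     qemu_lockcnt_inc(&xyz_lockcnt);
 *     if (xyz) {
 *         ... read-only access to xyz ...
 *     }
 *     qemu_lockcnt_dec(&xyz_lockcnt);
 */
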
void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    atomic_sub(&lockcnt->count, QEMU_LOCKCNT_COUNT_STEP);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    for (;;) {
        if (val >= 2 * QEMU_LOCKCNT_COUNT_STEP) {
            int expected = val;
            val = atomic_cmpxchg(&lockcnt->count, val, val - QEMU_LOCKCNT_COUNT_STEP);
            if (val == expected) {
                break;
            }
        } else {
            /* If count is going 1->0, take the lock. The fast path is
             * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
             */
            if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
                return true;
            }

            if (waited) {
                /* At this point we do not know if there are more waiters. Assume
                 * there are.
                 */
                locked_state = QEMU_LOCKCNT_STATE_WAITING;
            }
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us. This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

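/* A matching writer-side sketch, reusing the hypothetical xyz and
 * xyz_lockcnt names from the reader example above: the last user frees
 * the object while holding the lock, so concurrent readers either still
 * see a valid pointer or see NULL.
 *
 *     if (qemu_lockcnt_dec_and_lock(&xyz_lockcnt)) {
 *         g_free(xyz);
 *         xyz = NULL;
 *         qemu_lockcnt_unlock(&xyz_lockcnt);
 *     }
 */
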
/* If the counter is one, decrement it and return locked. Otherwise do
 * nothing.
 *
 * If the function returns true, it is impossible for the counter to
 * become nonzero until the next qemu_lockcnt_unlock.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int locked_state = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    while (val < 2 * QEMU_LOCKCNT_COUNT_STEP) {
        /* If count is going 1->0, take the lock. The fast path is
         * (1, unlocked)->(0, locked) or (1, unlocked)->(0, waiting).
         */
        if (qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, locked_state, &waited)) {
            return true;
        }

        if (waited) {
            /* At this point we do not know if there are more waiters. Assume
             * there are.
             */
            locked_state = QEMU_LOCKCNT_STATE_WAITING;
        }
    }

    /* If we were woken by another thread, but we're returning in unlocked
     * state, we should also wake a thread because we are effectively
     * releasing the lock that was given to us. This is the case where
     * qemu_lockcnt_lock would leave QEMU_LOCKCNT_STATE_WAITING in the low
     * bits, and qemu_lockcnt_inc_and_unlock would find it and wake someone.
     */
    if (waited) {
        lockcnt_wake(lockcnt);
    }
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    int step = QEMU_LOCKCNT_STATE_LOCKED;
    bool waited = false;

    /* The third argument is only used if the low bits of val are 0
     * (QEMU_LOCKCNT_STATE_FREE), so just blindly mix in the desired
     * state.
     */
    while (!qemu_lockcnt_cmpxchg_or_wait(lockcnt, &val, val + step, &waited)) {
        if (waited) {
            /* At this point we do not know if there are more waiters. Assume
             * there are.
             */
            step = QEMU_LOCKCNT_STATE_WAITING;
        }
    }
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = atomic_read(&lockcnt->count);
    do {
        expected = val;
        new = (val + QEMU_LOCKCNT_COUNT_STEP) & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = atomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    int expected, new, val;

    val = atomic_read(&lockcnt->count);
    do {
        expected = val;
        new = val & ~QEMU_LOCKCNT_STATE_MASK;
        trace_lockcnt_unlock_attempt(lockcnt, val, new);
        val = atomic_cmpxchg(&lockcnt->count, val, new);
    } while (val != expected);

    trace_lockcnt_unlock_success(lockcnt, val, new);
    if (val & QEMU_LOCKCNT_STATE_WAITING) {
        lockcnt_wake(lockcnt);
    }
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return atomic_read(&lockcnt->count) >> QEMU_LOCKCNT_COUNT_SHIFT;
}
#else
void qemu_lockcnt_init(QemuLockCnt *lockcnt)
{
    qemu_mutex_init(&lockcnt->mutex);
    lockcnt->count = 0;
}

void qemu_lockcnt_destroy(QemuLockCnt *lockcnt)
{
    qemu_mutex_destroy(&lockcnt->mutex);
}

void qemu_lockcnt_inc(QemuLockCnt *lockcnt)
{
    int old;
    for (;;) {
        old = atomic_read(&lockcnt->count);
        if (old == 0) {
            qemu_lockcnt_lock(lockcnt);
            qemu_lockcnt_inc_and_unlock(lockcnt);
            return;
        } else {
            if (atomic_cmpxchg(&lockcnt->count, old, old + 1) == old) {
                return;
            }
        }
    }
}

void qemu_lockcnt_dec(QemuLockCnt *lockcnt)
{
    atomic_dec(&lockcnt->count);
}

/* Decrement a counter, and return locked if it is decremented to zero.
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_and_lock(QemuLockCnt *lockcnt)
{
    int val = atomic_read(&lockcnt->count);
    while (val > 1) {
        int old = atomic_cmpxchg(&lockcnt->count, val, val - 1);
        if (old != val) {
            val = old;
            continue;
        }

        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (atomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    /* The counter has already been decremented above, so a plain unlock
     * is enough here.
     */
    qemu_lockcnt_unlock(lockcnt);
    return false;
}

/* Decrement a counter and return locked if it is decremented to zero.
 * Otherwise do nothing.
 *
 * It is impossible for the counter to become nonzero while the mutex
 * is taken.
 */
bool qemu_lockcnt_dec_if_lock(QemuLockCnt *lockcnt)
{
    /* No need for acquire semantics if we return false. */
    int val = atomic_read(&lockcnt->count);
    if (val > 1) {
        return false;
    }

    qemu_lockcnt_lock(lockcnt);
    if (atomic_fetch_dec(&lockcnt->count) == 1) {
        return true;
    }

    /* The counter did not reach zero, so undo the decrement ("otherwise
     * do nothing") while releasing the mutex.
     */
    qemu_lockcnt_inc_and_unlock(lockcnt);
    return false;
}

void qemu_lockcnt_lock(QemuLockCnt *lockcnt)
{
    qemu_mutex_lock(&lockcnt->mutex);
}

void qemu_lockcnt_inc_and_unlock(QemuLockCnt *lockcnt)
{
    atomic_inc(&lockcnt->count);
    qemu_mutex_unlock(&lockcnt->mutex);
}

void qemu_lockcnt_unlock(QemuLockCnt *lockcnt)
{
    qemu_mutex_unlock(&lockcnt->mutex);
}

unsigned qemu_lockcnt_count(QemuLockCnt *lockcnt)
{
    return atomic_read(&lockcnt->count);
}
#endif