// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
        WARN_ON(eb->spinning_writers);
        eb->spinning_writers++;
}

static inline void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
        WARN_ON(eb->spinning_writers != 1);
        eb->spinning_writers--;
}

static inline void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
        WARN_ON(eb->spinning_writers);
}

static inline void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
{
        atomic_inc(&eb->spinning_readers);
}

static inline void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
{
        WARN_ON(atomic_read(&eb->spinning_readers) == 0);
        atomic_dec(&eb->spinning_readers);
}

static inline void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
{
        atomic_inc(&eb->read_locks);
}

static inline void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
{
        atomic_dec(&eb->read_locks);
}

static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
        BUG_ON(!atomic_read(&eb->read_locks));
}

static inline void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
        eb->write_locks++;
}

static inline void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
        eb->write_locks--;
}

#else
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
#endif
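
/*
 * Orientation only (illustrative sketch, not part of the original file): the
 * locking-related fields of struct extent_buffer as this file uses them.  The
 * authoritative definition lives in extent_io.h; the exact types shown here
 * are assumptions inferred from the accessors above and below.
 *
 *	struct extent_buffer {
 *		...
 *		rwlock_t lock;                   spinning rwlock (read_lock()/write_lock())
 *		wait_queue_head_t write_lock_wq; waiters for blocking_writers to reach 0
 *		wait_queue_head_t read_lock_wq;  waiters for blocking_readers to reach 0
 *		int blocking_writers;
 *		atomic_t blocking_readers;
 *		pid_t lock_owner;                pid of the write lock holder
 *		bool lock_nested;                read lock nested inside our own write lock
 *	#ifdef CONFIG_BTRFS_DEBUG
 *		int spinning_writers;
 *		atomic_t spinning_readers;
 *		atomic_t read_locks;
 *		int write_locks;
 *	#endif
 *		...
 *	};
 */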

void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
        trace_btrfs_set_lock_blocking_read(eb);
        /*
         * No lock is required. The lock owner may change if we have a read
         * lock, but it won't change to or away from us. If we have the write
         * lock, we are the owner and it'll never change.
         */
        if (eb->lock_nested && current->pid == eb->lock_owner)
                return;
        btrfs_assert_tree_read_locked(eb);
        atomic_inc(&eb->blocking_readers);
        btrfs_assert_spinning_readers_put(eb);
        read_unlock(&eb->lock);
}

void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
        trace_btrfs_set_lock_blocking_write(eb);
        /*
         * No lock is required. The lock owner may change if we have a read
         * lock, but it won't change to or away from us. If we have the write
         * lock, we are the owner and it'll never change.
         */
        if (eb->lock_nested && current->pid == eb->lock_owner)
                return;
        if (eb->blocking_writers == 0) {
                btrfs_assert_spinning_writers_put(eb);
                btrfs_assert_tree_locked(eb);
                eb->blocking_writers++;
                write_unlock(&eb->lock);
        }
}
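
/*
 * Illustrative sketch (not part of the original file): the typical lifecycle
 * of a write lock whose holder may sleep.  The lock is taken spinning,
 * converted to blocking before the sleeping operation, and btrfs_tree_unlock()
 * (defined further below) drops either form.  The function name is
 * hypothetical.
 */
static void __maybe_unused example_write_lock_then_sleep(struct extent_buffer *eb)
{
        btrfs_tree_lock(eb);                    /* spinning write lock */
        btrfs_set_lock_blocking_write(eb);      /* drop the spinlock, keep exclusion */
        cond_resched();                         /* now safe to sleep or schedule */
        btrfs_tree_unlock(eb);                  /* handles spinning and blocking alike */
}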

/*
 * take a spinning read lock. This will wait for any blocking
 * writers
 */
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
        u64 start_ns = 0;

        if (trace_btrfs_tree_read_lock_enabled())
                start_ns = ktime_get_ns();
again:
        read_lock(&eb->lock);
        BUG_ON(eb->blocking_writers == 0 &&
               current->pid == eb->lock_owner);
        if (eb->blocking_writers && current->pid == eb->lock_owner) {
                /*
                 * This extent is already write-locked by our thread. We allow
                 * an additional read lock to be added because it's for the same
                 * thread. btrfs_find_all_roots() depends on this as it may be
                 * called on a partly (write-)locked tree.
                 */
                BUG_ON(eb->lock_nested);
                eb->lock_nested = true;
                read_unlock(&eb->lock);
                trace_btrfs_tree_read_lock(eb, start_ns);
                return;
        }
        if (eb->blocking_writers) {
                read_unlock(&eb->lock);
                wait_event(eb->write_lock_wq,
                           eb->blocking_writers == 0);
                goto again;
        }
        btrfs_assert_tree_read_locks_get(eb);
        btrfs_assert_spinning_readers_get(eb);
        trace_btrfs_tree_read_lock(eb, start_ns);
}
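
/*
 * Illustrative sketch (not part of the original file): read-side usage.  The
 * spinning read lock must not be held across anything that sleeps, so it is
 * converted to a blocking read lock first and released with
 * btrfs_tree_read_unlock_blocking() (defined further below).  The function
 * name is hypothetical.
 */
static void __maybe_unused example_read_lock_then_sleep(struct extent_buffer *eb)
{
        btrfs_tree_read_lock(eb);               /* spinning read lock, waits out blocking writers */
        btrfs_set_lock_blocking_read(eb);       /* drop the spinlock, stay counted as a reader */
        cond_resched();                         /* safe to sleep here */
        btrfs_tree_read_unlock_blocking(eb);    /* drop the blocking read lock */
}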

/*
 * take a spinning read lock.
 * returns 1 if we get the read lock and 0 if we don't
 * this won't wait for blocking writers
 */
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
        if (eb->blocking_writers)
                return 0;

        read_lock(&eb->lock);
        if (eb->blocking_writers) {
                read_unlock(&eb->lock);
                return 0;
        }
        btrfs_assert_tree_read_locks_get(eb);
        btrfs_assert_spinning_readers_get(eb);
        trace_btrfs_tree_read_lock_atomic(eb);
        return 1;
}

/*
 * returns 1 if we get the read lock and 0 if we don't
 * this won't wait for blocking writers
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
        if (eb->blocking_writers)
                return 0;

        if (!read_trylock(&eb->lock))
                return 0;

        if (eb->blocking_writers) {
                read_unlock(&eb->lock);
                return 0;
        }
        btrfs_assert_tree_read_locks_get(eb);
        btrfs_assert_spinning_readers_get(eb);
        trace_btrfs_try_tree_read_lock(eb);
        return 1;
}
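
/*
 * Illustrative sketch (not part of the original file): opportunistic read
 * locking.  A caller that cannot afford to wait tries the lock and falls back
 * (or bails out) on contention.  The function name is hypothetical.
 */
static int __maybe_unused example_try_read(struct extent_buffer *eb)
{
        if (!btrfs_try_tree_read_lock(eb))
                return 0;       /* contended: caller retries or takes the slow path */

        /* ... short, non-sleeping read-only access ... */
        btrfs_tree_read_unlock(eb);
        return 1;
}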

/*
 * returns 1 if we get the write lock and 0 if we don't
 * this won't wait for blocking writers or readers
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
        if (eb->blocking_writers || atomic_read(&eb->blocking_readers))
                return 0;

        write_lock(&eb->lock);
        if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) {
                write_unlock(&eb->lock);
                return 0;
        }
        btrfs_assert_tree_write_locks_get(eb);
        btrfs_assert_spinning_writers_get(eb);
        eb->lock_owner = current->pid;
        trace_btrfs_try_tree_write_lock(eb);
        return 1;
}
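
/*
 * Illustrative sketch (not part of the original file): the write-side trylock
 * follows the same pattern; on success the caller owns a spinning write lock
 * and releases it with btrfs_tree_unlock() (defined further below).  The
 * function name is hypothetical.
 */
static int __maybe_unused example_try_write(struct extent_buffer *eb)
{
        if (!btrfs_try_tree_write_lock(eb))
                return 0;       /* blocking readers or writers present, do not spin */

        /* ... short, non-sleeping modification ... */
        btrfs_tree_unlock(eb);
        return 1;
}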

/*
 * drop a spinning read lock
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
        trace_btrfs_tree_read_unlock(eb);
        /*
         * if we're nested, we have the write lock.  No new locking
         * is needed as long as we are the lock owner.
         * The write unlock will do a barrier for us, and the lock_nested
         * field only matters to the lock owner.
         */
        if (eb->lock_nested && current->pid == eb->lock_owner) {
                eb->lock_nested = false;
                return;
        }
        btrfs_assert_tree_read_locked(eb);
        btrfs_assert_spinning_readers_put(eb);
        btrfs_assert_tree_read_locks_put(eb);
        read_unlock(&eb->lock);
}

/*
 * drop a blocking read lock
 */
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
        trace_btrfs_tree_read_unlock_blocking(eb);
        /*
         * if we're nested, we have the write lock.  No new locking
         * is needed as long as we are the lock owner.
         * The write unlock will do a barrier for us, and the lock_nested
         * field only matters to the lock owner.
         */
        if (eb->lock_nested && current->pid == eb->lock_owner) {
                eb->lock_nested = false;
                return;
        }
        btrfs_assert_tree_read_locked(eb);
        WARN_ON(atomic_read(&eb->blocking_readers) == 0);
        /* atomic_dec_and_test implies a barrier */
        if (atomic_dec_and_test(&eb->blocking_readers))
                cond_wake_up_nomb(&eb->read_lock_wq);
        btrfs_assert_tree_read_locks_put(eb);
}

/*
 * take a spinning write lock. This will wait for both
 * blocking readers and writers
 */
void btrfs_tree_lock(struct extent_buffer *eb)
{
        u64 start_ns = 0;

        if (trace_btrfs_tree_lock_enabled())
                start_ns = ktime_get_ns();

        WARN_ON(eb->lock_owner == current->pid);
again:
        wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
        wait_event(eb->write_lock_wq, eb->blocking_writers == 0);
        write_lock(&eb->lock);
        if (atomic_read(&eb->blocking_readers) || eb->blocking_writers) {
                write_unlock(&eb->lock);
                goto again;
        }
        btrfs_assert_spinning_writers_get(eb);
        btrfs_assert_tree_write_locks_get(eb);
        eb->lock_owner = current->pid;
        trace_btrfs_tree_lock(eb, start_ns);
}

/*
 * drop a spinning or a blocking write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
        int blockers = eb->blocking_writers;

        BUG_ON(blockers > 1);

        btrfs_assert_tree_locked(eb);
        trace_btrfs_tree_unlock(eb);
        eb->lock_owner = 0;
        btrfs_assert_tree_write_locks_put(eb);

        if (blockers) {
                btrfs_assert_no_spinning_writers(eb);
                eb->blocking_writers--;
                /*
                 * We need to order modifying blocking_writers above with
                 * actually waking up the sleepers to ensure they see the
                 * updated value of blocking_writers
                 */
                cond_wake_up(&eb->write_lock_wq);
        } else {
                btrfs_assert_spinning_writers_put(eb);
                write_unlock(&eb->lock);
        }
}

/*
 * Set all locked nodes in the path to blocking locks.  This should be done
 * before scheduling
 */
void btrfs_set_path_blocking(struct btrfs_path *p)
{
        int i;

        for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                if (!p->nodes[i] || !p->locks[i])
                        continue;
                /*
                 * If we currently have a spinning reader or writer lock this
                 * will bump the count of blocking holders and drop the
                 * spinlock.
                 */
                if (p->locks[i] == BTRFS_READ_LOCK) {
                        btrfs_set_lock_blocking_read(p->nodes[i]);
                        p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
                } else if (p->locks[i] == BTRFS_WRITE_LOCK) {
                        btrfs_set_lock_blocking_write(p->nodes[i]);
                        p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
                }
        }
}
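
/*
 * Illustrative sketch (not part of the original file): a caller that walked
 * down the tree holding spinning locks converts the whole path to blocking
 * locks before doing something that may sleep; p->locks[] then holds the
 * *_LOCK_BLOCKING values.  The function name is hypothetical.
 */
static void __maybe_unused example_sleep_with_path(struct btrfs_path *path)
{
        btrfs_set_path_blocking(path);  /* every held lock becomes a blocking lock */
        cond_resched();                 /* safe to sleep with blocking locks held */
}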

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
        int i;

        if (path->keep_locks)
                return;

        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
                if (!path->nodes[i])
                        continue;
                if (!path->locks[i])
                        continue;
                btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
                path->locks[i] = 0;
        }
}
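
/*
 * Illustrative sketch (not part of the original file): once an update is
 * confined to the leaf, the locks still held on the parent nodes can be
 * released early to reduce contention.  Level 1 means "everything above the
 * leaf"; the function name is hypothetical.
 */
static void __maybe_unused example_release_upper_locks(struct btrfs_path *path)
{
        /* keep path->nodes[0] (the leaf) locked, drop levels 1 and above */
        btrfs_unlock_up_safe(path, 1);
}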