// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include <trace/events/btrfs.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
#include "accessors.h"

/*
 * Lockdep class keys for extent_buffer->lock's in this root. For a given
 * eb, the lockdep key is determined by the btrfs_root it belongs to and
 * the level the eb occupies in the tree.
 *
 * Different roots are used for different purposes, may nest inside each
 * other, and therefore require separate keysets. As lockdep keys should be
 * static, assign keysets according to the purpose of the root as indicated
 * by btrfs_root->root_key.objectid. This ensures that all special purpose
 * roots have separate keysets.
 *
 * Lock-nesting across peer nodes is always done with the immediate parent
 * node locked, thus preventing deadlock. As lockdep doesn't know this, use
 * a subclass to avoid triggering lockdep warnings in such cases.
 *
 * The key is set by the readpage_end_io_hook after the buffer has passed
 * csum validation but before the pages are unlocked. It is also set by
 * btrfs_init_new_buffer on freshly allocated blocks.
 *
 * We also add a check to make sure the highest level of the tree is the
 * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
 * needs updating as well.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#if BTRFS_MAX_LEVEL != 8
#error
#endif

#define DEFINE_LEVEL(stem, level)				\
	.names[level] = "btrfs-" stem "-0" #level,

#define DEFINE_NAME(stem)					\
	DEFINE_LEVEL(stem, 0)					\
	DEFINE_LEVEL(stem, 1)					\
	DEFINE_LEVEL(stem, 2)					\
	DEFINE_LEVEL(stem, 3)					\
	DEFINE_LEVEL(stem, 4)					\
	DEFINE_LEVEL(stem, 5)					\
	DEFINE_LEVEL(stem, 6)					\
	DEFINE_LEVEL(stem, 7)

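/*
 * For illustration, expanding the macros above: DEFINE_NAME("extent") fills in
 *
 *	.names[0] = "btrfs-extent-00", ... .names[7] = "btrfs-extent-07",
 *
 * and btrfs_set_buffer_lockdep_class() below picks keys[level] and
 * names[level] from the matching keyset, so e.g. a level 2 node of the
 * extent tree gets the lock class named "btrfs-extent-02".
 */
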
static struct btrfs_lockdep_keyset {
	u64 id;				/* root objectid */
	/* Longest entry: btrfs-block-group-00 */
	char names[BTRFS_MAX_LEVEL][24];
	struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
	{ .id = BTRFS_ROOT_TREE_OBJECTID,	 DEFINE_NAME("root")	    },
	{ .id = BTRFS_EXTENT_TREE_OBJECTID,	 DEFINE_NAME("extent")	    },
	{ .id = BTRFS_CHUNK_TREE_OBJECTID,	 DEFINE_NAME("chunk")	    },
	{ .id = BTRFS_DEV_TREE_OBJECTID,	 DEFINE_NAME("dev")	    },
	{ .id = BTRFS_CSUM_TREE_OBJECTID,	 DEFINE_NAME("csum")	    },
	{ .id = BTRFS_QUOTA_TREE_OBJECTID,	 DEFINE_NAME("quota")	    },
	{ .id = BTRFS_TREE_LOG_OBJECTID,	 DEFINE_NAME("log")	    },
	{ .id = BTRFS_TREE_RELOC_OBJECTID,	 DEFINE_NAME("treloc")	    },
	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	 DEFINE_NAME("dreloc")	    },
	{ .id = BTRFS_UUID_TREE_OBJECTID,	 DEFINE_NAME("uuid")	    },
	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID,	 DEFINE_NAME("free-space")  },
	{ .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
	{ .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
	{ .id = 0,				 DEFINE_NAME("tree")	    },
};

#undef DEFINE_LEVEL
#undef DEFINE_NAME

void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
{
	struct btrfs_lockdep_keyset *ks;

	BUG_ON(level >= ARRAY_SIZE(ks->keys));

	/* Find the matching keyset, id 0 is the default entry */
	for (ks = btrfs_lockdep_keysets; ks->id; ks++)
		if (ks->id == objectid)
			break;

	lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
}

void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
{
	if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
		btrfs_set_buffer_lockdep_class(root->root_key.objectid,
					       eb, btrfs_header_level(eb));
}

#endif

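/*
 * In debug builds, record the pid of the task holding the write lock in
 * eb->lock_owner (set when the write lock is taken, cleared again in
 * btrfs_tree_unlock()) so the owner can be identified when debugging; in
 * non-debug builds the helper compiles away to nothing.
 */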
#ifdef CONFIG_BTRFS_DEBUG
static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
{
	eb->lock_owner = owner;
}
#else
static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
#endif

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */

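/*
 * Sketch of the usual pairing (an illustrative example, not lifted from any
 * particular caller): a writer takes the write lock around modifications of
 * an extent buffer, a reader takes the read lock around lookups:
 *
 *	btrfs_tree_lock(eb);			btrfs_tree_read_lock(eb);
 *	... modify eb ...			... read eb ...
 *	btrfs_tree_unlock(eb);			btrfs_tree_read_unlock(eb);
 *
 * Callers that need to hold two locks of the same class at once (see the
 * lock-nesting note near the top of this file) use __btrfs_tree_lock() or
 * __btrfs_tree_read_lock() and pass an explicit enum btrfs_lock_nesting
 * value instead of BTRFS_NESTING_NORMAL.
 */
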
/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:   the eb to be locked
 * @nest: the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		btrfs_set_eb_lock_owner(eb, current->pid);
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

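/*
 * Callers that must not sleep (e.g. nowait operations) use the try-lock
 * variants above and back off on failure instead of blocking; see
 * btrfs_try_read_lock_root_node() below, which returns -EAGAIN rather than
 * waiting for the lock.
 */
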
/*
 * Release the read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	up_read(&eb->lock);
}

/*
 * Lock eb for write.
 *
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	btrfs_set_eb_lock_owner(eb, current->pid);
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	btrfs_set_eb_lock_owner(eb, 0);
	up_write(&eb->lock);
}

/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);

		btrfs_maybe_reset_lockdep_class(root, eb);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);

		btrfs_maybe_reset_lockdep_class(root, eb);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree in
 * nowait mode until we either end up with a lock on the root node or return
 * early to avoid blocking.
 *
 * Return: root extent buffer with read lock held, or ERR_PTR(-EAGAIN) if the
 * lock could not be taken without blocking.
 */
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		if (!btrfs_try_tree_read_lock(eb)) {
			free_extent_buffer(eb);
			return ERR_PTR(-EAGAIN);
		}
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in situations
 * where you want to provide A-B exclusion but not A-A or B-B exclusion.
 *
 * Currently the implementation gives more priority to readers. If a reader and
 * a writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader. Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */
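
/*
 * Illustrative usage (an example, not lifted from a specific caller): one
 * class of operations takes the reader side, the class it must exclude takes
 * the writer side. Readers and writers exclude each other, while concurrent
 * readers (and concurrent writers) are allowed:
 *
 *	operation A				operation B
 *	btrfs_drew_read_lock(lock);		btrfs_drew_write_lock(lock);
 *	... do the A work ...			... do the B work ...
 *	btrfs_drew_read_unlock(lock);		btrfs_drew_write_unlock(lock);
 *
 * A writer that must not block can use btrfs_drew_try_write_lock() instead and
 * back off when it returns false.
 */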

void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	atomic_set(&lock->readers, 0);
	atomic_set(&lock->writers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	atomic_inc(&lock->writers);

	/* Ensure writers count is updated before we check for pending readers */
	smp_mb__after_atomic();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	atomic_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its jobs.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test() implies a full barrier, so woken up writers are
	 * guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}