// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "ctree.h"
#include "subpage.h"
#include "btrfs_inode.h"

/*
 * Subpage (sectorsize < PAGE_SIZE) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as a 64K page ensures that
 *   any nodesize fits inside one page, thus we don't need to handle cases
 *   where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and the kernel have ensured this for a while, thus only
 *   ancient filesystems could have such a problem. For such a case, do a
 *   graceful rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported.
 *   Meaning that reading one tree block will only trigger the read for the
 *   needed range; other unrelated ranges in the same page will not be
 *   touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit the
 *   dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *     0         16K         32K         48K        64K
 *     |/////////|           |///////////|
 *          \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback, which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_subpage, to
 *   record the status of each sector inside a page. This provides the
 *   extra granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on
 *   page locking anymore, or we would have greatly reduced concurrency or
 *   even deadlocks (holding one tree lock while trying to lock another
 *   tree lock in the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking
 *   only. This means a slightly higher tree locking latency.
 */

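/*
 * Bitmap layout sketch (illustrative, assuming 4K sectorsize on a 64K page,
 * i.e. sectors_per_page == 16, and the btrfs_bitmap_nr_* order defined in
 * subpage.h):
 *
 *   bit 0           16          32
 *   | uptodate ...  | dirty ... | one 16-bit run per bitmap type ...
 *
 * Each bitmap type occupies sectors_per_page consecutive bits inside
 * btrfs_subpage::bitmaps, so the bit for sector @i of bitmap @name is
 * sectors_per_page * btrfs_bitmap_nr_<name> + i.
 * See subpage_calc_start_bit() below for the exact calculation.
 */
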
#if PAGE_SIZE > SZ_4K
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping)
{
        if (fs_info->sectorsize >= PAGE_SIZE)
                return false;

        /*
         * Only data pages (either through DIO or compression) can have no
         * mapping. And if page->mapping->host is a data inode, it's subpage,
         * as we have already ruled out the sectorsize >= PAGE_SIZE case.
         */
        if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host)))
                return true;

        /*
         * Now the only remaining case is metadata, for which we only go the
         * subpage route if nodesize < PAGE_SIZE.
         */
        if (fs_info->nodesize < PAGE_SIZE)
                return true;
        return false;
}
#endif

int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
                         struct folio *folio, enum btrfs_subpage_type type)
{
        struct btrfs_subpage *subpage;

        /*
         * We have cases like a dummy extent buffer page, which is not mapped
         * and doesn't need to be locked.
         */
        if (folio->mapping)
                ASSERT(folio_test_locked(folio));

        /* Either not subpage, or the folio already has private attached. */
        if (!btrfs_is_subpage(fs_info, folio->mapping) || folio_test_private(folio))
                return 0;

        subpage = btrfs_alloc_subpage(fs_info, type);
        if (IS_ERR(subpage))
                return PTR_ERR(subpage);

        folio_attach_private(folio, subpage);
        return 0;
}

void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
        struct btrfs_subpage *subpage;

        /* Either not subpage, or the folio has no private attached. */
        if (!btrfs_is_subpage(fs_info, folio->mapping) || !folio_test_private(folio))
                return;

        subpage = folio_detach_private(folio);
        ASSERT(subpage);
        btrfs_free_subpage(subpage);
}

struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
                                          enum btrfs_subpage_type type)
{
        struct btrfs_subpage *ret;
        unsigned int real_size;

        ASSERT(fs_info->sectorsize < PAGE_SIZE);

        real_size = struct_size(ret, bitmaps,
                        BITS_TO_LONGS(btrfs_bitmap_nr_max * fs_info->sectors_per_page));
        ret = kzalloc(real_size, GFP_NOFS);
        if (!ret)
                return ERR_PTR(-ENOMEM);

        spin_lock_init(&ret->lock);
        if (type == BTRFS_SUBPAGE_METADATA)
                atomic_set(&ret->eb_refs, 0);
        else
                atomic_set(&ret->nr_locked, 0);
        return ret;
}

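/*
 * Allocation-size sketch for btrfs_alloc_subpage() (illustrative, assuming
 * 4K sectorsize on a 64K page): sectors_per_page is 16, so the flexible
 * bitmaps[] array needs btrfs_bitmap_nr_max * 16 bits, which BITS_TO_LONGS()
 * rounds up to whole unsigned longs appended to struct btrfs_subpage.
 */
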
void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
        kfree(subpage);
}

/*
 * Increase the eb_refs of the current subpage.
 *
 * This is important for eb allocation, to prevent race with last eb freeing
 * of the same page.
 * With the eb_refs increased before the eb is inserted into the radix tree,
 * detach_extent_buffer_page() won't detach the folio private while we're
 * still allocating the extent buffer.
 */
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
        struct btrfs_subpage *subpage;

        if (!btrfs_is_subpage(fs_info, folio->mapping))
                return;

        ASSERT(folio_test_private(folio) && folio->mapping);
        lockdep_assert_held(&folio->mapping->i_private_lock);

        subpage = folio_get_private(folio);
        atomic_inc(&subpage->eb_refs);
}

void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
        struct btrfs_subpage *subpage;

        if (!btrfs_is_subpage(fs_info, folio->mapping))
                return;

        ASSERT(folio_test_private(folio) && folio->mapping);
        lockdep_assert_held(&folio->mapping->i_private_lock);

        subpage = folio_get_private(folio);
        ASSERT(atomic_read(&subpage->eb_refs));
        atomic_dec(&subpage->eb_refs);
}

static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        /* For subpage support, the folio must be a single page. */
        ASSERT(folio_order(folio) == 0);

        /* Basic checks */
        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
               IS_ALIGNED(len, fs_info->sectorsize));
        /*
         * The range check only works for mapped pages; we can still have
         * unmapped pages like dummy extent buffer pages.
         */
        if (folio->mapping)
                ASSERT(folio_pos(folio) <= start &&
                       start + len <= folio_pos(folio) + PAGE_SIZE);
}

#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
({ \
        unsigned int __start_bit; \
 \
        btrfs_subpage_assert(fs_info, folio, start, len); \
        __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
        __start_bit += fs_info->sectors_per_page * btrfs_bitmap_nr_##name; \
        __start_bit; \
})

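/*
 * Worked example for subpage_calc_start_bit() (illustrative, assuming 4K
 * sectorsize on a 64K page): a range starting 32K into the folio is sector
 * 8 (32K >> 12), so the absolute start bit for e.g. the dirty bitmap is
 * 16 * btrfs_bitmap_nr_dirty + 8.
 */
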
static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
{
        u64 orig_start = *start;
        u32 orig_len = *len;

        *start = max_t(u64, folio_pos(folio), orig_start);
        /*
         * For certain call sites like btrfs_drop_pages(), we may have pages
         * beyond the target range. In that case, just set @len to 0, subpage
         * helpers can handle @len == 0 without any problem.
         */
        if (folio_pos(folio) >= orig_start + orig_len)
                *len = 0;
        else
                *len = min_t(u64, folio_pos(folio) + PAGE_SIZE,
                             orig_start + orig_len) - *start;
}

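/*
 * Clamping example for btrfs_subpage_clamp_range() (illustrative): for a
 * folio at file offset 64K on a 64K page system and a range [60K, 72K),
 * the clamped result is start == 64K and len == 8K, i.e. only the part of
 * the range that falls inside this folio.
 */
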
static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
                                            struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
        const int nbits = (len >> fs_info->sectorsize_bits);
        unsigned long flags;
        unsigned int cleared = 0;
        int bit = start_bit;
        bool last;

        btrfs_subpage_assert(fs_info, folio, start, len);

        spin_lock_irqsave(&subpage->lock, flags);
        /*
         * We have call sites passing @locked_page into
         * extent_clear_unlock_delalloc() for the compression path.
         *
         * This @locked_page is locked by plain lock_page(), thus its
         * subpage::locked is 0. Handle it in a special way.
         */
        if (atomic_read(&subpage->nr_locked) == 0) {
                spin_unlock_irqrestore(&subpage->lock, flags);
                return true;
        }

        for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) {
                clear_bit(bit, subpage->bitmaps);
                cleared++;
        }
        ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
        last = atomic_sub_and_test(cleared, &subpage->nr_locked);
        spin_unlock_irqrestore(&subpage->lock, flags);
        return last;
}

/*
 * Handle different locked folios:
 *
 * - Non-subpage folio
 *   Just unlock it.
 *
 * - folio locked but without any subpage locked
 *   This happens either before writepage_delalloc(), or when the delalloc
 *   range is already handled by the previous folio.
 *   We can simply unlock it.
 *
 * - folio locked with subpage range locked
 *   We go through the locked sectors inside the range, clear their bits in
 *   the locked bitmap, decrease the locked count, and unlock the folio if
 *   that's the last locked range.
 */
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);

        ASSERT(folio_test_locked(folio));

        if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
                folio_unlock(folio);
                return;
        }

        /*
         * For the subpage case, there are two types of locked folios: with
         * or without a locked count.
         *
         * Since we own the folio lock, no one else could touch
         * subpage::locked and we are safe to do several atomic operations
         * without the spinlock.
         */
        if (atomic_read(&subpage->nr_locked) == 0) {
                /* No subpage lock, locked by plain lock_page(). */
                folio_unlock(folio);
                return;
        }

        btrfs_subpage_clamp_range(folio, &start, &len);
        if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
                folio_unlock(folio);
}

void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, unsigned long bitmap)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
        unsigned long flags;
        bool last = false;
        int cleared = 0;
        int bit;

        if (!btrfs_is_subpage(fs_info, folio->mapping)) {
                folio_unlock(folio);
                return;
        }

        if (atomic_read(&subpage->nr_locked) == 0) {
                /* No subpage lock, locked by plain lock_page(). */
                folio_unlock(folio);
                return;
        }

        spin_lock_irqsave(&subpage->lock, flags);
        for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
                if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
                        cleared++;
        }
        ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
        last = atomic_sub_and_test(cleared, &subpage->nr_locked);
        spin_unlock_irqrestore(&subpage->lock, flags);
        if (last)
                folio_unlock(folio);
}

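/*
 * Note on the @bitmap argument of btrfs_folio_end_lock_bitmap() above
 * (illustrative): bit @i corresponds to sector @i within the folio, so a
 * value of 0x3 with 4K sectors ends the lock on the folio's first two 4K
 * ranges.
 */
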
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
        bitmap_test_range_all_set(subpage->bitmaps, \
                        fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
                        fs_info->sectors_per_page)

#define subpage_test_bitmap_all_zero(fs_info, subpage, name) \
        bitmap_test_range_all_zero(subpage->bitmaps, \
                        fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
                        fs_info->sectors_per_page)

void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
                                struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        uptodate, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
                folio_mark_uptodate(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
                                  struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        uptodate, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        folio_clear_uptodate(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

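/*
 * Note the asymmetry in btrfs_subpage_set/clear_uptodate() above: the
 * folio-level uptodate flag is only set once every sector in the folio is
 * uptodate, but it is cleared as soon as any sector clears its uptodate bit.
 */
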
void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
                             struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        dirty, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        spin_unlock_irqrestore(&subpage->lock, flags);
        folio_mark_dirty(folio);
}

/*
 * Extra clear_and_test function for the subpage dirty bitmap.
 *
 * Return true if we cleared the last dirty bits in the dirty bitmap.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear the page dirty flag for the true
 * case, as we have extra handling for tree blocks; see
 * btrfs_subpage_clear_dirty() below for an example caller.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
                                        struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        dirty, start, len);
        unsigned long flags;
        bool last = false;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
                last = true;
        spin_unlock_irqrestore(&subpage->lock, flags);
        return last;
}

void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
                               struct folio *folio, u64 start, u32 len)
{
        bool last;

        last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
        if (last)
                folio_clear_dirty_for_io(folio);
}

void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        writeback, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (!folio_test_writeback(folio))
                folio_start_writeback(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
                                   struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        writeback, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
                ASSERT(folio_test_writeback(folio));
                folio_end_writeback(folio);
        }
        spin_unlock_irqrestore(&subpage->lock, flags);
}

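/*
 * Writeback lifetime of the helpers above: the folio enters writeback when
 * the first subpage range starts writeback, and folio_end_writeback() is
 * only called once the writeback bitmap is completely clear again.
 */
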
void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
                               struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        ordered, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        folio_set_ordered(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        ordered, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
                folio_clear_ordered(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
                               struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        checked, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
                folio_set_checked(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
                                 struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage = folio_get_private(folio);
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
                                                        checked, start, len);
        unsigned long flags;

        spin_lock_irqsave(&subpage->lock, flags);
        bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
        folio_clear_checked(folio);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Unlike set/clear, which depends on each page's status, all the test
 * operations test their bits in the same way.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name) \
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
                               struct folio *folio, u64 start, u32 len) \
{ \
        struct btrfs_subpage *subpage = folio_get_private(folio); \
        unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \
                                                name, start, len); \
        unsigned long flags; \
        bool ret; \
 \
        spin_lock_irqsave(&subpage->lock, flags); \
        ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \
                                len >> fs_info->sectorsize_bits); \
        spin_unlock_irqrestore(&subpage->lock, flags); \
        return ret; \
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);

/*
 * Note that, in selftests (extent-io-tests), we can have a NULL fs_info
 * passed in. We only test sectorsize == PAGE_SIZE cases so far, thus we can
 * fall back to the regular sectorsize branch.
 */
#define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func, \
                                 folio_clear_func, folio_test_func) \
void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \
                            struct folio *folio, u64 start, u32 len) \
{ \
        if (unlikely(!fs_info) || \
            !btrfs_is_subpage(fs_info, folio->mapping)) { \
                folio_set_func(folio); \
                return; \
        } \
        btrfs_subpage_set_##name(fs_info, folio, start, len); \
} \
void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \
                              struct folio *folio, u64 start, u32 len) \
{ \
        if (unlikely(!fs_info) || \
            !btrfs_is_subpage(fs_info, folio->mapping)) { \
                folio_clear_func(folio); \
                return; \
        } \
        btrfs_subpage_clear_##name(fs_info, folio, start, len); \
} \
bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \
                             struct folio *folio, u64 start, u32 len) \
{ \
        if (unlikely(!fs_info) || \
            !btrfs_is_subpage(fs_info, folio->mapping)) \
                return folio_test_func(folio); \
        return btrfs_subpage_test_##name(fs_info, folio, start, len); \
} \
void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \
                                  struct folio *folio, u64 start, u32 len) \
{ \
        if (unlikely(!fs_info) || \
            !btrfs_is_subpage(fs_info, folio->mapping)) { \
                folio_set_func(folio); \
                return; \
        } \
        btrfs_subpage_clamp_range(folio, &start, &len); \
        btrfs_subpage_set_##name(fs_info, folio, start, len); \
} \
void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
                                    struct folio *folio, u64 start, u32 len) \
{ \
        if (unlikely(!fs_info) || \
            !btrfs_is_subpage(fs_info, folio->mapping)) { \
                folio_clear_func(folio); \
                return; \
        } \
        btrfs_subpage_clamp_range(folio, &start, &len); \
        btrfs_subpage_clear_##name(fs_info, folio, start, len); \
} \
bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
                                   struct folio *folio, u64 start, u32 len) \
{ \
        if (unlikely(!fs_info) || \
            !btrfs_is_subpage(fs_info, folio->mapping)) \
                return folio_test_func(folio); \
        btrfs_subpage_clamp_range(folio, &start, &len); \
        return btrfs_subpage_test_##name(fs_info, folio, start, len); \
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
                         folio_test_uptodate);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
                         folio_test_dirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
                         folio_test_writeback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
                         folio_test_ordered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
                         folio_test_checked);

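/*
 * Expansion example (illustrative): the dirty instantiation above generates
 * btrfs_folio_set_dirty(), btrfs_folio_clear_dirty(), btrfs_folio_test_dirty()
 * plus the three btrfs_folio_clamp_*_dirty() variants, each falling back to
 * folio_mark_dirty()/folio_clear_dirty_for_io()/folio_test_dirty() for the
 * non-subpage case.
 */
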
#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \
{ \
        const int sectors_per_page = fs_info->sectors_per_page; \
 \
        ASSERT(sectors_per_page < BITS_PER_LONG); \
        *dst = bitmap_read(subpage->bitmaps, \
                           sectors_per_page * btrfs_bitmap_nr_##name, \
                           sectors_per_page); \
}

#define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \
{ \
        const struct btrfs_subpage *subpage = folio_get_private(folio); \
        unsigned long bitmap; \
 \
        GET_SUBPAGE_BITMAP(subpage, fs_info, name, &bitmap); \
        btrfs_warn(fs_info, \
        "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
                   start, len, folio_pos(folio), \
                   fs_info->sectors_per_page, &bitmap); \
}

/*
 * Make sure not only the page dirty bit is cleared, but also the subpage
 * dirty bit is cleared.
 */
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
                                  struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage;
        unsigned int start_bit;
        unsigned int nbits;
        unsigned long flags;

        if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
                return;

        if (!btrfs_is_subpage(fs_info, folio->mapping)) {
                ASSERT(!folio_test_dirty(folio));
                return;
        }

        start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
        nbits = len >> fs_info->sectorsize_bits;
        subpage = folio_get_private(folio);
        ASSERT(subpage);
        spin_lock_irqsave(&subpage->lock, flags);
        if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
                SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
                ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
        }
        ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
        spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * This is for a folio already locked by plain lock_page()/folio_lock(),
 * which doesn't have any subpage awareness.
 *
 * This populates the involved subpage ranges so that subpage helpers can
 * properly unlock them.
 */
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
                          struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage;
        unsigned long flags;
        unsigned int start_bit;
        unsigned int nbits;
        int ret;

        ASSERT(folio_test_locked(folio));
        if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping))
                return;

        subpage = folio_get_private(folio);
        start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
        nbits = len >> fs_info->sectorsize_bits;
        spin_lock_irqsave(&subpage->lock, flags);
        /* Target range should not yet be locked. */
        if (unlikely(!bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits))) {
                SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
                ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
        }
        bitmap_set(subpage->bitmaps, start_bit, nbits);
        ret = atomic_add_return(nbits, &subpage->nr_locked);
        ASSERT(ret <= fs_info->sectors_per_page);
        spin_unlock_irqrestore(&subpage->lock, flags);
}

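/*
 * Pairing sketch (illustrative): a range populated by btrfs_folio_set_lock()
 * is later released through btrfs_folio_end_lock() or
 * btrfs_folio_end_lock_bitmap(), which clear the same locked bitmap bits and
 * drop subpage::nr_locked accordingly.
 */
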
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
                                      struct folio *folio, u64 start, u32 len)
{
        struct btrfs_subpage *subpage;
        const u32 sectors_per_page = fs_info->sectors_per_page;
        unsigned long uptodate_bitmap;
        unsigned long dirty_bitmap;
        unsigned long writeback_bitmap;
        unsigned long ordered_bitmap;
        unsigned long checked_bitmap;
        unsigned long locked_bitmap;
        unsigned long flags;

        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(sectors_per_page > 1);
        subpage = folio_get_private(folio);

        spin_lock_irqsave(&subpage->lock, flags);
        GET_SUBPAGE_BITMAP(subpage, fs_info, uptodate, &uptodate_bitmap);
        GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, &dirty_bitmap);
        GET_SUBPAGE_BITMAP(subpage, fs_info, writeback, &writeback_bitmap);
        GET_SUBPAGE_BITMAP(subpage, fs_info, ordered, &ordered_bitmap);
        GET_SUBPAGE_BITMAP(subpage, fs_info, checked, &checked_bitmap);
        GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &locked_bitmap);
        spin_unlock_irqrestore(&subpage->lock, flags);

        dump_page(folio_page(folio, 0), "btrfs subpage dump");
        btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
                   start, len, folio_pos(folio),
                   sectors_per_page, &uptodate_bitmap,
                   sectors_per_page, &dirty_bitmap,
                   sectors_per_page, &locked_bitmap,
                   sectors_per_page, &writeback_bitmap,
                   sectors_per_page, &ordered_bitmap,
                   sectors_per_page, &checked_bitmap);
}

void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
                                    struct folio *folio,
                                    unsigned long *ret_bitmap)
{
        struct btrfs_subpage *subpage;
        unsigned long flags;

        ASSERT(folio_test_private(folio) && folio_get_private(folio));
        ASSERT(fs_info->sectors_per_page > 1);
        subpage = folio_get_private(folio);

        spin_lock_irqsave(&subpage->lock, flags);
        GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, ret_bitmap);
        spin_unlock_irqrestore(&subpage->lock, flags);
}