// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook.  All rights reserved.
 */
6 #include <linux/kernel.h>
7 #include <linux/sched/mm.h>
11 #include "free-space-tree.h"
12 #include "transaction.h"
13 #include "block-group.h"
15 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
16 struct btrfs_block_group *block_group,
17 struct btrfs_path *path);
19 static struct btrfs_root *btrfs_free_space_root(
20 struct btrfs_block_group *block_group)
22 struct btrfs_key key = {
23 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
24 .type = BTRFS_ROOT_ITEM_KEY,
28 return btrfs_global_root(block_group->fs_info, &key);
31 void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
35 u64 num_bitmaps, total_bitmap_size;
37 if (WARN_ON(cache->length == 0))
38 btrfs_warn(cache->fs_info, "block group %llu length is zero",
42 * We convert to bitmaps when the disk space required for using extents
43 * exceeds that required for using bitmaps.
45 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
46 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
47 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
48 total_bitmap_size = num_bitmaps * bitmap_size;
49 cache->bitmap_high_thresh = div_u64(total_bitmap_size,
50 sizeof(struct btrfs_item));
53 * We allow for a small buffer between the high threshold and low
54 * threshold to avoid thrashing back and forth between the two formats.
56 if (cache->bitmap_high_thresh > 100)
57 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
59 cache->bitmap_low_thresh = 0;
62 static int add_new_free_space_info(struct btrfs_trans_handle *trans,
63 struct btrfs_block_group *block_group,
64 struct btrfs_path *path)
66 struct btrfs_root *root = btrfs_free_space_root(block_group);
67 struct btrfs_free_space_info *info;
69 struct extent_buffer *leaf;
72 key.objectid = block_group->start;
73 key.type = BTRFS_FREE_SPACE_INFO_KEY;
74 key.offset = block_group->length;
76 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
80 leaf = path->nodes[0];
81 info = btrfs_item_ptr(leaf, path->slots[0],
82 struct btrfs_free_space_info);
83 btrfs_set_free_space_extent_count(leaf, info, 0);
84 btrfs_set_free_space_flags(leaf, info, 0);
85 btrfs_mark_buffer_dirty(leaf);
89 btrfs_release_path(path);
94 struct btrfs_free_space_info *search_free_space_info(
95 struct btrfs_trans_handle *trans,
96 struct btrfs_block_group *block_group,
97 struct btrfs_path *path, int cow)
99 struct btrfs_fs_info *fs_info = block_group->fs_info;
100 struct btrfs_root *root = btrfs_free_space_root(block_group);
101 struct btrfs_key key;
104 key.objectid = block_group->start;
105 key.type = BTRFS_FREE_SPACE_INFO_KEY;
106 key.offset = block_group->length;
108 ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
112 btrfs_warn(fs_info, "missing free space info for %llu",
115 return ERR_PTR(-ENOENT);
118 return btrfs_item_ptr(path->nodes[0], path->slots[0],
119 struct btrfs_free_space_info);
123 * btrfs_search_slot() but we're looking for the greatest key less than the
126 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
127 struct btrfs_root *root,
128 struct btrfs_key *key, struct btrfs_path *p,
129 int ins_len, int cow)
133 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
142 if (p->slots[0] == 0) {
151 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
154 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
157 static unsigned long *alloc_bitmap(u32 bitmap_size)
160 unsigned int nofs_flag;
161 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));
164 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
165 * into the filesystem as the free space bitmap can be modified in the
166 * critical section of a transaction commit.
168 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
169 * know that recursion is unsafe.
171 nofs_flag = memalloc_nofs_save();
172 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
173 memalloc_nofs_restore(nofs_flag);
177 static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
179 u8 *p = ((u8 *)map) + BIT_BYTE(start);
180 const unsigned int size = start + len;
181 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
182 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
184 while (len - bits_to_set >= 0) {
187 bits_to_set = BITS_PER_BYTE;
192 mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
198 int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
199 struct btrfs_block_group *block_group,
200 struct btrfs_path *path)
202 struct btrfs_fs_info *fs_info = trans->fs_info;
203 struct btrfs_root *root = btrfs_free_space_root(block_group);
204 struct btrfs_free_space_info *info;
205 struct btrfs_key key, found_key;
206 struct extent_buffer *leaf;
207 unsigned long *bitmap;
211 u32 bitmap_size, flags, expected_extent_count;
212 u32 extent_count = 0;
216 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
217 bitmap = alloc_bitmap(bitmap_size);
223 start = block_group->start;
224 end = block_group->start + block_group->length;
226 key.objectid = end - 1;
228 key.offset = (u64)-1;
231 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
235 leaf = path->nodes[0];
238 while (path->slots[0] > 0) {
239 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
241 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
242 ASSERT(found_key.objectid == block_group->start);
243 ASSERT(found_key.offset == block_group->length);
246 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
249 ASSERT(found_key.objectid >= start);
250 ASSERT(found_key.objectid < end);
251 ASSERT(found_key.objectid + found_key.offset <= end);
253 first = div_u64(found_key.objectid - start,
254 fs_info->sectorsize);
255 last = div_u64(found_key.objectid + found_key.offset - start,
256 fs_info->sectorsize);
257 le_bitmap_set(bitmap, first, last - first);
267 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
270 btrfs_release_path(path);
273 info = search_free_space_info(trans, block_group, path, 1);
278 leaf = path->nodes[0];
279 flags = btrfs_free_space_flags(leaf, info);
280 flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
281 btrfs_set_free_space_flags(leaf, info, flags);
282 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
283 btrfs_mark_buffer_dirty(leaf);
284 btrfs_release_path(path);
286 if (extent_count != expected_extent_count) {
288 "incorrect extent count for %llu; counted %u, expected %u",
289 block_group->start, extent_count,
290 expected_extent_count);
296 bitmap_cursor = (char *)bitmap;
297 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
304 extent_size = min(end - i, bitmap_range);
305 data_size = free_space_bitmap_size(fs_info, extent_size);
308 key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
309 key.offset = extent_size;
311 ret = btrfs_insert_empty_item(trans, root, path, &key,
316 leaf = path->nodes[0];
317 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
318 write_extent_buffer(leaf, bitmap_cursor, ptr,
320 btrfs_mark_buffer_dirty(leaf);
321 btrfs_release_path(path);
324 bitmap_cursor += data_size;
331 btrfs_abort_transaction(trans, ret);
336 int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
337 struct btrfs_block_group *block_group,
338 struct btrfs_path *path)
340 struct btrfs_fs_info *fs_info = trans->fs_info;
341 struct btrfs_root *root = btrfs_free_space_root(block_group);
342 struct btrfs_free_space_info *info;
343 struct btrfs_key key, found_key;
344 struct extent_buffer *leaf;
345 unsigned long *bitmap;
347 u32 bitmap_size, flags, expected_extent_count;
348 unsigned long nrbits, start_bit, end_bit;
349 u32 extent_count = 0;
353 bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
354 bitmap = alloc_bitmap(bitmap_size);
360 start = block_group->start;
361 end = block_group->start + block_group->length;
363 key.objectid = end - 1;
365 key.offset = (u64)-1;
368 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
372 leaf = path->nodes[0];
375 while (path->slots[0] > 0) {
376 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
378 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
379 ASSERT(found_key.objectid == block_group->start);
380 ASSERT(found_key.offset == block_group->length);
383 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
386 u32 bitmap_pos, data_size;
388 ASSERT(found_key.objectid >= start);
389 ASSERT(found_key.objectid < end);
390 ASSERT(found_key.objectid + found_key.offset <= end);
392 bitmap_pos = div_u64(found_key.objectid - start,
393 fs_info->sectorsize *
395 bitmap_cursor = ((char *)bitmap) + bitmap_pos;
396 data_size = free_space_bitmap_size(fs_info,
399 ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
400 read_extent_buffer(leaf, bitmap_cursor, ptr,
410 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
413 btrfs_release_path(path);
416 info = search_free_space_info(trans, block_group, path, 1);
421 leaf = path->nodes[0];
422 flags = btrfs_free_space_flags(leaf, info);
423 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
424 btrfs_set_free_space_flags(leaf, info, flags);
425 expected_extent_count = btrfs_free_space_extent_count(leaf, info);
426 btrfs_mark_buffer_dirty(leaf);
427 btrfs_release_path(path);
429 nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
430 start_bit = find_next_bit_le(bitmap, nrbits, 0);
432 while (start_bit < nrbits) {
433 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
434 ASSERT(start_bit < end_bit);
436 key.objectid = start + start_bit * block_group->fs_info->sectorsize;
437 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
438 key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;
440 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
443 btrfs_release_path(path);
447 start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
450 if (extent_count != expected_extent_count) {
452 "incorrect extent count for %llu; counted %u, expected %u",
453 block_group->start, extent_count,
454 expected_extent_count);
464 btrfs_abort_transaction(trans, ret);
468 static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
469 struct btrfs_block_group *block_group,
470 struct btrfs_path *path,
473 struct btrfs_free_space_info *info;
478 if (new_extents == 0)
481 info = search_free_space_info(trans, block_group, path, 1);
486 flags = btrfs_free_space_flags(path->nodes[0], info);
487 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
489 extent_count += new_extents;
490 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
491 btrfs_mark_buffer_dirty(path->nodes[0]);
492 btrfs_release_path(path);
494 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
495 extent_count > block_group->bitmap_high_thresh) {
496 ret = convert_free_space_to_bitmaps(trans, block_group, path);
497 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
498 extent_count < block_group->bitmap_low_thresh) {
499 ret = convert_free_space_to_extents(trans, block_group, path);
507 int free_space_test_bit(struct btrfs_block_group *block_group,
508 struct btrfs_path *path, u64 offset)
510 struct extent_buffer *leaf;
511 struct btrfs_key key;
512 u64 found_start, found_end;
513 unsigned long ptr, i;
515 leaf = path->nodes[0];
516 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
517 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
519 found_start = key.objectid;
520 found_end = key.objectid + key.offset;
521 ASSERT(offset >= found_start && offset < found_end);
523 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
524 i = div_u64(offset - found_start,
525 block_group->fs_info->sectorsize);
526 return !!extent_buffer_test_bit(leaf, ptr, i);
529 static void free_space_set_bits(struct btrfs_block_group *block_group,
530 struct btrfs_path *path, u64 *start, u64 *size,
533 struct btrfs_fs_info *fs_info = block_group->fs_info;
534 struct extent_buffer *leaf;
535 struct btrfs_key key;
536 u64 end = *start + *size;
537 u64 found_start, found_end;
538 unsigned long ptr, first, last;
540 leaf = path->nodes[0];
541 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
542 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
544 found_start = key.objectid;
545 found_end = key.objectid + key.offset;
546 ASSERT(*start >= found_start && *start < found_end);
547 ASSERT(end > found_start);
552 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
553 first = (*start - found_start) >> fs_info->sectorsize_bits;
554 last = (end - found_start) >> fs_info->sectorsize_bits;
556 extent_buffer_bitmap_set(leaf, ptr, first, last - first);
558 extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
559 btrfs_mark_buffer_dirty(leaf);
561 *size -= end - *start;
566 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
567 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
568 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
571 static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
572 struct btrfs_root *root, struct btrfs_path *p)
574 struct btrfs_key key;
576 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
581 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
582 btrfs_release_path(p);
584 key.objectid += key.offset;
586 key.offset = (u64)-1;
588 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
592 * If remove is 1, then we are removing free space, thus clearing bits in the
593 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
596 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
597 struct btrfs_block_group *block_group,
598 struct btrfs_path *path,
599 u64 start, u64 size, int remove)
601 struct btrfs_root *root = btrfs_free_space_root(block_group);
602 struct btrfs_key key;
603 u64 end = start + size;
604 u64 cur_start, cur_size;
605 int prev_bit, next_bit;
610 * Read the bit for the block immediately before the extent of space if
611 * that block is within the block group.
613 if (start > block_group->start) {
614 u64 prev_block = start - block_group->fs_info->sectorsize;
616 key.objectid = prev_block;
618 key.offset = (u64)-1;
620 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
624 prev_bit = free_space_test_bit(block_group, path, prev_block);
626 /* The previous block may have been in the previous bitmap. */
627 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
628 if (start >= key.objectid + key.offset) {
629 ret = free_space_next_bitmap(trans, root, path);
634 key.objectid = start;
636 key.offset = (u64)-1;
638 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
646 * Iterate over all of the bitmaps overlapped by the extent of space,
647 * clearing/setting bits as required.
652 free_space_set_bits(block_group, path, &cur_start, &cur_size,
656 ret = free_space_next_bitmap(trans, root, path);
662 * Read the bit for the block immediately after the extent of space if
663 * that block is within the block group.
665 if (end < block_group->start + block_group->length) {
666 /* The next block may be in the next bitmap. */
667 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
668 if (end >= key.objectid + key.offset) {
669 ret = free_space_next_bitmap(trans, root, path);
674 next_bit = free_space_test_bit(block_group, path, end);
682 /* Leftover on the left. */
686 /* Leftover on the right. */
692 /* Merging with neighbor on the left. */
696 /* Merging with neighbor on the right. */
701 btrfs_release_path(path);
702 ret = update_free_space_extent_count(trans, block_group, path,
709 static int remove_free_space_extent(struct btrfs_trans_handle *trans,
710 struct btrfs_block_group *block_group,
711 struct btrfs_path *path,
714 struct btrfs_root *root = btrfs_free_space_root(block_group);
715 struct btrfs_key key;
716 u64 found_start, found_end;
717 u64 end = start + size;
718 int new_extents = -1;
721 key.objectid = start;
723 key.offset = (u64)-1;
725 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
729 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
731 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
733 found_start = key.objectid;
734 found_end = key.objectid + key.offset;
735 ASSERT(start >= found_start && end <= found_end);
738 * Okay, now that we've found the free space extent which contains the
739 * free space that we are removing, there are four cases:
741 * 1. We're using the whole extent: delete the key we found and
742 * decrement the free space extent count.
743 * 2. We are using part of the extent starting at the beginning: delete
744 * the key we found and insert a new key representing the leftover at
745 * the end. There is no net change in the number of extents.
746 * 3. We are using part of the extent ending at the end: delete the key
747 * we found and insert a new key representing the leftover at the
748 * beginning. There is no net change in the number of extents.
749 * 4. We are using part of the extent in the middle: delete the key we
750 * found and insert two new keys representing the leftovers on each
751 * side. Where we used to have one extent, we now have two, so increment
752 * the extent count. We may need to convert the block group to bitmaps
756 /* Delete the existing key (cases 1-4). */
757 ret = btrfs_del_item(trans, root, path);
761 /* Add a key for leftovers at the beginning (cases 3 and 4). */
762 if (start > found_start) {
763 key.objectid = found_start;
764 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
765 key.offset = start - found_start;
767 btrfs_release_path(path);
768 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
774 /* Add a key for leftovers at the end (cases 2 and 4). */
775 if (end < found_end) {
777 key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
778 key.offset = found_end - end;
780 btrfs_release_path(path);
781 ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
787 btrfs_release_path(path);
788 ret = update_free_space_extent_count(trans, block_group, path,
796 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
797 struct btrfs_block_group *block_group,
798 struct btrfs_path *path, u64 start, u64 size)
800 struct btrfs_free_space_info *info;
804 if (block_group->needs_free_space) {
805 ret = __add_block_group_free_space(trans, block_group, path);
810 info = search_free_space_info(NULL, block_group, path, 0);
812 return PTR_ERR(info);
813 flags = btrfs_free_space_flags(path->nodes[0], info);
814 btrfs_release_path(path);
816 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
817 return modify_free_space_bitmap(trans, block_group, path,
820 return remove_free_space_extent(trans, block_group, path,
825 int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
828 struct btrfs_block_group *block_group;
829 struct btrfs_path *path;
832 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
835 path = btrfs_alloc_path();
841 block_group = btrfs_lookup_block_group(trans->fs_info, start);
848 mutex_lock(&block_group->free_space_lock);
849 ret = __remove_from_free_space_tree(trans, block_group, path, start,
851 mutex_unlock(&block_group->free_space_lock);
853 btrfs_put_block_group(block_group);
855 btrfs_free_path(path);
857 btrfs_abort_transaction(trans, ret);
861 static int add_free_space_extent(struct btrfs_trans_handle *trans,
862 struct btrfs_block_group *block_group,
863 struct btrfs_path *path,
866 struct btrfs_root *root = btrfs_free_space_root(block_group);
867 struct btrfs_key key, new_key;
868 u64 found_start, found_end;
869 u64 end = start + size;
874 * We are adding a new extent of free space, but we need to merge
875 * extents. There are four cases here:
877 * 1. The new extent does not have any immediate neighbors to merge
878 * with: add the new key and increment the free space extent count. We
879 * may need to convert the block group to bitmaps as a result.
880 * 2. The new extent has an immediate neighbor before it: remove the
881 * previous key and insert a new key combining both of them. There is no
882 * net change in the number of extents.
883 * 3. The new extent has an immediate neighbor after it: remove the next
884 * key and insert a new key combining both of them. There is no net
885 * change in the number of extents.
886 * 4. The new extent has immediate neighbors on both sides: remove both
887 * of the keys and insert a new key combining all of them. Where we used
888 * to have two extents, we now have one, so decrement the extent count.
891 new_key.objectid = start;
892 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
893 new_key.offset = size;
895 /* Search for a neighbor on the left. */
896 if (start == block_group->start)
898 key.objectid = start - 1;
900 key.offset = (u64)-1;
902 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
906 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
908 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
909 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
910 btrfs_release_path(path);
914 found_start = key.objectid;
915 found_end = key.objectid + key.offset;
916 ASSERT(found_start >= block_group->start &&
917 found_end > block_group->start);
918 ASSERT(found_start < start && found_end <= start);
921 * Delete the neighbor on the left and absorb it into the new key (cases
924 if (found_end == start) {
925 ret = btrfs_del_item(trans, root, path);
928 new_key.objectid = found_start;
929 new_key.offset += key.offset;
932 btrfs_release_path(path);
935 /* Search for a neighbor on the right. */
936 if (end == block_group->start + block_group->length)
940 key.offset = (u64)-1;
942 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
946 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
948 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
949 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
950 btrfs_release_path(path);
954 found_start = key.objectid;
955 found_end = key.objectid + key.offset;
956 ASSERT(found_start >= block_group->start &&
957 found_end > block_group->start);
958 ASSERT((found_start < start && found_end <= start) ||
959 (found_start >= end && found_end > end));
962 * Delete the neighbor on the right and absorb it into the new key
965 if (found_start == end) {
966 ret = btrfs_del_item(trans, root, path);
969 new_key.offset += key.offset;
972 btrfs_release_path(path);
975 /* Insert the new key (cases 1-4). */
976 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
980 btrfs_release_path(path);
981 ret = update_free_space_extent_count(trans, block_group, path,
989 int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
990 struct btrfs_block_group *block_group,
991 struct btrfs_path *path, u64 start, u64 size)
993 struct btrfs_free_space_info *info;
997 if (block_group->needs_free_space) {
998 ret = __add_block_group_free_space(trans, block_group, path);
1003 info = search_free_space_info(NULL, block_group, path, 0);
1005 return PTR_ERR(info);
1006 flags = btrfs_free_space_flags(path->nodes[0], info);
1007 btrfs_release_path(path);
1009 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
1010 return modify_free_space_bitmap(trans, block_group, path,
1013 return add_free_space_extent(trans, block_group, path, start,
1018 int add_to_free_space_tree(struct btrfs_trans_handle *trans,
1019 u64 start, u64 size)
1021 struct btrfs_block_group *block_group;
1022 struct btrfs_path *path;
1025 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1028 path = btrfs_alloc_path();
1034 block_group = btrfs_lookup_block_group(trans->fs_info, start);
1041 mutex_lock(&block_group->free_space_lock);
1042 ret = __add_to_free_space_tree(trans, block_group, path, start, size);
1043 mutex_unlock(&block_group->free_space_lock);
1045 btrfs_put_block_group(block_group);
1047 btrfs_free_path(path);
1049 btrfs_abort_transaction(trans, ret);
1054 * Populate the free space tree by walking the extent tree. Operations on the
1055 * extent tree that happen as a result of writes to the free space tree will go
1056 * through the normal add/remove hooks.
1058 static int populate_free_space_tree(struct btrfs_trans_handle *trans,
1059 struct btrfs_block_group *block_group)
1061 struct btrfs_root *extent_root;
1062 struct btrfs_path *path, *path2;
1063 struct btrfs_key key;
1067 path = btrfs_alloc_path();
1070 path->reada = READA_FORWARD;
1072 path2 = btrfs_alloc_path();
1074 btrfs_free_path(path);
1078 ret = add_new_free_space_info(trans, block_group, path2);
1082 mutex_lock(&block_group->free_space_lock);
1085 * Iterate through all of the extent and metadata items in this block
1086 * group, adding the free space between them and the free space at the
1087 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
1088 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
1091 key.objectid = block_group->start;
1092 key.type = BTRFS_EXTENT_ITEM_KEY;
1095 extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
1096 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
1101 start = block_group->start;
1102 end = block_group->start + block_group->length;
1104 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1106 if (key.type == BTRFS_EXTENT_ITEM_KEY ||
1107 key.type == BTRFS_METADATA_ITEM_KEY) {
1108 if (key.objectid >= end)
1111 if (start < key.objectid) {
1112 ret = __add_to_free_space_tree(trans,
1120 start = key.objectid;
1121 if (key.type == BTRFS_METADATA_ITEM_KEY)
1122 start += trans->fs_info->nodesize;
1124 start += key.offset;
1125 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
1126 if (key.objectid != block_group->start)
1130 ret = btrfs_next_item(extent_root, path);
1137 ret = __add_to_free_space_tree(trans, block_group, path2,
1138 start, end - start);
1145 mutex_unlock(&block_group->free_space_lock);
1147 btrfs_free_path(path2);
1148 btrfs_free_path(path);
1152 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
1154 struct btrfs_trans_handle *trans;
1155 struct btrfs_root *tree_root = fs_info->tree_root;
1156 struct btrfs_root *free_space_root;
1157 struct btrfs_block_group *block_group;
1158 struct rb_node *node;
1161 trans = btrfs_start_transaction(tree_root, 0);
1163 return PTR_ERR(trans);
1165 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1166 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1167 free_space_root = btrfs_create_tree(trans,
1168 BTRFS_FREE_SPACE_TREE_OBJECTID);
1169 if (IS_ERR(free_space_root)) {
1170 ret = PTR_ERR(free_space_root);
1173 ret = btrfs_global_root_insert(free_space_root);
1175 btrfs_put_root(free_space_root);
1179 node = rb_first(&fs_info->block_group_cache_tree);
1181 block_group = rb_entry(node, struct btrfs_block_group,
1183 ret = populate_free_space_tree(trans, block_group);
1186 node = rb_next(node);
1189 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1190 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1191 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1192 ret = btrfs_commit_transaction(trans);
1195 * Now that we've committed the transaction any reading of our commit
1196 * root will be safe, so we can cache from the free space tree now.
1198 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1202 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
1203 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
1204 btrfs_abort_transaction(trans, ret);
1205 btrfs_end_transaction(trans);
1209 static int clear_free_space_tree(struct btrfs_trans_handle *trans,
1210 struct btrfs_root *root)
1212 struct btrfs_path *path;
1213 struct btrfs_key key;
1217 path = btrfs_alloc_path();
1226 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1230 nr = btrfs_header_nritems(path->nodes[0]);
1235 ret = btrfs_del_items(trans, root, path, 0, nr);
1239 btrfs_release_path(path);
1244 btrfs_free_path(path);
1248 int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
1250 struct btrfs_trans_handle *trans;
1251 struct btrfs_root *tree_root = fs_info->tree_root;
1252 struct btrfs_key key = {
1253 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
1254 .type = BTRFS_ROOT_ITEM_KEY,
1257 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
1260 trans = btrfs_start_transaction(tree_root, 0);
1262 return PTR_ERR(trans);
1264 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
1265 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
1267 ret = clear_free_space_tree(trans, free_space_root);
1271 ret = btrfs_del_root(trans, &free_space_root->root_key);
1275 btrfs_global_root_delete(free_space_root);
1276 list_del(&free_space_root->dirty_list);
1278 btrfs_tree_lock(free_space_root->node);
1279 btrfs_clean_tree_block(free_space_root->node);
1280 btrfs_tree_unlock(free_space_root->node);
1281 btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
1282 free_space_root->node, 0, 1);
1284 btrfs_put_root(free_space_root);
1286 return btrfs_commit_transaction(trans);
1289 btrfs_abort_transaction(trans, ret);
1290 btrfs_end_transaction(trans);
1294 static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
1295 struct btrfs_block_group *block_group,
1296 struct btrfs_path *path)
1300 block_group->needs_free_space = 0;
1302 ret = add_new_free_space_info(trans, block_group, path);
1306 return __add_to_free_space_tree(trans, block_group, path,
1308 block_group->length);
1311 int add_block_group_free_space(struct btrfs_trans_handle *trans,
1312 struct btrfs_block_group *block_group)
1314 struct btrfs_fs_info *fs_info = trans->fs_info;
1315 struct btrfs_path *path = NULL;
1318 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
1321 mutex_lock(&block_group->free_space_lock);
1322 if (!block_group->needs_free_space)
1325 path = btrfs_alloc_path();
1331 ret = __add_block_group_free_space(trans, block_group, path);
1334 btrfs_free_path(path);
1335 mutex_unlock(&block_group->free_space_lock);
1337 btrfs_abort_transaction(trans, ret);
1341 int remove_block_group_free_space(struct btrfs_trans_handle *trans,
1342 struct btrfs_block_group *block_group)
1344 struct btrfs_root *root = btrfs_free_space_root(block_group);
1345 struct btrfs_path *path;
1346 struct btrfs_key key, found_key;
1347 struct extent_buffer *leaf;
1352 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
1355 if (block_group->needs_free_space) {
1356 /* We never added this block group to the free space tree. */
1360 path = btrfs_alloc_path();
1366 start = block_group->start;
1367 end = block_group->start + block_group->length;
1369 key.objectid = end - 1;
1371 key.offset = (u64)-1;
1374 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
1378 leaf = path->nodes[0];
1381 while (path->slots[0] > 0) {
1382 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
1384 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
1385 ASSERT(found_key.objectid == block_group->start);
1386 ASSERT(found_key.offset == block_group->length);
1391 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
1392 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
1393 ASSERT(found_key.objectid >= start);
1394 ASSERT(found_key.objectid < end);
1395 ASSERT(found_key.objectid + found_key.offset <= end);
1403 ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
1406 btrfs_release_path(path);
1411 btrfs_free_path(path);
1413 btrfs_abort_transaction(trans, ret);
1417 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
1418 struct btrfs_path *path,
1419 u32 expected_extent_count)
1421 struct btrfs_block_group *block_group;
1422 struct btrfs_fs_info *fs_info;
1423 struct btrfs_root *root;
1424 struct btrfs_key key;
1425 int prev_bit = 0, bit;
1426 /* Initialize to silence GCC. */
1427 u64 extent_start = 0;
1429 u64 total_found = 0;
1430 u32 extent_count = 0;
1433 block_group = caching_ctl->block_group;
1434 fs_info = block_group->fs_info;
1435 root = btrfs_free_space_root(block_group);
1437 end = block_group->start + block_group->length;
1440 ret = btrfs_next_item(root, path);
1446 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1448 if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1451 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
1452 ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1454 caching_ctl->progress = key.objectid;
1456 offset = key.objectid;
1457 while (offset < key.objectid + key.offset) {
1458 bit = free_space_test_bit(block_group, path, offset);
1459 if (prev_bit == 0 && bit == 1) {
1460 extent_start = offset;
1461 } else if (prev_bit == 1 && bit == 0) {
1462 total_found += add_new_free_space(block_group,
1465 if (total_found > CACHING_CTL_WAKE_UP) {
1467 wake_up(&caching_ctl->wait);
1472 offset += fs_info->sectorsize;
1475 if (prev_bit == 1) {
1476 total_found += add_new_free_space(block_group, extent_start,
1481 if (extent_count != expected_extent_count) {
1483 "incorrect extent count for %llu; counted %u, expected %u",
1484 block_group->start, extent_count,
1485 expected_extent_count);
1491 caching_ctl->progress = (u64)-1;
1498 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
1499 struct btrfs_path *path,
1500 u32 expected_extent_count)
1502 struct btrfs_block_group *block_group;
1503 struct btrfs_fs_info *fs_info;
1504 struct btrfs_root *root;
1505 struct btrfs_key key;
1507 u64 total_found = 0;
1508 u32 extent_count = 0;
1511 block_group = caching_ctl->block_group;
1512 fs_info = block_group->fs_info;
1513 root = btrfs_free_space_root(block_group);
1515 end = block_group->start + block_group->length;
1518 ret = btrfs_next_item(root, path);
1524 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
1526 if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
1529 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
1530 ASSERT(key.objectid < end && key.objectid + key.offset <= end);
1532 caching_ctl->progress = key.objectid;
1534 total_found += add_new_free_space(block_group, key.objectid,
1535 key.objectid + key.offset);
1536 if (total_found > CACHING_CTL_WAKE_UP) {
1538 wake_up(&caching_ctl->wait);
1543 if (extent_count != expected_extent_count) {
1545 "incorrect extent count for %llu; counted %u, expected %u",
1546 block_group->start, extent_count,
1547 expected_extent_count);
1553 caching_ctl->progress = (u64)-1;
1560 int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
1562 struct btrfs_block_group *block_group;
1563 struct btrfs_free_space_info *info;
1564 struct btrfs_path *path;
1565 u32 extent_count, flags;
1568 block_group = caching_ctl->block_group;
1570 path = btrfs_alloc_path();
1575 * Just like caching_thread() doesn't want to deadlock on the extent
1576 * tree, we don't want to deadlock on the free space tree.
1578 path->skip_locking = 1;
1579 path->search_commit_root = 1;
1580 path->reada = READA_FORWARD;
1582 info = search_free_space_info(NULL, block_group, path, 0);
1584 ret = PTR_ERR(info);
1587 extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
1588 flags = btrfs_free_space_flags(path->nodes[0], info);
1591 * We left path pointing to the free space info item, so now
1592 * load_free_space_foo can just iterate through the free space tree from
1595 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
1596 ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
1598 ret = load_free_space_extents(caching_ctl, path, extent_count);
1601 btrfs_free_path(path);