Git Repo - linux.git/commitdiff
Merge tag 'for-5.19-rc7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
author Linus Torvalds <[email protected]>
Sat, 16 Jul 2022 20:48:55 +0000 (13:48 -0700)
committer Linus Torvalds <[email protected]>
Sat, 16 Jul 2022 20:48:55 +0000 (13:48 -0700)
Pull btrfs reverts from David Sterba:
 "Due to a recent report [1] we need to revert the radix tree to xarray
  conversion patches.

  There's a problem with sleeping under a spinlock: xa_insert() can
  allocate memory under pressure. We use GFP_NOFS, so this is a real
  problem that we unfortunately did not discover during review.

  I'm sorry to make such a change at rc6 time, but the revert is IMO the
  safer option; there are patches to use a mutex instead of the spinlocks,
  but those would need more testing. The revert branch has been tested on
  a few setups and all seem OK.

  The conversion to xarray will be revisited in the future"

Link: https://lore.kernel.org/linux-btrfs/[email protected]/
* tag 'for-5.19-rc7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Revert "btrfs: turn delayed_nodes_tree into an XArray"
  Revert "btrfs: turn name_cache radix tree into XArray in send_ctx"
  Revert "btrfs: turn fs_info member buffer_radix into XArray"
  Revert "btrfs: turn fs_roots_radix in btrfs_fs_info into an XArray"

fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/send.c

diff --combined fs/btrfs/disk-io.c
index 4ba005c4198368214fb4b055fd15d02af19e3b06,daa756b3606de9794ee54da68ebeaba874753fa4..de440ebf5648b059ff3a63022b9015b2a99ed0bf
@@@ -5,6 -5,7 +5,7 @@@
  
  #include <linux/fs.h>
  #include <linux/blkdev.h>
+ #include <linux/radix-tree.h>
  #include <linux/writeback.h>
  #include <linux/workqueue.h>
  #include <linux/kthread.h>
@@@ -485,7 -486,7 +486,7 @@@ static int csum_dirty_subpage_buffers(s
                uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
                                                       fs_info->nodesize);
  
-               /* A dirty eb shouldn't disappear from extent_buffers */
+               /* A dirty eb shouldn't disappear from buffer_radix */
                if (WARN_ON(!eb))
                        return -EUCLEAN;
  
@@@ -996,12 -997,12 +997,12 @@@ static int btree_writepages(struct addr
        return btree_write_cache_pages(mapping, wbc);
  }
  
 -static int btree_releasepage(struct page *page, gfp_t gfp_flags)
 +static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
  {
 -      if (PageWriteback(page) || PageDirty(page))
 -              return 0;
 +      if (folio_test_writeback(folio) || folio_test_dirty(folio))
 +              return false;
  
 -      return try_release_extent_buffer(page);
 +      return try_release_extent_buffer(&folio->page);
  }
  
  static void btree_invalidate_folio(struct folio *folio, size_t offset,
        struct extent_io_tree *tree;
        tree = &BTRFS_I(folio->mapping->host)->io_tree;
        extent_invalidate_folio(tree, folio, offset);
 -      btree_releasepage(&folio->page, GFP_NOFS);
 +      btree_release_folio(folio, GFP_NOFS);
        if (folio_get_private(folio)) {
                btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
                           "folio private not zero on folio %llu",
@@@ -1071,7 -1072,7 +1072,7 @@@ static bool btree_dirty_folio(struct ad
  
  static const struct address_space_operations btree_aops = {
        .writepages     = btree_writepages,
 -      .releasepage    = btree_releasepage,
 +      .release_folio  = btree_release_folio,
        .invalidate_folio = btree_invalidate_folio,
  #ifdef CONFIG_MIGRATION
        .migratepage    = btree_migratepage,
@@@ -1158,7 -1159,7 +1159,7 @@@ static void __setup_root(struct btrfs_r
        root->nr_delalloc_inodes = 0;
        root->nr_ordered_extents = 0;
        root->inode_tree = RB_ROOT;
-       xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
+       INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
  
        btrfs_init_root_block_rsv(root);
  
        btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
  #ifdef CONFIG_BTRFS_DEBUG
        INIT_LIST_HEAD(&root->leak_list);
-       spin_lock(&fs_info->fs_roots_lock);
+       spin_lock(&fs_info->fs_roots_radix_lock);
        list_add_tail(&root->leak_list, &fs_info->allocated_roots);
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
  #endif
  }
  
@@@ -1659,11 -1660,12 +1660,12 @@@ static struct btrfs_root *btrfs_lookup_
  {
        struct btrfs_root *root;
  
-       spin_lock(&fs_info->fs_roots_lock);
-       root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       root = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                (unsigned long)root_id);
        if (root)
                root = btrfs_grab_root(root);
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        return root;
  }
  
@@@ -1705,14 -1707,20 +1707,20 @@@ int btrfs_insert_fs_root(struct btrfs_f
  {
        int ret;
  
-       spin_lock(&fs_info->fs_roots_lock);
-       ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
-                       root, GFP_NOFS);
+       ret = radix_tree_preload(GFP_NOFS);
+       if (ret)
+               return ret;
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       ret = radix_tree_insert(&fs_info->fs_roots_radix,
+                               (unsigned long)root->root_key.objectid,
+                               root);
        if (ret == 0) {
                btrfs_grab_root(root);
-               set_bit(BTRFS_ROOT_REGISTERED, &root->state);
+               set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
        }
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       radix_tree_preload_end();
  
        return ret;
  }
@@@ -2342,9 -2350,9 +2350,9 @@@ void btrfs_put_root(struct btrfs_root *
                btrfs_drew_lock_destroy(&root->snapshot_lock);
                free_root_extent_buffers(root);
  #ifdef CONFIG_BTRFS_DEBUG
-               spin_lock(&root->fs_info->fs_roots_lock);
+               spin_lock(&root->fs_info->fs_roots_radix_lock);
                list_del_init(&root->leak_list);
-               spin_unlock(&root->fs_info->fs_roots_lock);
+               spin_unlock(&root->fs_info->fs_roots_radix_lock);
  #endif
                kfree(root);
        }
  
  void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
  {
-       struct btrfs_root *root;
-       unsigned long index = 0;
+       int ret;
+       struct btrfs_root *gang[8];
+       int i;
  
        while (!list_empty(&fs_info->dead_roots)) {
-               root = list_entry(fs_info->dead_roots.next,
-                                 struct btrfs_root, root_list);
-               list_del(&root->root_list);
+               gang[0] = list_entry(fs_info->dead_roots.next,
+                                    struct btrfs_root, root_list);
+               list_del(&gang[0]->root_list);
  
-               if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
-                       btrfs_drop_and_free_fs_root(fs_info, root);
-               btrfs_put_root(root);
+               if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
+                       btrfs_drop_and_free_fs_root(fs_info, gang[0]);
+               btrfs_put_root(gang[0]);
        }
  
-       xa_for_each(&fs_info->fs_roots, index, root) {
-               btrfs_drop_and_free_fs_root(fs_info, root);
+       while (1) {
+               ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, 0,
+                                            ARRAY_SIZE(gang));
+               if (!ret)
+                       break;
+               for (i = 0; i < ret; i++)
+                       btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
  }
  
@@@ -3134,8 -3149,8 +3149,8 @@@ static int __cold init_tree_roots(struc
  
  void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
  {
-       xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
-       xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
        INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_root_lock);
        spin_lock_init(&fs_info->trans_lock);
-       spin_lock_init(&fs_info->fs_roots_lock);
+       spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
        spin_lock_init(&fs_info->defrag_inodes_lock);
        spin_lock_init(&fs_info->super_lock);
@@@ -3374,7 -3389,7 +3389,7 @@@ int btrfs_start_pre_rw_mount(struct btr
        /*
         * btrfs_find_orphan_roots() is responsible for finding all the dead
         * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
-        * them into the fs_info->fs_roots. This must be done before
+        * them into the fs_info->fs_roots_radix tree. This must be done before
         * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
         * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
         * item before the root's tree is deleted - this means that if we unmount
@@@ -4232,7 -4247,6 +4247,7 @@@ static int wait_dev_supers(struct btrfs
   */
  static void btrfs_end_empty_barrier(struct bio *bio)
  {
 +      bio_uninit(bio);
        complete(bio->bi_private);
  }
  
   */
  static void write_dev_flush(struct btrfs_device *device)
  {
 -      struct bio *bio = device->flush_bio;
 +      struct bio *bio = &device->flush_bio;
  
  #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        /*
         * of simplicity, since this is a debug tool and not meant for use in
         * non-debug builds.
         */
 -      struct request_queue *q = bdev_get_queue(device->bdev);
 -      if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 +      if (!bdev_write_cache(device->bdev))
                return;
  #endif
  
 -      bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
 +      bio_init(bio, device->bdev, NULL, 0,
 +               REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
        bio->bi_end_io = btrfs_end_empty_barrier;
        init_completion(&device->flush_wait);
        bio->bi_private = &device->flush_wait;
   */
  static blk_status_t wait_dev_flush(struct btrfs_device *device)
  {
 -      struct bio *bio = device->flush_bio;
 +      struct bio *bio = &device->flush_bio;
  
        if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
                return BLK_STS_OK;
@@@ -4499,11 -4513,12 +4514,12 @@@ void btrfs_drop_and_free_fs_root(struc
  {
        bool drop_ref = false;
  
-       spin_lock(&fs_info->fs_roots_lock);
-       xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
-       if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       radix_tree_delete(&fs_info->fs_roots_radix,
+                         (unsigned long)root->root_key.objectid);
+       if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
                drop_ref = true;
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
  
        if (BTRFS_FS_ERROR(fs_info)) {
                ASSERT(root->log_root == NULL);
  
  int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
  {
-       struct btrfs_root *roots[8];
-       unsigned long index = 0;
-       int i;
+       u64 root_objectid = 0;
+       struct btrfs_root *gang[8];
+       int i = 0;
        int err = 0;
-       int grabbed;
+       unsigned int ret = 0;
  
        while (1) {
-               struct btrfs_root *root;
-               spin_lock(&fs_info->fs_roots_lock);
-               if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
-                       spin_unlock(&fs_info->fs_roots_lock);
-                       return err;
+               spin_lock(&fs_info->fs_roots_radix_lock);
+               ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, root_objectid,
+                                            ARRAY_SIZE(gang));
+               if (!ret) {
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
+                       break;
                }
+               root_objectid = gang[ret - 1]->root_key.objectid + 1;
  
-               grabbed = 0;
-               xa_for_each_start(&fs_info->fs_roots, index, root, index) {
-                       /* Avoid grabbing roots in dead_roots */
-                       if (btrfs_root_refs(&root->root_item) > 0)
-                               roots[grabbed++] = btrfs_grab_root(root);
-                       if (grabbed >= ARRAY_SIZE(roots))
-                               break;
+               for (i = 0; i < ret; i++) {
+                       /* Avoid to grab roots in dead_roots */
+                       if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+                               gang[i] = NULL;
+                               continue;
+                       }
+                       /* grab all the search result for later use */
+                       gang[i] = btrfs_grab_root(gang[i]);
                }
-               spin_unlock(&fs_info->fs_roots_lock);
+               spin_unlock(&fs_info->fs_roots_radix_lock);
  
-               for (i = 0; i < grabbed; i++) {
-                       if (!roots[i])
+               for (i = 0; i < ret; i++) {
+                       if (!gang[i])
                                continue;
-                       index = roots[i]->root_key.objectid;
-                       err = btrfs_orphan_cleanup(roots[i]);
+                       root_objectid = gang[i]->root_key.objectid;
+                       err = btrfs_orphan_cleanup(gang[i]);
                        if (err)
-                               goto out;
-                       btrfs_put_root(roots[i]);
+                               break;
+                       btrfs_put_root(gang[i]);
                }
-               index++;
+               root_objectid++;
        }
  
- out:
-       /* Release the roots that remain uncleaned due to error */
-       for (; i < grabbed; i++) {
-               if (roots[i])
-                       btrfs_put_root(roots[i]);
+       /* release the uncleaned roots due to error */
+       for (; i < ret; i++) {
+               if (gang[i])
+                       btrfs_put_root(gang[i]);
        }
        return err;
  }
@@@ -4879,28 -4896,31 +4897,31 @@@ static void btrfs_error_commit_super(st
  
  static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
  {
-       unsigned long index = 0;
-       int grabbed = 0;
-       struct btrfs_root *roots[8];
-       spin_lock(&fs_info->fs_roots_lock);
-       while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
-                                    ULONG_MAX, 8, XA_PRESENT))) {
-               for (int i = 0; i < grabbed; i++)
-                       roots[i] = btrfs_grab_root(roots[i]);
-               spin_unlock(&fs_info->fs_roots_lock);
-               for (int i = 0; i < grabbed; i++) {
-                       if (!roots[i])
+       struct btrfs_root *gang[8];
+       u64 root_objectid = 0;
+       int ret;
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, root_objectid,
+                                            ARRAY_SIZE(gang))) != 0) {
+               int i;
+               for (i = 0; i < ret; i++)
+                       gang[i] = btrfs_grab_root(gang[i]);
+               spin_unlock(&fs_info->fs_roots_radix_lock);
+               for (i = 0; i < ret; i++) {
+                       if (!gang[i])
                                continue;
-                       index = roots[i]->root_key.objectid;
-                       btrfs_free_log(NULL, roots[i]);
-                       btrfs_put_root(roots[i]);
+                       root_objectid = gang[i]->root_key.objectid;
+                       btrfs_free_log(NULL, gang[i]);
+                       btrfs_put_root(gang[i]);
                }
-               index++;
-               spin_lock(&fs_info->fs_roots_lock);
+               root_objectid++;
+               spin_lock(&fs_info->fs_roots_radix_lock);
        }
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        btrfs_free_log_root_tree(NULL, fs_info);
  }
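
A second pattern the reverts bring back, visible above in btrfs_free_fs_roots(), btrfs_cleanup_fs_roots() and btrfs_drop_all_logs(), is the batched walk with radix_tree_gang_lookup(): fetch up to ARRAY_SIZE(gang) entries under the lock, drop the lock, process the batch, and resume just past the last index seen (the real code also grabs a reference to each root before dropping the lock). A rough sketch of that walk follows, with hypothetical item, lock and callback names; the index is mirrored in the item, as with btrfs_root::root_key.objectid.

struct my_item {
        unsigned long index;            /* stand-in for root_key.objectid */
};

static void walk_all(struct radix_tree_root *tree, spinlock_t *lock,
                     void (*visit)(struct my_item *item))
{
        struct my_item *gang[8];
        unsigned long next = 0;
        unsigned int ret, i;

        while (1) {
                spin_lock(lock);
                ret = radix_tree_gang_lookup(tree, (void **)gang, next,
                                             ARRAY_SIZE(gang));
                spin_unlock(lock);
                if (!ret)
                        break;
                for (i = 0; i < ret; i++)
                        visit(gang[i]);
                /* Resume just past the last entry of this batch. */
                next = gang[ret - 1]->index + 1;
        }
}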
  
diff --combined fs/btrfs/extent-tree.c
index 4157ecc27d4b6aeca1c290064b6c8baf0e69c580,e14f61ecc1897813d5cc3d838174a3a70139c337..a3afc15430cead41e1d49b7606cb6539a0aed1d7
@@@ -1245,7 -1245,7 +1245,7 @@@ static int btrfs_issue_discard(struct b
  
                if (size) {
                        ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
 -                                                 GFP_NOFS, 0);
 +                                                 GFP_NOFS);
                        if (!ret)
                                *discarded_bytes += size;
                        else if (ret != -EOPNOTSUPP)
  
        if (bytes_left) {
                ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
 -                                         GFP_NOFS, 0);
 +                                         GFP_NOFS);
                if (!ret)
                        *discarded_bytes += bytes_left;
        }
@@@ -1297,7 -1297,7 +1297,7 @@@ static int do_discard_extent(struct btr
                ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
                                              &discarded);
                discarded += src_disc;
 -      } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
 +      } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
                ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
        } else {
                ret = 0;
@@@ -5829,7 -5829,7 +5829,7 @@@ int btrfs_drop_snapshot(struct btrfs_ro
        btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
        btrfs_qgroup_free_meta_all_pertrans(root);
  
-       if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
+       if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
                btrfs_add_dropped_root(trans, root);
        else
                btrfs_put_root(root);
@@@ -5998,7 -5998,7 +5998,7 @@@ static int btrfs_trim_free_extents(stru
        *trimmed = 0;
  
        /* Discard not supported = nothing to do. */
 -      if (!blk_queue_discard(bdev_get_queue(device->bdev)))
 +      if (!bdev_max_discard_sectors(device->bdev))
                return 0;
  
        /* Not writable = nothing to do. */
diff --combined fs/btrfs/extent_io.c
index 04e36343da3a160c455e26f475c5696357f08401,31729b1be5f3c639a646023c672dc9650e8efdda..f03ab5dbda7ae2693bdd4f7c493e90b9a157b8ca
@@@ -2966,7 -2966,7 +2966,7 @@@ static void begin_page_read(struct btrf
  }
  
  /*
-  * Find extent buffer for a given bytenr.
+  * Find extent buffer for a givne bytenr.
   *
   * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
   * in endio context.
@@@ -2985,9 -2985,11 +2985,11 @@@ static struct extent_buffer *find_exten
                return (struct extent_buffer *)page->private;
        }
  
-       /* For subpage case, we need to lookup extent buffer xarray */
-       eb = xa_load(&fs_info->extent_buffers,
-                    bytenr >> fs_info->sectorsize_bits);
+       /* For subpage case, we need to lookup buffer radix tree */
+       rcu_read_lock();
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              bytenr >> fs_info->sectorsize_bits);
+       rcu_read_unlock();
        ASSERT(eb);
        return eb;
  }
@@@ -3799,9 -3801,8 +3801,9 @@@ out
        return ret;
  }
  
 -int btrfs_readpage(struct file *file, struct page *page)
 +int btrfs_read_folio(struct file *file, struct folio *folio)
  {
 +      struct page *page = &folio->page;
        struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
        u64 start = page_offset(page);
        u64 end = start + PAGE_SIZE - 1;
@@@ -4435,8 -4436,8 +4437,8 @@@ static struct extent_buffer *find_exten
        struct extent_buffer *eb;
  
        rcu_read_lock();
-       eb = xa_load(&fs_info->extent_buffers,
-                    start >> fs_info->sectorsize_bits);
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              start >> fs_info->sectorsize_bits);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
                return eb;
@@@ -5308,7 -5309,7 +5310,7 @@@ int extent_invalidate_folio(struct exte
  }
  
  /*
 - * a helper for releasepage, this tests for areas of the page that
 + * a helper for release_folio, this tests for areas of the page that
   * are locked or under IO and drops the related state bits if it is safe
   * to drop the page.
   */
@@@ -5344,7 -5345,7 +5346,7 @@@ static int try_release_extent_state(str
  }
  
  /*
 - * a helper for releasepage.  As long as there are no locked extents
 + * a helper for release_folio.  As long as there are no locked extents
   * in the range corresponding to the page, both state records and extent
   * map records are removed
   */
@@@ -6044,10 -6045,10 +6046,10 @@@ static void check_buffer_tree_ref(struc
         *
         * It is only cleared in two cases: freeing the last non-tree
         * reference to the extent_buffer when its STALE bit is set or
 -       * calling releasepage when the tree reference is the only reference.
 +       * calling release_folio when the tree reference is the only reference.
         *
         * In both cases, care is taken to ensure that the extent_buffer's
 -       * pages are not under io. However, releasepage can be concurrently
 +       * pages are not under io. However, release_folio can be concurrently
         * called with creating new references, which is prone to race
         * conditions between the calls to check_buffer_tree_ref in those
         * codepaths and clearing TREE_REF in try_release_extent_buffer.
@@@ -6129,22 -6130,24 +6131,24 @@@ struct extent_buffer *alloc_test_extent
        if (!eb)
                return ERR_PTR(-ENOMEM);
        eb->fs_info = fs_info;
-       do {
-               ret = xa_insert(&fs_info->extent_buffers,
-                               start >> fs_info->sectorsize_bits,
-                               eb, GFP_NOFS);
-               if (ret == -ENOMEM) {
-                       exists = ERR_PTR(ret);
+ again:
+       ret = radix_tree_preload(GFP_NOFS);
+       if (ret) {
+               exists = ERR_PTR(ret);
+               goto free_eb;
+       }
+       spin_lock(&fs_info->buffer_lock);
+       ret = radix_tree_insert(&fs_info->buffer_radix,
+                               start >> fs_info->sectorsize_bits, eb);
+       spin_unlock(&fs_info->buffer_lock);
+       radix_tree_preload_end();
+       if (ret == -EEXIST) {
+               exists = find_extent_buffer(fs_info, start);
+               if (exists)
                        goto free_eb;
-               }
-               if (ret == -EBUSY) {
-                       exists = find_extent_buffer(fs_info, start);
-                       if (exists)
-                               goto free_eb;
-               }
-       } while (ret);
+               else
+                       goto again;
+       }
        check_buffer_tree_ref(eb);
        set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
  
@@@ -6312,36 -6315,39 +6316,39 @@@ struct extent_buffer *alloc_extent_buff
                /*
                 * We can't unlock the pages just yet since the extent buffer
                 * hasn't been properly inserted in the radix tree, this
 -               * opens a race with btree_releasepage which can free a page
 +               * opens a race with btree_release_folio which can free a page
                 * while we are still filling in all pages for the buffer and
                 * we could crash.
                 */
        }
        if (uptodate)
                set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-       do {
-               ret = xa_insert(&fs_info->extent_buffers,
-                               start >> fs_info->sectorsize_bits,
-                               eb, GFP_NOFS);
-               if (ret == -ENOMEM) {
-                       exists = ERR_PTR(ret);
+ again:
+       ret = radix_tree_preload(GFP_NOFS);
+       if (ret) {
+               exists = ERR_PTR(ret);
+               goto free_eb;
+       }
+       spin_lock(&fs_info->buffer_lock);
+       ret = radix_tree_insert(&fs_info->buffer_radix,
+                               start >> fs_info->sectorsize_bits, eb);
+       spin_unlock(&fs_info->buffer_lock);
+       radix_tree_preload_end();
+       if (ret == -EEXIST) {
+               exists = find_extent_buffer(fs_info, start);
+               if (exists)
                        goto free_eb;
-               }
-               if (ret == -EBUSY) {
-                       exists = find_extent_buffer(fs_info, start);
-                       if (exists)
-                               goto free_eb;
-               }
-       } while (ret);
+               else
+                       goto again;
+       }
        /* add one reference for the tree */
        check_buffer_tree_ref(eb);
        set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
  
        /*
         * Now it's safe to unlock the pages because any calls to
 -       * btree_releasepage will correctly detect that a page belongs to a
 +       * btree_release_folio will correctly detect that a page belongs to a
         * live buffer and won't free them prematurely.
         */
        for (i = 0; i < num_pages; i++)
@@@ -6379,8 -6385,10 +6386,10 @@@ static int release_extent_buffer(struc
  
                        spin_unlock(&eb->refs_lock);
  
-                       xa_erase(&fs_info->extent_buffers,
-                                eb->start >> fs_info->sectorsize_bits);
+                       spin_lock(&fs_info->buffer_lock);
+                       radix_tree_delete(&fs_info->buffer_radix,
+                                         eb->start >> fs_info->sectorsize_bits);
+                       spin_unlock(&fs_info->buffer_lock);
                } else {
                        spin_unlock(&eb->refs_lock);
                }
@@@ -6723,7 -6731,7 +6732,7 @@@ int read_extent_buffer_pages(struct ext
        eb->read_mirror = 0;
        atomic_set(&eb->io_pages, num_reads);
        /*
 -       * It is possible for releasepage to clear the TREE_REF bit before we
 +       * It is possible for release_folio to clear the TREE_REF bit before we
         * set io_pages. See check_buffer_tree_ref for a more detailed comment.
         */
        check_buffer_tree_ref(eb);
@@@ -7325,25 -7333,42 +7334,42 @@@ void memmove_extent_buffer(const struc
        }
  }
  
+ #define GANG_LOOKUP_SIZE      16
  static struct extent_buffer *get_next_extent_buffer(
                struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
  {
-       struct extent_buffer *eb;
-       unsigned long index;
+       struct extent_buffer *gang[GANG_LOOKUP_SIZE];
+       struct extent_buffer *found = NULL;
        u64 page_start = page_offset(page);
+       u64 cur = page_start;
  
        ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
        lockdep_assert_held(&fs_info->buffer_lock);
  
-       xa_for_each_start(&fs_info->extent_buffers, index, eb,
-                         page_start >> fs_info->sectorsize_bits) {
-               if (in_range(eb->start, page_start, PAGE_SIZE))
-                       return eb;
-               else if (eb->start >= page_start + PAGE_SIZE)
-                       /* Already beyond page end */
-                       return NULL;
+       while (cur < page_start + PAGE_SIZE) {
+               int ret;
+               int i;
+               ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+                               (void **)gang, cur >> fs_info->sectorsize_bits,
+                               min_t(unsigned int, GANG_LOOKUP_SIZE,
+                                     PAGE_SIZE / fs_info->nodesize));
+               if (ret == 0)
+                       goto out;
+               for (i = 0; i < ret; i++) {
+                       /* Already beyond page end */
+                       if (gang[i]->start >= page_start + PAGE_SIZE)
+                               goto out;
+                       /* Found one */
+                       if (gang[i]->start >= bytenr) {
+                               found = gang[i];
+                               goto out;
+                       }
+               }
+               cur = gang[ret - 1]->start + gang[ret - 1]->len;
        }
-       return NULL;
+ out:
+       return found;
  }
  
  static int try_release_subpage_extent_buffer(struct page *page)
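
The alloc_extent_buffer() and alloc_test_extent_buffer() hunks above restore a retry loop around radix_tree_insert(): on -EEXIST the racing inserter's buffer is looked up and reused, and if it has already been freed again the insert is retried. A condensed sketch of that idiom, with hypothetical tree, lock and lookup names (same headers as the earlier sketch, plus <linux/err.h> for ERR_PTR):

static void *insert_or_reuse(struct radix_tree_root *tree, spinlock_t *lock,
                             unsigned long index, void *new_item,
                             void *(*find_live)(unsigned long index))
{
        void *existing;
        int ret;

again:
        ret = radix_tree_preload(GFP_NOFS);
        if (ret)
                return ERR_PTR(ret);

        spin_lock(lock);
        ret = radix_tree_insert(tree, index, new_item);
        spin_unlock(lock);
        radix_tree_preload_end();

        if (ret == -EEXIST) {
                /* Lost the race: reuse the other inserter's entry if still live. */
                existing = find_live(index);
                if (existing)
                        return existing;
                /* It was freed again in the meantime; try our own insert once more. */
                goto again;
        }

        return ret ? ERR_PTR(ret) : new_item;
}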
diff --combined fs/btrfs/inode.c
index 0172f75e051ad6e9129dfc3cb48cfa698d1fc755,329ad27f091890e7e20887c25fc4e5d92d0dbabb..d50448bf8eedd951890d6e6f0a64ef131b90c70a
@@@ -3578,7 -3578,6 +3578,6 @@@ int btrfs_orphan_cleanup(struct btrfs_r
        u64 last_objectid = 0;
        int ret = 0, nr_unlink = 0;
  
-       /* Bail out if the cleanup is already running. */
        if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
                return 0;
  
                         *
                         * btrfs_find_orphan_roots() ran before us, which has
                         * found all deleted roots and loaded them into
-                        * fs_info->fs_roots. So here we can find if an
+                        * fs_info->fs_roots_radix. So here we can find if an
                         * orphan item corresponds to a deleted root by looking
-                        * up the root from that xarray.
+                        * up the root from that radix tree.
                         */
  
-                       spin_lock(&fs_info->fs_roots_lock);
-                       dead_root = xa_load(&fs_info->fs_roots,
-                                           (unsigned long)found_key.objectid);
+                       spin_lock(&fs_info->fs_roots_radix_lock);
+                       dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                                        (unsigned long)found_key.objectid);
                        if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
                                is_dead_root = 1;
-                       spin_unlock(&fs_info->fs_roots_lock);
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
  
                        if (is_dead_root) {
                                /* prevent this orphan from being found again */
@@@ -3911,7 -3910,7 +3910,7 @@@ cache_index
         * cache.
         *
         * This is required for both inode re-read from disk and delayed inode
-        * in the delayed_nodes xarray.
+        * in delayed_nodes_tree.
         */
        if (BTRFS_I(inode)->last_trans == fs_info->generation)
                set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
@@@ -4811,7 -4810,7 +4810,7 @@@ again
                goto out_unlock;
  
        if (!PageUptodate(page)) {
 -              ret = btrfs_readpage(NULL, page);
 +              ret = btrfs_read_folio(NULL, page_folio(page));
                lock_page(page);
                if (page->mapping != mapping) {
                        unlock_page(page);
@@@ -8218,7 -8217,7 +8217,7 @@@ static void btrfs_readahead(struct read
  }
  
  /*
 - * For releasepage() and invalidate_folio() we have a race window where
 + * For release_folio() and invalidate_folio() we have a race window where
   * folio_end_writeback() is called but the subpage spinlock is not yet released.
   * If we continue to release/invalidate the page, we could cause use-after-free
   * for subpage spinlock.  So this function is to spin and wait for subpage
@@@ -8250,22 -8249,22 +8249,22 @@@ static void wait_subpage_spinlock(struc
        spin_unlock_irq(&subpage->lock);
  }
  
 -static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 +static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
  {
 -      int ret = try_release_extent_mapping(page, gfp_flags);
 +      int ret = try_release_extent_mapping(&folio->page, gfp_flags);
  
        if (ret == 1) {
 -              wait_subpage_spinlock(page);
 -              clear_page_extent_mapped(page);
 +              wait_subpage_spinlock(&folio->page);
 +              clear_page_extent_mapped(&folio->page);
        }
        return ret;
  }
  
 -static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 +static bool btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
  {
 -      if (PageWriteback(page) || PageDirty(page))
 -              return 0;
 -      return __btrfs_releasepage(page, gfp_flags);
 +      if (folio_test_writeback(folio) || folio_test_dirty(folio))
 +              return false;
 +      return __btrfs_release_folio(folio, gfp_flags);
  }
  
  #ifdef CONFIG_MIGRATION
@@@ -8336,7 -8335,7 +8335,7 @@@ static void btrfs_invalidate_folio(stru
         * still safe to wait for ordered extent to finish.
         */
        if (!(offset == 0 && length == folio_size(folio))) {
 -              btrfs_releasepage(&folio->page, GFP_NOFS);
 +              btrfs_release_folio(folio, GFP_NOFS);
                return;
        }
  
@@@ -8460,7 -8459,7 +8459,7 @@@ next
        ASSERT(!folio_test_ordered(folio));
        btrfs_page_clear_checked(fs_info, &folio->page, folio_pos(folio), folio_size(folio));
        if (!inode_evicting)
 -              __btrfs_releasepage(&folio->page, GFP_NOFS);
 +              __btrfs_release_folio(folio, GFP_NOFS);
        clear_page_extent_mapped(&folio->page);
  }
  
@@@ -11430,13 -11429,13 +11429,13 @@@ static const struct file_operations btr
   * For now we're avoiding this by dropping bmap.
   */
  static const struct address_space_operations btrfs_aops = {
 -      .readpage       = btrfs_readpage,
 +      .read_folio     = btrfs_read_folio,
        .writepage      = btrfs_writepage,
        .writepages     = btrfs_writepages,
        .readahead      = btrfs_readahead,
        .direct_IO      = noop_direct_IO,
        .invalidate_folio = btrfs_invalidate_folio,
 -      .releasepage    = btrfs_releasepage,
 +      .release_folio  = btrfs_release_folio,
  #ifdef CONFIG_MIGRATION
        .migratepage    = btrfs_migratepage,
  #endif
diff --combined fs/btrfs/send.c
index fa56890ff81fc0adf2bd63e9c5cd69f0bdf2d159,d6a2428fe22fb433d56c6d054031dd6055ac6be3..c7dea639a56f9dd2faa756af3d8cf61c31c2c992
@@@ -10,6 -10,7 +10,7 @@@
  #include <linux/mount.h>
  #include <linux/xattr.h>
  #include <linux/posix_acl_xattr.h>
+ #include <linux/radix-tree.h>
  #include <linux/vmalloc.h>
  #include <linux/string.h>
  #include <linux/compat.h>
@@@ -127,7 -128,7 +128,7 @@@ struct send_ctx 
        struct list_head new_refs;
        struct list_head deleted_refs;
  
-       struct xarray name_cache;
+       struct radix_tree_root name_cache;
        struct list_head name_cache_list;
        int name_cache_size;
  
@@@ -268,13 -269,14 +269,14 @@@ struct orphan_dir_info 
  struct name_cache_entry {
        struct list_head list;
        /*
-        * On 32bit kernels, xarray has only 32bit indices, but we need to
-        * handle 64bit inums. We use the lower 32bit of the 64bit inum to store
-        * it in the tree. If more than one inum would fall into the same entry,
-        * we use inum_aliases to store the additional entries. inum_aliases is
-        * also used to store entries with the same inum but different generations.
+        * radix_tree has only 32bit entries but we need to handle 64bit inums.
+        * We use the lower 32bit of the 64bit inum to store it in the tree. If
+        * more then one inum would fall into the same entry, we use radix_list
+        * to store the additional entries. radix_list is also used to store
+        * entries where two entries have the same inum but different
+        * generations.
         */
-       struct list_head inum_aliases;
+       struct list_head radix_list;
        u64 ino;
        u64 gen;
        u64 parent_ino;
@@@ -2024,9 -2026,9 +2026,9 @@@ out
  }
  
  /*
-  * Insert a name cache entry. On 32bit kernels the xarray index is 32bit,
+  * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
   * so we need to do some special handling in case we have clashes. This function
-  * takes care of this with the help of name_cache_entry::inum_aliases.
+  * takes care of this with the help of name_cache_entry::radix_list.
   * In case of error, nce is kfreed.
   */
  static int name_cache_insert(struct send_ctx *sctx,
        int ret = 0;
        struct list_head *nce_head;
  
-       nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+       nce_head = radix_tree_lookup(&sctx->name_cache,
+                       (unsigned long)nce->ino);
        if (!nce_head) {
                nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
                if (!nce_head) {
                }
                INIT_LIST_HEAD(nce_head);
  
-               ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL);
+               ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
                if (ret < 0) {
                        kfree(nce_head);
                        kfree(nce);
                        return ret;
                }
        }
-       list_add_tail(&nce->inum_aliases, nce_head);
+       list_add_tail(&nce->radix_list, nce_head);
        list_add_tail(&nce->list, &sctx->name_cache_list);
        sctx->name_cache_size++;
  
@@@ -2063,14 -2066,15 +2066,15 @@@ static void name_cache_delete(struct se
  {
        struct list_head *nce_head;
  
-       nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+       nce_head = radix_tree_lookup(&sctx->name_cache,
+                       (unsigned long)nce->ino);
        if (!nce_head) {
                btrfs_err(sctx->send_root->fs_info,
              "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
                        nce->ino, sctx->name_cache_size);
        }
  
-       list_del(&nce->inum_aliases);
+       list_del(&nce->radix_list);
        list_del(&nce->list);
        sctx->name_cache_size--;
  
         * We may not get to the final release of nce_head if the lookup fails
         */
        if (nce_head && list_empty(nce_head)) {
-               xa_erase(&sctx->name_cache, (unsigned long)nce->ino);
+               radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
                kfree(nce_head);
        }
  }
@@@ -2089,11 -2093,11 +2093,11 @@@ static struct name_cache_entry *name_ca
        struct list_head *nce_head;
        struct name_cache_entry *cur;
  
-       nce_head = xa_load(&sctx->name_cache, (unsigned long)ino);
+       nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
        if (!nce_head)
                return NULL;
  
-       list_for_each_entry(cur, nce_head, inum_aliases) {
+       list_for_each_entry(cur, nce_head, radix_list) {
                if (cur->ino == ino && cur->gen == gen)
                        return cur;
        }
@@@ -4907,11 -4911,11 +4911,11 @@@ static int put_file_data(struct send_ct
  
                if (PageReadahead(page))
                        page_cache_async_readahead(sctx->cur_inode->i_mapping,
 -                                                 &sctx->ra, NULL, page, index,
 -                                                 last_index + 1 - index);
 +                                                 &sctx->ra, NULL, page_folio(page),
 +                                                 index, last_index + 1 - index);
  
                if (!PageUptodate(page)) {
 -                      btrfs_readpage(NULL, page);
 +                      btrfs_read_folio(NULL, page_folio(page));
                        lock_page(page);
                        if (!PageUptodate(page)) {
                                unlock_page(page);
@@@ -7518,7 -7522,7 +7522,7 @@@ long btrfs_ioctl_send(struct inode *ino
  
        INIT_LIST_HEAD(&sctx->new_refs);
        INIT_LIST_HEAD(&sctx->deleted_refs);
-       xa_init_flags(&sctx->name_cache, GFP_KERNEL);
+       INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
        INIT_LIST_HEAD(&sctx->name_cache_list);
  
        sctx->flags = arg->flags;