Git Repo - linux.git/commitdiff
Merge tag 'for-5.19-rc7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave...
author Linus Torvalds <[email protected]>
Sat, 16 Jul 2022 20:48:55 +0000 (13:48 -0700)
committer Linus Torvalds <[email protected]>
Sat, 16 Jul 2022 20:48:55 +0000 (13:48 -0700)
Pull btrfs reverts from David Sterba:
 "Due to a recent report [1] we need to revert the radix tree to xarray
  conversion patches.

  There's a problem with sleeping under a spinlock: xa_insert() can
  allocate memory under pressure. We use GFP_NOFS, so this is a real
  problem that we unfortunately did not discover during review.

  I'm sorry to make such a change at rc6 time, but the revert is IMO the
  safer option; there are patches to use a mutex instead of the spinlocks,
  but those would need more testing. The revert branch has been tested on
  a few setups and all seem OK.

  The conversion to xarray will be revisited in the future"

Link: https://lore.kernel.org/linux-btrfs/[email protected]/
* tag 'for-5.19-rc7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Revert "btrfs: turn delayed_nodes_tree into an XArray"
  Revert "btrfs: turn name_cache radix tree into XArray in send_ctx"
  Revert "btrfs: turn fs_info member buffer_radix into XArray"
  Revert "btrfs: turn fs_roots_radix in btrfs_fs_info into an XArray"

fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/send.c

diff --combined fs/btrfs/disk-io.c
index 4ba005c4198368214fb4b055fd15d02af19e3b06,daa756b3606de9794ee54da68ebeaba874753fa4..de440ebf5648b059ff3a63022b9015b2a99ed0bf
@@@ -5,6 -5,7 +5,7 @@@
  
  #include <linux/fs.h>
  #include <linux/blkdev.h>
+ #include <linux/radix-tree.h>
  #include <linux/writeback.h>
  #include <linux/workqueue.h>
  #include <linux/kthread.h>
@@@ -485,7 -486,7 +486,7 @@@ static int csum_dirty_subpage_buffers(s
                uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
                                                       fs_info->nodesize);
  
-               /* A dirty eb shouldn't disappear from extent_buffers */
+               /* A dirty eb shouldn't disappear from buffer_radix */
                if (WARN_ON(!eb))
                        return -EUCLEAN;
  
@@@ -996,12 -997,12 +997,12 @@@ static int btree_writepages(struct addr
        return btree_write_cache_pages(mapping, wbc);
  }
  
 -static int btree_releasepage(struct page *page, gfp_t gfp_flags)
 +static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
  {
 -      if (PageWriteback(page) || PageDirty(page))
 -              return 0;
 +      if (folio_test_writeback(folio) || folio_test_dirty(folio))
 +              return false;
  
 -      return try_release_extent_buffer(page);
 +      return try_release_extent_buffer(&folio->page);
  }
  
  static void btree_invalidate_folio(struct folio *folio, size_t offset,
        struct extent_io_tree *tree;
        tree = &BTRFS_I(folio->mapping->host)->io_tree;
        extent_invalidate_folio(tree, folio, offset);
 -      btree_releasepage(&folio->page, GFP_NOFS);
 +      btree_release_folio(folio, GFP_NOFS);
        if (folio_get_private(folio)) {
                btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
                           "folio private not zero on folio %llu",
@@@ -1071,7 -1072,7 +1072,7 @@@ static bool btree_dirty_folio(struct ad
  
  static const struct address_space_operations btree_aops = {
        .writepages     = btree_writepages,
 -      .releasepage    = btree_releasepage,
 +      .release_folio  = btree_release_folio,
        .invalidate_folio = btree_invalidate_folio,
  #ifdef CONFIG_MIGRATION
        .migratepage    = btree_migratepage,
@@@ -1158,7 -1159,7 +1159,7 @@@ static void __setup_root(struct btrfs_r
        root->nr_delalloc_inodes = 0;
        root->nr_ordered_extents = 0;
        root->inode_tree = RB_ROOT;
-       xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
+       INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
  
        btrfs_init_root_block_rsv(root);
  
        btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
  #ifdef CONFIG_BTRFS_DEBUG
        INIT_LIST_HEAD(&root->leak_list);
-       spin_lock(&fs_info->fs_roots_lock);
+       spin_lock(&fs_info->fs_roots_radix_lock);
        list_add_tail(&root->leak_list, &fs_info->allocated_roots);
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
  #endif
  }
  
@@@ -1659,11 -1660,12 +1660,12 @@@ static struct btrfs_root *btrfs_lookup_
  {
        struct btrfs_root *root;
  
-       spin_lock(&fs_info->fs_roots_lock);
-       root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       root = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                (unsigned long)root_id);
        if (root)
                root = btrfs_grab_root(root);
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        return root;
  }
  
@@@ -1705,14 -1707,20 +1707,20 @@@ int btrfs_insert_fs_root(struct btrfs_f
  {
        int ret;
  
-       spin_lock(&fs_info->fs_roots_lock);
-       ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
-                       root, GFP_NOFS);
+       ret = radix_tree_preload(GFP_NOFS);
+       if (ret)
+               return ret;
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       ret = radix_tree_insert(&fs_info->fs_roots_radix,
+                               (unsigned long)root->root_key.objectid,
+                               root);
        if (ret == 0) {
                btrfs_grab_root(root);
-               set_bit(BTRFS_ROOT_REGISTERED, &root->state);
+               set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
        }
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
+       radix_tree_preload_end();
  
        return ret;
  }
@@@ -2342,9 -2350,9 +2350,9 @@@ void btrfs_put_root(struct btrfs_root *
                btrfs_drew_lock_destroy(&root->snapshot_lock);
                free_root_extent_buffers(root);
  #ifdef CONFIG_BTRFS_DEBUG
-               spin_lock(&root->fs_info->fs_roots_lock);
+               spin_lock(&root->fs_info->fs_roots_radix_lock);
                list_del_init(&root->leak_list);
-               spin_unlock(&root->fs_info->fs_roots_lock);
+               spin_unlock(&root->fs_info->fs_roots_radix_lock);
  #endif
                kfree(root);
        }
  
  void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
  {
-       struct btrfs_root *root;
-       unsigned long index = 0;
+       int ret;
+       struct btrfs_root *gang[8];
+       int i;
  
        while (!list_empty(&fs_info->dead_roots)) {
-               root = list_entry(fs_info->dead_roots.next,
-                                 struct btrfs_root, root_list);
-               list_del(&root->root_list);
+               gang[0] = list_entry(fs_info->dead_roots.next,
+                                    struct btrfs_root, root_list);
+               list_del(&gang[0]->root_list);
  
-               if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
-                       btrfs_drop_and_free_fs_root(fs_info, root);
-               btrfs_put_root(root);
+               if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
+                       btrfs_drop_and_free_fs_root(fs_info, gang[0]);
+               btrfs_put_root(gang[0]);
        }
  
-       xa_for_each(&fs_info->fs_roots, index, root) {
-               btrfs_drop_and_free_fs_root(fs_info, root);
+       while (1) {
+               ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, 0,
+                                            ARRAY_SIZE(gang));
+               if (!ret)
+                       break;
+               for (i = 0; i < ret; i++)
+                       btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
  }
  
@@@ -3134,8 -3149,8 +3149,8 @@@ static int __cold init_tree_roots(struc
  
  void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
  {
-       xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
-       xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
        INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_root_lock);
        spin_lock_init(&fs_info->trans_lock);
-       spin_lock_init(&fs_info->fs_roots_lock);
+       spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
        spin_lock_init(&fs_info->defrag_inodes_lock);
        spin_lock_init(&fs_info->super_lock);
@@@ -3374,7 -3389,7 +3389,7 @@@ int btrfs_start_pre_rw_mount(struct btr
        /*
         * btrfs_find_orphan_roots() is responsible for finding all the dead
         * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
-        * them into the fs_info->fs_roots. This must be done before
+        * them into the fs_info->fs_roots_radix tree. This must be done before
         * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
         * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
         * item before the root's tree is deleted - this means that if we unmount
@@@ -4232,7 -4247,6 +4247,7 @@@ static int wait_dev_supers(struct btrfs
   */
  static void btrfs_end_empty_barrier(struct bio *bio)
  {
 +      bio_uninit(bio);
        complete(bio->bi_private);
  }
  
   */
  static void write_dev_flush(struct btrfs_device *device)
  {
 -      struct bio *bio = device->flush_bio;
 +      struct bio *bio = &device->flush_bio;
  
  #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        /*
         * of simplicity, since this is a debug tool and not meant for use in
         * non-debug builds.
         */
 -      struct request_queue *q = bdev_get_queue(device->bdev);
 -      if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
 +      if (!bdev_write_cache(device->bdev))
                return;
  #endif
  
 -      bio_reset(bio, device->bdev, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
 +      bio_init(bio, device->bdev, NULL, 0,
 +               REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
        bio->bi_end_io = btrfs_end_empty_barrier;
        init_completion(&device->flush_wait);
        bio->bi_private = &device->flush_wait;
   */
  static blk_status_t wait_dev_flush(struct btrfs_device *device)
  {
 -      struct bio *bio = device->flush_bio;
 +      struct bio *bio = &device->flush_bio;
  
        if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
                return BLK_STS_OK;
@@@ -4499,11 -4513,12 +4514,12 @@@ void btrfs_drop_and_free_fs_root(struc
  {
        bool drop_ref = false;
  
-       spin_lock(&fs_info->fs_roots_lock);
-       xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
-       if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       radix_tree_delete(&fs_info->fs_roots_radix,
+                         (unsigned long)root->root_key.objectid);
+       if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
                drop_ref = true;
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
  
        if (BTRFS_FS_ERROR(fs_info)) {
                ASSERT(root->log_root == NULL);
  
  int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
  {
-       struct btrfs_root *roots[8];
-       unsigned long index = 0;
-       int i;
+       u64 root_objectid = 0;
+       struct btrfs_root *gang[8];
+       int i = 0;
        int err = 0;
-       int grabbed;
+       unsigned int ret = 0;
  
        while (1) {
-               struct btrfs_root *root;
-               spin_lock(&fs_info->fs_roots_lock);
-               if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
-                       spin_unlock(&fs_info->fs_roots_lock);
-                       return err;
+               spin_lock(&fs_info->fs_roots_radix_lock);
+               ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, root_objectid,
+                                            ARRAY_SIZE(gang));
+               if (!ret) {
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
+                       break;
                }
+               root_objectid = gang[ret - 1]->root_key.objectid + 1;
  
-               grabbed = 0;
-               xa_for_each_start(&fs_info->fs_roots, index, root, index) {
-                       /* Avoid grabbing roots in dead_roots */
-                       if (btrfs_root_refs(&root->root_item) > 0)
-                               roots[grabbed++] = btrfs_grab_root(root);
-                       if (grabbed >= ARRAY_SIZE(roots))
-                               break;
+               for (i = 0; i < ret; i++) {
+                       /* Avoid to grab roots in dead_roots */
+                       if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+                               gang[i] = NULL;
+                               continue;
+                       }
+                       /* grab all the search result for later use */
+                       gang[i] = btrfs_grab_root(gang[i]);
                }
-               spin_unlock(&fs_info->fs_roots_lock);
+               spin_unlock(&fs_info->fs_roots_radix_lock);
  
-               for (i = 0; i < grabbed; i++) {
-                       if (!roots[i])
+               for (i = 0; i < ret; i++) {
+                       if (!gang[i])
                                continue;
-                       index = roots[i]->root_key.objectid;
-                       err = btrfs_orphan_cleanup(roots[i]);
+                       root_objectid = gang[i]->root_key.objectid;
+                       err = btrfs_orphan_cleanup(gang[i]);
                        if (err)
-                               goto out;
-                       btrfs_put_root(roots[i]);
+                               break;
+                       btrfs_put_root(gang[i]);
                }
-               index++;
+               root_objectid++;
        }
  
- out:
-       /* Release the roots that remain uncleaned due to error */
-       for (; i < grabbed; i++) {
-               if (roots[i])
-                       btrfs_put_root(roots[i]);
+       /* release the uncleaned roots due to error */
+       for (; i < ret; i++) {
+               if (gang[i])
+                       btrfs_put_root(gang[i]);
        }
        return err;
  }
@@@ -4879,28 -4896,31 +4897,31 @@@ static void btrfs_error_commit_super(st
  
  static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
  {
-       unsigned long index = 0;
-       int grabbed = 0;
-       struct btrfs_root *roots[8];
-       spin_lock(&fs_info->fs_roots_lock);
-       while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
-                                    ULONG_MAX, 8, XA_PRESENT))) {
-               for (int i = 0; i < grabbed; i++)
-                       roots[i] = btrfs_grab_root(roots[i]);
-               spin_unlock(&fs_info->fs_roots_lock);
-               for (int i = 0; i < grabbed; i++) {
-                       if (!roots[i])
+       struct btrfs_root *gang[8];
+       u64 root_objectid = 0;
+       int ret;
+       spin_lock(&fs_info->fs_roots_radix_lock);
+       while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, root_objectid,
+                                            ARRAY_SIZE(gang))) != 0) {
+               int i;
+               for (i = 0; i < ret; i++)
+                       gang[i] = btrfs_grab_root(gang[i]);
+               spin_unlock(&fs_info->fs_roots_radix_lock);
+               for (i = 0; i < ret; i++) {
+                       if (!gang[i])
                                continue;
-                       index = roots[i]->root_key.objectid;
-                       btrfs_free_log(NULL, roots[i]);
-                       btrfs_put_root(roots[i]);
+                       root_objectid = gang[i]->root_key.objectid;
+                       btrfs_free_log(NULL, gang[i]);
+                       btrfs_put_root(gang[i]);
                }
-               index++;
-               spin_lock(&fs_info->fs_roots_lock);
+               root_objectid++;
+               spin_lock(&fs_info->fs_roots_radix_lock);
        }
-       spin_unlock(&fs_info->fs_roots_lock);
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        btrfs_free_log_root_tree(NULL, fs_info);
  }
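
A second pattern the reverts bring back, visible above in btrfs_free_fs_roots(), btrfs_cleanup_fs_roots() and btrfs_drop_all_logs(), is the batched walk with radix_tree_gang_lookup(): fetch up to ARRAY_SIZE(gang) entries under the lock, drop the lock, process the batch, and resume just past the last index seen (the real code also grabs a reference to each root before dropping the lock). A rough sketch of that walk follows, with hypothetical item, lock and callback names; the index is mirrored in the item, as with btrfs_root::root_key.objectid.

struct my_item {
        unsigned long index;            /* stand-in for root_key.objectid */
};

static void walk_all(struct radix_tree_root *tree, spinlock_t *lock,
                     void (*visit)(struct my_item *item))
{
        struct my_item *gang[8];
        unsigned long next = 0;
        unsigned int ret, i;

        while (1) {
                spin_lock(lock);
                ret = radix_tree_gang_lookup(tree, (void **)gang, next,
                                             ARRAY_SIZE(gang));
                spin_unlock(lock);
                if (!ret)
                        break;
                for (i = 0; i < ret; i++)
                        visit(gang[i]);
                /* Resume just past the last entry of this batch. */
                next = gang[ret - 1]->index + 1;
        }
}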
  
diff --combined fs/btrfs/extent-tree.c
index 4157ecc27d4b6aeca1c290064b6c8baf0e69c580,e14f61ecc1897813d5cc3d838174a3a70139c337..a3afc15430cead41e1d49b7606cb6539a0aed1d7
@@@ -1245,7 -1245,7 +1245,7 @@@ static int btrfs_issue_discard(struct b
  
                if (size) {
                        ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
 -                                                 GFP_NOFS, 0);
 +                                                 GFP_NOFS);
                        if (!ret)
                                *discarded_bytes += size;
                        else if (ret != -EOPNOTSUPP)
  
        if (bytes_left) {
                ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
 -                                         GFP_NOFS, 0);
 +                                         GFP_NOFS);
                if (!ret)
                        *discarded_bytes += bytes_left;
        }
@@@ -1297,7 -1297,7 +1297,7 @@@ static int do_discard_extent(struct btr
                ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
                                              &discarded);
                discarded += src_disc;
 -      } else if (blk_queue_discard(bdev_get_queue(stripe->dev->bdev))) {
 +      } else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
                ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
        } else {
                ret = 0;
@@@ -5829,7 -5829,7 +5829,7 @@@ int btrfs_drop_snapshot(struct btrfs_ro
        btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
        btrfs_qgroup_free_meta_all_pertrans(root);
  
-       if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
+       if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
                btrfs_add_dropped_root(trans, root);
        else
                btrfs_put_root(root);
@@@ -5998,7 -5998,7 +5998,7 @@@ static int btrfs_trim_free_extents(stru
        *trimmed = 0;
  
        /* Discard not supported = nothing to do. */
 -      if (!blk_queue_discard(bdev_get_queue(device->bdev)))
 +      if (!bdev_max_discard_sectors(device->bdev))
                return 0;
  
        /* Not writable = nothing to do. */
diff --combined fs/btrfs/extent_io.c
index 04e36343da3a160c455e26f475c5696357f08401,31729b1be5f3c639a646023c672dc9650e8efdda..f03ab5dbda7ae2693bdd4f7c493e90b9a157b8ca
@@@ -2966,7 -2966,7 +2966,7 @@@ static void begin_page_read(struct btrf
  }
  
  /*
-  * Find extent buffer for a given bytenr.
+  * Find extent buffer for a givne bytenr.
   *
   * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
   * in endio context.
@@@ -2985,9 -2985,11 +2985,11 @@@ static struct extent_buffer *find_exten
                return (struct extent_buffer *)page->private;
        }
  
-       /* For subpage case, we need to lookup extent buffer xarray */
-       eb = xa_load(&fs_info->extent_buffers,
-                    bytenr >> fs_info->sectorsize_bits);
+       /* For subpage case, we need to lookup buffer radix tree */
+       rcu_read_lock();
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              bytenr >> fs_info->sectorsize_bits);
+       rcu_read_unlock();
        ASSERT(eb);
        return eb;
  }
@@@ -3799,9 -3801,8 +3801,9 @@@ out
        return ret;
  }
  
 -int btrfs_readpage(struct file *file, struct page *page)
 +int btrfs_read_folio(struct file *file, struct folio *folio)
  {
 +      struct page *page = &folio->page;
        struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
        u64 start = page_offset(page);
        u64 end = start + PAGE_SIZE - 1;
@@@ -4435,8 -4436,8 +4437,8 @@@ static struct extent_buffer *find_exten
        struct extent_buffer *eb;
  
        rcu_read_lock();
-       eb = xa_load(&fs_info->extent_buffers,
-                    start >> fs_info->sectorsize_bits);
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              start >> fs_info->sectorsize_bits);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
                return eb;
@@@ -5308,7 -5309,7 +5310,7 @@@ int extent_invalidate_folio(struct exte
  }
  
  /*
 - * a helper for releasepage, this tests for areas of the page that
 + * a helper for release_folio, this tests for areas of the page that
   * are locked or under IO and drops the related state bits if it is safe
   * to drop the page.
   */
@@@ -5344,7 -5345,7 +5346,7 @@@ static int try_release_extent_state(str
  }
  
  /*
 - * a helper for releasepage.  As long as there are no locked extents
 + * a helper for release_folio.  As long as there are no locked extents
   * in the range corresponding to the page, both state records and extent
   * map records are removed
   */
@@@ -6044,10 -6045,10 +6046,10 @@@ static void check_buffer_tree_ref(struc
         *
         * It is only cleared in two cases: freeing the last non-tree
         * reference to the extent_buffer when its STALE bit is set or
 -       * calling releasepage when the tree reference is the only reference.
 +       * calling release_folio when the tree reference is the only reference.
         *
         * In both cases, care is taken to ensure that the extent_buffer's
 -       * pages are not under io. However, releasepage can be concurrently
 +       * pages are not under io. However, release_folio can be concurrently
         * called with creating new references, which is prone to race
         * conditions between the calls to check_buffer_tree_ref in those
         * codepaths and clearing TREE_REF in try_release_extent_buffer.
@@@ -6129,22 -6130,24 +6131,24 @@@ struct extent_buffer *alloc_test_extent
        if (!eb)
                return ERR_PTR(-ENOMEM);
        eb->fs_info = fs_info;
-       do {
-               ret = xa_insert(&fs_info->extent_buffers,
-                               start >> fs_info->sectorsize_bits,
-                               eb, GFP_NOFS);
-               if (ret == -ENOMEM) {
-                       exists = ERR_PTR(ret);
+ again:
+       ret = radix_tree_preload(GFP_NOFS);
+       if (ret) {
+               exists = ERR_PTR(ret);
+               goto free_eb;
+       }
+       spin_lock(&fs_info->buffer_lock);
+       ret = radix_tree_insert(&fs_info->buffer_radix,
+                               start >> fs_info->sectorsize_bits, eb);
+       spin_unlock(&fs_info->buffer_lock);
+       radix_tree_preload_end();
+       if (ret == -EEXIST) {
+               exists = find_extent_buffer(fs_info, start);
+               if (exists)
                        goto free_eb;
-               }
-               if (ret == -EBUSY) {
-                       exists = find_extent_buffer(fs_info, start);
-                       if (exists)
-                               goto free_eb;
-               }
-       } while (ret);
+               else
+                       goto again;
+       }
        check_buffer_tree_ref(eb);
        set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
  
@@@ -6312,36 -6315,39 +6316,39 @@@ struct extent_buffer *alloc_extent_buff
                /*
                 * We can't unlock the pages just yet since the extent buffer
                 * hasn't been properly inserted in the radix tree, this
 -               * opens a race with btree_releasepage which can free a page
 +               * opens a race with btree_release_folio which can free a page
                 * while we are still filling in all pages for the buffer and
                 * we could crash.
                 */
        }
        if (uptodate)
                set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-       do {
-               ret = xa_insert(&fs_info->extent_buffers,
-                               start >> fs_info->sectorsize_bits,
-                               eb, GFP_NOFS);
-               if (ret == -ENOMEM) {
-                       exists = ERR_PTR(ret);
+ again:
+       ret = radix_tree_preload(GFP_NOFS);
+       if (ret) {
+               exists = ERR_PTR(ret);
+               goto free_eb;
+       }
+       spin_lock(&fs_info->buffer_lock);
+       ret = radix_tree_insert(&fs_info->buffer_radix,
+                               start >> fs_info->sectorsize_bits, eb);
+       spin_unlock(&fs_info->buffer_lock);
+       radix_tree_preload_end();
+       if (ret == -EEXIST) {
+               exists = find_extent_buffer(fs_info, start);
+               if (exists)
                        goto free_eb;
-               }
-               if (ret == -EBUSY) {
-                       exists = find_extent_buffer(fs_info, start);
-                       if (exists)
-                               goto free_eb;
-               }
-       } while (ret);
+               else
+                       goto again;
+       }
        /* add one reference for the tree */
        check_buffer_tree_ref(eb);
        set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
  
        /*
         * Now it's safe to unlock the pages because any calls to
 -       * btree_releasepage will correctly detect that a page belongs to a
 +       * btree_release_folio will correctly detect that a page belongs to a
         * live buffer and won't free them prematurely.
         */
        for (i = 0; i < num_pages; i++)
@@@ -6379,8 -6385,10 +6386,10 @@@ static int release_extent_buffer(struc
  
                        spin_unlock(&eb->refs_lock);
  
-                       xa_erase(&fs_info->extent_buffers,
-                                eb->start >> fs_info->sectorsize_bits);
+                       spin_lock(&fs_info->buffer_lock);
+                       radix_tree_delete(&fs_info->buffer_radix,
+                                         eb->start >> fs_info->sectorsize_bits);
+                       spin_unlock(&fs_info->buffer_lock);
                } else {
                        spin_unlock(&eb->refs_lock);
                }
@@@ -6723,7 -6731,7 +6732,7 @@@ int read_extent_buffer_pages(struct ext
        eb->read_mirror = 0;
        atomic_set(&eb->io_pages, num_reads);
        /*
 -       * It is possible for releasepage to clear the TREE_REF bit before we
 +       * It is possible for release_folio to clear the TREE_REF bit before we
         * set io_pages. See check_buffer_tree_ref for a more detailed comment.
         */
        check_buffer_tree_ref(eb);
@@@ -7325,25 -7333,42 +7334,42 @@@ void memmove_extent_buffer(const struc
        }
  }
  
+ #define GANG_LOOKUP_SIZE      16
  static struct extent_buffer *get_next_extent_buffer(
                struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
  {
-       struct extent_buffer *eb;
-       unsigned long index;
+       struct extent_buffer *gang[GANG_LOOKUP_SIZE];
+       struct extent_buffer *found = NULL;
        u64 page_start = page_offset(page);
+       u64 cur = page_start;
  
        ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
        lockdep_assert_held(&fs_info->buffer_lock);
  
-       xa_for_each_start(&fs_info->extent_buffers, index, eb,
-                         page_start >> fs_info->sectorsize_bits) {
-               if (in_range(eb->start, page_start, PAGE_SIZE))
-                       return eb;
-               else if (eb->start >= page_start + PAGE_SIZE)
-                       /* Already beyond page end */
-                       return NULL;
+       while (cur < page_start + PAGE_SIZE) {
+               int ret;
+               int i;
+               ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+                               (void **)gang, cur >> fs_info->sectorsize_bits,
+                               min_t(unsigned int, GANG_LOOKUP_SIZE,
+                                     PAGE_SIZE / fs_info->nodesize));
+               if (ret == 0)
+                       goto out;
+               for (i = 0; i < ret; i++) {
+                       /* Already beyond page end */
+                       if (gang[i]->start >= page_start + PAGE_SIZE)
+                               goto out;
+                       /* Found one */
+                       if (gang[i]->start >= bytenr) {
+                               found = gang[i];
+                               goto out;
+                       }
+               }
+               cur = gang[ret - 1]->start + gang[ret - 1]->len;
        }
-       return NULL;
+ out:
+       return found;
  }
  
  static int try_release_subpage_extent_buffer(struct page *page)
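
The alloc_extent_buffer() and alloc_test_extent_buffer() hunks above restore a retry loop around radix_tree_insert(): on -EEXIST the racing inserter's buffer is looked up and reused, and if it has already been freed again the insert is retried. A condensed sketch of that idiom, with hypothetical tree, lock and lookup names (same headers as the earlier sketch, plus <linux/err.h> for ERR_PTR):

static void *insert_or_reuse(struct radix_tree_root *tree, spinlock_t *lock,
                             unsigned long index, void *new_item,
                             void *(*find_live)(unsigned long index))
{
        void *existing;
        int ret;

again:
        ret = radix_tree_preload(GFP_NOFS);
        if (ret)
                return ERR_PTR(ret);

        spin_lock(lock);
        ret = radix_tree_insert(tree, index, new_item);
        spin_unlock(lock);
        radix_tree_preload_end();

        if (ret == -EEXIST) {
                /* Lost the race: reuse the other inserter's entry if still live. */
                existing = find_live(index);
                if (existing)
                        return existing;
                /* It was freed again in the meantime; try our own insert once more. */
                goto again;
        }

        return ret ? ERR_PTR(ret) : new_item;
}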
diff --combined fs/btrfs/inode.c
index 0172f75e051ad6e9129dfc3cb48cfa698d1fc755,329ad27f091890e7e20887c25fc4e5d92d0dbabb..d50448bf8eedd951890d6e6f0a64ef131b90c70a
@@@ -3578,7 -3578,6 +3578,6 @@@ int btrfs_orphan_cleanup(struct btrfs_r
        u64 last_objectid = 0;
        int ret = 0, nr_unlink = 0;
  
-       /* Bail out if the cleanup is already running. */
        if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
                return 0;
  
                         *
                         * btrfs_find_orphan_roots() ran before us, which has
                         * found all deleted roots and loaded them into
-                        * fs_info->fs_roots. So here we can find if an
+                        * fs_info->fs_roots_radix. So here we can find if an
                         * orphan item corresponds to a deleted root by looking
-                        * up the root from that xarray.
+                        * up the root from that radix tree.
                         */
  
-                       spin_lock(&fs_info->fs_roots_lock);
-                       dead_root = xa_load(&fs_info->fs_roots,
-                                           (unsigned long)found_key.objectid);
+                       spin_lock(&fs_info->fs_roots_radix_lock);
+                       dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                                        (unsigned long)found_key.objectid);
                        if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
                                is_dead_root = 1;
-                       spin_unlock(&fs_info->fs_roots_lock);
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
  
                        if (is_dead_root) {
                                /* prevent this orphan from being found again */
@@@ -3911,7 -3910,7 +3910,7 @@@ cache_index
         * cache.
         *
         * This is required for both inode re-read from disk and delayed inode
-        * in the delayed_nodes xarray.
+        * in delayed_nodes_tree.
         */
        if (BTRFS_I(inode)->last_trans == fs_info->generation)
                set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
@@@ -4811,7 -4810,7 +4810,7 @@@ again
                goto out_unlock;
  
        if (!PageUptodate(page)) {
 -              ret = btrfs_readpage(NULL, page);
 +              ret = btrfs_read_folio(NULL, page_folio(page));
                lock_page(page);
                if (page->mapping != mapping) {
                        unlock_page(page);
@@@ -8218,7 -8217,7 +8217,7 @@@ static void btrfs_readahead(struct read
  }
  
  /*
 - * For releasepage() and invalidate_folio() we have a race window where
 + * For release_folio() and invalidate_folio() we have a race window where
   * folio_end_writeback() is called but the subpage spinlock is not yet released.
   * If we continue to release/invalidate the page, we could cause use-after-free
   * for subpage spinlock.  So this function is to spin and wait for subpage
@@@ -8250,22 -8249,22 +8249,22 @@@ static void wait_subpage_spinlock(struc
        spin_unlock_irq(&subpage->lock);
  }
  
 -static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 +static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
  {
 -      int ret = try_release_extent_mapping(page, gfp_flags);
 +      int ret = try_release_extent_mapping(&folio->page, gfp_flags);
  
        if (ret == 1) {
 -              wait_subpage_spinlock(page);
 -              clear_page_extent_mapped(page);
 +              wait_subpage_spinlock(&folio->page);
 +              clear_page_extent_mapped(&folio->page);
        }
        return ret;
  }
  
 -static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 +static bool btrfs_release_folio(struct folio *folio, gfp_t gfp_flags)
  {
 -      if (PageWriteback(page) || PageDirty(page))
 -              return 0;
 -      return __btrfs_releasepage(page, gfp_flags);
 +      if (folio_test_writeback(folio) || folio_test_dirty(folio))
 +              return false;
 +      return __btrfs_release_folio(folio, gfp_flags);
  }
  
  #ifdef CONFIG_MIGRATION
@@@ -8336,7 -8335,7 +8335,7 @@@ static void btrfs_invalidate_folio(stru
         * still safe to wait for ordered extent to finish.
         */
        if (!(offset == 0 && length == folio_size(folio))) {
 -              btrfs_releasepage(&folio->page, GFP_NOFS);
 +              btrfs_release_folio(folio, GFP_NOFS);
                return;
        }
  
@@@ -8460,7 -8459,7 +8459,7 @@@ next
        ASSERT(!folio_test_ordered(folio));
        btrfs_page_clear_checked(fs_info, &folio->page, folio_pos(folio), folio_size(folio));
        if (!inode_evicting)
 -              __btrfs_releasepage(&folio->page, GFP_NOFS);
 +              __btrfs_release_folio(folio, GFP_NOFS);
        clear_page_extent_mapped(&folio->page);
  }
  
@@@ -11430,13 -11429,13 +11429,13 @@@ static const struct file_operations btr
   * For now we're avoiding this by dropping bmap.
   */
  static const struct address_space_operations btrfs_aops = {
 -      .readpage       = btrfs_readpage,
 +      .read_folio     = btrfs_read_folio,
        .writepage      = btrfs_writepage,
        .writepages     = btrfs_writepages,
        .readahead      = btrfs_readahead,
        .direct_IO      = noop_direct_IO,
        .invalidate_folio = btrfs_invalidate_folio,
 -      .releasepage    = btrfs_releasepage,
 +      .release_folio  = btrfs_release_folio,
  #ifdef CONFIG_MIGRATION
        .migratepage    = btrfs_migratepage,
  #endif
diff --combined fs/btrfs/send.c
index fa56890ff81fc0adf2bd63e9c5cd69f0bdf2d159,d6a2428fe22fb433d56c6d054031dd6055ac6be3..c7dea639a56f9dd2faa756af3d8cf61c31c2c992
@@@ -10,6 -10,7 +10,7 @@@
  #include <linux/mount.h>
  #include <linux/xattr.h>
  #include <linux/posix_acl_xattr.h>
+ #include <linux/radix-tree.h>
  #include <linux/vmalloc.h>
  #include <linux/string.h>
  #include <linux/compat.h>
@@@ -127,7 -128,7 +128,7 @@@ struct send_ctx 
        struct list_head new_refs;
        struct list_head deleted_refs;
  
-       struct xarray name_cache;
+       struct radix_tree_root name_cache;
        struct list_head name_cache_list;
        int name_cache_size;
  
@@@ -268,13 -269,14 +269,14 @@@ struct orphan_dir_info 
  struct name_cache_entry {
        struct list_head list;
        /*
-        * On 32bit kernels, xarray has only 32bit indices, but we need to
-        * handle 64bit inums. We use the lower 32bit of the 64bit inum to store
-        * it in the tree. If more than one inum would fall into the same entry,
-        * we use inum_aliases to store the additional entries. inum_aliases is
-        * also used to store entries with the same inum but different generations.
+        * radix_tree has only 32bit entries but we need to handle 64bit inums.
+        * We use the lower 32bit of the 64bit inum to store it in the tree. If
+        * more then one inum would fall into the same entry, we use radix_list
+        * to store the additional entries. radix_list is also used to store
+        * entries where two entries have the same inum but different
+        * generations.
         */
-       struct list_head inum_aliases;
+       struct list_head radix_list;
        u64 ino;
        u64 gen;
        u64 parent_ino;
@@@ -2024,9 -2026,9 +2026,9 @@@ out
  }
  
  /*
-  * Insert a name cache entry. On 32bit kernels the xarray index is 32bit,
+  * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
   * so we need to do some special handling in case we have clashes. This function
-  * takes care of this with the help of name_cache_entry::inum_aliases.
+  * takes care of this with the help of name_cache_entry::radix_list.
   * In case of error, nce is kfreed.
   */
  static int name_cache_insert(struct send_ctx *sctx,
        int ret = 0;
        struct list_head *nce_head;
  
-       nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+       nce_head = radix_tree_lookup(&sctx->name_cache,
+                       (unsigned long)nce->ino);
        if (!nce_head) {
                nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
                if (!nce_head) {
                }
                INIT_LIST_HEAD(nce_head);
  
-               ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL);
+               ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
                if (ret < 0) {
                        kfree(nce_head);
                        kfree(nce);
                        return ret;
                }
        }
-       list_add_tail(&nce->inum_aliases, nce_head);
+       list_add_tail(&nce->radix_list, nce_head);
        list_add_tail(&nce->list, &sctx->name_cache_list);
        sctx->name_cache_size++;
  
@@@ -2063,14 -2066,15 +2066,15 @@@ static void name_cache_delete(struct se
  {
        struct list_head *nce_head;
  
-       nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+       nce_head = radix_tree_lookup(&sctx->name_cache,
+                       (unsigned long)nce->ino);
        if (!nce_head) {
                btrfs_err(sctx->send_root->fs_info,
              "name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
                        nce->ino, sctx->name_cache_size);
        }
  
-       list_del(&nce->inum_aliases);
+       list_del(&nce->radix_list);
        list_del(&nce->list);
        sctx->name_cache_size--;
  
         * We may not get to the final release of nce_head if the lookup fails
         */
        if (nce_head && list_empty(nce_head)) {
-               xa_erase(&sctx->name_cache, (unsigned long)nce->ino);
+               radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
                kfree(nce_head);
        }
  }
@@@ -2089,11 -2093,11 +2093,11 @@@ static struct name_cache_entry *name_ca
        struct list_head *nce_head;
        struct name_cache_entry *cur;
  
-       nce_head = xa_load(&sctx->name_cache, (unsigned long)ino);
+       nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
        if (!nce_head)
                return NULL;
  
-       list_for_each_entry(cur, nce_head, inum_aliases) {
+       list_for_each_entry(cur, nce_head, radix_list) {
                if (cur->ino == ino && cur->gen == gen)
                        return cur;
        }
@@@ -4907,11 -4911,11 +4911,11 @@@ static int put_file_data(struct send_ct
  
                if (PageReadahead(page))
                        page_cache_async_readahead(sctx->cur_inode->i_mapping,
 -                                                 &sctx->ra, NULL, page, index,
 -                                                 last_index + 1 - index);
 +                                                 &sctx->ra, NULL, page_folio(page),
 +                                                 index, last_index + 1 - index);
  
                if (!PageUptodate(page)) {
 -                      btrfs_readpage(NULL, page);
 +                      btrfs_read_folio(NULL, page_folio(page));
                        lock_page(page);
                        if (!PageUptodate(page)) {
                                unlock_page(page);
@@@ -7518,7 -7522,7 +7522,7 @@@ long btrfs_ioctl_send(struct inode *ino
  
        INIT_LIST_HEAD(&sctx->new_refs);
        INIT_LIST_HEAD(&sctx->deleted_refs);
-       xa_init_flags(&sctx->name_cache, GFP_KERNEL);
+       INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
        INIT_LIST_HEAD(&sctx->name_cache_list);
  
        sctx->flags = arg->flags;