Git Repo - J-linux.git/commitdiff
Merge tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
author: Linus Torvalds <[email protected]>
Wed, 27 Mar 2024 20:56:41 +0000 (13:56 -0700)
committer: Linus Torvalds <[email protected]>
Wed, 27 Mar 2024 20:56:41 +0000 (13:56 -0700)
Pull btrfs fixes from David Sterba:

 - fix race when reading extent buffer and 'uptodate' status is missed
   by one thread (introduced in 6.5)

 - do additional validation of devices using major:minor numbers

 - zoned mode fixes:
     - use zone-aware super block access during scrub
     - fix use-after-free during device replace (found by KASAN)
     - also delete zones that are 100% unusable to reclaim space

 - extent unpinning fixes:
     - fix extent map leak after error handling
     - print correct range in error message

 - error code and message updates

* tag 'for-6.9-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix race in read_extent_buffer_pages()
  btrfs: return accurate error code on open failure in open_fs_devices()
  btrfs: zoned: don't skip block groups with 100% zone unusable
  btrfs: use btrfs_warn() to log message at btrfs_add_extent_mapping()
  btrfs: fix message not properly printing interval when adding extent map
  btrfs: fix warning messages not printing interval at unpin_extent_range()
  btrfs: fix extent map leak in unexpected scenario at unpin_extent_cache()
  btrfs: validate device maj:min during open
  btrfs: zoned: fix use-after-free in do_zone_finish()
  btrfs: zoned: use zone aware sb location for scrub

1  2 
fs/btrfs/volumes.c
fs/btrfs/zoned.c

diff --combined fs/btrfs/volumes.c
index 1dc1f1946ae0eb3158a38b2d214746e7cc4a09ee,dedec3d9b1117d9fcca42c9d301bfef86e5782ff..f15591f3e54fa4cd7e92103e17b0ae74eb1a54f9
@@@ -466,39 -466,39 +466,39 @@@ static noinline struct btrfs_fs_device
  
  static int
  btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder,
 -                    int flush, struct bdev_handle **bdev_handle,
 +                    int flush, struct file **bdev_file,
                      struct btrfs_super_block **disk_super)
  {
        struct block_device *bdev;
        int ret;
  
 -      *bdev_handle = bdev_open_by_path(device_path, flags, holder, NULL);
 +      *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL);
  
 -      if (IS_ERR(*bdev_handle)) {
 -              ret = PTR_ERR(*bdev_handle);
 +      if (IS_ERR(*bdev_file)) {
 +              ret = PTR_ERR(*bdev_file);
                goto error;
        }
 -      bdev = (*bdev_handle)->bdev;
 +      bdev = file_bdev(*bdev_file);
  
        if (flush)
                sync_blockdev(bdev);
        ret = set_blocksize(bdev, BTRFS_BDEV_BLOCKSIZE);
        if (ret) {
 -              bdev_release(*bdev_handle);
 +              fput(*bdev_file);
                goto error;
        }
        invalidate_bdev(bdev);
        *disk_super = btrfs_read_dev_super(bdev);
        if (IS_ERR(*disk_super)) {
                ret = PTR_ERR(*disk_super);
 -              bdev_release(*bdev_handle);
 +              fput(*bdev_file);
                goto error;
        }
  
        return 0;
  
  error:
 -      *bdev_handle = NULL;
 +      *bdev_file = NULL;
        return ret;
  }
  
@@@ -641,7 -641,7 +641,7 @@@ static int btrfs_open_one_device(struc
                        struct btrfs_device *device, blk_mode_t flags,
                        void *holder)
  {
 -      struct bdev_handle *bdev_handle;
 +      struct file *bdev_file;
        struct btrfs_super_block *disk_super;
        u64 devid;
        int ret;
                return -EINVAL;
  
        ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
 -                                  &bdev_handle, &disk_super);
 +                                  &bdev_file, &disk_super);
        if (ret)
                return ret;
  
                clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
                fs_devices->seeding = true;
        } else {
 -              if (bdev_read_only(bdev_handle->bdev))
 +              if (bdev_read_only(file_bdev(bdev_file)))
                        clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
                else
                        set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
        }
  
 -      if (!bdev_nonrot(bdev_handle->bdev))
 +      if (!bdev_nonrot(file_bdev(bdev_file)))
                fs_devices->rotating = true;
  
 -      if (bdev_max_discard_sectors(bdev_handle->bdev))
 +      if (bdev_max_discard_sectors(file_bdev(bdev_file)))
                fs_devices->discardable = true;
  
 -      device->bdev_handle = bdev_handle;
 -      device->bdev = bdev_handle->bdev;
 +      device->bdev_file = bdev_file;
 +      device->bdev = file_bdev(bdev_file);
        clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
  
+       if (device->devt != device->bdev->bd_dev) {
+               btrfs_warn(NULL,
+                          "device %s maj:min changed from %d:%d to %d:%d",
+                          device->name->str, MAJOR(device->devt),
+                          MINOR(device->devt), MAJOR(device->bdev->bd_dev),
+                          MINOR(device->bdev->bd_dev));
+               device->devt = device->bdev->bd_dev;
+       }
        fs_devices->open_devices++;
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
            device->devid != BTRFS_DEV_REPLACE_DEVID) {
  
  error_free_page:
        btrfs_release_disk_super(disk_super);
 -      bdev_release(bdev_handle);
 +      fput(bdev_file);
  
        return -EINVAL;
  }
@@@ -1017,10 -1027,10 +1027,10 @@@ static void __btrfs_free_extra_devids(s
                if (device->devid == BTRFS_DEV_REPLACE_DEVID)
                        continue;
  
 -              if (device->bdev_handle) {
 -                      bdev_release(device->bdev_handle);
 +              if (device->bdev_file) {
 +                      fput(device->bdev_file);
                        device->bdev = NULL;
 -                      device->bdev_handle = NULL;
 +                      device->bdev_file = NULL;
                        fs_devices->open_devices--;
                }
                if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
@@@ -1065,7 -1075,7 +1075,7 @@@ static void btrfs_close_bdev(struct btr
                invalidate_bdev(device->bdev);
        }
  
 -      bdev_release(device->bdev_handle);
 +      fput(device->bdev_file);
  }
  
  static void btrfs_close_one_device(struct btrfs_device *device)
@@@ -1174,23 -1184,30 +1184,30 @@@ static int open_fs_devices(struct btrfs
        struct btrfs_device *device;
        struct btrfs_device *latest_dev = NULL;
        struct btrfs_device *tmp_device;
+       int ret = 0;
  
        list_for_each_entry_safe(device, tmp_device, &fs_devices->devices,
                                 dev_list) {
-               int ret;
+               int ret2;
  
-               ret = btrfs_open_one_device(fs_devices, device, flags, holder);
-               if (ret == 0 &&
+               ret2 = btrfs_open_one_device(fs_devices, device, flags, holder);
+               if (ret2 == 0 &&
                    (!latest_dev || device->generation > latest_dev->generation)) {
                        latest_dev = device;
-               } else if (ret == -ENODATA) {
+               } else if (ret2 == -ENODATA) {
                        fs_devices->num_devices--;
                        list_del(&device->dev_list);
                        btrfs_free_device(device);
                }
+               if (ret == 0 && ret2 != 0)
+                       ret = ret2;
        }
-       if (fs_devices->open_devices == 0)
+       if (fs_devices->open_devices == 0) {
+               if (ret)
+                       return ret;
                return -EINVAL;
+       }
  
        fs_devices->opened = 1;
        fs_devices->latest_dev = latest_dev;
@@@ -1303,47 -1320,6 +1320,47 @@@ int btrfs_forget_devices(dev_t devt
        return ret;
  }
  
 +static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
 +                                  const char *path, dev_t devt,
 +                                  bool mount_arg_dev)
 +{
 +      struct btrfs_fs_devices *fs_devices;
 +
 +      /*
 +       * Do not skip device registration for mounted devices with matching
 +       * maj:min but different paths. Booting without initrd relies on
 +       * /dev/root initially, later replaced with the actual root device.
 +       * A successful scan ensures grub2-probe selects the correct device.
 +       */
 +      list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
 +              struct btrfs_device *device;
 +
 +              mutex_lock(&fs_devices->device_list_mutex);
 +
 +              if (!fs_devices->opened) {
 +                      mutex_unlock(&fs_devices->device_list_mutex);
 +                      continue;
 +              }
 +
 +              list_for_each_entry(device, &fs_devices->devices, dev_list) {
 +                      if (device->bdev && (device->bdev->bd_dev == devt) &&
 +                          strcmp(device->name->str, path) != 0) {
 +                              mutex_unlock(&fs_devices->device_list_mutex);
 +
 +                              /* Do not skip registration. */
 +                              return false;
 +                      }
 +              }
 +              mutex_unlock(&fs_devices->device_list_mutex);
 +      }
 +
 +      if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
 +          !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING))
 +              return true;
 +
 +      return false;
 +}
 +
  /*
   * Look for a btrfs signature on a device. This may be called out of the mount path
   * and we are not allowed to call set_blocksize during the scan. The superblock
@@@ -1359,9 -1335,8 +1376,9 @@@ struct btrfs_device *btrfs_scan_one_dev
        struct btrfs_super_block *disk_super;
        bool new_device_added = false;
        struct btrfs_device *device = NULL;
 -      struct bdev_handle *bdev_handle;
 +      struct file *bdev_file;
        u64 bytenr, bytenr_orig;
 +      dev_t devt;
        int ret;
  
        lockdep_assert_held(&uuid_mutex);
         * values temporarily, as the device paths of the fsid are the only
         * required information for assembling the volume.
         */
 -      bdev_handle = bdev_open_by_path(path, flags, NULL, NULL);
 -      if (IS_ERR(bdev_handle))
 -              return ERR_CAST(bdev_handle);
 +      bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL);
 +      if (IS_ERR(bdev_file))
 +              return ERR_CAST(bdev_file);
  
        bytenr_orig = btrfs_sb_offset(0);
 -      ret = btrfs_sb_log_location_bdev(bdev_handle->bdev, 0, READ, &bytenr);
 +      ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr);
        if (ret) {
                device = ERR_PTR(ret);
                goto error_bdev_put;
        }
  
 -      disk_super = btrfs_read_disk_super(bdev_handle->bdev, bytenr,
 +      disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr,
                                           bytenr_orig);
        if (IS_ERR(disk_super)) {
                device = ERR_CAST(disk_super);
                goto error_bdev_put;
        }
  
 -      if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
 -          !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) {
 -              dev_t devt;
 +      devt = file_bdev(bdev_file)->bd_dev;
 +      if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) {
 +              pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
 +                        path, MAJOR(devt), MINOR(devt));
  
 -              ret = lookup_bdev(path, &devt);
 -              if (ret)
 -                      btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
 -                                 path, ret);
 -              else
 -                      btrfs_free_stale_devices(devt, NULL);
 +              btrfs_free_stale_devices(devt, NULL);
  
 -      pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n",
 -                      path, MAJOR(devt), MINOR(devt));
                device = NULL;
                goto free_disk_super;
        }
@@@ -1420,7 -1401,7 +1437,7 @@@ free_disk_super
        btrfs_release_disk_super(disk_super);
  
  error_bdev_put:
 -      bdev_release(bdev_handle);
 +      fput(bdev_file);
  
        return device;
  }
@@@ -2095,7 -2076,7 +2112,7 @@@ void btrfs_scratch_superblocks(struct b
  
  int btrfs_rm_device(struct btrfs_fs_info *fs_info,
                    struct btrfs_dev_lookup_args *args,
 -                  struct bdev_handle **bdev_handle)
 +                  struct file **bdev_file)
  {
        struct btrfs_trans_handle *trans;
        struct btrfs_device *device;
  
        btrfs_assign_next_active_device(device, NULL);
  
 -      if (device->bdev_handle) {
 +      if (device->bdev_file) {
                cur_devices->open_devices--;
                /* remove sysfs entry */
                btrfs_sysfs_remove_device(device);
         * free the device.
         *
         * We cannot call btrfs_close_bdev() here because we're holding the sb
 -       * write lock, and bdev_release() will pull in the ->open_mutex on
 -       * the block device and it's dependencies.  Instead just flush the
 -       * device and let the caller do the final bdev_release.
 +       * write lock, and fput() on the block device will pull in the
 +       * ->open_mutex on the block device and it's dependencies.  Instead
 +       *  just flush the device and let the caller do the final bdev_release.
         */
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                btrfs_scratch_superblocks(fs_info, device);
                }
        }
  
 -      *bdev_handle = device->bdev_handle;
 +      *bdev_file = device->bdev_file;
        synchronize_rcu();
        btrfs_free_device(device);
  
@@@ -2368,7 -2349,7 +2385,7 @@@ int btrfs_get_dev_args_from_path(struc
                                 const char *path)
  {
        struct btrfs_super_block *disk_super;
 -      struct bdev_handle *bdev_handle;
 +      struct file *bdev_file;
        int ret;
  
        if (!path || !path[0])
        }
  
        ret = btrfs_get_bdev_and_sb(path, BLK_OPEN_READ, NULL, 0,
 -                                  &bdev_handle, &disk_super);
 +                                  &bdev_file, &disk_super);
        if (ret) {
                btrfs_put_dev_args_from_path(args);
                return ret;
        else
                memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
        btrfs_release_disk_super(disk_super);
 -      bdev_release(bdev_handle);
 +      fput(bdev_file);
        return 0;
  }
  
@@@ -2619,7 -2600,7 +2636,7 @@@ int btrfs_init_new_device(struct btrfs_
        struct btrfs_root *root = fs_info->dev_root;
        struct btrfs_trans_handle *trans;
        struct btrfs_device *device;
 -      struct bdev_handle *bdev_handle;
 +      struct file *bdev_file;
        struct super_block *sb = fs_info->sb;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        struct btrfs_fs_devices *seed_devices = NULL;
        if (sb_rdonly(sb) && !fs_devices->seeding)
                return -EROFS;
  
 -      bdev_handle = bdev_open_by_path(device_path, BLK_OPEN_WRITE,
 +      bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE,
                                        fs_info->bdev_holder, NULL);
 -      if (IS_ERR(bdev_handle))
 -              return PTR_ERR(bdev_handle);
 +      if (IS_ERR(bdev_file))
 +              return PTR_ERR(bdev_file);
  
 -      if (!btrfs_check_device_zone_type(fs_info, bdev_handle->bdev)) {
 +      if (!btrfs_check_device_zone_type(fs_info, file_bdev(bdev_file))) {
                ret = -EINVAL;
                goto error;
        }
                locked = true;
        }
  
 -      sync_blockdev(bdev_handle->bdev);
 +      sync_blockdev(file_bdev(bdev_file));
  
        rcu_read_lock();
        list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
 -              if (device->bdev == bdev_handle->bdev) {
 +              if (device->bdev == file_bdev(bdev_file)) {
                        ret = -EEXIST;
                        rcu_read_unlock();
                        goto error;
        }
  
        device->fs_info = fs_info;
 -      device->bdev_handle = bdev_handle;
 -      device->bdev = bdev_handle->bdev;
 +      device->bdev_file = bdev_file;
 +      device->bdev = file_bdev(bdev_file);
        ret = lookup_bdev(device_path, &device->devt);
        if (ret)
                goto error_free_device;
@@@ -2853,7 -2834,7 +2870,7 @@@ error_free_zone
  error_free_device:
        btrfs_free_device(device);
  error:
 -      bdev_release(bdev_handle);
 +      fput(bdev_file);
        if (locked) {
                mutex_unlock(&uuid_mutex);
                up_write(&sb->s_umount);
diff --combined fs/btrfs/zoned.c
index 5a3d5ec75c5a94262c2431fce61c84d1e95f7512,459d1af02c3ce0d667ff854f13a31d2ad9260e7f..4cba80b34387c102a15299a69f1bd11fc0caff2f
@@@ -822,14 -822,11 +822,14 @@@ static int sb_log_location(struct block
                        reset = &zones[1];
  
                if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
 +                      unsigned int nofs_flags;
 +
                        ASSERT(sb_zone_is_full(reset));
  
 +                      nofs_flags = memalloc_nofs_save();
                        ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
 -                                             reset->start, reset->len,
 -                                             GFP_NOFS);
 +                                             reset->start, reset->len);
 +                      memalloc_nofs_restore(nofs_flags);
                        if (ret)
                                return ret;
  
@@@ -975,14 -972,11 +975,14 @@@ int btrfs_advance_sb_log(struct btrfs_d
                         * explicit ZONE_FINISH is not necessary.
                         */
                        if (zone->wp != zone->start + zone->capacity) {
 +                              unsigned int nofs_flags;
                                int ret;
  
 +                              nofs_flags = memalloc_nofs_save();
                                ret = blkdev_zone_mgmt(device->bdev,
                                                REQ_OP_ZONE_FINISH, zone->start,
 -                                              zone->len, GFP_NOFS);
 +                                              zone->len);
 +                              memalloc_nofs_restore(nofs_flags);
                                if (ret)
                                        return ret;
                        }
  
  int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
  {
 +      unsigned int nofs_flags;
        sector_t zone_sectors;
        sector_t nr_sectors;
        u8 zone_sectors_shift;
        u32 sb_zone;
        u32 nr_zones;
 +      int ret;
  
        zone_sectors = bdev_zone_sectors(bdev);
        zone_sectors_shift = ilog2(zone_sectors);
        if (sb_zone + 1 >= nr_zones)
                return -ENOENT;
  
 -      return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
 -                              zone_start_sector(sb_zone, bdev),
 -                              zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
 +      nofs_flags = memalloc_nofs_save();
 +      ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
 +                             zone_start_sector(sb_zone, bdev),
 +                             zone_sectors * BTRFS_NR_SB_LOG_ZONES);
 +      memalloc_nofs_restore(nofs_flags);
 +      return ret;
  }
  
  /*
@@@ -1133,14 -1122,12 +1133,14 @@@ static void btrfs_dev_clear_active_zone
  int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
                            u64 length, u64 *bytes)
  {
 +      unsigned int nofs_flags;
        int ret;
  
        *bytes = 0;
 +      nofs_flags = memalloc_nofs_save();
        ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET,
 -                             physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT,
 -                             GFP_NOFS);
 +                             physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT);
 +      memalloc_nofs_restore(nofs_flags);
        if (ret)
                return ret;
  
@@@ -1574,11 -1561,7 +1574,7 @@@ int btrfs_load_block_group_zone_info(st
        if (!map)
                return -EINVAL;
  
-       cache->physical_map = btrfs_clone_chunk_map(map, GFP_NOFS);
-       if (!cache->physical_map) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       cache->physical_map = map;
  
        zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
        if (!zone_info) {
@@@ -1690,7 -1673,6 +1686,6 @@@ out
        }
        bitmap_free(active);
        kfree(zone_info);
-       btrfs_free_chunk_map(map);
  
        return ret;
  }
@@@ -2175,6 -2157,7 +2170,7 @@@ static int do_zone_finish(struct btrfs_
        struct btrfs_chunk_map *map;
        const bool is_metadata = (block_group->flags &
                        (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM));
+       struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
        int ret = 0;
        int i;
  
        btrfs_clear_data_reloc_bg(block_group);
        spin_unlock(&block_group->lock);
  
+       down_read(&dev_replace->rwsem);
        map = block_group->physical_map;
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
                const u64 physical = map->stripes[i].physical;
                struct btrfs_zoned_device_info *zinfo = device->zone_info;
 +              unsigned int nofs_flags;
  
                if (zinfo->max_active_zones == 0)
                        continue;
  
 +              nofs_flags = memalloc_nofs_save();
                ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
                                       physical >> SECTOR_SHIFT,
 -                                     zinfo->zone_size >> SECTOR_SHIFT,
 -                                     GFP_NOFS);
 +                                     zinfo->zone_size >> SECTOR_SHIFT);
 +              memalloc_nofs_restore(nofs_flags);
  
-               if (ret)
+               if (ret) {
+                       up_read(&dev_replace->rwsem);
                        return ret;
+               }
  
                if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                        zinfo->reserved_active_zones++;
                btrfs_dev_clear_active_zone(device, physical);
        }
+       up_read(&dev_replace->rwsem);
  
        if (!fully_written)
                btrfs_dec_block_group_ro(block_group);
This page took 0.122886 seconds and 4 git commands to generate.