Merge tag 'vfs-6.10.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

author Linus Torvalds <[email protected]>
Mon, 13 May 2024 18:40:06 +0000 (11:40 -0700)
committer Linus Torvalds <[email protected]>
Mon, 13 May 2024 18:40:06 +0000 (11:40 -0700)
diff --combined block/bdev.c

index da2a167a4d08b66fe26c99826a9e84277f76f0e3,1322dfe32c5db393d0ce9241d6f29a3469db0424..2af3dca56f3db67692b3ab4d63b5cb6544e2a819
--- 1/block/bdev.c
--- 2/block/bdev.c
+++ b/block/bdev.c
@@@ -583,6 -583,9 +583,6 @@@ static void bd_finish_claiming(struct b
         mutex_unlock(&bdev->bd_holder_lock);
         bd_clear_claiming(whole, holder);
         mutex_unlock(&bdev_lock);
- -
- -      if (hops && hops->get_holder)
- -              hops->get_holder(holder);
   }
   
   /**
@@@ -605,6 -608,7 +605,6 @@@ EXPORT_SYMBOL(bd_abort_claiming)
   static void bd_end_claim(struct block_device *bdev, void *holder)
   {
         struct block_device *whole = bdev_whole(bdev);
- -      const struct blk_holder_ops *hops = bdev->bd_holder_ops;
         bool unblock = false;
   
         /*
@@@ -627,6 -631,9 +627,6 @@@
                 whole->bd_holder = NULL;
         mutex_unlock(&bdev_lock);
   
- -      if (hops && hops->put_holder)
- -              hops->put_holder(holder);
- -
         /*
          * If this was the last claim, remove holder link and unblock evpoll if
          * it was a write holder.
@@@ -645,14 -652,6 +645,14 @@@ static void blkdev_flush_mapping(struc
         bdev_write_inode(bdev);
   }
   
+ +static void blkdev_put_whole(struct block_device *bdev)
+ +{
+ +      if (atomic_dec_and_test(&bdev->bd_openers))
+ +              blkdev_flush_mapping(bdev);
+ +      if (bdev->bd_disk->fops->release)
+ +              bdev->bd_disk->fops->release(bdev->bd_disk);
+ +}
+ +
   static int blkdev_get_whole(struct block_device *bdev, blk_mode_t mode)
   {
         struct gendisk *disk = bdev->bd_disk;
@@@ -671,21 -670,20 +671,21 @@@
   
         if (!atomic_read(&bdev->bd_openers))
                 set_init_blocksize(bdev);
- -      if (test_bit(GD_NEED_PART_SCAN, &disk->state))
- -              bdev_disk_changed(disk, false);
         atomic_inc(&bdev->bd_openers);
+ +      if (test_bit(GD_NEED_PART_SCAN, &disk->state)) {
+ +              /*
+ +               * Only return scanning errors if we are called from contexts
+ +               * that explicitly want them, e.g. the BLKRRPART ioctl.
+ +               */
+ +              ret = bdev_disk_changed(disk, false);
+ +              if (ret && (mode & BLK_OPEN_STRICT_SCAN)) {
+ +                      blkdev_put_whole(bdev);
+ +                      return ret;
+ +              }
+ +      }
         return 0;
   }
   
- -static void blkdev_put_whole(struct block_device *bdev)
- -{
- -      if (atomic_dec_and_test(&bdev->bd_openers))
- -              blkdev_flush_mapping(bdev);
- -      if (bdev->bd_disk->fops->release)
- -              bdev->bd_disk->fops->release(bdev->bd_disk);
- -}
- -
   static int blkdev_get_part(struct block_device *part, blk_mode_t mode)
   {
         struct gendisk *disk = part->bd_disk;
@@@ -778,17 -776,17 +778,17 @@@ void blkdev_put_no_open(struct block_de
   
   static bool bdev_writes_blocked(struct block_device *bdev)
   {
- -      return bdev->bd_writers == -1;
+ +      return bdev->bd_writers < 0;
   }
   
   static void bdev_block_writes(struct block_device *bdev)
   {
- -      bdev->bd_writers = -1;
+ +      bdev->bd_writers--;
   }
   
   static void bdev_unblock_writes(struct block_device *bdev)
   {
- -      bdev->bd_writers = 0;
+ +      bdev->bd_writers++;
   }
   
   static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
@@@ -815,11 -813,6 +815,11 @@@ static void bdev_claim_write_access(str
                 bdev->bd_writers++;
   }
   
+ +static inline bool bdev_unclaimed(const struct file *bdev_file)
+ +{
+ +      return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
+ +}
+ +
   static void bdev_yield_write_access(struct file *bdev_file)
   {
         struct block_device *bdev;
@@@ -827,15 -820,14 +827,15 @@@
         if (bdev_allow_write_mounted)
                 return;
   
+ +      if (bdev_unclaimed(bdev_file))
+ +              return;
+ +
         bdev = file_bdev(bdev_file);
- -      /* Yield exclusive or shared write access. */
- -      if (bdev_file->f_mode & FMODE_WRITE) {
- -              if (bdev_writes_blocked(bdev))
- -                      bdev_unblock_writes(bdev);
- -              else
- -                      bdev->bd_writers--;
- -      }
+ +
+ +      if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
+ +              bdev_unblock_writes(bdev);
+ +      else if (bdev_file->f_mode & FMODE_WRITE)
+ +              bdev->bd_writers--;
   }
   
   /**
@@@ -882,7 -874,7 +882,7 @@@ int bdev_open(struct block_device *bdev
                 goto abort_claiming;
         ret = -EBUSY;
         if (!bdev_may_open(bdev, mode))
- -              goto abort_claiming;
+ +              goto put_module;
         if (bdev_is_partition(bdev))
                 ret = blkdev_get_part(bdev, mode);
         else
@@@ -912,11 -904,9 +912,11 @@@
                 disk_unblock_events(disk);
   
         bdev_file->f_flags |= O_LARGEFILE;
-       bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
+       bdev_file->f_mode |= FMODE_CAN_ODIRECT;
         if (bdev_nowait(bdev))
                 bdev_file->f_mode |= FMODE_NOWAIT;
+ +      if (mode & BLK_OPEN_RESTRICT_WRITES)
+ +              bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
         bdev_file->f_mapping = bdev->bd_inode->i_mapping;
         bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
         bdev_file->private_data = holder;
@@@ -1022,20 -1012,6 +1022,20 @@@ struct file *bdev_file_open_by_path(con
   }
   EXPORT_SYMBOL(bdev_file_open_by_path);
   
+ +static inline void bd_yield_claim(struct file *bdev_file)
+ +{
+ +      struct block_device *bdev = file_bdev(bdev_file);
+ +      void *holder = bdev_file->private_data;
+ +
+ +      lockdep_assert_held(&bdev->bd_disk->open_mutex);
+ +
+ +      if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
+ +              return;
+ +
+ +      if (!bdev_unclaimed(bdev_file))
+ +              bd_end_claim(bdev, holder);
+ +}
+ +
   void bdev_release(struct file *bdev_file)
   {
         struct block_device *bdev = file_bdev(bdev_file);
@@@ -1060,7 -1036,7 +1060,7 @@@
         bdev_yield_write_access(bdev_file);
   
         if (holder)
- -              bd_end_claim(bdev, holder);
+ +              bd_yield_claim(bdev_file);
   
         /*
          * Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@@ -1080,39 -1056,6 +1080,39 @@@ put_no_open
         blkdev_put_no_open(bdev);
   }
   
+ +/**
+ + * bdev_fput - yield claim to the block device and put the file
+ + * @bdev_file: open block device
+ + *
+ + * Yield claim on the block device and put the file. Ensure that the
+ + * block device can be reclaimed before the file is closed which is a
+ + * deferred operation.
+ + */
+ +void bdev_fput(struct file *bdev_file)
+ +{
+ +      if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
+ +              return;
+ +
+ +      if (bdev_file->private_data) {
+ +              struct block_device *bdev = file_bdev(bdev_file);
+ +              struct gendisk *disk = bdev->bd_disk;
+ +
+ +              mutex_lock(&disk->open_mutex);
+ +              bdev_yield_write_access(bdev_file);
+ +              bd_yield_claim(bdev_file);
+ +              /*
+ +               * Tell release we already gave up our hold on the
+ +               * device and if write restrictions are available that
+ +               * we already gave up write access to the device.
+ +               */
+ +              bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
+ +              mutex_unlock(&disk->open_mutex);
+ +      }
+ +
+ +      fput(bdev_file);
+ +}
+ +EXPORT_SYMBOL(bdev_fput);
+ +
   /**
    * lookup_bdev() - Look up a struct block_device by name.
    * @pathname: Name of the block device in the filesystem.
diff --combined fs/aio.c

index 0f4f531c97800c648437fb2eb7409ccc2b198536,fb22a17859c6d39cea5d28cbafc27b0f7b07d734..6ed5507cd33099047b7da049f0b13ae1b31d3895
--- 1/fs/aio.c
--- 2/fs/aio.c
+++ b/fs/aio.c
@@@ -122,7 -122,7 +122,7 @@@ struct kioctx 
         unsigned long           mmap_base;
         unsigned long           mmap_size;
   
-       struct page             **ring_pages;
+       struct folio            **ring_folios;
         long                    nr_pages;
   
         struct rcu_work         free_rwork;     /* see free_ioctx() */
@@@ -160,7 -160,7 +160,7 @@@
                 spinlock_t      completion_lock;
         } ____cacheline_aligned_in_smp;
   
-       struct page             *internal_pages[AIO_RING_PAGES];
+       struct folio            *internal_folios[AIO_RING_PAGES];
         struct file             *aio_ring_file;
   
         unsigned                id;
@@@ -334,19 -334,20 +334,20 @@@ static void aio_free_ring(struct kioct
         put_aio_ring_file(ctx);
   
         for (i = 0; i < ctx->nr_pages; i++) {
-               struct page *page;
-               pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
-                               page_count(ctx->ring_pages[i]));
-               page = ctx->ring_pages[i];
-               if (!page)
+               struct folio *folio = ctx->ring_folios[i];
+ 
+               if (!folio)
                         continue;
-               ctx->ring_pages[i] = NULL;
-               put_page(page);
+ 
+               pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
+                        folio_ref_count(folio));
+               ctx->ring_folios[i] = NULL;
+               folio_put(folio);
         }
   
-       if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
-               kfree(ctx->ring_pages);
-               ctx->ring_pages = NULL;
+       if (ctx->ring_folios && ctx->ring_folios != ctx->internal_folios) {
+               kfree(ctx->ring_folios);
+               ctx->ring_folios = NULL;
         }
   }
   
@@@ -441,7 -442,7 +442,7 @@@ static int aio_migrate_folio(struct add
         idx = src->index;
         if (idx < (pgoff_t)ctx->nr_pages) {
                 /* Make sure the old folio hasn't already been changed */
-               if (ctx->ring_pages[idx] != &src->page)
+               if (ctx->ring_folios[idx] != src)
                         rc = -EAGAIN;
         } else
                 rc = -EINVAL;
@@@ -465,8 -466,8 +466,8 @@@
          */
         spin_lock_irqsave(&ctx->completion_lock, flags);
         folio_migrate_copy(dst, src);
-       BUG_ON(ctx->ring_pages[idx] != &src->page);
-       ctx->ring_pages[idx] = &dst->page;
+       BUG_ON(ctx->ring_folios[idx] != src);
+       ctx->ring_folios[idx] = dst;
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
   
         /* The old folio is no longer accessible. */
@@@ -516,28 -517,30 +517,30 @@@ static int aio_setup_ring(struct kioct
         nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
                         / sizeof(struct io_event);
   
-       ctx->ring_pages = ctx->internal_pages;
+       ctx->ring_folios = ctx->internal_folios;
         if (nr_pages > AIO_RING_PAGES) {
-               ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
-                                         GFP_KERNEL);
-               if (!ctx->ring_pages) {
+               ctx->ring_folios = kcalloc(nr_pages, sizeof(struct folio *),
+                                          GFP_KERNEL);
+               if (!ctx->ring_folios) {
                         put_aio_ring_file(ctx);
                         return -ENOMEM;
                 }
         }
   
         for (i = 0; i < nr_pages; i++) {
-               struct page *page;
-               page = find_or_create_page(file->f_mapping,
-                                          i, GFP_USER | __GFP_ZERO);
-               if (!page)
+               struct folio *folio;
+ 
+               folio = __filemap_get_folio(file->f_mapping, i,
+                                           FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+                                           GFP_USER | __GFP_ZERO);
+               if (IS_ERR(folio))
                         break;
-               pr_debug("pid(%d) page[%d]->count=%d\n",
-                        current->pid, i, page_count(page));
-               SetPageUptodate(page);
-               unlock_page(page);
   
-               ctx->ring_pages[i] = page;
+               pr_debug("pid(%d) [%d] folio->count=%d\n", current->pid, i,
+                        folio_ref_count(folio));
+               folio_end_read(folio, true);
+ 
+               ctx->ring_folios[i] = folio;
         }
         ctx->nr_pages = i;
   
@@@ -570,7 -573,7 +573,7 @@@
         ctx->user_id = ctx->mmap_base;
         ctx->nr_events = nr_events; /* trusted copy */
   
-       ring = page_address(ctx->ring_pages[0]);
+       ring = folio_address(ctx->ring_folios[0]);
         ring->nr = nr_events;   /* user copy */
         ring->id = ~0U;
         ring->head = ring->tail = 0;
@@@ -578,7 -581,7 +581,7 @@@
         ring->compat_features = AIO_RING_COMPAT_FEATURES;
         ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
         ring->header_length = sizeof(struct aio_ring);
-       flush_dcache_page(ctx->ring_pages[0]);
+       flush_dcache_folio(ctx->ring_folios[0]);
   
         return 0;
   }
@@@ -689,9 -692,9 +692,9 @@@ static int ioctx_add_table(struct kioct
   
                                         /* While kioctx setup is in progress,
                                          * we are protected from page migration
-                                        * changes ring_pages by ->ring_lock.
+                                        * changes ring_folios by ->ring_lock.
                                          */
-                                       ring = page_address(ctx->ring_pages[0]);
+                                       ring = folio_address(ctx->ring_folios[0]);
                                         ring->id = ctx->id;
                                         return 0;
                                 }
@@@ -1033,7 -1036,7 +1036,7 @@@ static void user_refill_reqs_available(
                  * against ctx->completed_events below will make sure we do the
                  * safe/right thing.
                  */
-               ring = page_address(ctx->ring_pages[0]);
+               ring = folio_address(ctx->ring_folios[0]);
                 head = ring->head;
   
                 refill_reqs_available(ctx, head, ctx->tail);
@@@ -1145,12 -1148,12 +1148,12 @@@ static void aio_complete(struct aio_kio
         if (++tail >= ctx->nr_events)
                 tail = 0;
   
-       ev_page = page_address(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+       ev_page = folio_address(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
         event = ev_page + pos % AIO_EVENTS_PER_PAGE;
   
         *event = iocb->ki_res;
   
-       flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+       flush_dcache_folio(ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE]);
   
         pr_debug("%p[%u]: %p: %p %Lx %Lx %Lx\n", ctx, tail, iocb,
                  (void __user *)(unsigned long)iocb->ki_res.obj,
@@@ -1163,10 -1166,10 +1166,10 @@@
   
         ctx->tail = tail;
   
-       ring = page_address(ctx->ring_pages[0]);
+       ring = folio_address(ctx->ring_folios[0]);
         head = ring->head;
         ring->tail = tail;
-       flush_dcache_page(ctx->ring_pages[0]);
+       flush_dcache_folio(ctx->ring_folios[0]);
   
         ctx->completed_events++;
         if (ctx->completed_events > 1)
@@@ -1202,8 -1205,8 +1205,8 @@@
                 spin_lock_irqsave(&ctx->wait.lock, flags);
                 list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
                         if (avail >= curr->min_nr) {
- -                              list_del_init_careful(&curr->w.entry);
                                 wake_up_process(curr->w.private);
+ +                              list_del_init_careful(&curr->w.entry);
                         }
                 spin_unlock_irqrestore(&ctx->wait.lock, flags);
         }
@@@ -1238,8 -1241,8 +1241,8 @@@ static long aio_read_events_ring(struc
         sched_annotate_sleep();
         mutex_lock(&ctx->ring_lock);
   
-       /* Access to ->ring_pages here is protected by ctx->ring_lock. */
-       ring = page_address(ctx->ring_pages[0]);
+       /* Access to ->ring_folios here is protected by ctx->ring_lock. */
+       ring = folio_address(ctx->ring_folios[0]);
         head = ring->head;
         tail = ring->tail;
   
@@@ -1260,20 -1263,20 +1263,20 @@@
         while (ret < nr) {
                 long avail;
                 struct io_event *ev;
-               struct page *page;
+               struct folio *folio;
   
                 avail = (head <= tail ?  tail : ctx->nr_events) - head;
                 if (head == tail)
                         break;
   
                 pos = head + AIO_EVENTS_OFFSET;
-               page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
+               folio = ctx->ring_folios[pos / AIO_EVENTS_PER_PAGE];
                 pos %= AIO_EVENTS_PER_PAGE;
   
                 avail = min(avail, nr - ret);
                 avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
   
-               ev = page_address(page);
+               ev = folio_address(folio);
                 copy_ret = copy_to_user(event + ret, ev + pos,
                                         sizeof(*ev) * avail);
   
@@@ -1287,9 -1290,9 +1290,9 @@@
                 head %= ctx->nr_events;
         }
   
-       ring = page_address(ctx->ring_pages[0]);
+       ring = folio_address(ctx->ring_folios[0]);
         ring->head = head;
-       flush_dcache_page(ctx->ring_pages[0]);
+       flush_dcache_folio(ctx->ring_folios[0]);
   
         pr_debug("%li  h%u t%u\n", ret, head, tail);
   out:
diff --combined fs/bcachefs/fs.c

index 6f114803c6f23951189872cedf0f41144fa81b07,99a0abeadbe209fe674d712b1af789147491b10f..65b04b3c2679fe2cb12180f8cb9828ae0be9d0ab
--- 1/fs/bcachefs/fs.c
--- 2/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@@ -188,8 -188,7 +188,8 @@@ static struct bch_inode_info *bch2_inod
         BUG_ON(!old);
   
         if (unlikely(old != inode)) {
- -              discard_new_inode(&inode->v);
+ +              __destroy_inode(&inode->v);
+ +              kmem_cache_free(bch2_inode_cache, inode);
                 inode = old;
         } else {
                 mutex_lock(&c->vfs_inodes_lock);
@@@ -226,10 -225,8 +226,10 @@@ static struct bch_inode_info *bch2_new_
   
         if (unlikely(!inode)) {
                 int ret = drop_locks_do(trans, (inode = to_bch_ei(new_inode(c->vfs_sb))) ? 0 : -ENOMEM);
- -              if (ret && inode)
- -                      discard_new_inode(&inode->v);
+ +              if (ret && inode) {
+ +                      __destroy_inode(&inode->v);
+ +                      kmem_cache_free(bch2_inode_cache, inode);
+ +              }
                 if (ret)
                         return ERR_PTR(ret);
         }
@@@ -844,6 -841,9 +844,9 @@@ static int bch2_getattr(struct mnt_idma
         stat->blksize   = block_bytes(c);
         stat->blocks    = inode->v.i_blocks;
   
+       stat->subvol    = inode->ei_subvol;
+       stat->result_mask |= STATX_SUBVOL;
+ 
         if (request_mask & STATX_BTIME) {
                 stat->result_mask |= STATX_BTIME;
                 stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
@@@ -964,6 -964,7 +967,6 @@@ static int bch2_fiemap(struct inode *vi
         struct btree_iter iter;
         struct bkey_s_c k;
         struct bkey_buf cur, prev;
- -      struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
         unsigned offset_into_extent, sectors;
         bool have_extent = false;
         u32 snapshot;
@@@ -973,7 -974,6 +976,7 @@@
         if (ret)
                 return ret;
   
+ +      struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
         if (start + len < start)
                 return -EINVAL;
   
@@@ -2000,7 -2000,6 +2003,7 @@@ out
         return dget(sb->s_root);
   
   err_put_super:
+ +      __bch2_fs_stop(c);
         deactivate_locked_super(sb);
         return ERR_PTR(bch2_err_class(ret));
   }
diff --combined fs/btrfs/inode.c

index 7fed887e700c4e8e07b6ff7932434a384790b8da,8cf692c708d7955849220d403d8a3ec0dfd0b14a..f454ba34968350ec557a876f3e273be39b3ddc77
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -1145,13 -1145,13 +1145,13 @@@ static void submit_one_async_extent(str
                                    0, *alloc_hint, &ins, 1, 1);
         if (ret) {
                 /*
- -               * Here we used to try again by going back to non-compressed
- -               * path for ENOSPC.  But we can't reserve space even for
- -               * compressed size, how could it work for uncompressed size
- -               * which requires larger size?  So here we directly go error
- -               * path.
+ +               * We can't reserve contiguous space for the compressed size.
+ +               * Unlikely, but it's possible that we could have enough
+ +               * non-contiguous space for the uncompressed size instead.  So
+ +               * fall back to uncompressed.
                  */
- -              goto out_free;
+ +              submit_uncompressed_range(inode, async_extent, locked_page);
+ +              goto done;
         }
   
         /* Here we're doing allocation and writeback of the compressed pages */
@@@ -1203,6 -1203,7 +1203,6 @@@ done
   out_free_reserve:
         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
         btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
- -out_free:
         mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
         extent_clear_unlock_delalloc(inode, start, end,
                                      NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
@@@ -2532,7 -2533,7 +2532,7 @@@ void btrfs_clear_delalloc_extent(struc
                  */
                 if (bits & EXTENT_CLEAR_META_RESV &&
                     root != fs_info->tree_root)
- -                      btrfs_delalloc_release_metadata(inode, len, false);
+ +                      btrfs_delalloc_release_metadata(inode, len, true);
   
                 /* For sanity tests. */
                 if (btrfs_is_testing(fs_info))
@@@ -4502,7 -4503,6 +4502,7 @@@ int btrfs_delete_subvolume(struct btrfs
         struct btrfs_trans_handle *trans;
         struct btrfs_block_rsv block_rsv;
         u64 root_flags;
+ +      u64 qgroup_reserved = 0;
         int ret;
   
         down_write(&fs_info->subvol_sem);
@@@ -4547,20 -4547,12 +4547,20 @@@
         ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
         if (ret)
                 goto out_undead;
+ +      qgroup_reserved = block_rsv.qgroup_rsv_reserved;
   
         trans = btrfs_start_transaction(root, 0);
         if (IS_ERR(trans)) {
                 ret = PTR_ERR(trans);
                 goto out_release;
         }
+ +      ret = btrfs_record_root_in_trans(trans, root);
+ +      if (ret) {
+ +              btrfs_abort_transaction(trans, ret);
+ +              goto out_end_trans;
+ +      }
+ +      btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+ +      qgroup_reserved = 0;
         trans->block_rsv = &block_rsv;
         trans->bytes_reserved = block_rsv.size;
   
@@@ -4619,9 -4611,7 +4619,9 @@@ out_end_trans
         ret = btrfs_end_transaction(trans);
         inode->i_flags |= S_DEAD;
   out_release:
- -      btrfs_subvolume_release_metadata(root, &block_rsv);
+ +      btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+ +      if (qgroup_reserved)
+ +              btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
   out_undead:
         if (ret) {
                 spin_lock(&dest->root_item_lock);
@@@ -8789,6 -8779,9 +8789,9 @@@ static int btrfs_getattr(struct mnt_idm
         generic_fillattr(idmap, request_mask, inode, stat);
         stat->dev = BTRFS_I(inode)->root->anon_dev;
   
+       stat->subvol = BTRFS_I(inode)->root->root_key.objectid;
+       stat->result_mask |= STATX_SUBVOL;
+ 
         spin_lock(&BTRFS_I(inode)->lock);
         delalloc_bytes = BTRFS_I(inode)->new_delalloc_bytes;
         inode_bytes = inode_get_bytes(inode);
diff --combined fs/ext4/super.c

index 044135796f2b6ebe86e56b69f57501e7567d761b,2e906dbbe015de380b60cff297e4b087e16db284..3fce1b80c419588ef4d8874ec93f2e1c21cb1caf
--- 1/fs/ext4/super.c
--- 2/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@@ -1723,10 -1723,6 +1723,6 @@@ static const struct constant_table ext4
         {}
   };
   
- /* String parameter that allows empty argument */
- #define fsparam_string_empty(NAME, OPT) \
-       __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
- 
   /*
    * Mount option specification
    * We don't use fsparam_flag_no because of the way we set the
@@@ -5668,7 -5664,7 +5664,7 @@@ failed_mount
         brelse(sbi->s_sbh);
         if (sbi->s_journal_bdev_file) {
                 invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
- -              fput(sbi->s_journal_bdev_file);
+ +              bdev_fput(sbi->s_journal_bdev_file);
         }
   out_fail:
         invalidate_bdev(sb->s_bdev);
@@@ -5913,7 -5909,7 +5909,7 @@@ static struct file *ext4_get_journal_bl
   out_bh:
         brelse(bh);
   out_bdev:
- -      fput(bdev_file);
+ +      bdev_fput(bdev_file);
         return ERR_PTR(errno);
   }
   
@@@ -5952,7 -5948,7 +5948,7 @@@ static journal_t *ext4_open_dev_journal
   out_journal:
         jbd2_journal_destroy(journal);
   out_bdev:
- -      fput(bdev_file);
+ +      bdev_fput(bdev_file);
         return ERR_PTR(errno);
   }
   
@@@ -7327,7 -7323,7 +7323,7 @@@ static void ext4_kill_sb(struct super_b
         kill_block_super(sb);
   
         if (bdev_file)
- -              fput(bdev_file);
+ +              bdev_fput(bdev_file);
   }
   
   static struct file_system_type ext4_fs_type = {
diff --combined fs/namei.c

index c5b2a25be7d048b613a11ef77b6f2f37ec1d0142,8c1ff0b388e2bbcd87455e68b807a24038275b5e..cb5dde0e309f7a2d27aae1d00d0f12da4064e92e
--- 1/fs/namei.c
--- 2/fs/namei.c
+++ b/fs/namei.c
@@@ -2422,6 -2422,14 +2422,14 @@@ static const char *path_init(struct nam
                 if (!f.file)
                         return ERR_PTR(-EBADF);
   
+               if (flags & LOOKUP_LINKAT_EMPTY) {
+                       if (f.file->f_cred != current_cred() &&
+                           !ns_capable(f.file->f_cred->user_ns, CAP_DAC_READ_SEARCH)) {
+                               fdput(f);
+                               return ERR_PTR(-ENOENT);
+                       }
+               }
+ 
                 dentry = f.file->f_path.dentry;
   
                 if (*s && unlikely(!d_can_lookup(dentry))) {
@@@ -4050,8 -4058,6 +4058,8 @@@ retry
                 case 0: case S_IFREG:
                         error = vfs_create(idmap, path.dentry->d_inode,
                                            dentry, mode, true);
+ +                      if (!error)
+ +                              security_path_post_mknod(idmap, dentry);
                         break;
                 case S_IFCHR: case S_IFBLK:
                         error = vfs_mknod(idmap, path.dentry->d_inode,
@@@ -4062,6 -4068,11 +4070,6 @@@
                                           dentry, mode, 0);
                         break;
         }
- -
- -      if (error)
- -              goto out2;
- -
- -      security_path_post_mknod(idmap, dentry);
   out2:
         done_path_create(&path, dentry);
         if (retry_estale(error, lookup_flags)) {
@@@ -4641,14 -4652,13 +4649,13 @@@ int do_linkat(int olddfd, struct filena
                 goto out_putnames;
         }
         /*
-        * To use null names we require CAP_DAC_READ_SEARCH
+        * To use null names we require CAP_DAC_READ_SEARCH or
+        * that the open-time creds of the dfd matches current.
          * This ensures that not everyone will be able to create
-        * handlink using the passed filedescriptor.
+        * a hardlink using the passed file descriptor.
          */
-       if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
-               error = -ENOENT;
-               goto out_putnames;
-       }
+       if (flags & AT_EMPTY_PATH)
+               how |= LOOKUP_LINKAT_EMPTY;
   
         if (flags & AT_SYMLINK_FOLLOW)
                 how |= LOOKUP_FOLLOW;
diff --combined include/linux/fs.h

index 8dfd53b52744a4dfffb8ccb350364972658f00eb,5b351c1e6f58855bbe9eaebfca41f1cc8035ab15..de946a1fd845abe45c403791ae3c823720d9ca13
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -110,23 -110,24 +110,26 @@@ typedef int (dio_iodone_t)(struct kioc
    */
   
   /* file is open for reading */
- #define FMODE_READ            ((__force fmode_t)0x1)
+ #define FMODE_READ            ((__force fmode_t)(1 << 0))
   /* file is open for writing */
- #define FMODE_WRITE           ((__force fmode_t)0x2)
+ #define FMODE_WRITE           ((__force fmode_t)(1 << 1))
   /* file is seekable */
- #define FMODE_LSEEK           ((__force fmode_t)0x4)
+ #define FMODE_LSEEK           ((__force fmode_t)(1 << 2))
   /* file can be accessed using pread */
- #define FMODE_PREAD           ((__force fmode_t)0x8)
+ #define FMODE_PREAD           ((__force fmode_t)(1 << 3))
   /* file can be accessed using pwrite */
- #define FMODE_PWRITE          ((__force fmode_t)0x10)
+ #define FMODE_PWRITE          ((__force fmode_t)(1 << 4))
   /* File is opened for execution with sys_execve / sys_uselib */
- #define FMODE_EXEC            ((__force fmode_t)0x20)
+ #define FMODE_EXEC            ((__force fmode_t)(1 << 5))
+ +/* File writes are restricted (block device specific) */
- #define FMODE_WRITE_RESTRICTED  ((__force fmode_t)0x40)
++#define FMODE_WRITE_RESTRICTED        ((__force fmode_t)(1 << 6))
+ 
- -/* FMODE_* bits 6 to 8 */
++/* FMODE_* bits 7 to 8 */
+ 
   /* 32bit hashes as llseek() offset (for directories) */
- #define FMODE_32BITHASH         ((__force fmode_t)0x200)
+ #define FMODE_32BITHASH         ((__force fmode_t)(1 << 9))
   /* 64bit hashes as llseek() offset (for directories) */
- #define FMODE_64BITHASH         ((__force fmode_t)0x400)
+ #define FMODE_64BITHASH         ((__force fmode_t)(1 << 10))
   
   /*
    * Don't update ctime and mtime.
@@@ -134,60 -135,53 +137,53 @@@
    * Currently a special hack for the XFS open_by_handle ioctl, but we'll
    * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
    */
- #define FMODE_NOCMTIME                ((__force fmode_t)0x800)
+ #define FMODE_NOCMTIME                ((__force fmode_t)(1 << 11))
   
   /* Expect random access pattern */
- #define FMODE_RANDOM          ((__force fmode_t)0x1000)
+ #define FMODE_RANDOM          ((__force fmode_t)(1 << 12))
   
   /* File is huge (eg. /dev/mem): treat loff_t as unsigned */
- #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
+ #define FMODE_UNSIGNED_OFFSET ((__force fmode_t)(1 << 13))
   
   /* File is opened with O_PATH; almost nothing can be done with it */
- #define FMODE_PATH            ((__force fmode_t)0x4000)
+ #define FMODE_PATH            ((__force fmode_t)(1 << 14))
   
   /* File needs atomic accesses to f_pos */
- #define FMODE_ATOMIC_POS      ((__force fmode_t)0x8000)
+ #define FMODE_ATOMIC_POS      ((__force fmode_t)(1 << 15))
   /* Write access to underlying fs */
- #define FMODE_WRITER          ((__force fmode_t)0x10000)
+ #define FMODE_WRITER          ((__force fmode_t)(1 << 16))
   /* Has read method(s) */
- #define FMODE_CAN_READ          ((__force fmode_t)0x20000)
+ #define FMODE_CAN_READ          ((__force fmode_t)(1 << 17))
   /* Has write method(s) */
- #define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
+ #define FMODE_CAN_WRITE         ((__force fmode_t)(1 << 18))
   
- #define FMODE_OPENED          ((__force fmode_t)0x80000)
- #define FMODE_CREATED         ((__force fmode_t)0x100000)
+ #define FMODE_OPENED          ((__force fmode_t)(1 << 19))
+ #define FMODE_CREATED         ((__force fmode_t)(1 << 20))
   
   /* File is stream-like */
- #define FMODE_STREAM          ((__force fmode_t)0x200000)
+ #define FMODE_STREAM          ((__force fmode_t)(1 << 21))
   
   /* File supports DIRECT IO */
- #define       FMODE_CAN_ODIRECT       ((__force fmode_t)0x400000)
+ #define       FMODE_CAN_ODIRECT       ((__force fmode_t)(1 << 22))
   
- #define       FMODE_NOREUSE           ((__force fmode_t)0x800000)
+ #define       FMODE_NOREUSE           ((__force fmode_t)(1 << 23))
   
- /* File supports non-exclusive O_DIRECT writes from multiple threads */
- #define FMODE_DIO_PARALLEL_WRITE      ((__force fmode_t)0x1000000)
+ /* FMODE_* bit 24 */
   
   /* File is embedded in backing_file object */
- #define FMODE_BACKING         ((__force fmode_t)0x2000000)
+ #define FMODE_BACKING         ((__force fmode_t)(1 << 25))
   
   /* File was opened by fanotify and shouldn't generate fanotify events */
- #define FMODE_NONOTIFY                ((__force fmode_t)0x4000000)
+ #define FMODE_NONOTIFY                ((__force fmode_t)(1 << 26))
   
   /* File is capable of returning -EAGAIN if I/O will block */
- #define FMODE_NOWAIT          ((__force fmode_t)0x8000000)
+ #define FMODE_NOWAIT          ((__force fmode_t)(1 << 27))
   
   /* File represents mount that needs unmounting */
- #define FMODE_NEED_UNMOUNT    ((__force fmode_t)0x10000000)
+ #define FMODE_NEED_UNMOUNT    ((__force fmode_t)(1 << 28))
   
   /* File does not contribute to nr_files count */
- #define FMODE_NOACCOUNT               ((__force fmode_t)0x20000000)
- 
- /* File supports async buffered reads */
- #define FMODE_BUF_RASYNC      ((__force fmode_t)0x40000000)
- 
- /* File supports async nowait buffered writes */
- #define FMODE_BUF_WASYNC      ((__force fmode_t)0x80000000)
+ #define FMODE_NOACCOUNT               ((__force fmode_t)(1 << 29))
   
   /*
    * Attribute flags.  These should be or-ed together to figure out what
@@@ -1035,12 -1029,13 +1031,13 @@@ struct file_handle 
         __u32 handle_bytes;
         int handle_type;
         /* file identifier */
-       unsigned char f_handle[];
+       unsigned char f_handle[] __counted_by(handle_bytes);
   };
   
   static inline struct file *get_file(struct file *f)
   {
-       atomic_long_inc(&f->f_count);
+       long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
+       WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
         return f;
   }
   
@@@ -2003,8 -1998,11 +2000,11 @@@ struct iov_iter
   struct io_uring_cmd;
   struct offset_ctx;
   
+ typedef unsigned int __bitwise fop_flags_t;
+ 
   struct file_operations {
         struct module *owner;
+       fop_flags_t fop_flags;
         loff_t (*llseek) (struct file *, loff_t, int);
         ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
         ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
@@@ -2017,7 -2015,6 +2017,6 @@@
         long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
         long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
         int (*mmap) (struct file *, struct vm_area_struct *);
-       unsigned long mmap_supported_flags;
         int (*open) (struct inode *, struct file *);
         int (*flush) (struct file *, fl_owner_t id);
         int (*release) (struct inode *, struct file *);
@@@ -2048,6 -2045,17 +2047,17 @@@
                                 unsigned int poll_flags);
   } __randomize_layout;
   
+ /* Supports async buffered reads */
+ #define FOP_BUFFER_RASYNC     ((__force fop_flags_t)(1 << 0))
+ /* Supports async buffered writes */
+ #define FOP_BUFFER_WASYNC     ((__force fop_flags_t)(1 << 1))
+ /* Supports synchronous page faults for mappings */
+ #define FOP_MMAP_SYNC         ((__force fop_flags_t)(1 << 2))
+ /* Supports non-exclusive O_DIRECT writes from multiple threads */
+ #define FOP_DIO_PARALLEL_WRITE        ((__force fop_flags_t)(1 << 3))
+ /* Contains huge pages */
+ #define FOP_HUGE_PAGES                ((__force fop_flags_t)(1 << 4))
+ 
   /* Wrap a directory iterator that needs exclusive inode access */
   int wrap_directory_iterator(struct file *, struct dir_context *,
                             int (*) (struct file *, struct dir_context *));
@@@ -2253,7 -2261,13 +2263,13 @@@ static inline bool sb_rdonly(const stru
   
   #define IS_DEADDIR(inode)     ((inode)->i_flags & S_DEAD)
   #define IS_NOCMTIME(inode)    ((inode)->i_flags & S_NOCMTIME)
+ 
+ #ifdef CONFIG_SWAP
   #define IS_SWAPFILE(inode)    ((inode)->i_flags & S_SWAPFILE)
+ #else
+ #define IS_SWAPFILE(inode)    ((void)(inode), 0U)
+ #endif
+ 
   #define IS_PRIVATE(inode)     ((inode)->i_flags & S_PRIVATE)
   #define IS_IMA(inode)         ((inode)->i_flags & S_IMA)
   #define IS_AUTOMOUNT(inode)   ((inode)->i_flags & S_AUTOMOUNT)
@@@ -3340,6 -3354,8 +3356,8 @@@ void simple_offset_init(struct offset_c
   int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
   void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
   int simple_offset_empty(struct dentry *dentry);
+ int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
+                        struct inode *new_dir, struct dentry *new_dentry);
   int simple_offset_rename_exchange(struct inode *old_dir,
                                   struct dentry *old_dentry,
                                   struct inode *new_dir,
diff --combined io_uring/io_uring.c

index c170a2b8d2cf21f06d1c5af8bf57edecb94aaa95,d73c9ad2d2f89b548ca2719b6c70135bf6142b3b..380b9ce1d30144c9a5f7ded984b45d03f90bc813
--- 1/io_uring/io_uring.c
--- 2/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@@ -147,7 -147,6 +147,7 @@@ static bool io_uring_try_cancel_request
   static void io_queue_sqe(struct io_kiocb *req);
   
   struct kmem_cache *req_cachep;
+ +static struct workqueue_struct *iou_wq __ro_after_init;
   
   static int __read_mostly sysctl_io_uring_disabled;
   static int __read_mostly sysctl_io_uring_group = -1;
@@@ -351,6 -350,7 +351,6 @@@ static __cold struct io_ring_ctx *io_ri
   err:
         kfree(ctx->cancel_table.hbs);
         kfree(ctx->cancel_table_locked.hbs);
- -      kfree(ctx->io_bl);
         xa_destroy(&ctx->io_bl_xa);
         kfree(ctx);
         return NULL;
@@@ -471,7 -471,7 +471,7 @@@ static void io_prep_async_work(struct i
   
                 /* don't serialize this request if the fs doesn't need it */
                 if (should_hash && (req->file->f_flags & O_DIRECT) &&
-                   (req->file->f_mode & FMODE_DIO_PARALLEL_WRITE))
+                   (req->file->f_op->fop_flags & FOP_DIO_PARALLEL_WRITE))
                         should_hash = false;
                 if (should_hash || (ctx->flags & IORING_SETUP_IOPOLL))
                         io_wq_hash_work(&req->work, file_inode(req->file));
@@@ -1982,15 -1982,10 +1982,15 @@@ fail
                 err = -EBADFD;
                 if (!io_file_can_poll(req))
                         goto fail;
- -              err = -ECANCELED;
- -              if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
- -                      goto fail;
- -              return;
+ +              if (req->file->f_flags & O_NONBLOCK ||
+ +                  req->file->f_mode & FMODE_NOWAIT) {
+ +                      err = -ECANCELED;
+ +                      if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK)
+ +                              goto fail;
+ +                      return;
+ +              } else {
+ +                      req->flags &= ~REQ_F_APOLL_MULTISHOT;
+ +              }
         }
   
         if (req->flags & REQ_F_FORCE_ASYNC) {
@@@ -2602,6 -2597,19 +2602,6 @@@ static int io_cqring_wait(struct io_rin
         if (__io_cqring_events_user(ctx) >= min_events)
                 return 0;
   
- -      if (sig) {
- -#ifdef CONFIG_COMPAT
- -              if (in_compat_syscall())
- -                      ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
- -                                                    sigsz);
- -              else
- -#endif
- -                      ret = set_user_sigmask(sig, sigsz);
- -
- -              if (ret)
- -                      return ret;
- -      }
- -
         init_waitqueue_func_entry(&iowq.wq, io_wake_function);
         iowq.wq.private = current;
         INIT_LIST_HEAD(&iowq.wq.entry);
@@@ -2620,19 -2628,6 +2620,19 @@@
                 io_napi_adjust_timeout(ctx, &iowq, &ts);
         }
   
+ +      if (sig) {
+ +#ifdef CONFIG_COMPAT
+ +              if (in_compat_syscall())
+ +                      ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+ +                                                    sigsz);
+ +              else
+ +#endif
+ +                      ret = set_user_sigmask(sig, sigsz);
+ +
+ +              if (ret)
+ +                      return ret;
+ +      }
+ +
         io_napi_busy_loop(ctx, &iowq);
   
         trace_io_uring_cqring_wait(ctx, min_events);
@@@ -2931,6 -2926,7 +2931,6 @@@ static __cold void io_ring_ctx_free(str
         io_napi_free(ctx);
         kfree(ctx->cancel_table.hbs);
         kfree(ctx->cancel_table_locked.hbs);
- -      kfree(ctx->io_bl);
         xa_destroy(&ctx->io_bl_xa);
         kfree(ctx);
   }
@@@ -3165,7 -3161,7 +3165,7 @@@ static __cold void io_ring_ctx_wait_and
          * noise and overhead, there's no discernable change in runtime
          * over using system_wq.
          */
- -      queue_work(system_unbound_wq, &ctx->exit_work);
+ +      queue_work(iou_wq, &ctx->exit_work);
   }
   
   static int io_uring_release(struct inode *inode, struct file *file)
@@@ -3447,15 -3443,14 +3447,15 @@@ static void *io_uring_validate_mmap_req
                 ptr = ctx->sq_sqes;
                 break;
         case IORING_OFF_PBUF_RING: {
+ +              struct io_buffer_list *bl;
                 unsigned int bgid;
   
                 bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
- -              rcu_read_lock();
- -              ptr = io_pbuf_get_address(ctx, bgid);
- -              rcu_read_unlock();
- -              if (!ptr)
- -                      return ERR_PTR(-EINVAL);
+ +              bl = io_pbuf_get_bl(ctx, bgid);
+ +              if (IS_ERR(bl))
+ +                      return bl;
+ +              ptr = bl->buf_ring;
+ +              io_put_bl(ctx, bl);
                 break;
                 }
         default:
@@@ -4190,8 -4185,6 +4190,8 @@@ static int __init io_uring_init(void
         io_buf_cachep = KMEM_CACHE(io_buffer,
                                           SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
   
+ +      iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
+ +
   #ifdef CONFIG_SYSCTL
         register_sysctl_init("kernel", kernel_io_uring_disabled_table);
   #endif
diff --combined io_uring/rw.c

index c8d48287439e5a06d19113ecb07f1c05db47dc3f,d9dfde1142a1cbcb061abcfc89814f33f3c41b92..2382116aa4b20e99918ede2e40a9066ad619237f
--- 1/io_uring/rw.c
--- 2/io_uring/rw.c
+++ b/io_uring/rw.c
@@@ -683,7 -683,8 +683,8 @@@ static bool io_rw_should_retry(struct i
          * just use poll if we can, and don't attempt if the fs doesn't
          * support callback based unlocks
          */
-       if (io_file_can_poll(req) || !(req->file->f_mode & FMODE_BUF_RASYNC))
+       if (io_file_can_poll(req) ||
+           !(req->file->f_op->fop_flags & FOP_BUFFER_RASYNC))
                 return false;
   
         wait->wait.func = io_async_buf_func;
@@@ -936,13 -937,6 +937,13 @@@ int io_read_mshot(struct io_kiocb *req
   
         ret = __io_read(req, issue_flags);
   
+ +      /*
+ +       * If the file doesn't support proper NOWAIT, then disable multishot
+ +       * and stay in single shot mode.
+ +       */
+ +      if (!io_file_supports_nowait(req))
+ +              req->flags &= ~REQ_F_APOLL_MULTISHOT;
+ +
         /*
          * If we get -EAGAIN, recycle our buffer and just let normal poll
          * handling arm it.
@@@ -962,7 -956,7 +963,7 @@@
         /*
          * Any successful return value will keep the multishot read armed.
          */
- -      if (ret > 0) {
+ +      if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
                 /*
                  * Put our buffer and post a CQE. If we fail to post a CQE, then
                  * jump to the termination path. This request is then done.
@@@ -1029,10 -1023,10 +1030,10 @@@ int io_write(struct io_kiocb *req, unsi
                 if (unlikely(!io_file_supports_nowait(req)))
                         goto copy_iov;
   
-               /* File path supports NOWAIT for non-direct_IO only for block devices. */
+               /* Check if we can support NOWAIT. */
                 if (!(kiocb->ki_flags & IOCB_DIRECT) &&
-                       !(kiocb->ki_filp->f_mode & FMODE_BUF_WASYNC) &&
-                       (req->flags & REQ_F_ISREG))
+                   !(req->file->f_op->fop_flags & FOP_BUFFER_WASYNC) &&
+                   (req->flags & REQ_F_ISREG))
                         goto copy_iov;
   
                 kiocb->ki_flags |= IOCB_NOWAIT;
diff --combined mm/shmem.c

index 94ab99b6b574a461e34bb875fdec497ad24728ce,c0fb65223963d5a2d0c69980a81853c40ebccf86..1f84a41aeb850ebe4da955f81c044ad11dadaf9b
--- 1/mm/shmem.c
--- 2/mm/shmem.c
+++ b/mm/shmem.c
@@@ -748,6 -748,12 +748,6 @@@ static long shmem_unused_huge_count(str
   
   #define shmem_huge SHMEM_HUGE_DENY
   
- -bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
- -                 struct mm_struct *mm, unsigned long vm_flags)
- -{
- -      return false;
- -}
- -
   static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
                 struct shrink_control *sc, unsigned long nr_to_split)
   {
@@@ -3467,8 -3473,7 +3467,7 @@@ static int shmem_rename2(struct mnt_idm
                         return error;
         }
   
-       simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
-       error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
+       error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
         if (error)
                 return error;
author	Linus Torvalds <[email protected]>
	Mon, 13 May 2024 18:40:06 +0000 (11:40 -0700)
committer	Linus Torvalds <[email protected]>
	Mon, 13 May 2024 18:40:06 +0000 (11:40 -0700)
		1	2
block/bdev.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/aio.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/bcachefs/fs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
io_uring/io_uring.c	patch \|	diff1 \|	diff2 \|	blob \| history
io_uring/rw.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/shmem.c	patch \|	diff1 \|	diff2 \|	blob \| history