Merge tag 'pull-revalidate' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

author Linus Torvalds <[email protected]>

Thu, 30 Jan 2025 17:13:35 +0000 (09:13 -0800)

committer Linus Torvalds <[email protected]>

Thu, 30 Jan 2025 17:13:35 +0000 (09:13 -0800)
author Linus Torvalds <[email protected]>
Thu, 30 Jan 2025 17:13:35 +0000 (09:13 -0800)
committer Linus Torvalds <[email protected]>
Thu, 30 Jan 2025 17:13:35 +0000 (09:13 -0800)
diff --combined Documentation/filesystems/porting.rst

index c1c1210552047cf4412256b604bee1b278c76877,568e7ea3c4aea3cae4c561de709515db345f82ae..1639e78e314696527d62794d00d73c13f7850f97
--- 1/Documentation/filesystems/porting.rst
--- 2/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@@ -313,7 -313,7 +313,7 @@@ done
   
   **mandatory**
   
- -block truncatation on error exit from ->write_begin, and ->direct_IO
+ +block truncation on error exit from ->write_begin, and ->direct_IO
   moved from generic methods (block_write_begin, cont_write_begin,
   nobh_write_begin, blockdev_direct_IO*) to callers.  Take a look at
   ext2_write_failed and callers for an example.
@@@ -1141,3 -1141,19 +1141,19 @@@ pointer are gone
   
   set_blocksize() takes opened struct file instead of struct block_device now
   and it *must* be opened exclusive.
+ 
+ ---
+ 
+ **mandatory**
+ 
+ ->d_revalidate() gets two extra arguments - the inode of the parent directory
+ and the name our dentry is expected to have.  Both are stable (dir is pinned
+ in the non-RCU case and will stay around during the call in the RCU case, and
+ name is guaranteed to stay unchanged).  Your instance doesn't have to use
+ either, but it often helps to avoid a lot of painful boilerplate.
+ Note that while name->name is stable and NUL-terminated, it may (and
+ often will) have name->name[name->len] equal to '/' rather than '\0' -
+ in the normal case it points into the pathname being looked up.
+ NOTE: if you need something like full path from the root of filesystem,
+ you are still on your own - this assists with simple cases, but it's not
+ magic.
diff --combined fs/afs/dir.c

index a843c36fc471268502ee68688c555fcee5e7bd30,e04cffe4beb11fc9d08c857b9b589c2ef0f64b11..02cbf38e1a7762b95a97a31e4dcd1d8335c2df75
--- 1/fs/afs/dir.c
--- 2/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@@ -13,7 -13,6 +13,7 @@@
   #include <linux/ctype.h>
   #include <linux/sched.h>
   #include <linux/iversion.h>
+ +#include <linux/iov_iter.h>
   #include <linux/task_io_accounting_ops.h>
   #include "internal.h"
   #include "afs_fs.h"
@@@ -23,7 -22,8 +23,8 @@@ static struct dentry *afs_lookup(struc
                                  unsigned int flags);
   static int afs_dir_open(struct inode *inode, struct file *file);
   static int afs_readdir(struct file *file, struct dir_context *ctx);
- static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
+ static int afs_d_revalidate(struct inode *dir, const struct qstr *name,
+                           struct dentry *dentry, unsigned int flags);
   static int afs_d_delete(const struct dentry *dentry);
   static void afs_d_iput(struct dentry *dentry, struct inode *inode);
   static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
@@@ -43,6 -43,15 +44,6 @@@ static int afs_symlink(struct mnt_idma
   static int afs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                       struct dentry *old_dentry, struct inode *new_dir,
                       struct dentry *new_dentry, unsigned int flags);
- -static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags);
- -static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- -                                 size_t length);
- -
- -static bool afs_dir_dirty_folio(struct address_space *mapping,
- -              struct folio *folio)
- -{
- -      BUG(); /* This should never happen. */
- -}
   
   const struct file_operations afs_dir_file_operations = {
         .open           = afs_dir_open,
@@@ -67,7 -76,10 +68,7 @@@ const struct inode_operations afs_dir_i
   };
   
   const struct address_space_operations afs_dir_aops = {
- -      .dirty_folio    = afs_dir_dirty_folio,
- -      .release_folio  = afs_dir_release_folio,
- -      .invalidate_folio = afs_dir_invalidate_folio,
- -      .migrate_folio  = filemap_migrate_folio,
+ +      .writepages     = afs_single_writepages,
   };
   
   const struct dentry_operations afs_fs_dentry_operations = {
@@@ -88,124 -100,152 +89,124 @@@ struct afs_lookup_one_cookie 
   struct afs_lookup_cookie {
         struct dir_context      ctx;
         struct qstr             name;
- -      bool                    found;
- -      bool                    one_only;
         unsigned short          nr_fids;
         struct afs_fid          fids[50];
   };
   
+ +static void afs_dir_unuse_cookie(struct afs_vnode *dvnode, int ret)
+ +{
+ +      if (ret == 0) {
+ +              struct afs_vnode_cache_aux aux;
+ +              loff_t i_size = i_size_read(&dvnode->netfs.inode);
+ +
+ +              afs_set_cache_aux(dvnode, &aux);
+ +              fscache_unuse_cookie(afs_vnode_cache(dvnode), &aux, &i_size);
+ +      } else {
+ +              fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ +      }
+ +}
+ +
   /*
- - * Drop the refs that we're holding on the folios we were reading into.  We've
- - * got refs on the first nr_pages pages.
+ + * Iterate through a kmapped directory segment, dumping a summary of
+ + * the contents.
    */
- -static void afs_dir_read_cleanup(struct afs_read *req)
+ +static size_t afs_dir_dump_step(void *iter_base, size_t progress, size_t len,
+ +                              void *priv, void *priv2)
   {
- -      struct address_space *mapping = req->vnode->netfs.inode.i_mapping;
- -      struct folio *folio;
- -      pgoff_t last = req->nr_pages - 1;
+ +      do {
+ +              union afs_xdr_dir_block *block = iter_base;
   
- -      XA_STATE(xas, &mapping->i_pages, 0);
+ +              pr_warn("[%05zx] %32phN\n", progress, block);
+ +              iter_base += AFS_DIR_BLOCK_SIZE;
+ +              progress += AFS_DIR_BLOCK_SIZE;
+ +              len -= AFS_DIR_BLOCK_SIZE;
+ +      } while (len > 0);
   
- -      if (unlikely(!req->nr_pages))
- -              return;
+ +      return len;
+ +}
   
- -      rcu_read_lock();
- -      xas_for_each(&xas, folio, last) {
- -              if (xas_retry(&xas, folio))
- -                      continue;
- -              BUG_ON(xa_is_value(folio));
- -              ASSERTCMP(folio->mapping, ==, mapping);
+ +/*
+ + * Dump the contents of a directory.
+ + */
+ +static void afs_dir_dump(struct afs_vnode *dvnode)
+ +{
+ +      struct iov_iter iter;
+ +      unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
   
- -              folio_put(folio);
- -      }
+ +      pr_warn("DIR %llx:%llx is=%llx\n",
+ +              dvnode->fid.vid, dvnode->fid.vnode, i_size);
   
- -      rcu_read_unlock();
+ +      iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ +      iterate_folioq(&iter, iov_iter_count(&iter), NULL, NULL,
+ +                     afs_dir_dump_step);
   }
   
   /*
    * check that a directory folio is valid
    */
- -static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
- -                              loff_t i_size)
+ +static bool afs_dir_check_block(struct afs_vnode *dvnode, size_t progress,
+ +                              union afs_xdr_dir_block *block)
   {
- -      union afs_xdr_dir_block *block;
- -      size_t offset, size;
- -      loff_t pos;
+ +      if (block->hdr.magic != AFS_DIR_MAGIC) {
+ +              pr_warn("%s(%lx): [%zx] bad magic %04x\n",
+ +                     __func__, dvnode->netfs.inode.i_ino,
+ +                     progress, ntohs(block->hdr.magic));
+ +              trace_afs_dir_check_failed(dvnode, progress);
+ +              trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
+ +              return false;
+ +      }
   
- -      /* Determine how many magic numbers there should be in this folio, but
- -       * we must take care because the directory may change size under us.
+ +      /* Make sure each block is NUL terminated so we can reasonably
+ +       * use string functions on it.  The filenames in the folio
+ +       * *should* be NUL-terminated anyway.
          */
- -      pos = folio_pos(folio);
- -      if (i_size <= pos)
- -              goto checked;
- -
- -      size = min_t(loff_t, folio_size(folio), i_size - pos);
- -      for (offset = 0; offset < size; offset += sizeof(*block)) {
- -              block = kmap_local_folio(folio, offset);
- -              if (block->hdr.magic != AFS_DIR_MAGIC) {
- -                      printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
- -                             __func__, dvnode->netfs.inode.i_ino,
- -                             pos, offset, size, ntohs(block->hdr.magic));
- -                      trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
- -                      kunmap_local(block);
- -                      trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
- -                      goto error;
- -              }
- -
- -              /* Make sure each block is NUL terminated so we can reasonably
- -               * use string functions on it.  The filenames in the folio
- -               * *should* be NUL-terminated anyway.
- -               */
- -              ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
- -
- -              kunmap_local(block);
- -      }
- -checked:
+ +      ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
         afs_stat_v(dvnode, n_read_dir);
         return true;
- -
- -error:
- -      return false;
   }
   
   /*
- - * Dump the contents of a directory.
+ + * Iterate through a kmapped directory segment, checking the content.
    */
- -static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
+ +static size_t afs_dir_check_step(void *iter_base, size_t progress, size_t len,
+ +                               void *priv, void *priv2)
   {
- -      union afs_xdr_dir_block *block;
- -      struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- -      struct folio *folio;
- -      pgoff_t last = req->nr_pages - 1;
- -      size_t offset, size;
- -
- -      XA_STATE(xas, &mapping->i_pages, 0);
- -
- -      pr_warn("DIR %llx:%llx f=%llx l=%llx al=%llx\n",
- -              dvnode->fid.vid, dvnode->fid.vnode,
- -              req->file_size, req->len, req->actual_len);
- -      pr_warn("DIR %llx %x %zx %zx\n",
- -              req->pos, req->nr_pages,
- -              req->iter->iov_offset,  iov_iter_count(req->iter));
- -
- -      xas_for_each(&xas, folio, last) {
- -              if (xas_retry(&xas, folio))
- -                      continue;
+ +      struct afs_vnode *dvnode = priv;
   
- -              BUG_ON(folio->mapping != mapping);
+ +      if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ +                       len % AFS_DIR_BLOCK_SIZE))
+ +              return len;
   
- -              size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
- -              for (offset = 0; offset < size; offset += sizeof(*block)) {
- -                      block = kmap_local_folio(folio, offset);
- -                      pr_warn("[%02lx] %32phN\n", folio->index + offset, block);
- -                      kunmap_local(block);
- -              }
- -      }
+ +      do {
+ +              if (!afs_dir_check_block(dvnode, progress, iter_base))
+ +                      break;
+ +              iter_base += AFS_DIR_BLOCK_SIZE;
+ +              len -= AFS_DIR_BLOCK_SIZE;
+ +      } while (len > 0);
+ +
+ +      return len;
   }
   
   /*
- - * Check all the blocks in a directory.  All the folios are held pinned.
+ + * Check all the blocks in a directory.
    */
- -static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
+ +static int afs_dir_check(struct afs_vnode *dvnode)
   {
- -      struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- -      struct folio *folio;
- -      pgoff_t last = req->nr_pages - 1;
- -      int ret = 0;
+ +      struct iov_iter iter;
+ +      unsigned long long i_size = i_size_read(&dvnode->netfs.inode);
+ +      size_t checked = 0;
   
- -      XA_STATE(xas, &mapping->i_pages, 0);
- -
- -      if (unlikely(!req->nr_pages))
+ +      if (unlikely(!i_size))
                 return 0;
   
- -      rcu_read_lock();
- -      xas_for_each(&xas, folio, last) {
- -              if (xas_retry(&xas, folio))
- -                      continue;
- -
- -              BUG_ON(folio->mapping != mapping);
- -
- -              if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
- -                      afs_dir_dump(dvnode, req);
- -                      ret = -EIO;
- -                      break;
- -              }
+ +      iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ +      checked = iterate_folioq(&iter, iov_iter_count(&iter), dvnode, NULL,
+ +                               afs_dir_check_step);
+ +      if (checked != i_size) {
+ +              afs_dir_dump(dvnode);
+ +              return -EIO;
         }
- -
- -      rcu_read_unlock();
- -      return ret;
+ +      return 0;
   }
   
   /*
@@@ -225,140 -265,134 +226,140 @@@ static int afs_dir_open(struct inode *i
   }
   
   /*
- - * Read the directory into the pagecache in one go, scrubbing the previous
- - * contents.  The list of folios is returned, pinning them so that they don't
- - * get reclaimed during the iteration.
+ + * Read a file in a single download.
    */
- -static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
- -      __acquires(&dvnode->validate_lock)
+ +static ssize_t afs_do_read_single(struct afs_vnode *dvnode, struct file *file)
   {
- -      struct address_space *mapping = dvnode->netfs.inode.i_mapping;
- -      struct afs_read *req;
+ +      struct iov_iter iter;
+ +      ssize_t ret;
         loff_t i_size;
- -      int nr_pages, i;
- -      int ret;
- -      loff_t remote_size = 0;
- -
- -      _enter("");
+ +      bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ +                     !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
   
- -      req = kzalloc(sizeof(*req), GFP_KERNEL);
- -      if (!req)
- -              return ERR_PTR(-ENOMEM);
- -
- -      refcount_set(&req->usage, 1);
- -      req->vnode = dvnode;
- -      req->key = key_get(key);
- -      req->cleanup = afs_dir_read_cleanup;
- -
- -expand:
         i_size = i_size_read(&dvnode->netfs.inode);
- -      if (i_size < remote_size)
- -          i_size = remote_size;
- -      if (i_size < 2048) {
- -              ret = afs_bad(dvnode, afs_file_error_dir_small);
- -              goto error;
- -      }
- -      if (i_size > 2048 * 1024) {
- -              trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
- -              ret = -EFBIG;
- -              goto error;
+ +      if (is_dir) {
+ +              if (i_size < AFS_DIR_BLOCK_SIZE)
+ +                      return afs_bad(dvnode, afs_file_error_dir_small);
+ +              if (i_size > AFS_DIR_BLOCK_SIZE * 1024) {
+ +                      trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ +                      return -EFBIG;
+ +              }
+ +      } else {
+ +              if (i_size > AFSPATHMAX) {
+ +                      trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
+ +                      return -EFBIG;
+ +              }
         }
   
- -      _enter("%llu", i_size);
+ +      /* Expand the storage.  TODO: Shrink the storage too. */
+ +      if (dvnode->directory_size < i_size) {
+ +              size_t cur_size = dvnode->directory_size;
   
- -      nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
+ +              ret = netfs_alloc_folioq_buffer(NULL,
+ +                                              &dvnode->directory, &cur_size, i_size,
+ +                                              mapping_gfp_mask(dvnode->netfs.inode.i_mapping));
+ +              dvnode->directory_size = cur_size;
+ +              if (ret < 0)
+ +                      return ret;
+ +      }
   
- -      req->actual_len = i_size; /* May change */
- -      req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
- -      req->data_version = dvnode->status.data_version; /* May change */
- -      iov_iter_xarray(&req->def_iter, ITER_DEST, &dvnode->netfs.inode.i_mapping->i_pages,
- -                      0, i_size);
- -      req->iter = &req->def_iter;
+ +      iov_iter_folio_queue(&iter, ITER_DEST, dvnode->directory, 0, 0, dvnode->directory_size);
   
- -      /* Fill in any gaps that we might find where the memory reclaimer has
- -       * been at work and pin all the folios.  If there are any gaps, we will
- -       * need to reread the entire directory contents.
+ +      /* AFS requires us to perform the read of a directory synchronously as
+ +       * a single unit to avoid issues with the directory contents being
+ +       * changed between reads.
          */
- -      i = req->nr_pages;
- -      while (i < nr_pages) {
- -              struct folio *folio;
- -
- -              folio = filemap_get_folio(mapping, i);
- -              if (IS_ERR(folio)) {
- -                      if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- -                              afs_stat_v(dvnode, n_inval);
- -                      folio = __filemap_get_folio(mapping,
- -                                                  i, FGP_LOCK | FGP_CREAT,
- -                                                  mapping->gfp_mask);
- -                      if (IS_ERR(folio)) {
- -                              ret = PTR_ERR(folio);
- -                              goto error;
- -                      }
- -                      folio_attach_private(folio, (void *)1);
- -                      folio_unlock(folio);
+ +      ret = netfs_read_single(&dvnode->netfs.inode, file, &iter);
+ +      if (ret >= 0) {
+ +              i_size = i_size_read(&dvnode->netfs.inode);
+ +              if (i_size > ret) {
+ +                      /* The content has grown, so we need to expand the
+ +                       * buffer.
+ +                       */
+ +                      ret = -ESTALE;
+ +              } else if (is_dir) {
+ +                      int ret2 = afs_dir_check(dvnode);
+ +
+ +                      if (ret2 < 0)
+ +                              ret = ret2;
+ +              } else if (i_size < folioq_folio_size(dvnode->directory, 0)) {
+ +                      /* NUL-terminate a symlink. */
+ +                      char *symlink = kmap_local_folio(folioq_folio(dvnode->directory, 0), 0);
+ +
+ +                      symlink[i_size] = 0;
+ +                      kunmap_local(symlink);
                 }
- -
- -              req->nr_pages += folio_nr_pages(folio);
- -              i += folio_nr_pages(folio);
         }
   
- -      /* If we're going to reload, we need to lock all the pages to prevent
- -       * races.
- -       */
+ +      return ret;
+ +}
+ +
+ +ssize_t afs_read_single(struct afs_vnode *dvnode, struct file *file)
+ +{
+ +      ssize_t ret;
+ +
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), false);
+ +      ret = afs_do_read_single(dvnode, file);
+ +      fscache_unuse_cookie(afs_vnode_cache(dvnode), NULL, NULL);
+ +      return ret;
+ +}
+ +
+ +/*
+ + * Read the directory into a folio_queue buffer in one go, scrubbing the
+ + * previous contents.  We return -ESTALE if the caller needs to call us again.
+ + */
+ +ssize_t afs_read_dir(struct afs_vnode *dvnode, struct file *file)
+ +      __acquires(&dvnode->validate_lock)
+ +{
+ +      ssize_t ret;
+ +      loff_t i_size;
+ +
+ +      i_size = i_size_read(&dvnode->netfs.inode);
+ +
         ret = -ERESTARTSYS;
         if (down_read_killable(&dvnode->validate_lock) < 0)
                 goto error;
   
- -      if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- -              goto success;
+ +      /* We only need to reread the data if it became invalid - or if we
+ +       * haven't read it yet.
+ +       */
+ +      if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
+ +          test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ +              ret = i_size;
+ +              goto valid;
+ +      }
   
         up_read(&dvnode->validate_lock);
         if (down_write_killable(&dvnode->validate_lock) < 0)
                 goto error;
   
- -      if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
- -              trace_afs_reload_dir(dvnode);
- -              ret = afs_fetch_data(dvnode, req);
- -              if (ret < 0)
- -                      goto error_unlock;
- -
- -              task_io_account_read(PAGE_SIZE * req->nr_pages);
- -
- -              if (req->len < req->file_size) {
- -                      /* The content has grown, so we need to expand the
- -                       * buffer.
- -                       */
- -                      up_write(&dvnode->validate_lock);
- -                      remote_size = req->file_size;
- -                      goto expand;
- -              }
+ +      if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
+ +              afs_invalidate_cache(dvnode, 0);
   
- -              /* Validate the data we just read. */
- -              ret = afs_dir_check(dvnode, req);
+ +      if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) ||
+ +          !test_bit(AFS_VNODE_DIR_READ, &dvnode->flags)) {
+ +              trace_afs_reload_dir(dvnode);
+ +              ret = afs_read_single(dvnode, file);
                 if (ret < 0)
                         goto error_unlock;
   
                 // TODO: Trim excess pages
   
                 set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags);
+ +              set_bit(AFS_VNODE_DIR_READ, &dvnode->flags);
+ +      } else {
+ +              ret = i_size;
         }
   
         downgrade_write(&dvnode->validate_lock);
- -success:
- -      return req;
+ +valid:
+ +      return ret;
   
   error_unlock:
         up_write(&dvnode->validate_lock);
   error:
- -      afs_put_read(req);
- -      _leave(" = %d", ret);
- -      return ERR_PTR(ret);
+ +      _leave(" = %zd", ret);
+ +      return ret;
   }
   
   /*
@@@ -366,69 -400,79 +367,69 @@@
    */
   static int afs_dir_iterate_block(struct afs_vnode *dvnode,
                                  struct dir_context *ctx,
- -                               union afs_xdr_dir_block *block,
- -                               unsigned blkoff)
+ +                               union afs_xdr_dir_block *block)
   {
         union afs_xdr_dirent *dire;
- -      unsigned offset, next, curr, nr_slots;
+ +      unsigned int blknum, base, hdr, pos, next, nr_slots;
         size_t nlen;
         int tmp;
   
- -      _enter("%llx,%x", ctx->pos, blkoff);
+ +      blknum  = ctx->pos / AFS_DIR_BLOCK_SIZE;
+ +      base    = blknum * AFS_DIR_SLOTS_PER_BLOCK;
+ +      hdr     = (blknum == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
+ +      pos     = DIV_ROUND_UP(ctx->pos, AFS_DIR_DIRENT_SIZE) - base;
   
- -      curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
+ +      _enter("%llx,%x", ctx->pos, blknum);
   
         /* walk through the block, an entry at a time */
- -      for (offset = (blkoff == 0 ? AFS_DIR_RESV_BLOCKS0 : AFS_DIR_RESV_BLOCKS);
- -           offset < AFS_DIR_SLOTS_PER_BLOCK;
- -           offset = next
- -           ) {
+ +      for (unsigned int slot = hdr; slot < AFS_DIR_SLOTS_PER_BLOCK; slot = next) {
                 /* skip entries marked unused in the bitmap */
- -              if (!(block->hdr.bitmap[offset / 8] &
- -                    (1 << (offset % 8)))) {
- -                      _debug("ENT[%zu.%u]: unused",
- -                             blkoff / sizeof(union afs_xdr_dir_block), offset);
- -                      next = offset + 1;
- -                      if (offset >= curr)
- -                              ctx->pos = blkoff +
- -                                      next * sizeof(union afs_xdr_dirent);
+ +              if (!(block->hdr.bitmap[slot / 8] &
+ +                    (1 << (slot % 8)))) {
+ +                      _debug("ENT[%x]: Unused", base + slot);
+ +                      next = slot + 1;
+ +                      if (next >= pos)
+ +                              ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
                         continue;
                 }
   
                 /* got a valid entry */
- -              dire = &block->dirents[offset];
+ +              dire = &block->dirents[slot];
                 nlen = strnlen(dire->u.name,
- -                             sizeof(*block) -
- -                             offset * sizeof(union afs_xdr_dirent));
+ +                             (unsigned long)(block + 1) - (unsigned long)dire->u.name - 1);
                 if (nlen > AFSNAMEMAX - 1) {
- -                      _debug("ENT[%zu]: name too long (len %u/%zu)",
- -                             blkoff / sizeof(union afs_xdr_dir_block),
- -                             offset, nlen);
+ +                      _debug("ENT[%x]: Name too long (len %zx)",
+ +                             base + slot, nlen);
                         return afs_bad(dvnode, afs_file_error_dir_name_too_long);
                 }
   
- -              _debug("ENT[%zu.%u]: %s %zu \"%s\"",
- -                     blkoff / sizeof(union afs_xdr_dir_block), offset,
- -                     (offset < curr ? "skip" : "fill"),
+ +              _debug("ENT[%x]: %s %zx \"%s\"",
+ +                     base + slot, (slot < pos ? "skip" : "fill"),
                        nlen, dire->u.name);
   
                 nr_slots = afs_dir_calc_slots(nlen);
- -              next = offset + nr_slots;
+ +              next = slot + nr_slots;
                 if (next > AFS_DIR_SLOTS_PER_BLOCK) {
- -                      _debug("ENT[%zu.%u]:"
- -                             " %u extends beyond end dir block"
- -                             " (len %zu)",
- -                             blkoff / sizeof(union afs_xdr_dir_block),
- -                             offset, next, nlen);
+ +                      _debug("ENT[%x]: extends beyond end dir block (len %zx)",
+ +                             base + slot, nlen);
                         return afs_bad(dvnode, afs_file_error_dir_over_end);
                 }
   
                 /* Check that the name-extension dirents are all allocated */
                 for (tmp = 1; tmp < nr_slots; tmp++) {
- -                      unsigned int ix = offset + tmp;
- -                      if (!(block->hdr.bitmap[ix / 8] & (1 << (ix % 8)))) {
- -                              _debug("ENT[%zu.u]:"
- -                                     " %u unmarked extension (%u/%u)",
- -                                     blkoff / sizeof(union afs_xdr_dir_block),
- -                                     offset, tmp, nr_slots);
+ +                      unsigned int xslot = slot + tmp;
+ +
+ +                      if (!(block->hdr.bitmap[xslot / 8] & (1 << (xslot % 8)))) {
+ +                              _debug("ENT[%x]: Unmarked extension (%x/%x)",
+ +                                     base + slot, tmp, nr_slots);
                                 return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
                         }
                 }
   
                 /* skip if starts before the current position */
- -              if (offset < curr) {
- -                      if (next > curr)
- -                              ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ +              if (slot < pos) {
+ +                      if (next > pos)
+ +                              ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
                         continue;
                 }
   
@@@ -442,110 -486,75 +443,110 @@@
                         return 0;
                 }
   
- -              ctx->pos = blkoff + next * sizeof(union afs_xdr_dirent);
+ +              ctx->pos = (base + next) * sizeof(union afs_xdr_dirent);
         }
   
         _leave(" = 1 [more]");
         return 1;
   }
   
+ +struct afs_dir_iteration_ctx {
+ +      struct dir_context      *dir_ctx;
+ +      int                     error;
+ +};
+ +
   /*
- - * iterate through the data blob that lists the contents of an AFS directory
+ + * Iterate through a kmapped directory segment.
    */
- -static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
- -                         struct key *key, afs_dataversion_t *_dir_version)
+ +static size_t afs_dir_iterate_step(void *iter_base, size_t progress, size_t len,
+ +                                 void *priv, void *priv2)
   {
- -      struct afs_vnode *dvnode = AFS_FS_I(dir);
- -      union afs_xdr_dir_block *dblock;
- -      struct afs_read *req;
- -      struct folio *folio;
- -      unsigned offset, size;
+ +      struct afs_dir_iteration_ctx *ctx = priv2;
+ +      struct afs_vnode *dvnode = priv;
         int ret;
   
- -      _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
- -
- -      if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
- -              _leave(" = -ESTALE");
- -              return -ESTALE;
+ +      if (WARN_ON_ONCE(progress % AFS_DIR_BLOCK_SIZE ||
+ +                       len % AFS_DIR_BLOCK_SIZE)) {
+ +              pr_err("Mis-iteration prog=%zx len=%zx\n",
+ +                     progress % AFS_DIR_BLOCK_SIZE,
+ +                     len % AFS_DIR_BLOCK_SIZE);
+ +              return len;
         }
   
- -      req = afs_read_dir(dvnode, key);
- -      if (IS_ERR(req))
- -              return PTR_ERR(req);
- -      *_dir_version = req->data_version;
+ +      do {
+ +              ret = afs_dir_iterate_block(dvnode, ctx->dir_ctx, iter_base);
+ +              if (ret != 1)
+ +                      break;
   
- -      /* round the file position up to the next entry boundary */
- -      ctx->pos += sizeof(union afs_xdr_dirent) - 1;
- -      ctx->pos &= ~(sizeof(union afs_xdr_dirent) - 1);
+ +              ctx->dir_ctx->pos = round_up(ctx->dir_ctx->pos, AFS_DIR_BLOCK_SIZE);
+ +              iter_base += AFS_DIR_BLOCK_SIZE;
+ +              len -= AFS_DIR_BLOCK_SIZE;
+ +      } while (len > 0);
   
- -      /* walk through the blocks in sequence */
- -      ret = 0;
- -      while (ctx->pos < req->actual_len) {
- -              /* Fetch the appropriate folio from the directory and re-add it
- -               * to the LRU.  We have all the pages pinned with an extra ref.
- -               */
- -              folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
- -                                          FGP_ACCESSED, 0);
- -              if (IS_ERR(folio)) {
- -                      ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
- -                      break;
- -              }
+ +      return len;
+ +}
   
- -              offset = round_down(ctx->pos, sizeof(*dblock)) - folio_pos(folio);
- -              size = min_t(loff_t, folio_size(folio),
- -                           req->actual_len - folio_pos(folio));
+ +/*
+ + * Iterate through the directory folios.
+ + */
+ +static int afs_dir_iterate_contents(struct inode *dir, struct dir_context *dir_ctx)
+ +{
+ +      struct afs_dir_iteration_ctx ctx = { .dir_ctx = dir_ctx };
+ +      struct afs_vnode *dvnode = AFS_FS_I(dir);
+ +      struct iov_iter iter;
+ +      unsigned long long i_size = i_size_read(dir);
   
- -              do {
- -                      dblock = kmap_local_folio(folio, offset);
- -                      ret = afs_dir_iterate_block(dvnode, ctx, dblock,
- -                                                  folio_pos(folio) + offset);
- -                      kunmap_local(dblock);
- -                      if (ret != 1)
- -                              goto out;
+ +      /* Round the file position up to the next entry boundary */
+ +      dir_ctx->pos = round_up(dir_ctx->pos, sizeof(union afs_xdr_dirent));
   
- -              } while (offset += sizeof(*dblock), offset < size);
+ +      if (i_size <= 0 || dir_ctx->pos >= i_size)
+ +              return 0;
   
- -              ret = 0;
- -      }
+ +      iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0, i_size);
+ +      iov_iter_advance(&iter, round_down(dir_ctx->pos, AFS_DIR_BLOCK_SIZE));
+ +
+ +      iterate_folioq(&iter, iov_iter_count(&iter), dvnode, &ctx,
+ +                     afs_dir_iterate_step);
+ +
+ +      if (ctx.error == -ESTALE)
+ +              afs_invalidate_dir(dvnode, afs_dir_invalid_iter_stale);
+ +      return ctx.error;
+ +}
+ +
+ +/*
+ + * iterate through the data blob that lists the contents of an AFS directory
+ + */
+ +static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
+ +                         struct file *file, afs_dataversion_t *_dir_version)
+ +{
+ +      struct afs_vnode *dvnode = AFS_FS_I(dir);
+ +      int retry_limit = 100;
+ +      int ret;
+ +
+ +      _enter("{%lu},%llx,,", dir->i_ino, ctx->pos);
+ +
+ +      do {
+ +              if (--retry_limit < 0) {
+ +                      pr_warn("afs_read_dir(): Too many retries\n");
+ +                      ret = -ESTALE;
+ +                      break;
+ +              }
+ +              ret = afs_read_dir(dvnode, file);
+ +              if (ret < 0) {
+ +                      if (ret != -ESTALE)
+ +                              break;
+ +                      if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) {
+ +                              ret = -ESTALE;
+ +                              break;
+ +                      }
+ +                      continue;
+ +              }
+ +              *_dir_version = inode_peek_iversion_raw(dir);
+ +
+ +              ret = afs_dir_iterate_contents(dir, ctx);
+ +              up_read(&dvnode->validate_lock);
+ +      } while (ret == -ESTALE);
   
- -out:
- -      up_read(&dvnode->validate_lock);
- -      afs_put_read(req);
         _leave(" = %d", ret);
         return ret;
   }
@@@ -557,7 -566,8 +558,7 @@@ static int afs_readdir(struct file *fil
   {
         afs_dataversion_t dir_version;
   
- -      return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
- -                             &dir_version);
+ +      return afs_dir_iterate(file_inode(file), ctx, file, &dir_version);
   }
   
   /*
@@@ -597,22 -607,22 +598,22 @@@ static bool afs_lookup_one_filldir(stru
    * Do a lookup of a single name in a directory
    * - just returns the FID the dentry name maps to if found
    */
- static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
+ static int afs_do_lookup_one(struct inode *dir, const struct qstr *name,
- -                           struct afs_fid *fid, struct key *key,
+ +                           struct afs_fid *fid,
                              afs_dataversion_t *_dir_version)
   {
         struct afs_super_info *as = dir->i_sb->s_fs_info;
         struct afs_lookup_one_cookie cookie = {
                 .ctx.actor = afs_lookup_one_filldir,
-               .name = dentry->d_name,
+               .name = *name,
                 .fid.vid = as->volume->vid
         };
         int ret;
   
-       _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
+       _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name);
   
         /* search the directory */
- -      ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
+ +      ret = afs_dir_iterate(dir, &cookie.ctx, NULL, _dir_version);
         if (ret < 0) {
                 _leave(" = %d [iter]", ret);
                 return ret;
@@@ -647,10 -657,19 +648,10 @@@ static bool afs_lookup_filldir(struct d
         BUILD_BUG_ON(sizeof(union afs_xdr_dir_block) != 2048);
         BUILD_BUG_ON(sizeof(union afs_xdr_dirent) != 32);
   
- -      if (cookie->found) {
- -              if (cookie->nr_fids < 50) {
- -                      cookie->fids[cookie->nr_fids].vnode     = ino;
- -                      cookie->fids[cookie->nr_fids].unique    = dtype;
- -                      cookie->nr_fids++;
- -              }
- -      } else if (cookie->name.len == nlen &&
- -                 memcmp(cookie->name.name, name, nlen) == 0) {
- -              cookie->fids[1].vnode   = ino;
- -              cookie->fids[1].unique  = dtype;
- -              cookie->found = 1;
- -              if (cookie->one_only)
- -                      return false;
+ +      if (cookie->nr_fids < 50) {
+ +              cookie->fids[cookie->nr_fids].vnode     = ino;
+ +              cookie->fids[cookie->nr_fids].unique    = dtype;
+ +              cookie->nr_fids++;
         }
   
         return cookie->nr_fids < 50;
@@@ -770,7 -789,8 +771,7 @@@ static bool afs_server_supports_ibulk(s
    * files in one go and create inodes for them.  The inode of the file we were
    * asked for is returned.
    */
- -static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
- -                                 struct key *key)
+ +static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
   {
         struct afs_lookup_cookie *cookie;
         struct afs_vnode_param *vp;
@@@ -778,7 -798,6 +779,7 @@@
         struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
         struct inode *inode = NULL, *ti;
         afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
+ +      bool supports_ibulk;
         long ret;
         int i;
   
@@@ -795,19 -814,19 +796,19 @@@
         cookie->nr_fids = 2; /* slot 1 is saved for the fid we actually want
                               * and slot 0 for the directory */
   
- -      if (!afs_server_supports_ibulk(dvnode))
- -              cookie->one_only = true;
- -
- -      /* search the directory */
- -      ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
+ +      /* Search the directory for the named entry using the hash table... */
+ +      ret = afs_dir_search(dvnode, &dentry->d_name, &cookie->fids[1], &data_version);
         if (ret < 0)
                 goto out;
   
- -      dentry->d_fsdata = (void *)(unsigned long)data_version;
+ +      supports_ibulk = afs_server_supports_ibulk(dvnode);
+ +      if (supports_ibulk) {
+ +              /* ...then scan linearly from that point for entries to lookup-ahead. */
+ +              cookie->ctx.pos = (ret + 1) * AFS_DIR_DIRENT_SIZE;
+ +              afs_dir_iterate(dir, &cookie->ctx, NULL, &data_version);
+ +      }
   
- -      ret = -ENOENT;
- -      if (!cookie->found)
- -              goto out;
+ +      dentry->d_fsdata = (void *)(unsigned long)data_version;
   
         /* Check to see if we already have an inode for the primary fid. */
         inode = ilookup5(dir->i_sb, cookie->fids[1].vnode,
@@@ -866,7 -885,7 +867,7 @@@
          * the whole operation.
          */
         afs_op_set_error(op, -ENOTSUPP);
- -      if (!cookie->one_only) {
+ +      if (supports_ibulk) {
                 op->ops = &afs_inline_bulk_status_operation;
                 afs_begin_vnode_operation(op);
                 afs_wait_for_operation(op);
@@@ -908,7 -927,8 +909,7 @@@ out
   /*
    * Look up an entry in a directory with @sys substitution.
    */
- -static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry,
- -                                     struct key *key)
+ +static struct dentry *afs_lookup_atsys(struct inode *dir, struct dentry *dentry)
   {
         struct afs_sysnames *subs;
         struct afs_net *net = afs_i2net(dir);
@@@ -956,6 -976,7 +957,6 @@@ out_s
         afs_put_sysnames(subs);
         kfree(buf);
   out_p:
- -      key_put(key);
         return ret;
   }
   
@@@ -969,6 -990,7 +970,6 @@@ static struct dentry *afs_lookup(struc
         struct afs_fid fid = {};
         struct inode *inode;
         struct dentry *d;
- -      struct key *key;
         int ret;
   
         _enter("{%llx:%llu},%p{%pd},",
@@@ -986,9 -1008,15 +987,9 @@@
                 return ERR_PTR(-ESTALE);
         }
   
- -      key = afs_request_key(dvnode->volume->cell);
- -      if (IS_ERR(key)) {
- -              _leave(" = %ld [key]", PTR_ERR(key));
- -              return ERR_CAST(key);
- -      }
- -
- -      ret = afs_validate(dvnode, key);
+ +      ret = afs_validate(dvnode, NULL);
         if (ret < 0) {
- -              key_put(key);
+ +              afs_dir_unuse_cookie(dvnode, ret);
                 _leave(" = %d [val]", ret);
                 return ERR_PTR(ret);
         }
@@@ -998,10 -1026,11 +999,10 @@@
             dentry->d_name.name[dentry->d_name.len - 3] == 's' &&
             dentry->d_name.name[dentry->d_name.len - 2] == 'y' &&
             dentry->d_name.name[dentry->d_name.len - 1] == 's')
- -              return afs_lookup_atsys(dir, dentry, key);
+ +              return afs_lookup_atsys(dir, dentry);
   
         afs_stat_v(dvnode, n_lookup);
- -      inode = afs_do_lookup(dir, dentry, key);
- -      key_put(key);
+ +      inode = afs_do_lookup(dir, dentry);
         if (inode == ERR_PTR(-ENOENT))
                 inode = afs_try_auto_mntpt(dentry, dir);
   
@@@ -1023,21 -1052,12 +1024,12 @@@
   /*
    * Check the validity of a dentry under RCU conditions.
    */
- static int afs_d_revalidate_rcu(struct dentry *dentry)
+ static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry)
   {
-       struct afs_vnode *dvnode;
-       struct dentry *parent;
-       struct inode *dir;
         long dir_version, de_version;
   
         _enter("%p", dentry);
   
-       /* Check the parent directory is still valid first. */
-       parent = READ_ONCE(dentry->d_parent);
-       dir = d_inode_rcu(parent);
-       if (!dir)
-               return -ECHILD;
-       dvnode = AFS_FS_I(dir);
         if (test_bit(AFS_VNODE_DELETED, &dvnode->flags))
                 return -ECHILD;
   
@@@ -1065,11 -1085,11 +1057,11 @@@
    * - NOTE! the hit can be a negative hit too, so we can't assume we have an
    *   inode
    */
- static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
+ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name,
+                           struct dentry *dentry, unsigned int flags)
   {
-       struct afs_vnode *vnode, *dir;
+       struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir);
         struct afs_fid fid;
-       struct dentry *parent;
         struct inode *inode;
         struct key *key;
         afs_dataversion_t dir_version, invalid_before;
@@@ -1077,7 -1097,7 +1069,7 @@@
         int ret;
   
         if (flags & LOOKUP_RCU)
-               return afs_d_revalidate_rcu(dentry);
+               return afs_d_revalidate_rcu(dir, dentry);
   
         if (d_really_is_positive(dentry)) {
                 vnode = AFS_FS_I(d_inode(dentry));
@@@ -1092,14 -1112,9 +1084,9 @@@
         if (IS_ERR(key))
                 key = NULL;
   
-       /* Hold the parent dentry so we can peer at it */
-       parent = dget_parent(dentry);
-       dir = AFS_FS_I(d_inode(parent));
- 
         /* validate the parent directory */
         ret = afs_validate(dir, key);
         if (ret == -ERESTARTSYS) {
-               dput(parent);
                 key_put(key);
                 return ret;
         }
@@@ -1127,7 -1142,7 +1114,7 @@@
         afs_stat_v(dir, n_reval);
   
         /* search the directory for this vnode */
-       ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, &dir_version);
- -      ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, key, &dir_version);
++      ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, &dir_version);
         switch (ret) {
         case 0:
                 /* the filename maps to something */
@@@ -1171,22 -1186,19 +1158,19 @@@
                 goto out_valid;
   
         default:
-               _debug("failed to iterate dir %pd: %d",
-                      parent, ret);
+               _debug("failed to iterate parent %pd2: %d", dentry, ret);
                 goto not_found;
         }
   
   out_valid:
         dentry->d_fsdata = (void *)(unsigned long)dir_version;
   out_valid_noupdate:
-       dput(parent);
         key_put(key);
         _leave(" = 1 [valid]");
         return 1;
   
   not_found:
         _debug("dropping dentry %pd2", dentry);
-       dput(parent);
         key_put(key);
   
         _leave(" = 0 [bad]");
@@@ -1254,7 -1266,6 +1238,7 @@@ void afs_check_for_remote_deletion(stru
    */
   static void afs_vnode_new_inode(struct afs_operation *op)
   {
+ +      struct afs_vnode_param *dvp = &op->file[0];
         struct afs_vnode_param *vp = &op->file[1];
         struct afs_vnode *vnode;
         struct inode *inode;
@@@ -1274,10 -1285,6 +1258,10 @@@
   
         vnode = AFS_FS_I(inode);
         set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+ +      if (S_ISDIR(inode->i_mode))
+ +              afs_mkdir_init_dir(vnode, dvp->vnode);
+ +      else if (S_ISLNK(inode->i_mode))
+ +              afs_init_new_symlink(vnode, op);
         if (!afs_op_error(op))
                 afs_cache_permit(vnode, op->key, vnode->cb_break, &vp->scb);
         d_instantiate(op->dentry, inode);
@@@ -1294,21 -1301,18 +1278,21 @@@ static void afs_create_success(struct a
   
   static void afs_create_edit_dir(struct afs_operation *op)
   {
+ +      struct netfs_cache_resources cres = {};
         struct afs_vnode_param *dvp = &op->file[0];
         struct afs_vnode_param *vp = &op->file[1];
         struct afs_vnode *dvnode = dvp->vnode;
   
         _enter("op=%08x", op->debug_id);
   
+ +      fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
         down_write(&dvnode->validate_lock);
         if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
             dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
                 afs_edit_dir_add(dvnode, &op->dentry->d_name, &vp->fid,
                                  op->create.reason);
         up_write(&dvnode->validate_lock);
+ +      fscache_end_operation(&cres);
   }
   
   static void afs_create_put(struct afs_operation *op)
@@@ -1336,7 -1340,6 +1320,7 @@@ static int afs_mkdir(struct mnt_idmap *
   {
         struct afs_operation *op;
         struct afs_vnode *dvnode = AFS_FS_I(dir);
+ +      int ret;
   
         _enter("{%llx:%llu},{%pd},%ho",
                dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
@@@ -1347,8 -1350,6 +1331,8 @@@
                 return PTR_ERR(op);
         }
   
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), true);
+ +
         afs_op_set_vnode(op, 0, dvnode);
         op->file[0].dv_delta = 1;
         op->file[0].modification = true;
@@@ -1358,9 -1359,7 +1342,9 @@@
         op->create.reason = afs_edit_dir_for_mkdir;
         op->mtime       = current_time(dir);
         op->ops         = &afs_mkdir_operation;
- -      return afs_do_sync_operation(op);
+ +      ret = afs_do_sync_operation(op);
+ +      afs_dir_unuse_cookie(dvnode, ret);
+ +      return ret;
   }
   
   /*
@@@ -1373,8 -1372,8 +1357,8 @@@ static void afs_dir_remove_subdir(struc
   
                 clear_nlink(&vnode->netfs.inode);
                 set_bit(AFS_VNODE_DELETED, &vnode->flags);
- -              atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
- -              clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ +              afs_clear_cb_promise(vnode, afs_cb_promise_clear_rmdir);
+ +              afs_invalidate_dir(vnode, afs_dir_invalid_subdir_removed);
         }
   }
   
@@@ -1388,21 -1387,18 +1372,21 @@@ static void afs_rmdir_success(struct af
   
   static void afs_rmdir_edit_dir(struct afs_operation *op)
   {
+ +      struct netfs_cache_resources cres = {};
         struct afs_vnode_param *dvp = &op->file[0];
         struct afs_vnode *dvnode = dvp->vnode;
   
         _enter("op=%08x", op->debug_id);
         afs_dir_remove_subdir(op->dentry);
   
+ +      fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
         down_write(&dvnode->validate_lock);
         if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
             dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
                 afs_edit_dir_remove(dvnode, &op->dentry->d_name,
                                     afs_edit_dir_for_rmdir);
         up_write(&dvnode->validate_lock);
+ +      fscache_end_operation(&cres);
   }
   
   static void afs_rmdir_put(struct afs_operation *op)
@@@ -1437,8 -1433,6 +1421,8 @@@ static int afs_rmdir(struct inode *dir
         if (IS_ERR(op))
                 return PTR_ERR(op);
   
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), true);
+ +
         afs_op_set_vnode(op, 0, dvnode);
         op->file[0].dv_delta = 1;
         op->file[0].modification = true;
@@@ -1462,18 -1456,10 +1446,18 @@@
                 op->file[1].vnode = vnode;
         }
   
- -      return afs_do_sync_operation(op);
+ +      ret = afs_do_sync_operation(op);
+ +
+ +      /* Not all systems that can host afs servers have ENOTEMPTY. */
+ +      if (ret == -EEXIST)
+ +              ret = -ENOTEMPTY;
+ +out:
+ +      afs_dir_unuse_cookie(dvnode, ret);
+ +      return ret;
   
   error:
- -      return afs_put_operation(op);
+ +      ret = afs_put_operation(op);
+ +      goto out;
   }
   
   /*
@@@ -1536,19 -1522,16 +1520,19 @@@ static void afs_unlink_success(struct a
   
   static void afs_unlink_edit_dir(struct afs_operation *op)
   {
+ +      struct netfs_cache_resources cres = {};
         struct afs_vnode_param *dvp = &op->file[0];
         struct afs_vnode *dvnode = dvp->vnode;
   
         _enter("op=%08x", op->debug_id);
+ +      fscache_begin_write_operation(&cres, afs_vnode_cache(dvnode));
         down_write(&dvnode->validate_lock);
         if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
             dvnode->status.data_version == dvp->dv_before + dvp->dv_delta)
                 afs_edit_dir_remove(dvnode, &op->dentry->d_name,
                                     afs_edit_dir_for_unlink);
         up_write(&dvnode->validate_lock);
+ +      fscache_end_operation(&cres);
   }
   
   static void afs_unlink_put(struct afs_operation *op)
@@@ -1587,8 -1570,6 +1571,8 @@@ static int afs_unlink(struct inode *dir
         if (IS_ERR(op))
                 return PTR_ERR(op);
   
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), true);
+ +
         afs_op_set_vnode(op, 0, dvnode);
         op->file[0].dv_delta = 1;
         op->file[0].modification = true;
@@@ -1635,10 -1616,10 +1619,10 @@@
                 afs_wait_for_operation(op);
         }
   
- -      return afs_put_operation(op);
- -
   error:
- -      return afs_put_operation(op);
+ +      ret = afs_put_operation(op);
+ +      afs_dir_unuse_cookie(dvnode, ret);
+ +      return ret;
   }
   
   static const struct afs_operation_ops afs_create_operation = {
@@@ -1672,8 -1653,6 +1656,8 @@@ static int afs_create(struct mnt_idmap 
                 goto error;
         }
   
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), true);
+ +
         afs_op_set_vnode(op, 0, dvnode);
         op->file[0].dv_delta = 1;
         op->file[0].modification = true;
@@@ -1684,9 -1663,7 +1668,9 @@@
         op->create.reason = afs_edit_dir_for_create;
         op->mtime       = current_time(dir);
         op->ops         = &afs_create_operation;
- -      return afs_do_sync_operation(op);
+ +      ret = afs_do_sync_operation(op);
+ +      afs_dir_unuse_cookie(dvnode, ret);
+ +      return ret;
   
   error:
         d_drop(dentry);
@@@ -1751,8 -1728,6 +1735,8 @@@ static int afs_link(struct dentry *from
                 goto error;
         }
   
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), true);
+ +
         ret = afs_validate(vnode, op->key);
         if (ret < 0)
                 goto error_op;
@@@ -1768,13 -1743,10 +1752,13 @@@
         op->dentry_2            = from;
         op->ops                 = &afs_link_operation;
         op->create.reason       = afs_edit_dir_for_link;
- -      return afs_do_sync_operation(op);
+ +      ret = afs_do_sync_operation(op);
+ +      afs_dir_unuse_cookie(dvnode, ret);
+ +      return ret;
   
   error_op:
         afs_put_operation(op);
+ +      afs_dir_unuse_cookie(dvnode, ret);
   error:
         d_drop(dentry);
         _leave(" = %d", ret);
@@@ -1818,8 -1790,6 +1802,8 @@@ static int afs_symlink(struct mnt_idma
                 goto error;
         }
   
+ +      fscache_use_cookie(afs_vnode_cache(dvnode), true);
+ +
         afs_op_set_vnode(op, 0, dvnode);
         op->file[0].dv_delta = 1;
   
@@@ -1828,9 -1798,7 +1812,9 @@@
         op->create.reason       = afs_edit_dir_for_symlink;
         op->create.symlink      = content;
         op->mtime               = current_time(dir);
- -      return afs_do_sync_operation(op);
+ +      ret = afs_do_sync_operation(op);
+ +      afs_dir_unuse_cookie(dvnode, ret);
+ +      return ret;
   
   error:
         d_drop(dentry);
@@@ -1862,7 -1830,6 +1846,7 @@@ static void afs_rename_success(struct a
                 write_seqlock(&vnode->cb_lock);
   
                 new_dv = vnode->status.data_version + 1;
+ +              trace_afs_set_dv(vnode, new_dv);
                 vnode->status.data_version = new_dv;
                 inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
   
@@@ -1872,7 -1839,6 +1856,7 @@@
   
   static void afs_rename_edit_dir(struct afs_operation *op)
   {
+ +      struct netfs_cache_resources orig_cres = {}, new_cres = {};
         struct afs_vnode_param *orig_dvp = &op->file[0];
         struct afs_vnode_param *new_dvp = &op->file[1];
         struct afs_vnode *orig_dvnode = orig_dvp->vnode;
@@@ -1889,10 -1855,6 +1873,10 @@@
                 op->rename.rehash = NULL;
         }
   
+ +      fscache_begin_write_operation(&orig_cres, afs_vnode_cache(orig_dvnode));
+ +      if (new_dvnode != orig_dvnode)
+ +              fscache_begin_write_operation(&new_cres, afs_vnode_cache(new_dvnode));
+ +
         down_write(&orig_dvnode->validate_lock);
         if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
             orig_dvnode->status.data_version == orig_dvp->dv_before + orig_dvp->dv_delta)
@@@ -1942,9 -1904,6 +1926,9 @@@
         d_move(old_dentry, new_dentry);
   
         up_write(&new_dvnode->validate_lock);
+ +      fscache_end_operation(&orig_cres);
+ +      if (new_dvnode != orig_dvnode)
+ +              fscache_end_operation(&new_cres);
   }
   
   static void afs_rename_put(struct afs_operation *op)
@@@ -1997,10 -1956,6 +1981,10 @@@ static int afs_rename(struct mnt_idmap 
         if (IS_ERR(op))
                 return PTR_ERR(op);
   
+ +      fscache_use_cookie(afs_vnode_cache(orig_dvnode), true);
+ +      if (new_dvnode != orig_dvnode)
+ +              fscache_use_cookie(afs_vnode_cache(new_dvnode), true);
+ +
         ret = afs_validate(vnode, op->key);
         afs_op_set_error(op, ret);
         if (ret < 0)
@@@ -2068,43 -2023,47 +2052,43 @@@
          */
         d_drop(old_dentry);
   
- -      return afs_do_sync_operation(op);
+ +      ret = afs_do_sync_operation(op);
+ +out:
+ +      afs_dir_unuse_cookie(orig_dvnode, ret);
+ +      if (new_dvnode != orig_dvnode)
+ +              afs_dir_unuse_cookie(new_dvnode, ret);
+ +      return ret;
   
   error:
- -      return afs_put_operation(op);
- -}
- -
- -/*
- - * Release a directory folio and clean up its private state if it's not busy
- - * - return true if the folio can now be released, false if not
- - */
- -static bool afs_dir_release_folio(struct folio *folio, gfp_t gfp_flags)
- -{
- -      struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
- -
- -      _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio->index);
- -
- -      folio_detach_private(folio);
- -
- -      /* The directory will need reloading. */
- -      if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- -              afs_stat_v(dvnode, n_relpg);
- -      return true;
+ +      ret = afs_put_operation(op);
+ +      goto out;
   }
   
   /*
- - * Invalidate part or all of a folio.
+ + * Write the file contents to the cache as a single blob.
    */
- -static void afs_dir_invalidate_folio(struct folio *folio, size_t offset,
- -                                 size_t length)
+ +int afs_single_writepages(struct address_space *mapping,
+ +                        struct writeback_control *wbc)
   {
- -      struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
- -
- -      _enter("{%lu},%zu,%zu", folio->index, offset, length);
- -
- -      BUG_ON(!folio_test_locked(folio));
+ +      struct afs_vnode *dvnode = AFS_FS_I(mapping->host);
+ +      struct iov_iter iter;
+ +      bool is_dir = (S_ISDIR(dvnode->netfs.inode.i_mode) &&
+ +                     !test_bit(AFS_VNODE_MOUNTPOINT, &dvnode->flags));
+ +      int ret = 0;
   
- -      /* The directory will need reloading. */
- -      if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
- -              afs_stat_v(dvnode, n_inval);
+ +      /* Need to lock to prevent the folio queue and folios from being thrown
+ +       * away.
+ +       */
+ +      down_read(&dvnode->validate_lock);
+ +
+ +      if (is_dir ?
+ +          test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) :
+ +          atomic64_read(&dvnode->cb_expires_at) != AFS_NO_CB_PROMISE) {
+ +              iov_iter_folio_queue(&iter, ITER_SOURCE, dvnode->directory, 0, 0,
+ +                                   i_size_read(&dvnode->netfs.inode));
+ +              ret = netfs_writeback_single(mapping, wbc, &iter);
+ +      }
   
- -      /* we clean up only if the entire folio is being invalidated */
- -      if (offset == 0 && length == folio_size(folio))
- -              folio_detach_private(folio);
+ +      up_read(&dvnode->validate_lock);
+ +      return ret;
   }
diff --combined fs/ceph/mds_client.c

index 785fe489ef4b8e97f2030a47711235f9dce37389,3b766b9847136baf665709b1abb040d416ed129b..d7a06b9aaef6b2e51c9c89d0f7d12128f4a13439
--- 1/fs/ceph/mds_client.c
--- 2/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@@ -2621,6 -2621,7 +2621,7 @@@ static u8 *get_fscrypt_altname(const st
   {
         struct inode *dir = req->r_parent;
         struct dentry *dentry = req->r_dentry;
+       const struct qstr *name = req->r_dname;
         u8 *cryptbuf = NULL;
         u32 len = 0;
         int ret = 0;
@@@ -2641,8 -2642,10 +2642,10 @@@
         if (!fscrypt_has_encryption_key(dir))
                 goto success;
   
-       if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX,
-                                         &len)) {
+       if (!name)
+               name = &dentry->d_name;
+ 
+       if (!fscrypt_fname_encrypted_size(dir, name->len, NAME_MAX, &len)) {
                 WARN_ON_ONCE(1);
                 return ERR_PTR(-ENAMETOOLONG);
         }
@@@ -2657,7 -2660,7 +2660,7 @@@
         if (!cryptbuf)
                 return ERR_PTR(-ENOMEM);
   
-       ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len);
+       ret = fscrypt_fname_encrypt(dir, name, cryptbuf, len);
         if (ret) {
                 kfree(cryptbuf);
                 return ERR_PTR(ret);
@@@ -2800,11 -2803,12 +2803,11 @@@ retry
   
         if (pos < 0) {
                 /*
- -               * A rename didn't occur, but somehow we didn't end up where
- -               * we thought we would. Throw a warning and try again.
+ +               * The path is longer than PATH_MAX and this function
+ +               * cannot ever succeed.  Creating paths that long is
+ +               * possible with Ceph, but Linux cannot use them.
                  */
- -              pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n",
- -                             pos);
- -              goto retry;
+ +              return ERR_PTR(-ENAMETOOLONG);
         }
   
         *pbase = base;
diff --combined fs/dcache.c

index 1cd929f17eecf759c9eed1f814aa627df404f2b3,903142b324e98288cec33f2ca4af1ae530d5d845..9cc0d47da321c52669e05f0e97a6ccfa2451d025
--- 1/fs/dcache.c
--- 2/fs/dcache.c
+++ b/fs/dcache.c
@@@ -192,7 -192,7 +192,7 @@@ static int proc_nr_dentry(const struct 
         return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
   }
   
- -static struct ctl_table fs_dcache_sysctls[] = {
+ +static const struct ctl_table fs_dcache_sysctls[] = {
         {
                 .procname       = "dentry-state",
                 .data           = &dentry_stat,
@@@ -295,12 -295,16 +295,16 @@@ static inline int dentry_cmp(const stru
         return dentry_string_cmp(cs, ct, tcount);
   }
   
+ /*
+  * long names are allocated separately from dentry and never modified.
+  * Refcounted, freeing is RCU-delayed.  See take_dentry_name_snapshot()
+  * for the reason why ->count and ->head can't be combined into a union.
+  * dentry_string_cmp() relies upon ->name[] being word-aligned.
+  */
   struct external_name {
-       union {
-               atomic_t count;
-               struct rcu_head head;
-       } u;
-       unsigned char name[];
+       atomic_t count;
+       struct rcu_head head;
+       unsigned char name[] __aligned(sizeof(unsigned long));
   };
   
   static inline struct external_name *external_name(struct dentry *dentry)
@@@ -324,31 -328,45 +328,45 @@@ static void __d_free_external(struct rc
   
   static inline int dname_external(const struct dentry *dentry)
   {
-       return dentry->d_name.name != dentry->d_iname;
+       return dentry->d_name.name != dentry->d_shortname.string;
   }
   
   void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
   {
-       spin_lock(&dentry->d_lock);
-       name->name = dentry->d_name;
-       if (unlikely(dname_external(dentry))) {
-               atomic_inc(&external_name(dentry)->u.count);
+       unsigned seq;
+       const unsigned char *s;
+ 
+       rcu_read_lock();
+ retry:
+       seq = read_seqcount_begin(&dentry->d_seq);
+       s = READ_ONCE(dentry->d_name.name);
+       name->name.hash_len = dentry->d_name.hash_len;
+       name->name.name = name->inline_name.string;
+       if (likely(s == dentry->d_shortname.string)) {
+               name->inline_name = dentry->d_shortname;
         } else {
-               memcpy(name->inline_name, dentry->d_iname,
-                      dentry->d_name.len + 1);
-               name->name.name = name->inline_name;
+               struct external_name *p;
+               p = container_of(s, struct external_name, name[0]);
+               // get a valid reference
+               if (unlikely(!atomic_inc_not_zero(&p->count)))
+                       goto retry;
+               name->name.name = s;
         }
-       spin_unlock(&dentry->d_lock);
+       if (read_seqcount_retry(&dentry->d_seq, seq)) {
+               release_dentry_name_snapshot(name);
+               goto retry;
+       }
+       rcu_read_unlock();
   }
   EXPORT_SYMBOL(take_dentry_name_snapshot);
   
   void release_dentry_name_snapshot(struct name_snapshot *name)
   {
-       if (unlikely(name->name.name != name->inline_name)) {
+       if (unlikely(name->name.name != name->inline_name.string)) {
                 struct external_name *p;
                 p = container_of(name->name.name, struct external_name, name[0]);
-               if (unlikely(atomic_dec_and_test(&p->u.count)))
-                       kfree_rcu(p, u.head);
+               if (unlikely(atomic_dec_and_test(&p->count)))
+                       kfree_rcu(p, head);
         }
   }
   EXPORT_SYMBOL(release_dentry_name_snapshot);
@@@ -386,7 -404,7 +404,7 @@@ static void dentry_free(struct dentry *
         WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
         if (unlikely(dname_external(dentry))) {
                 struct external_name *p = external_name(dentry);
-               if (likely(atomic_dec_and_test(&p->u.count))) {
+               if (likely(atomic_dec_and_test(&p->count))) {
                         call_rcu(&dentry->d_u.d_rcu, __d_free_external);
                         return;
                 }
@@@ -1654,10 -1672,10 +1672,10 @@@ static struct dentry *__d_alloc(struct 
          * will still always have a NUL at the end, even if we might
          * be overwriting an internal NUL character
          */
-       dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
+       dentry->d_shortname.string[DNAME_INLINE_LEN-1] = 0;
         if (unlikely(!name)) {
                 name = &slash_name;
-               dname = dentry->d_iname;
+               dname = dentry->d_shortname.string;
         } else if (name->len > DNAME_INLINE_LEN-1) {
                 size_t size = offsetof(struct external_name, name[1]);
                 struct external_name *p = kmalloc(size + name->len,
@@@ -1667,10 -1685,10 +1685,10 @@@
                         kmem_cache_free(dentry_cache, dentry); 
                         return NULL;
                 }
-               atomic_set(&p->u.count, 1);
+               atomic_set(&p->count, 1);
                 dname = p->name;
         } else  {
-               dname = dentry->d_iname;
+               dname = dentry->d_shortname.string;
         }       
   
         dentry->d_name.len = name->len;
@@@ -1681,8 -1699,9 +1699,8 @@@
         /* Make sure we always see the terminating NUL character */
         smp_store_release(&dentry->d_name.name, dname); /* ^^^ */
   
- -      dentry->d_lockref.count = 1;
         dentry->d_flags = 0;
- -      spin_lock_init(&dentry->d_lock);
+ +      lockref_init(&dentry->d_lockref, 1);
         seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock);
         dentry->d_inode = NULL;
         dentry->d_parent = dentry;
@@@ -2728,10 -2747,9 +2746,9 @@@ static void swap_names(struct dentry *d
                          * dentry:internal, target:external.  Steal target's
                          * storage and make target internal.
                          */
-                       memcpy(target->d_iname, dentry->d_name.name,
-                                       dentry->d_name.len + 1);
                         dentry->d_name.name = target->d_name.name;
-                       target->d_name.name = target->d_iname;
+                       target->d_shortname = dentry->d_shortname;
+                       target->d_name.name = target->d_shortname.string;
                 }
         } else {
                 if (unlikely(dname_external(dentry))) {
@@@ -2739,20 -2757,16 +2756,16 @@@
                          * dentry:external, target:internal.  Give dentry's
                          * storage to target and make dentry internal
                          */
-                       memcpy(dentry->d_iname, target->d_name.name,
-                                       target->d_name.len + 1);
                         target->d_name.name = dentry->d_name.name;
-                       dentry->d_name.name = dentry->d_iname;
+                       dentry->d_shortname = target->d_shortname;
+                       dentry->d_name.name = dentry->d_shortname.string;
                 } else {
                         /*
                          * Both are internal.
                          */
-                       unsigned int i;
-                       BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
-                       for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
-                               swap(((long *) &dentry->d_iname)[i],
-                                    ((long *) &target->d_iname)[i]);
-                       }
+                       for (int i = 0; i < DNAME_INLINE_WORDS; i++)
+                               swap(dentry->d_shortname.words[i],
+                                    target->d_shortname.words[i]);
                 }
         }
         swap(dentry->d_name.hash_len, target->d_name.hash_len);
@@@ -2764,16 -2778,15 +2777,15 @@@ static void copy_name(struct dentry *de
         if (unlikely(dname_external(dentry)))
                 old_name = external_name(dentry);
         if (unlikely(dname_external(target))) {
-               atomic_inc(&external_name(target)->u.count);
+               atomic_inc(&external_name(target)->count);
                 dentry->d_name = target->d_name;
         } else {
-               memcpy(dentry->d_iname, target->d_name.name,
-                               target->d_name.len + 1);
-               dentry->d_name.name = dentry->d_iname;
+               dentry->d_shortname = target->d_shortname;
+               dentry->d_name.name = dentry->d_shortname.string;
                 dentry->d_name.hash_len = target->d_name.hash_len;
         }
-       if (old_name && likely(atomic_dec_and_test(&old_name->u.count)))
-               kfree_rcu(old_name, u.head);
+       if (old_name && likely(atomic_dec_and_test(&old_name->count)))
+               kfree_rcu(old_name, head);
   }
   
   /*
@@@ -2954,7 -2967,12 +2966,12 @@@ static int __d_unalias(struct dentry *d
                 goto out_err;
         m2 = &alias->d_parent->d_inode->i_rwsem;
   out_unalias:
+       if (alias->d_op->d_unalias_trylock &&
+           !alias->d_op->d_unalias_trylock(alias))
+               goto out_err;
         __d_move(alias, dentry, false);
+       if (alias->d_op->d_unalias_unlock)
+               alias->d_op->d_unalias_unlock(alias);
         ret = 0;
   out_err:
         if (m2)
@@@ -3102,12 -3120,12 +3119,12 @@@ void d_mark_tmpfile(struct file *file, 
   {
         struct dentry *dentry = file->f_path.dentry;
   
-       BUG_ON(dentry->d_name.name != dentry->d_iname ||
+       BUG_ON(dname_external(dentry) ||
                 !hlist_unhashed(&dentry->d_u.d_alias) ||
                 !d_unlinked(dentry));
         spin_lock(&dentry->d_parent->d_lock);
         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-       dentry->d_name.len = sprintf(dentry->d_iname, "#%llu",
+       dentry->d_name.len = sprintf(dentry->d_shortname.string, "#%llu",
                                 (unsigned long long)inode->i_ino);
         spin_unlock(&dentry->d_lock);
         spin_unlock(&dentry->d_parent->d_lock);
@@@ -3195,7 -3213,7 +3212,7 @@@ static void __init dcache_init(void
          */
         dentry_cache = KMEM_CACHE_USERCOPY(dentry,
                 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_ACCOUNT,
-               d_iname);
+               d_shortname.string);
   
         /* Hash may have been set up in dcache_init_early */
         if (!hashdist)
diff --combined fs/exfat/namei.c

index 099f8064507213946aa8838ad65dc4a240fe6864,61c7164b85b301d891b2c835495cbc6dfd1ccc27..691dd77b6ab5ff52da868dbe1e3840230aaec865
--- 1/fs/exfat/namei.c
--- 2/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@@ -31,10 -31,9 +31,9 @@@ static inline void exfat_d_version_set(
    * If it happened, the negative dentry isn't actually negative anymore.  So,
    * drop it.
    */
- static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags)
+ static int exfat_d_revalidate(struct inode *dir, const struct qstr *name,
+                             struct dentry *dentry, unsigned int flags)
   {
-       int ret;
- 
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
   
@@@ -58,11 -57,7 +57,7 @@@
         if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
                 return 0;
   
-       spin_lock(&dentry->d_lock);
-       ret = inode_eq_iversion(d_inode(dentry->d_parent),
-                       exfat_d_version(dentry));
-       spin_unlock(&dentry->d_lock);
-       return ret;
+       return inode_eq_iversion(dir, exfat_d_version(dentry));
   }
   
   /* returns the length of a struct qstr, ignoring trailing dots if necessary */
@@@ -330,8 -325,8 +325,8 @@@ static int exfat_find_empty_entry(struc
   
         while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir,
                                         num_entries, es)) < 0) {
- -              if (dentry == -EIO)
- -                      break;
+ +              if (dentry != -ENOSPC)
+ +                      return dentry;
   
                 if (exfat_check_max_dentries(inode))
                         return -ENOSPC;
diff --combined fs/fuse/dir.c

index be693a8a10109d1ceeeebaf1976b42cb33dc1ffc,3019bc1d9f9de4018b078963a6cfccfb61dd9308..198862b086ff7bad4007ec2f3200377d12a78385
--- 1/fs/fuse/dir.c
--- 2/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@@ -175,10 -175,11 +175,12 @@@ static void fuse_lookup_init(struct fus
         memset(outarg, 0, sizeof(struct fuse_entry_out));
         args->opcode = FUSE_LOOKUP;
         args->nodeid = nodeid;
--      args->in_numargs = 2;
- -      args->in_args[0].size = name->len;
- -      args->in_args[0].value = name->name;
- -      args->in_args[1].size = 1;
- -      args->in_args[1].value = "";
++      args->in_numargs = 3;
+ +      fuse_set_zero_arg0(args);
-       args->in_args[1].size = name->len + 1;
++      args->in_args[1].size = name->len;
+ +      args->in_args[1].value = name->name;
++      args->in_args[2].size = 1;
++      args->in_args[2].value = "";
         args->out_numargs = 1;
         args->out_args[0].size = sizeof(struct fuse_entry_out);
         args->out_args[0].value = outarg;
@@@ -193,10 -194,10 +195,10 @@@
    * the lookup once more.  If the lookup results in the same inode,
    * then refresh the attributes, timeouts and mark the dentry valid.
    */
- static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
+ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name,
+                                 struct dentry *entry, unsigned int flags)
   {
         struct inode *inode;
-       struct dentry *parent;
         struct fuse_mount *fm;
         struct fuse_inode *fi;
         int ret;
@@@ -228,11 -229,9 +230,9 @@@
   
                 attr_version = fuse_get_attr_version(fm->fc);
   
-               parent = dget_parent(entry);
-               fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
-                                &entry->d_name, &outarg);
+               fuse_lookup_init(fm->fc, &args, get_node_id(dir),
+                                name, &outarg);
                 ret = fuse_simple_request(fm, &args);
-               dput(parent);
                 /* Zero nodeid is same as -ENOENT */
                 if (!ret && !outarg.nodeid)
                         ret = -ENOENT;
@@@ -266,9 -265,7 +266,7 @@@
                         if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
                                 return -ECHILD;
                 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
-                       parent = dget_parent(entry);
-                       fuse_advise_use_readdirplus(d_inode(parent));
-                       dput(parent);
+                       fuse_advise_use_readdirplus(dir);
                 }
         }
         ret = 1;
@@@ -468,29 -465,29 +466,29 @@@ static int get_security_context(struct 
   {
         struct fuse_secctx *fctx;
         struct fuse_secctx_header *header;
- -      void *ctx = NULL, *ptr;
- -      u32 ctxlen, total_len = sizeof(*header);
+ +      struct lsm_context lsmctx = { };
+ +      void *ptr;
+ +      u32 total_len = sizeof(*header);
         int err, nr_ctx = 0;
- -      const char *name;
+ +      const char *name = NULL;
         size_t namelen;
   
         err = security_dentry_init_security(entry, mode, &entry->d_name,
- -                                          &name, &ctx, &ctxlen);
- -      if (err) {
- -              if (err != -EOPNOTSUPP)
- -                      goto out_err;
- -              /* No LSM is supporting this security hook. Ignore error */
- -              ctxlen = 0;
- -              ctx = NULL;
- -      }
+ +                                          &name, &lsmctx);
+ +
+ +      /* If no LSM is supporting this security hook ignore error */
+ +      if (err && err != -EOPNOTSUPP)
+ +              goto out_err;
   
- -      if (ctxlen) {
+ +      if (lsmctx.len) {
                 nr_ctx = 1;
                 namelen = strlen(name) + 1;
                 err = -EIO;
- -              if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
+ +              if (WARN_ON(namelen > XATTR_NAME_MAX + 1 ||
+ +                  lsmctx.len > S32_MAX))
                         goto out_err;
- -              total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
+ +              total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen +
+ +                                          lsmctx.len);
         }
   
         err = -ENOMEM;
@@@ -503,20 -500,19 +501,20 @@@
         ptr += sizeof(*header);
         if (nr_ctx) {
                 fctx = ptr;
- -              fctx->size = ctxlen;
+ +              fctx->size = lsmctx.len;
                 ptr += sizeof(*fctx);
   
                 strcpy(ptr, name);
                 ptr += namelen;
   
- -              memcpy(ptr, ctx, ctxlen);
+ +              memcpy(ptr, lsmctx.context, lsmctx.len);
         }
         ext->size = total_len;
         ext->value = header;
         err = 0;
   out_err:
- -      kfree(ctx);
+ +      if (nr_ctx)
+ +              security_release_secctx(&lsmctx);
         return err;
   }
   
@@@ -930,12 -926,11 +928,12 @@@ static int fuse_symlink(struct mnt_idma
         FUSE_ARGS(args);
   
         args.opcode = FUSE_SYMLINK;
- -      args.in_numargs = 2;
- -      args.in_args[0].size = entry->d_name.len + 1;
- -      args.in_args[0].value = entry->d_name.name;
- -      args.in_args[1].size = len;
- -      args.in_args[1].value = link;
+ +      args.in_numargs = 3;
+ +      fuse_set_zero_arg0(&args);
+ +      args.in_args[1].size = entry->d_name.len + 1;
+ +      args.in_args[1].value = entry->d_name.name;
+ +      args.in_args[2].size = len;
+ +      args.in_args[2].value = link;
         return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK);
   }
   
@@@ -995,10 -990,9 +993,10 @@@ static int fuse_unlink(struct inode *di
   
         args.opcode = FUSE_UNLINK;
         args.nodeid = get_node_id(dir);
- -      args.in_numargs = 1;
- -      args.in_args[0].size = entry->d_name.len + 1;
- -      args.in_args[0].value = entry->d_name.name;
+ +      args.in_numargs = 2;
+ +      fuse_set_zero_arg0(&args);
+ +      args.in_args[1].size = entry->d_name.len + 1;
+ +      args.in_args[1].value = entry->d_name.name;
         err = fuse_simple_request(fm, &args);
         if (!err) {
                 fuse_dir_changed(dir);
@@@ -1019,10 -1013,9 +1017,10 @@@ static int fuse_rmdir(struct inode *dir
   
         args.opcode = FUSE_RMDIR;
         args.nodeid = get_node_id(dir);
- -      args.in_numargs = 1;
- -      args.in_args[0].size = entry->d_name.len + 1;
- -      args.in_args[0].value = entry->d_name.name;
+ +      args.in_numargs = 2;
+ +      fuse_set_zero_arg0(&args);
+ +      args.in_args[1].size = entry->d_name.len + 1;
+ +      args.in_args[1].value = entry->d_name.name;
         err = fuse_simple_request(fm, &args);
         if (!err) {
                 fuse_dir_changed(dir);
@@@ -1686,8 -1679,6 +1684,8 @@@ static int fuse_dir_open(struct inode *
                  */
                 if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
                         nonseekable_open(inode, file);
+ +              if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ +                      invalidate_inode_pages2(inode->i_mapping);
         }
   
         return err;
diff --combined fs/libfs.c

index 5b6120b19e996d2f1a915596787f0671ed7b349e,3ad1b1b7fed6e7433ac9997d7f34429d58f97b79..8444f5cc406415d4fdba45dc059a9d19d13ab881
--- 1/fs/libfs.c
--- 2/fs/libfs.c
+++ b/fs/libfs.c
@@@ -245,16 -245,9 +245,16 @@@ const struct inode_operations simple_di
   };
   EXPORT_SYMBOL(simple_dir_inode_operations);
   
- -/* 0 is '.', 1 is '..', so always start with offset 2 or more */
+ +/* simple_offset_add() never assigns these to a dentry */
   enum {
- -      DIR_OFFSET_MIN  = 2,
+ +      DIR_OFFSET_FIRST        = 2,            /* Find first real entry */
+ +      DIR_OFFSET_EOD          = S32_MAX,
+ +};
+ +
+ +/* simple_offset_add() allocation range */
+ +enum {
+ +      DIR_OFFSET_MIN          = DIR_OFFSET_FIRST + 1,
+ +      DIR_OFFSET_MAX          = DIR_OFFSET_EOD - 1,
   };
   
   static void offset_set(struct dentry *dentry, long offset)
@@@ -298,10 -291,9 +298,10 @@@ int simple_offset_add(struct offset_ct
                 return -EBUSY;
   
         ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
- -                               LONG_MAX, &octx->next_offset, GFP_KERNEL);
- -      if (ret < 0)
- -              return ret;
+ +                               DIR_OFFSET_MAX, &octx->next_offset,
+ +                               GFP_KERNEL);
+ +      if (unlikely(ret < 0))
+ +              return ret == -EBUSY ? -ENOSPC : ret;
   
         offset_set(dentry, offset);
         return 0;
@@@ -337,6 -329,38 +337,6 @@@ void simple_offset_remove(struct offset
         offset_set(dentry, 0);
   }
   
- -/**
- - * simple_offset_empty - Check if a dentry can be unlinked
- - * @dentry: dentry to be tested
- - *
- - * Returns 0 if @dentry is a non-empty directory; otherwise returns 1.
- - */
- -int simple_offset_empty(struct dentry *dentry)
- -{
- -      struct inode *inode = d_inode(dentry);
- -      struct offset_ctx *octx;
- -      struct dentry *child;
- -      unsigned long index;
- -      int ret = 1;
- -
- -      if (!inode || !S_ISDIR(inode->i_mode))
- -              return ret;
- -
- -      index = DIR_OFFSET_MIN;
- -      octx = inode->i_op->get_offset_ctx(inode);
- -      mt_for_each(&octx->mt, child, index, LONG_MAX) {
- -              spin_lock(&child->d_lock);
- -              if (simple_positive(child)) {
- -                      spin_unlock(&child->d_lock);
- -                      ret = 0;
- -                      break;
- -              }
- -              spin_unlock(&child->d_lock);
- -      }
- -
- -      return ret;
- -}
- -
   /**
    * simple_offset_rename - handle directory offsets for rename
    * @old_dir: parent directory of source entry
@@@ -430,6 -454,14 +430,6 @@@ void simple_offset_destroy(struct offse
         mtree_destroy(&octx->mt);
   }
   
- -static int offset_dir_open(struct inode *inode, struct file *file)
- -{
- -      struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
- -
- -      file->private_data = (void *)ctx->next_offset;
- -      return 0;
- -}
- -
   /**
    * offset_dir_llseek - Advance the read position of a directory descriptor
    * @file: an open directory whose position is to be updated
@@@ -443,6 -475,9 +443,6 @@@
    */
   static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
   {
- -      struct inode *inode = file->f_inode;
- -      struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
- -
         switch (whence) {
         case SEEK_CUR:
                 offset += file->f_pos;
@@@ -455,89 -490,62 +455,89 @@@
                 return -EINVAL;
         }
   
- -      /* In this case, ->private_data is protected by f_pos_lock */
- -      if (!offset)
- -              file->private_data = (void *)ctx->next_offset;
         return vfs_setpos(file, offset, LONG_MAX);
   }
   
- -static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset)
+ +static struct dentry *find_positive_dentry(struct dentry *parent,
+ +                                         struct dentry *dentry,
+ +                                         bool next)
   {
- -      MA_STATE(mas, &octx->mt, offset, offset);
+ +      struct dentry *found = NULL;
+ +
+ +      spin_lock(&parent->d_lock);
+ +      if (next)
+ +              dentry = d_next_sibling(dentry);
+ +      else if (!dentry)
+ +              dentry = d_first_child(parent);
+ +      hlist_for_each_entry_from(dentry, d_sib) {
+ +              if (!simple_positive(dentry))
+ +                      continue;
+ +              spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+ +              if (simple_positive(dentry))
+ +                      found = dget_dlock(dentry);
+ +              spin_unlock(&dentry->d_lock);
+ +              if (likely(found))
+ +                      break;
+ +      }
+ +      spin_unlock(&parent->d_lock);
+ +      return found;
+ +}
+ +
+ +static noinline_for_stack struct dentry *
+ +offset_dir_lookup(struct dentry *parent, loff_t offset)
+ +{
+ +      struct inode *inode = d_inode(parent);
+ +      struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
         struct dentry *child, *found = NULL;
   
- -      rcu_read_lock();
- -      child = mas_find(&mas, LONG_MAX);
- -      if (!child)
- -              goto out;
- -      spin_lock(&child->d_lock);
- -      if (simple_positive(child))
- -              found = dget_dlock(child);
- -      spin_unlock(&child->d_lock);
- -out:
- -      rcu_read_unlock();
+ +      MA_STATE(mas, &octx->mt, offset, offset);
+ +
+ +      if (offset == DIR_OFFSET_FIRST)
+ +              found = find_positive_dentry(parent, NULL, false);
+ +      else {
+ +              rcu_read_lock();
+ +              child = mas_find(&mas, DIR_OFFSET_MAX);
+ +              found = find_positive_dentry(parent, child, false);
+ +              rcu_read_unlock();
+ +      }
         return found;
   }
   
   static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
   {
         struct inode *inode = d_inode(dentry);
- -      long offset = dentry2offset(dentry);
   
- -      return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
- -                        inode->i_ino, fs_umode_to_dtype(inode->i_mode));
+ +      return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
+ +                      inode->i_ino, fs_umode_to_dtype(inode->i_mode));
   }
   
- -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
+ +static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
   {
- -      struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
+ +      struct dentry *dir = file->f_path.dentry;
         struct dentry *dentry;
   
+ +      dentry = offset_dir_lookup(dir, ctx->pos);
+ +      if (!dentry)
+ +              goto out_eod;
         while (true) {
- -              dentry = offset_find_next(octx, ctx->pos);
- -              if (!dentry)
- -                      return;
- -
- -              if (dentry2offset(dentry) >= last_index) {
- -                      dput(dentry);
- -                      return;
- -              }
+ +              struct dentry *next;
   
- -              if (!offset_dir_emit(ctx, dentry)) {
- -                      dput(dentry);
- -                      return;
- -              }
+ +              ctx->pos = dentry2offset(dentry);
+ +              if (!offset_dir_emit(ctx, dentry))
+ +                      break;
   
- -              ctx->pos = dentry2offset(dentry) + 1;
+ +              next = find_positive_dentry(dir, dentry, true);
                 dput(dentry);
+ +
+ +              if (!next)
+ +                      goto out_eod;
+ +              dentry = next;
         }
+ +      dput(dentry);
+ +      return;
+ +
+ +out_eod:
+ +      ctx->pos = DIR_OFFSET_EOD;
   }
   
   /**
@@@ -557,8 -565,6 +557,8 @@@
    *
    * On return, @ctx->pos contains an offset that will read the next entry
    * in this directory when offset_readdir() is called again with @ctx.
+ + * Caller places this value in the d_off field of the last entry in the
+ + * user's buffer.
    *
    * Return values:
    *   %0 - Complete
@@@ -566,17 -572,19 +566,17 @@@
   static int offset_readdir(struct file *file, struct dir_context *ctx)
   {
         struct dentry *dir = file->f_path.dentry;
- -      long last_index = (long)file->private_data;
   
         lockdep_assert_held(&d_inode(dir)->i_rwsem);
   
         if (!dir_emit_dots(file, ctx))
                 return 0;
- -
- -      offset_iterate_dir(d_inode(dir), ctx, last_index);
+ +      if (ctx->pos != DIR_OFFSET_EOD)
+ +              offset_iterate_dir(file, ctx);
         return 0;
   }
   
   const struct file_operations simple_offset_dir_operations = {
- -      .open           = offset_dir_open,
         .llseek         = offset_dir_llseek,
         .iterate_shared = offset_readdir,
         .read           = generic_read_dir,
@@@ -665,7 -673,6 +665,7 @@@ static int pseudo_fs_fill_super(struct 
         s->s_blocksize_bits = PAGE_SHIFT;
         s->s_magic = ctx->magic;
         s->s_op = ctx->ops ?: &simple_super_operations;
+ +      s->s_export_op = ctx->eops;
         s->s_xattr = ctx->xattr;
         s->s_time_gran = 1;
         root = new_inode(s);
@@@ -1782,7 -1789,7 +1782,7 @@@ int generic_ci_d_compare(const struct d
   {
         const struct dentry *parent;
         const struct inode *dir;
-       char strbuf[DNAME_INLINE_LEN];
+       union shortname_store strbuf;
         struct qstr qstr;
   
         /*
@@@ -1802,22 -1809,23 +1802,23 @@@
         if (!dir || !IS_CASEFOLDED(dir))
                 return 1;
   
+       qstr.len = len;
+       qstr.name = str;
         /*
          * If the dentry name is stored in-line, then it may be concurrently
          * modified by a rename.  If this happens, the VFS will eventually retry
          * the lookup, so it doesn't matter what ->d_compare() returns.
          * However, it's unsafe to call utf8_strncasecmp() with an unstable
          * string.  Therefore, we have to copy the name into a temporary buffer.
+        * As above, len is guaranteed to match str, so the shortname case
+        * is exactly when str points to ->d_shortname.
          */
-       if (len <= DNAME_INLINE_LEN - 1) {
-               memcpy(strbuf, str, len);
-               strbuf[len] = 0;
-               str = strbuf;
+       if (qstr.name == dentry->d_shortname.string) {
+               strbuf = dentry->d_shortname; // NUL is guaranteed to be in there
+               qstr.name = strbuf.string;
                 /* prevent compiler from optimizing out the temporary buffer */
                 barrier();
         }
-       qstr.len = len;
-       qstr.name = str;
   
         return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr);
   }
diff --combined fs/namei.c

index 8c82afddd2ad80444e974caa5bfe80c34e258b64,77e5d136faaf1e72b56a6902b5b81f0aac2ffe87..3ab9440c5b9313bc4751ab97a1ae73194409188a
--- 1/fs/namei.c
--- 2/fs/namei.c
+++ b/fs/namei.c
@@@ -921,10 -921,11 +921,11 @@@ out_dput
         return false;
   }
   
- static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
+ static inline int d_revalidate(struct inode *dir, const struct qstr *name,
+                              struct dentry *dentry, unsigned int flags)
   {
         if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
-               return dentry->d_op->d_revalidate(dentry, flags);
+               return dentry->d_op->d_revalidate(dir, name, dentry, flags);
         else
                 return 1;
   }
@@@ -1099,7 -1100,7 +1100,7 @@@ static int sysctl_protected_fifos __rea
   static int sysctl_protected_regular __read_mostly;
   
   #ifdef CONFIG_SYSCTL
- -static struct ctl_table namei_sysctls[] = {
+ +static const struct ctl_table namei_sysctls[] = {
         {
                 .procname       = "protected_symlinks",
                 .data           = &sysctl_protected_symlinks,
@@@ -1652,7 -1653,7 +1653,7 @@@ static struct dentry *lookup_dcache(con
   {
         struct dentry *dentry = d_lookup(dir, name);
         if (dentry) {
-               int error = d_revalidate(dentry, flags);
+               int error = d_revalidate(dir->d_inode, name, dentry, flags);
                 if (unlikely(error <= 0)) {
                         if (!error)
                                 d_invalidate(dentry);
@@@ -1737,19 -1738,20 +1738,20 @@@ static struct dentry *lookup_fast(struc
                 if (read_seqcount_retry(&parent->d_seq, nd->seq))
                         return ERR_PTR(-ECHILD);
   
-               status = d_revalidate(dentry, nd->flags);
+               status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags);
                 if (likely(status > 0))
                         return dentry;
                 if (!try_to_unlazy_next(nd, dentry))
                         return ERR_PTR(-ECHILD);
                 if (status == -ECHILD)
                         /* we'd been told to redo it in non-rcu mode */
-                       status = d_revalidate(dentry, nd->flags);
+                       status = d_revalidate(nd->inode, &nd->last,
+                                             dentry, nd->flags);
         } else {
                 dentry = __d_lookup(parent, &nd->last);
                 if (unlikely(!dentry))
                         return NULL;
-               status = d_revalidate(dentry, nd->flags);
+               status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags);
         }
         if (unlikely(status <= 0)) {
                 if (!status)
@@@ -1777,7 -1779,7 +1779,7 @@@ again
         if (IS_ERR(dentry))
                 return dentry;
         if (unlikely(!d_in_lookup(dentry))) {
-               int error = d_revalidate(dentry, flags);
+               int error = d_revalidate(inode, name, dentry, flags);
                 if (unlikely(error <= 0)) {
                         if (!error) {
                                 d_invalidate(dentry);
@@@ -3575,7 -3577,7 +3577,7 @@@ static struct dentry *lookup_open(struc
                 if (d_in_lookup(dentry))
                         break;
   
-               error = d_revalidate(dentry, nd->flags);
+               error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags);
                 if (likely(error > 0))
                         break;
                 if (error)
@@@ -5272,16 -5274,19 +5274,16 @@@ SYSCALL_DEFINE2(rename, const char __us
                                 getname(newname), 0);
   }
   
- -int readlink_copy(char __user *buffer, int buflen, const char *link)
+ +int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen)
   {
- -      int len = PTR_ERR(link);
- -      if (IS_ERR(link))
- -              goto out;
+ +      int copylen;
   
- -      len = strlen(link);
- -      if (len > (unsigned) buflen)
- -              len = buflen;
- -      if (copy_to_user(buffer, link, len))
- -              len = -EFAULT;
- -out:
- -      return len;
+ +      copylen = linklen;
+ +      if (unlikely(copylen > (unsigned) buflen))
+ +              copylen = buflen;
+ +      if (copy_to_user(buffer, link, copylen))
+ +              copylen = -EFAULT;
+ +      return copylen;
   }
   
   /**
@@@ -5301,9 -5306,6 +5303,9 @@@ int vfs_readlink(struct dentry *dentry
         const char *link;
         int res;
   
+ +      if (inode->i_opflags & IOP_CACHED_LINK)
+ +              return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen);
+ +
         if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
                 if (unlikely(inode->i_op->readlink))
                         return inode->i_op->readlink(dentry, buffer, buflen);
@@@ -5322,7 -5324,7 +5324,7 @@@
                 if (IS_ERR(link))
                         return PTR_ERR(link);
         }
- -      res = readlink_copy(buffer, buflen, link);
+ +      res = readlink_copy(buffer, buflen, link, strlen(link));
         do_delayed_call(&done);
         return res;
   }
@@@ -5391,14 -5393,10 +5393,14 @@@ EXPORT_SYMBOL(page_put_link)
   
   int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
   {
+ +      const char *link;
+ +      int res;
+ +
         DEFINE_DELAYED_CALL(done);
- -      int res = readlink_copy(buffer, buflen,
- -                              page_get_link(dentry, d_inode(dentry),
- -                                            &done));
+ +      link = page_get_link(dentry, d_inode(dentry), &done);
+ +      res = PTR_ERR(link);
+ +      if (!IS_ERR(link))
+ +              res = readlink_copy(buffer, buflen, link, strlen(link));
         do_delayed_call(&done);
         return res;
   }
diff --combined fs/nfs/nfs3proc.c

index 7359e1a3bd84c12231e652e3e70dff7b13342363,ce70768e0201a0e2c058798533b436c2496f4a23..0c3bc98cd999cc39a0a883102a83a8b35b57c30d
--- 1/fs/nfs/nfs3proc.c
--- 2/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@@ -192,7 -192,7 +192,7 @@@ __nfs3_proc_lookup(struct inode *dir, c
   }
   
   static int
- nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
                  struct nfs_fh *fhandle, struct nfs_fattr *fattr)
   {
         unsigned short task_flags = 0;
@@@ -202,8 -202,7 +202,7 @@@
                 task_flags |= RPC_TASK_TIMEOUT;
   
         dprintk("NFS call  lookup %pd2\n", dentry);
-       return __nfs3_proc_lookup(dir, dentry->d_name.name,
-                                 dentry->d_name.len, fhandle, fattr,
+       return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr,
                                   task_flags);
   }
   
@@@ -844,41 -843,6 +843,41 @@@ nfs3_proc_pathconf(struct nfs_server *s
         return status;
   }
   
+ +#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ +
+ +static unsigned nfs3_localio_probe_throttle __read_mostly = 0;
+ +module_param(nfs3_localio_probe_throttle, uint, 0644);
+ +MODULE_PARM_DESC(nfs3_localio_probe_throttle,
+ +               "Probe for NFSv3 LOCALIO every N IO requests. Must be power-of-2, defaults to 0 (probing disabled).");
+ +
+ +static void nfs3_localio_probe(struct nfs_server *server)
+ +{
+ +      struct nfs_client *clp = server->nfs_client;
+ +
+ +      /* Throttled to reduce nfs_local_probe_async() frequency */
+ +      if (!nfs3_localio_probe_throttle || nfs_server_is_local(clp))
+ +              return;
+ +
+ +      /*
+ +       * Try (re)enabling LOCALIO if isn't enabled -- admin deems
+ +       * it worthwhile to periodically check if LOCALIO possible by
+ +       * setting the 'nfs3_localio_probe_throttle' module parameter.
+ +       *
+ +       * This is useful if LOCALIO was previously enabled, but was
+ +       * disabled due to server restart, and IO has successfully
+ +       * completed in terms of normal RPC.
+ +       */
+ +      if ((clp->cl_uuid.nfs3_localio_probe_count++ &
+ +           (nfs3_localio_probe_throttle - 1)) == 0) {
+ +              if (!nfs_server_is_local(clp))
+ +                      nfs_local_probe_async(clp);
+ +      }
+ +}
+ +
+ +#else
+ +static void nfs3_localio_probe(struct nfs_server *server) {}
+ +#endif
+ +
   static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
   {
         struct inode *inode = hdr->inode;
@@@ -890,11 -854,8 +889,11 @@@
         if (nfs3_async_handle_jukebox(task, inode))
                 return -EAGAIN;
   
- -      if (task->tk_status >= 0 && !server->read_hdrsize)
- -              cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ +      if (task->tk_status >= 0) {
+ +              if (!server->read_hdrsize)
+ +                      cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+ +              nfs3_localio_probe(server);
+ +      }
   
         nfs_invalidate_atime(inode);
         nfs_refresh_inode(inode, &hdr->fattr);
@@@ -924,10 -885,8 +923,10 @@@ static int nfs3_write_done(struct rpc_t
   
         if (nfs3_async_handle_jukebox(task, inode))
                 return -EAGAIN;
- -      if (task->tk_status >= 0)
+ +      if (task->tk_status >= 0) {
                 nfs_writeback_update_inode(hdr);
+ +              nfs3_localio_probe(NFS_SERVER(inode));
+ +      }
         return 0;
   }
   
diff --combined fs/nfs/nfs4proc.c

index d615d520f8cf1f65857a54647e871c7d90e2a2bd,4d85068e820d77a44168f8c573bfbb162e2851c4..df9669d4ded7f593c505ff33b10dffbd47b59b77
--- 1/fs/nfs/nfs4proc.c
--- 2/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@@ -114,7 -114,6 +114,7 @@@ static inline struct nfs4_label 
   nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
         struct iattr *sattr, struct nfs4_label *label)
   {
+ +      struct lsm_context shim;
         int err;
   
         if (label == NULL)
@@@ -129,25 -128,18 +129,25 @@@
         label->label = NULL;
   
         err = security_dentry_init_security(dentry, sattr->ia_mode,
- -                              &dentry->d_name, NULL,
- -                              (void **)&label->label, &label->len);
- -      if (err == 0)
- -              return label;
+ +                              &dentry->d_name, NULL, &shim);
+ +      if (err)
+ +              return NULL;
   
- -      return NULL;
+ +      label->label = shim.context;
+ +      label->len = shim.len;
+ +      return label;
   }
   static inline void
   nfs4_label_release_security(struct nfs4_label *label)
   {
- -      if (label)
- -              security_release_secctx(label->label, label->len);
+ +      struct lsm_context shim;
+ +
+ +      if (label) {
+ +              shim.context = label->label;
+ +              shim.len = label->len;
+ +              shim.id = LSM_ID_UNDEF;
+ +              security_release_secctx(&shim);
+ +      }
   }
   static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label)
   {
@@@ -4544,15 -4536,15 +4544,15 @@@ nfs4_proc_setattr(struct dentry *dentry
   }
   
   static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
-               struct dentry *dentry, struct nfs_fh *fhandle,
-               struct nfs_fattr *fattr)
+               struct dentry *dentry, const struct qstr *name,
+               struct nfs_fh *fhandle, struct nfs_fattr *fattr)
   {
         struct nfs_server *server = NFS_SERVER(dir);
         int                    status;
         struct nfs4_lookup_arg args = {
                 .bitmask = server->attr_bitmask,
                 .dir_fh = NFS_FH(dir),
-               .name = &dentry->d_name,
+               .name = name,
         };
         struct nfs4_lookup_res res = {
                 .server = server,
@@@ -4594,17 -4586,16 +4594,16 @@@ static void nfs_fixup_secinfo_attribute
   }
   
   static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
-                                  struct dentry *dentry, struct nfs_fh *fhandle,
-                                  struct nfs_fattr *fattr)
+                                  struct dentry *dentry, const struct qstr *name,
+                                  struct nfs_fh *fhandle, struct nfs_fattr *fattr)
   {
         struct nfs4_exception exception = {
                 .interruptible = true,
         };
         struct rpc_clnt *client = *clnt;
-       const struct qstr *name = &dentry->d_name;
         int err;
         do {
-               err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr);
+               err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr);
                 trace_nfs4_lookup(dir, name, err);
                 switch (err) {
                 case -NFS4ERR_BADNAME:
@@@ -4639,13 -4630,13 +4638,13 @@@ out
         return err;
   }
   
- static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
+ static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name,
                             struct nfs_fh *fhandle, struct nfs_fattr *fattr)
   {
         int status;
         struct rpc_clnt *client = NFS_CLIENT(dir);
   
-       status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+       status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr);
         if (client != NFS_CLIENT(dir)) {
                 rpc_shutdown_client(client);
                 nfs_fixup_secinfo_attributes(fattr);
@@@ -4660,7 -4651,8 +4659,8 @@@ nfs4_proc_lookup_mountpoint(struct inod
         struct rpc_clnt *client = NFS_CLIENT(dir);
         int status;
   
-       status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
+       status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name,
+                                        fhandle, fattr);
         if (status < 0)
                 return ERR_PTR(status);
         return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
diff --combined fs/proc/base.c

index a50b222a5917e036a48eb50e9c6a0a6787e58a4b,fb5493d0edf02fe2d13e1550ff94ddcd5af8d08f..cd89e956c322440f35ed75187416f4b247b07f96
--- 1/fs/proc/base.c
--- 2/fs/proc/base.c
+++ b/fs/proc/base.c
@@@ -2058,7 -2058,8 +2058,8 @@@ void pid_update_inode(struct task_struc
    * performed a setuid(), etc.
    *
    */
- static int pid_revalidate(struct dentry *dentry, unsigned int flags)
+ static int pid_revalidate(struct inode *dir, const struct qstr *name,
+                         struct dentry *dentry, unsigned int flags)
   {
         struct inode *inode;
         struct task_struct *task;
@@@ -2191,7 -2192,8 +2192,8 @@@ static int dname_to_vma_addr(struct den
         return 0;
   }
   
- static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
+ static int map_files_d_revalidate(struct inode *dir, const struct qstr *name,
+                                 struct dentry *dentry, unsigned int flags)
   {
         unsigned long vm_start, vm_end;
         bool exact_vma_exists = false;
@@@ -3269,7 -3271,6 +3271,7 @@@ static int proc_pid_ksm_stat(struct seq
                                 struct pid *pid, struct task_struct *task)
   {
         struct mm_struct *mm;
+ +      int ret = 0;
   
         mm = get_task_mm(task);
         if (mm) {
@@@ -3277,16 -3278,6 +3279,16 @@@
                 seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm));
                 seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
                 seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
+ +              seq_printf(m, "ksm_merge_any: %s\n",
+ +                              test_bit(MMF_VM_MERGE_ANY, &mm->flags) ? "yes" : "no");
+ +              ret = mmap_read_lock_killable(mm);
+ +              if (ret) {
+ +                      mmput(mm);
+ +                      return ret;
+ +              }
+ +              seq_printf(m, "ksm_mergeable: %s\n",
+ +                              ksm_process_mergeable(mm) ? "yes" : "no");
+ +              mmap_read_unlock(mm);
                 mmput(mm);
         }
   
diff --combined fs/smb/client/dir.c

index 1822493dd0842ecaa62ad36792684751d5715914,8c5d44ee91edfff6513020266f6e62c6f3bdbe3e..d1e95632ac54e17d566c9f34d99c3a004563ae96
--- 1/fs/smb/client/dir.c
--- 2/fs/smb/client/dir.c
+++ b/fs/smb/client/dir.c
@@@ -627,7 -627,7 +627,7 @@@ int cifs_mknod(struct mnt_idmap *idmap
                 goto mknod_out;
         }
   
- -      trace_smb3_mknod_enter(xid, tcon->ses->Suid, tcon->tid, full_path);
+ +      trace_smb3_mknod_enter(xid, tcon->tid, tcon->ses->Suid, full_path);
   
         rc = tcon->ses->server->ops->make_node(xid, inode, direntry, tcon,
                                                full_path, mode,
@@@ -635,9 -635,9 +635,9 @@@
   
   mknod_out:
         if (rc)
- -              trace_smb3_mknod_err(xid,  tcon->ses->Suid, tcon->tid, rc);
+ +              trace_smb3_mknod_err(xid,  tcon->tid, tcon->ses->Suid, rc);
         else
- -              trace_smb3_mknod_done(xid, tcon->ses->Suid, tcon->tid);
+ +              trace_smb3_mknod_done(xid, tcon->tid, tcon->ses->Suid);
   
         free_dentry_path(page);
         free_xid(xid);
@@@ -737,7 -737,8 +737,8 @@@ again
   }
   
   static int
- cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
+ cifs_d_revalidate(struct inode *dir, const struct qstr *name,
+                 struct dentry *direntry, unsigned int flags)
   {
         struct inode *inode;
         int rc;
diff --combined include/linux/nfs_xdr.h

index 162b7c0c35557b9dd1530c17e7c9bb27576c8004,08b62bbf59f0a397a00bce464d4cda191d58bf81..9155a6ffc3709e2adceaac0ee03488dc030209eb
--- 1/include/linux/nfs_xdr.h
--- 2/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@@ -1315,6 -1315,11 +1315,6 @@@ struct nfs4_fsid_present_res 
   
   #endif /* CONFIG_NFS_V4 */
   
- -struct nfstime4 {
- -      u64     seconds;
- -      u32     nseconds;
- -};
- -
   #ifdef CONFIG_NFS_V4_1
   
   struct pnfs_commit_bucket {
@@@ -1632,7 -1637,6 +1632,7 @@@ enum 
         NFS_IOHDR_RESEND_PNFS,
         NFS_IOHDR_RESEND_MDS,
         NFS_IOHDR_UNSTABLE_WRITES,
+ +      NFS_IOHDR_ODIRECT,
   };
   
   struct nfs_io_completion;
@@@ -1781,7 -1785,7 +1781,7 @@@ struct nfs_rpc_ops 
                             struct nfs_fattr *, struct inode *);
         int     (*setattr) (struct dentry *, struct nfs_fattr *,
                             struct iattr *);
-       int     (*lookup)  (struct inode *, struct dentry *,
+       int     (*lookup)  (struct inode *, struct dentry *, const struct qstr *,
                             struct nfs_fh *, struct nfs_fattr *);
         int     (*lookupp) (struct inode *, struct nfs_fh *,
                             struct nfs_fattr *);
author	Linus Torvalds <[email protected]>
	Thu, 30 Jan 2025 17:13:35 +0000 (09:13 -0800)
committer	Linus Torvalds <[email protected]>
	Thu, 30 Jan 2025 17:13:35 +0000 (09:13 -0800)
		1	2
Documentation/filesystems/porting.rst	patch \|	diff1 \|	diff2 \|	blob \| history
fs/afs/dir.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ceph/mds_client.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/dcache.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/exfat/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fuse/dir.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/libfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfs/nfs3proc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfs/nfs4proc.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/proc/base.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/smb/client/dir.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/nfs_xdr.h	patch \|	diff1 \|	diff2 \|	blob \| history