Merge tag 'pull-rename' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

author Linus Torvalds <[email protected]>
Fri, 12 Jan 2024 04:00:22 +0000 (20:00 -0800)
committer Linus Torvalds <[email protected]>
Fri, 12 Jan 2024 04:00:22 +0000 (20:00 -0800)
diff --combined Documentation/filesystems/locking.rst

index 421daf83794088c1cab620341433a8fbd55789d4,bd12f2f850ad3a863f495543d95de8174bb4a727..d5bf4b6b7509b01c9a2d5225a6bb5b2e1ef327b2
--- 1/Documentation/filesystems/locking.rst
--- 2/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@@ -101,7 -101,7 +101,7 @@@ symlink:   exclusiv
   mkdir:                exclusive
   unlink:               exclusive (both)
   rmdir:                exclusive (both)(see below)
- rename:               exclusive (all) (see below)
+ rename:               exclusive (both parents, some children) (see below)
   readlink:     no
   get_link:     no
   setattr:      exclusive
@@@ -123,6 -123,9 +123,9 @@@ get_offset_ctx  n
         Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
         exclusive on victim.
         cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
+       ->unlink() and ->rename() have ->i_rwsem exclusive on all non-directories
+       involved.
+       ->rename() has ->i_rwsem exclusive on any subdirectory that changes parent.
   
   See Documentation/filesystems/directory-locking.rst for more detailed discussion
   of the locking scheme for directory operations.
@@@ -261,7 -264,7 +264,7 @@@ prototypes:
                         struct folio *src, enum migrate_mode);
         int (*launder_folio)(struct folio *);
         bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
- -      int (*error_remove_page)(struct address_space *, struct page *);
+ +      int (*error_remove_folio)(struct address_space *, struct folio *);
         int (*swap_activate)(struct swap_info_struct *sis, struct file *f, sector_t *span)
         int (*swap_deactivate)(struct file *);
         int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
@@@ -287,7 -290,7 +290,7 @@@ direct_IO
   migrate_folio:                yes (both)
   launder_folio:                yes
   is_partially_uptodate:        yes
- -error_remove_page:    yes
+ +error_remove_folio:   yes
   swap_activate:                no
   swap_deactivate:      no
   swap_rw:              yes, unlocks
diff --combined Documentation/filesystems/porting.rst

index ced3a6761329340d29e3ce05ce8e4f47b253d986,33cd56e2ca1a50622ee8c967d5d880a979f09930..c549fb2fc3ba7adb85edbdb573f2b801215372b1
--- 1/Documentation/filesystems/porting.rst
--- 2/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@@ -1064,12 -1064,27 +1064,39 @@@ generic_encode_ino32_fh() explicitly
   
   ---
   
+ **mandatory**
+ 
+ If ->rename() update of .. on cross-directory move needs an exclusion with
+ directory modifications, do *not* lock the subdirectory in question in your
+ ->rename() - it's done by the caller now [that item should've been added in
+ 28eceeda130f "fs: Lock moved directories"].
+ 
+ ---
+ 
+ **mandatory**
+ 
+ On same-directory ->rename() the (tautological) update of .. is not protected
+ by any locks; just don't do it if the old parent is the same as the new one.
+ We really can't lock two subdirectories in same-directory rename - not without
+ deadlocks.
+ 
+ ---
+ 
+ **mandatory**
+ 
+ lock_rename() and lock_rename_child() may fail in cross-directory case, if
+ their arguments do not have a common ancestor.  In that case ERR_PTR(-EXDEV)
+ is returned, with no locks taken.  In-tree users updated; out-of-tree ones
+ would need to do so.
++
++---
++
+ +**recommended**
+ +
+ +Block device freezing and thawing have been moved to holder operations.
+ +
+ +Before this change, get_active_super() would only be able to find the
+ +superblock of the main block device, i.e., the one stored in sb->s_bdev. Block
+ +device freezing now works for any block device owned by a given superblock, not
+ +just the main block device. The get_active_super() helper and bd_fsfreeze_sb
+ +pointer are gone.
diff --combined fs/ecryptfs/inode.c

index d7193687b9b4cfec18e8232c64e79583bb4ef119,8efd20dc902bf39875507e22bd94de5c25c4ed83..5ed1e4cf6c0b4ecbf4efd55169374a3d4fca3e87
--- 1/fs/ecryptfs/inode.c
--- 2/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@@ -78,14 -78,6 +78,14 @@@ static struct inode *__ecryptfs_get_ino
   
         if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb))
                 return ERR_PTR(-EXDEV);
+ +
+ +      /* Reject dealing with casefold directories. */
+ +      if (IS_CASEFOLDED(lower_inode)) {
+ +              pr_err_ratelimited("%s: Can't handle casefolded directory.\n",
+ +                                 __func__);
+ +              return ERR_PTR(-EREMOTE);
+ +      }
+ +
         if (!igrab(lower_inode))
                 return ERR_PTR(-ESTALE);
         inode = iget5_locked(sb, (unsigned long)lower_inode,
@@@ -607,6 -599,8 +607,8 @@@ ecryptfs_rename(struct mnt_idmap *idmap
         target_inode = d_inode(new_dentry);
   
         trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+       if (IS_ERR(trap))
+               return PTR_ERR(trap);
         dget(lower_new_dentry);
         rc = -EINVAL;
         if (lower_old_dentry->d_parent != lower_old_dir_dentry)
@@@ -1006,14 -1000,6 +1008,14 @@@ static int ecryptfs_getattr_link(struc
         return rc;
   }
   
+ +static int ecryptfs_do_getattr(const struct path *path, struct kstat *stat,
+ +                             u32 request_mask, unsigned int flags)
+ +{
+ +      if (flags & AT_GETATTR_NOSEC)
+ +              return vfs_getattr_nosec(path, stat, request_mask, flags);
+ +      return vfs_getattr(path, stat, request_mask, flags);
+ +}
+ +
   static int ecryptfs_getattr(struct mnt_idmap *idmap,
                             const struct path *path, struct kstat *stat,
                             u32 request_mask, unsigned int flags)
@@@ -1022,8 -1008,8 +1024,8 @@@
         struct kstat lower_stat;
         int rc;
   
- -      rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat,
- -                       request_mask, flags);
+ +      rc = ecryptfs_do_getattr(ecryptfs_dentry_to_lower_path(dentry),
+ +                               &lower_stat, request_mask, flags);
         if (!rc) {
                 fsstack_copy_attr_all(d_inode(dentry),
                                       ecryptfs_inode_to_lower(d_inode(dentry)));
diff --combined fs/inode.c

index d23362a671ddae34101ea5d112274ae951de6a8a,453d5be1a014daed203d1f069244d5ab611ebe3f..91048c4c9c9e7d1079d375afe64383c74f0c3c81
--- 1/fs/inode.c
--- 2/fs/inode.c
+++ b/fs/inode.c
@@@ -129,6 -129,7 +129,6 @@@ static struct ctl_table inodes_sysctls[
                 .mode           = 0444,
                 .proc_handler   = proc_nr_inodes,
         },
- -      { }
   };
   
   static int __init init_fs_inode_sysctls(void)
@@@ -208,14 -209,12 +208,14 @@@ int inode_init_always(struct super_bloc
         atomic_set(&mapping->nr_thps, 0);
   #endif
         mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
- -      mapping->private_data = NULL;
+ +      mapping->i_private_data = NULL;
         mapping->writeback_index = 0;
         init_rwsem(&mapping->invalidate_lock);
         lockdep_set_class_and_name(&mapping->invalidate_lock,
                                    &sb->s_type->invalidate_lock_key,
                                    "mapping.invalidate_lock");
+ +      if (sb->s_iflags & SB_I_STABLE_WRITES)
+ +              mapping_set_stable_writes(mapping);
         inode->i_private = NULL;
         inode->i_mapping = mapping;
         INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
@@@ -397,8 -396,8 +397,8 @@@ static void __address_space_init_once(s
   {
         xa_init_flags(&mapping->i_pages, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT);
         init_rwsem(&mapping->i_mmap_rwsem);
- -      INIT_LIST_HEAD(&mapping->private_list);
- -      spin_lock_init(&mapping->private_lock);
+ +      INIT_LIST_HEAD(&mapping->i_private_list);
+ +      spin_lock_init(&mapping->i_private_lock);
         mapping->i_mmap = RB_ROOT_CACHED;
   }
   
@@@ -463,7 -462,7 +463,7 @@@ static void __inode_add_lru(struct inod
         if (!mapping_shrinkable(&inode->i_data))
                 return;
   
- -      if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
+ +      if (list_lru_add_obj(&inode->i_sb->s_inode_lru, &inode->i_lru))
                 this_cpu_inc(nr_unused);
         else if (rotate)
                 inode->i_state |= I_REFERENCED;
@@@ -481,7 -480,7 +481,7 @@@ void inode_add_lru(struct inode *inode
   
   static void inode_lru_list_del(struct inode *inode)
   {
- -      if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
+ +      if (list_lru_del_obj(&inode->i_sb->s_inode_lru, &inode->i_lru))
                 this_cpu_dec(nr_unused);
   }
   
@@@ -619,7 -618,7 +619,7 @@@ void clear_inode(struct inode *inode
          * nor even WARN_ON(!mapping_empty).
          */
         xa_unlock_irq(&inode->i_data.i_pages);
- -      BUG_ON(!list_empty(&inode->i_data.private_list));
+ +      BUG_ON(!list_empty(&inode->i_data.i_private_list));
         BUG_ON(!(inode->i_state & I_FREEING));
         BUG_ON(inode->i_state & I_CLEAR);
         BUG_ON(!list_empty(&inode->i_wb_list));
@@@ -1088,48 -1087,6 +1088,6 @@@ void discard_new_inode(struct inode *in
   }
   EXPORT_SYMBOL(discard_new_inode);
   
- /**
-  * lock_two_inodes - lock two inodes (may be regular files but also dirs)
-  *
-  * Lock any non-NULL argument. The caller must make sure that if he is passing
-  * in two directories, one is not ancestor of the other.  Zero, one or two
-  * objects may be locked by this function.
-  *
-  * @inode1: first inode to lock
-  * @inode2: second inode to lock
-  * @subclass1: inode lock subclass for the first lock obtained
-  * @subclass2: inode lock subclass for the second lock obtained
-  */
- void lock_two_inodes(struct inode *inode1, struct inode *inode2,
-                    unsigned subclass1, unsigned subclass2)
- {
-       if (!inode1 || !inode2) {
-               /*
-                * Make sure @subclass1 will be used for the acquired lock.
-                * This is not strictly necessary (no current caller cares) but
-                * let's keep things consistent.
-                */
-               if (!inode1)
-                       swap(inode1, inode2);
-               goto lock;
-       }
- 
-       /*
-        * If one object is directory and the other is not, we must make sure
-        * to lock directory first as the other object may be its child.
-        */
-       if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
-               if (inode1 > inode2)
-                       swap(inode1, inode2);
-       } else if (!S_ISDIR(inode1->i_mode))
-               swap(inode1, inode2);
- lock:
-       if (inode1)
-               inode_lock_nested(inode1, subclass1);
-       if (inode2 && inode2 != inode1)
-               inode_lock_nested(inode2, subclass2);
- }
- 
   /**
    * lock_two_nondirectories - take two i_mutexes on non-directory objects
    *
@@@ -1145,7 -1102,12 +1103,12 @@@ void lock_two_nondirectories(struct ino
                 WARN_ON_ONCE(S_ISDIR(inode1->i_mode));
         if (inode2)
                 WARN_ON_ONCE(S_ISDIR(inode2->i_mode));
-       lock_two_inodes(inode1, inode2, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
+       if (inode1 > inode2)
+               swap(inode1, inode2);
+       if (inode1)
+               inode_lock(inode1);
+       if (inode2 && inode2 != inode1)
+               inode_lock_nested(inode2, I_MUTEX_NONDIR2);
   }
   EXPORT_SYMBOL(lock_two_nondirectories);
   
@@@ -1835,37 -1797,37 +1798,37 @@@ EXPORT_SYMBOL(bmap)
    * earlier than or equal to either the ctime or mtime,
    * or if at least a day has passed since the last atime update.
    */
- -static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
+ +static bool relatime_need_update(struct vfsmount *mnt, struct inode *inode,
                              struct timespec64 now)
   {
         struct timespec64 atime, mtime, ctime;
   
         if (!(mnt->mnt_flags & MNT_RELATIME))
- -              return 1;
+ +              return true;
         /*
          * Is mtime younger than or equal to atime? If yes, update atime:
          */
         atime = inode_get_atime(inode);
         mtime = inode_get_mtime(inode);
         if (timespec64_compare(&mtime, &atime) >= 0)
- -              return 1;
+ +              return true;
         /*
          * Is ctime younger than or equal to atime? If yes, update atime:
          */
         ctime = inode_get_ctime(inode);
         if (timespec64_compare(&ctime, &atime) >= 0)
- -              return 1;
+ +              return true;
   
         /*
          * Is the previous atime value older than a day? If yes,
          * update atime:
          */
         if ((long)(now.tv_sec - atime.tv_sec) >= 24*60*60)
- -              return 1;
+ +              return true;
         /*
          * Good, we can skip the atime update:
          */
- -      return 0;
+ +      return false;
   }
   
   /**
@@@ -2403,7 -2365,7 +2366,7 @@@ EXPORT_SYMBOL(inode_init_owner)
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   bool inode_owner_or_capable(struct mnt_idmap *idmap,
                             const struct inode *inode)
diff --combined fs/internal.h

index bf2ee2e0d45d29e15458208fb1e57c3c6aaede4b,de67b02226e599c6e22a5c0a85b1938df2340a34..93cdeeb858cb4b1d4d937ddcda830b5c6c62b992
--- 1/fs/internal.h
--- 2/fs/internal.h
+++ b/fs/internal.h
@@@ -83,8 -83,6 +83,8 @@@ int path_mount(const char *dev_name, st
                 const char *type_page, unsigned long flags, void *data_page);
   int path_umount(struct path *path, int flags);
   
+ +int show_path(struct seq_file *m, struct dentry *root);
+ +
   /*
    * fs_struct.c
    */
@@@ -96,6 -94,7 +96,6 @@@ extern void chroot_fs_refs(const struc
   struct file *alloc_empty_file(int flags, const struct cred *cred);
   struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
   struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
- -void release_empty_file(struct file *f);
   
   static inline void file_put_write_access(struct file *file)
   {
@@@ -181,7 -180,7 +181,7 @@@ extern struct file *do_file_open_root(c
                 const char *, const struct open_flags *);
   extern struct open_how build_open_how(int flags, umode_t mode);
   extern int build_open_flags(const struct open_how *how, struct open_flags *op);
- -extern struct file *__close_fd_get_file(unsigned int fd);
+ +struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
   
   long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
   int chmod_common(const struct path *path, umode_t mode);
@@@ -197,8 -196,6 +197,6 @@@ extern long prune_icache_sb(struct supe
   int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry);
   bool in_group_or_capable(struct mnt_idmap *idmap,
                          const struct inode *inode, vfsgid_t vfsgid);
- void lock_two_inodes(struct inode *inode1, struct inode *inode2,
-                    unsigned subclass1, unsigned subclass2);
   
   /*
    * fs-writeback.c
@@@ -244,10 -241,10 +242,10 @@@ int do_statx(int dfd, struct filename *
   /*
    * fs/splice.c:
    */
- -long splice_file_to_pipe(struct file *in,
- -                       struct pipe_inode_info *opipe,
- -                       loff_t *offset,
- -                       size_t len, unsigned int flags);
+ +ssize_t splice_file_to_pipe(struct file *in,
+ +                          struct pipe_inode_info *opipe,
+ +                          loff_t *offset,
+ +                          size_t len, unsigned int flags);
   
   /*
    * fs/xattr.c:
diff --combined fs/namei.c

index 963576e67f6279c64d33b04f08c6fd8f84033691,6b0302ac80d1408470ef147ae13cd28309c607c8..5c318d657503c72f0f903c652d0378577047ecf6
--- 1/fs/namei.c
--- 2/fs/namei.c
+++ b/fs/namei.c
@@@ -289,7 -289,7 +289,7 @@@ EXPORT_SYMBOL(putname)
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   static int check_acl(struct mnt_idmap *idmap,
                      struct inode *inode, int mask)
@@@ -334,7 -334,7 +334,7 @@@
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   static int acl_permission_check(struct mnt_idmap *idmap,
                                 struct inode *inode, int mask)
@@@ -395,7 -395,7 +395,7 @@@
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
                        int mask)
@@@ -1071,6 -1071,7 +1071,6 @@@ static struct ctl_table namei_sysctls[
                 .extra1         = SYSCTL_ZERO,
                 .extra2         = SYSCTL_TWO,
         },
- -      { }
   };
   
   static int __init init_fs_namei_sysctls(void)
@@@ -2466,7 -2467,7 +2466,7 @@@ static int handle_lookup_down(struct na
         return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
   }
   
- -/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+ +/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
   static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
   {
         const char *s = path_init(nd, flags);
@@@ -2521,7 -2522,7 +2521,7 @@@ int filename_lookup(int dfd, struct fil
         return retval;
   }
   
- -/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+ +/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
   static int path_parentat(struct nameidata *nd, unsigned flags,
                                 struct path *parent)
   {
@@@ -3013,27 -3014,37 +3013,37 @@@ static inline int may_create(struct mnt
         return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
   }
   
+ // p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
   static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
   {
-       struct dentry *p;
+       struct dentry *p = p1, *q = p2, *r;
   
-       p = d_ancestor(p2, p1);
-       if (p) {
+       while ((r = p->d_parent) != p2 && r != p)
+               p = r;
+       if (r == p2) {
+               // p is a child of p2 and an ancestor of p1 or p1 itself
                 inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
-               inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
                 return p;
         }
- 
-       p = d_ancestor(p1, p2);
-       if (p) {
+       // p is the root of connected component that contains p1
+       // p2 does not occur on the path from p to p1
+       while ((r = q->d_parent) != p1 && r != p && r != q)
+               q = r;
+       if (r == p1) {
+               // q is a child of p1 and an ancestor of p2 or p2 itself
                 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
-               inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
-               return p;
+               inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+               return q;
+       } else if (likely(r == p)) {
+               // both p2 and p1 are descendents of p
+               inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+               inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+               return NULL;
+       } else { // no common ancestor at the time we'd been called
+               mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
+               return ERR_PTR(-EXDEV);
         }
- 
-       lock_two_inodes(p1->d_inode, p2->d_inode,
-                       I_MUTEX_PARENT, I_MUTEX_PARENT2);
-       return NULL;
   }
   
   /*
@@@ -3157,7 -3168,7 +3167,7 @@@ static inline umode_t vfs_prepare_mode(
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
                struct dentry *dentry, umode_t mode, bool want_excl)
@@@ -3645,7 -3656,7 +3655,7 @@@ static int do_open(struct nameidata *nd
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   static int vfs_tmpfile(struct mnt_idmap *idmap,
                        const struct path *parentpath,
@@@ -3784,7 -3795,10 +3794,7 @@@ static struct file *path_openat(struct 
                 WARN_ON(1);
                 error = -EINVAL;
         }
- -      if (unlikely(file->f_mode & FMODE_OPENED))
- -              fput(file);
- -      else
- -              release_empty_file(file);
+ +      fput(file);
         if (error == -EOPENSTALE) {
                 if (flags & LOOKUP_RCU)
                         error = -ECHILD;
@@@ -3950,7 -3964,7 +3960,7 @@@ EXPORT_SYMBOL(user_path_create)
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
               struct dentry *dentry, umode_t mode, dev_t dev)
@@@ -4076,7 -4090,7 +4086,7 @@@ SYSCALL_DEFINE3(mknod, const char __use
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
               struct dentry *dentry, umode_t mode)
@@@ -4157,7 -4171,7 +4167,7 @@@ SYSCALL_DEFINE2(mkdir, const char __use
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
                      struct dentry *dentry)
@@@ -4286,7 -4300,7 +4296,7 @@@ SYSCALL_DEFINE1(rmdir, const char __use
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
                struct dentry *dentry, struct inode **delegated_inode)
@@@ -4439,7 -4453,7 +4449,7 @@@ SYSCALL_DEFINE1(unlink, const char __us
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
                 struct dentry *dentry, const char *oldname)
@@@ -4531,7 -4545,7 +4541,7 @@@ SYSCALL_DEFINE2(symlink, const char __u
    * the vfsmount must be passed through @idmap. This function will then take
    * care to map the inode according to @idmap before checking permissions.
    * On non-idmapped mounts or if permission checking is to be performed on the
- - * raw inode simply passs @nop_mnt_idmap.
+ + * raw inode simply pass @nop_mnt_idmap.
    */
   int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
              struct inode *dir, struct dentry *new_dentry,
@@@ -4712,11 -4726,12 +4722,12 @@@ SYSCALL_DEFINE2(link, const char __use
    *
    *    a) we can get into loop creation.
    *    b) race potential - two innocent renames can create a loop together.
-  *       That's where 4.4 screws up. Current fix: serialization on
+  *       That's where 4.4BSD screws up. Current fix: serialization on
    *       sb->s_vfs_rename_mutex. We might be more accurate, but that's another
    *       story.
-  *    c) we have to lock _four_ objects - parents and victim (if it exists),
-  *       and source.
+  *    c) we may have to lock up to _four_ objects - parents and victim (if it exists),
+  *       and source (if it's a non-directory or a subdirectory that moves to
+  *       different parent).
    *       And that - after we got ->i_mutex on parents (until then we don't know
    *       whether the target exists).  Solution: try to be smart with locking
    *       order for inodes.  We rely on the fact that tree topology may change
@@@ -4748,6 -4763,7 +4759,7 @@@ int vfs_rename(struct renamedata *rd
         bool new_is_dir = false;
         unsigned max_links = new_dir->i_sb->s_max_links;
         struct name_snapshot old_name;
+       bool lock_old_subdir, lock_new_subdir;
   
         if (source == target)
                 return 0;
@@@ -4801,15 -4817,32 +4813,32 @@@
         take_dentry_name_snapshot(&old_name, old_dentry);
         dget(new_dentry);
         /*
-        * Lock all moved children. Moved directories may need to change parent
-        * pointer so they need the lock to prevent against concurrent
-        * directory changes moving parent pointer. For regular files we've
-        * historically always done this. The lockdep locking subclasses are
-        * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
-        * regular files and directories so it's difficult to tell which
-        * subclasses to use.
+        * Lock children.
+        * The source subdirectory needs to be locked on cross-directory
+        * rename or cross-directory exchange since its parent changes.
+        * The target subdirectory needs to be locked on cross-directory
+        * exchange due to parent change and on any rename due to becoming
+        * a victim.
+        * Non-directories need locking in all cases (for NFS reasons);
+        * they get locked after any subdirectories (in inode address order).
+        *
+        * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE.
+        * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex.
          */
-       lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
+       lock_old_subdir = new_dir != old_dir;
+       lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE);
+       if (is_dir) {
+               if (lock_old_subdir)
+                       inode_lock_nested(source, I_MUTEX_CHILD);
+               if (target && (!new_is_dir || lock_new_subdir))
+                       inode_lock(target);
+       } else if (new_is_dir) {
+               if (lock_new_subdir)
+                       inode_lock_nested(target, I_MUTEX_CHILD);
+               inode_lock(source);
+       } else {
+               lock_two_nondirectories(source, target);
+       }
   
         error = -EPERM;
         if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
@@@ -4857,8 -4890,9 +4886,9 @@@
                         d_exchange(old_dentry, new_dentry);
         }
   out:
-       inode_unlock(source);
-       if (target)
+       if (!is_dir || lock_old_subdir)
+               inode_unlock(source);
+       if (target && (!new_is_dir || lock_new_subdir))
                 inode_unlock(target);
         dput(new_dentry);
         if (!error) {
@@@ -4929,6 -4963,10 +4959,10 @@@ retry
   
   retry_deleg:
         trap = lock_rename(new_path.dentry, old_path.dentry);
+       if (IS_ERR(trap)) {
+               error = PTR_ERR(trap);
+               goto exit_lock_rename;
+       }
   
         old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
                                           lookup_flags);
@@@ -4996,6 -5034,7 +5030,7 @@@ exit4
         dput(old_dentry);
   exit3:
         unlock_rename(new_path.dentry, old_path.dentry);
+ exit_lock_rename:
         if (delegated_inode) {
                 error = break_deleg_wait(&delegated_inode);
                 if (!error)
diff --combined fs/nfsd/vfs.c

index 6e7e37192461e63a3726bca9f306bba9b7d79d91,a99260c3f9bc3e3820bbb36556293f066067550e..b7c7a9273ea01d9d84bcc2ea487bd6d886bc060d
--- 1/fs/nfsd/vfs.c
--- 2/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@@ -901,6 -901,7 +901,6 @@@ nfsd_open(struct svc_rqst *rqstp, struc
         int host_err;
         bool retried = false;
   
- -      validate_process_creds();
         /*
          * If we get here, then the client has already done an "open",
          * and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@@ -925,6 -926,7 +925,6 @@@ retry
                 }
                 err = nfserrno(host_err);
         }
- -      validate_process_creds();
         return err;
   }
   
@@@ -941,7 -943,12 +941,7 @@@ in
   nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
                    struct file **filp)
   {
- -      int err;
- -
- -      validate_process_creds();
- -      err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
- -      validate_process_creds();
- -      return err;
+ +      return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
   }
   
   /*
@@@ -1039,10 -1046,7 +1039,10 @@@ __be32 nfsd_splice_read(struct svc_rqs
         ssize_t host_err;
   
         trace_nfsd_read_splice(rqstp, fhp, offset, *count);
- -      host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
+ +      host_err = rw_verify_area(READ, file, &offset, *count);
+ +      if (!host_err)
+ +              host_err = splice_direct_to_actor(file, &sd,
+ +                                                nfsd_direct_splice_actor);
         return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
   }
   
@@@ -1179,7 -1183,9 +1179,7 @@@ nfsd_vfs_write(struct svc_rqst *rqstp, 
         since = READ_ONCE(file->f_wb_err);
         if (verf)
                 nfsd_copy_write_verifier(verf, nn);
- -      file_start_write(file);
         host_err = vfs_iter_write(file, &iter, &pos, flags);
- -      file_end_write(file);
         if (host_err < 0) {
                 commit_reset_write_verifier(nn, rqstp, host_err);
                 goto out_nfserr;
@@@ -1210,30 -1216,6 +1210,30 @@@ out_nfserr
         return nfserr;
   }
   
+ +/**
+ + * nfsd_read_splice_ok - check if spliced reading is supported
+ + * @rqstp: RPC transaction context
+ + *
+ + * Return values:
+ + *   %true: nfsd_splice_read() may be used
+ + *   %false: nfsd_splice_read() must not be used
+ + *
+ + * NFS READ normally uses splice to send data in-place. However the
+ + * data in cache can change after the reply's MIC is computed but
+ + * before the RPC reply is sent. To prevent the client from
+ + * rejecting the server-computed MIC in this somewhat rare case, do
+ + * not use splice with the GSS integrity and privacy services.
+ + */
+ +bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
+ +{
+ +      switch (svc_auth_flavor(rqstp)) {
+ +      case RPC_AUTH_GSS_KRB5I:
+ +      case RPC_AUTH_GSS_KRB5P:
+ +              return false;
+ +      }
+ +      return true;
+ +}
+ +
   /**
    * nfsd_read - Read data from a file
    * @rqstp: RPC transaction context
@@@ -1263,7 -1245,7 +1263,7 @@@ __be32 nfsd_read(struct svc_rqst *rqstp
                 return err;
   
         file = nf->nf_file;
- -      if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &rqstp->rq_flags))
+ +      if (file->f_op->splice_read && nfsd_read_splice_ok(rqstp))
                 err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
         else
                 err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
@@@ -1831,6 -1813,10 +1831,10 @@@ retry
         }
   
         trap = lock_rename(tdentry, fdentry);
+       if (IS_ERR(trap)) {
+               err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+               goto out;
+       }
         err = fh_fill_pre_attrs(ffhp);
         if (err != nfs_ok)
                 goto out_unlock;
@@@ -2127,23 -2113,9 +2131,23 @@@ static __be32 nfsd_buffered_readdir(str
         return cdp->err;
   }
   
- -/*
- - * Read entries from a directory.
- - * The  NFSv3/4 verifier we ignore for now.
+ +/**
+ + * nfsd_readdir - Read entries from a directory
+ + * @rqstp: RPC transaction context
+ + * @fhp: NFS file handle of directory to be read
+ + * @offsetp: OUT: seek offset of final entry that was read
+ + * @cdp: OUT: an eof error value
+ + * @func: entry filler actor
+ + *
+ + * This implementation ignores the NFSv3/4 verifier cookie.
+ + *
+ + * NB: normal system calls hold file->f_pos_lock when calling
+ + * ->iterate_shared and ->llseek, but nfsd_readdir() does not.
+ + * Because the struct file acquired here is not visible to other
+ + * threads, its internal state does not need mutex protection.
+ + *
+ + * Returns nfs_ok on success, otherwise an nfsstat code is
+ + * returned.
    */
   __be32
   nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 
diff --combined fs/overlayfs/copy_up.c

index 696478f09cc1b459cbe26502ae4db81ec91300b3,e44dc5f661610d760f69bd73e71685b81af86e22..b8e25ca51016d9df648ca58495baa9db553330ec
--- 1/fs/overlayfs/copy_up.c
--- 2/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@@ -230,19 -230,6 +230,19 @@@ static int ovl_copy_fileattr(struct ino
         return ovl_real_fileattr_set(new, &newfa);
   }
   
+ +static int ovl_verify_area(loff_t pos, loff_t pos2, loff_t len, loff_t totlen)
+ +{
+ +      loff_t tmp;
+ +
+ +      if (WARN_ON_ONCE(pos != pos2))
+ +              return -EIO;
+ +      if (WARN_ON_ONCE(pos < 0 || len < 0 || totlen < 0))
+ +              return -EIO;
+ +      if (WARN_ON_ONCE(check_add_overflow(pos, len, &tmp)))
+ +              return -EIO;
+ +      return 0;
+ +}
+ +
   static int ovl_copy_up_file(struct ovl_fs *ofs, struct dentry *dentry,
                             struct file *new_file, loff_t len)
   {
@@@ -257,20 -244,13 +257,20 @@@
         int error = 0;
   
         ovl_path_lowerdata(dentry, &datapath);
- -      if (WARN_ON(datapath.dentry == NULL))
+ +      if (WARN_ON_ONCE(datapath.dentry == NULL) ||
+ +          WARN_ON_ONCE(len < 0))
                 return -EIO;
   
         old_file = ovl_path_open(&datapath, O_LARGEFILE | O_RDONLY);
         if (IS_ERR(old_file))
                 return PTR_ERR(old_file);
   
+ +      error = rw_verify_area(READ, old_file, &old_pos, len);
+ +      if (!error)
+ +              error = rw_verify_area(WRITE, new_file, &new_pos, len);
+ +      if (error)
+ +              goto out_fput;
+ +
         /* Try to use clone_file_range to clone up within the same fs */
         ovl_start_write(dentry);
         cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
@@@ -285,7 -265,7 +285,7 @@@
   
         while (len) {
                 size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
- -              long bytes;
+ +              ssize_t bytes;
   
                 if (len < this_len)
                         this_len = len;
@@@ -329,13 -309,11 +329,13 @@@
                         }
                 }
   
- -              ovl_start_write(dentry);
+ +              error = ovl_verify_area(old_pos, new_pos, this_len, len);
+ +              if (error)
+ +                      break;
+ +
                 bytes = do_splice_direct(old_file, &old_pos,
                                          new_file, &new_pos,
                                          this_len, SPLICE_F_MOVE);
- -              ovl_end_write(dentry);
                 if (bytes <= 0) {
                         error = bytes;
                         break;
@@@ -744,7 -722,7 +744,7 @@@ static int ovl_copy_up_workdir(struct o
         struct inode *inode;
         struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
         struct path path = { .mnt = ovl_upper_mnt(ofs) };
-       struct dentry *temp, *upper;
+       struct dentry *temp, *upper, *trap;
         struct ovl_cu_creds cc;
         int err;
         struct ovl_cattr cattr = {
@@@ -775,17 -753,18 +775,19 @@@
         path.dentry = temp;
         err = ovl_copy_up_data(c, &path);
         /*
- -       * We cannot hold lock_rename() throughout this helper, because or
+ +       * We cannot hold lock_rename() throughout this helper, because of
          * lock ordering with sb_writers, which shouldn't be held when calling
          * ovl_copy_up_data(), so lock workdir and destdir and make sure that
          * temp wasn't moved before copy up completion or cleanup.
- -       * If temp was moved, abort without the cleanup.
          */
         ovl_start_write(c->dentry);
-       if (lock_rename(c->workdir, c->destdir) != NULL ||
-           temp->d_parent != c->workdir) {
+       trap = lock_rename(c->workdir, c->destdir);
+       if (trap || temp->d_parent != c->workdir) {
+ +              /* temp or workdir moved underneath us? abort without cleanup */
+ +              dput(temp);
                 err = -EIO;
+               if (IS_ERR(trap))
+                       goto out;
                 goto unlock;
         } else if (err) {
                 goto cleanup;
@@@ -826,6 -805,7 +828,7 @@@
                 ovl_set_flag(OVL_WHITEOUTS, inode);
   unlock:
         unlock_rename(c->workdir, c->destdir);
+ out:
         ovl_end_write(c->dentry);
   
         return err;
@@@ -952,13 -932,6 +955,13 @@@ static int ovl_do_copy_up(struct ovl_co
                 err = -EIO;
                 goto out_free_fh;
         } else {
+ +              /*
+ +               * c->dentry->d_name is stabilized by ovl_copy_up_start(),
+ +               * because if we got here, it means that c->dentry has no upper
+ +               * alias and changing ->d_name means going through ovl_rename()
+ +               * that will call ovl_copy_up() on source and target dentry.
+ +               */
+ +              c->destname = c->dentry->d_name;
                 /*
                  * Mark parent "impure" because it may now contain non-pure
                  * upper
@@@ -1139,6 -1112,7 +1142,6 @@@ static int ovl_copy_up_one(struct dentr
         if (parent) {
                 ovl_path_upper(parent, &parentpath);
                 ctx.destdir = parentpath.dentry;
- -              ctx.destname = dentry->d_name;
   
                 err = vfs_getattr(&parentpath, &ctx.pstat,
                                   STATX_ATIME | STATX_MTIME,
diff --combined fs/overlayfs/super.c

index 0bbbe4818f676c954abc99c3a0e052834a2fbdd9,fc3a6ff648bdc464b8d20ed5376c1ba275e62283..4ab66e3d4cff9854a99bcc1505963927476bf1d5
--- 1/fs/overlayfs/super.c
--- 2/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@@ -439,8 -439,10 +439,10 @@@ static bool ovl_workdir_ok(struct dentr
         bool ok = false;
   
         if (workdir != upperdir) {
-               ok = (lock_rename(workdir, upperdir) == NULL);
-               unlock_rename(workdir, upperdir);
+               struct dentry *trap = lock_rename(workdir, upperdir);
+               if (!IS_ERR(trap))
+                       unlock_rename(workdir, upperdir);
+               ok = (trap == NULL);
         }
         return ok;
   }
@@@ -853,8 -855,10 +855,8 @@@ static int ovl_get_indexdir(struct supe
         if (IS_ERR(indexdir)) {
                 err = PTR_ERR(indexdir);
         } else if (indexdir) {
- -              ofs->indexdir = indexdir;
- -              ofs->workdir = dget(indexdir);
- -
- -              err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
+ +              ofs->workdir = indexdir;
+ +              err = ovl_setup_trap(sb, indexdir, &ofs->workdir_trap,
                                      "indexdir");
                 if (err)
                         goto out;
@@@ -867,15 -871,16 +869,15 @@@
                  * ".overlay.upper" to indicate that index may have
                  * directory entries.
                  */
- -              if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
- -                      err = ovl_verify_origin_xattr(ofs, ofs->indexdir,
+ +              if (ovl_check_origin_xattr(ofs, indexdir)) {
+ +                      err = ovl_verify_origin_xattr(ofs, indexdir,
                                                       OVL_XATTR_ORIGIN,
                                                       upperpath->dentry, true,
                                                       false);
                         if (err)
                                 pr_err("failed to verify index dir 'origin' xattr\n");
                 }
- -              err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
- -                                     true);
+ +              err = ovl_verify_upper(ofs, indexdir, upperpath->dentry, true);
                 if (err)
                         pr_err("failed to verify index dir 'upper' xattr\n");
   
@@@ -883,7 -888,7 +885,7 @@@
                 if (!err)
                         err = ovl_indexdir_cleanup(ofs);
         }
- -      if (err || !ofs->indexdir)
+ +      if (err || !indexdir)
                 pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
   
   out:
@@@ -1403,7 -1408,7 +1405,7 @@@ int ovl_fill_super(struct super_block *
                         goto out_free_oe;
   
                 /* Force r/o mount with no index dir */
- -              if (!ofs->indexdir)
+ +              if (!ofs->workdir)
                         sb->s_flags |= SB_RDONLY;
         }
   
@@@ -1412,7 -1417,7 +1414,7 @@@
                 goto out_free_oe;
   
         /* Show index=off in /proc/mounts for forced r/o mount */
- -      if (!ofs->indexdir) {
+ +      if (!ofs->workdir) {
                 ofs->config.index = false;
                 if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
                         pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
@@@ -1451,7 -1456,6 +1453,7 @@@
          * lead to unexpected results.
          */
         sb->s_iflags |= SB_I_NOUMASK;
+ +      sb->s_iflags |= SB_I_EVM_UNSUPPORTED;
   
         err = -ENOMEM;
         root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe);
@@@ -1499,10 -1503,14 +1501,10 @@@ static int __init ovl_init(void
         if (ovl_inode_cachep == NULL)
                 return -ENOMEM;
   
- -      err = ovl_aio_request_cache_init();
- -      if (!err) {
- -              err = register_filesystem(&ovl_fs_type);
- -              if (!err)
- -                      return 0;
+ +      err = register_filesystem(&ovl_fs_type);
+ +      if (!err)
+ +              return 0;
   
- -              ovl_aio_request_cache_destroy();
- -      }
         kmem_cache_destroy(ovl_inode_cachep);
   
         return err;
@@@ -1518,6 -1526,7 +1520,6 @@@ static void __exit ovl_exit(void
          */
         rcu_barrier();
         kmem_cache_destroy(ovl_inode_cachep);
- -      ovl_aio_request_cache_destroy();
   }
   
   module_init(ovl_init);
diff --combined fs/overlayfs/util.c

index 22b519763267f5018a9949da7b2c74b2f3422853,7b667345e6731683650b9a3a57f9dc4019729401..0217094c23ea6ae8905c7cb0c44c3ba969345200
--- 1/fs/overlayfs/util.c
--- 2/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@@ -91,7 -91,7 +91,7 @@@ struct dentry *ovl_indexdir(struct supe
   {
         struct ovl_fs *ofs = OVL_FS(sb);
   
- -      return ofs->indexdir;
+ +      return ofs->config.index ? ofs->workdir : NULL;
   }
   
   /* Index all files on copy up. For now only enabled for NFS export */
@@@ -978,7 -978,7 +978,7 @@@ int ovl_set_protattr(struct inode *inod
         return 0;
   }
   
- -/**
+ +/*
    * Caller must hold a reference to inode to prevent it from being freed while
    * it is marked inuse.
    */
@@@ -1198,12 -1198,17 +1198,17 @@@ void ovl_nlink_end(struct dentry *dentr
   
   int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
   {
+       struct dentry *trap;
+ 
         /* Workdir should not be the same as upperdir */
         if (workdir == upperdir)
                 goto err;
   
         /* Workdir should not be subdir of upperdir and vice versa */
-       if (lock_rename(workdir, upperdir) != NULL)
+       trap = lock_rename(workdir, upperdir);
+       if (IS_ERR(trap))
+               goto err;
+       if (trap)
                 goto err_unlock;
   
         return 0;
diff --combined fs/smb/server/vfs.c

index 4277750a6da1b16e6e6f19d29f157f31124cff3b,4cf8523ad0381a50eed016607877826899824b59..b6904a9b05f66c63ec86b745847a5feb2b51bb05
--- 1/fs/smb/server/vfs.c
--- 2/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@@ -97,13 -97,6 +97,13 @@@ static int ksmbd_vfs_path_lookup_locked
                 return -ENOENT;
         }
   
+ +      err = mnt_want_write(parent_path->mnt);
+ +      if (err) {
+ +              path_put(parent_path);
+ +              putname(filename);
+ +              return -ENOENT;
+ +      }
+ +
         inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
         d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
         if (IS_ERR(d))
@@@ -130,7 -123,6 +130,7 @@@
   
   err_out:
         inode_unlock(d_inode(parent_path->dentry));
+ +      mnt_drop_write(parent_path->mnt);
         path_put(parent_path);
         putname(filename);
         return -ENOENT;
@@@ -459,8 -451,7 +459,8 @@@ static int ksmbd_vfs_stream_write(struc
                                  fp->stream.name,
                                  (void *)stream_buf,
                                  size,
- -                               0);
+ +                               0,
+ +                               true);
         if (err < 0)
                 goto out;
   
@@@ -517,9 -508,6 +517,9 @@@ int ksmbd_vfs_write(struct ksmbd_work *
                 }
         }
   
+ +      /* Reserve lease break for parent dir at closing time */
+ +      fp->reserve_lease_break = true;
+ +
         /* Do we need to break any of a levelII oplock? */
         smb_break_all_levII_oplock(work, fp, 1);
   
@@@ -605,6 -593,10 +605,6 @@@ int ksmbd_vfs_remove_file(struct ksmbd_
                 goto out_err;
         }
   
- -      err = mnt_want_write(path->mnt);
- -      if (err)
- -              goto out_err;
- -
         idmap = mnt_idmap(path->mnt);
         if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
                 err = vfs_rmdir(idmap, d_inode(parent), path->dentry);
@@@ -615,6 -607,7 +615,6 @@@
                 if (err)
                         ksmbd_debug(VFS, "unlink failed, err %d\n", err);
         }
- -      mnt_drop_write(path->mnt);
   
   out_err:
         ksmbd_revert_fsids(work);
@@@ -715,6 -708,10 +715,10 @@@ retry
                 goto out2;
   
         trap = lock_rename_child(old_child, new_path.dentry);
+       if (IS_ERR(trap)) {
+               err = PTR_ERR(trap);
+               goto out_drop_write;
+       }
   
         old_parent = dget(old_child->d_parent);
         if (d_unhashed(old_child)) {
@@@ -722,7 -719,7 +726,7 @@@
                 goto out3;
         }
   
- -      parent_fp = ksmbd_lookup_fd_inode(d_inode(old_child->d_parent));
+ +      parent_fp = ksmbd_lookup_fd_inode(old_child->d_parent);
         if (parent_fp) {
                 if (parent_fp->daccess & FILE_DELETE_LE) {
                         pr_err("parent dir is opened with delete access\n");
@@@ -777,6 -774,7 +781,7 @@@ out4
   out3:
         dput(old_parent);
         unlock_rename(old_parent, new_path.dentry);
+ out_drop_write:
         mnt_drop_write(old_path->mnt);
   out2:
         path_put(&new_path);
@@@ -914,22 -912,18 +919,22 @@@ ssize_t ksmbd_vfs_getxattr(struct mnt_i
    * @attr_value:       xattr value to set
    * @attr_size:        size of xattr value
    * @flags:    destination buffer length
+ + * @get_write:        get write access to a mount
    *
    * Return:    0 on success, otherwise error
    */
   int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
                        const struct path *path, const char *attr_name,
- -                     void *attr_value, size_t attr_size, int flags)
+ +                     void *attr_value, size_t attr_size, int flags,
+ +                     bool get_write)
   {
         int err;
   
- -      err = mnt_want_write(path->mnt);
- -      if (err)
- -              return err;
+ +      if (get_write == true) {
+ +              err = mnt_want_write(path->mnt);
+ +              if (err)
+ +                      return err;
+ +      }
   
         err = vfs_setxattr(idmap,
                            path->dentry,
@@@ -939,8 -933,7 +944,8 @@@
                            flags);
         if (err)
                 ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
- -      mnt_drop_write(path->mnt);
+ +      if (get_write == true)
+ +              mnt_drop_write(path->mnt);
         return err;
   }
   
@@@ -1264,13 -1257,6 +1269,13 @@@ out1
         }
   
         if (!err) {
+ +              err = mnt_want_write(parent_path->mnt);
+ +              if (err) {
+ +                      path_put(path);
+ +                      path_put(parent_path);
+ +                      return err;
+ +              }
+ +
                 err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
                 if (err) {
                         path_put(path);
@@@ -1280,14 -1266,6 +1285,14 @@@
         return err;
   }
   
+ +void ksmbd_vfs_kern_path_unlock(struct path *parent_path, struct path *path)
+ +{
+ +      inode_unlock(d_inode(parent_path->dentry));
+ +      mnt_drop_write(parent_path->mnt);
+ +      path_put(path);
+ +      path_put(parent_path);
+ +}
+ +
   struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
                                           const char *name,
                                           unsigned int flags,
@@@ -1442,8 -1420,7 +1447,8 @@@ out
   int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
                            struct mnt_idmap *idmap,
                            const struct path *path,
- -                         struct smb_ntsd *pntsd, int len)
+ +                         struct smb_ntsd *pntsd, int len,
+ +                         bool get_write)
   {
         int rc;
         struct ndr sd_ndr = {0}, acl_ndr = {0};
@@@ -1503,7 -1480,7 +1508,7 @@@
   
         rc = ksmbd_vfs_setxattr(idmap, path,
                                 XATTR_NAME_SD, sd_ndr.data,
- -                              sd_ndr.offset, 0);
+ +                              sd_ndr.offset, 0, get_write);
         if (rc < 0)
                 pr_err("Failed to store XATTR ntacl :%d\n", rc);
   
@@@ -1592,8 -1569,7 +1597,8 @@@ free_n_data
   
   int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
                                    const struct path *path,
- -                                 struct xattr_dos_attrib *da)
+ +                                 struct xattr_dos_attrib *da,
+ +                                 bool get_write)
   {
         struct ndr n;
         int err;
@@@ -1603,7 -1579,7 +1608,7 @@@
                 return err;
   
         err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE,
- -                               (void *)n.data, n.offset, 0);
+ +                               (void *)n.data, n.offset, 0, get_write);
         if (err)
                 ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
         kfree(n.data);
@@@ -1875,6 -1851,10 +1880,6 @@@ int ksmbd_vfs_set_init_posix_acl(struc
         }
         posix_state_to_acl(&acl_state, acls->a_entries);
   
- -      rc = mnt_want_write(path->mnt);
- -      if (rc)
- -              goto out_err;
- -
         rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
         if (rc < 0)
                 ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@@ -1886,7 -1866,9 +1891,7 @@@
                         ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
                                     rc);
         }
- -      mnt_drop_write(path->mnt);
   
- -out_err:
         free_acl_state(&acl_state);
         posix_acl_release(acls);
         return rc;
@@@ -1916,6 -1898,10 +1921,6 @@@ int ksmbd_vfs_inherit_posix_acl(struct 
                 }
         }
   
- -      rc = mnt_want_write(path->mnt);
- -      if (rc)
- -              goto out_err;
- -
         rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
         if (rc < 0)
                 ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@@ -1927,7 -1913,9 +1932,7 @@@
                         ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
                                     rc);
         }
- -      mnt_drop_write(path->mnt);
   
- -out_err:
         posix_acl_release(acls);
         return rc;
   }
author	Linus Torvalds <[email protected]>
	Fri, 12 Jan 2024 04:00:22 +0000 (20:00 -0800)
committer	Linus Torvalds <[email protected]>
	Fri, 12 Jan 2024 04:00:22 +0000 (20:00 -0800)
		1	2
Documentation/filesystems/locking.rst	patch \|	diff1 \|	diff2 \|	blob \| history
Documentation/filesystems/porting.rst	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ecryptfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/internal.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/nfsd/vfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/overlayfs/copy_up.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/overlayfs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/overlayfs/util.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/smb/server/vfs.c	patch \|	diff1 \|	diff2 \|	blob \| history