Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso...

author Linus Torvalds <[email protected]>

Fri, 7 Oct 2022 00:45:53 +0000 (17:45 -0700)

committer Linus Torvalds <[email protected]>

Fri, 7 Oct 2022 00:45:53 +0000 (17:45 -0700)
author Linus Torvalds <[email protected]>
Fri, 7 Oct 2022 00:45:53 +0000 (17:45 -0700)
committer Linus Torvalds <[email protected]>
Fri, 7 Oct 2022 00:45:53 +0000 (17:45 -0700)
diff --combined fs/ext4/ext4.h

index e5f2f5ca5120ea3e1d8b470883dba3663f56f2e0,9a3521e95f00eb6fe6a6880353f51d07a665d1db..8d5453852f98ec06c9ed3a49b2c76f7605f8c3f4
--- 1/fs/ext4/ext4.h
--- 2/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@@ -2977,7 -2977,6 +2977,7 @@@ extern struct inode *__ext4_iget(struc
   extern int  ext4_write_inode(struct inode *, struct writeback_control *);
   extern int  ext4_setattr(struct user_namespace *, struct dentry *,
                          struct iattr *);
+ +extern u32  ext4_dio_alignment(struct inode *inode);
   extern int  ext4_getattr(struct user_namespace *, const struct path *,
                          struct kstat *, u32, unsigned int);
   extern void ext4_evict_inode(struct inode *);
@@@ -3592,9 -3591,6 +3592,6 @@@ extern bool empty_inline_dir(struct ino
   extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
                                         struct ext4_dir_entry_2 **parent_de,
                                         int *retval);
- extern int ext4_inline_data_fiemap(struct inode *inode,
-                                  struct fiemap_extent_info *fieinfo,
-                                  int *has_inline, __u64 start, __u64 len);
   extern void *ext4_read_inline_link(struct inode *inode);
   
   struct iomap;
@@@ -3713,7 -3709,7 +3710,7 @@@ extern int ext4_ext_insert_extent(handl
   extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
                                               struct ext4_ext_path **,
                                               int flags);
- extern void ext4_ext_drop_refs(struct ext4_ext_path *);
+ extern void ext4_free_ext_path(struct ext4_ext_path *);
   extern int ext4_ext_check_inode(struct inode *inode);
   extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
   extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --combined fs/ext4/file.c

index 8bb1c35fd6dd5a6e7a918dcc3c7f41d03cb62c83,847a2f806b8f62df5d31baa3d6ebb0d6872ee72e..a7a597c727e638dff296d7d7b5b663c63a9d3051
--- 1/fs/ext4/file.c
--- 2/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@@ -36,34 -36,19 +36,34 @@@
   #include "acl.h"
   #include "truncate.h"
   
- -static bool ext4_dio_supported(struct kiocb *iocb, struct iov_iter *iter)
+ +/*
+ + * Returns %true if the given DIO request should be attempted with DIO, or
+ + * %false if it should fall back to buffered I/O.
+ + *
+ + * DIO isn't well specified; when it's unsupported (either due to the request
+ + * being misaligned, or due to the file not supporting DIO at all), filesystems
+ + * either fall back to buffered I/O or return EINVAL.  For files that don't use
+ + * any special features like encryption or verity, ext4 has traditionally
+ + * returned EINVAL for misaligned DIO.  iomap_dio_rw() uses this convention too.
+ + * In this case, we should attempt the DIO, *not* fall back to buffered I/O.
+ + *
+ + * In contrast, in cases where DIO is unsupported due to ext4 features, ext4
+ + * traditionally falls back to buffered I/O.
+ + *
+ + * This function implements the traditional ext4 behavior in all these cases.
+ + */
+ +static bool ext4_should_use_dio(struct kiocb *iocb, struct iov_iter *iter)
   {
         struct inode *inode = file_inode(iocb->ki_filp);
+ +      u32 dio_align = ext4_dio_alignment(inode);
   
- -      if (!fscrypt_dio_supported(iocb, iter))
- -              return false;
- -      if (fsverity_active(inode))
+ +      if (dio_align == 0)
                 return false;
- -      if (ext4_should_journal_data(inode))
- -              return false;
- -      if (ext4_has_inline_data(inode))
- -              return false;
- -      return true;
+ +
+ +      if (dio_align == 1)
+ +              return true;
+ +
+ +      return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align);
   }
   
   static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@@ -78,7 -63,7 +78,7 @@@
                 inode_lock_shared(inode);
         }
   
- -      if (!ext4_dio_supported(iocb, to)) {
+ +      if (!ext4_should_use_dio(iocb, to)) {
                 inode_unlock_shared(inode);
                 /*
                  * Fallback to buffered I/O if the operation being performed on
@@@ -526,7 -511,7 +526,7 @@@ static ssize_t ext4_dio_write_iter(stru
         }
   
         /* Fallback to buffered I/O if the inode does not support direct I/O. */
- -      if (!ext4_dio_supported(iocb, from)) {
+ +      if (!ext4_should_use_dio(iocb, from)) {
                 if (ilock_shared)
                         inode_unlock_shared(inode);
                 else
@@@ -543,6 -528,12 +543,12 @@@
                 ret = -EAGAIN;
                 goto out;
         }
+       /*
+        * Make sure inline data cannot be created anymore since we are going
+        * to allocate blocks for DIO. We know the inode does not have any
+        * inline data now because ext4_should_use_dio() checked for that.
+        */
+       ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
   
         offset = iocb->ki_pos;
         count = ret;
diff --combined fs/ext4/inode.c

index 364774230d87ac3e3833b6b7d34d6deb7639ce0c,6da73be32bff3590e056a23f8dd4c1253a8f3cdd..2b5ef1b6424992b43a834de801eec226a5e2900b
--- 1/fs/ext4/inode.c
--- 2/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -1188,6 -1188,13 +1188,13 @@@ retry_grab
         page = grab_cache_page_write_begin(mapping, index);
         if (!page)
                 return -ENOMEM;
+       /*
+        * The same as page allocation, we prealloc buffer heads before
+        * starting the handle.
+        */
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, inode->i_sb->s_blocksize, 0);
+ 
         unlock_page(page);
   
   retry_journal:
@@@ -5342,6 -5349,7 +5349,7 @@@ int ext4_setattr(struct user_namespace 
         int error, rc = 0;
         int orphan = 0;
         const unsigned int ia_valid = attr->ia_valid;
+       bool inc_ivers = true;
   
         if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
                 return -EIO;
@@@ -5425,8 -5433,8 +5433,8 @@@
                         return -EINVAL;
                 }
   
-               if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
-                       inode_inc_iversion(inode);
+               if (attr->ia_size == inode->i_size)
+                       inc_ivers = false;
   
                 if (shrink) {
                         if (ext4_should_order_data(inode)) {
@@@ -5528,6 -5536,8 +5536,8 @@@ out_mmap_sem
         }
   
         if (!error) {
+               if (inc_ivers)
+                       inode_inc_iversion(inode);
                 setattr_copy(mnt_userns, inode, attr);
                 mark_inode_dirty(inode);
         }
@@@ -5550,22 -5560,6 +5560,22 @@@ err_out
         return error;
   }
   
+ +u32 ext4_dio_alignment(struct inode *inode)
+ +{
+ +      if (fsverity_active(inode))
+ +              return 0;
+ +      if (ext4_should_journal_data(inode))
+ +              return 0;
+ +      if (ext4_has_inline_data(inode))
+ +              return 0;
+ +      if (IS_ENCRYPTED(inode)) {
+ +              if (!fscrypt_dio_supported(inode))
+ +                      return 0;
+ +              return i_blocksize(inode);
+ +      }
+ +      return 1; /* use the iomap defaults */
+ +}
+ +
   int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
                  struct kstat *stat, u32 request_mask, unsigned int query_flags)
   {
@@@ -5581,27 -5575,6 +5591,27 @@@
                 stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
         }
   
+ +      /*
+ +       * Return the DIO alignment restrictions if requested.  We only return
+ +       * this information when requested, since on encrypted files it might
+ +       * take a fair bit of work to get if the file wasn't opened recently.
+ +       */
+ +      if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
+ +              u32 dio_align = ext4_dio_alignment(inode);
+ +
+ +              stat->result_mask |= STATX_DIOALIGN;
+ +              if (dio_align == 1) {
+ +                      struct block_device *bdev = inode->i_sb->s_bdev;
+ +
+ +                      /* iomap defaults */
+ +                      stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
+ +                      stat->dio_offset_align = bdev_logical_block_size(bdev);
+ +              } else {
+ +                      stat->dio_mem_align = dio_align;
+ +                      stat->dio_offset_align = dio_align;
+ +              }
+ +      }
+ +
         flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
         if (flags & EXT4_APPEND_FL)
                 stat->attributes |= STATX_ATTR_APPEND;
@@@ -5768,9 -5741,6 +5778,6 @@@ int ext4_mark_iloc_dirty(handle_t *hand
         }
         ext4_fc_track_inode(handle, inode);
   
-       if (IS_I_VERSION(inode))
-               inode_inc_iversion(inode);
- 
         /* the do_update_inode consumes one bh->b_count */
         get_bh(iloc->bh);
   
diff --combined include/linux/buffer_head.h

index df518c429667275553543d53eeab9471c7db151c,dcc0e90d8979cae9e48dfd7493d4b053a3056042..06089390d81d774b5526f8258908019793f09d3d
--- 1/include/linux/buffer_head.h
--- 2/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@@ -137,17 -137,6 +137,17 @@@ BUFFER_FNS(Defer_Completion, defer_comp
   
   static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
   {
+ +      /*
+ +       * If somebody else already set this uptodate, they will
+ +       * have done the memory barrier, and a reader will thus
+ +       * see *some* valid buffer state.
+ +       *
+ +       * Any other serialization (with IO errors or whatever that
+ +       * might clear the bit) has to come from other state (eg BH_Lock).
+ +       */
+ +      if (test_bit(BH_Uptodate, &bh->b_state))
+ +              return;
+ +
         /*
          * make it consistent with folio_mark_uptodate
          * pairs with smp_load_acquire in buffer_uptodate
@@@ -240,7 -229,7 +240,7 @@@ void ll_rw_block(blk_opf_t, int, struc
   int sync_dirty_buffer(struct buffer_head *bh);
   int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
   void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags);
- int submit_bh(blk_opf_t, struct buffer_head *);
+ void submit_bh(blk_opf_t, struct buffer_head *);
   void write_boundary_block(struct block_device *bdev,
                         sector_t bblock, unsigned blocksize);
   int bh_uptodate_or_lock(struct buffer_head *bh);
diff --combined include/linux/fs.h

index 7591d2d2dcbba9a0a1e501a3759c23f195caade6,56a4b4b02477db4ad75d4b33a2a94aa5aa99b2cb..7098f085d32dad089002ad5583621ad37f59a068
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -1472,7 -1472,7 +1472,7 @@@ struct super_block 
         const struct xattr_handler **s_xattr;
   #ifdef CONFIG_FS_ENCRYPTION
         const struct fscrypt_operations *s_cop;
- -      struct key              *s_master_keys; /* master crypto keys in use */
+ +      struct fscrypt_keyring  *s_master_keys; /* master crypto keys in use */
   #endif
   #ifdef CONFIG_FS_VERITY
         const struct fsverity_operations *s_vop;
@@@ -2038,10 -2038,9 +2038,10 @@@ umode_t mode_strip_sgid(struct user_nam
    * the kernel specify what kind of dirent layout it wants to have.
    * This allows the kernel to read directories into kernel space or
    * to have different dirent layouts depending on the binary type.
+ + * Return 'true' to keep going and 'false' if there are no more entries.
    */
   struct dir_context;
- -typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
+ +typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
                          unsigned);
   
   struct dir_context {
@@@ -2372,13 -2371,14 +2372,14 @@@ static inline void kiocb_clone(struct k
    *                    don't have to write inode on fdatasync() when only
    *                    e.g. the timestamps have changed.
    * I_DIRTY_PAGES      Inode has dirty pages.  Inode itself may be clean.
-  * I_DIRTY_TIME               The inode itself only has dirty timestamps, and the
+  * I_DIRTY_TIME               The inode itself has dirty timestamps, and the
    *                    lazytime mount option is enabled.  We keep track of this
    *                    separately from I_DIRTY_SYNC in order to implement
    *                    lazytime.  This gets cleared if I_DIRTY_INODE
-  *                    (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set.  I.e.
-  *                    either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
-  *                    i_state, but not both.  I_DIRTY_PAGES may still be set.
+  *                    (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
+  *                    I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
+  *                    in place because writeback might already be in progress
+  *                    and we don't want to lose the time update.
    * I_NEW              Serves as both a mutex and completion notification.
    *                    New inodes set I_NEW.  If two processes both create
    *                    the same inode, one of them will release its inode and
@@@ -3541,17 -3541,17 +3542,17 @@@ static inline bool dir_emit(struct dir_
                             const char *name, int namelen,
                             u64 ino, unsigned type)
   {
- -      return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
+ +      return ctx->actor(ctx, name, namelen, ctx->pos, ino, type);
   }
   static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
   {
         return ctx->actor(ctx, ".", 1, ctx->pos,
- -                        file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0;
+ +                        file->f_path.dentry->d_inode->i_ino, DT_DIR);
   }
   static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
   {
         return ctx->actor(ctx, "..", 2, ctx->pos,
- -                        parent_ino(file->f_path.dentry), DT_DIR) == 0;
+ +                        parent_ino(file->f_path.dentry), DT_DIR);
   }
   static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
   {
author	Linus Torvalds <[email protected]>
	Fri, 7 Oct 2022 00:45:53 +0000 (17:45 -0700)
committer	Linus Torvalds <[email protected]>
	Fri, 7 Oct 2022 00:45:53 +0000 (17:45 -0700)
		1	2
fs/ext4/ext4.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/ext4/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/buffer_head.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history