EXT4_MB_NUM_CRS
};
- /* criteria below which we use fast block scanning and avoid unnecessary IO */
- #define CR_FAST CR_GOAL_LEN_SLOW
-
/*
* Flags used in mballoc's allocation_context flags field.
*
* affected filesystem before 2242.
*/
-static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 ts)
{
- u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
- return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
+ u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK;
+ return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS));
}
-static inline void ext4_decode_extra_time(struct timespec64 *time,
- __le32 extra)
+static inline struct timespec64 ext4_decode_extra_time(__le32 base,
+ __le32 extra)
{
+ struct timespec64 ts = { .tv_sec = (signed)le32_to_cpu(base) };
+
if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
- time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
- time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+ ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+ ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+ return ts;
}
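For reference, the new pass-by-value pair round-trips as follows. A minimal user-space sketch, assuming the usual kernel constants (EXT4_EPOCH_BITS == 2, EXT4_EPOCH_MASK == (1 << 2) - 1, EXT4_NSEC_MASK == ~0U << 2) and ignoring the cpu_to_le32()/le32_to_cpu() byte swaps; all names below are illustrative:

	#include <stdint.h>
	#include <stdio.h>

	#define EPOCH_BITS 2
	#define EPOCH_MASK ((1u << EPOCH_BITS) - 1)	/* low 2 bits: extra epoch */
	#define NSEC_MASK  (~0u << EPOCH_BITS)		/* high 30 bits: nanoseconds */

	struct ts64 { int64_t tv_sec; int32_t tv_nsec; };

	static uint32_t encode_extra(struct ts64 ts)	/* cf. ext4_encode_extra_time() */
	{
		uint32_t extra = ((ts.tv_sec - (int32_t)ts.tv_sec) >> 32) & EPOCH_MASK;

		return extra | ((uint32_t)ts.tv_nsec << EPOCH_BITS);
	}

	static struct ts64 decode_extra(uint32_t base, uint32_t extra)	/* cf. decode */
	{
		struct ts64 ts = { .tv_sec = (int32_t)base };

		if (extra & EPOCH_MASK)
			ts.tv_sec += (uint64_t)(extra & EPOCH_MASK) << 32;
		ts.tv_nsec = (extra & NSEC_MASK) >> EPOCH_BITS;
		return ts;
	}

	int main(void)
	{
		/* a post-2174 timestamp: needs two extra epoch bits */
		struct ts64 in = { .tv_sec = 0x180000123LL, .tv_nsec = 7 };
		struct ts64 out = decode_extra((uint32_t)in.tv_sec, encode_extra(in));

		printf("%lld.%09d -> %lld.%09d\n", (long long)in.tv_sec, in.tv_nsec,
		       (long long)out.tv_sec, out.tv_nsec);
		return 0;
	}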
-#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts) \
do { \
- if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\
- (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
- (raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&(inode)->xtime); \
- } \
- else \
- (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec, S32_MIN, S32_MAX)); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \
+ (raw_inode)->xtime = cpu_to_le32((ts).tv_sec); \
+ (raw_inode)->xtime ## _extra = ext4_encode_extra_time(ts); \
+ } else \
+ (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX)); \
} while (0)
-#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
-do { \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
- (raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&(einode)->xtime); \
-} while (0)
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+ EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime)
+
+#define EXT4_INODE_SET_CTIME(inode, raw_inode) \
+ EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode))
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ EXT4_INODE_SET_XTIME_VAL(xtime, &((einode)->vfs_inode), \
+ raw_inode, (einode)->xtime)
+
+#define EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode) \
+ (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra) ? \
+ ext4_decode_extra_time((raw_inode)->xtime, \
+ (raw_inode)->xtime ## _extra) : \
+ (struct timespec64) { \
+ .tv_sec = (signed)le32_to_cpu((raw_inode)->xtime) \
+ })
#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
- (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
- if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \
- ext4_decode_extra_time(&(inode)->xtime, \
- raw_inode->xtime ## _extra); \
- } \
- else \
- (inode)->xtime.tv_nsec = 0; \
+ (inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode); \
} while (0)
+#define EXT4_INODE_GET_CTIME(inode, raw_inode) \
+do { \
+ inode_set_ctime_to_ts(inode, \
+ EXT4_INODE_GET_XTIME_VAL(i_ctime, inode, raw_inode)); \
+} while (0)
-#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
-do { \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (einode)->xtime.tv_sec = \
- (signed)le32_to_cpu((raw_inode)->xtime); \
- else \
- (einode)->xtime.tv_sec = 0; \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
- ext4_decode_extra_time(&(einode)->xtime, \
- raw_inode->xtime ## _extra); \
- else \
- (einode)->xtime.tv_nsec = 0; \
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (einode)->xtime = \
+ EXT4_INODE_GET_XTIME_VAL(xtime, &(einode->vfs_inode), \
+ raw_inode); \
+ else \
+ (einode)->xtime = (struct timespec64){0, 0}; \
} while (0)
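Since EXT4_INODE_GET_XTIME_VAL is now an expression rather than a statement, the getters reduce to a single assignment. A hand expansion of EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode), for illustration; this value-returning shape is what lets EXT4_INODE_GET_CTIME pass the result straight to inode_set_ctime_to_ts():

	do {
		(inode)->i_mtime =
			(EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), i_mtime_extra) ?
			 ext4_decode_extra_time((raw_inode)->i_mtime,
						(raw_inode)->i_mtime_extra) :
			 (struct timespec64) {
				.tv_sec = (signed)le32_to_cpu((raw_inode)->i_mtime)
			 });
	} while (0);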
#define i_disk_version osd1.linux1.l_i_version
#define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group
* scanning in mballoc
*/
+ #define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
#define ext4_test_and_set_bit __test_and_set_bit_le
#define ext4_set_bit __set_bit_le
- #define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_test_and_clear_bit __test_and_clear_bit_le
#define ext4_clear_bit __clear_bit_le
- #define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
const char *s_last_error_func;
time64_t s_last_error_time;
/*
- * If we are in a context where we cannot update error information in
- * the on-disk superblock, we queue this work to do it.
+ * If we are in a context where we cannot update the on-disk
+ * superblock, we queue the work here. This is used to update
+ * the error information in the superblock, and for periodic
+ * updates of the superblock called from the commit callback
+ * function.
*/
- struct work_struct s_error_work;
+ struct work_struct s_sb_upd_work;
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
*/
enum {
EXT4_MF_MNTDIR_SAMPLED,
- EXT4_MF_FS_ABORTED, /* Fatal error detected */
EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
};
#define EXT4_FLAGS_SHUTDOWN 1
#define EXT4_FLAGS_BDEV_IS_DAX 2
- static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
+ static inline int ext4_forced_shutdown(struct super_block *sb)
{
- return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+ return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
}
/*
extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags);
extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
- extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
- extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
- extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, int state);
+ static inline bool ext4_mb_cr_expensive(enum criteria cr)
+ {
+ return cr >= CR_GOAL_LEN_SLOW;
+ }
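This helper is the complement of the CR_FAST cutoff removed above: since CR_FAST was defined as CR_GOAL_LEN_SLOW, cr >= CR_GOAL_LEN_SLOW is exactly the old "not fast" range, i.e. the allocation criteria where scanning may incur IO.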
/* inode.c */
void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
extern void ext4_clear_inode(struct inode *);
extern int ext4_file_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
- extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
+ extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
+ ext4_group_t block_group);
extern __printf(7, 8)
void __ext4_error(struct super_block *, const char *, unsigned int, bool,
/* inline.c */
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
- extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
- unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
int ext4_readpage_inline(struct inode *inode, struct folio *folio);
set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
}
-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-
/* For ioend & aio unwritten conversion wait queues */
#define EXT4_WQ_HASH_SZ 37
#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
might_sleep();
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
- if (sb_rdonly(sb))
+ if (WARN_ON_ONCE(sb_rdonly(sb)))
return -EROFS;
+
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
/*
might_sleep();
- if (bh->b_bdev->bd_super)
- ext4_check_bdev_write_error(bh->b_bdev->bd_super);
+ ext4_check_bdev_write_error(sb);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
{
struct inode *inode = file_inode(iocb->ki_filp);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (!iov_iter_count(to))
{
struct inode *inode = file_inode(in);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
return filemap_splice_read(in, ppos, pipe, len, flags);
}
* required to change security info in file_modified(), for extending
* I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
* extents (as partial block zeroing may be required).
+ *
+ * Note that unaligned writes are allowed under shared lock so long as
+ * they are pure overwrites. Otherwise, concurrent unaligned writes risk
+ * data corruption due to partial block zeroing in the dio layer, and so
+ * the I/O must occur exclusively.
*/
if (*ilock_shared &&
((!IS_NOSEC(inode) || *extend || !overwrite ||
/*
* Now that locking is settled, determine dio flags and exclusivity
- * requirements. Unaligned writes are allowed under shared lock so long
- * as they are pure overwrites. Set the iomap overwrite only flag as an
- * added precaution in this case. Even though this is unnecessary, we
- * can detect and warn on unexpected -EAGAIN if an unsafe unaligned
- * write is ever submitted.
- *
- * Otherwise, concurrent unaligned writes risk data corruption due to
- * partial block zeroing in the dio layer, and so the I/O must occur
- * exclusively. The inode lock is already held exclusive if the write is
- * non-overwrite or extending, so drain all outstanding dio and set the
- * force wait dio flag.
+ * requirements. We don't use IOMAP_DIO_OVERWRITE_ONLY because that
+ * behavior is already enforced above. The inode lock is already held
+ * exclusive if the write is non-overwrite or extending, so drain all
+ * outstanding dio and set the force wait dio flag.
*/
- if (*ilock_shared && unaligned_io) {
- *dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
- } else if (!*ilock_shared && (unaligned_io || *extend)) {
+ if (!*ilock_shared && (unaligned_io || *extend)) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
dio_flags, NULL, 0);
- WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
if (ret == -ENOTBLK)
ret = 0;
{
struct inode *inode = file_inode(iocb->ki_filp);
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
#ifdef CONFIG_FS_DAX
}
#ifdef CONFIG_FS_DAX
-static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
- enum page_entry_size pe_size)
+static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
{
int error = 0;
vm_fault_t result;
* read-only.
*
* We check for VM_SHARED rather than vmf->cow_page since the latter is
- * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
+ * unset for order != 0 (i.e. only in do_cow_fault); for
* other sizes, dax_iomap_fault will handle splitting / fallback so that
* we eventually come back with a COW page.
*/
} else {
filemap_invalidate_lock_shared(mapping);
}
- result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
+ result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
if (write) {
ext4_journal_stop(handle);
goto retry;
/* Handling synchronous page fault? */
if (result & VM_FAULT_NEEDDSYNC)
- result = dax_finish_sync_fault(vmf, pe_size, pfn);
+ result = dax_finish_sync_fault(vmf, order, pfn);
filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(sb);
} else {
static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
{
- return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
+ return ext4_dax_huge_fault(vmf, 0);
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct dax_device *dax_dev = sbi->s_daxdev;
+ struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
- if (unlikely(ext4_forced_shutdown(sbi)))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
/*
{
int ret;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
sb = dir->i_sb;
sbi = EXT4_SB(sb);
- if (unlikely(ext4_forced_shutdown(sbi)))
+ if (unlikely(ext4_forced_shutdown(sb)))
return ERR_PTR(-EIO);
ngroups = ext4_get_groups_count(sb);
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
ei->i_crtime = inode->i_mtime;
memset(ei->i_data, 0, sizeof(ei->i_data));
int num, ret = 0, used_blks = 0;
unsigned long used_inos = 0;
- /* This should not happen, but just to be sure check this */
- if (sb_rdonly(sb)) {
- ret = 1;
- goto out;
- }
-
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp || !grp)
goto out;
struct ext4_inode *raw_inode;
int cp_len = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
BUG_ON(!EXT4_I(inode)->i_inline_off);
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
ext4_update_dx_flag(dir);
inode_inc_iversion(dir);
return 1;
ext4_orphan_del(handle, inode);
if (err == 0) {
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
err = ext4_mark_inode_dirty(handle, inode);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
pgoff_t index;
unsigned from, to;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
trace_ext4_write_begin(inode, pos, len);
if (folio->index < mpd->first_page)
continue;
- if (folio->index + folio_nr_pages(folio) - 1 > end)
+ if (folio_next_index(folio) - 1 > end)
continue;
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
if (err < 0) {
struct super_block *sb = inode->i_sb;
- if (ext4_forced_shutdown(EXT4_SB(sb)) ||
- ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
+ if (ext4_forced_shutdown(sb))
goto invalidate_dirty_pages;
/*
* Let the upper layers retry transient errors.
if (mpd->map.m_len == 0)
mpd->first_page = folio->index;
- mpd->next_page = folio->index + folio_nr_pages(folio);
+ mpd->next_page = folio_next_index(folio);
/*
* Writeout when we cannot modify metadata is simple.
* Just submit the page. For data=journal mode we
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
- * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because
* fs shutdown state instead of SB_RDONLY in sb->s_flags because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_writepages should
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
- if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
- ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
+ if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
ret = -EROFS;
goto out_writepages;
}
int ret;
int alloc_ctx;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(sb);
int ret;
long nr_to_write = wbc->nr_to_write;
struct inode *inode = mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
int alloc_ctx;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(inode->i_sb);
trace_ext4_writepages(inode, wbc);
- ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
+ ret = dax_writeback_mapping_range(mapping,
+ EXT4_SB(inode->i_sb)->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
ext4_writepages_up_read(inode->i_sb, alloc_ctx);
pgoff_t index;
struct inode *inode = mapping->host;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
index = pos >> PAGE_SHIFT;
return 1;
}
+ static int ext4_da_do_write_end(struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page)
+ {
+ struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
+ bool disksize_changed = false;
+ loff_t new_i_size;
+
+ /*
+ * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
+ * flag, which is all that's needed to trigger page writeback.
+ */
+ copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
+ new_i_size = pos + copied;
+
+ /*
+ * It's important to update i_size while still holding page lock,
+ * because page writeout could otherwise come in and zero beyond
+ * i_size.
+ *
+ * Since we are holding inode lock, we are sure i_disksize <=
+ * i_size. We also know that if i_disksize < i_size, there are
+ * delalloc writes pending in the range up to i_size. If the end of
+ * the current write is <= i_size, there's no need to touch
+ * i_disksize since writeback will push i_disksize up to i_size
+ * eventually. If the end of the current write is > i_size and
+ * inside an allocated block (which ext4_da_should_update_i_disksize()
+ * checks), we need to update i_disksize here, as certain
+ * ext4_writepages() paths update i_disksize without allocating blocks.
+ */
+ if (new_i_size > inode->i_size) {
+ unsigned long end;
+
+ i_size_write(inode, new_i_size);
+ end = (new_i_size - 1) & (PAGE_SIZE - 1);
+ if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
+ ext4_update_i_disksize(inode, new_i_size);
+ disksize_changed = true;
+ }
+ }
+
+ unlock_page(page);
+ put_page(page);
+
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
+
+ if (disksize_changed) {
+ handle_t *handle;
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ }
+
+ return copied;
+ }
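A compact recap of the ordering the comments above require; descriptive only, mirroring the function body:

	/*
	 * lock_page(page)
	 *   block_write_end()          - marks the inode I_DIRTY_PAGES
	 *   i_size_write()             - under the page lock, so writeout
	 *                                cannot zero beyond the new i_size
	 *   ext4_update_i_disksize()   - only if the write ends beyond i_size
	 *                                inside an allocated block
	 * unlock_page(page)
	 * pagecache_isize_extended()   - once the size update is visible
	 * ext4_journal_start() / ext4_mark_inode_dirty()
	 *                              - only when i_disksize changed
	 */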
+
static int ext4_da_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
- loff_t new_i_size;
- unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata;
struct folio *folio = page_folio(page);
if (unlikely(copied < len) && !PageUptodate(page))
copied = 0;
- start = pos & (PAGE_SIZE - 1);
- end = start + copied - 1;
-
- /*
- * Since we are holding inode lock, we are sure i_disksize <=
- * i_size. We also know that if i_disksize < i_size, there are
- * delalloc writes pending in the range upto i_size. If the end of
- * the current write is <= i_size, there's no need to touch
- * i_disksize since writeback will push i_disksize upto i_size
- * eventually. If the end of the current write is > i_size and
- * inside an allocated block (ext4_da_should_update_i_disksize()
- * check), we need to update i_disksize here as certain
- * ext4_writepages() paths not allocating blocks update i_disksize.
- *
- * Note that we defer inode dirtying to generic_write_end() /
- * ext4_da_write_inline_data_end().
- */
- new_i_size = pos + copied;
- if (copied && new_i_size > inode->i_size &&
- ext4_da_should_update_i_disksize(folio, end))
- ext4_update_i_disksize(inode, new_i_size);
-
- return generic_write_end(file, mapping, pos, len, copied, &folio->page,
- fsdata);
+ return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
}
/*
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ret2 = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret2))
ret = ret2;
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
err2 = ext4_mark_inode_dirty(handle, inode);
if (unlikely(err2 && !err))
err = err2;
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
- EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_SET_CTIME(inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
}
}
- EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_GET_CTIME(inode, raw_inode);
EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
"iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
- if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
+ if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) {
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
if ((err_str = check_igot_inode(inode, flags)) != NULL) {
ext4_error_inode(inode, function, line, 0, err_str);
ret = -EFSCORRUPTED;
spin_unlock(&inode->i_lock);
spin_lock(&ei->i_raw_lock);
- EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_SET_CTIME(inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
ext4_inode_csum_set(inode, raw_inode, ei);
{
int err;
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
- sb_rdonly(inode->i_sb))
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (EXT4_SB(inode->i_sb)->s_journal) {
const unsigned int ia_valid = attr->ia_valid;
bool inc_ivers = true;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (unlikely(IS_IMMUTABLE(inode)))
* Update c/mtime on truncate up, ext4_truncate() will
* update c/mtime in shrink case below
*/
- if (!shrink) {
- inode->i_mtime = current_time(inode);
- inode->i_ctime = inode->i_mtime;
- }
+ if (!shrink)
+ inode->i_mtime = inode_set_ctime_current(inode);
if (shrink)
ext4_fc_track_range(handle, inode,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
return 0;
}
{
int err = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+ if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
put_bh(iloc->bh);
return -EIO;
}
{
int err;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
err = ext4_get_inode_loc(inode, iloc);
if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_alloc;
out_ret:
- ret = block_page_mkwrite_return(err);
+ ret = vmf_fs_error(err);
out:
filemap_invalidate_unlock_shared(mapping);
sb_end_pagefault(inode->i_sb);
diff = size - size_bl;
swap_inode_data(inode, inode_bl);
- inode->i_ctime = inode_bl->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
+ inode_set_ctime_current(inode_bl);
inode_inc_iversion(inode);
inode->i_generation = get_random_u32();
ext4_set_inode_flags(inode, false);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}
EXT4_I(inode)->i_projid = kprojid;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
out_dirty:
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
return -EINVAL;
- if (ext4_forced_shutdown(sbi))
+ if (ext4_forced_shutdown(sb))
return 0;
ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
}
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
struct dx_hash_info *hinfo = &name->hinfo;
int len;
- if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
+ if (!IS_CASEFOLDED(dir) ||
(IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
cf_name->name = NULL;
return 0;
#endif
#if IS_ENABLED(CONFIG_UNICODE)
- if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
+ if (IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
ext4_update_dx_flag(dir);
inode_inc_iversion(dir);
err2 = ext4_mark_inode_dirty(handle, dir);
#if IS_ENABLED(CONFIG_UNICODE)
if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
- sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
+ utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
return err;
}
drop_nlink(inode);
+ ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
return err;
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
/* Initialize quotas before so that eventual writes go in
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_ctime_current(inode);
retval = ext4_mark_inode_dirty(handle, inode);
if (retval)
goto end_rmdir;
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto out_handle;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
retval = ext4_mark_inode_dirty(handle, inode);
if (dentry && !retval)
ext4_fc_track_unlink(handle, dentry);
{
int retval;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
trace_ext4_unlink_enter(dir, dentry);
struct fscrypt_str disk_link;
int retries = 0;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+ if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
err_drop_inode:
clear_nlink(inode);
+ ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
if (handle)
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ext4_inc_count(inode);
ihold(inode);
if (ext4_has_feature_filetype(ent->dir->i_sb))
ent->de->file_type = file_type;
inode_inc_iversion(ent->dir);
- ent->dir->i_ctime = ent->dir->i_mtime =
- current_time(ent->dir);
+ ent->dir->i_mtime = inode_set_ctime_current(ent->dir);
retval = ext4_mark_inode_dirty(handle, ent->dir);
BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
if (!ent->inlined) {
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old.inode->i_ctime = current_time(old.inode);
+ inode_set_ctime_current(old.inode);
retval = ext4_mark_inode_dirty(handle, old.inode);
if (unlikely(retval))
goto end_rename;
if (new.inode) {
ext4_dec_count(new.inode);
- new.inode->i_ctime = current_time(new.inode);
+ inode_set_ctime_current(new.inode);
}
- old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
+ old.dir->i_mtime = inode_set_ctime_current(old.dir);
ext4_update_dx_flag(old.dir);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
ext4_resetent(handle, &old,
old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
+ ext4_mark_inode_dirty(handle, whiteout);
ext4_orphan_add(handle, whiteout);
}
unlock_new_inode(whiteout);
};
u8 new_file_type;
int retval;
- struct timespec64 ctime;
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
!projid_eq(EXT4_I(new_dir)->i_projid,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- ctime = current_time(old.inode);
- old.inode->i_ctime = ctime;
- new.inode->i_ctime = ctime;
+ inode_set_ctime_current(old.inode);
+ inode_set_ctime_current(new.inode);
retval = ext4_mark_inode_dirty(handle, old.inode);
if (unlikely(retval))
goto end_rename;
{
int err;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
+ if (unlikely(ext4_forced_shutdown(old_dir->i_sb)))
return -EIO;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
static int ext4_reconfigure(struct fs_context *fc);
static void ext4_fc_free(struct fs_context *fc);
static int ext4_init_fs_context(struct fs_context *fc);
+static void ext4_kill_sb(struct super_block *sb);
static const struct fs_parameter_spec ext4_param_specs[];
/*
.name = "ext2",
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
- .kill_sb = kill_block_super,
+ .kill_sb = ext4_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
.name = "ext3",
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
- .kill_sb = kill_block_super,
+ .kill_sb = ext4_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+ #define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
+ #define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */
+
+ /*
+ * The ext4_maybe_update_superblock() function checks and updates the
+ * superblock if needed.
+ *
+ * This function is designed to update the on-disk superblock only under
+ * certain conditions to prevent excessive disk writes and unnecessary
+ * waking of the disk from sleep. The superblock will be updated if:
+ * 1. More than an hour has passed since the last superblock update, and
+ * 2. More than 16MB have been written since the last superblock update.
+ *
+ * @sb: The superblock
+ */
+ static void ext4_maybe_update_superblock(struct super_block *sb)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+ journal_t *journal = sbi->s_journal;
+ time64_t now;
+ __u64 last_update;
+ __u64 lifetime_write_kbytes;
+ __u64 diff_size;
+
+ if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
+ !journal || (journal->j_flags & JBD2_UNMOUNT))
+ return;
+
+ now = ktime_get_real_seconds();
+ last_update = ext4_get_tstamp(es, s_wtime);
+
+ if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
+ return;
+
+ lifetime_write_kbytes = sbi->s_kbytes_written +
+ ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+ sbi->s_sectors_written_start) >> 1);
+
+ /* Compute the number of kilobytes written since the last superblock
+ * update and compare it against the 16 MB threshold. This determines
+ * when the next superblock commit should occur (i.e. not more often
+ * than once per 16MB if less than that was written in an hour).
+ */
+ diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
+
+ if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
+ }
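Reduced to its predicate, the function queues s_sb_upd_work only when both thresholds trip. A stand-alone sketch of that decision (names illustrative):

	#include <stdbool.h>
	#include <stdint.h>

	#define SB_REFRESH_INTERVAL_SEC 3600	/* 1 hour */
	#define SB_REFRESH_INTERVAL_KB 16384	/* 16MB */

	/* true when a periodic superblock update should be queued */
	static bool sb_update_due(int64_t now, int64_t last_update,
				  uint64_t lifetime_kb, uint64_t ondisk_kb)
	{
		if (now - last_update < SB_REFRESH_INTERVAL_SEC)
			return false;		/* updated within the hour */
		return lifetime_kb - ondisk_kb > SB_REFRESH_INTERVAL_KB;
	}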
+
/*
* The del_gendisk() function uninitializes the disk-specific data
* structures, including the bdi structure, without telling anyone
BUG_ON(txn->t_state == T_FINISHED);
ext4_process_freed_data(sb, txn->t_tid);
+ ext4_maybe_update_superblock(sb);
spin_lock(&sbi->s_md_lock);
while (!list_empty(&txn->t_private_list)) {
WARN_ON_ONCE(1);
if (!continue_fs && !sb_rdonly(sb)) {
- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+ set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
if (journal)
jbd2_journal_abort(journal, -EIO);
}
* defer superblock flushing to a workqueue.
*/
if (continue_fs && journal)
- schedule_work(&EXT4_SB(sb)->s_error_work);
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
else
ext4_commit_super(sb);
}
sb->s_flags |= SB_RDONLY;
}
- static void flush_stashed_error_work(struct work_struct *work)
+ static void update_super_work(struct work_struct *work)
{
struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
- s_error_work);
+ s_sb_upd_work);
journal_t *journal = sbi->s_journal;
handle_t *handle;
*/
if (!sb_rdonly(sbi->s_sb) && journal) {
struct buffer_head *sbh = sbi->s_sbh;
+ bool call_notify_err = false;
handle = jbd2_journal_start(journal, 1);
if (IS_ERR(handle))
goto write_directly;
jbd2_journal_stop(handle);
goto write_directly;
}
+
+ if (sbi->s_add_error_count > 0)
+ call_notify_err = true;
+
ext4_update_super(sbi->s_sb);
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
goto write_directly;
}
jbd2_journal_stop(handle);
- ext4_notify_error_sysfs(sbi);
+
+ if (call_notify_err)
+ ext4_notify_error_sysfs(sbi);
+
return;
}
write_directly:
struct va_format vaf;
va_list args;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return;
trace_ext4_error(sb, function, line);
va_list args;
struct va_format vaf;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
trace_ext4_error(inode->i_sb, function, line);
struct inode *inode = file_inode(file);
char pathname[80], *path;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
trace_ext4_error(inode->i_sb, function, line);
char nbuf[16];
const char *errstr;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return;
/* Special case: if the error is EROFS, and we're not already
struct va_format vaf;
va_list args;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+ if (unlikely(ext4_forced_shutdown(sb)))
return;
trace_ext4_error(sb, function, line);
if (!bdev_read_only(sb->s_bdev)) {
save_error_info(sb, EFSCORRUPTED, ino, block, function,
line);
- schedule_work(&EXT4_SB(sb)->s_error_work);
+ schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
}
return;
}
*/
}
-static void ext4_bdev_mark_dead(struct block_device *bdev)
-{
- ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH);
-}
-
-static const struct blk_holder_ops ext4_holder_ops = {
- .mark_dead = ext4_bdev_mark_dead,
-};
-
-/*
- * Open the external journal device
- */
-static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
-{
- struct block_device *bdev;
-
- bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
- &fs_holder_ops);
- if (IS_ERR(bdev))
- goto fail;
- return bdev;
-
- fail:
- ext4_msg(sb, KERN_ERR,
- "failed to open journal device unknown-block(%u,%u) %ld",
- MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
- return NULL;
-}
-
-/*
- * Release the journal device
- */
-static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
- struct block_device *bdev;
-
- bdev = sbi->s_journal_bdev;
- if (bdev) {
- /*
- * Invalidate the journal device's buffers. We don't want them
- * floating about in memory - the physical journal device may
- * hotswapped, and it breaks the `ro-after' testing code.
- */
- invalidate_bdev(bdev);
- blkdev_put(bdev, sbi->s_sb);
- sbi->s_journal_bdev = NULL;
- }
-}
-
static inline struct inode *orphan_list_entry(struct list_head *l)
{
return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
* Unregister sysfs before destroying jbd2 journal.
* Since we could still access attr_journal_task attribute via sysfs
* path which could have sbi->s_journal->j_task as NULL
- * Unregister sysfs before flush sbi->s_error_work.
+ * Unregister sysfs before flushing sbi->s_sb_upd_work.
* Since users may read /proc/fs/ext4/xx/mb_groups during umount, a
* failed metadata read there will queue the error work.
- * flush_stashed_error_work will call start_this_handle may trigger
+ * update_super_work then calls start_this_handle, which may trigger a
* BUG_ON.
*/
ext4_unregister_sysfs(sb);
ext4_unregister_li_request(sb);
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
- flush_work(&sbi->s_error_work);
+ flush_work(&sbi->s_sb_upd_work);
destroy_workqueue(sbi->rsv_conversion_wq);
ext4_release_orphan_info(sb);
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
if (sbi->s_journal_bdev) {
+ /*
+ * Invalidate the journal device's buffers. We don't want them
+ * floating about in memory - the physical journal device may be
+ * hotswapped, and it breaks the `ro-after' testing code.
+ */
sync_blockdev(sbi->s_journal_bdev);
- ext4_blkdev_remove(sbi);
+ invalidate_bdev(sbi->s_journal_bdev);
}
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
#endif
+ {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
{Opt_err, 0, 0}
};
unsigned int mask_s_mount_opt;
unsigned int vals_s_mount_opt2;
unsigned int mask_s_mount_opt2;
- unsigned long vals_s_mount_flags;
- unsigned long mask_s_mount_flags;
unsigned int opt_flags; /* MOPT flags */
unsigned int spec;
u32 s_max_batch_time;
EXT4_CLEAR_CTX(mount_opt2);
EXT4_TEST_CTX(mount_opt2);
- static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
- {
- set_bit(bit, &ctx->mask_s_mount_flags);
- set_bit(bit, &ctx->vals_s_mount_flags);
- }
-
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct ext4_fs_context *ctx = fc->fs_private;
ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
param->key);
return 0;
- case Opt_abort:
- ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
- return 0;
case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
ctx_set_flags(ctx, SB_INLINECRYPT);
sbi->s_mount_opt |= ctx->vals_s_mount_opt;
sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
- sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
- sbi->s_mount_flags |= ctx->vals_s_mount_flags;
sb->s_flags &= ~ctx->mask_s_flags;
sb->s_flags |= ctx->vals_s_flags;
else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
/* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
- if (j_inode) {
+ if (!IS_ERR(j_inode)) {
j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
overhead += EXT4_NUM_B2C(sbi, j_blocks);
iput(j_inode);
return 0;
out:
- /* flush s_error_work before journal destroy. */
- flush_work(&sbi->s_error_work);
+ /* flush s_sb_upd_work before destroying the journal. */
+ flush_work(&sbi->s_sb_upd_work);
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
return -EINVAL;
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
- INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
+ INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
if (err)
spin_lock_init(&sbi->s_bdev_wb_lock);
errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
&sbi->s_bdev_wb_err);
- sb->s_bdev->bd_super = sb;
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
sbi->s_ea_block_cache = NULL;
if (sbi->s_journal) {
- /* flush s_error_work before journal destroy. */
- flush_work(&sbi->s_error_work);
+ /* flush s_sb_upd_work before journal destroy. */
+ flush_work(&sbi->s_sb_upd_work);
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
}
failed_mount3a:
ext4_es_unregister_shrinker(sbi);
failed_mount3:
- /* flush s_error_work before sbi destroy */
- flush_work(&sbi->s_error_work);
+ /* flush s_sb_upd_work before sbi destroy */
+ flush_work(&sbi->s_sb_upd_work);
del_timer_sync(&sbi->s_err_report);
ext4_stop_mmpd(sbi);
ext4_group_desc_free(sbi);
kfree(get_qf_name(sb, sbi, i));
#endif
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
- /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
brelse(sbi->s_sbh);
- ext4_blkdev_remove(sbi);
+ if (sbi->s_journal_bdev) {
+ invalidate_bdev(sbi->s_journal_bdev);
+ blkdev_put(sbi->s_journal_bdev, sb);
+ }
out_fail:
invalidate_bdev(sb->s_bdev);
sb->s_fs_info = NULL;
journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
if (IS_ERR(journal_inode)) {
ext4_msg(sb, KERN_ERR, "no journal found");
- return NULL;
+ return ERR_CAST(journal_inode);
}
if (!journal_inode->i_nlink) {
make_bad_inode(journal_inode);
iput(journal_inode);
ext4_msg(sb, KERN_ERR, "journal inode is deleted");
- return NULL;
+ return ERR_PTR(-EFSCORRUPTED);
}
-
- ext4_debug("Journal inode found at %p: %lld bytes\n",
- journal_inode, journal_inode->i_size);
if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
- return NULL;
+ return ERR_PTR(-EFSCORRUPTED);
}
+
+ ext4_debug("Journal inode found at %p: %lld bytes\n",
+ journal_inode, journal_inode->i_size);
return journal_inode;
}
return 0;
}
- static journal_t *ext4_get_journal(struct super_block *sb,
- unsigned int journal_inum)
+ static journal_t *ext4_open_inode_journal(struct super_block *sb,
+ unsigned int journal_inum)
{
struct inode *journal_inode;
journal_t *journal;
- if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
- return NULL;
-
journal_inode = ext4_get_journal_inode(sb, journal_inum);
- if (!journal_inode)
- return NULL;
+ if (IS_ERR(journal_inode))
+ return ERR_CAST(journal_inode);
journal = jbd2_journal_init_inode(journal_inode);
- if (!journal) {
+ if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "Could not load journal inode");
iput(journal_inode);
- return NULL;
+ return ERR_CAST(journal);
}
journal->j_private = sb;
journal->j_bmap = ext4_journal_bmap;
return journal;
}
- static journal_t *ext4_get_dev_journal(struct super_block *sb,
- dev_t j_dev)
+ static struct block_device *ext4_get_journal_blkdev(struct super_block *sb,
+ dev_t j_dev, ext4_fsblk_t *j_start,
+ ext4_fsblk_t *j_len)
{
struct buffer_head *bh;
- journal_t *journal;
- ext4_fsblk_t start;
- ext4_fsblk_t len;
+ struct block_device *bdev;
int hblock, blocksize;
ext4_fsblk_t sb_block;
unsigned long offset;
struct ext4_super_block *es;
- struct block_device *bdev;
-
- if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
- return NULL;
+ int errno;
- bdev = ext4_blkdev_get(j_dev, sb);
- if (bdev == NULL)
- return NULL;
+ /* see get_tree_bdev why this is needed and safe */
+ up_write(&sb->s_umount);
+ bdev = blkdev_get_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
+ &fs_holder_ops);
+ down_write(&sb->s_umount);
+ if (IS_ERR(bdev)) {
+ ext4_msg(sb, KERN_ERR,
+ "failed to open journal device unknown-block(%u,%u) %ld",
+ MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev));
+ return ERR_CAST(bdev);
+ }
blocksize = sb->s_blocksize;
hblock = bdev_logical_block_size(bdev);
if (blocksize < hblock) {
ext4_msg(sb, KERN_ERR,
"blocksize too small for journal device");
+ errno = -EINVAL;
goto out_bdev;
}
sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
offset = EXT4_MIN_BLOCK_SIZE % blocksize;
set_blocksize(bdev, blocksize);
- if (!(bh = __bread(bdev, sb_block, blocksize))) {
+ bh = __bread(bdev, sb_block, blocksize);
+ if (!bh) {
ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
"external journal");
+ errno = -EINVAL;
goto out_bdev;
}
if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
!(le32_to_cpu(es->s_feature_incompat) &
EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
- ext4_msg(sb, KERN_ERR, "external journal has "
- "bad superblock");
- brelse(bh);
- goto out_bdev;
+ ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
if ((le32_to_cpu(es->s_feature_ro_compat) &
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
es->s_checksum != ext4_superblock_csum(sb, es)) {
- ext4_msg(sb, KERN_ERR, "external journal has "
- "corrupt superblock");
- brelse(bh);
- goto out_bdev;
+ ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
ext4_msg(sb, KERN_ERR, "journal UUID does not match");
- brelse(bh);
- goto out_bdev;
+ errno = -EFSCORRUPTED;
+ goto out_bh;
}
- len = ext4_blocks_count(es);
- start = sb_block + 1;
- brelse(bh); /* we're done with the superblock */
+ *j_start = sb_block + 1;
+ *j_len = ext4_blocks_count(es);
+ brelse(bh);
+ return bdev;
- journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
- start, len, blocksize);
- if (!journal) {
+ out_bh:
+ brelse(bh);
+ out_bdev:
+ blkdev_put(bdev, sb);
+ return ERR_PTR(errno);
+ }
+
+ static journal_t *ext4_open_dev_journal(struct super_block *sb,
+ dev_t j_dev)
+ {
+ journal_t *journal;
+ ext4_fsblk_t j_start;
+ ext4_fsblk_t j_len;
+ struct block_device *journal_bdev;
+ int errno = 0;
+
+ journal_bdev = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
+ if (IS_ERR(journal_bdev))
+ return ERR_CAST(journal_bdev);
+
+ journal = jbd2_journal_init_dev(journal_bdev, sb->s_bdev, j_start,
+ j_len, sb->s_blocksize);
+ if (IS_ERR(journal)) {
ext4_msg(sb, KERN_ERR, "failed to create device journal");
+ errno = PTR_ERR(journal);
goto out_bdev;
}
- journal->j_private = sb;
- if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
- ext4_msg(sb, KERN_ERR, "I/O error on journal device");
- goto out_journal;
- }
if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
ext4_msg(sb, KERN_ERR, "External journal has more than one "
"user (unsupported) - %d",
be32_to_cpu(journal->j_superblock->s_nr_users));
+ errno = -EINVAL;
goto out_journal;
}
- EXT4_SB(sb)->s_journal_bdev = bdev;
+ journal->j_private = sb;
+ EXT4_SB(sb)->s_journal_bdev = journal_bdev;
ext4_init_journal_params(sb, journal);
return journal;
out_journal:
jbd2_journal_destroy(journal);
out_bdev:
- blkdev_put(bdev, sb);
- return NULL;
+ blkdev_put(journal_bdev, sb);
+ return ERR_PTR(errno);
}
static int ext4_load_journal(struct super_block *sb,
}
if (journal_inum) {
- journal = ext4_get_journal(sb, journal_inum);
- if (!journal)
- return -EINVAL;
+ journal = ext4_open_inode_journal(sb, journal_inum);
+ if (IS_ERR(journal))
+ return PTR_ERR(journal);
} else {
- journal = ext4_get_dev_journal(sb, journal_dev);
- if (!journal)
- return -EINVAL;
+ journal = ext4_open_dev_journal(sb, journal_dev);
+ if (IS_ERR(journal))
+ return PTR_ERR(journal);
}
journal_dev_ro = bdev_read_only(journal->j_dev);
* the clock is set in the future, and this will cause e2fsck
* to complain and force a full file system check.
*/
- if (!(sb->s_flags & SB_RDONLY))
+ if (!sb_rdonly(sb))
ext4_update_tstamp(es, s_wtime);
es->s_kbytes_written =
cpu_to_le64(sbi->s_kbytes_written +
*/
int ext4_force_commit(struct super_block *sb)
{
- journal_t *journal;
-
- if (sb_rdonly(sb))
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
- return ext4_journal_force_commit(journal);
+ return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
}
static int ext4_sync_fs(struct super_block *sb, int wait)
bool needs_barrier = false;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (unlikely(ext4_forced_shutdown(sbi)))
+ if (unlikely(ext4_forced_shutdown(sb)))
return 0;
trace_ext4_sync_fs(sb, wait);
static int ext4_freeze(struct super_block *sb)
{
int error = 0;
- journal_t *journal;
-
- if (sb_rdonly(sb))
- return 0;
-
- journal = EXT4_SB(sb)->s_journal;
+ journal_t *journal = EXT4_SB(sb)->s_journal;
if (journal) {
/* Now we set up the journal barrier. */
*/
static int ext4_unfreeze(struct super_block *sb)
{
- if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
+ if (ext4_forced_shutdown(sb))
return 0;
if (EXT4_SB(sb)->s_journal) {
goto restore_opts;
}
- if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
+ if (test_opt2(sb, ABORT))
ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
}
/* Flush outstanding errors before changing fs state */
- flush_work(&sbi->s_error_work);
+ flush_work(&sbi->s_sb_upd_work);
if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
- if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
+ if (ext4_forced_shutdown(sb)) {
err = -EROFS;
goto restore_opts;
}
* If there was a failing r/w to ro transition, we may need to
* re-enable quota
*/
- if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+ if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
sb->s_flags = old_sb_flags;
err = dquot_quota_off(sb, type);
if (err || ext4_has_feature_quota(sb))
goto out_put;
+ /*
+ * When the filesystem was remounted read-only first, we cannot clean up
+ * inode flags here. Bad luck but people should be using the QUOTA
+ * feature these days anyway.
+ */
+ if (sb_rdonly(sb))
+ goto out_put;
inode_lock(inode);
/*
}
EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_unlock:
return 1;
}
+static void ext4_kill_sb(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL;
+
+ kill_block_super(sb);
+
+ if (journal_bdev)
+ blkdev_put(journal_bdev, sb);
+}
+
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
.name = "ext4",
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("ext4");
static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
{
- return ((u64)ea_inode->i_ctime.tv_sec << 32) |
+ return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) |
(u32) inode_peek_iversion_raw(ea_inode);
}
static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
{
- ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
+ inode_set_ctime(ea_inode, (u32)(ref_count >> 32), 0);
inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff);
}
{
int error;
- if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+ if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (strlen(name) > 255)
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
if (!value)
no_expand = 0;
#endif
/* Checksumming functions */
- static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
- {
- if (!jbd2_journal_has_csum_v2or3_feature(j))
- return 1;
-
- return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
- }
-
static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{
__u32 csum;
int do_escape = 0;
char *mapped_data;
struct buffer_head *new_bh;
- struct page *new_page;
+ struct folio *new_folio;
unsigned int new_offset;
struct buffer_head *bh_in = jh2bh(jh_in);
journal_t *journal = transaction->t_journal;
*/
if (jh_in->b_frozen_data) {
done_copy_out = 1;
- new_page = virt_to_page(jh_in->b_frozen_data);
- new_offset = offset_in_page(jh_in->b_frozen_data);
+ new_folio = virt_to_folio(jh_in->b_frozen_data);
+ new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
} else {
- new_page = jh2bh(jh_in)->b_page;
- new_offset = offset_in_page(jh2bh(jh_in)->b_data);
+ new_folio = jh2bh(jh_in)->b_folio;
+ new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data);
}
- mapped_data = kmap_atomic(new_page);
+ mapped_data = kmap_local_folio(new_folio, new_offset);
/*
* Fire data frozen trigger if data already wasn't frozen. Do this
* before checking for escaping, as the trigger may modify the magic
* data in the buffer.
*/
if (!done_copy_out)
- jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+ jbd2_buffer_frozen_trigger(jh_in, mapped_data,
jh_in->b_triggers);
/*
* Check for escaping
*/
- if (*((__be32 *)(mapped_data + new_offset)) ==
- cpu_to_be32(JBD2_MAGIC_NUMBER)) {
+ if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) {
need_copy_out = 1;
do_escape = 1;
}
- kunmap_atomic(mapped_data);
+ kunmap_local(mapped_data);
/*
* Do we need to do a data copy?
}
jh_in->b_frozen_data = tmp;
- mapped_data = kmap_atomic(new_page);
- memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
- kunmap_atomic(mapped_data);
+ memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size);
- new_page = virt_to_page(tmp);
- new_offset = offset_in_page(tmp);
+ new_folio = virt_to_folio(tmp);
+ new_offset = offset_in_folio(new_folio, tmp);
done_copy_out = 1;
/*
* copying, we can finally do so.
*/
if (do_escape) {
- mapped_data = kmap_atomic(new_page);
- *((unsigned int *)(mapped_data + new_offset)) = 0;
- kunmap_atomic(mapped_data);
+ mapped_data = kmap_local_folio(new_folio, new_offset);
+ *((unsigned int *)mapped_data) = 0;
+ kunmap_local(mapped_data);
}
- set_bh_page(new_bh, new_page, new_offset);
+ folio_set_bh(new_bh, new_folio, new_offset);
new_bh->b_size = bh_in->b_size;
new_bh->b_bdev = journal->j_dev;
new_bh->b_blocknr = blocknr;
return count;
}
+ /*
+ * If the journal init or create aborts, we need to mark the journal
+ * superblock as being NULL to prevent the journal destroy from writing
+ * back a bogus superblock.
+ */
+ static void journal_fail_superblock(journal_t *journal)
+ {
+ struct buffer_head *bh = journal->j_sb_buffer;
+ brelse(bh);
+ journal->j_sb_buffer = NULL;
+ }
+
+ /*
+ * Check the superblock for a given journal, performing initial
+ * validation of the format.
+ */
+ static int journal_check_superblock(journal_t *journal)
+ {
+ journal_superblock_t *sb = journal->j_superblock;
+ int num_fc_blks;
+ int err = -EINVAL;
+
+ if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
+ sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
+ printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
+ return err;
+ }
+
+ if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
+ be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
+ printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
+ return err;
+ }
+
+ if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
+ printk(KERN_WARNING "JBD2: journal file too short\n");
+ return err;
+ }
+
+ if (be32_to_cpu(sb->s_first) == 0 ||
+ be32_to_cpu(sb->s_first) >= journal->j_total_len) {
+ printk(KERN_WARNING
+ "JBD2: Invalid start block of journal: %u\n",
+ be32_to_cpu(sb->s_first));
+ return err;
+ }
+
+ /*
+ * If this is a V2 superblock, then we have to check the
+ * features flags on it.
+ */
+ if (!jbd2_format_support_feature(journal))
+ return 0;
+
+ if ((sb->s_feature_ro_compat &
+ ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
+ (sb->s_feature_incompat &
+ ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
+ printk(KERN_WARNING "JBD2: Unrecognised features on journal\n");
+ return err;
+ }
+
+ num_fc_blks = jbd2_has_feature_fast_commit(journal) ?
+ jbd2_journal_get_num_fc_blks(sb) : 0;
+ if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS ||
+ be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) {
+ printk(KERN_ERR "JBD2: journal file too short %u,%d\n",
+ be32_to_cpu(sb->s_maxlen), num_fc_blks);
+ return err;
+ }
+
+ if (jbd2_has_feature_csum2(journal) &&
+ jbd2_has_feature_csum3(journal)) {
+ /* Can't have checksum v2 and v3 at the same time! */
+ printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
+ "at the same time!\n");
+ return err;
+ }
+
+ if (jbd2_journal_has_csum_v2or3_feature(journal) &&
+ jbd2_has_feature_checksum(journal)) {
+ /* Can't have checksum v1 and v2 on at the same time! */
+ printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
+ "at the same time!\n");
+ return err;
+ }
+
+ /* Load the checksum driver */
+ if (jbd2_journal_has_csum_v2or3_feature(journal)) {
+ if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
+ printk(KERN_ERR "JBD2: Unknown checksum type\n");
+ return err;
+ }
+
+ journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(journal->j_chksum_driver)) {
+ printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
+ err = PTR_ERR(journal->j_chksum_driver);
+ journal->j_chksum_driver = NULL;
+ return err;
+ }
+ /* Check superblock checksum */
+ if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
+ printk(KERN_ERR "JBD2: journal checksum error\n");
+ err = -EFSBADCRC;
+ return err;
+ }
+ }
+
+ return 0;
+ }
+
+ static int journal_revoke_records_per_block(journal_t *journal)
+ {
+ int record_size;
+ int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
+
+ if (jbd2_has_feature_64bit(journal))
+ record_size = 8;
+ else
+ record_size = 4;
+
+ if (jbd2_journal_has_csum_v2or3(journal))
+ space -= sizeof(struct jbd2_journal_block_tail);
+ return space / record_size;
+ }
+
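/*
 * Editor's worked example for journal_revoke_records_per_block(),
 * assuming the usual on-disk sizes (16-byte jbd2_journal_revoke_header_t,
 * 4-byte jbd2_journal_block_tail): with a 4096-byte block, the 64bit
 * feature (8-byte records) and v2/v3 checksums enabled, the result is
 * (4096 - 16 - 4) / 8 = 509 revoke records per block.
 */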
+ /*
+ * Load the on-disk journal superblock and read the key fields into the
+ * journal_t.
+ */
+ static int journal_load_superblock(journal_t *journal)
+ {
+ int err;
+ struct buffer_head *bh;
+ journal_superblock_t *sb;
+
+ bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset,
+ journal->j_blocksize);
+ if (bh)
+ err = bh_read(bh, 0);
+ if (!bh || err < 0) {
+ pr_err("%s: Cannot read journal superblock\n", __func__);
+ brelse(bh);
+ return -EIO;
+ }
+
+ journal->j_sb_buffer = bh;
+ sb = (journal_superblock_t *)bh->b_data;
+ journal->j_superblock = sb;
+ err = journal_check_superblock(journal);
+ if (err) {
+ journal_fail_superblock(journal);
+ return err;
+ }
+
+ journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
+ journal->j_tail = be32_to_cpu(sb->s_start);
+ journal->j_first = be32_to_cpu(sb->s_first);
+ journal->j_errno = be32_to_cpu(sb->s_errno);
+ journal->j_last = be32_to_cpu(sb->s_maxlen);
+
+ if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
+ journal->j_total_len = be32_to_cpu(sb->s_maxlen);
+ /* Precompute checksum seed for all metadata */
+ if (jbd2_journal_has_csum_v2or3(journal))
+ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+ sizeof(sb->s_uuid));
+ journal->j_revoke_records_per_block =
+ journal_revoke_records_per_block(journal);
+
+ if (jbd2_has_feature_fast_commit(journal)) {
+ journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
+ journal->j_last = journal->j_fc_last -
+ jbd2_journal_get_num_fc_blks(sb);
+ journal->j_fc_first = journal->j_last + 1;
+ journal->j_fc_off = 0;
+ }
+
+ return 0;
+ }
+
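/*
 * Editor's worked example for the fast-commit carve-out above (numbers
 * illustrative): with s_maxlen = 32768 and 256 fast-commit blocks,
 * j_fc_last = 32768, j_last = 32512 and j_fc_first = 32513, i.e. the
 * tail of the journal is reserved for fast commits and excluded from
 * the regular transaction area.
 */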
/*
* Management for journal control blocks: functions to create and
* destroy journal_t structures, and to initialise and read existing
static struct lock_class_key jbd2_trans_commit_key;
journal_t *journal;
int err;
- struct buffer_head *bh;
int n;
journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
- return NULL;
+ return ERR_PTR(-ENOMEM);
+
+ journal->j_blocksize = blocksize;
+ journal->j_dev = bdev;
+ journal->j_fs_dev = fs_dev;
+ journal->j_blk_offset = start;
+ journal->j_total_len = len;
+
+ err = journal_load_superblock(journal);
+ if (err)
+ goto err_cleanup;
init_waitqueue_head(&journal->j_wait_transaction_locked);
init_waitqueue_head(&journal->j_wait_done_commit);
mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock);
spin_lock_init(&journal->j_list_lock);
+ spin_lock_init(&journal->j_history_lock);
rwlock_init(&journal->j_state_lock);
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
journal->j_min_batch_time = 0;
journal->j_max_batch_time = 15000; /* 15ms */
atomic_set(&journal->j_reserved_credits, 0);
+ lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
+ &jbd2_trans_commit_key, 0);
/* The journal is marked for error until we succeed with recovery! */
journal->j_flags = JBD2_ABORT;
if (err)
goto err_cleanup;
- spin_lock_init(&journal->j_history_lock);
-
- lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
- &jbd2_trans_commit_key, 0);
-
- /* journal descriptor can store up to n blocks -bzzz */
- journal->j_blocksize = blocksize;
- journal->j_dev = bdev;
- journal->j_fs_dev = fs_dev;
- journal->j_blk_offset = start;
- journal->j_total_len = len;
- /* We need enough buffers to write out full descriptor block. */
+ /*
+ * The journal descriptor can store up to n blocks; we need enough
+ * buffers to write out a full descriptor block.
+ */
+ err = -ENOMEM;
n = journal->j_blocksize / jbd2_min_tag_size();
journal->j_wbufsize = n;
journal->j_fc_wbuf = NULL;
if (!journal->j_wbuf)
goto err_cleanup;
- bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
- if (!bh) {
- pr_err("%s: Cannot get buffer for journal superblock\n",
- __func__);
+ err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0,
+ GFP_KERNEL);
+ if (err)
goto err_cleanup;
- }
- journal->j_sb_buffer = bh;
- journal->j_superblock = (journal_superblock_t *)bh->b_data;
journal->j_shrink_transaction = NULL;
journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
journal->j_shrinker.seeks = DEFAULT_SEEKS;
journal->j_shrinker.batch = journal->j_max_transaction_buffers;
-
- if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL))
+ err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
+ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+ if (err)
goto err_cleanup;
- if (register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
- MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev))) {
- percpu_counter_destroy(&journal->j_checkpoint_jh_count);
- goto err_cleanup;
- }
return journal;
err_cleanup:
- brelse(journal->j_sb_buffer);
+ percpu_counter_destroy(&journal->j_checkpoint_jh_count);
kfree(journal->j_wbuf);
jbd2_journal_destroy_revoke(journal);
+ journal_fail_superblock(journal);
kfree(journal);
- return NULL;
+ return ERR_PTR(err);
}
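/*
 * With journal_init_common() now returning ERR_PTR() instead of NULL,
 * callers switch from NULL checks to IS_ERR()/PTR_ERR(), as the ocfs2
 * hunks at the end of this patch do. A hypothetical caller
 * (examplefs_attach_journal is illustrative only):
 */
static int examplefs_attach_journal(struct inode *journal_inode)
{
	journal_t *journal = jbd2_journal_init_inode(journal_inode);

	if (IS_ERR(journal))
		return PTR_ERR(journal);	/* propagate the real errno */

	/* ... use the journal ... */
	return 0;
}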
/* jbd2_journal_init_dev and jbd2_journal_init_inode:
journal_t *journal;
journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
- if (!journal)
- return NULL;
+ if (IS_ERR(journal))
+ return ERR_CAST(journal);
snprintf(journal->j_devname, sizeof(journal->j_devname),
"%pg", journal->j_dev);
blocknr = 0;
err = bmap(inode, &blocknr);
-
if (err || !blocknr) {
- pr_err("%s: Cannot locate journal superblock\n",
- __func__);
- return NULL;
+ pr_err("%s: Cannot locate journal superblock\n", __func__);
+ return err ? ERR_PTR(err) : ERR_PTR(-EINVAL);
}
jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
inode->i_sb->s_blocksize);
- if (!journal)
- return NULL;
+ if (IS_ERR(journal))
+ return ERR_CAST(journal);
journal->j_inode = inode;
snprintf(journal->j_devname, sizeof(journal->j_devname),
return journal;
}
- /*
- * If the journal init or create aborts, we need to mark the journal
- * superblock as being NULL to prevent the journal destroy from writing
- * back a bogus superblock.
- */
- static void journal_fail_superblock(journal_t *journal)
- {
- struct buffer_head *bh = journal->j_sb_buffer;
- brelse(bh);
- journal->j_sb_buffer = NULL;
- }
-
/*
* Given a journal_t structure, initialise the various fields for
* startup of a new journaling session. We use this both when creating
}
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
- static int journal_revoke_records_per_block(journal_t *journal)
- {
- int record_size;
- int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
-
- if (jbd2_has_feature_64bit(journal))
- record_size = 8;
- else
- record_size = 4;
-
- if (jbd2_journal_has_csum_v2or3(journal))
- space -= sizeof(struct jbd2_journal_block_tail);
- return space / record_size;
- }
-
- /*
- * Read the superblock for a given journal, performing initial
- * validation of the format.
- */
- static int journal_get_superblock(journal_t *journal)
- {
- struct buffer_head *bh;
- journal_superblock_t *sb;
- int err;
-
- bh = journal->j_sb_buffer;
-
- J_ASSERT(bh != NULL);
- if (buffer_verified(bh))
- return 0;
-
- err = bh_read(bh, 0);
- if (err < 0) {
- printk(KERN_ERR
- "JBD2: IO error reading journal superblock\n");
- goto out;
- }
-
- sb = journal->j_superblock;
-
- err = -EINVAL;
-
- if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
- sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
- printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
- goto out;
- }
-
- if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
- be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
- printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
- goto out;
- }
-
- if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
- printk(KERN_WARNING "JBD2: journal file too short\n");
- goto out;
- }
-
- if (be32_to_cpu(sb->s_first) == 0 ||
- be32_to_cpu(sb->s_first) >= journal->j_total_len) {
- printk(KERN_WARNING
- "JBD2: Invalid start block of journal: %u\n",
- be32_to_cpu(sb->s_first));
- goto out;
- }
-
- if (jbd2_has_feature_csum2(journal) &&
- jbd2_has_feature_csum3(journal)) {
- /* Can't have checksum v2 and v3 at the same time! */
- printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
- "at the same time!\n");
- goto out;
- }
-
- if (jbd2_journal_has_csum_v2or3_feature(journal) &&
- jbd2_has_feature_checksum(journal)) {
- /* Can't have checksum v1 and v2 on at the same time! */
- printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
- "at the same time!\n");
- goto out;
- }
-
- if (!jbd2_verify_csum_type(journal, sb)) {
- printk(KERN_ERR "JBD2: Unknown checksum type\n");
- goto out;
- }
-
- /* Load the checksum driver */
- if (jbd2_journal_has_csum_v2or3_feature(journal)) {
- journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
- err = PTR_ERR(journal->j_chksum_driver);
- journal->j_chksum_driver = NULL;
- goto out;
- }
- /* Check superblock checksum */
- if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
- printk(KERN_ERR "JBD2: journal checksum error\n");
- err = -EFSBADCRC;
- goto out;
- }
- }
- set_buffer_verified(bh);
- return 0;
-
- out:
- journal_fail_superblock(journal);
- return err;
- }
-
- /*
- * Load the on-disk journal superblock and read the key fields into the
- * journal_t.
- */
-
- static int load_superblock(journal_t *journal)
- {
- int err;
- journal_superblock_t *sb;
- int num_fc_blocks;
-
- err = journal_get_superblock(journal);
- if (err)
- return err;
-
- sb = journal->j_superblock;
-
- journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
- journal->j_tail = be32_to_cpu(sb->s_start);
- journal->j_first = be32_to_cpu(sb->s_first);
- journal->j_errno = be32_to_cpu(sb->s_errno);
- journal->j_last = be32_to_cpu(sb->s_maxlen);
-
- if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
- journal->j_total_len = be32_to_cpu(sb->s_maxlen);
- /* Precompute checksum seed for all metadata */
- if (jbd2_journal_has_csum_v2or3(journal))
- journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
- sizeof(sb->s_uuid));
- journal->j_revoke_records_per_block =
- journal_revoke_records_per_block(journal);
-
- if (jbd2_has_feature_fast_commit(journal)) {
- journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
- num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
- if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
- journal->j_last = journal->j_fc_last - num_fc_blocks;
- journal->j_fc_first = journal->j_last + 1;
- journal->j_fc_off = 0;
- }
-
- return 0;
- }
-
-
/**
* jbd2_journal_load() - Read journal from disk.
* @journal: Journal to act on.
int jbd2_journal_load(journal_t *journal)
{
int err;
- journal_superblock_t *sb;
-
- err = load_superblock(journal);
- if (err)
- return err;
-
- sb = journal->j_superblock;
-
- /*
- * If this is a V2 superblock, then we have to check the
- * features flags on it.
- */
- if (jbd2_format_support_feature(journal)) {
- if ((sb->s_feature_ro_compat &
- ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
- (sb->s_feature_incompat &
- ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
- printk(KERN_WARNING
- "JBD2: Unrecognised features on journal\n");
- return -EINVAL;
- }
- }
+ journal_superblock_t *sb = journal->j_superblock;
/*
* Create a slab for this blocksize
/* Let the recovery code check whether it needs to recover any
* data from the journal. */
- if (jbd2_journal_recover(journal))
- goto recovery_error;
+ err = jbd2_journal_recover(journal);
+ if (err) {
+ pr_warn("JBD2: journal recovery failed\n");
+ return err;
+ }
if (journal->j_failed_commit) {
printk(KERN_ERR "JBD2: journal transaction %u on %s "
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk. */
- if (journal_reset(journal))
- goto recovery_error;
+ err = journal_reset(journal);
+ if (err) {
+ pr_warn("JBD2: journal reset failed\n");
+ return err;
+ }
journal->j_flags |= JBD2_LOADED;
return 0;
-
- recovery_error:
- printk(KERN_WARNING "JBD2: recovery failed\n");
- return -EIO;
}
/**
if (!compat && !ro && !incompat)
return 1;
- if (journal_get_superblock(journal))
- return 0;
if (!jbd2_format_support_feature(journal))
return 0;
int jbd2_journal_wipe(journal_t *journal, int write)
{
- int err = 0;
+ int err;
J_ASSERT (!(journal->j_flags & JBD2_LOADED));
- err = load_superblock(journal);
- if (err)
- return err;
-
if (!journal->j_tail)
- goto no_recovery;
+ return 0;
printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
write ? "Clearing" : "Ignoring");
mutex_unlock(&journal->j_checkpoint_mutex);
}
- no_recovery:
return err;
}
unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
return 0;
}
};
EXPORT_SYMBOL(simple_dir_inode_operations);
+static void offset_set(struct dentry *dentry, u32 offset)
+{
+ dentry->d_fsdata = (void *)((uintptr_t)(offset));
+}
+
+static u32 dentry2offset(struct dentry *dentry)
+{
+ return (u32)((uintptr_t)(dentry->d_fsdata));
+}
+
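/*
 * Editor's note: offset_set() and dentry2offset() encode a directory
 * offset directly in the dentry's d_fsdata pointer, so no per-entry
 * allocation is needed. Offset 0 means "not in the map", which is why
 * simple_offset_add() below rejects a dentry whose offset is already
 * nonzero and simple_offset_remove() resets the offset to 0.
 */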
+static struct lock_class_key simple_offset_xa_lock;
+
+/**
+ * simple_offset_init - initialize an offset_ctx
+ * @octx: directory offset map to be initialized
+ */
+void simple_offset_init(struct offset_ctx *octx)
+{
+ xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
+ lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
+
+ /* 0 is '.', 1 is '..', so always start with offset 2 */
+ octx->next_offset = 2;
+}
+
+/**
+ * simple_offset_add - Add an entry to a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: new dentry being added
+ *
+ * Returns zero on success. @octx and the dentry offset are updated.
+ * Otherwise, a negative errno value is returned.
+ */
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
+{
+ static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
+ u32 offset;
+ int ret;
+
+ if (dentry2offset(dentry) != 0)
+ return -EBUSY;
+
+ ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
+ &octx->next_offset, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ offset_set(dentry, offset);
+ return 0;
+}
+
+/**
+ * simple_offset_remove - Remove an entry from a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: dentry being removed
+ */
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
+{
+ u32 offset;
+
+ offset = dentry2offset(dentry);
+ if (offset == 0)
+ return;
+
+ xa_erase(&octx->xa, offset);
+ offset_set(dentry, 0);
+}
+
+/**
+ * simple_offset_rename_exchange - exchange rename with directory offsets
+ * @old_dir: parent of dentry being moved
+ * @old_dentry: dentry being moved
+ * @new_dir: destination parent
+ * @new_dentry: destination dentry
+ *
+ * Returns zero on success. Otherwise a negative errno is returned and the
+ * rename is rolled back.
+ */
+int simple_offset_rename_exchange(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry)
+{
+ struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
+ struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
+ u32 old_index = dentry2offset(old_dentry);
+ u32 new_index = dentry2offset(new_dentry);
+ int ret;
+
+ simple_offset_remove(old_ctx, old_dentry);
+ simple_offset_remove(new_ctx, new_dentry);
+
+ ret = simple_offset_add(new_ctx, old_dentry);
+ if (ret)
+ goto out_restore;
+
+ ret = simple_offset_add(old_ctx, new_dentry);
+ if (ret) {
+ simple_offset_remove(new_ctx, old_dentry);
+ goto out_restore;
+ }
+
+ ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+ if (ret) {
+ simple_offset_remove(new_ctx, old_dentry);
+ simple_offset_remove(old_ctx, new_dentry);
+ goto out_restore;
+ }
+ return 0;
+
+out_restore:
+ offset_set(old_dentry, old_index);
+ xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
+ offset_set(new_dentry, new_index);
+ xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
+ return ret;
+}
+
+/**
+ * simple_offset_destroy - Release offset map
+ * @octx: directory offset ctx that is about to be destroyed
+ *
+ * During fs teardown (e.g. umount), a directory's offset map might still
+ * contain entries. xa_destroy() cleans out anything that remains.
+ */
+void simple_offset_destroy(struct offset_ctx *octx)
+{
+ xa_destroy(&octx->xa);
+}
+
+/**
+ * offset_dir_llseek - Advance the read position of a directory descriptor
+ * @file: an open directory whose position is to be updated
+ * @offset: a byte offset
+ * @whence: enumerator describing the starting position for this update
+ *
+ * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
+ *
+ * Returns the updated read position if successful; otherwise a
+ * negative errno is returned and the read position remains unchanged.
+ */
+static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ switch (whence) {
+ case SEEK_CUR:
+ offset += file->f_pos;
+ fallthrough;
+ case SEEK_SET:
+ if (offset >= 0)
+ break;
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
+
+ return vfs_setpos(file, offset, U32_MAX);
+}
+
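/*
 * Editor's note on the fallthrough chain above: SEEK_CUR first biases
 * @offset by the current position and falls through to the SEEK_SET
 * range check; a still-negative result falls through again to -EINVAL.
 * E.g. f_pos = 10 with SEEK_CUR and offset -4 calls vfs_setpos(file, 6,
 * U32_MAX), while offset -20 returns -EINVAL.
 */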
+static struct dentry *offset_find_next(struct xa_state *xas)
+{
+ struct dentry *child, *found = NULL;
+
+ rcu_read_lock();
+ child = xas_next_entry(xas, U32_MAX);
+ if (!child)
+ goto out;
+ spin_lock(&child->d_lock);
+ if (simple_positive(child))
+ found = dget_dlock(child);
+ spin_unlock(&child->d_lock);
+out:
+ rcu_read_unlock();
+ return found;
+}
+
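/*
 * Editor's note: offset_find_next() walks the xarray under
 * rcu_read_lock() and takes d_lock before dget_dlock(), so a child is
 * only pinned and returned while it is still positive; if the next
 * entry has turned negative in the meantime, NULL is returned and the
 * directory walk in offset_iterate_dir() ends.
 */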
+static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+{
+ u32 offset = dentry2offset(dentry);
+ struct inode *inode = d_inode(dentry);
+
+ return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
+ inode->i_ino, fs_umode_to_dtype(inode->i_mode));
+}
+
+static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+{
+ struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
+ XA_STATE(xas, &so_ctx->xa, ctx->pos);
+ struct dentry *dentry;
+
+ while (true) {
+ dentry = offset_find_next(&xas);
+ if (!dentry)
+ break;
+
+ if (!offset_dir_emit(ctx, dentry)) {
+ dput(dentry);
+ break;
+ }
+
+ dput(dentry);
+ ctx->pos = xas.xa_index + 1;
+ }
+}
+
+/**
+ * offset_readdir - Emit entries starting at offset @ctx->pos
+ * @file: an open directory to iterate over
+ * @ctx: directory iteration context
+ *
+ * Caller must hold @file's i_rwsem to prevent insertion or removal of
+ * entries during this call.
+ *
+ * On entry, @ctx->pos contains an offset that represents the first entry
+ * to be read from the directory.
+ *
+ * The operation continues until there are no more entries to read, or
+ * until the ctx->actor indicates there is no more space in the caller's
+ * output buffer.
+ *
+ * On return, @ctx->pos contains the offset at which reading resumes the
+ * next time offset_readdir() is called with @ctx.
+ *
+ * Return values:
+ * %0 - Complete
+ */
+static int offset_readdir(struct file *file, struct dir_context *ctx)
+{
+ struct dentry *dir = file->f_path.dentry;
+
+ lockdep_assert_held(&d_inode(dir)->i_rwsem);
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+
+ offset_iterate_dir(d_inode(dir), ctx);
+ return 0;
+}
+
+const struct file_operations simple_offset_dir_operations = {
+ .llseek = offset_dir_llseek,
+ .iterate_shared = offset_readdir,
+ .read = generic_read_dir,
+ .fsync = noop_fsync,
+};
+
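/*
 * A minimal sketch (hypothetical "examplefs"; all names here are
 * illustrative, not part of this patch) of wiring a directory up to the
 * offset-mapped readdir above: embed a struct offset_ctx, expose it via
 * the ->get_offset_ctx inode operation, and install
 * simple_offset_dir_operations as the directory file_operations.
 */
struct examplefs_inode {
	struct inode vfs_inode;
	struct offset_ctx dir_offsets;
};

static struct offset_ctx *examplefs_get_offset_ctx(struct inode *inode)
{
	struct examplefs_inode *ei =
		container_of(inode, struct examplefs_inode, vfs_inode);

	return &ei->dir_offsets;
}

static const struct inode_operations examplefs_dir_inode_ops = {
	.lookup		= simple_lookup,
	.get_offset_ctx	= examplefs_get_offset_ctx,
};

static void examplefs_init_dir(struct inode *inode)
{
	struct examplefs_inode *ei =
		container_of(inode, struct examplefs_inode, vfs_inode);

	simple_offset_init(&ei->dir_offsets);
	inode->i_op = &examplefs_dir_inode_ops;
	inode->i_fop = &simple_offset_dir_operations;
	/* new children are then registered with simple_offset_add() */
}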
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
struct dentry *child = NULL;
while ((child = find_next_child(this, victim)) == NULL) {
// kill and ascend
// update metadata while it's still locked
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
clear_nlink(inode);
inode_unlock(inode);
victim = this;
dput(victim); // unpin it
}
if (victim == dentry) {
- inode->i_ctime = inode->i_mtime =
- current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (d_is_dir(dentry))
drop_nlink(inode);
inode_unlock(inode);
*/
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
- root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
+ root->i_atime = root->i_mtime = inode_set_ctime_current(root);
s->s_root = d_make_root(root);
if (!s->s_root)
return -ENOMEM;
{
struct inode *inode = d_inode(old_dentry);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
inc_nlink(inode);
ihold(inode);
dget(dentry);
{
struct inode *inode = d_inode(dentry);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
drop_nlink(inode);
dput(dentry);
return 0;
}
EXPORT_SYMBOL(simple_rmdir);
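/*
 * Editor's note on the timestamp conversions in the surrounding hunks:
 * inode_set_ctime_current() replaces open-coded stores to inode->i_ctime
 * and returns the new timespec64, so a single call can also seed other
 * timestamps, e.g.:
 *
 *	inode->i_mtime = inode_set_ctime_current(inode);
 */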
+/**
+ * simple_rename_timestamp - update the various inode timestamps for rename
+ * @old_dir: old parent directory
+ * @old_dentry: dentry that is being renamed
+ * @new_dir: new parent directory
+ * @new_dentry: target for rename
+ *
+ * POSIX mandates that the old and new parent directories have their ctime and
+ * mtime updated, and that the inodes of @old_dentry and @new_dentry (if any)
+ * have their ctime updated.
+ */
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct inode *newino = d_inode(new_dentry);
+
+ old_dir->i_mtime = inode_set_ctime_current(old_dir);
+ if (new_dir != old_dir)
+ new_dir->i_mtime = inode_set_ctime_current(new_dir);
+ inode_set_ctime_current(d_inode(old_dentry));
+ if (newino)
+ inode_set_ctime_current(newino);
+}
+EXPORT_SYMBOL_GPL(simple_rename_timestamp);
+
int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
inc_nlink(old_dir);
}
}
- old_dir->i_ctime = old_dir->i_mtime =
- new_dir->i_ctime = new_dir->i_mtime =
- d_inode(old_dentry)->i_ctime =
- d_inode(new_dentry)->i_ctime = current_time(old_dir);
-
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
return 0;
}
EXPORT_SYMBOL_GPL(simple_rename_exchange);
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
- struct inode *inode = d_inode(old_dentry);
int they_are_dirs = d_is_dir(old_dentry);
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
inc_nlink(new_dir);
}
- old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
- new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
-
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
return 0;
}
EXPORT_SYMBOL(simple_rename);
loff_t pos, unsigned len,
struct page **pagep, void **fsdata)
{
- struct page *page;
- pgoff_t index;
+ struct folio *folio;
- index = pos >> PAGE_SHIFT;
+ folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
-
- *pagep = page;
+ *pagep = &folio->page;
- if (!PageUptodate(page) && (len != PAGE_SIZE)) {
- unsigned from = pos & (PAGE_SIZE - 1);
+ if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
+ size_t from = offset_in_folio(folio, pos);
- zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
+ folio_zero_segments(folio, 0, from,
+ from + len, folio_size(folio));
}
return 0;
}
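/*
 * Editor's note: FGP_WRITEBEGIN bundles the flags a ->write_begin
 * implementation wants (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE
 * as of this series), so __filemap_get_folio() hands back a locked,
 * write-ready folio or an ERR_PTR(); hence the IS_ERR()/PTR_ERR() check
 * replacing the old NULL check on grab_cache_page_write_begin().
 */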
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct folio *folio = page_folio(page);
+ struct inode *inode = folio->mapping->host;
loff_t last_pos = pos + copied;
- /* zero the stale part of the page if we did a short copy */
- if (!PageUptodate(page)) {
+ /* zero the stale part of the folio if we did a short copy */
+ if (!folio_test_uptodate(folio)) {
if (copied < len) {
- unsigned from = pos & (PAGE_SIZE - 1);
+ size_t from = offset_in_folio(folio, pos);
- zero_user(page, from + copied, len - copied);
+ folio_zero_range(folio, from + copied, len - copied);
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
/*
* No need to use i_size_read() here, the i_size
if (last_pos > inode->i_size)
i_size_write(inode, last_pos);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return copied;
}
*/
inode->i_ino = 1;
inode->i_mode = S_IFDIR | 0755;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
set_nlink(inode, 2);
goto out;
}
inode->i_mode = S_IFREG | files->mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_fop = files->ops;
inode->i_ino = i;
d_add(dentry, inode);
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
return inode;
}
EXPORT_SYMBOL(alloc_anon_inode);
* All arguments are ignored and it just returns -EINVAL.
*/
int
-simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
+simple_nosetlease(struct file *filp, int arg, struct file_lock **flp,
void **priv)
{
return -EINVAL;
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
}
}
#if IS_ENABLED(CONFIG_UNICODE)
- /*
- * Determine if the name of a dentry should be casefolded.
- *
- * Return: if names will need casefolding
- */
- static bool needs_casefold(const struct inode *dir)
- {
- return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
- }
-
/**
* generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
* @dentry: dentry whose name we are checking against
char strbuf[DNAME_INLINE_LEN];
int ret;
- if (!dir || !needs_casefold(dir))
+ if (!dir || !IS_CASEFOLDED(dir))
goto fallback;
/*
* If the dentry name is stored in-line, then it may be concurrently
const struct unicode_map *um = sb->s_encoding;
int ret = 0;
- if (!dir || !needs_casefold(dir))
+ if (!dir || !IS_CASEFOLDED(dir))
return 0;
ret = utf8_casefold_hash(um, dentry, str);
if (osb->replay_map)
return 0;
- replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
- (osb->max_slots * sizeof(char)), GFP_KERNEL);
-
+ replay_map = kzalloc(struct_size(replay_map, rm_replay_slots,
+ osb->max_slots),
+ GFP_KERNEL);
if (!replay_map) {
mlog_errno(-ENOMEM);
return -ENOMEM;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
- rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
- osb->max_slots * sizeof(unsigned int),
+ rm = kzalloc(struct_size(rm, rm_entries, osb->max_slots),
GFP_KERNEL);
if (!rm) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
- rm->rm_entries = (unsigned int *)((char *)rm +
- sizeof(struct ocfs2_recovery_map));
osb->recovery_map = rm;
return 0;
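/*
 * Editor's note: struct_size(rm, rm_entries, osb->max_slots) computes
 * sizeof(*rm) + osb->max_slots * sizeof(rm->rm_entries[0]) and saturates
 * on overflow so the allocation fails instead of being undersized. It
 * also implies rm_entries is now a proper flexible array member, which
 * is why the hand-rolled pointer fixup above is deleted.
 */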
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
- ocfs2_error(bh->b_bdev->bd_super,
+ ocfs2_error(bh->b_assoc_map->host->i_sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
mlog_errno(status);
if (!is_handle_aborted(handle)) {
journal_t *journal = handle->h_transaction->t_journal;
- struct super_block *sb = bh->b_bdev->bd_super;
mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
"Aborting transaction and journal.\n");
handle->h_err = status;
jbd2_journal_abort_handle(handle);
jbd2_journal_abort(journal, status);
- ocfs2_abort(sb, "Journal already aborted.\n");
+ ocfs2_abort(bh->b_assoc_map->host->i_sb,
+ "Journal already aborted.\n");
}
}
}
/* call the kernels journal init function now */
j_journal = jbd2_journal_init_inode(inode);
- if (j_journal == NULL) {
+ if (IS_ERR(j_journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
- status = -EINVAL;
+ status = PTR_ERR(j_journal);
goto done;
}
}
journal = jbd2_journal_init_inode(inode);
- if (journal == NULL) {
+ if (IS_ERR(journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
- status = -EIO;
+ status = PTR_ERR(journal);
goto done;
}