Git Repo - linux.git/commitdiff
Merge tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <[email protected]>
Thu, 31 Aug 2023 22:18:15 +0000 (15:18 -0700)
committer Linus Torvalds <[email protected]>
Thu, 31 Aug 2023 22:18:15 +0000 (15:18 -0700)
Pull ext4 updates from Ted Ts'o:
 "Many ext4 and jbd2 cleanups and bug fixes:

   - Cleanups in the ext4 remount code when going to and from read-only

   - Cleanups in ext4's multiblock allocator

   - Cleanups in the jbd2 setup/mounting code paths

   - Performance improvements when appending to a delayed allocation file

   - Miscellaneous syzbot and other bug fixes"

* tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (60 commits)
  ext4: fix slab-use-after-free in ext4_es_insert_extent()
  libfs: remove redundant checks of s_encoding
  ext4: remove redundant checks of s_encoding
  ext4: reject casefold inode flag without casefold feature
  ext4: use LIST_HEAD() to initialize the list_head in mballoc.c
  ext4: do not mark inode dirty every time when appending using delalloc
  ext4: rename s_error_work to s_sb_upd_work
  ext4: add periodic superblock update check
  ext4: drop dio overwrite only flag and associated warning
  ext4: add correct group descriptors and reserved GDT blocks to system zone
  ext4: remove unused function declaration
  ext4: mballoc: avoid garbage value from err
  ext4: use sbi instead of EXT4_SB(sb) in ext4_mb_new_blocks_simple()
  ext4: change the type of blocksize in ext4_mb_init_cache()
  ext4: fix unttached inode after power cut with orphan file feature enabled
  jbd2: correct the end of the journal recovery scan range
  ext4: ext4_get_{dev}_journal return proper error value
  ext4: cleanup ext4_get_dev_journal() and ext4_get_journal()
  jbd2: jbd2_journal_init_{dev,inode} return proper error return value
  jbd2: drop useless error tag in jbd2_journal_wipe()
  ...

13 files changed:
1  2 
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/file.c
fs/ext4/ialloc.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/jbd2/journal.c
fs/libfs.c
fs/ocfs2/journal.c

diff --combined fs/ext4/ext4.h
index 481491e892dfe4c30f0bbcdf55e7365b7d868b2c,84618c46f2390e0253ea8caa88d43d4e539395d4..9418359b1d9d3b0fb8f57d26f3dfdfb3970fb4ba
@@@ -176,9 -176,6 +176,6 @@@ enum criteria 
        EXT4_MB_NUM_CRS
  };
  
- /* criteria below which we use fast block scanning and avoid unnecessary IO */
- #define CR_FAST CR_GOAL_LEN_SLOW
  /*
   * Flags used in mballoc's allocation_context flags field.
   *
@@@ -868,70 -865,64 +865,70 @@@ struct ext4_inode 
   * affected filesystem before 2242.
   */
  
 -static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
 +static inline __le32 ext4_encode_extra_time(struct timespec64 ts)
  {
 -      u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
 -      return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
 +      u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK;
 +      return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS));
  }
  
 -static inline void ext4_decode_extra_time(struct timespec64 *time,
 -                                        __le32 extra)
 +static inline struct timespec64 ext4_decode_extra_time(__le32 base,
 +                                                     __le32 extra)
  {
 +      struct timespec64 ts = { .tv_sec = (signed)le32_to_cpu(base) };
 +
        if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
 -              time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
 -      time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
 +              ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
 +      ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
 +      return ts;
  }
  
 -#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)                         \
 +#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts)                 \
  do {                                                                          \
 -      if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     {\
 -              (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);        \
 -              (raw_inode)->xtime ## _extra =                                  \
 -                              ext4_encode_extra_time(&(inode)->xtime);        \
 -              }                                                               \
 -      else    \
 -              (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec, S32_MIN, S32_MAX));    \
 +      if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {    \
 +              (raw_inode)->xtime = cpu_to_le32((ts).tv_sec);                  \
 +              (raw_inode)->xtime ## _extra = ext4_encode_extra_time(ts);      \
 +      } else                                                                  \
 +              (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX));      \
  } while (0)
  
 -#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)                              \
 -do {                                                                         \
 -      if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                      \
 -              (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec);      \
 -      if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))            \
 -              (raw_inode)->xtime ## _extra =                                 \
 -                              ext4_encode_extra_time(&(einode)->xtime);      \
 -} while (0)
 +#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)                         \
 +      EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime)
 +
 +#define EXT4_INODE_SET_CTIME(inode, raw_inode)                                        \
 +      EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode))
 +
 +#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)                               \
 +      if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                       \
 +              EXT4_INODE_SET_XTIME_VAL(xtime, &((einode)->vfs_inode),         \
 +                                       raw_inode, (einode)->xtime)
 +
 +#define EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode)                     \
 +      (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra) ?        \
 +              ext4_decode_extra_time((raw_inode)->xtime,                              \
 +                                     (raw_inode)->xtime ## _extra) :          \
 +              (struct timespec64) {                                           \
 +                      .tv_sec = (signed)le32_to_cpu((raw_inode)->xtime)       \
 +              })
  
  #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)                         \
  do {                                                                          \
 -      (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);        \
 -      if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {    \
 -              ext4_decode_extra_time(&(inode)->xtime,                         \
 -                                     raw_inode->xtime ## _extra);             \
 -              }                                                               \
 -      else                                                                    \
 -              (inode)->xtime.tv_nsec = 0;                                     \
 +      (inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode);     \
  } while (0)
  
 +#define EXT4_INODE_GET_CTIME(inode, raw_inode)                                        \
 +do {                                                                          \
 +      inode_set_ctime_to_ts(inode,                                            \
 +              EXT4_INODE_GET_XTIME_VAL(i_ctime, inode, raw_inode));           \
 +} while (0)
  
 -#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)                              \
 -do {                                                                         \
 -      if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                      \
 -              (einode)->xtime.tv_sec =                                       \
 -                      (signed)le32_to_cpu((raw_inode)->xtime);               \
 -      else                                                                   \
 -              (einode)->xtime.tv_sec = 0;                                    \
 -      if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))            \
 -              ext4_decode_extra_time(&(einode)->xtime,                       \
 -                                     raw_inode->xtime ## _extra);            \
 -      else                                                                   \
 -              (einode)->xtime.tv_nsec = 0;                                   \
 +#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)                               \
 +do {                                                                          \
 +      if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                       \
 +              (einode)->xtime =                                               \
 +                      EXT4_INODE_GET_XTIME_VAL(xtime, &(einode->vfs_inode),   \
 +                                               raw_inode);                    \
 +      else                                                                    \
 +              (einode)->xtime = (struct timespec64){0, 0};                    \
  } while (0)
  
  #define i_disk_version osd1.linux1.l_i_version
@@@ -1241,6 -1232,7 +1238,7 @@@ struct ext4_inode_info 
  #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN  0x00000080 /* Optimize group
                                                    * scanning in mballoc
                                                    */
+ #define EXT4_MOUNT2_ABORT             0x00000100 /* Abort filesystem */
  
  #define clear_opt(sb, opt)            EXT4_SB(sb)->s_mount_opt &= \
                                                ~EXT4_MOUNT_##opt
  
  #define ext4_test_and_set_bit         __test_and_set_bit_le
  #define ext4_set_bit                  __set_bit_le
- #define ext4_set_bit_atomic           ext2_set_bit_atomic
  #define ext4_test_and_clear_bit               __test_and_clear_bit_le
  #define ext4_clear_bit                        __clear_bit_le
- #define ext4_clear_bit_atomic         ext2_clear_bit_atomic
  #define ext4_test_bit                 test_bit_le
  #define ext4_find_next_zero_bit               find_next_zero_bit_le
  #define ext4_find_next_bit            find_next_bit_le
@@@ -1708,10 -1698,13 +1704,13 @@@ struct ext4_sb_info 
        const char *s_last_error_func;
        time64_t s_last_error_time;
        /*
-        * If we are in a context where we cannot update error information in
-        * the on-disk superblock, we queue this work to do it.
+        * If we are in a context where we cannot update the on-disk
+        * superblock, we queue the work here.  This is used to update
+        * the error information in the superblock, and for periodic
+        * updates of the superblock called from the commit callback
+        * function.
         */
-       struct work_struct s_error_work;
+       struct work_struct s_sb_upd_work;
  
        /* Ext4 fast commit sub transaction ID */
        atomic_t s_fc_subtid;
@@@ -1804,7 -1797,6 +1803,6 @@@ static inline int ext4_valid_inum(struc
   */
  enum {
        EXT4_MF_MNTDIR_SAMPLED,
-       EXT4_MF_FS_ABORTED,     /* Fatal error detected */
        EXT4_MF_FC_INELIGIBLE   /* Fast commit ineligible */
  };
  
@@@ -2228,9 -2220,9 +2226,9 @@@ extern int ext4_feature_set_ok(struct s
  #define EXT4_FLAGS_SHUTDOWN   1
  #define EXT4_FLAGS_BDEV_IS_DAX        2
  
- static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
+ static inline int ext4_forced_shutdown(struct super_block *sb)
  {
-       return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
+       return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
  }
  
  /*
@@@ -2708,7 -2700,6 +2706,6 @@@ extern ext4_fsblk_t ext4_new_meta_block
  extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
                                    s64 nclusters, unsigned int flags);
  extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
- extern void ext4_check_blocks_bitmap(struct super_block *);
  extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
                                                    ext4_group_t block_group,
                                                    struct buffer_head ** bh);
@@@ -2864,7 -2855,6 +2861,6 @@@ extern void ext4_free_inode(handle_t *
  extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
  extern unsigned long ext4_count_free_inodes(struct super_block *);
  extern unsigned long ext4_count_dirs(struct super_block *);
- extern void ext4_check_inodes_bitmap(struct super_block *);
  extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
  extern int ext4_init_inode_table(struct super_block *sb,
                                 ext4_group_t group, int barrier);
@@@ -2907,7 -2897,6 +2903,6 @@@ extern int ext4_mb_init(struct super_bl
  extern int ext4_mb_release(struct super_block *);
  extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
                                struct ext4_allocation_request *, int *);
- extern int ext4_mb_reserve_blocks(struct super_block *, int);
  extern void ext4_discard_preallocations(struct inode *, unsigned int);
  extern int __init ext4_init_mballoc(void);
  extern void ext4_exit_mballoc(void);
@@@ -2930,6 -2919,10 +2925,10 @@@ extern int ext4_trim_fs(struct super_bl
  extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
  extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
                       int len, int state);
+ static inline bool ext4_mb_cr_expensive(enum criteria cr)
+ {
+       return cr >= CR_GOAL_LEN_SLOW;
+ }
  
  /* inode.c */
  void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
@@@ -2983,7 -2976,6 +2982,6 @@@ extern void ext4_evict_inode(struct ino
  extern void ext4_clear_inode(struct inode *);
  extern int  ext4_file_getattr(struct mnt_idmap *, const struct path *,
                              struct kstat *, u32, unsigned int);
- extern int  ext4_sync_inode(handle_t *, struct inode *);
  extern void ext4_dirty_inode(struct inode *, int);
  extern int ext4_change_inode_journal_flag(struct inode *, int);
  extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
@@@ -3090,6 -3082,8 +3088,8 @@@ extern const char *ext4_decode_error(st
  extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
                                             ext4_group_t block_group,
                                             unsigned int flags);
+ extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
+                                             ext4_group_t block_group);
  
  extern __printf(7, 8)
  void __ext4_error(struct super_block *, const char *, unsigned int, bool,
@@@ -3531,8 -3525,6 +3531,6 @@@ extern loff_t ext4_llseek(struct file *
  /* inline.c */
  extern int ext4_get_max_inline_size(struct inode *inode);
  extern int ext4_find_inline_data_nolock(struct inode *inode);
- extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
-                                unsigned int len);
  extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
  
  int ext4_readpage_inline(struct inode *inode, struct folio *folio);
@@@ -3780,6 -3772,8 +3778,6 @@@ static inline void set_bitmap_uptodate(
        set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
  }
  
 -#define in_range(b, first, len)       ((b) >= (first) && (b) <= (first) + (len) - 1)
 -
  /* For ioend & aio unwritten conversion wait queues */
  #define EXT4_WQ_HASH_SZ               37
  #define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
diff --combined fs/ext4/ext4_jbd2.c
index b38d59581411c03099826e1db49b1df37a643040,ca0eaf2147b0eca0918a8702288ed09bfa44eb19..d1a2e662440178e87c8240a1fa70f5cdf4731cef
@@@ -67,11 -67,12 +67,12 @@@ static int ext4_journal_check_start(str
  
        might_sleep();
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return -EIO;
  
-       if (sb_rdonly(sb))
+       if (WARN_ON_ONCE(sb_rdonly(sb)))
                return -EROFS;
        WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
        journal = EXT4_SB(sb)->s_journal;
        /*
@@@ -234,7 -235,8 +235,7 @@@ int __ext4_journal_get_write_access(con
  
        might_sleep();
  
 -      if (bh->b_bdev->bd_super)
 -              ext4_check_bdev_write_error(bh->b_bdev->bd_super);
 +      ext4_check_bdev_write_error(sb);
  
        if (ext4_handle_valid(handle)) {
                err = jbd2_journal_get_write_access(handle, bh);
diff --combined fs/ext4/file.c
index 2dc3f8301225a9a08e28757e0ba61be7f05edc5a,e99cc17b6bd279ee8e60ea52d3a0373540415685..6830ea3a6c59c6b116f83ee4b3345811f92841ed
@@@ -131,7 -131,7 +131,7 @@@ static ssize_t ext4_file_read_iter(stru
  {
        struct inode *inode = file_inode(iocb->ki_filp);
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        if (!iov_iter_count(to))
@@@ -153,7 -153,7 +153,7 @@@ static ssize_t ext4_file_splice_read(st
  {
        struct inode *inode = file_inode(in);
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
        return filemap_splice_read(in, ppos, pipe, len, flags);
  }
@@@ -476,6 -476,11 +476,11 @@@ restart
         * required to change security info in file_modified(), for extending
         * I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
         * extents (as partial block zeroing may be required).
+        *
+        * Note that unaligned writes are allowed under shared lock so long as
+        * they are pure overwrites. Otherwise, concurrent unaligned writes risk
+        * data corruption due to partial block zeroing in the dio layer, and so
+        * the I/O must occur exclusively.
         */
        if (*ilock_shared &&
            ((!IS_NOSEC(inode) || *extend || !overwrite ||
  
        /*
         * Now that locking is settled, determine dio flags and exclusivity
-        * requirements. Unaligned writes are allowed under shared lock so long
-        * as they are pure overwrites. Set the iomap overwrite only flag as an
-        * added precaution in this case. Even though this is unnecessary, we
-        * can detect and warn on unexpected -EAGAIN if an unsafe unaligned
-        * write is ever submitted.
-        *
-        * Otherwise, concurrent unaligned writes risk data corruption due to
-        * partial block zeroing in the dio layer, and so the I/O must occur
-        * exclusively. The inode lock is already held exclusive if the write is
-        * non-overwrite or extending, so drain all outstanding dio and set the
-        * force wait dio flag.
+        * requirements. We don't use DIO_OVERWRITE_ONLY because we enforce
+        * behavior already. The inode lock is already held exclusive if the
+        * write is non-overwrite or extending, so drain all outstanding dio and
+        * set the force wait dio flag.
         */
-       if (*ilock_shared && unaligned_io) {
-               *dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
-       } else if (!*ilock_shared && (unaligned_io || *extend)) {
+       if (!*ilock_shared && (unaligned_io || *extend)) {
                if (iocb->ki_flags & IOCB_NOWAIT) {
                        ret = -EAGAIN;
                        goto out;
@@@ -608,7 -604,6 +604,6 @@@ static ssize_t ext4_dio_write_iter(stru
                iomap_ops = &ext4_iomap_overwrite_ops;
        ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
                           dio_flags, NULL, 0);
-       WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
        if (ret == -ENOTBLK)
                ret = 0;
  
@@@ -709,7 -704,7 +704,7 @@@ ext4_file_write_iter(struct kiocb *iocb
  {
        struct inode *inode = file_inode(iocb->ki_filp);
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
  #ifdef CONFIG_FS_DAX
  }
  
  #ifdef CONFIG_FS_DAX
 -static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
 -              enum page_entry_size pe_size)
 +static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
  {
        int error = 0;
        vm_fault_t result;
         * read-only.
         *
         * We check for VM_SHARED rather than vmf->cow_page since the latter is
 -       * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
 +       * unset for order != 0 (i.e. only in do_cow_fault); for
         * other sizes, dax_iomap_fault will handle splitting / fallback so that
         * we eventually come back with a COW page.
         */
@@@ -763,7 -759,7 +758,7 @@@ retry
        } else {
                filemap_invalidate_lock_shared(mapping);
        }
 -      result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
 +      result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
        if (write) {
                ext4_journal_stop(handle);
  
                        goto retry;
                /* Handling synchronous page fault? */
                if (result & VM_FAULT_NEEDDSYNC)
 -                      result = dax_finish_sync_fault(vmf, pe_size, pfn);
 +                      result = dax_finish_sync_fault(vmf, order, pfn);
                filemap_invalidate_unlock_shared(mapping);
                sb_end_pagefault(sb);
        } else {
  
  static vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
  {
 -      return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
 +      return ext4_dax_huge_fault(vmf, 0);
  }
  
  static const struct vm_operations_struct ext4_dax_vm_ops = {
@@@ -806,10 -802,9 +801,9 @@@ static const struct vm_operations_struc
  static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
        struct inode *inode = file->f_mapping->host;
-       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       struct dax_device *dax_dev = sbi->s_daxdev;
+       struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
  
-       if (unlikely(ext4_forced_shutdown(sbi)))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        /*
@@@ -885,7 -880,7 +879,7 @@@ static int ext4_file_open(struct inode 
  {
        int ret;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
diff --combined fs/ext4/ialloc.c
index 48abef5f23e7fcffc3ad8011dd826f2ef372d6a6,e0698f54e17ae3a7ee1fdbc17c6742baef9988f0..b65058d972f95646fa0d629a96a36836ea86076a
@@@ -950,7 -950,7 +950,7 @@@ struct inode *__ext4_new_inode(struct m
        sb = dir->i_sb;
        sbi = EXT4_SB(sb);
  
-       if (unlikely(ext4_forced_shutdown(sbi)))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return ERR_PTR(-EIO);
  
        ngroups = ext4_get_groups_count(sb);
@@@ -1250,7 -1250,7 +1250,7 @@@ got
        inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
        inode->i_blocks = 0;
 -      inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 +      inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        ei->i_crtime = inode->i_mtime;
  
        memset(ei->i_data, 0, sizeof(ei->i_data));
@@@ -1523,12 -1523,6 +1523,6 @@@ int ext4_init_inode_table(struct super_
        int num, ret = 0, used_blks = 0;
        unsigned long used_inos = 0;
  
-       /* This should not happen, but just to be sure check this */
-       if (sb_rdonly(sb)) {
-               ret = 1;
-               goto out;
-       }
        gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
        if (!gdp || !grp)
                goto out;
diff --combined fs/ext4/inline.c
index 0038610373745f7853c53421c8cdccb08ff092b1,3623dfcc8fc7b93b18456600a6ef034494e2387f..012d9259ff532060a5aa0cea0457f6e3a2fdac48
@@@ -228,7 -228,7 +228,7 @@@ static void ext4_write_inline_data(stru
        struct ext4_inode *raw_inode;
        int cp_len = 0;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return;
  
        BUG_ON(!EXT4_I(inode)->i_inline_off);
@@@ -1037,7 -1037,7 +1037,7 @@@ static int ext4_add_dirent_to_inline(ha
         * happen is that the times are slightly out of date
         * and/or different from the directory change time.
         */
 -      dir->i_mtime = dir->i_ctime = current_time(dir);
 +      dir->i_mtime = inode_set_ctime_current(dir);
        ext4_update_dx_flag(dir);
        inode_inc_iversion(dir);
        return 1;
@@@ -1991,7 -1991,7 +1991,7 @@@ out
                ext4_orphan_del(handle, inode);
  
        if (err == 0) {
 -              inode->i_mtime = inode->i_ctime = current_time(inode);
 +              inode->i_mtime = inode_set_ctime_current(inode);
                err = ext4_mark_inode_dirty(handle, inode);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
diff --combined fs/ext4/inode.c
index 89737d5a161483706e9175d6055196b506d92012,6c490f05e2baf13f956fe7dd241eace0a59b9e15..4ce35f1c8b0a8412d9e513d8fa01fae21bab74d5
@@@ -1114,7 -1114,7 +1114,7 @@@ static int ext4_write_begin(struct fil
        pgoff_t index;
        unsigned from, to;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        trace_ext4_write_begin(inode, pos, len);
@@@ -1569,7 -1569,7 +1569,7 @@@ static void mpage_release_unused_pages(
  
                        if (folio->index < mpd->first_page)
                                continue;
 -                      if (folio->index + folio_nr_pages(folio) - 1 > end)
 +                      if (folio_next_index(folio) - 1 > end)
                                continue;
                        BUG_ON(!folio_test_locked(folio));
                        BUG_ON(folio_test_writeback(folio));
@@@ -2213,8 -2213,7 +2213,7 @@@ static int mpage_map_and_submit_extent(
                if (err < 0) {
                        struct super_block *sb = inode->i_sb;
  
-                       if (ext4_forced_shutdown(EXT4_SB(sb)) ||
-                           ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
+                       if (ext4_forced_shutdown(sb))
                                goto invalidate_dirty_pages;
                        /*
                         * Let the uper layers retry transient errors.
@@@ -2455,7 -2454,7 +2454,7 @@@ static int mpage_prepare_extent_to_map(
  
                        if (mpd->map.m_len == 0)
                                mpd->first_page = folio->index;
 -                      mpd->next_page = folio->index + folio_nr_pages(folio);
 +                      mpd->next_page = folio_next_index(folio);
                        /*
                         * Writeout when we cannot modify metadata is simple.
                         * Just submit the page. For data=journal mode we
@@@ -2534,14 -2533,13 +2533,13 @@@ static int ext4_do_writepages(struct mp
         * If the filesystem has aborted, it is read-only, so return
         * right away instead of dumping stack traces later on that
         * will obscure the real source of the problem.  We test
-        * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because
+        * fs shutdown state instead of sb->s_flag's SB_RDONLY because
         * the latter could be true if the filesystem is mounted
         * read-only, and in that case, ext4_writepages should
         * *never* be called, so if that ever happens, we would want
         * the stack trace.
         */
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
-                    ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
+       if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
                ret = -EROFS;
                goto out_writepages;
        }
@@@ -2759,7 -2757,7 +2757,7 @@@ static int ext4_writepages(struct addre
        int ret;
        int alloc_ctx;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return -EIO;
  
        alloc_ctx = ext4_writepages_down_read(sb);
@@@ -2798,16 -2796,16 +2796,16 @@@ static int ext4_dax_writepages(struct a
        int ret;
        long nr_to_write = wbc->nr_to_write;
        struct inode *inode = mapping->host;
-       struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
        int alloc_ctx;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        alloc_ctx = ext4_writepages_down_read(inode->i_sb);
        trace_ext4_writepages(inode, wbc);
  
-       ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
+       ret = dax_writeback_mapping_range(mapping,
+                                         EXT4_SB(inode->i_sb)->s_daxdev, wbc);
        trace_ext4_writepages_result(inode, wbc, ret,
                                     nr_to_write - wbc->nr_to_write);
        ext4_writepages_up_read(inode->i_sb, alloc_ctx);
@@@ -2857,7 -2855,7 +2855,7 @@@ static int ext4_da_write_begin(struct f
        pgoff_t index;
        struct inode *inode = mapping->host;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        index = pos >> PAGE_SHIFT;
@@@ -2937,14 -2935,73 +2935,73 @@@ static int ext4_da_should_update_i_disk
        return 1;
  }
  
+ static int ext4_da_do_write_end(struct address_space *mapping,
+                       loff_t pos, unsigned len, unsigned copied,
+                       struct page *page)
+ {
+       struct inode *inode = mapping->host;
+       loff_t old_size = inode->i_size;
+       bool disksize_changed = false;
+       loff_t new_i_size;
+       /*
+        * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
+        * flag, which is all that's needed to trigger page writeback.
+        */
+       copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
+       new_i_size = pos + copied;
+       /*
+        * It's important to update i_size while still holding page lock,
+        * because page writeout could otherwise come in and zero beyond
+        * i_size.
+        *
+        * Since we are holding inode lock, we are sure i_disksize <=
+        * i_size. We also know that if i_disksize < i_size, there are
+        * delalloc writes pending in the range up to i_size. If the end of
+        * the current write is <= i_size, there's no need to touch
+        * i_disksize since writeback will push i_disksize up to i_size
+        * eventually. If the end of the current write is > i_size and
+        * inside an allocated block which ext4_da_should_update_i_disksize()
+        * checked, we need to update i_disksize here as certain
+        * ext4_writepages() paths not allocating blocks do not update it.
+        */
+       if (new_i_size > inode->i_size) {
+               unsigned long end;
+               i_size_write(inode, new_i_size);
+               end = (new_i_size - 1) & (PAGE_SIZE - 1);
+               if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
+                       ext4_update_i_disksize(inode, new_i_size);
+                       disksize_changed = true;
+               }
+       }
+       unlock_page(page);
+       put_page(page);
+       if (old_size < pos)
+               pagecache_isize_extended(inode, old_size, pos);
+       if (disksize_changed) {
+               handle_t *handle;
+               handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
+               if (IS_ERR(handle))
+                       return PTR_ERR(handle);
+               ext4_mark_inode_dirty(handle, inode);
+               ext4_journal_stop(handle);
+       }
+       return copied;
+ }
  static int ext4_da_write_end(struct file *file,
                             struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned copied,
                             struct page *page, void *fsdata)
  {
        struct inode *inode = mapping->host;
-       loff_t new_i_size;
-       unsigned long start, end;
        int write_mode = (int)(unsigned long)fsdata;
        struct folio *folio = page_folio(page);
  
        if (unlikely(copied < len) && !PageUptodate(page))
                copied = 0;
  
-       start = pos & (PAGE_SIZE - 1);
-       end = start + copied - 1;
-       /*
-        * Since we are holding inode lock, we are sure i_disksize <=
-        * i_size. We also know that if i_disksize < i_size, there are
-        * delalloc writes pending in the range upto i_size. If the end of
-        * the current write is <= i_size, there's no need to touch
-        * i_disksize since writeback will push i_disksize upto i_size
-        * eventually. If the end of the current write is > i_size and
-        * inside an allocated block (ext4_da_should_update_i_disksize()
-        * check), we need to update i_disksize here as certain
-        * ext4_writepages() paths not allocating blocks update i_disksize.
-        *
-        * Note that we defer inode dirtying to generic_write_end() /
-        * ext4_da_write_inline_data_end().
-        */
-       new_i_size = pos + copied;
-       if (copied && new_i_size > inode->i_size &&
-           ext4_da_should_update_i_disksize(folio, end))
-               ext4_update_i_disksize(inode, new_i_size);
-       return generic_write_end(file, mapping, pos, len, copied, &folio->page,
-                                fsdata);
+       return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
  }
  
  /*
@@@ -3986,7 -4020,7 +4020,7 @@@ int ext4_punch_hole(struct file *file, 
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
  
 -      inode->i_mtime = inode->i_ctime = current_time(inode);
 +      inode->i_mtime = inode_set_ctime_current(inode);
        ret2 = ext4_mark_inode_dirty(handle, inode);
        if (unlikely(ret2))
                ret = ret2;
@@@ -4146,7 -4180,7 +4180,7 @@@ out_stop
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
  
 -      inode->i_mtime = inode->i_ctime = current_time(inode);
 +      inode->i_mtime = inode_set_ctime_current(inode);
        err2 = ext4_mark_inode_dirty(handle, inode);
        if (unlikely(err2 && !err))
                err = err2;
@@@ -4249,7 -4283,7 +4283,7 @@@ static int ext4_fill_raw_inode(struct i
        }
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
  
 -      EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
 +      EXT4_INODE_SET_CTIME(inode, raw_inode);
        EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
        EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
@@@ -4858,7 -4892,7 +4892,7 @@@ struct inode *__ext4_iget(struct super_
                }
        }
  
 -      EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
 +      EXT4_INODE_GET_CTIME(inode, raw_inode);
        EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
        EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
                                 "iget: bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
-       if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
+       if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) {
                ext4_error_inode(inode, function, line, 0,
                                 "casefold flag without casefold feature");
+               ret = -EFSCORRUPTED;
+               goto bad_inode;
+       }
        if ((err_str = check_igot_inode(inode, flags)) != NULL) {
                ext4_error_inode(inode, function, line, 0, err_str);
                ret = -EFSCORRUPTED;
@@@ -4981,7 -5018,7 +5018,7 @@@ static void __ext4_update_other_inode_t
                spin_unlock(&inode->i_lock);
  
                spin_lock(&ei->i_raw_lock);
 -              EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
 +              EXT4_INODE_SET_CTIME(inode, raw_inode);
                EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
                EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
                ext4_inode_csum_set(inode, raw_inode, ei);
@@@ -5131,11 -5168,10 +5168,10 @@@ int ext4_write_inode(struct inode *inod
  {
        int err;
  
-       if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
-           sb_rdonly(inode->i_sb))
+       if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
                return 0;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        if (EXT4_SB(inode->i_sb)->s_journal) {
@@@ -5255,7 -5291,7 +5291,7 @@@ int ext4_setattr(struct mnt_idmap *idma
        const unsigned int ia_valid = attr->ia_valid;
        bool inc_ivers = true;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        if (unlikely(IS_IMMUTABLE(inode)))
                         * Update c/mtime on truncate up, ext4_truncate() will
                         * update c/mtime in shrink case below
                         */
 -                      if (!shrink) {
 -                              inode->i_mtime = current_time(inode);
 -                              inode->i_ctime = inode->i_mtime;
 -                      }
 +                      if (!shrink)
 +                              inode->i_mtime = inode_set_ctime_current(inode);
  
                        if (shrink)
                                ext4_fc_track_range(handle, inode,
@@@ -5535,7 -5573,7 +5571,7 @@@ int ext4_getattr(struct mnt_idmap *idma
                                  STATX_ATTR_NODUMP |
                                  STATX_ATTR_VERITY);
  
 -      generic_fillattr(idmap, inode, stat);
 +      generic_fillattr(idmap, request_mask, inode, stat);
        return 0;
  }
  
@@@ -5674,7 -5712,7 +5710,7 @@@ int ext4_mark_iloc_dirty(handle_t *hand
  {
        int err = 0;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+       if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
                put_bh(iloc->bh);
                return -EIO;
        }
@@@ -5700,7 -5738,7 +5736,7 @@@ ext4_reserve_inode_write(handle_t *hand
  {
        int err;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        err = ext4_get_inode_loc(inode, iloc);
@@@ -6138,7 -6176,7 +6174,7 @@@ retry_alloc
        if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry_alloc;
  out_ret:
 -      ret = block_page_mkwrite_return(err);
 +      ret = vmf_fs_error(err);
  out:
        filemap_invalidate_unlock_shared(mapping);
        sb_end_pagefault(inode->i_sb);
diff --combined fs/ext4/ioctl.c
index b0349f45186370192fb98e7bf2e67abdcc475f31,0d3aef1118cbc31d09d32ce20aa56193cde82a6a..0bfe2ce589e224bac8a1dabecaaee8d4c45bad9a
@@@ -449,8 -449,7 +449,8 @@@ static long swap_inode_boot_loader(stru
        diff = size - size_bl;
        swap_inode_data(inode, inode_bl);
  
 -      inode->i_ctime = inode_bl->i_ctime = current_time(inode);
 +      inode_set_ctime_current(inode);
 +      inode_set_ctime_current(inode_bl);
        inode_inc_iversion(inode);
  
        inode->i_generation = get_random_u32();
@@@ -664,7 -663,7 +664,7 @@@ static int ext4_ioctl_setflags(struct i
  
        ext4_set_inode_flags(inode, false);
  
 -      inode->i_ctime = current_time(inode);
 +      inode_set_ctime_current(inode);
        inode_inc_iversion(inode);
  
        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@@ -775,7 -774,7 +775,7 @@@ static int ext4_ioctl_setproject(struc
        }
  
        EXT4_I(inode)->i_projid = kprojid;
 -      inode->i_ctime = current_time(inode);
 +      inode_set_ctime_current(inode);
        inode_inc_iversion(inode);
  out_dirty:
        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@@ -802,7 -801,7 +802,7 @@@ int ext4_force_shutdown(struct super_bl
        if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
                return -EINVAL;
  
-       if (ext4_forced_shutdown(sbi))
+       if (ext4_forced_shutdown(sb))
                return 0;
  
        ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
@@@ -1267,7 -1266,7 +1267,7 @@@ static long __ext4_ioctl(struct file *f
                }
                err = ext4_reserve_inode_write(handle, inode, &iloc);
                if (err == 0) {
 -                      inode->i_ctime = current_time(inode);
 +                      inode_set_ctime_current(inode);
                        inode_inc_iversion(inode);
                        inode->i_generation = generation;
                        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
diff --combined fs/ext4/namei.c
index 933ad03f4f5853542d44f59ea27e2aad15d3d879,c0f0b4e2413b5e7dc4f9f23ad7ae49b250a69c3e..41a6411c600b1cba08ed24b667913a563f2d3b90
@@@ -1445,7 -1445,7 +1445,7 @@@ int ext4_fname_setup_ci_filename(struc
        struct dx_hash_info *hinfo = &name->hinfo;
        int len;
  
-       if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
+       if (!IS_CASEFOLDED(dir) ||
            (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
                cf_name->name = NULL;
                return 0;
@@@ -1496,7 -1496,7 +1496,7 @@@ static bool ext4_match(struct inode *pa
  #endif
  
  #if IS_ENABLED(CONFIG_UNICODE)
-       if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
+       if (IS_CASEFOLDED(parent) &&
            (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
                if (fname->cf_name.name) {
                        struct qstr cf = {.name = fname->cf_name.name,
@@@ -2203,7 -2203,7 +2203,7 @@@ static int add_dirent_to_buf(handle_t *
         * happen is that the times are slightly out of date
         * and/or different from the directory change time.
         */
 -      dir->i_mtime = dir->i_ctime = current_time(dir);
 +      dir->i_mtime = inode_set_ctime_current(dir);
        ext4_update_dx_flag(dir);
        inode_inc_iversion(dir);
        err2 = ext4_mark_inode_dirty(handle, dir);
@@@ -2393,7 -2393,7 +2393,7 @@@ static int ext4_add_entry(handle_t *han
  
  #if IS_ENABLED(CONFIG_UNICODE)
        if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
-           sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
+           utf8_validate(sb->s_encoding, &dentry->d_name))
                return -EINVAL;
  #endif
  
@@@ -2799,6 -2799,7 +2799,7 @@@ static int ext4_add_nondir(handle_t *ha
                return err;
        }
        drop_nlink(inode);
+       ext4_mark_inode_dirty(handle, inode);
        ext4_orphan_add(handle, inode);
        unlock_new_inode(inode);
        return err;
@@@ -3142,7 -3143,7 +3143,7 @@@ static int ext4_rmdir(struct inode *dir
        struct ext4_dir_entry_2 *de;
        handle_t *handle = NULL;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+       if (unlikely(ext4_forced_shutdown(dir->i_sb)))
                return -EIO;
  
        /* Initialize quotas before so that eventual writes go in
         * recovery. */
        inode->i_size = 0;
        ext4_orphan_add(handle, inode);
 -      inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 +      dir->i_mtime = inode_set_ctime_current(dir);
 +      inode_set_ctime_current(inode);
        retval = ext4_mark_inode_dirty(handle, inode);
        if (retval)
                goto end_rmdir;
@@@ -3272,7 -3272,7 +3273,7 @@@ int __ext4_unlink(struct inode *dir, co
                retval = ext4_delete_entry(handle, dir, de, bh);
                if (retval)
                        goto out_handle;
 -              dir->i_ctime = dir->i_mtime = current_time(dir);
 +              dir->i_mtime = inode_set_ctime_current(dir);
                ext4_update_dx_flag(dir);
                retval = ext4_mark_inode_dirty(handle, dir);
                if (retval)
                drop_nlink(inode);
        if (!inode->i_nlink)
                ext4_orphan_add(handle, inode);
 -      inode->i_ctime = current_time(inode);
 +      inode_set_ctime_current(inode);
        retval = ext4_mark_inode_dirty(handle, inode);
        if (dentry && !retval)
                ext4_fc_track_unlink(handle, dentry);
@@@ -3302,7 -3302,7 +3303,7 @@@ static int ext4_unlink(struct inode *di
  {
        int retval;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+       if (unlikely(ext4_forced_shutdown(dir->i_sb)))
                return -EIO;
  
        trace_ext4_unlink_enter(dir, dentry);
@@@ -3370,7 -3370,7 +3371,7 @@@ static int ext4_symlink(struct mnt_idma
        struct fscrypt_str disk_link;
        int retries = 0;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
+       if (unlikely(ext4_forced_shutdown(dir->i_sb)))
                return -EIO;
  
        err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
@@@ -3437,6 -3437,7 +3438,7 @@@ retry
  
  err_drop_inode:
        clear_nlink(inode);
+       ext4_mark_inode_dirty(handle, inode);
        ext4_orphan_add(handle, inode);
        unlock_new_inode(inode);
        if (handle)
@@@ -3464,7 -3465,7 +3466,7 @@@ retry
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
  
 -      inode->i_ctime = current_time(inode);
 +      inode_set_ctime_current(inode);
        ext4_inc_count(inode);
        ihold(inode);
  
@@@ -3642,7 -3643,8 +3644,7 @@@ static int ext4_setent(handle_t *handle
        if (ext4_has_feature_filetype(ent->dir->i_sb))
                ent->de->file_type = file_type;
        inode_inc_iversion(ent->dir);
 -      ent->dir->i_ctime = ent->dir->i_mtime =
 -              current_time(ent->dir);
 +      ent->dir->i_mtime = inode_set_ctime_current(ent->dir);
        retval = ext4_mark_inode_dirty(handle, ent->dir);
        BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
        if (!ent->inlined) {
@@@ -3941,7 -3943,7 +3943,7 @@@ static int ext4_rename(struct mnt_idma
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
 -      old.inode->i_ctime = current_time(old.inode);
 +      inode_set_ctime_current(old.inode);
        retval = ext4_mark_inode_dirty(handle, old.inode);
        if (unlikely(retval))
                goto end_rename;
  
        if (new.inode) {
                ext4_dec_count(new.inode);
 -              new.inode->i_ctime = current_time(new.inode);
 +              inode_set_ctime_current(new.inode);
        }
 -      old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
 +      old.dir->i_mtime = inode_set_ctime_current(old.dir);
        ext4_update_dx_flag(old.dir);
        if (old.dir_bh) {
                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
@@@ -4021,6 -4023,7 +4023,7 @@@ end_rename
                        ext4_resetent(handle, &old,
                                      old.inode->i_ino, old_file_type);
                        drop_nlink(whiteout);
+                       ext4_mark_inode_dirty(handle, whiteout);
                        ext4_orphan_add(handle, whiteout);
                }
                unlock_new_inode(whiteout);
@@@ -4053,6 -4056,7 +4056,6 @@@ static int ext4_cross_rename(struct ino
        };
        u8 new_file_type;
        int retval;
 -      struct timespec64 ctime;
  
        if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
             !projid_eq(EXT4_I(new_dir)->i_projid,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
 -      ctime = current_time(old.inode);
 -      old.inode->i_ctime = ctime;
 -      new.inode->i_ctime = ctime;
 +      inode_set_ctime_current(old.inode);
 +      inode_set_ctime_current(new.inode);
        retval = ext4_mark_inode_dirty(handle, old.inode);
        if (unlikely(retval))
                goto end_rename;
@@@ -4187,7 -4192,7 +4190,7 @@@ static int ext4_rename2(struct mnt_idma
  {
        int err;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
+       if (unlikely(ext4_forced_shutdown(old_dir->i_sb)))
                return -EIO;
  
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
diff --combined fs/ext4/super.c
index 73547d2334fd7c3b605cf08b78d7a0eef5018b81,91f20afa1d71cf269f2b16666e2d26cd9cc262cc..38217422f938833f2eb9312f80da735190080dd6
@@@ -93,7 -93,6 +93,7 @@@ static int ext4_get_tree(struct fs_cont
  static int ext4_reconfigure(struct fs_context *fc);
  static void ext4_fc_free(struct fs_context *fc);
  static int ext4_init_fs_context(struct fs_context *fc);
 +static void ext4_kill_sb(struct super_block *sb);
  static const struct fs_parameter_spec ext4_param_specs[];
  
  /*
@@@ -136,12 -135,12 +136,12 @@@ static struct file_system_type ext2_fs_
        .name                   = "ext2",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
 -      .kill_sb                = kill_block_super,
 +      .kill_sb                = ext4_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
  };
  MODULE_ALIAS_FS("ext2");
  MODULE_ALIAS("ext2");
 -#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
 +#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
  #else
  #define IS_EXT2_SB(sb) (0)
  #endif
@@@ -152,12 -151,12 +152,12 @@@ static struct file_system_type ext3_fs_
        .name                   = "ext3",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
 -      .kill_sb                = kill_block_super,
 +      .kill_sb                = ext4_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
  };
  MODULE_ALIAS_FS("ext3");
  MODULE_ALIAS("ext3");
 -#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
 +#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
  
  
  static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
@@@ -434,6 -433,57 +434,57 @@@ static time64_t __ext4_get_tstamp(__le3
  #define ext4_get_tstamp(es, tstamp) \
        __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
  
+ #define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
+ #define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */
+ /*
+  * The ext4_maybe_update_superblock() function checks and updates the
+  * superblock if needed.
+  *
+  * This function is designed to update the on-disk superblock only under
+  * certain conditions to prevent excessive disk writes and unnecessary
+  * waking of the disk from sleep. The superblock will be updated if:
+  * 1. More than an hour has passed since the last superblock update, and
+  * 2. More than 16MB have been written since the last superblock update.
+  *
+  * @sb: The superblock
+  */
+ static void ext4_maybe_update_superblock(struct super_block *sb)
+ {
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       journal_t *journal = sbi->s_journal;
+       time64_t now;
+       __u64 last_update;
+       __u64 lifetime_write_kbytes;
+       __u64 diff_size;
+       if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
+           !journal || (journal->j_flags & JBD2_UNMOUNT))
+               return;
+       now = ktime_get_real_seconds();
+       last_update = ext4_get_tstamp(es, s_wtime);
+       if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
+               return;
+       lifetime_write_kbytes = sbi->s_kbytes_written +
+               ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
+                 sbi->s_sectors_written_start) >> 1);
+       /* Get the number of kilobytes not written to disk to account
+        * for statistics and compare with a multiple of 16 MB. This
+        * is used to determine when the next superblock commit should
+        * occur (i.e. not more often than once per 16MB if there was
+        * less written in an hour).
+        */
+       diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
+       if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
+               schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
+ }
  /*
   * The del_gendisk() function uninitializes the disk-specific data
   * structures, including the bdi structure, without telling anyone
@@@ -460,6 -510,7 +511,7 @@@ static void ext4_journal_commit_callbac
        BUG_ON(txn->t_state == T_FINISHED);
  
        ext4_process_freed_data(sb, txn->t_tid);
+       ext4_maybe_update_superblock(sb);
  
        spin_lock(&sbi->s_md_lock);
        while (!list_empty(&txn->t_private_list)) {
@@@ -658,7 -709,7 +710,7 @@@ static void ext4_handle_error(struct su
                WARN_ON_ONCE(1);
  
        if (!continue_fs && !sb_rdonly(sb)) {
-               ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
+               set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
                if (journal)
                        jbd2_journal_abort(journal, -EIO);
        }
                 * defer superblock flushing to a workqueue.
                 */
                if (continue_fs && journal)
-                       schedule_work(&EXT4_SB(sb)->s_error_work);
+                       schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
                else
                        ext4_commit_super(sb);
        }
        sb->s_flags |= SB_RDONLY;
  }
  
- static void flush_stashed_error_work(struct work_struct *work)
+ static void update_super_work(struct work_struct *work)
  {
        struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
-                                               s_error_work);
+                                               s_sb_upd_work);
        journal_t *journal = sbi->s_journal;
        handle_t *handle;
  
         */
        if (!sb_rdonly(sbi->s_sb) && journal) {
                struct buffer_head *sbh = sbi->s_sbh;
+               bool call_notify_err;
                handle = jbd2_journal_start(journal, 1);
                if (IS_ERR(handle))
                        goto write_directly;
                        jbd2_journal_stop(handle);
                        goto write_directly;
                }
+               if (sbi->s_add_error_count > 0)
+                       call_notify_err = true;
                ext4_update_super(sbi->s_sb);
                if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
                        ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
                        goto write_directly;
                }
                jbd2_journal_stop(handle);
-               ext4_notify_error_sysfs(sbi);
+               if (call_notify_err)
+                       ext4_notify_error_sysfs(sbi);
                return;
        }
  write_directly:
@@@ -759,7 -818,7 +819,7 @@@ void __ext4_error(struct super_block *s
        struct va_format vaf;
        va_list args;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return;
  
        trace_ext4_error(sb, function, line);
@@@ -784,7 -843,7 +844,7 @@@ void __ext4_error_inode(struct inode *i
        va_list args;
        struct va_format vaf;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return;
  
        trace_ext4_error(inode->i_sb, function, line);
@@@ -819,7 -878,7 +879,7 @@@ void __ext4_error_file(struct file *fil
        struct inode *inode = file_inode(file);
        char pathname[80], *path;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return;
  
        trace_ext4_error(inode->i_sb, function, line);
@@@ -899,7 -958,7 +959,7 @@@ void __ext4_std_error(struct super_bloc
        char nbuf[16];
        const char *errstr;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return;
  
        /* Special case: if the error is EROFS, and we're not already
@@@ -993,7 -1052,7 +1053,7 @@@ __acquires(bitlock
        struct va_format vaf;
        va_list args;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return;
  
        trace_ext4_error(sb, function, line);
                if (!bdev_read_only(sb->s_bdev)) {
                        save_error_info(sb, EFSCORRUPTED, ino, block, function,
                                        line);
-                       schedule_work(&EXT4_SB(sb)->s_error_work);
+                       schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
                }
                return;
        }
@@@ -1097,26 -1156,34 +1157,6 @@@ void ext4_update_dynamic_rev(struct sup
         */
  }
  
 -static void ext4_bdev_mark_dead(struct block_device *bdev)
 -{
 -      ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH);
 -}
 -
 -static const struct blk_holder_ops ext4_holder_ops = {
 -      .mark_dead              = ext4_bdev_mark_dead,
 -};
 -
--/*
-  * Open the external journal device
 - * Release the journal device
-- */
- static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
 -static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
--{
--      struct block_device *bdev;
-       bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
-                                &fs_holder_ops);
-       if (IS_ERR(bdev))
-               goto fail;
-       return bdev;
- fail:
-       ext4_msg(sb, KERN_ERR,
-                "failed to open journal device unknown-block(%u,%u) %ld",
-                MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
-       return NULL;
 -      bdev = sbi->s_journal_bdev;
 -      if (bdev) {
 -              /*
 -               * Invalidate the journal device's buffers.  We don't want them
 -               * floating about in memory - the physical journal device may
 -               * hotswapped, and it breaks the `ro-after' testing code.
 -               */
 -              invalidate_bdev(bdev);
 -              blkdev_put(bdev, sbi->s_sb);
 -              sbi->s_journal_bdev = NULL;
 -      }
--}
--
  static inline struct inode *orphan_list_entry(struct list_head *l)
  {
        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@@@ -1251,10 -1318,10 +1291,10 @@@ static void ext4_put_super(struct super
         * Unregister sysfs before destroying jbd2 journal.
         * Since we could still access attr_journal_task attribute via sysfs
         * path which could have sbi->s_journal->j_task as NULL
-        * Unregister sysfs before flush sbi->s_error_work.
+        * Unregister sysfs before flushing sbi->s_sb_upd_work.
         * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
         * read metadata verify failed then will queue error work.
-        * flush_stashed_error_work will call start_this_handle may trigger
+        * update_super_work will call start_this_handle, which may trigger
         * BUG_ON.
         */
        ext4_unregister_sysfs(sb);
        ext4_unregister_li_request(sb);
        ext4_quotas_off(sb, EXT4_MAXQUOTAS);
  
-       flush_work(&sbi->s_error_work);
+       flush_work(&sbi->s_sb_upd_work);
        destroy_workqueue(sbi->rsv_conversion_wq);
        ext4_release_orphan_info(sb);
  
        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->s_journal_bdev) {
 +              /*
 +               * Invalidate the journal device's buffers.  We don't want them
 +               * floating about in memory - the physical journal device may
 +               * be hotswapped, and it breaks the `ro-after' testing code.
 +               */
                sync_blockdev(sbi->s_journal_bdev);
 -              ext4_blkdev_remove(sbi);
 +              invalidate_bdev(sbi->s_journal_bdev);
        }
  
        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
@@@ -1875,6 -1937,7 +1915,7 @@@ static const struct mount_opts 
        {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
         MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
  #endif
+       {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
        {Opt_err, 0, 0}
  };
  
@@@ -1943,8 -2006,6 +1984,6 @@@ struct ext4_fs_context 
        unsigned int    mask_s_mount_opt;
        unsigned int    vals_s_mount_opt2;
        unsigned int    mask_s_mount_opt2;
-       unsigned long   vals_s_mount_flags;
-       unsigned long   mask_s_mount_flags;
        unsigned int    opt_flags;      /* MOPT flags */
        unsigned int    spec;
        u32             s_max_batch_time;
@@@ -2095,12 -2156,6 +2134,6 @@@ EXT4_SET_CTX(mount_opt2)
  EXT4_CLEAR_CTX(mount_opt2);
  EXT4_TEST_CTX(mount_opt2);
  
- static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
- {
-       set_bit(bit, &ctx->mask_s_mount_flags);
-       set_bit(bit, &ctx->vals_s_mount_flags);
- }
  static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
  {
        struct ext4_fs_context *ctx = fc->fs_private;
                ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
                         param->key);
                return 0;
-       case Opt_abort:
-               ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
-               return 0;
        case Opt_inlinecrypt:
  #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
                ctx_set_flags(ctx, SB_INLINECRYPT);
@@@ -2820,8 -2872,6 +2850,6 @@@ static void ext4_apply_options(struct f
        sbi->s_mount_opt |= ctx->vals_s_mount_opt;
        sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
        sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
-       sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
-       sbi->s_mount_flags |= ctx->vals_s_mount_flags;
        sb->s_flags &= ~ctx->mask_s_flags;
        sb->s_flags |= ctx->vals_s_flags;
  
@@@ -4210,7 -4260,7 +4238,7 @@@ int ext4_calculate_overhead(struct supe
        else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
                /* j_inum for internal journal is non-zero */
                j_inode = ext4_get_journal_inode(sb, j_inum);
-               if (j_inode) {
+               if (!IS_ERR(j_inode)) {
                        j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
                        overhead += EXT4_NUM_B2C(sbi, j_blocks);
                        iput(j_inode);
@@@ -4948,8 -4998,8 +4976,8 @@@ static int ext4_load_and_init_journal(s
        return 0;
  
  out:
-       /* flush s_error_work before journal destroy. */
-       flush_work(&sbi->s_error_work);
+       /* flush s_sb_upd_work before destroying the journal. */
+       flush_work(&sbi->s_sb_upd_work);
        jbd2_journal_destroy(sbi->s_journal);
        sbi->s_journal = NULL;
        return -EINVAL;
@@@ -5272,7 -5322,7 +5300,7 @@@ static int __ext4_fill_super(struct fs_
  
        timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
        spin_lock_init(&sbi->s_error_lock);
-       INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
+       INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
  
        err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
        if (err)
        spin_lock_init(&sbi->s_bdev_wb_lock);
        errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
                                 &sbi->s_bdev_wb_err);
 -      sb->s_bdev->bd_super = sb;
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
@@@ -5615,16 -5666,16 +5643,16 @@@ failed_mount_wq
        sbi->s_ea_block_cache = NULL;
  
        if (sbi->s_journal) {
-               /* flush s_error_work before journal destroy. */
-               flush_work(&sbi->s_error_work);
+               /* flush s_sb_upd_work before journal destroy. */
+               flush_work(&sbi->s_sb_upd_work);
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
  failed_mount3a:
        ext4_es_unregister_shrinker(sbi);
  failed_mount3:
-       /* flush s_error_work before sbi destroy */
-       flush_work(&sbi->s_error_work);
+       /* flush s_sb_upd_work before sbi destroy */
+       flush_work(&sbi->s_sb_upd_work);
        del_timer_sync(&sbi->s_err_report);
        ext4_stop_mmpd(sbi);
        ext4_group_desc_free(sbi);
@@@ -5641,11 -5692,9 +5669,11 @@@ failed_mount
                kfree(get_qf_name(sb, sbi, i));
  #endif
        fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
 -      /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
        brelse(sbi->s_sbh);
 -      ext4_blkdev_remove(sbi);
 +      if (sbi->s_journal_bdev) {
 +              invalidate_bdev(sbi->s_journal_bdev);
 +              blkdev_put(sbi->s_journal_bdev, sb);
 +      }
  out_fail:
        invalidate_bdev(sb->s_bdev);
        sb->s_fs_info = NULL;
@@@ -5751,22 -5800,22 +5779,22 @@@ static struct inode *ext4_get_journal_i
        journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
        if (IS_ERR(journal_inode)) {
                ext4_msg(sb, KERN_ERR, "no journal found");
-               return NULL;
+               return ERR_CAST(journal_inode);
        }
        if (!journal_inode->i_nlink) {
                make_bad_inode(journal_inode);
                iput(journal_inode);
                ext4_msg(sb, KERN_ERR, "journal inode is deleted");
-               return NULL;
+               return ERR_PTR(-EFSCORRUPTED);
        }
-       ext4_debug("Journal inode found at %p: %lld bytes\n",
-                 journal_inode, journal_inode->i_size);
        if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
                ext4_msg(sb, KERN_ERR, "invalid journal inode");
                iput(journal_inode);
-               return NULL;
+               return ERR_PTR(-EFSCORRUPTED);
        }
+       ext4_debug("Journal inode found at %p: %lld bytes\n",
+                 journal_inode, journal_inode->i_size);
        return journal_inode;
  }
  
@@@ -5792,24 -5841,21 +5820,21 @@@ static int ext4_journal_bmap(journal_t 
        return 0;
  }
  
- static journal_t *ext4_get_journal(struct super_block *sb,
-                                  unsigned int journal_inum)
+ static journal_t *ext4_open_inode_journal(struct super_block *sb,
+                                         unsigned int journal_inum)
  {
        struct inode *journal_inode;
        journal_t *journal;
  
-       if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
-               return NULL;
        journal_inode = ext4_get_journal_inode(sb, journal_inum);
-       if (!journal_inode)
-               return NULL;
+       if (IS_ERR(journal_inode))
+               return ERR_CAST(journal_inode);
  
        journal = jbd2_journal_init_inode(journal_inode);
-       if (!journal) {
+       if (IS_ERR(journal)) {
                ext4_msg(sb, KERN_ERR, "Could not load journal inode");
                iput(journal_inode);
-               return NULL;
+               return ERR_CAST(journal);
        }
        journal->j_private = sb;
        journal->j_bmap = ext4_journal_bmap;
        return journal;
  }
  
- static journal_t *ext4_get_dev_journal(struct super_block *sb,
-                                      dev_t j_dev)
+ static struct block_device *ext4_get_journal_blkdev(struct super_block *sb,
+                                       dev_t j_dev, ext4_fsblk_t *j_start,
+                                       ext4_fsblk_t *j_len)
  {
        struct buffer_head *bh;
-       journal_t *journal;
-       ext4_fsblk_t start;
-       ext4_fsblk_t len;
+       struct block_device *bdev;
        int hblock, blocksize;
        ext4_fsblk_t sb_block;
        unsigned long offset;
        struct ext4_super_block *es;
-       struct block_device *bdev;
-       if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
-               return NULL;
+       int errno;
  
-       bdev = ext4_blkdev_get(j_dev, sb);
 +      /* see get_tree_bdev() for why this is needed and safe */
 +      up_write(&sb->s_umount);
 -                               &ext4_holder_ops);
+       bdev = blkdev_get_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
-       if (bdev == NULL)
-               return NULL;
++                               &fs_holder_ops);
 +      down_write(&sb->s_umount);
+       if (IS_ERR(bdev)) {
+               ext4_msg(sb, KERN_ERR,
+                        "failed to open journal device unknown-block(%u,%u) %ld",
+                        MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev));
+               return ERR_CAST(bdev);
+       }
  
        blocksize = sb->s_blocksize;
        hblock = bdev_logical_block_size(bdev);
        if (blocksize < hblock) {
                ext4_msg(sb, KERN_ERR,
                        "blocksize too small for journal device");
+               errno = -EINVAL;
                goto out_bdev;
        }
  
        sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
        offset = EXT4_MIN_BLOCK_SIZE % blocksize;
        set_blocksize(bdev, blocksize);
-       if (!(bh = __bread(bdev, sb_block, blocksize))) {
+       bh = __bread(bdev, sb_block, blocksize);
+       if (!bh) {
                ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
                       "external journal");
+               errno = -EINVAL;
                goto out_bdev;
        }
  
        if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
            !(le32_to_cpu(es->s_feature_incompat) &
              EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
-               ext4_msg(sb, KERN_ERR, "external journal has "
-                                       "bad superblock");
-               brelse(bh);
-               goto out_bdev;
+               ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
+               errno = -EFSCORRUPTED;
+               goto out_bh;
        }
  
        if ((le32_to_cpu(es->s_feature_ro_compat) &
             EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
            es->s_checksum != ext4_superblock_csum(sb, es)) {
-               ext4_msg(sb, KERN_ERR, "external journal has "
-                                      "corrupt superblock");
-               brelse(bh);
-               goto out_bdev;
+               ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
+               errno = -EFSCORRUPTED;
+               goto out_bh;
        }
  
        if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
                ext4_msg(sb, KERN_ERR, "journal UUID does not match");
-               brelse(bh);
-               goto out_bdev;
+               errno = -EFSCORRUPTED;
+               goto out_bh;
        }
  
-       len = ext4_blocks_count(es);
-       start = sb_block + 1;
-       brelse(bh);     /* we're done with the superblock */
+       *j_start = sb_block + 1;
+       *j_len = ext4_blocks_count(es);
+       brelse(bh);
+       return bdev;
  
-       journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
-                                       start, len, blocksize);
-       if (!journal) {
+ out_bh:
+       brelse(bh);
+ out_bdev:
+       blkdev_put(bdev, sb);
+       return ERR_PTR(errno);
+ }
+ static journal_t *ext4_open_dev_journal(struct super_block *sb,
+                                       dev_t j_dev)
+ {
+       journal_t *journal;
+       ext4_fsblk_t j_start;
+       ext4_fsblk_t j_len;
+       struct block_device *journal_bdev;
+       int errno = 0;
+       journal_bdev = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
+       if (IS_ERR(journal_bdev))
+               return ERR_CAST(journal_bdev);
+       journal = jbd2_journal_init_dev(journal_bdev, sb->s_bdev, j_start,
+                                       j_len, sb->s_blocksize);
+       if (IS_ERR(journal)) {
                ext4_msg(sb, KERN_ERR, "failed to create device journal");
+               errno = PTR_ERR(journal);
                goto out_bdev;
        }
-       journal->j_private = sb;
-       if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
-               ext4_msg(sb, KERN_ERR, "I/O error on journal device");
-               goto out_journal;
-       }
        if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
                ext4_msg(sb, KERN_ERR, "External journal has more than one "
                                        "user (unsupported) - %d",
                        be32_to_cpu(journal->j_superblock->s_nr_users));
+               errno = -EINVAL;
                goto out_journal;
        }
-       EXT4_SB(sb)->s_journal_bdev = bdev;
+       journal->j_private = sb;
+       EXT4_SB(sb)->s_journal_bdev = journal_bdev;
        ext4_init_journal_params(sb, journal);
        return journal;
  
  out_journal:
        jbd2_journal_destroy(journal);
  out_bdev:
-       blkdev_put(bdev, sb);
-       return NULL;
+       blkdev_put(journal_bdev, sb);
+       return ERR_PTR(errno);
  }
  
  static int ext4_load_journal(struct super_block *sb,
        }
  
        if (journal_inum) {
-               journal = ext4_get_journal(sb, journal_inum);
-               if (!journal)
-                       return -EINVAL;
+               journal = ext4_open_inode_journal(sb, journal_inum);
+               if (IS_ERR(journal))
+                       return PTR_ERR(journal);
        } else {
-               journal = ext4_get_dev_journal(sb, journal_dev);
-               if (!journal)
-                       return -EINVAL;
+               journal = ext4_open_dev_journal(sb, journal_dev);
+               if (IS_ERR(journal))
+                       return PTR_ERR(journal);
        }
  
        journal_dev_ro = bdev_read_only(journal->j_dev);
@@@ -6066,7 -6130,7 +6112,7 @@@ static void ext4_update_super(struct su
         * the clock is set in the future, and this will cause e2fsck
         * to complain and force a full file system check.
         */
-       if (!(sb->s_flags & SB_RDONLY))
+       if (!sb_rdonly(sb))
                ext4_update_tstamp(es, s_wtime);
        es->s_kbytes_written =
                cpu_to_le64(sbi->s_kbytes_written +
@@@ -6264,13 -6328,7 +6310,7 @@@ static int ext4_clear_journal_err(struc
   */
  int ext4_force_commit(struct super_block *sb)
  {
-       journal_t *journal;
-       if (sb_rdonly(sb))
-               return 0;
-       journal = EXT4_SB(sb)->s_journal;
-       return ext4_journal_force_commit(journal);
+       return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
  }
  
  static int ext4_sync_fs(struct super_block *sb, int wait)
        bool needs_barrier = false;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
  
-       if (unlikely(ext4_forced_shutdown(sbi)))
+       if (unlikely(ext4_forced_shutdown(sb)))
                return 0;
  
        trace_ext4_sync_fs(sb, wait);
  static int ext4_freeze(struct super_block *sb)
  {
        int error = 0;
-       journal_t *journal;
-       if (sb_rdonly(sb))
-               return 0;
-       journal = EXT4_SB(sb)->s_journal;
+       journal_t *journal = EXT4_SB(sb)->s_journal;
  
        if (journal) {
                /* Now we set up the journal barrier. */
@@@ -6368,7 -6421,7 +6403,7 @@@ out
   */
  static int ext4_unfreeze(struct super_block *sb)
  {
-       if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
+       if (ext4_forced_shutdown(sb))
                return 0;
  
        if (EXT4_SB(sb)->s_journal) {
@@@ -6484,7 -6537,7 +6519,7 @@@ static int __ext4_remount(struct fs_con
                goto restore_opts;
        }
  
-       if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
+       if (test_opt2(sb, ABORT))
                ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
  
        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
        }
  
        /* Flush outstanding errors before changing fs state */
-       flush_work(&sbi->s_error_work);
+       flush_work(&sbi->s_sb_upd_work);
  
        if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
-               if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
+               if (ext4_forced_shutdown(sb)) {
                        err = -EROFS;
                        goto restore_opts;
                }
@@@ -6662,7 -6715,7 +6697,7 @@@ restore_opts
         * If there was a failing r/w to ro transition, we may need to
         * re-enable quota
         */
-       if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
+       if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
            sb_any_quota_suspended(sb))
                dquot_resume(sb, -1);
        sb->s_flags = old_sb_flags;
@@@ -7071,6 -7124,13 +7106,13 @@@ static int ext4_quota_off(struct super_
        err = dquot_quota_off(sb, type);
        if (err || ext4_has_feature_quota(sb))
                goto out_put;
+       /*
+        * When the filesystem was remounted read-only first, we cannot clean up
+        * inode flags here. Bad luck but people should be using QUOTA feature
+        * these days anyway.
+        */
+       if (sb_rdonly(sb))
+               goto out_put;
  
        inode_lock(inode);
        /*
        }
        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
 -      inode->i_mtime = inode->i_ctime = current_time(inode);
 +      inode->i_mtime = inode_set_ctime_current(inode);
        err = ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
  out_unlock:
@@@ -7255,24 -7315,13 +7297,24 @@@ static inline int ext3_feature_set_ok(s
        return 1;
  }
  
 +static void ext4_kill_sb(struct super_block *sb)
 +{
 +      struct ext4_sb_info *sbi = EXT4_SB(sb);
 +      struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL;
 +
 +      kill_block_super(sb);
 +
 +      if (journal_bdev)
 +              blkdev_put(journal_bdev, sb);
 +}
 +
  static struct file_system_type ext4_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "ext4",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
 -      .kill_sb                = kill_block_super,
 -      .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 +      .kill_sb                = ext4_kill_sb,
 +      .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
  };
  MODULE_ALIAS_FS("ext4");
  
diff --combined fs/ext4/xattr.c
index 281e1bfbbe3ec0ad76833df9a12036bad2f58bf2,7cc502c06246edf09063572e2a588631ff17f435..92ba28cebac63d018efcb88110634fa35796fa50
@@@ -356,13 -356,13 +356,13 @@@ ext4_xattr_inode_hash(struct ext4_sb_in
  
  static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
  {
 -      return ((u64)ea_inode->i_ctime.tv_sec << 32) |
 +      return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) |
                (u32) inode_peek_iversion_raw(ea_inode);
  }
  
  static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
  {
 -      ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
 +      inode_set_ctime(ea_inode, (u32)(ref_count >> 32), 0);
        inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff);
  }
  
@@@ -701,7 -701,7 +701,7 @@@ ext4_xattr_get(struct inode *inode, in
  {
        int error;
  
-       if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+       if (unlikely(ext4_forced_shutdown(inode->i_sb)))
                return -EIO;
  
        if (strlen(name) > 255)
@@@ -2473,7 -2473,7 +2473,7 @@@ retry_inode
        }
        if (!error) {
                ext4_xattr_update_super_block(handle, inode->i_sb);
 -              inode->i_ctime = current_time(inode);
 +              inode_set_ctime_current(inode);
                inode_inc_iversion(inode);
                if (!value)
                        no_expand = 0;
diff --combined fs/jbd2/journal.c
index 1b5a45ab62b0d1e21826d34e1b92bb79e0ba7d2b,15e33c26c6cd7011fb137b4d426c224ddf85c159..768fa05bcbedeb7523a8ef76c8f1155ecaf2d7f3
@@@ -115,14 -115,6 +115,6 @@@ void __jbd2_debug(int level, const cha
  #endif
  
  /* Checksumming functions */
- static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
- {
-       if (!jbd2_journal_has_csum_v2or3_feature(j))
-               return 1;
-       return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
- }
  static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
  {
        __u32 csum;
@@@ -341,7 -333,7 +333,7 @@@ int jbd2_journal_write_metadata_buffer(
        int do_escape = 0;
        char *mapped_data;
        struct buffer_head *new_bh;
 -      struct page *new_page;
 +      struct folio *new_folio;
        unsigned int new_offset;
        struct buffer_head *bh_in = jh2bh(jh_in);
        journal_t *journal = transaction->t_journal;
@@@ -370,14 -362,14 +362,14 @@@ repeat
         */
        if (jh_in->b_frozen_data) {
                done_copy_out = 1;
 -              new_page = virt_to_page(jh_in->b_frozen_data);
 -              new_offset = offset_in_page(jh_in->b_frozen_data);
 +              new_folio = virt_to_folio(jh_in->b_frozen_data);
 +              new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
        } else {
 -              new_page = jh2bh(jh_in)->b_page;
 -              new_offset = offset_in_page(jh2bh(jh_in)->b_data);
 +              new_folio = jh2bh(jh_in)->b_folio;
 +              new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data);
        }
  
 -      mapped_data = kmap_atomic(new_page);
 +      mapped_data = kmap_local_folio(new_folio, new_offset);
        /*
         * Fire data frozen trigger if data already wasn't frozen.  Do this
         * before checking for escaping, as the trigger may modify the magic
         * data in the buffer.
         */
        if (!done_copy_out)
 -              jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
 +              jbd2_buffer_frozen_trigger(jh_in, mapped_data,
                                           jh_in->b_triggers);
  
        /*
         * Check for escaping
         */
 -      if (*((__be32 *)(mapped_data + new_offset)) ==
 -                              cpu_to_be32(JBD2_MAGIC_NUMBER)) {
 +      if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) {
                need_copy_out = 1;
                do_escape = 1;
        }
 -      kunmap_atomic(mapped_data);
 +      kunmap_local(mapped_data);
  
        /*
         * Do we need to do a data copy?
                }
  
                jh_in->b_frozen_data = tmp;
 -              mapped_data = kmap_atomic(new_page);
 -              memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
 -              kunmap_atomic(mapped_data);
 +              memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size);
  
 -              new_page = virt_to_page(tmp);
 -              new_offset = offset_in_page(tmp);
 +              new_folio = virt_to_folio(tmp);
 +              new_offset = offset_in_folio(new_folio, tmp);
                done_copy_out = 1;
  
                /*
         * copying, we can finally do so.
         */
        if (do_escape) {
 -              mapped_data = kmap_atomic(new_page);
 -              *((unsigned int *)(mapped_data + new_offset)) = 0;
 -              kunmap_atomic(mapped_data);
 +              mapped_data = kmap_local_folio(new_folio, new_offset);
 +              *((unsigned int *)mapped_data) = 0;
 +              kunmap_local(mapped_data);
        }
  
 -      set_bh_page(new_bh, new_page, new_offset);
 +      folio_set_bh(new_bh, new_folio, new_offset);
        new_bh->b_size = bh_in->b_size;
        new_bh->b_bdev = journal->j_dev;
        new_bh->b_blocknr = blocknr;
@@@ -1333,6 -1328,189 +1325,189 @@@ static unsigned long jbd2_journal_shrin
        return count;
  }
  
+ /*
+  * If the journal init or create aborts, we need to mark the journal
+  * superblock as being NULL to prevent the journal destroy from writing
+  * back a bogus superblock.
+  */
+ static void journal_fail_superblock(journal_t *journal)
+ {
+       struct buffer_head *bh = journal->j_sb_buffer;
+       brelse(bh);
+       journal->j_sb_buffer = NULL;
+ }
+ /*
+  * Check the superblock for a given journal, performing initial
+  * validation of the format.
+  */
+ static int journal_check_superblock(journal_t *journal)
+ {
+       journal_superblock_t *sb = journal->j_superblock;
+       int num_fc_blks;
+       int err = -EINVAL;
+       if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
+           sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
+               printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
+               return err;
+       }
+       if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
+           be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
+               printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
+               return err;
+       }
+       if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
+               printk(KERN_WARNING "JBD2: journal file too short\n");
+               return err;
+       }
+       if (be32_to_cpu(sb->s_first) == 0 ||
+           be32_to_cpu(sb->s_first) >= journal->j_total_len) {
+               printk(KERN_WARNING
+                       "JBD2: Invalid start block of journal: %u\n",
+                       be32_to_cpu(sb->s_first));
+               return err;
+       }
+       /*
+        * If this is a V2 superblock, then we have to check the
+        * features flags on it.
+        */
+       if (!jbd2_format_support_feature(journal))
+               return 0;
+       if ((sb->s_feature_ro_compat &
+                       ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
+           (sb->s_feature_incompat &
+                       ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
+               printk(KERN_WARNING "JBD2: Unrecognised features on journal\n");
+               return err;
+       }
+       num_fc_blks = jbd2_has_feature_fast_commit(journal) ?
+                               jbd2_journal_get_num_fc_blks(sb) : 0;
+       if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS ||
+           be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) {
+               printk(KERN_ERR "JBD2: journal file too short %u,%d\n",
+                      be32_to_cpu(sb->s_maxlen), num_fc_blks);
+               return err;
+       }
+       if (jbd2_has_feature_csum2(journal) &&
+           jbd2_has_feature_csum3(journal)) {
+               /* Can't have checksum v2 and v3 at the same time! */
+               printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
+                      "at the same time!\n");
+               return err;
+       }
+       if (jbd2_journal_has_csum_v2or3_feature(journal) &&
+           jbd2_has_feature_checksum(journal)) {
+               /* Can't have checksum v1 and v2 on at the same time! */
+               printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
+                      "at the same time!\n");
+               return err;
+       }
+       /* Load the checksum driver */
+       if (jbd2_journal_has_csum_v2or3_feature(journal)) {
+               if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) {
+                       printk(KERN_ERR "JBD2: Unknown checksum type\n");
+                       return err;
+               }
+               journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+               if (IS_ERR(journal->j_chksum_driver)) {
+                       printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
+                       err = PTR_ERR(journal->j_chksum_driver);
+                       journal->j_chksum_driver = NULL;
+                       return err;
+               }
+               /* Check superblock checksum */
+               if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
+                       printk(KERN_ERR "JBD2: journal checksum error\n");
+                       err = -EFSBADCRC;
+                       return err;
+               }
+       }
+       return 0;
+ }
+ static int journal_revoke_records_per_block(journal_t *journal)
+ {
+       int record_size;
+       int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
+       if (jbd2_has_feature_64bit(journal))
+               record_size = 8;
+       else
+               record_size = 4;
+       if (jbd2_journal_has_csum_v2or3(journal))
+               space -= sizeof(struct jbd2_journal_block_tail);
+       return space / record_size;
+ }
+ /*
+  * Load the on-disk journal superblock and read the key fields into the
+  * journal_t.
+  */
+ static int journal_load_superblock(journal_t *journal)
+ {
+       int err;
+       struct buffer_head *bh;
+       journal_superblock_t *sb;
+       bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset,
+                             journal->j_blocksize);
+       if (bh)
+               err = bh_read(bh, 0);
+       if (!bh || err < 0) {
+               pr_err("%s: Cannot read journal superblock\n", __func__);
+               brelse(bh);
+               return -EIO;
+       }
+       journal->j_sb_buffer = bh;
+       sb = (journal_superblock_t *)bh->b_data;
+       journal->j_superblock = sb;
+       err = journal_check_superblock(journal);
+       if (err) {
+               journal_fail_superblock(journal);
+               return err;
+       }
+       journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
+       journal->j_tail = be32_to_cpu(sb->s_start);
+       journal->j_first = be32_to_cpu(sb->s_first);
+       journal->j_errno = be32_to_cpu(sb->s_errno);
+       journal->j_last = be32_to_cpu(sb->s_maxlen);
+       if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
+               journal->j_total_len = be32_to_cpu(sb->s_maxlen);
+       /* Precompute checksum seed for all metadata */
+       if (jbd2_journal_has_csum_v2or3(journal))
+               journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+                                                  sizeof(sb->s_uuid));
+       journal->j_revoke_records_per_block =
+                               journal_revoke_records_per_block(journal);
+       if (jbd2_has_feature_fast_commit(journal)) {
+               journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
+               journal->j_last = journal->j_fc_last -
+                                 jbd2_journal_get_num_fc_blks(sb);
+               journal->j_fc_first = journal->j_last + 1;
+               journal->j_fc_off = 0;
+       }
+       return 0;
+ }
  /*
   * Management for journal control blocks: functions to create and
   * destroy journal_t structures, and to initialise and read existing
@@@ -1349,12 -1527,21 +1524,21 @@@ static journal_t *journal_init_common(s
        static struct lock_class_key jbd2_trans_commit_key;
        journal_t *journal;
        int err;
-       struct buffer_head *bh;
        int n;
  
        journal = kzalloc(sizeof(*journal), GFP_KERNEL);
        if (!journal)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
+       journal->j_blocksize = blocksize;
+       journal->j_dev = bdev;
+       journal->j_fs_dev = fs_dev;
+       journal->j_blk_offset = start;
+       journal->j_total_len = len;
+       err = journal_load_superblock(journal);
+       if (err)
+               goto err_cleanup;
  
        init_waitqueue_head(&journal->j_wait_transaction_locked);
        init_waitqueue_head(&journal->j_wait_done_commit);
        mutex_init(&journal->j_checkpoint_mutex);
        spin_lock_init(&journal->j_revoke_lock);
        spin_lock_init(&journal->j_list_lock);
+       spin_lock_init(&journal->j_history_lock);
        rwlock_init(&journal->j_state_lock);
  
        journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
        journal->j_min_batch_time = 0;
        journal->j_max_batch_time = 15000; /* 15ms */
        atomic_set(&journal->j_reserved_credits, 0);
+       lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
+                        &jbd2_trans_commit_key, 0);
  
        /* The journal is marked for error until we succeed with recovery! */
        journal->j_flags = JBD2_ABORT;
        if (err)
                goto err_cleanup;
  
-       spin_lock_init(&journal->j_history_lock);
-       lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
-                        &jbd2_trans_commit_key, 0);
-       /* journal descriptor can store up to n blocks -bzzz */
-       journal->j_blocksize = blocksize;
-       journal->j_dev = bdev;
-       journal->j_fs_dev = fs_dev;
-       journal->j_blk_offset = start;
-       journal->j_total_len = len;
-       /* We need enough buffers to write out full descriptor block. */
+       /*
+        * journal descriptor can store up to n blocks, we need enough
+        * buffers to write out full descriptor block.
+        */
+       err = -ENOMEM;
        n = journal->j_blocksize / jbd2_min_tag_size();
        journal->j_wbufsize = n;
        journal->j_fc_wbuf = NULL;
        if (!journal->j_wbuf)
                goto err_cleanup;
  
-       bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
-       if (!bh) {
-               pr_err("%s: Cannot get buffer for journal superblock\n",
-                       __func__);
+       err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0,
+                                 GFP_KERNEL);
+       if (err)
                goto err_cleanup;
-       }
-       journal->j_sb_buffer = bh;
-       journal->j_superblock = (journal_superblock_t *)bh->b_data;
  
        journal->j_shrink_transaction = NULL;
        journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
        journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
        journal->j_shrinker.seeks = DEFAULT_SEEKS;
        journal->j_shrinker.batch = journal->j_max_transaction_buffers;
-       if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL))
+       err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
+                               MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+       if (err)
                goto err_cleanup;
  
-       if (register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
-                             MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev))) {
-               percpu_counter_destroy(&journal->j_checkpoint_jh_count);
-               goto err_cleanup;
-       }
        return journal;
  
  err_cleanup:
-       brelse(journal->j_sb_buffer);
+       percpu_counter_destroy(&journal->j_checkpoint_jh_count);
        kfree(journal->j_wbuf);
        jbd2_journal_destroy_revoke(journal);
+       journal_fail_superblock(journal);
        kfree(journal);
-       return NULL;
+       return ERR_PTR(err);
  }
  
  /* jbd2_journal_init_dev and jbd2_journal_init_inode:
@@@ -1465,8 -1641,8 +1638,8 @@@ journal_t *jbd2_journal_init_dev(struc
        journal_t *journal;
  
        journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
-       if (!journal)
-               return NULL;
+       if (IS_ERR(journal))
+               return ERR_CAST(journal);
  
        snprintf(journal->j_devname, sizeof(journal->j_devname),
                 "%pg", journal->j_dev);
@@@ -1492,11 -1668,9 +1665,9 @@@ journal_t *jbd2_journal_init_inode(stru
  
        blocknr = 0;
        err = bmap(inode, &blocknr);
        if (err || !blocknr) {
-               pr_err("%s: Cannot locate journal superblock\n",
-                       __func__);
-               return NULL;
+               pr_err("%s: Cannot locate journal superblock\n", __func__);
+               return err ? ERR_PTR(err) : ERR_PTR(-EINVAL);
        }
  
        jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
        journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
                        blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
                        inode->i_sb->s_blocksize);
-       if (!journal)
-               return NULL;
+       if (IS_ERR(journal))
+               return ERR_CAST(journal);
  
        journal->j_inode = inode;
        snprintf(journal->j_devname, sizeof(journal->j_devname),
        return journal;
  }
  
- /*
-  * If the journal init or create aborts, we need to mark the journal
-  * superblock as being NULL to prevent the journal destroy from writing
-  * back a bogus superblock.
-  */
- static void journal_fail_superblock(journal_t *journal)
- {
-       struct buffer_head *bh = journal->j_sb_buffer;
-       brelse(bh);
-       journal->j_sb_buffer = NULL;
- }
  /*
   * Given a journal_t structure, initialise the various fields for
   * startup of a new journaling session.  We use this both when creating
@@@ -1886,163 -2048,6 +2045,6 @@@ void jbd2_journal_update_sb_errno(journ
  }
  EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
  
- static int journal_revoke_records_per_block(journal_t *journal)
- {
-       int record_size;
-       int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
-       if (jbd2_has_feature_64bit(journal))
-               record_size = 8;
-       else
-               record_size = 4;
-       if (jbd2_journal_has_csum_v2or3(journal))
-               space -= sizeof(struct jbd2_journal_block_tail);
-       return space / record_size;
- }
- /*
-  * Read the superblock for a given journal, performing initial
-  * validation of the format.
-  */
- static int journal_get_superblock(journal_t *journal)
- {
-       struct buffer_head *bh;
-       journal_superblock_t *sb;
-       int err;
-       bh = journal->j_sb_buffer;
-       J_ASSERT(bh != NULL);
-       if (buffer_verified(bh))
-               return 0;
-       err = bh_read(bh, 0);
-       if (err < 0) {
-               printk(KERN_ERR
-                       "JBD2: IO error reading journal superblock\n");
-               goto out;
-       }
-       sb = journal->j_superblock;
-       err = -EINVAL;
-       if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
-           sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
-               printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
-               goto out;
-       }
-       if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
-           be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
-               printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
-               goto out;
-       }
-       if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) {
-               printk(KERN_WARNING "JBD2: journal file too short\n");
-               goto out;
-       }
-       if (be32_to_cpu(sb->s_first) == 0 ||
-           be32_to_cpu(sb->s_first) >= journal->j_total_len) {
-               printk(KERN_WARNING
-                       "JBD2: Invalid start block of journal: %u\n",
-                       be32_to_cpu(sb->s_first));
-               goto out;
-       }
-       if (jbd2_has_feature_csum2(journal) &&
-           jbd2_has_feature_csum3(journal)) {
-               /* Can't have checksum v2 and v3 at the same time! */
-               printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
-                      "at the same time!\n");
-               goto out;
-       }
-       if (jbd2_journal_has_csum_v2or3_feature(journal) &&
-           jbd2_has_feature_checksum(journal)) {
-               /* Can't have checksum v1 and v2 on at the same time! */
-               printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
-                      "at the same time!\n");
-               goto out;
-       }
-       if (!jbd2_verify_csum_type(journal, sb)) {
-               printk(KERN_ERR "JBD2: Unknown checksum type\n");
-               goto out;
-       }
-       /* Load the checksum driver */
-       if (jbd2_journal_has_csum_v2or3_feature(journal)) {
-               journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
-               if (IS_ERR(journal->j_chksum_driver)) {
-                       printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
-                       err = PTR_ERR(journal->j_chksum_driver);
-                       journal->j_chksum_driver = NULL;
-                       goto out;
-               }
-               /* Check superblock checksum */
-               if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
-                       printk(KERN_ERR "JBD2: journal checksum error\n");
-                       err = -EFSBADCRC;
-                       goto out;
-               }
-       }
-       set_buffer_verified(bh);
-       return 0;
- out:
-       journal_fail_superblock(journal);
-       return err;
- }
- /*
-  * Load the on-disk journal superblock and read the key fields into the
-  * journal_t.
-  */
- static int load_superblock(journal_t *journal)
- {
-       int err;
-       journal_superblock_t *sb;
-       int num_fc_blocks;
-       err = journal_get_superblock(journal);
-       if (err)
-               return err;
-       sb = journal->j_superblock;
-       journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
-       journal->j_tail = be32_to_cpu(sb->s_start);
-       journal->j_first = be32_to_cpu(sb->s_first);
-       journal->j_errno = be32_to_cpu(sb->s_errno);
-       journal->j_last = be32_to_cpu(sb->s_maxlen);
-       if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
-               journal->j_total_len = be32_to_cpu(sb->s_maxlen);
-       /* Precompute checksum seed for all metadata */
-       if (jbd2_journal_has_csum_v2or3(journal))
-               journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
-                                                  sizeof(sb->s_uuid));
-       journal->j_revoke_records_per_block =
-                               journal_revoke_records_per_block(journal);
-       if (jbd2_has_feature_fast_commit(journal)) {
-               journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
-               num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
-               if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS)
-                       journal->j_last = journal->j_fc_last - num_fc_blocks;
-               journal->j_fc_first = journal->j_last + 1;
-               journal->j_fc_off = 0;
-       }
-       return 0;
- }
  /**
   * jbd2_journal_load() - Read journal from disk.
   * @journal: Journal to act on.
  int jbd2_journal_load(journal_t *journal)
  {
        int err;
-       journal_superblock_t *sb;
-       err = load_superblock(journal);
-       if (err)
-               return err;
-       sb = journal->j_superblock;
-       /*
-        * If this is a V2 superblock, then we have to check the
-        * features flags on it.
-        */
-       if (jbd2_format_support_feature(journal)) {
-               if ((sb->s_feature_ro_compat &
-                    ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
-                   (sb->s_feature_incompat &
-                    ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
-                       printk(KERN_WARNING
-                               "JBD2: Unrecognised features on journal\n");
-                       return -EINVAL;
-               }
-       }
+       journal_superblock_t *sb = journal->j_superblock;
  
        /*
         * Create a slab for this blocksize
  
        /* Let the recovery code check whether it needs to recover any
         * data from the journal. */
-       if (jbd2_journal_recover(journal))
-               goto recovery_error;
+       err = jbd2_journal_recover(journal);
+       if (err) {
+               pr_warn("JBD2: journal recovery failed\n");
+               return err;
+       }
  
        if (journal->j_failed_commit) {
                printk(KERN_ERR "JBD2: journal transaction %u on %s "
        /* OK, we've finished with the dynamic journal bits:
         * reinitialise the dynamic contents of the superblock in memory
         * and reset them on disk. */
-       if (journal_reset(journal))
-               goto recovery_error;
+       err = journal_reset(journal);
+       if (err) {
+               pr_warn("JBD2: journal reset failed\n");
+               return err;
+       }
  
        journal->j_flags |= JBD2_LOADED;
        return 0;
- recovery_error:
-       printk(KERN_WARNING "JBD2: recovery failed\n");
-       return -EIO;
  }
  
  /**
@@@ -2224,8 -2210,6 +2207,6 @@@ int jbd2_journal_check_used_features(jo
  
        if (!compat && !ro && !incompat)
                return 1;
-       if (journal_get_superblock(journal))
-               return 0;
        if (!jbd2_format_support_feature(journal))
                return 0;
  
  
  int jbd2_journal_wipe(journal_t *journal, int write)
  {
-       int err = 0;
+       int err;
  
        J_ASSERT (!(journal->j_flags & JBD2_LOADED));
  
-       err = load_superblock(journal);
-       if (err)
-               return err;
        if (!journal->j_tail)
-               goto no_recovery;
+               return 0;
  
        printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
                write ? "Clearing" : "Ignoring");
                mutex_unlock(&journal->j_checkpoint_mutex);
        }
  
-  no_recovery:
        return err;
  }
  
diff --combined fs/libfs.c
index da78eb64831eca0e83b070a7be5bf18536e6ea61,5197ea8c66d35af6bd5cff196842fd7109a4db5b..a4eb1275788627161d1a1f0d5f5b6bc3557a5df1
@@@ -33,7 -33,7 +33,7 @@@ int simple_getattr(struct mnt_idmap *id
                   unsigned int query_flags)
  {
        struct inode *inode = d_inode(path->dentry);
 -      generic_fillattr(&nop_mnt_idmap, inode, stat);
 +      generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
        return 0;
  }
@@@ -239,254 -239,6 +239,254 @@@ const struct inode_operations simple_di
  };
  EXPORT_SYMBOL(simple_dir_inode_operations);
  
 +static void offset_set(struct dentry *dentry, u32 offset)
 +{
 +      dentry->d_fsdata = (void *)((uintptr_t)(offset));
 +}
 +
 +static u32 dentry2offset(struct dentry *dentry)
 +{
 +      return (u32)((uintptr_t)(dentry->d_fsdata));
 +}
 +
 +static struct lock_class_key simple_offset_xa_lock;
 +
 +/**
 + * simple_offset_init - initialize an offset_ctx
 + * @octx: directory offset map to be initialized
 + *
 + */
 +void simple_offset_init(struct offset_ctx *octx)
 +{
 +      xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
 +      lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
 +
 +      /* 0 is '.', 1 is '..', so always start with offset 2 */
 +      octx->next_offset = 2;
 +}
 +
 +/**
 + * simple_offset_add - Add an entry to a directory's offset map
 + * @octx: directory offset ctx to be updated
 + * @dentry: new dentry being added
 + *
 + * Returns zero on success. @octx and the dentry offset are updated.
 + * Otherwise, a negative errno value is returned.
 + */
 +int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
 +{
 +      static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
 +      u32 offset;
 +      int ret;
 +
 +      if (dentry2offset(dentry) != 0)
 +              return -EBUSY;
 +
 +      ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
 +                            &octx->next_offset, GFP_KERNEL);
 +      if (ret < 0)
 +              return ret;
 +
 +      offset_set(dentry, offset);
 +      return 0;
 +}
 +
 +/**
 + * simple_offset_remove - Remove an entry from a directory's offset map
 + * @octx: directory offset ctx to be updated
 + * @dentry: dentry being removed
 + *
 + */
 +void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
 +{
 +      u32 offset;
 +
 +      offset = dentry2offset(dentry);
 +      if (offset == 0)
 +              return;
 +
 +      xa_erase(&octx->xa, offset);
 +      offset_set(dentry, 0);
 +}
 +
 +/**
 + * simple_offset_rename_exchange - exchange rename with directory offsets
 + * @old_dir: parent of dentry being moved
 + * @old_dentry: dentry being moved
 + * @new_dir: destination parent
 + * @new_dentry: destination dentry
 + *
 + * Returns zero on success. Otherwise a negative errno is returned and the
 + * rename is rolled back.
 + */
 +int simple_offset_rename_exchange(struct inode *old_dir,
 +                                struct dentry *old_dentry,
 +                                struct inode *new_dir,
 +                                struct dentry *new_dentry)
 +{
 +      struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
 +      struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
 +      u32 old_index = dentry2offset(old_dentry);
 +      u32 new_index = dentry2offset(new_dentry);
 +      int ret;
 +
 +      simple_offset_remove(old_ctx, old_dentry);
 +      simple_offset_remove(new_ctx, new_dentry);
 +
 +      ret = simple_offset_add(new_ctx, old_dentry);
 +      if (ret)
 +              goto out_restore;
 +
 +      ret = simple_offset_add(old_ctx, new_dentry);
 +      if (ret) {
 +              simple_offset_remove(new_ctx, old_dentry);
 +              goto out_restore;
 +      }
 +
 +      ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
 +      if (ret) {
 +              simple_offset_remove(new_ctx, old_dentry);
 +              simple_offset_remove(old_ctx, new_dentry);
 +              goto out_restore;
 +      }
 +      return 0;
 +
 +out_restore:
 +      offset_set(old_dentry, old_index);
 +      xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
 +      offset_set(new_dentry, new_index);
 +      xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
 +      return ret;
 +}
 +
 +/**
 + * simple_offset_destroy - Release offset map
 + * @octx: directory offset ctx that is about to be destroyed
 + *
 + * During fs teardown (eg. umount), a directory's offset map might still
 + * contain entries. xa_destroy() cleans out anything that remains.
 + */
 +void simple_offset_destroy(struct offset_ctx *octx)
 +{
 +      xa_destroy(&octx->xa);
 +}
 +
 +/**
 + * offset_dir_llseek - Advance the read position of a directory descriptor
 + * @file: an open directory whose position is to be updated
 + * @offset: a byte offset
 + * @whence: enumerator describing the starting position for this update
 + *
 + * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
 + *
 + * Returns the updated read position if successful; otherwise a
 + * negative errno is returned and the read position remains unchanged.
 + */
 +static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
 +{
 +      switch (whence) {
 +      case SEEK_CUR:
 +              offset += file->f_pos;
 +              fallthrough;
 +      case SEEK_SET:
 +              if (offset >= 0)
 +                      break;
 +              fallthrough;
 +      default:
 +              return -EINVAL;
 +      }
 +
 +      return vfs_setpos(file, offset, U32_MAX);
 +}
 +
 +static struct dentry *offset_find_next(struct xa_state *xas)
 +{
 +      struct dentry *child, *found = NULL;
 +
 +      rcu_read_lock();
 +      child = xas_next_entry(xas, U32_MAX);
 +      if (!child)
 +              goto out;
 +      spin_lock(&child->d_lock);
 +      if (simple_positive(child))
 +              found = dget_dlock(child);
 +      spin_unlock(&child->d_lock);
 +out:
 +      rcu_read_unlock();
 +      return found;
 +}
 +
 +static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
 +{
 +      u32 offset = dentry2offset(dentry);
 +      struct inode *inode = d_inode(dentry);
 +
 +      return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
 +                        inode->i_ino, fs_umode_to_dtype(inode->i_mode));
 +}
 +
 +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
 +{
 +      struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
 +      XA_STATE(xas, &so_ctx->xa, ctx->pos);
 +      struct dentry *dentry;
 +
 +      while (true) {
 +              dentry = offset_find_next(&xas);
 +              if (!dentry)
 +                      break;
 +
 +              if (!offset_dir_emit(ctx, dentry)) {
 +                      dput(dentry);
 +                      break;
 +              }
 +
 +              dput(dentry);
 +              ctx->pos = xas.xa_index + 1;
 +      }
 +}
 +
 +/**
 + * offset_readdir - Emit entries starting at offset @ctx->pos
 + * @file: an open directory to iterate over
 + * @ctx: directory iteration context
 + *
 + * Caller must hold @file's i_rwsem to prevent insertion or removal of
 + * entries during this call.
 + *
 + * On entry, @ctx->pos contains an offset that represents the first entry
 + * to be read from the directory.
 + *
 + * The operation continues until there are no more entries to read, or
 + * until the ctx->actor indicates there is no more space in the caller's
 + * output buffer.
 + *
 + * On return, @ctx->pos contains an offset that will read the next entry
 + * in this directory when offset_readdir() is called again with @ctx.
 + *
 + * Return values:
 + *   %0 - Complete
 + */
 +static int offset_readdir(struct file *file, struct dir_context *ctx)
 +{
 +      struct dentry *dir = file->f_path.dentry;
 +
 +      lockdep_assert_held(&d_inode(dir)->i_rwsem);
 +
 +      if (!dir_emit_dots(file, ctx))
 +              return 0;
 +
 +      offset_iterate_dir(d_inode(dir), ctx);
 +      return 0;
 +}
 +
 +const struct file_operations simple_offset_dir_operations = {
 +      .llseek         = offset_dir_llseek,
 +      .iterate_shared = offset_readdir,
 +      .read           = generic_read_dir,
 +      .fsync          = noop_fsync,
 +};
 +
  static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
  {
        struct dentry *child = NULL;
@@@ -523,7 -275,7 +523,7 @@@ void simple_recursive_removal(struct de
                while ((child = find_next_child(this, victim)) == NULL) {
                        // kill and ascend
                        // update metadata while it's still locked
 -                      inode->i_ctime = current_time(inode);
 +                      inode_set_ctime_current(inode);
                        clear_nlink(inode);
                        inode_unlock(inode);
                        victim = this;
                                dput(victim);           // unpin it
                        }
                        if (victim == dentry) {
 -                              inode->i_ctime = inode->i_mtime =
 -                                      current_time(inode);
 +                              inode->i_mtime = inode_set_ctime_current(inode);
                                if (d_is_dir(dentry))
                                        drop_nlink(inode);
                                inode_unlock(inode);
@@@ -582,7 -335,7 +582,7 @@@ static int pseudo_fs_fill_super(struct 
         */
        root->i_ino = 1;
        root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 -      root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
 +      root->i_atime = root->i_mtime = inode_set_ctime_current(root);
        s->s_root = d_make_root(root);
        if (!s->s_root)
                return -ENOMEM;
@@@ -638,8 -391,7 +638,8 @@@ int simple_link(struct dentry *old_dent
  {
        struct inode *inode = d_inode(old_dentry);
  
 -      inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 +      dir->i_mtime = inode_set_ctime_to_ts(dir,
 +                                           inode_set_ctime_current(inode));
        inc_nlink(inode);
        ihold(inode);
        dget(dentry);
@@@ -673,8 -425,7 +673,8 @@@ int simple_unlink(struct inode *dir, st
  {
        struct inode *inode = d_inode(dentry);
  
 -      inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
 +      dir->i_mtime = inode_set_ctime_to_ts(dir,
 +                                           inode_set_ctime_current(inode));
        drop_nlink(inode);
        dput(dentry);
        return 0;
@@@ -693,31 -444,6 +693,31 @@@ int simple_rmdir(struct inode *dir, str
  }
  EXPORT_SYMBOL(simple_rmdir);
  
 +/**
 + * simple_rename_timestamp - update the various inode timestamps for rename
 + * @old_dir: old parent directory
 + * @old_dentry: dentry that is being renamed
 + * @new_dir: new parent directory
 + * @new_dentry: target for rename
 + *
 + * POSIX mandates that the old and new parent directories have their ctime and
 + * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have
 + * their ctime updated.
 + */
 +void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
 +                           struct inode *new_dir, struct dentry *new_dentry)
 +{
 +      struct inode *newino = d_inode(new_dentry);
 +
 +      old_dir->i_mtime = inode_set_ctime_current(old_dir);
 +      if (new_dir != old_dir)
 +              new_dir->i_mtime = inode_set_ctime_current(new_dir);
 +      inode_set_ctime_current(d_inode(old_dentry));
 +      if (newino)
 +              inode_set_ctime_current(newino);
 +}
 +EXPORT_SYMBOL_GPL(simple_rename_timestamp);
 +
  int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry)
  {
                        inc_nlink(old_dir);
                }
        }
 -      old_dir->i_ctime = old_dir->i_mtime =
 -      new_dir->i_ctime = new_dir->i_mtime =
 -      d_inode(old_dentry)->i_ctime =
 -      d_inode(new_dentry)->i_ctime = current_time(old_dir);
 -
 +      simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
        return 0;
  }
  EXPORT_SYMBOL_GPL(simple_rename_exchange);
@@@ -742,6 -472,7 +742,6 @@@ int simple_rename(struct mnt_idmap *idm
                  struct dentry *old_dentry, struct inode *new_dir,
                  struct dentry *new_dentry, unsigned int flags)
  {
 -      struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = d_is_dir(old_dentry);
  
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
                inc_nlink(new_dir);
        }
  
 -      old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
 -              new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
 -
 +      simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
        return 0;
  }
  EXPORT_SYMBOL(simple_rename);
@@@ -815,20 -548,21 +815,20 @@@ int simple_write_begin(struct file *fil
                        loff_t pos, unsigned len,
                        struct page **pagep, void **fsdata)
  {
 -      struct page *page;
 -      pgoff_t index;
 +      struct folio *folio;
  
 -      index = pos >> PAGE_SHIFT;
 +      folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
 +                      mapping_gfp_mask(mapping));
 +      if (IS_ERR(folio))
 +              return PTR_ERR(folio);
  
 -      page = grab_cache_page_write_begin(mapping, index);
 -      if (!page)
 -              return -ENOMEM;
 -
 -      *pagep = page;
 +      *pagep = &folio->page;
  
 -      if (!PageUptodate(page) && (len != PAGE_SIZE)) {
 -              unsigned from = pos & (PAGE_SIZE - 1);
 +      if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
 +              size_t from = offset_in_folio(folio, pos);
  
 -              zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
 +              folio_zero_segments(folio, 0, from,
 +                              from + len, folio_size(folio));
        }
        return 0;
  }
@@@ -860,18 -594,17 +860,18 @@@ static int simple_write_end(struct fil
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
  {
 -      struct inode *inode = page->mapping->host;
 +      struct folio *folio = page_folio(page);
 +      struct inode *inode = folio->mapping->host;
        loff_t last_pos = pos + copied;
  
 -      /* zero the stale part of the page if we did a short copy */
 -      if (!PageUptodate(page)) {
 +      /* zero the stale part of the folio if we did a short copy */
 +      if (!folio_test_uptodate(folio)) {
                if (copied < len) {
 -                      unsigned from = pos & (PAGE_SIZE - 1);
 +                      size_t from = offset_in_folio(folio, pos);
  
 -                      zero_user(page, from + copied, len - copied);
 +                      folio_zero_range(folio, from + copied, len - copied);
                }
 -              SetPageUptodate(page);
 +              folio_mark_uptodate(folio);
        }
        /*
         * No need to use i_size_read() here, the i_size
        if (last_pos > inode->i_size)
                i_size_write(inode, last_pos);
  
 -      set_page_dirty(page);
 -      unlock_page(page);
 -      put_page(page);
 +      folio_mark_dirty(folio);
 +      folio_unlock(folio);
 +      folio_put(folio);
  
        return copied;
  }
@@@ -926,7 -659,7 +926,7 @@@ int simple_fill_super(struct super_bloc
         */
        inode->i_ino = 1;
        inode->i_mode = S_IFDIR | 0755;
 -      inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 +      inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        set_nlink(inode, 2);
                        goto out;
                }
                inode->i_mode = S_IFREG | files->mode;
 -              inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 +              inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inode->i_fop = files->ops;
                inode->i_ino = i;
                d_add(dentry, inode);
@@@ -1520,7 -1253,7 +1520,7 @@@ struct inode *alloc_anon_inode(struct s
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
        inode->i_flags |= S_PRIVATE;
 -      inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 +      inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        return inode;
  }
  EXPORT_SYMBOL(alloc_anon_inode);
   * All arguments are ignored and it just returns -EINVAL.
   */
  int
 -simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
 +simple_nosetlease(struct file *filp, int arg, struct file_lock **flp,
                  void **priv)
  {
        return -EINVAL;
@@@ -1582,7 -1315,7 +1582,7 @@@ static int empty_dir_getattr(struct mnt
                             u32 request_mask, unsigned int query_flags)
  {
        struct inode *inode = d_inode(path->dentry);
 -      generic_fillattr(&nop_mnt_idmap, inode, stat);
 +      generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        return 0;
  }
  
@@@ -1648,16 -1381,6 +1648,6 @@@ bool is_empty_dir_inode(struct inode *i
  }
  
  #if IS_ENABLED(CONFIG_UNICODE)
- /*
-  * Determine if the name of a dentry should be casefolded.
-  *
-  * Return: if names will need casefolding
-  */
- static bool needs_casefold(const struct inode *dir)
- {
-       return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
- }
  /**
   * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
   * @dentry:   dentry whose name we are checking against
@@@ -1678,7 -1401,7 +1668,7 @@@ static int generic_ci_d_compare(const s
        char strbuf[DNAME_INLINE_LEN];
        int ret;
  
-       if (!dir || !needs_casefold(dir))
+       if (!dir || !IS_CASEFOLDED(dir))
                goto fallback;
        /*
         * If the dentry name is stored in-line, then it may be concurrently
@@@ -1720,7 -1443,7 +1710,7 @@@ static int generic_ci_d_hash(const stru
        const struct unicode_map *um = sb->s_encoding;
        int ret = 0;
  
-       if (!dir || !needs_casefold(dir))
+       if (!dir || !IS_CASEFOLDED(dir))
                return 0;
  
        ret = utf8_casefold_hash(um, dentry, str);
diff --combined fs/ocfs2/journal.c
index e8e7d47265aa95fd897f8332f9a99f21a05274ef,1d2960e8ce74517c207ca73d134860bf2bb91f9a..ce215565d061ed98686dc075923d7a7503b96cd9
@@@ -114,9 -114,9 +114,9 @@@ int ocfs2_compute_replay_slots(struct o
        if (osb->replay_map)
                return 0;
  
 -      replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
 -                           (osb->max_slots * sizeof(char)), GFP_KERNEL);
 -
 +      replay_map = kzalloc(struct_size(replay_map, rm_replay_slots,
 +                                       osb->max_slots),
 +                           GFP_KERNEL);
        if (!replay_map) {
                mlog_errno(-ENOMEM);
                return -ENOMEM;
@@@ -178,13 -178,16 +178,13 @@@ int ocfs2_recovery_init(struct ocfs2_su
        osb->recovery_thread_task = NULL;
        init_waitqueue_head(&osb->recovery_event);
  
 -      rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
 -                   osb->max_slots * sizeof(unsigned int),
 +      rm = kzalloc(struct_size(rm, rm_entries, osb->max_slots),
                     GFP_KERNEL);
        if (!rm) {
                mlog_errno(-ENOMEM);
                return -ENOMEM;
        }
  
 -      rm->rm_entries = (unsigned int *)((char *)rm +
 -                                        sizeof(struct ocfs2_recovery_map));
        osb->recovery_map = rm;
  
        return 0;
@@@ -554,7 -557,7 +554,7 @@@ static void ocfs2_abort_trigger(struct 
             (unsigned long)bh,
             (unsigned long long)bh->b_blocknr);
  
 -      ocfs2_error(bh->b_bdev->bd_super,
 +      ocfs2_error(bh->b_assoc_map->host->i_sb,
                    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
  }
  
@@@ -777,14 -780,14 +777,14 @@@ void ocfs2_journal_dirty(handle_t *hand
                mlog_errno(status);
                if (!is_handle_aborted(handle)) {
                        journal_t *journal = handle->h_transaction->t_journal;
 -                      struct super_block *sb = bh->b_bdev->bd_super;
  
                        mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
                                        "Aborting transaction and journal.\n");
                        handle->h_err = status;
                        jbd2_journal_abort_handle(handle);
                        jbd2_journal_abort(journal, status);
 -                      ocfs2_abort(sb, "Journal already aborted.\n");
 +                      ocfs2_abort(bh->b_assoc_map->host->i_sb,
 +                                  "Journal already aborted.\n");
                }
        }
  }
@@@ -908,9 -911,9 +908,9 @@@ int ocfs2_journal_init(struct ocfs2_sup
  
        /* call the kernels journal init function now */
        j_journal = jbd2_journal_init_inode(inode);
-       if (j_journal == NULL) {
+       if (IS_ERR(j_journal)) {
                mlog(ML_ERROR, "Linux journal layer error\n");
-               status = -EINVAL;
+               status = PTR_ERR(j_journal);
                goto done;
        }
  
@@@ -1684,9 -1687,9 +1684,9 @@@ static int ocfs2_replay_journal(struct 
        }
  
        journal = jbd2_journal_init_inode(inode);
-       if (journal == NULL) {
+       if (IS_ERR(journal)) {
                mlog(ML_ERROR, "Linux journal layer error\n");
-               status = -EIO;
+               status = PTR_ERR(journal);
                goto done;
        }
  
This page took 0.264955 seconds and 4 git commands to generate.