fs/ext4/super.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  linux/fs/ext4/super.c
   4  *
   5  * Copyright (C) 1992, 1993, 1994, 1995
   6  * Remy Card ([email protected])
   7  * Laboratoire MASI - Institut Blaise Pascal
   8  * Universite Pierre et Marie Curie (Paris VI)
   9  *
  10  *  from
  11  *
  12  *  linux/fs/minix/inode.c
  13  *
  14  *  Copyright (C) 1991, 1992  Linus Torvalds
  15  *
  16  *  Big-endian to little-endian byte-swapping/bitmaps by
  17  *        David S. Miller ([email protected]), 1995
  18  */
  19
  20 #include <linux/module.h>
  21 #include <linux/string.h>
  22 #include <linux/fs.h>
  23 #include <linux/time.h>
  24 #include <linux/vmalloc.h>
  25 #include <linux/slab.h>
  26 #include <linux/init.h>
  27 #include <linux/blkdev.h>
  28 #include <linux/backing-dev.h>
  29 #include <linux/parser.h>
  30 #include <linux/buffer_head.h>
  31 #include <linux/exportfs.h>
  32 #include <linux/vfs.h>
  33 #include <linux/random.h>
  34 #include <linux/mount.h>
  35 #include <linux/namei.h>
  36 #include <linux/quotaops.h>
  37 #include <linux/seq_file.h>
  38 #include <linux/ctype.h>
  39 #include <linux/log2.h>
  40 #include <linux/crc16.h>
  41 #include <linux/dax.h>
  42 #include <linux/uaccess.h>
  43 #include <linux/iversion.h>
  44 #include <linux/unicode.h>
  45 #include <linux/part_stat.h>
  46 #include <linux/kthread.h>
  47 #include <linux/freezer.h>
  48 #include <linux/fsnotify.h>
  49 #include <linux/fs_context.h>
  50 #include <linux/fs_parser.h>
  51
  52 #include "ext4.h"
  53 #include "ext4_extents.h"       /* Needed for trace points definition */
  54 #include "ext4_jbd2.h"
  55 #include "xattr.h"
  56 #include "acl.h"
  57 #include "mballoc.h"
  58 #include "fsmap.h"
  59
  60 #define CREATE_TRACE_POINTS
  61 #include <trace/events/ext4.h>
  62
     /*
      * File-scope state.  NOTE(review): going by the names, these hold the
      * lazy-init bookkeeping, a mutex associated with it, and a ratelimit
      * state for mount messages; their users are outside this excerpt, so
      * confirm before relying on that.
      */
  63 static struct ext4_lazy_init *ext4_li_info;
  64 static DEFINE_MUTEX(ext4_li_mtx);
  65 static struct ratelimit_state ext4_mount_msg_ratelimit;
  66
     /*
      * Forward declarations.  Several of these are referenced before they
      * are defined, e.g. by ext4_context_ops, the file_system_type tables
      * and the error-handling helpers further down.
      */
  67 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
  68                              unsigned long journal_devnum);
  69 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
  70 static void ext4_update_super(struct super_block *sb);
  71 static int ext4_commit_super(struct super_block *sb);
  72 static int ext4_mark_recovery_complete(struct super_block *sb,
  73                                         struct ext4_super_block *es);
  74 static int ext4_clear_journal_err(struct super_block *sb,
  75                                   struct ext4_super_block *es);
  76 static int ext4_sync_fs(struct super_block *sb, int wait);
  77 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
  78 static int ext4_unfreeze(struct super_block *sb);
  79 static int ext4_freeze(struct super_block *sb);
  80 static inline int ext2_feature_set_ok(struct super_block *sb);
  81 static inline int ext3_feature_set_ok(struct super_block *sb);
  82 static void ext4_destroy_lazyinit_thread(void);
  83 static void ext4_unregister_li_request(struct super_block *sb);
  84 static void ext4_clear_request_list(void);
  85 static struct inode *ext4_get_journal_inode(struct super_block *sb,
  86                                             unsigned int journal_inum);
  87 static int ext4_validate_options(struct fs_context *fc);
  88 static int ext4_check_opt_consistency(struct fs_context *fc,
  89                                       struct super_block *sb);
  90 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
  91 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
  92 static int ext4_get_tree(struct fs_context *fc);
  93 static int ext4_reconfigure(struct fs_context *fc);
  94 static void ext4_fc_free(struct fs_context *fc);
  95 static int ext4_init_fs_context(struct fs_context *fc);
  96 static void ext4_kill_sb(struct super_block *sb);
  97 static const struct fs_parameter_spec ext4_param_specs[];
  98
  99 /*
 100  * Lock ordering
 101  *
 102  * page fault path:
 103  * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 104  *   -> page lock -> i_data_sem (rw)
 105  *
 106  * buffered write path:
 107  * sb_start_write -> i_mutex -> mmap_lock
 108  * sb_start_write -> i_mutex -> transaction start -> page lock ->
 109  *   i_data_sem (rw)
 110  *
 111  * truncate:
 112  * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 113  *   page lock
 114  * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
 115  *   i_data_sem (rw)
 116  *
 117  * direct IO:
 118  * sb_start_write -> i_mutex -> mmap_lock
 119  * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 120  *
 121  * writepages:
 122  * transaction start -> page lock(s) -> i_data_sem (rw)
 123  */
 124
     /*
      * fs_context callbacks: parameter parsing, tree creation,
      * reconfiguration, and freeing of the context.
      */
 125 static const struct fs_context_operations ext4_context_ops = {
 126         .parse_param    = ext4_parse_param,
 127         .get_tree       = ext4_get_tree,
 128         .reconfigure    = ext4_reconfigure,
 129         .free           = ext4_fc_free,
 130 };
 131
 132
     /*
      * When no native ext2 driver is configured (neither built-in nor as a
      * module) and CONFIG_EXT4_USE_FOR_EXT2 is set, define an "ext2"
      * filesystem type that uses the ext4 context-init, parameter table and
      * kill_sb callbacks.  IS_EXT2_SB() then tests whether a superblock
      * belongs to that type; in every other configuration it is constant 0.
      */
 133 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
 134 static struct file_system_type ext2_fs_type = {
 135         .owner                  = THIS_MODULE,
 136         .name                   = "ext2",
 137         .init_fs_context        = ext4_init_fs_context,
 138         .parameters             = ext4_param_specs,
 139         .kill_sb                = ext4_kill_sb,
 140         .fs_flags               = FS_REQUIRES_DEV,
 141 };
 142 MODULE_ALIAS_FS("ext2");
 143 MODULE_ALIAS("ext2");
 144 #define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
 145 #else
 146 #define IS_EXT2_SB(sb) (0)
 147 #endif
 148
 149
     /*
      * "ext3" filesystem type, defined unconditionally and backed by the
      * same ext4 callbacks.  IS_EXT3_SB() tests whether a superblock
      * belongs to this type.
      */
 150 static struct file_system_type ext3_fs_type = {
 151         .owner                  = THIS_MODULE,
 152         .name                   = "ext3",
 153         .init_fs_context        = ext4_init_fs_context,
 154         .parameters             = ext4_param_specs,
 155         .kill_sb                = ext4_kill_sb,
 156         .fs_flags               = FS_REQUIRES_DEV,
 157 };
 158 MODULE_ALIAS_FS("ext3");
 159 MODULE_ALIAS("ext3");
 160 #define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
 161
 162
 163 static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
 164                                   bh_end_io_t *end_io)
 165 {
 166         /*
 167          * buffer's verified bit is no longer valid after reading from
 168          * disk again due to write out error, clear it to make sure we
 169          * recheck the buffer contents.
 170          */
 171         clear_buffer_verified(bh);
 172
 173         bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
 174         get_bh(bh);
 175         submit_bh(REQ_OP_READ | op_flags, bh);
 176 }
 177
 178 void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
 179                          bh_end_io_t *end_io)
 180 {
 181         BUG_ON(!buffer_locked(bh));
 182
 183         if (ext4_buffer_uptodate(bh)) {
 184                 unlock_buffer(bh);
 185                 return;
 186         }
 187         __ext4_read_bh(bh, op_flags, end_io);
 188 }
 189
 190 int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
 191 {
 192         BUG_ON(!buffer_locked(bh));
 193
 194         if (ext4_buffer_uptodate(bh)) {
 195                 unlock_buffer(bh);
 196                 return 0;
 197         }
 198
 199         __ext4_read_bh(bh, op_flags, end_io);
 200
 201         wait_on_buffer(bh);
 202         if (buffer_uptodate(bh))
 203                 return 0;
 204         return -EIO;
 205 }
 206
 207 int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
 208 {
 209         lock_buffer(bh);
 210         if (!wait) {
 211                 ext4_read_bh_nowait(bh, op_flags, NULL);
 212                 return 0;
 213         }
 214         return ext4_read_bh(bh, op_flags, NULL);
 215 }
 216
 217 /*
 218  * This works like __bread_gfp() except it uses ERR_PTR for error
 219  * returns.  Currently with sb_bread it's impossible to distinguish
 220  * between ENOMEM and EIO situations (since both result in a NULL
 221  * return).
 222  */
 223 static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
 224                                                sector_t block,
 225                                                blk_opf_t op_flags, gfp_t gfp)
 226 {
 227         struct buffer_head *bh;
 228         int ret;
 229
 230         bh = sb_getblk_gfp(sb, block, gfp);
 231         if (bh == NULL)
 232                 return ERR_PTR(-ENOMEM);
 233         if (ext4_buffer_uptodate(bh))
 234                 return bh;
 235
 236         ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
 237         if (ret) {
 238                 put_bh(bh);
 239                 return ERR_PTR(ret);
 240         }
 241         return bh;
 242 }
 243
 244 struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
 245                                    blk_opf_t op_flags)
 246 {
 247         gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
 248                         ~__GFP_FS) | __GFP_MOVABLE;
 249
 250         return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
 251 }
 252
 253 struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
 254                                             sector_t block)
 255 {
 256         gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_inode->i_mapping,
 257                         ~__GFP_FS);
 258
 259         return __ext4_sb_bread_gfp(sb, block, 0, gfp);
 260 }
 261
 262 void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
 263 {
 264         struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
 265                         sb->s_blocksize, GFP_NOWAIT);
 266
 267         if (likely(bh)) {
 268                 if (trylock_buffer(bh))
 269                         ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
 270                 brelse(bh);
 271         }
 272 }
 273
 274 static int ext4_verify_csum_type(struct super_block *sb,
 275                                  struct ext4_super_block *es)
 276 {
 277         if (!ext4_has_feature_metadata_csum(sb))
 278                 return 1;
 279
 280         return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
 281 }
 282
 283 __le32 ext4_superblock_csum(struct super_block *sb,
 284                             struct ext4_super_block *es)
 285 {
 286         struct ext4_sb_info *sbi = EXT4_SB(sb);
 287         int offset = offsetof(struct ext4_super_block, s_checksum);
 288         __u32 csum;
 289
 290         csum = ext4_chksum(sbi, ~0, (char *)es, offset);
 291
 292         return cpu_to_le32(csum);
 293 }
 294
 295 static int ext4_superblock_csum_verify(struct super_block *sb,
 296                                        struct ext4_super_block *es)
 297 {
 298         if (!ext4_has_metadata_csum(sb))
 299                 return 1;
 300
 301         return es->s_checksum == ext4_superblock_csum(sb, es);
 302 }
 303
 304 void ext4_superblock_csum_set(struct super_block *sb)
 305 {
 306         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 307
 308         if (!ext4_has_metadata_csum(sb))
 309                 return;
 310
 311         es->s_checksum = ext4_superblock_csum(sb, es);
 312 }
 313
 314 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 315                                struct ext4_group_desc *bg)
 316 {
 317         return le32_to_cpu(bg->bg_block_bitmap_lo) |
 318                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 319                  (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
 320 }
 321
 322 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
 323                                struct ext4_group_desc *bg)
 324 {
 325         return le32_to_cpu(bg->bg_inode_bitmap_lo) |
 326                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 327                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
 328 }
 329
 330 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
 331                               struct ext4_group_desc *bg)
 332 {
 333         return le32_to_cpu(bg->bg_inode_table_lo) |
 334                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 335                  (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
 336 }
 337
 338 __u32 ext4_free_group_clusters(struct super_block *sb,
 339                                struct ext4_group_desc *bg)
 340 {
 341         return le16_to_cpu(bg->bg_free_blocks_count_lo) |
 342                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 343                  (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
 344 }
 345
 346 __u32 ext4_free_inodes_count(struct super_block *sb,
 347                               struct ext4_group_desc *bg)
 348 {
 349         return le16_to_cpu(bg->bg_free_inodes_count_lo) |
 350                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 351                  (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
 352 }
 353
 354 __u32 ext4_used_dirs_count(struct super_block *sb,
 355                               struct ext4_group_desc *bg)
 356 {
 357         return le16_to_cpu(bg->bg_used_dirs_count_lo) |
 358                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 359                  (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
 360 }
 361
 362 __u32 ext4_itable_unused_count(struct super_block *sb,
 363                               struct ext4_group_desc *bg)
 364 {
 365         return le16_to_cpu(bg->bg_itable_unused_lo) |
 366                 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
 367                  (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
 368 }
 369
 370 void ext4_block_bitmap_set(struct super_block *sb,
 371                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 372 {
 373         bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
 374         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 375                 bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
 376 }
 377
 378 void ext4_inode_bitmap_set(struct super_block *sb,
 379                            struct ext4_group_desc *bg, ext4_fsblk_t blk)
 380 {
 381         bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
 382         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 383                 bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
 384 }
 385
 386 void ext4_inode_table_set(struct super_block *sb,
 387                           struct ext4_group_desc *bg, ext4_fsblk_t blk)
 388 {
 389         bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
 390         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 391                 bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
 392 }
 393
 394 void ext4_free_group_clusters_set(struct super_block *sb,
 395                                   struct ext4_group_desc *bg, __u32 count)
 396 {
 397         bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
 398         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 399                 bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
 400 }
 401
 402 void ext4_free_inodes_set(struct super_block *sb,
 403                           struct ext4_group_desc *bg, __u32 count)
 404 {
 405         bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
 406         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 407                 bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
 408 }
 409
 410 void ext4_used_dirs_set(struct super_block *sb,
 411                           struct ext4_group_desc *bg, __u32 count)
 412 {
 413         bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
 414         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 415                 bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
 416 }
 417
 418 void ext4_itable_unused_set(struct super_block *sb,
 419                           struct ext4_group_desc *bg, __u32 count)
 420 {
 421         bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
 422         if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
 423                 bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
 424 }
 425
 426 static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
 427 {
 428         now = clamp_val(now, 0, (1ull << 40) - 1);
 429
 430         *lo = cpu_to_le32(lower_32_bits(now));
 431         *hi = upper_32_bits(now);
 432 }
 433
 434 static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
 435 {
 436         return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
 437 }
     /*
      * Convenience wrappers: @tstamp names a superblock field, and the
      * matching high byte is the field with an "_hi" suffix (built by
      * token pasting).  ext4_update_tstamp() stores the current real time;
      * ext4_get_tstamp() reads the combined value back.
      */
 438 #define ext4_update_tstamp(es, tstamp) \
 439         __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
 440                              ktime_get_real_seconds())
 441 #define ext4_get_tstamp(es, tstamp) \
 442         __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
 443
     /* Thresholds used by ext4_maybe_update_superblock(). */
 444 #define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
 445 #define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */
 446
 447 /*
 448  * The ext4_maybe_update_superblock() function checks and updates the
 449  * superblock if needed.
 450  *
 451  * This function is designed to update the on-disk superblock only under
 452  * certain conditions to prevent excessive disk writes and unnecessary
 453  * waking of the disk from sleep. The superblock will be updated if:
 454  * 1. More than an hour has passed since the last superblock update, and
 455  * 2. More than 16MB have been written since the last superblock update.
 456  *
 457  * @sb: The superblock
 458  */
 459 static void ext4_maybe_update_superblock(struct super_block *sb)
 460 {
 461         struct ext4_sb_info *sbi = EXT4_SB(sb);
 462         struct ext4_super_block *es = sbi->s_es;
 463         journal_t *journal = sbi->s_journal;
 464         time64_t now;
 465         __u64 last_update;
 466         __u64 lifetime_write_kbytes;
 467         __u64 diff_size;
 468
 469         if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
 470             !journal || (journal->j_flags & JBD2_UNMOUNT))
 471                 return;
 472
 473         now = ktime_get_real_seconds();
 474         last_update = ext4_get_tstamp(es, s_wtime);
 475
 476         if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
 477                 return;
 478
 479         lifetime_write_kbytes = sbi->s_kbytes_written +
 480                 ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
 481                   sbi->s_sectors_written_start) >> 1);
 482
 483         /* Get the number of kilobytes not written to disk to account
 484          * for statistics and compare with a multiple of 16 MB. This
 485          * is used to determine when the next superblock commit should
 486          * occur (i.e. not more often than once per 16MB if there was
 487          * less written in an hour).
 488          */
 489         diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
 490
 491         if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
 492                 schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
 493 }
 494
 495 /*
 496  * The del_gendisk() function uninitializes the disk-specific data
 497  * structures, including the bdi structure, without telling anyone
 498  * else.  Once this happens, any attempt to call mark_buffer_dirty()
 499  * (for example, by ext4_commit_super), will cause a kernel OOPS.
 500  * This is a kludge to prevent these oops until we can put in a proper
 501  * hook in del_gendisk() to inform the VFS and file system layers.
 502  */
 503 static int block_device_ejected(struct super_block *sb)
 504 {
 505         struct inode *bd_inode = sb->s_bdev->bd_inode;
 506         struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
 507
 508         return bdi->dev == NULL;
 509 }
 510
     /*
      * Per-transaction commit callback.  Processes the data freed by the
      * transaction, gives the periodic superblock refresh a chance to run,
      * and then invokes every callback entry queued on the transaction's
      * private list, passing it the journal's abort status.
      */
 511 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 512 {
 513         struct super_block              *sb = journal->j_private;
 514         struct ext4_sb_info             *sbi = EXT4_SB(sb);
 515         int                             error = is_journal_aborted(journal);
 516         struct ext4_journal_cb_entry    *jce;
 517
 518         BUG_ON(txn->t_state == T_FINISHED);
 519
 520         ext4_process_freed_data(sb, txn->t_tid);
 521         ext4_maybe_update_superblock(sb);
 522
          /*
           * s_md_lock is held while manipulating the list but dropped
           * around each jce_func() call; the entry is unlinked first, so
           * the list head is re-read on every iteration.
           */
 523         spin_lock(&sbi->s_md_lock);
 524         while (!list_empty(&txn->t_private_list)) {
 525                 jce = list_entry(txn->t_private_list.next,
 526                                  struct ext4_journal_cb_entry, jce_list);
 527                 list_del_init(&jce->jce_list);
 528                 spin_unlock(&sbi->s_md_lock);
 529                 jce->jce_func(sb, jce, error);
 530                 spin_lock(&sbi->s_md_lock);
 531         }
 532         spin_unlock(&sbi->s_md_lock);
 533 }
 534
 535 /*
 536  * This writepage callback for write_cache_pages()
 537  * takes care of a few cases after page cleaning.
 538  *
 539  * write_cache_pages() already checks for dirty pages
 540  * and calls clear_page_dirty_for_io(), which we want,
 541  * to write protect the pages.
 542  *
 543  * However, we may have to redirty a page (see below.)
 544  */
 545 static int ext4_journalled_writepage_callback(struct folio *folio,
 546                                               struct writeback_control *wbc,
 547                                               void *data)
 548 {
 549         transaction_t *transaction = (transaction_t *) data;
 550         struct buffer_head *bh, *head;
 551         struct journal_head *jh;
 552
 553         bh = head = folio_buffers(folio);
 554         do {
 555                 /*
 556                  * We have to redirty a page in these cases:
 557                  * 1) If buffer is dirty, it means the page was dirty because it
 558                  * contains a buffer that needs checkpointing. So the dirty bit
 559                  * needs to be preserved so that checkpointing writes the buffer
 560                  * properly.
 561                  * 2) If buffer is not part of the committing transaction
 562                  * (we may have just accidentally come across this buffer because
 563                  * inode range tracking is not exact) or if the currently running
 564                  * transaction already contains this buffer as well, dirty bit
 565                  * needs to be preserved so that the buffer gets writeprotected
 566                  * properly on running transaction's commit.
 567                  */
 568                 jh = bh2jh(bh);
 569                 if (buffer_dirty(bh) ||
 570                     (jh && (jh->b_transaction != transaction ||
 571                             jh->b_next_transaction))) {
 572                         folio_redirty_for_writepage(wbc, folio);
 573                         goto out;
 574                 }
 575         } while ((bh = bh->b_this_page) != head);
 576
 577 out:
 578         return AOP_WRITEPAGE_ACTIVATE;
 579 }
 580
 581 static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
 582 {
 583         struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
 584         struct writeback_control wbc = {
 585                 .sync_mode =  WB_SYNC_ALL,
 586                 .nr_to_write = LONG_MAX,
 587                 .range_start = jinode->i_dirty_start,
 588                 .range_end = jinode->i_dirty_end,
 589         };
 590
 591         return write_cache_pages(mapping, &wbc,
 592                                  ext4_journalled_writepage_callback,
 593                                  jinode->i_transaction);
 594 }
 595
 596 static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
 597 {
 598         int ret;
 599
 600         if (ext4_should_journal_data(jinode->i_vfs_inode))
 601                 ret = ext4_journalled_submit_inode_data_buffers(jinode);
 602         else
 603                 ret = ext4_normal_submit_inode_data_buffers(jinode);
 604         return ret;
 605 }
 606
 607 static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
 608 {
 609         int ret = 0;
 610
 611         if (!ext4_should_journal_data(jinode->i_vfs_inode))
 612                 ret = jbd2_journal_finish_inode_data_buffers(jinode);
 613
 614         return ret;
 615 }
 616
 617 static bool system_going_down(void)
 618 {
 619         return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
 620                 || system_state == SYSTEM_RESTART;
 621 }
 622
     /* One row of the errno <-> EXT4_ERR_* code translation table. */
 623 struct ext4_err_translation {
 624         int code;       /* EXT4_ERR_* code */
 625         int errno;      /* matching errno value */
 626 };
 627
     /* Build a table row by pasting the errno name onto the EXT4_ERR_ prefix. */
 628 #define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
 629
 630 static struct ext4_err_translation err_translation[] = {
 631         EXT4_ERR_TRANSLATE(EIO),
 632         EXT4_ERR_TRANSLATE(ENOMEM),
 633         EXT4_ERR_TRANSLATE(EFSBADCRC),
 634         EXT4_ERR_TRANSLATE(EFSCORRUPTED),
 635         EXT4_ERR_TRANSLATE(ENOSPC),
 636         EXT4_ERR_TRANSLATE(ENOKEY),
 637         EXT4_ERR_TRANSLATE(EROFS),
 638         EXT4_ERR_TRANSLATE(EFBIG),
 639         EXT4_ERR_TRANSLATE(EEXIST),
 640         EXT4_ERR_TRANSLATE(ERANGE),
 641         EXT4_ERR_TRANSLATE(EOVERFLOW),
 642         EXT4_ERR_TRANSLATE(EBUSY),
 643         EXT4_ERR_TRANSLATE(ENOTDIR),
 644         EXT4_ERR_TRANSLATE(ENOTEMPTY),
 645         EXT4_ERR_TRANSLATE(ESHUTDOWN),
 646         EXT4_ERR_TRANSLATE(EFAULT),
 647 };
 648
 649 static int ext4_errno_to_code(int errno)
 650 {
 651         int i;
 652
 653         for (i = 0; i < ARRAY_SIZE(err_translation); i++)
 654                 if (err_translation[i].errno == errno)
 655                         return err_translation[i].code;
 656         return EXT4_ERR_UNKNOWN;
 657 }
 658
 659 static void save_error_info(struct super_block *sb, int error,
 660                             __u32 ino, __u64 block,
 661                             const char *func, unsigned int line)
 662 {
 663         struct ext4_sb_info *sbi = EXT4_SB(sb);
 664
 665         /* We default to EFSCORRUPTED error... */
 666         if (error == 0)
 667                 error = EFSCORRUPTED;
 668
 669         spin_lock(&sbi->s_error_lock);
 670         sbi->s_add_error_count++;
 671         sbi->s_last_error_code = error;
 672         sbi->s_last_error_line = line;
 673         sbi->s_last_error_ino = ino;
 674         sbi->s_last_error_block = block;
 675         sbi->s_last_error_func = func;
 676         sbi->s_last_error_time = ktime_get_real_seconds();
 677         if (!sbi->s_first_error_time) {
 678                 sbi->s_first_error_code = error;
 679                 sbi->s_first_error_line = line;
 680                 sbi->s_first_error_ino = ino;
 681                 sbi->s_first_error_block = block;
 682                 sbi->s_first_error_func = func;
 683                 sbi->s_first_error_time = sbi->s_last_error_time;
 684         }
 685         spin_unlock(&sbi->s_error_lock);
 686 }
 687
 688 /* Deal with the reporting of failure conditions on a filesystem such as
 689  * inconsistencies detected or read IO failures.
 690  *
 691  * On ext2, we can store the error state of the filesystem in the
 692  * superblock.  That is not possible on ext4, because we may have other
 693  * write ordering constraints on the superblock which prevent us from
 694  * writing it out straight away; and given that the journal is about to
 695  * be aborted, we can't rely on the current, or future, transactions to
 696  * write out the superblock safely.
 697  *
 698  * We'll just use the jbd2_journal_abort() error code to record an error in
 699  * the journal instead.  On recovery, the journal will complain about
 700  * that error until we've noted it down and cleared it.
 701  *
 702  * If force_ro is set, we unconditionally force the filesystem into an
 703  * ABORT|READONLY state, unless the error response on the fs has been set to
 704  * panic in which case we take the easy way out and panic immediately. This is
 705  * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
 706  * at a critical moment in log management.
      *
      * @error, @ino, @block, @func and @line are passed through unchanged to
      * save_error_info() to describe where the error was detected.
 707  */
 708 static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
 709                               __u32 ino, __u64 block,
 710                               const char *func, unsigned int line)
 711 {
 712         journal_t *journal = EXT4_SB(sb)->s_journal;
          /* Keep running only with errors=continue and no forced read-only. */
 713         bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
 714
 715         EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 716         if (test_opt(sb, WARN_ON_ERROR))
 717                 WARN_ON_ONCE(1);
 718
          /* Not continuing on a writable fs: flag shutdown and abort the journal. */
 719         if (!continue_fs && !sb_rdonly(sb)) {
 720                 set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
 721                 if (journal)
 722                         jbd2_journal_abort(journal, -EIO);
 723         }
 724
          /* Error details are recorded only if the block device is writable. */
 725         if (!bdev_read_only(sb->s_bdev)) {
 726                 save_error_info(sb, error, ino, block, func, line);
 727                 /*
 728                  * In case the fs should keep running, we need to writeout
 729                  * superblock through the journal. Due to lock ordering
 730                  * constraints, it may not be safe to do it right here so we
 731                  * defer superblock flushing to a workqueue.
 732                  */
 733                 if (continue_fs && journal)
 734                         schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
 735                 else
 736                         ext4_commit_super(sb);
 737         }
 738
 739         /*
 740          * We force ERRORS_RO behavior when system is rebooting. Otherwise we
 741          * could panic during 'reboot -f' as the underlying device got already
 742          * disabled.
 743          */
 744         if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
 745                 panic("EXT4-fs (device %s): panic forced after error\n",
 746                         sb->s_id);
 747         }
 748
          /* Already read-only, or allowed to continue: no remount needed. */
 749         if (sb_rdonly(sb) || continue_fs)
 750                 return;
 751
 752         ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
 753         /*
 754          * Make sure updated value of ->s_mount_flags will be visible before
 755          * ->s_flags update
 756          */
 757         smp_wmb();
 758         sb->s_flags |= SB_RDONLY;
 759 }
 760
 761 static void update_super_work(struct work_struct *work)
 762 {
 763         struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
 764                                                 s_sb_upd_work);
 765         journal_t *journal = sbi->s_journal;
 766         handle_t *handle;
 767
 768         /*
 769          * If the journal is still running, we have to write out superblock
 770          * through the journal to avoid collisions of other journalled sb
 771          * updates.
 772          *
 773          * We use directly jbd2 functions here to avoid recursing back into
 774          * ext4 error handling code during handling of previous errors.
 775          */
 776         if (!sb_rdonly(sbi->s_sb) && journal) {
 777                 struct buffer_head *sbh = sbi->s_sbh;
 778                 bool call_notify_err;
 779                 handle = jbd2_journal_start(journal, 1);
 780                 if (IS_ERR(handle))
 781                         goto write_directly;
 782                 if (jbd2_journal_get_write_access(handle, sbh)) {
 783                         jbd2_journal_stop(handle);
 784                         goto write_directly;
 785                 }
 786
 787                 if (sbi->s_add_error_count > 0)
 788                         call_notify_err = true;
 789
 790                 ext4_update_super(sbi->s_sb);
 791                 if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
 792                         ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
 793                                  "superblock detected");
 794                         clear_buffer_write_io_error(sbh);
 795                         set_buffer_uptodate(sbh);
 796                 }
 797
 798                 if (jbd2_journal_dirty_metadata(handle, sbh)) {
 799                         jbd2_journal_stop(handle);
 800                         goto write_directly;
 801                 }
 802                 jbd2_journal_stop(handle);
 803
 804                 if (call_notify_err)
 805                         ext4_notify_error_sysfs(sbi);
 806
 807                 return;
 808         }
 809 write_directly:
 810         /*
 811          * Write through journal failed. Write sb directly to get error info
 812          * out and hope for the best.
 813          */
 814         ext4_commit_super(sbi->s_sb);
 815         ext4_notify_error_sysfs(sbi);
 816 }
 817
 818 #define ext4_error_ratelimit(sb)                                        \
 819                 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),     \
 820                              "EXT4-fs error")
 821
 822 void __ext4_error(struct super_block *sb, const char *function,
 823                   unsigned int line, bool force_ro, int error, __u64 block,
 824                   const char *fmt, ...)
 825 {
 826         struct va_format vaf;
 827         va_list args;
 828
 829         if (unlikely(ext4_forced_shutdown(sb)))
 830                 return;
 831
 832         trace_ext4_error(sb, function, line);
 833         if (ext4_error_ratelimit(sb)) {
 834                 va_start(args, fmt);
 835                 vaf.fmt = fmt;
 836                 vaf.va = &args;
 837                 printk(KERN_CRIT
 838                        "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 839                        sb->s_id, function, line, current->comm, &vaf);
 840                 va_end(args);
 841         }
 842         fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
 843
 844         ext4_handle_error(sb, force_ro, error, 0, block, function, line);
 845 }
 846
 847 void __ext4_error_inode(struct inode *inode, const char *function,
 848                         unsigned int line, ext4_fsblk_t block, int error,
 849                         const char *fmt, ...)
 850 {
 851         va_list args;
 852         struct va_format vaf;
 853
 854         if (unlikely(ext4_forced_shutdown(inode->i_sb)))
 855                 return;
 856
 857         trace_ext4_error(inode->i_sb, function, line);
 858         if (ext4_error_ratelimit(inode->i_sb)) {
 859                 va_start(args, fmt);
 860                 vaf.fmt = fmt;
 861                 vaf.va = &args;
 862                 if (block)
 863                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 864                                "inode #%lu: block %llu: comm %s: %pV\n",
 865                                inode->i_sb->s_id, function, line, inode->i_ino,
 866                                block, current->comm, &vaf);
 867                 else
 868                         printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
 869                                "inode #%lu: comm %s: %pV\n",
 870                                inode->i_sb->s_id, function, line, inode->i_ino,
 871                                current->comm, &vaf);
 872                 va_end(args);
 873         }
 874         fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
 875
 876         ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
 877                           function, line);
 878 }
 879
 880 void __ext4_error_file(struct file *file, const char *function,
 881                        unsigned int line, ext4_fsblk_t block,
 882                        const char *fmt, ...)
 883 {
 884         va_list args;
 885         struct va_format vaf;
 886         struct inode *inode = file_inode(file);
 887         char pathname[80], *path;
 888
 889         if (unlikely(ext4_forced_shutdown(inode->i_sb)))
 890                 return;
 891
 892         trace_ext4_error(inode->i_sb, function, line);
 893         if (ext4_error_ratelimit(inode->i_sb)) {
 894                 path = file_path(file, pathname, sizeof(pathname));
 895                 if (IS_ERR(path))
 896                         path = "(unknown)";
 897                 va_start(args, fmt);
 898                 vaf.fmt = fmt;
 899                 vaf.va = &args;
 900                 if (block)
 901                         printk(KERN_CRIT
 902                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 903                                "block %llu: comm %s: path %s: %pV\n",
 904                                inode->i_sb->s_id, function, line, inode->i_ino,
 905                                block, current->comm, path, &vaf);
 906                 else
 907                         printk(KERN_CRIT
 908                                "EXT4-fs error (device %s): %s:%d: inode #%lu: "
 909                                "comm %s: path %s: %pV\n",
 910                                inode->i_sb->s_id, function, line, inode->i_ino,
 911                                current->comm, path, &vaf);
 912                 va_end(args);
 913         }
 914         fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
 915
 916         ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
 917                           function, line);
 918 }
 919
 920 const char *ext4_decode_error(struct super_block *sb, int errno,
 921                               char nbuf[16])
 922 {
 923         char *errstr = NULL;
 924
 925         switch (errno) {
 926         case -EFSCORRUPTED:
 927                 errstr = "Corrupt filesystem";
 928                 break;
 929         case -EFSBADCRC:
 930                 errstr = "Filesystem failed CRC";
 931                 break;
 932         case -EIO:
 933                 errstr = "IO failure";
 934                 break;
 935         case -ENOMEM:
 936                 errstr = "Out of memory";
 937                 break;
 938         case -EROFS:
 939                 if (!sb || (EXT4_SB(sb)->s_journal &&
 940                             EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
 941                         errstr = "Journal has aborted";
 942                 else
 943                         errstr = "Readonly filesystem";
 944                 break;
 945         default:
 946                 /* If the caller passed in an extra buffer for unknown
 947                  * errors, textualise them now.  Else we just return
 948                  * NULL. */
 949                 if (nbuf) {
 950                         /* Check for truncated error codes... */
 951                         if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
 952                                 errstr = nbuf;
 953                 }
 954                 break;
 955         }
 956
 957         return errstr;
 958 }
 959
 960 /* __ext4_std_error decodes expected errors from journaling functions
 961  * automatically and invokes the appropriate error response.  */
 962
 963 void __ext4_std_error(struct super_block *sb, const char *function,
 964                       unsigned int line, int errno)
 965 {
 966         char nbuf[16];
 967         const char *errstr;
 968
 969         if (unlikely(ext4_forced_shutdown(sb)))
 970                 return;
 971
 972         /* Special case: if the error is EROFS, and we're not already
 973          * inside a transaction, then there's really no point in logging
 974          * an error. */
 975         if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
 976                 return;
 977
 978         if (ext4_error_ratelimit(sb)) {
 979                 errstr = ext4_decode_error(sb, errno, nbuf);
 980                 printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
 981                        sb->s_id, function, line, errstr);
 982         }
 983         fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
 984
 985         ext4_handle_error(sb, false, -errno, 0, 0, function, line);
 986 }
 987
 988 void __ext4_msg(struct super_block *sb,
 989                 const char *prefix, const char *fmt, ...)
 990 {
 991         struct va_format vaf;
 992         va_list args;
 993
 994         if (sb) {
 995                 atomic_inc(&EXT4_SB(sb)->s_msg_count);
 996                 if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
 997                                   "EXT4-fs"))
 998                         return;
 999         }
1000
1001         va_start(args, fmt);
1002         vaf.fmt = fmt;
1003         vaf.va = &args;
1004         if (sb)
1005                 printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
1006         else
1007                 printk("%sEXT4-fs: %pV\n", prefix, &vaf);
1008         va_end(args);
1009 }
1010
1011 static int ext4_warning_ratelimit(struct super_block *sb)
1012 {
1013         atomic_inc(&EXT4_SB(sb)->s_warning_count);
1014         return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
1015                             "EXT4-fs warning");
1016 }
1017
1018 void __ext4_warning(struct super_block *sb, const char *function,
1019                     unsigned int line, const char *fmt, ...)
1020 {
1021         struct va_format vaf;
1022         va_list args;
1023
1024         if (!ext4_warning_ratelimit(sb))
1025                 return;
1026
1027         va_start(args, fmt);
1028         vaf.fmt = fmt;
1029         vaf.va = &args;
1030         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
1031                sb->s_id, function, line, &vaf);
1032         va_end(args);
1033 }
1034
1035 void __ext4_warning_inode(const struct inode *inode, const char *function,
1036                           unsigned int line, const char *fmt, ...)
1037 {
1038         struct va_format vaf;
1039         va_list args;
1040
1041         if (!ext4_warning_ratelimit(inode->i_sb))
1042                 return;
1043
1044         va_start(args, fmt);
1045         vaf.fmt = fmt;
1046         vaf.va = &args;
1047         printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
1048                "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
1049                function, line, inode->i_ino, current->comm, &vaf);
1050         va_end(args);
1051 }
1052
1053 void __ext4_grp_locked_error(const char *function, unsigned int line,
1054                              struct super_block *sb, ext4_group_t grp,
1055                              unsigned long ino, ext4_fsblk_t block,
1056                              const char *fmt, ...)
1057 __releases(bitlock)
1058 __acquires(bitlock)
1059 {
1060         struct va_format vaf;
1061         va_list args;
1062
1063         if (unlikely(ext4_forced_shutdown(sb)))
1064                 return;
1065
1066         trace_ext4_error(sb, function, line);
1067         if (ext4_error_ratelimit(sb)) {
1068                 va_start(args, fmt);
1069                 vaf.fmt = fmt;
1070                 vaf.va = &args;
1071                 printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
1072                        sb->s_id, function, line, grp);
1073                 if (ino)
1074                         printk(KERN_CONT "inode %lu: ", ino);
1075                 if (block)
1076                         printk(KERN_CONT "block %llu:",
1077                                (unsigned long long) block);
1078                 printk(KERN_CONT "%pV\n", &vaf);
1079                 va_end(args);
1080         }
1081
1082         if (test_opt(sb, ERRORS_CONT)) {
1083                 if (test_opt(sb, WARN_ON_ERROR))
1084                         WARN_ON_ONCE(1);
1085                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
1086                 if (!bdev_read_only(sb->s_bdev)) {
1087                         save_error_info(sb, EFSCORRUPTED, ino, block, function,
1088                                         line);
1089                         schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
1090                 }
1091                 return;
1092         }
1093         ext4_unlock_group(sb, grp);
1094         ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
1095         /*
1096          * We only get here in the ERRORS_RO case; relocking the group
1097          * may be dangerous, but nothing bad will happen since the
1098          * filesystem will have already been marked read/only and the
1099          * journal has been aborted.  We return 1 as a hint to callers
1100          * who might what to use the return value from
1101          * ext4_grp_locked_error() to distinguish between the
1102          * ERRORS_CONT and ERRORS_RO case, and perhaps return more
1103          * aggressively from the ext4 function in question, with a
1104          * more appropriate error code.
1105          */
1106         ext4_lock_group(sb, grp);
1107         return;
1108 }
1109
1110 void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
1111                                      ext4_group_t group,
1112                                      unsigned int flags)
1113 {
1114         struct ext4_sb_info *sbi = EXT4_SB(sb);
1115         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1116         struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
1117         int ret;
1118
1119         if (!grp || !gdp)
1120                 return;
1121         if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
1122                 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1123                                             &grp->bb_state);
1124                 if (!ret)
1125                         percpu_counter_sub(&sbi->s_freeclusters_counter,
1126                                            grp->bb_free);
1127         }
1128
1129         if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
1130                 ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
1131                                             &grp->bb_state);
1132                 if (!ret && gdp) {
1133                         int count;
1134
1135                         count = ext4_free_inodes_count(sb, gdp);
1136                         percpu_counter_sub(&sbi->s_freeinodes_counter,
1137                                            count);
1138                 }
1139         }
1140 }
1141
1142 void ext4_update_dynamic_rev(struct super_block *sb)
1143 {
1144         struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1145
1146         if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
1147                 return;
1148
1149         ext4_warning(sb,
1150                      "updating to rev %d because of new feature flag, "
1151                      "running e2fsck is recommended",
1152                      EXT4_DYNAMIC_REV);
1153
1154         es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
1155         es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
1156         es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
1157         /* leave es->s_feature_*compat flags alone */
1158         /* es->s_uuid will be set by e2fsck if empty */
1159
1160         /*
1161          * The rest of the superblock fields should be zero, and if not it
1162          * means they are likely already in use, so leave them alone.  We
1163          * can leave it up to e2fsck to clean up any inconsistencies there.
1164          */
1165 }
1166
1167 static inline struct inode *orphan_list_entry(struct list_head *l)
1168 {
1169         return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
1170 }
1171
1172 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
1173 {
1174         struct list_head *l;
1175
1176         ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
1177                  le32_to_cpu(sbi->s_es->s_last_orphan));
1178
1179         printk(KERN_ERR "sb_info orphan list:\n");
1180         list_for_each(l, &sbi->s_orphan) {
1181                 struct inode *inode = orphan_list_entry(l);
1182                 printk(KERN_ERR "  "
1183                        "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1184                        inode->i_sb->s_id, inode->i_ino, inode,
1185                        inode->i_mode, inode->i_nlink,
1186                        NEXT_ORPHAN(inode));
1187         }
1188 }
1189
1190 #ifdef CONFIG_QUOTA
1191 static int ext4_quota_off(struct super_block *sb, int type);
1192
1193 static inline void ext4_quotas_off(struct super_block *sb, int type)
1194 {
1195         BUG_ON(type > EXT4_MAXQUOTAS);
1196
1197         /* Use our quota_off function to clear inode flags etc. */
1198         for (type--; type >= 0; type--)
1199                 ext4_quota_off(sb, type);
1200 }
1201
1202 /*
1203  * This is a helper function which is used in the mount/remount
1204  * codepaths (which holds s_umount) to fetch the quota file name.
1205  */
1206 static inline char *get_qf_name(struct super_block *sb,
1207                                 struct ext4_sb_info *sbi,
1208                                 int type)
1209 {
1210         return rcu_dereference_protected(sbi->s_qf_names[type],
1211                                          lockdep_is_held(&sb->s_umount));
1212 }
1213 #else
/* Without CONFIG_QUOTA there is nothing to turn off. */
static inline void ext4_quotas_off(struct super_block *sb, int type) { }
1217 #endif
1218
1219 static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
1220 {
1221         ext4_fsblk_t block;
1222         int err;
1223
1224         block = ext4_count_free_clusters(sbi->s_sb);
1225         ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
1226         err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
1227                                   GFP_KERNEL);
1228         if (!err) {
1229                 unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
1230                 sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
1231                 err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
1232                                           GFP_KERNEL);
1233         }
1234         if (!err)
1235                 err = percpu_counter_init(&sbi->s_dirs_counter,
1236                                           ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
1237         if (!err)
1238                 err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
1239                                           GFP_KERNEL);
1240         if (!err)
1241                 err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
1242                                           GFP_KERNEL);
1243         if (!err)
1244                 err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
1245
1246         if (err)
1247                 ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");
1248
1249         return err;
1250 }
1251
1252 static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
1253 {
1254         percpu_counter_destroy(&sbi->s_freeclusters_counter);
1255         percpu_counter_destroy(&sbi->s_freeinodes_counter);
1256         percpu_counter_destroy(&sbi->s_dirs_counter);
1257         percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
1258         percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
1259         percpu_free_rwsem(&sbi->s_writepages_rwsem);
1260 }
1261
1262 static void ext4_group_desc_free(struct ext4_sb_info *sbi)
1263 {
1264         struct buffer_head **group_desc;
1265         int i;
1266
1267         rcu_read_lock();
1268         group_desc = rcu_dereference(sbi->s_group_desc);
1269         for (i = 0; i < sbi->s_gdb_count; i++)
1270                 brelse(group_desc[i]);
1271         kvfree(group_desc);
1272         rcu_read_unlock();
1273 }
1274
1275 static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
1276 {
1277         struct flex_groups **flex_groups;
1278         int i;
1279
1280         rcu_read_lock();
1281         flex_groups = rcu_dereference(sbi->s_flex_groups);
1282         if (flex_groups) {
1283                 for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1284                         kvfree(flex_groups[i]);
1285                 kvfree(flex_groups);
1286         }
1287         rcu_read_unlock();
1288 }
1289
/*
 * ext4_put_super - tear down all ext4 state attached to a superblock.
 * @sb: superblock being released
 *
 * Stops background users (sysfs, lazy-init request, quotas, the superblock
 * update work, the reserved-conversion workqueue), destroys the journal,
 * writes the superblock one last time if the filesystem is writable, and
 * then frees the in-memory structures, finishing with @sbi itself.  The
 * order of the steps matters; see the comments below.
 *
 * NOTE(review): presumably installed as the ->put_super callback; the
 * operations table is outside this chunk.
 */
static void ext4_put_super(struct super_block *sb)
{
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
        /* Whether the journal was already aborted before we destroyed it. */
        int aborted = 0;
        int err;

        /*
         * Unregister sysfs before destroying the jbd2 journal, since the
         * attr_journal_task attribute could otherwise still be read via
         * sysfs while sbi->s_journal->j_task is NULL.
         *
         * Also unregister sysfs before flushing sbi->s_sb_upd_work: a user
         * may read /proc/fs/ext4/xx/mb_groups during umount, and if
         * metadata verification fails on that read, error work is queued.
         * update_super_work would then call start_this_handle, which may
         * trigger a BUG_ON.
         */
        ext4_unregister_sysfs(sb);

        if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
                ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
                         &sb->s_uuid);

        ext4_unregister_li_request(sb);
        ext4_quotas_off(sb, EXT4_MAXQUOTAS);

        flush_work(&sbi->s_sb_upd_work);
        destroy_workqueue(sbi->rsv_conversion_wq);
        ext4_release_orphan_info(sb);

        if (sbi->s_journal) {
                /* Sample the abort state first; the journal is gone after destroy. */
                aborted = is_journal_aborted(sbi->s_journal);
                err = jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
                if ((err < 0) && !aborted) {
                        ext4_abort(sb, -err, "Couldn't clean up the journal");
                }
        }

        ext4_es_unregister_shrinker(sbi);
        timer_shutdown_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);

        /*
         * Only a writable filesystem whose journal was not aborted gets its
         * recovery/orphan feature flags cleared and its mount state stored.
         */
        if (!sb_rdonly(sb) && !aborted) {
                ext4_clear_feature_journal_needs_recovery(sb);
                ext4_clear_feature_orphan_present(sb);
                es->s_state = cpu_to_le16(sbi->s_mount_state);
        }
        /* A writable filesystem always gets a final superblock write. */
        if (!sb_rdonly(sb))
                ext4_commit_super(sb);

        ext4_group_desc_free(sbi);
        ext4_flex_groups_free(sbi);
        ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA
        /* Free the per-type quota file names. */
        for (int i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(get_qf_name(sb, sbi, i));
#endif

        /* Debugging code just in case the in-memory inode orphan list
         * isn't empty.  The on-disk one can be non-empty if we've
         * detected an error and taken the fs readonly, but the
         * in-memory list had better be clean by this point. */
        if (!list_empty(&sbi->s_orphan))
                dump_orphan_list(sb, sbi);
        ASSERT(list_empty(&sbi->s_orphan));

        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->s_journal_bdev) {
                /*
                 * Invalidate the journal device's buffers.  We don't want them
                 * floating about in memory - the physical journal device may
                 * be hotswapped, and it breaks the `ro-after' testing code.
                 */
                sync_blockdev(sbi->s_journal_bdev);
                invalidate_bdev(sbi->s_journal_bdev);
        }

        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
        sbi->s_ea_inode_cache = NULL;

        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
        sbi->s_ea_block_cache = NULL;

        ext4_stop_mmpd(sbi);

        brelse(sbi->s_sbh);
        sb->s_fs_info = NULL;
        /*
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
        kobject_put(&sbi->s_kobj);
        /* Wait for the kobject release to signal before freeing sbi below. */
        wait_for_completion(&sbi->s_kobj_unregister);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        fs_put_dax(sbi->s_daxdev, NULL);
        fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
        utf8_unload(sb->s_encoding);
#endif
        kfree(sbi);
}
1397
1398 static struct kmem_cache *ext4_inode_cachep;
1399
1400 /*
1401  * Called inside transaction, so use GFP_NOFS
1402  */
1403 static struct inode *ext4_alloc_inode(struct super_block *sb)
1404 {
1405         struct ext4_inode_info *ei;
1406
1407         ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
1408         if (!ei)
1409                 return NULL;
1410
1411         inode_set_iversion(&ei->vfs_inode, 1);
1412         ei->i_flags = 0;
1413         spin_lock_init(&ei->i_raw_lock);
1414         ei->i_prealloc_node = RB_ROOT;
1415         atomic_set(&ei->i_prealloc_active, 0);
1416         rwlock_init(&ei->i_prealloc_lock);
1417         ext4_es_init_tree(&ei->i_es_tree);
1418         rwlock_init(&ei->i_es_lock);
1419         INIT_LIST_HEAD(&ei->i_es_list);
1420         ei->i_es_all_nr = 0;
1421         ei->i_es_shk_nr = 0;
1422         ei->i_es_shrink_lblk = 0;
1423         ei->i_reserved_data_blocks = 0;
1424         spin_lock_init(&(ei->i_block_reservation_lock));
1425         ext4_init_pending_tree(&ei->i_pending_tree);
1426 #ifdef CONFIG_QUOTA
1427         ei->i_reserved_quota = 0;
1428         memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1429 #endif
1430         ei->jinode = NULL;
1431         INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1432         spin_lock_init(&ei->i_completed_io_lock);
1433         ei->i_sync_tid = 0;
1434         ei->i_datasync_tid = 0;
1435         atomic_set(&ei->i_unwritten, 0);
1436         INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1437         ext4_fc_init_inode(&ei->vfs_inode);
1438         mutex_init(&ei->i_fc_lock);
1439         return &ei->vfs_inode;
1440 }
1441
/*
 * Decide whether @inode should be dropped: take the generic answer, and
 * only if that is zero fall back to fscrypt's.  The result is traced and
 * returned.
 */
static int ext4_drop_inode(struct inode *inode)
{
        int should_drop;

        should_drop = generic_drop_inode(inode);
        if (should_drop == 0)
                should_drop = fscrypt_drop_inode(inode);

        trace_ext4_drop_inode(inode, should_drop);
        return should_drop;
}
1452
1453 static void ext4_free_in_core_inode(struct inode *inode)
1454 {
1455         fscrypt_free_inode(inode);
1456         if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1457                 pr_warn("%s: inode %ld still in fc list",
1458                         __func__, inode->i_ino);
1459         }
1460         kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1461 }
1462
1463 static void ext4_destroy_inode(struct inode *inode)
1464 {
1465         if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1466                 ext4_msg(inode->i_sb, KERN_ERR,
1467                          "Inode %lu (%p): orphan list check failed!",
1468                          inode->i_ino, EXT4_I(inode));
1469                 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1470                                 EXT4_I(inode), sizeof(struct ext4_inode_info),
1471                                 true);
1472                 dump_stack();
1473         }
1474
1475         if (EXT4_I(inode)->i_reserved_data_blocks)
1476                 ext4_msg(inode->i_sb, KERN_ERR,
1477                          "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1478                          inode->i_ino, EXT4_I(inode),
1479                          EXT4_I(inode)->i_reserved_data_blocks);
1480 }
1481
1482 static void ext4_shutdown(struct super_block *sb)
1483 {
1484        ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
1485 }
1486
1487 static void init_once(void *foo)
1488 {
1489         struct ext4_inode_info *ei = foo;
1490
1491         INIT_LIST_HEAD(&ei->i_orphan);
1492         init_rwsem(&ei->xattr_sem);
1493         init_rwsem(&ei->i_data_sem);
1494         inode_init_once(&ei->vfs_inode);
1495         ext4_fc_init_inode(&ei->vfs_inode);
1496 }
1497
1498 static int __init init_inodecache(void)
1499 {
1500         ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1501                                 sizeof(struct ext4_inode_info), 0,
1502                                 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1503                                         SLAB_ACCOUNT),
1504                                 offsetof(struct ext4_inode_info, i_data),
1505                                 sizeof_field(struct ext4_inode_info, i_data),
1506                                 init_once);
1507         if (ext4_inode_cachep == NULL)
1508                 return -ENOMEM;
1509         return 0;
1510 }
1511
1512 static void destroy_inodecache(void)
1513 {
1514         /*
1515          * Make sure all delayed rcu free inodes are flushed before we
1516          * destroy cache.
1517          */
1518         rcu_barrier();
1519         kmem_cache_destroy(ext4_inode_cachep);
1520 }
1521
1522 void ext4_clear_inode(struct inode *inode)
1523 {
1524         ext4_fc_del(inode);
1525         invalidate_inode_buffers(inode);
1526         clear_inode(inode);
1527         ext4_discard_preallocations(inode, 0);
1528         ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1529         dquot_drop(inode);
1530         if (EXT4_I(inode)->jinode) {
1531                 jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1532                                                EXT4_I(inode)->jinode);
1533                 jbd2_free_inode(EXT4_I(inode)->jinode);
1534                 EXT4_I(inode)->jinode = NULL;
1535         }
1536         fscrypt_put_encryption_info(inode);
1537         fsverity_cleanup_inode(inode);
1538 }
1539
1540 static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1541                                         u64 ino, u32 generation)
1542 {
1543         struct inode *inode;
1544
1545         /*
1546          * Currently we don't know the generation for parent directory, so
1547          * a generation of 0 means "accept any"
1548          */
1549         inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1550         if (IS_ERR(inode))
1551                 return ERR_CAST(inode);
1552         if (generation && inode->i_generation != generation) {
1553                 iput(inode);
1554                 return ERR_PTR(-ESTALE);
1555         }
1556
1557         return inode;
1558 }
1559
1560 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1561                                         int fh_len, int fh_type)
1562 {
1563         return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1564                                     ext4_nfs_get_inode);
1565 }
1566
1567 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1568                                         int fh_len, int fh_type)
1569 {
1570         return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1571                                     ext4_nfs_get_inode);
1572 }
1573
1574 static int ext4_nfs_commit_metadata(struct inode *inode)
1575 {
1576         struct writeback_control wbc = {
1577                 .sync_mode = WB_SYNC_ALL
1578         };
1579
1580         trace_ext4_nfs_commit_metadata(inode);
1581         return ext4_write_inode(inode, &wbc);
1582 }
1583
1584 #ifdef CONFIG_QUOTA
1585 static const char * const quotatypes[] = INITQFNAMES;
1586 #define QTYPE2NAME(t) (quotatypes[t])
1587
1588 static int ext4_write_dquot(struct dquot *dquot);
1589 static int ext4_acquire_dquot(struct dquot *dquot);
1590 static int ext4_release_dquot(struct dquot *dquot);
1591 static int ext4_mark_dquot_dirty(struct dquot *dquot);
1592 static int ext4_write_info(struct super_block *sb, int type);
1593 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1594                          const struct path *path);
1595 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1596                                size_t len, loff_t off);
1597 static ssize_t ext4_quota_write(struct super_block *sb, int type,
1598                                 const char *data, size_t len, loff_t off);
1599 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1600                              unsigned int flags);
1601
1602 static struct dquot **ext4_get_dquots(struct inode *inode)
1603 {
1604         return EXT4_I(inode)->i_dquot;
1605 }
1606
1607 static const struct dquot_operations ext4_quota_operations = {
1608         .get_reserved_space     = ext4_get_reserved_space,
1609         .write_dquot            = ext4_write_dquot,
1610         .acquire_dquot          = ext4_acquire_dquot,
1611         .release_dquot          = ext4_release_dquot,
1612         .mark_dirty             = ext4_mark_dquot_dirty,
1613         .write_info             = ext4_write_info,
1614         .alloc_dquot            = dquot_alloc,
1615         .destroy_dquot          = dquot_destroy,
1616         .get_projid             = ext4_get_projid,
1617         .get_inode_usage        = ext4_get_inode_usage,
1618         .get_next_id            = dquot_get_next_id,
1619 };
1620
1621 static const struct quotactl_ops ext4_qctl_operations = {
1622         .quota_on       = ext4_quota_on,
1623         .quota_off      = ext4_quota_off,
1624         .quota_sync     = dquot_quota_sync,
1625         .get_state      = dquot_get_state,
1626         .set_info       = dquot_set_dqinfo,
1627         .get_dqblk      = dquot_get_dqblk,
1628         .set_dqblk      = dquot_set_dqblk,
1629         .get_nextdqblk  = dquot_get_next_dqblk,
1630 };
1631 #endif
1632
1633 static const struct super_operations ext4_sops = {
1634         .alloc_inode    = ext4_alloc_inode,
1635         .free_inode     = ext4_free_in_core_inode,
1636         .destroy_inode  = ext4_destroy_inode,
1637         .write_inode    = ext4_write_inode,
1638         .dirty_inode    = ext4_dirty_inode,
1639         .drop_inode     = ext4_drop_inode,
1640         .evict_inode    = ext4_evict_inode,
1641         .put_super      = ext4_put_super,
1642         .sync_fs        = ext4_sync_fs,
1643         .freeze_fs      = ext4_freeze,
1644         .unfreeze_fs    = ext4_unfreeze,
1645         .statfs         = ext4_statfs,
1646         .show_options   = ext4_show_options,
1647         .shutdown       = ext4_shutdown,
1648 #ifdef CONFIG_QUOTA
1649         .quota_read     = ext4_quota_read,
1650         .quota_write    = ext4_quota_write,
1651         .get_dquots     = ext4_get_dquots,
1652 #endif
1653 };
1654
1655 static const struct export_operations ext4_export_ops = {
1656         .fh_to_dentry = ext4_fh_to_dentry,
1657         .fh_to_parent = ext4_fh_to_parent,
1658         .get_parent = ext4_get_parent,
1659         .commit_metadata = ext4_nfs_commit_metadata,
1660 };
1661
/*
 * Mount option tokens.  The values are used as the option id in
 * ext4_param_specs[], as lookup keys in ext4_mount_opts[] and as the
 * values of some enum-valued parameters.  Opt_err terminates
 * ext4_mount_opts[].  The enumerator order is kept as-is.
 */
enum {
	Opt_bsd_df,
	Opt_minix_df,
	Opt_grpid,
	Opt_nogrpid,
	Opt_resgid,
	Opt_resuid,
	Opt_sb,
	Opt_nouid32,
	Opt_debug,
	Opt_removed,
	Opt_user_xattr,
	Opt_acl,
	Opt_auto_da_alloc,
	Opt_noauto_da_alloc,
	Opt_noload,
	Opt_commit,
	Opt_min_batch_time,
	Opt_max_batch_time,
	Opt_journal_dev,
	Opt_journal_path,
	Opt_journal_checksum,
	Opt_journal_async_commit,
	Opt_abort,
	Opt_data_journal,
	Opt_data_ordered,
	Opt_data_writeback,
	Opt_data_err_abort,
	Opt_data_err_ignore,
	Opt_test_dummy_encryption,
	Opt_inlinecrypt,
	Opt_usrjquota,
	Opt_grpjquota,
	Opt_quota,
	Opt_noquota,
	Opt_barrier,
	Opt_nobarrier,
	Opt_err,
	Opt_usrquota,
	Opt_grpquota,
	Opt_prjquota,
	Opt_dax,
	Opt_dax_always,
	Opt_dax_inode,
	Opt_dax_never,
	Opt_stripe,
	Opt_delalloc,
	Opt_nodelalloc,
	Opt_warn_on_error,
	Opt_nowarn_on_error,
	Opt_mblk_io_submit,
	Opt_debug_want_extra_isize,
	Opt_nomblk_io_submit,
	Opt_block_validity,
	Opt_noblock_validity,
	Opt_inode_readahead_blks,
	Opt_journal_ioprio,
	Opt_dioread_nolock,
	Opt_dioread_lock,
	Opt_discard,
	Opt_nodiscard,
	Opt_init_itable,
	Opt_noinit_itable,
	Opt_max_dir_size_kb,
	Opt_nojournal_checksum,
	Opt_nombcache,
	Opt_no_prefetch_block_bitmaps,
	Opt_mb_optimize_scan,
	Opt_errors,
	Opt_data,
	Opt_data_err,
	Opt_jqfmt,
	Opt_dax_type,
#ifdef CONFIG_EXT4_DEBUG
	Opt_fc_debug_max_replay,
	Opt_fc_debug_force
#endif
};
1690
1691 static const struct constant_table ext4_param_errors[] = {
1692         {"continue",    EXT4_MOUNT_ERRORS_CONT},
1693         {"panic",       EXT4_MOUNT_ERRORS_PANIC},
1694         {"remount-ro",  EXT4_MOUNT_ERRORS_RO},
1695         {}
1696 };
1697
1698 static const struct constant_table ext4_param_data[] = {
1699         {"journal",     EXT4_MOUNT_JOURNAL_DATA},
1700         {"ordered",     EXT4_MOUNT_ORDERED_DATA},
1701         {"writeback",   EXT4_MOUNT_WRITEBACK_DATA},
1702         {}
1703 };
1704
1705 static const struct constant_table ext4_param_data_err[] = {
1706         {"abort",       Opt_data_err_abort},
1707         {"ignore",      Opt_data_err_ignore},
1708         {}
1709 };
1710
1711 static const struct constant_table ext4_param_jqfmt[] = {
1712         {"vfsold",      QFMT_VFS_OLD},
1713         {"vfsv0",       QFMT_VFS_V0},
1714         {"vfsv1",       QFMT_VFS_V1},
1715         {}
1716 };
1717
1718 static const struct constant_table ext4_param_dax[] = {
1719         {"always",      Opt_dax_always},
1720         {"inode",       Opt_dax_inode},
1721         {"never",       Opt_dax_never},
1722         {}
1723 };
1724
/*
 * Like a string parameter, but flagged fs_param_can_be_empty so that an
 * empty argument is accepted.
 */
#define fsparam_string_empty(NAME, OPT)					\
	__fsparam(fs_param_is_string, NAME, OPT,			\
		  fs_param_can_be_empty, NULL)
1728
1729 /*
1730  * Mount option specification
1731  * We don't use fsparam_flag_no because of the way we set the
1732  * options and the way we show them in _ext4_show_options(). To
1733  * keep the changes to a minimum, let's keep the negative options
1734  * separate for now.
1735  */
1736 static const struct fs_parameter_spec ext4_param_specs[] = {
1737         fsparam_flag    ("bsddf",               Opt_bsd_df),
1738         fsparam_flag    ("minixdf",             Opt_minix_df),
1739         fsparam_flag    ("grpid",               Opt_grpid),
1740         fsparam_flag    ("bsdgroups",           Opt_grpid),
1741         fsparam_flag    ("nogrpid",             Opt_nogrpid),
1742         fsparam_flag    ("sysvgroups",          Opt_nogrpid),
1743         fsparam_u32     ("resgid",              Opt_resgid),
1744         fsparam_u32     ("resuid",              Opt_resuid),
1745         fsparam_u32     ("sb",                  Opt_sb),
1746         fsparam_enum    ("errors",              Opt_errors, ext4_param_errors),
1747         fsparam_flag    ("nouid32",             Opt_nouid32),
1748         fsparam_flag    ("debug",               Opt_debug),
1749         fsparam_flag    ("oldalloc",            Opt_removed),
1750         fsparam_flag    ("orlov",               Opt_removed),
1751         fsparam_flag    ("user_xattr",          Opt_user_xattr),
1752         fsparam_flag    ("acl",                 Opt_acl),
1753         fsparam_flag    ("norecovery",          Opt_noload),
1754         fsparam_flag    ("noload",              Opt_noload),
1755         fsparam_flag    ("bh",                  Opt_removed),
1756         fsparam_flag    ("nobh",                Opt_removed),
1757         fsparam_u32     ("commit",              Opt_commit),
1758         fsparam_u32     ("min_batch_time",      Opt_min_batch_time),
1759         fsparam_u32     ("max_batch_time",      Opt_max_batch_time),
1760         fsparam_u32     ("journal_dev",         Opt_journal_dev),
1761         fsparam_bdev    ("journal_path",        Opt_journal_path),
1762         fsparam_flag    ("journal_checksum",    Opt_journal_checksum),
1763         fsparam_flag    ("nojournal_checksum",  Opt_nojournal_checksum),
1764         fsparam_flag    ("journal_async_commit",Opt_journal_async_commit),
1765         fsparam_flag    ("abort",               Opt_abort),
1766         fsparam_enum    ("data",                Opt_data, ext4_param_data),
1767         fsparam_enum    ("data_err",            Opt_data_err,
1768                                                 ext4_param_data_err),
1769         fsparam_string_empty
1770                         ("usrjquota",           Opt_usrjquota),
1771         fsparam_string_empty
1772                         ("grpjquota",           Opt_grpjquota),
1773         fsparam_enum    ("jqfmt",               Opt_jqfmt, ext4_param_jqfmt),
1774         fsparam_flag    ("grpquota",            Opt_grpquota),
1775         fsparam_flag    ("quota",               Opt_quota),
1776         fsparam_flag    ("noquota",             Opt_noquota),
1777         fsparam_flag    ("usrquota",            Opt_usrquota),
1778         fsparam_flag    ("prjquota",            Opt_prjquota),
1779         fsparam_flag    ("barrier",             Opt_barrier),
1780         fsparam_u32     ("barrier",             Opt_barrier),
1781         fsparam_flag    ("nobarrier",           Opt_nobarrier),
1782         fsparam_flag    ("i_version",           Opt_removed),
1783         fsparam_flag    ("dax",                 Opt_dax),
1784         fsparam_enum    ("dax",                 Opt_dax_type, ext4_param_dax),
1785         fsparam_u32     ("stripe",              Opt_stripe),
1786         fsparam_flag    ("delalloc",            Opt_delalloc),
1787         fsparam_flag    ("nodelalloc",          Opt_nodelalloc),
1788         fsparam_flag    ("warn_on_error",       Opt_warn_on_error),
1789         fsparam_flag    ("nowarn_on_error",     Opt_nowarn_on_error),
1790         fsparam_u32     ("debug_want_extra_isize",
1791                                                 Opt_debug_want_extra_isize),
1792         fsparam_flag    ("mblk_io_submit",      Opt_removed),
1793         fsparam_flag    ("nomblk_io_submit",    Opt_removed),
1794         fsparam_flag    ("block_validity",      Opt_block_validity),
1795         fsparam_flag    ("noblock_validity",    Opt_noblock_validity),
1796         fsparam_u32     ("inode_readahead_blks",
1797                                                 Opt_inode_readahead_blks),
1798         fsparam_u32     ("journal_ioprio",      Opt_journal_ioprio),
1799         fsparam_u32     ("auto_da_alloc",       Opt_auto_da_alloc),
1800         fsparam_flag    ("auto_da_alloc",       Opt_auto_da_alloc),
1801         fsparam_flag    ("noauto_da_alloc",     Opt_noauto_da_alloc),
1802         fsparam_flag    ("dioread_nolock",      Opt_dioread_nolock),
1803         fsparam_flag    ("nodioread_nolock",    Opt_dioread_lock),
1804         fsparam_flag    ("dioread_lock",        Opt_dioread_lock),
1805         fsparam_flag    ("discard",             Opt_discard),
1806         fsparam_flag    ("nodiscard",           Opt_nodiscard),
1807         fsparam_u32     ("init_itable",         Opt_init_itable),
1808         fsparam_flag    ("init_itable",         Opt_init_itable),
1809         fsparam_flag    ("noinit_itable",       Opt_noinit_itable),
1810 #ifdef CONFIG_EXT4_DEBUG
1811         fsparam_flag    ("fc_debug_force",      Opt_fc_debug_force),
1812         fsparam_u32     ("fc_debug_max_replay", Opt_fc_debug_max_replay),
1813 #endif
1814         fsparam_u32     ("max_dir_size_kb",     Opt_max_dir_size_kb),
1815         fsparam_flag    ("test_dummy_encryption",
1816                                                 Opt_test_dummy_encryption),
1817         fsparam_string  ("test_dummy_encryption",
1818                                                 Opt_test_dummy_encryption),
1819         fsparam_flag    ("inlinecrypt",         Opt_inlinecrypt),
1820         fsparam_flag    ("nombcache",           Opt_nombcache),
1821         fsparam_flag    ("no_mbcache",          Opt_nombcache), /* for backward compatibility */
1822         fsparam_flag    ("prefetch_block_bitmaps",
1823                                                 Opt_removed),
1824         fsparam_flag    ("no_prefetch_block_bitmaps",
1825                                                 Opt_no_prefetch_block_bitmaps),
1826         fsparam_s32     ("mb_optimize_scan",    Opt_mb_optimize_scan),
1827         fsparam_string  ("check",               Opt_removed),   /* mount option from ext2/3 */
1828         fsparam_flag    ("nocheck",             Opt_removed),   /* mount option from ext2/3 */
1829         fsparam_flag    ("reservation",         Opt_removed),   /* mount option from ext2/3 */
1830         fsparam_flag    ("noreservation",       Opt_removed),   /* mount option from ext2/3 */
1831         fsparam_u32     ("journal",             Opt_removed),   /* mount option from ext2/3 */
1832         {}
1833 };
1834
/* I/O priority value built from class IOPRIO_CLASS_BE and priority data 3. */
#define DEFAULT_JOURNAL_IOPRIO \
	(IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1836
/*
 * MOPT_* flags stored in the "flags" member of ext4_mount_opts[] entries.
 * Without CONFIG_QUOTA, the quota-related flags collapse to
 * MOPT_NOSUPPORT, which makes ext4_parse_param() report the option as
 * "not supported".
 */
#define MOPT_SET	(1 << 0)
#define MOPT_CLEAR	(1 << 1)
#define MOPT_NOSUPPORT	(1 << 2)
#define MOPT_EXPLICIT	(1 << 3)
#ifdef CONFIG_QUOTA
#define MOPT_Q		0
#define MOPT_QFMT	(1 << 4)
#else
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_NO_EXT2	(1 << 5)
#define MOPT_NO_EXT3	(1 << 6)
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_SKIP	(1 << 7)
#define MOPT_2		(1 << 8)
1853
1854 static const struct mount_opts {
1855         int     token;
1856         int     mount_opt;
1857         int     flags;
1858 } ext4_mount_opts[] = {
1859         {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1860         {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1861         {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1862         {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1863         {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1864         {Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1865         {Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1866          MOPT_EXT4_ONLY | MOPT_SET},
1867         {Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1868          MOPT_EXT4_ONLY | MOPT_CLEAR},
1869         {Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1870         {Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1871         {Opt_delalloc, EXT4_MOUNT_DELALLOC,
1872          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1873         {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1874          MOPT_EXT4_ONLY | MOPT_CLEAR},
1875         {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1876         {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1877         {Opt_commit, 0, MOPT_NO_EXT2},
1878         {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1879          MOPT_EXT4_ONLY | MOPT_CLEAR},
1880         {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1881          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1882         {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1883                                     EXT4_MOUNT_JOURNAL_CHECKSUM),
1884          MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1885         {Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1886         {Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
1887         {Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1888         {Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1889         {Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1890         {Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1891         {Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1892         {Opt_dax_type, 0, MOPT_EXT4_ONLY},
1893         {Opt_journal_dev, 0, MOPT_NO_EXT2},
1894         {Opt_journal_path, 0, MOPT_NO_EXT2},
1895         {Opt_journal_ioprio, 0, MOPT_NO_EXT2},
1896         {Opt_data, 0, MOPT_NO_EXT2},
1897         {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1898 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1899         {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1900 #else
1901         {Opt_acl, 0, MOPT_NOSUPPORT},
1902 #endif
1903         {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1904         {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1905         {Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1906         {Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1907                                                         MOPT_SET | MOPT_Q},
1908         {Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1909                                                         MOPT_SET | MOPT_Q},
1910         {Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1911                                                         MOPT_SET | MOPT_Q},
1912         {Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1913                        EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1914                                                         MOPT_CLEAR | MOPT_Q},
1915         {Opt_usrjquota, 0, MOPT_Q},
1916         {Opt_grpjquota, 0, MOPT_Q},
1917         {Opt_jqfmt, 0, MOPT_QFMT},
1918         {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1919         {Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
1920          MOPT_SET},
1921 #ifdef CONFIG_EXT4_DEBUG
1922         {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
1923          MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
1924 #endif
1925         {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
1926         {Opt_err, 0, 0}
1927 };
1928
1929 #if IS_ENABLED(CONFIG_UNICODE)
1930 static const struct ext4_sb_encodings {
1931         __u16 magic;
1932         char *name;
1933         unsigned int version;
1934 } ext4_sb_encoding_map[] = {
1935         {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1936 };
1937
1938 static const struct ext4_sb_encodings *
1939 ext4_sb_read_encoding(const struct ext4_super_block *es)
1940 {
1941         __u16 magic = le16_to_cpu(es->s_encoding);
1942         int i;
1943
1944         for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1945                 if (magic == ext4_sb_encoding_map[i].magic)
1946                         return &ext4_sb_encoding_map[i];
1947
1948         return NULL;
1949 }
1950 #endif
1951
/*
 * Bits for ext4_fs_context::spec.  ext4_parse_param() and the quota name
 * helpers set a bit when the matching option was given.  Bit 6 is not
 * assigned.
 */
#define EXT4_SPEC_JQUOTA			(1 << 0)
#define EXT4_SPEC_JQFMT				(1 << 1)
#define EXT4_SPEC_DATAJ				(1 << 2)
#define EXT4_SPEC_SB_BLOCK			(1 << 3)
#define EXT4_SPEC_JOURNAL_DEV			(1 << 4)
#define EXT4_SPEC_JOURNAL_IOPRIO		(1 << 5)
#define EXT4_SPEC_s_want_extra_isize		(1 << 7)
#define EXT4_SPEC_s_max_batch_time		(1 << 8)
#define EXT4_SPEC_s_min_batch_time		(1 << 9)
#define EXT4_SPEC_s_inode_readahead_blks	(1 << 10)
#define EXT4_SPEC_s_li_wait_mult		(1 << 11)
#define EXT4_SPEC_s_max_dir_size_kb		(1 << 12)
#define EXT4_SPEC_s_stripe			(1 << 13)
#define EXT4_SPEC_s_resuid			(1 << 14)
#define EXT4_SPEC_s_resgid			(1 << 15)
#define EXT4_SPEC_s_commit_interval		(1 << 16)
#define EXT4_SPEC_s_fc_debug_max_replay		(1 << 17)
#define EXT4_SPEC_s_sb_block			(1 << 18)
#define EXT4_SPEC_mb_optimize_scan		(1 << 19)
1971
1972 struct ext4_fs_context {
1973         char            *s_qf_names[EXT4_MAXQUOTAS];
1974         struct fscrypt_dummy_policy dummy_enc_policy;
1975         int             s_jquota_fmt;   /* Format of quota to use */
1976 #ifdef CONFIG_EXT4_DEBUG
1977         int s_fc_debug_max_replay;
1978 #endif
1979         unsigned short  qname_spec;
1980         unsigned long   vals_s_flags;   /* Bits to set in s_flags */
1981         unsigned long   mask_s_flags;   /* Bits changed in s_flags */
1982         unsigned long   journal_devnum;
1983         unsigned long   s_commit_interval;
1984         unsigned long   s_stripe;
1985         unsigned int    s_inode_readahead_blks;
1986         unsigned int    s_want_extra_isize;
1987         unsigned int    s_li_wait_mult;
1988         unsigned int    s_max_dir_size_kb;
1989         unsigned int    journal_ioprio;
1990         unsigned int    vals_s_mount_opt;
1991         unsigned int    mask_s_mount_opt;
1992         unsigned int    vals_s_mount_opt2;
1993         unsigned int    mask_s_mount_opt2;
1994         unsigned int    opt_flags;      /* MOPT flags */
1995         unsigned int    spec;
1996         u32             s_max_batch_time;
1997         u32             s_min_batch_time;
1998         kuid_t          s_resuid;
1999         kgid_t          s_resgid;
2000         ext4_fsblk_t    s_sb_block;
2001 };
2002
2003 static void ext4_fc_free(struct fs_context *fc)
2004 {
2005         struct ext4_fs_context *ctx = fc->fs_private;
2006         int i;
2007
2008         if (!ctx)
2009                 return;
2010
2011         for (i = 0; i < EXT4_MAXQUOTAS; i++)
2012                 kfree(ctx->s_qf_names[i]);
2013
2014         fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
2015         kfree(ctx);
2016 }
2017
2018 int ext4_init_fs_context(struct fs_context *fc)
2019 {
2020         struct ext4_fs_context *ctx;
2021
2022         ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2023         if (!ctx)
2024                 return -ENOMEM;
2025
2026         fc->fs_private = ctx;
2027         fc->ops = &ext4_context_ops;
2028
2029         return 0;
2030 }
2031
2032 #ifdef CONFIG_QUOTA
2033 /*
2034  * Note the name of the specified quota file.
2035  */
2036 static int note_qf_name(struct fs_context *fc, int qtype,
2037                        struct fs_parameter *param)
2038 {
2039         struct ext4_fs_context *ctx = fc->fs_private;
2040         char *qname;
2041
2042         if (param->size < 1) {
2043                 ext4_msg(NULL, KERN_ERR, "Missing quota name");
2044                 return -EINVAL;
2045         }
2046         if (strchr(param->string, '/')) {
2047                 ext4_msg(NULL, KERN_ERR,
2048                          "quotafile must be on filesystem root");
2049                 return -EINVAL;
2050         }
2051         if (ctx->s_qf_names[qtype]) {
2052                 if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
2053                         ext4_msg(NULL, KERN_ERR,
2054                                  "%s quota file already specified",
2055                                  QTYPE2NAME(qtype));
2056                         return -EINVAL;
2057                 }
2058                 return 0;
2059         }
2060
2061         qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
2062         if (!qname) {
2063                 ext4_msg(NULL, KERN_ERR,
2064                          "Not enough memory for storing quotafile name");
2065                 return -ENOMEM;
2066         }
2067         ctx->s_qf_names[qtype] = qname;
2068         ctx->qname_spec |= 1 << qtype;
2069         ctx->spec |= EXT4_SPEC_JQUOTA;
2070         return 0;
2071 }
2072
2073 /*
2074  * Clear the name of the specified quota file.
2075  */
2076 static int unnote_qf_name(struct fs_context *fc, int qtype)
2077 {
2078         struct ext4_fs_context *ctx = fc->fs_private;
2079
2080         if (ctx->s_qf_names[qtype])
2081                 kfree(ctx->s_qf_names[qtype]);
2082
2083         ctx->s_qf_names[qtype] = NULL;
2084         ctx->qname_spec |= 1 << qtype;
2085         ctx->spec |= EXT4_SPEC_JQUOTA;
2086         return 0;
2087 }
2088 #endif
2089
2090 static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2091                                             struct ext4_fs_context *ctx)
2092 {
2093         int err;
2094
2095         if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2096                 ext4_msg(NULL, KERN_WARNING,
2097                          "test_dummy_encryption option not supported");
2098                 return -EINVAL;
2099         }
2100         err = fscrypt_parse_test_dummy_encryption(param,
2101                                                   &ctx->dummy_enc_policy);
2102         if (err == -EINVAL) {
2103                 ext4_msg(NULL, KERN_WARNING,
2104                          "Value of option \"%s\" is unrecognized", param->key);
2105         } else if (err == -EEXIST) {
2106                 ext4_msg(NULL, KERN_WARNING,
2107                          "Conflicting test_dummy_encryption options");
2108                 return -EINVAL;
2109         }
2110         return err;
2111 }
2112
2113 #define EXT4_SET_CTX(name)                                              \
2114 static inline void ctx_set_##name(struct ext4_fs_context *ctx,          \
2115                                   unsigned long flag)                   \
2116 {                                                                       \
2117         ctx->mask_s_##name |= flag;                                     \
2118         ctx->vals_s_##name |= flag;                                     \
2119 }
2120
2121 #define EXT4_CLEAR_CTX(name)                                            \
2122 static inline void ctx_clear_##name(struct ext4_fs_context *ctx,        \
2123                                     unsigned long flag)                 \
2124 {                                                                       \
2125         ctx->mask_s_##name |= flag;                                     \
2126         ctx->vals_s_##name &= ~flag;                                    \
2127 }
2128
2129 #define EXT4_TEST_CTX(name)                                             \
2130 static inline unsigned long                                             \
2131 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)        \
2132 {                                                                       \
2133         return (ctx->vals_s_##name & flag);                             \
2134 }
2135
2136 EXT4_SET_CTX(flags); /* set only */
2137 EXT4_SET_CTX(mount_opt);
2138 EXT4_CLEAR_CTX(mount_opt);
2139 EXT4_TEST_CTX(mount_opt);
2140 EXT4_SET_CTX(mount_opt2);
2141 EXT4_CLEAR_CTX(mount_opt2);
2142 EXT4_TEST_CTX(mount_opt2);
2143
2144 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2145 {
2146         struct ext4_fs_context *ctx = fc->fs_private;
2147         struct fs_parse_result result;
2148         const struct mount_opts *m;
2149         int is_remount;
2150         kuid_t uid;
2151         kgid_t gid;
2152         int token;
2153
2154         token = fs_parse(fc, ext4_param_specs, param, &result);
2155         if (token < 0)
2156                 return token;
2157         is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2158
2159         for (m = ext4_mount_opts; m->token != Opt_err; m++)
2160                 if (token == m->token)
2161                         break;
2162
2163         ctx->opt_flags |= m->flags;
2164
2165         if (m->flags & MOPT_EXPLICIT) {
2166                 if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2167                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2168                 } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2169                         ctx_set_mount_opt2(ctx,
2170                                        EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2171                 } else
2172                         return -EINVAL;
2173         }
2174
2175         if (m->flags & MOPT_NOSUPPORT) {
2176                 ext4_msg(NULL, KERN_ERR, "%s option not supported",
2177                          param->key);
2178                 return 0;
2179         }
2180
2181         switch (token) {
2182 #ifdef CONFIG_QUOTA
2183         case Opt_usrjquota:
2184                 if (!*param->string)
2185                         return unnote_qf_name(fc, USRQUOTA);
2186                 else
2187                         return note_qf_name(fc, USRQUOTA, param);
2188         case Opt_grpjquota:
2189                 if (!*param->string)
2190                         return unnote_qf_name(fc, GRPQUOTA);
2191                 else
2192                         return note_qf_name(fc, GRPQUOTA, param);
2193 #endif
2194         case Opt_sb:
2195                 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2196                         ext4_msg(NULL, KERN_WARNING,
2197                                  "Ignoring %s option on remount", param->key);
2198                 } else {
2199                         ctx->s_sb_block = result.uint_32;
2200                         ctx->spec |= EXT4_SPEC_s_sb_block;
2201                 }
2202                 return 0;
2203         case Opt_removed:
2204                 ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2205                          param->key);
2206                 return 0;
2207         case Opt_inlinecrypt:
2208 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2209                 ctx_set_flags(ctx, SB_INLINECRYPT);
2210 #else
2211                 ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2212 #endif
2213                 return 0;
2214         case Opt_errors:
2215                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2216                 ctx_set_mount_opt(ctx, result.uint_32);
2217                 return 0;
2218 #ifdef CONFIG_QUOTA
2219         case Opt_jqfmt:
2220                 ctx->s_jquota_fmt = result.uint_32;
2221                 ctx->spec |= EXT4_SPEC_JQFMT;
2222                 return 0;
2223 #endif
2224         case Opt_data:
2225                 ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2226                 ctx_set_mount_opt(ctx, result.uint_32);
2227                 ctx->spec |= EXT4_SPEC_DATAJ;
2228                 return 0;
2229         case Opt_commit:
2230                 if (result.uint_32 == 0)
2231                         result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
2232                 else if (result.uint_32 > INT_MAX / HZ) {
2233                         ext4_msg(NULL, KERN_ERR,
2234                                  "Invalid commit interval %d, "
2235                                  "must be smaller than %d",
2236                                  result.uint_32, INT_MAX / HZ);
2237                         return -EINVAL;
2238                 }
2239                 ctx->s_commit_interval = HZ * result.uint_32;
2240                 ctx->spec |= EXT4_SPEC_s_commit_interval;
2241                 return 0;
2242         case Opt_debug_want_extra_isize:
2243                 if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2244                         ext4_msg(NULL, KERN_ERR,
2245                                  "Invalid want_extra_isize %d", result.uint_32);
2246                         return -EINVAL;
2247                 }
2248                 ctx->s_want_extra_isize = result.uint_32;
2249                 ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2250                 return 0;
2251         case Opt_max_batch_time:
2252                 ctx->s_max_batch_time = result.uint_32;
2253                 ctx->spec |= EXT4_SPEC_s_max_batch_time;
2254                 return 0;
2255         case Opt_min_batch_time:
2256                 ctx->s_min_batch_time = result.uint_32;
2257                 ctx->spec |= EXT4_SPEC_s_min_batch_time;
2258                 return 0;
2259         case Opt_inode_readahead_blks:
2260                 if (result.uint_32 &&
2261                     (result.uint_32 > (1 << 30) ||
2262                      !is_power_of_2(result.uint_32))) {
2263                         ext4_msg(NULL, KERN_ERR,
2264                                  "EXT4-fs: inode_readahead_blks must be "
2265                                  "0 or a power of 2 smaller than 2^31");
2266                         return -EINVAL;
2267                 }
2268                 ctx->s_inode_readahead_blks = result.uint_32;
2269                 ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2270                 return 0;
2271         case Opt_init_itable:
2272                 ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2273                 ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2274                 if (param->type == fs_value_is_string)
2275                         ctx->s_li_wait_mult = result.uint_32;
2276                 ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2277                 return 0;
2278         case Opt_max_dir_size_kb:
2279                 ctx->s_max_dir_size_kb = result.uint_32;
2280                 ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2281                 return 0;
2282 #ifdef CONFIG_EXT4_DEBUG
2283         case Opt_fc_debug_max_replay:
2284                 ctx->s_fc_debug_max_replay = result.uint_32;
2285                 ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2286                 return 0;
2287 #endif
2288         case Opt_stripe:
2289                 ctx->s_stripe = result.uint_32;
2290                 ctx->spec |= EXT4_SPEC_s_stripe;
2291                 return 0;
2292         case Opt_resuid:
2293                 uid = make_kuid(current_user_ns(), result.uint_32);
2294                 if (!uid_valid(uid)) {
2295                         ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
2296                                  result.uint_32);
2297                         return -EINVAL;
2298                 }
2299                 ctx->s_resuid = uid;
2300                 ctx->spec |= EXT4_SPEC_s_resuid;
2301                 return 0;
2302         case Opt_resgid:
2303                 gid = make_kgid(current_user_ns(), result.uint_32);
2304                 if (!gid_valid(gid)) {
2305                         ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
2306                                  result.uint_32);
2307                         return -EINVAL;
2308                 }
2309                 ctx->s_resgid = gid;
2310                 ctx->spec |= EXT4_SPEC_s_resgid;
2311                 return 0;
2312         case Opt_journal_dev:
2313                 if (is_remount) {
2314                         ext4_msg(NULL, KERN_ERR,
2315                                  "Cannot specify journal on remount");
2316                         return -EINVAL;
2317                 }
2318                 ctx->journal_devnum = result.uint_32;
2319                 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2320                 return 0;
2321         case Opt_journal_path:
2322         {
2323                 struct inode *journal_inode;
2324                 struct path path;
2325                 int error;
2326
2327                 if (is_remount) {
2328                         ext4_msg(NULL, KERN_ERR,
2329                                  "Cannot specify journal on remount");
2330                         return -EINVAL;
2331                 }
2332
2333                 error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
2334                 if (error) {
2335                         ext4_msg(NULL, KERN_ERR, "error: could not find "
2336                                  "journal device path");
2337                         return -EINVAL;
2338                 }
2339
2340                 journal_inode = d_inode(path.dentry);
2341                 ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2342                 ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2343                 path_put(&path);
2344                 return 0;
2345         }
2346         case Opt_journal_ioprio:
2347                 if (result.uint_32 > 7) {
2348                         ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2349                                  " (must be 0-7)");
2350                         return -EINVAL;
2351                 }
2352                 ctx->journal_ioprio =
2353                         IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2354                 ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2355                 return 0;
2356         case Opt_test_dummy_encryption:
2357                 return ext4_parse_test_dummy_encryption(param, ctx);
2358         case Opt_dax:
2359         case Opt_dax_type:
2360 #ifdef CONFIG_FS_DAX
2361         {
2362                 int type = (token == Opt_dax) ?
2363                            Opt_dax : result.uint_32;
2364
2365                 switch (type) {
2366                 case Opt_dax:
2367                 case Opt_dax_always:
2368                         ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2369                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2370                         break;
2371                 case Opt_dax_never:
2372                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2373                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2374                         break;
2375                 case Opt_dax_inode:
2376                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2377                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2378                         /* Strictly for printing options */
2379                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2380                         break;
2381                 }
2382                 return 0;
2383         }
2384 #else
2385                 ext4_msg(NULL, KERN_INFO, "dax option not supported");
2386                 return -EINVAL;
2387 #endif
2388         case Opt_data_err:
2389                 if (result.uint_32 == Opt_data_err_abort)
2390                         ctx_set_mount_opt(ctx, m->mount_opt);
2391                 else if (result.uint_32 == Opt_data_err_ignore)
2392                         ctx_clear_mount_opt(ctx, m->mount_opt);
2393                 return 0;
2394         case Opt_mb_optimize_scan:
2395                 if (result.int_32 == 1) {
2396                         ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2397                         ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2398                 } else if (result.int_32 == 0) {
2399                         ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2400                         ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2401                 } else {
2402                         ext4_msg(NULL, KERN_WARNING,
2403                                  "mb_optimize_scan should be set to 0 or 1.");
2404                         return -EINVAL;
2405                 }
2406                 return 0;
2407         }
2408
2409         /*
2410          * At this point we should only be getting options requiring MOPT_SET,
2411          * or MOPT_CLEAR. Anything else is a bug
2412          */
2413         if (m->token == Opt_err) {
2414                 ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2415                          param->key);
2416                 WARN_ON(1);
2417                 return -EINVAL;
2418         }
2419
2420         else {
2421                 unsigned int set = 0;
2422
2423                 if ((param->type == fs_value_is_flag) ||
2424                     result.uint_32 > 0)
2425                         set = 1;
2426
2427                 if (m->flags & MOPT_CLEAR)
2428                         set = !set;
2429                 else if (unlikely(!(m->flags & MOPT_SET))) {
2430                         ext4_msg(NULL, KERN_WARNING,
2431                                  "buggy handling of option %s",
2432                                  param->key);
2433                         WARN_ON(1);
2434                         return -EINVAL;
2435                 }
2436                 if (m->flags & MOPT_2) {
2437                         if (set != 0)
2438                                 ctx_set_mount_opt2(ctx, m->mount_opt);
2439                         else
2440                                 ctx_clear_mount_opt2(ctx, m->mount_opt);
2441                 } else {
2442                         if (set != 0)
2443                                 ctx_set_mount_opt(ctx, m->mount_opt);
2444                         else
2445                                 ctx_clear_mount_opt(ctx, m->mount_opt);
2446                 }
2447         }
2448
2449         return 0;
2450 }
2451
2452 static int parse_options(struct fs_context *fc, char *options)
2453 {
2454         struct fs_parameter param;
2455         int ret;
2456         char *key;
2457
2458         if (!options)
2459                 return 0;
2460
2461         while ((key = strsep(&options, ",")) != NULL) {
2462                 if (*key) {
2463                         size_t v_len = 0;
2464                         char *value = strchr(key, '=');
2465
2466                         param.type = fs_value_is_flag;
2467                         param.string = NULL;
2468
2469                         if (value) {
2470                                 if (value == key)
2471                                         continue;
2472
2473                                 *value++ = 0;
2474                                 v_len = strlen(value);
2475                                 param.string = kmemdup_nul(value, v_len,
2476                                                            GFP_KERNEL);
2477                                 if (!param.string)
2478                                         return -ENOMEM;
2479                                 param.type = fs_value_is_string;
2480                         }
2481
2482                         param.key = key;
2483                         param.size = v_len;
2484
2485                         ret = ext4_parse_param(fc, &param);
2486                         if (param.string)
2487                                 kfree(param.string);
2488                         if (ret < 0)
2489                                 return ret;
2490                 }
2491         }
2492
2493         ret = ext4_validate_options(fc);
2494         if (ret < 0)
2495                 return ret;
2496
2497         return 0;
2498 }
2499
2500 static int parse_apply_sb_mount_options(struct super_block *sb,
2501                                         struct ext4_fs_context *m_ctx)
2502 {
2503         struct ext4_sb_info *sbi = EXT4_SB(sb);
2504         char *s_mount_opts = NULL;
2505         struct ext4_fs_context *s_ctx = NULL;
2506         struct fs_context *fc = NULL;
2507         int ret = -ENOMEM;
2508
2509         if (!sbi->s_es->s_mount_opts[0])
2510                 return 0;
2511
2512         s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2513                                 sizeof(sbi->s_es->s_mount_opts),
2514                                 GFP_KERNEL);
2515         if (!s_mount_opts)
2516                 return ret;
2517
2518         fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2519         if (!fc)
2520                 goto out_free;
2521
2522         s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2523         if (!s_ctx)
2524                 goto out_free;
2525
2526         fc->fs_private = s_ctx;
2527         fc->s_fs_info = sbi;
2528
2529         ret = parse_options(fc, s_mount_opts);
2530         if (ret < 0)
2531                 goto parse_failed;
2532
2533         ret = ext4_check_opt_consistency(fc, sb);
2534         if (ret < 0) {
2535 parse_failed:
2536                 ext4_msg(sb, KERN_WARNING,
2537                          "failed to parse options in superblock: %s",
2538                          s_mount_opts);
2539                 ret = 0;
2540                 goto out_free;
2541         }
2542
2543         if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2544                 m_ctx->journal_devnum = s_ctx->journal_devnum;
2545         if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2546                 m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2547
2548         ext4_apply_options(fc, sb);
2549         ret = 0;
2550
2551 out_free:
2552         if (fc) {
2553                 ext4_fc_free(fc);
2554                 kfree(fc);
2555         }
2556         kfree(s_mount_opts);
2557         return ret;
2558 }
2559
/*
 * ext4_apply_quota_options() - move journaled-quota settings from the
 * parsing context into the superblock info.
 * @fc: filesystem context whose fs_private is the ext4_fs_context
 * @sb: superblock to update
 *
 * Does nothing when the filesystem has the quota feature, or when the
 * kernel is built without CONFIG_QUOTA.
 *
 * For every quota type named in ctx->qname_spec the file name pointer is
 * taken out of the context (the context slot is set to NULL) and published
 * in sbi->s_qf_names[] with rcu_replace_pointer(); the previous name, if
 * any, is released with kfree_rcu_mightsleep().  A NULL name in the context
 * therefore removes the stored name.  The QUOTA mount option is set as soon
 * as a non-NULL name is installed.
 */
static void ext4_apply_quota_options(struct fs_context *fc,
				     struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int type;

	if (ext4_has_feature_quota(sb))
		return;

	if (ctx->spec & EXT4_SPEC_JQUOTA) {
		for (type = 0; type < EXT4_MAXQUOTAS; type++) {
			char *new_name, *old_name;

			if (!(ctx->qname_spec & (1 << type)))
				continue;

			/* Take the name out of the context; may be NULL. */
			new_name = ctx->s_qf_names[type];
			ctx->s_qf_names[type] = NULL;
			if (new_name)
				set_opt(sb, QUOTA);

			/* NOTE(review): lockdep annotation implies s_umount is held here. */
			old_name = rcu_replace_pointer(sbi->s_qf_names[type],
						new_name,
						lockdep_is_held(&sb->s_umount));
			if (old_name)
				kfree_rcu_mightsleep(old_name);
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT)
		sbi->s_jquota_fmt = ctx->s_jquota_fmt;
#endif
}
2593
/*
 * Check quota settings consistency.
 *
 * Validates the quota-related options collected in @fc against the current
 * state of @sb:
 *  - prjquota requires the project feature;
 *  - while any quota is loaded, a context that touches the quota mount flags
 *    must leave at least one of them set, and journaled quota file names and
 *    the journaled quota format must not change;
 *  - a journaled quota file name must match the one already recorded;
 *  - usrquota/grpquota must not be combined with journaled quota file names,
 *    and journaled quota file names need a quota format.
 *
 * Side effect: the USRQUOTA / GRPQUOTA option is cleared in the context when
 * a journaled quota file name exists for that type.
 *
 * Returns 0 if the settings are acceptable (always, without CONFIG_QUOTA),
 * -EINVAL otherwise.
 */
static int ext4_check_quota_consistency(struct fs_context *fc,
					struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool has_quota_feature = ext4_has_feature_quota(sb);
	bool quota_active = sb_any_quota_loaded(sb);
	bool usr_file, grp_file, usr_plain, grp_plain;
	int all_quota_opts, type;

	/*
	 * We do the test below only for project quotas. 'usrquota' and
	 * 'grpquota' mount options are allowed even without quota feature
	 * to support legacy quotas in quota files.
	 */
	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
	    !ext4_has_feature_project(sb)) {
		ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
			 "Cannot enable project quota enforcement.");
		return -EINVAL;
	}

	all_quota_opts = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
			 EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
	if (quota_active &&
	    (ctx->mask_s_mount_opt & all_quota_opts) &&
	    !ctx_test_mount_opt(ctx, all_quota_opts)) {
		ext4_msg(NULL, KERN_ERR,
			 "Cannot change quota options when quota turned on");
		return -EINVAL;
	}

	if (ctx->spec & EXT4_SPEC_JQUOTA) {
		for (type = 0; type < EXT4_MAXQUOTAS; type++) {
			bool cur_set, new_set;

			/* Only look at types this context mentions. */
			if (!(ctx->qname_spec & (1 << type)))
				continue;

			cur_set = sbi->s_qf_names[type] != NULL;
			new_set = ctx->s_qf_names[type] != NULL;

			/* Adding or removing a name while quota is on. */
			if (quota_active && cur_set != new_set)
				goto err_jquota_change;

			if (cur_set && new_set &&
			    strcmp(get_qf_name(sb, sbi, type),
				   ctx->s_qf_names[type]) != 0) {
				ext4_msg(NULL, KERN_ERR,
					 "%s quota file already specified",
					 QTYPE2NAME(type));
				return -EINVAL;
			}
		}

		if (has_quota_feature) {
			ext4_msg(NULL, KERN_INFO,
				 "Journaled quota options ignored when "
				 "QUOTA feature is enabled");
			return 0;
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT) {
		if (quota_active && sbi->s_jquota_fmt != ctx->s_jquota_fmt)
			goto err_jquota_change;
		if (has_quota_feature) {
			ext4_msg(NULL, KERN_INFO, "Quota format mount options "
				 "ignored when QUOTA feature is enabled");
			return 0;
		}
	}

	/* Make sure we don't mix old and new quota format */
	usr_file = get_qf_name(sb, sbi, USRQUOTA) ||
		   ctx->s_qf_names[USRQUOTA];
	grp_file = get_qf_name(sb, sbi, GRPQUOTA) ||
		   ctx->s_qf_names[GRPQUOTA];

	if (usr_file) {
		/* A journaled quota file overrides plain usrquota. */
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
		usr_plain = false;
	} else {
		usr_plain = ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
			    test_opt(sb, USRQUOTA);
	}

	if (grp_file) {
		/* A journaled quota file overrides plain grpquota. */
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
		grp_plain = false;
	} else {
		grp_plain = ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
			    test_opt(sb, GRPQUOTA);
	}

	/* Without any journaled quota file there is nothing left to check. */
	if (!usr_file && !grp_file)
		return 0;

	if (usr_plain || grp_plain) {
		ext4_msg(NULL, KERN_ERR, "old and new quota "
			 "format mixing");
		return -EINVAL;
	}

	if (!(ctx->spec & EXT4_SPEC_JQFMT) && !sbi->s_jquota_fmt) {
		ext4_msg(NULL, KERN_ERR, "journaled quota format "
			 "not specified");
		return -EINVAL;
	}

	return 0;

err_jquota_change:
	ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
		 "options when quota turned on");
	return -EINVAL;
#else
	return 0;
#endif
}
2714
2715 static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2716                                             struct super_block *sb)
2717 {
2718         const struct ext4_fs_context *ctx = fc->fs_private;
2719         const struct ext4_sb_info *sbi = EXT4_SB(sb);
2720
2721         if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2722                 return 0;
2723
2724         if (!ext4_has_feature_encrypt(sb)) {
2725                 ext4_msg(NULL, KERN_WARNING,
2726                          "test_dummy_encryption requires encrypt feature");
2727                 return -EINVAL;
2728         }
2729         /*
2730          * This mount option is just for testing, and it's not worthwhile to
2731          * implement the extra complexity (e.g. RCU protection) that would be
2732          * needed to allow it to be set or changed during remount.  We do allow
2733          * it to be specified during remount, but only if there is no change.
2734          */
2735         if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2736                 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2737                                                  &ctx->dummy_enc_policy))
2738                         return 0;
2739                 ext4_msg(NULL, KERN_WARNING,
2740                          "Can't set or change test_dummy_encryption on remount");
2741                 return -EINVAL;
2742         }
2743         /* Also make sure s_mount_opts didn't contain a conflicting value. */
2744         if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2745                 if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2746                                                  &ctx->dummy_enc_policy))
2747                         return 0;
2748                 ext4_msg(NULL, KERN_WARNING,
2749                          "Conflicting test_dummy_encryption options");
2750                 return -EINVAL;
2751         }
2752         return 0;
2753 }
2754
2755 static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2756                                              struct super_block *sb)
2757 {
2758         if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2759             /* if already set, it was already verified to be the same */
2760             fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2761                 return;
2762         EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2763         memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2764         ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2765 }
2766
2767 static int ext4_check_opt_consistency(struct fs_context *fc,
2768                                       struct super_block *sb)
2769 {
2770         struct ext4_fs_context *ctx = fc->fs_private;
2771         struct ext4_sb_info *sbi = fc->s_fs_info;
2772         int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2773         int err;
2774
2775         if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2776                 ext4_msg(NULL, KERN_ERR,
2777                          "Mount option(s) incompatible with ext2");
2778                 return -EINVAL;
2779         }
2780         if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2781                 ext4_msg(NULL, KERN_ERR,
2782                          "Mount option(s) incompatible with ext3");
2783                 return -EINVAL;
2784         }
2785
2786         if (ctx->s_want_extra_isize >
2787             (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2788                 ext4_msg(NULL, KERN_ERR,
2789                          "Invalid want_extra_isize %d",
2790                          ctx->s_want_extra_isize);
2791                 return -EINVAL;
2792         }
2793
2794         if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
2795                 int blocksize =
2796                         BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2797                 if (blocksize < PAGE_SIZE)
2798                         ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
2799                                  "experimental mount option 'dioread_nolock' "
2800                                  "for blocksize < PAGE_SIZE");
2801         }
2802
2803         err = ext4_check_test_dummy_encryption(fc, sb);
2804         if (err)
2805                 return err;
2806
2807         if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2808                 if (!sbi->s_journal) {
2809                         ext4_msg(NULL, KERN_WARNING,
2810                                  "Remounting file system with no journal "
2811                                  "so ignoring journalled data option");
2812                         ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2813                 } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2814                            test_opt(sb, DATA_FLAGS)) {
2815                         ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2816                                  "on remount");
2817                         return -EINVAL;
2818                 }
2819         }
2820
2821         if (is_remount) {
2822                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2823                     (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2824                         ext4_msg(NULL, KERN_ERR, "can't mount with "
2825                                  "both data=journal and dax");
2826                         return -EINVAL;
2827                 }
2828
2829                 if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2830                     (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2831                      (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2832 fail_dax_change_remount:
2833                         ext4_msg(NULL, KERN_ERR, "can't change "
2834                                  "dax mount option while remounting");
2835                         return -EINVAL;
2836                 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2837                          (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2838                           (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2839                         goto fail_dax_change_remount;
2840                 } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2841                            ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2842                             (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2843                             !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2844                         goto fail_dax_change_remount;
2845                 }
2846         }
2847
2848         return ext4_check_quota_consistency(fc, sb);
2849 }
2850
2851 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2852 {
2853         struct ext4_fs_context *ctx = fc->fs_private;
2854         struct ext4_sb_info *sbi = fc->s_fs_info;
2855
2856         sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2857         sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2858         sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2859         sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2860         sb->s_flags &= ~ctx->mask_s_flags;
2861         sb->s_flags |= ctx->vals_s_flags;
2862
2863 #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2864         APPLY(s_commit_interval);
2865         APPLY(s_stripe);
2866         APPLY(s_max_batch_time);
2867         APPLY(s_min_batch_time);
2868         APPLY(s_want_extra_isize);
2869         APPLY(s_inode_readahead_blks);
2870         APPLY(s_max_dir_size_kb);
2871         APPLY(s_li_wait_mult);
2872         APPLY(s_resgid);
2873         APPLY(s_resuid);
2874
2875 #ifdef CONFIG_EXT4_DEBUG
2876         APPLY(s_fc_debug_max_replay);
2877 #endif
2878
2879         ext4_apply_quota_options(fc, sb);
2880         ext4_apply_test_dummy_encryption(ctx, sb);
2881 }
2882
2883
/*
 * ext4_validate_options() - context-only sanity check of quota options.
 * @fc: filesystem context; fs_private is the ext4_fs_context
 *
 * When a journaled quota file name was given for a quota type, a usrquota /
 * grpquota option for that same type is dropped from the context.  If a
 * usrquota / grpquota option is still set afterwards while any journaled
 * quota file name is present, old and new quota formats are being mixed.
 *
 * Return: 1 if the options are acceptable (always, without CONFIG_QUOTA),
 * -EINVAL on old/new quota format mixing.
 */
static int ext4_validate_options(struct fs_context *fc)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	const char *usr_file = ctx->s_qf_names[USRQUOTA];
	const char *grp_file = ctx->s_qf_names[GRPQUOTA];

	/* No journaled quota file names: nothing can be mixed. */
	if (!usr_file && !grp_file)
		return 1;

	/* Only touch the option (and its mask) if it is actually set. */
	if (usr_file && ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA))
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);

	if (grp_file && ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA))
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);

	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
	    ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
		ext4_msg(NULL, KERN_ERR, "old and new quota "
			 "format mixing");
		return -EINVAL;
	}
#endif
	return 1;
}
2910
/*
 * ext4_show_quota_options() - emit the journaled quota mount options.
 * @seq: seq_file to print into
 * @sb:  superblock whose quota settings are shown
 *
 * Prints ",jqfmt=<name>" when a journaled quota format is set (with an
 * empty name for a format value that is not recognised here), followed by
 * the usrjquota / grpjquota file names that are set.  The names are read
 * under rcu_read_lock().  Prints nothing without CONFIG_QUOTA.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *qf_name;

	if (sbi->s_jquota_fmt) {
		const char *fmt_label;

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmt_label = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmt_label = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmt_label = "vfsv1";
		else
			fmt_label = "";

		seq_printf(seq, ",jqfmt=%s", fmt_label);
	}

	/* The name pointers are RCU-managed; keep both reads in one section. */
	rcu_read_lock();

	qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	if (qf_name)
		seq_show_option(seq, "usrjquota", qf_name);

	qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (qf_name)
		seq_show_option(seq, "grpjquota", qf_name);

	rcu_read_unlock();
#endif
}
2945
2946 static const char *token2str(int token)
2947 {
2948         const struct fs_parameter_spec *spec;
2949
2950         for (spec = ext4_param_specs; spec->name != NULL; spec++)
2951                 if (spec->opt == token && !spec->type)
2952                         break;
2953         return spec->name;
2954 }
2955
2956 /*
2957  * Show an option if
2958  *  - it's set to a non-default value OR
2959  *  - if the per-sb default is different from the global default
2960  */
2961 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2962                               int nodefs)
2963 {
2964         struct ext4_sb_info *sbi = EXT4_SB(sb);
2965         struct ext4_super_block *es = sbi->s_es;
2966         int def_errors;
2967         const struct mount_opts *m;
2968         char sep = nodefs ? '\n' : ',';
2969
2970 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2971 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2972
2973         if (sbi->s_sb_block != 1)
2974                 SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2975
2976         for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2977                 int want_set = m->flags & MOPT_SET;
2978                 int opt_2 = m->flags & MOPT_2;
2979                 unsigned int mount_opt, def_mount_opt;
2980
2981                 if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2982                     m->flags & MOPT_SKIP)
2983                         continue;
2984
2985                 if (opt_2) {
2986                         mount_opt = sbi->s_mount_opt2;
2987                         def_mount_opt = sbi->s_def_mount_opt2;
2988                 } else {
2989                         mount_opt = sbi->s_mount_opt;
2990                         def_mount_opt = sbi->s_def_mount_opt;
2991                 }
2992                 /* skip if same as the default */
2993                 if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
2994                         continue;
2995                 /* select Opt_noFoo vs Opt_Foo */
2996                 if ((want_set &&
2997                      (mount_opt & m->mount_opt) != m->mount_opt) ||
2998                     (!want_set && (mount_opt & m->mount_opt)))
2999                         continue;
3000                 SEQ_OPTS_PRINT("%s", token2str(m->token));
3001         }
3002
3003         if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
3004             le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
3005                 SEQ_OPTS_PRINT("resuid=%u",
3006                                 from_kuid_munged(&init_user_ns, sbi->s_resuid));
3007         if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
3008             le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
3009                 SEQ_OPTS_PRINT("resgid=%u",
3010                                 from_kgid_munged(&init_user_ns, sbi->s_resgid));
3011         def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
3012         if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
3013                 SEQ_OPTS_PUTS("errors=remount-ro");
3014         if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
3015                 SEQ_OPTS_PUTS("errors=continue");
3016         if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
3017                 SEQ_OPTS_PUTS("errors=panic");
3018         if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
3019                 SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
3020         if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
3021                 SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
3022         if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
3023                 SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
3024         if (nodefs || sbi->s_stripe)
3025                 SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
3026         if (nodefs || EXT4_MOUNT_DATA_FLAGS &
3027                         (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
3028                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
3029                         SEQ_OPTS_PUTS("data=journal");
3030                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
3031                         SEQ_OPTS_PUTS("data=ordered");
3032                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
3033                         SEQ_OPTS_PUTS("data=writeback");
3034         }
3035         if (nodefs ||
3036             sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
3037                 SEQ_OPTS_PRINT("inode_readahead_blks=%u",
3038                                sbi->s_inode_readahead_blks);
3039
3040         if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
3041                        (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
3042                 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
3043         if (nodefs || sbi->s_max_dir_size_kb)
3044                 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
3045         if (test_opt(sb, DATA_ERR_ABORT))
3046                 SEQ_OPTS_PUTS("data_err=abort");
3047
3048         fscrypt_show_test_dummy_encryption(seq, sep, sb);
3049
3050         if (sb->s_flags & SB_INLINECRYPT)
3051                 SEQ_OPTS_PUTS("inlinecrypt");
3052
3053         if (test_opt(sb, DAX_ALWAYS)) {
3054                 if (IS_EXT2_SB(sb))
3055                         SEQ_OPTS_PUTS("dax");
3056                 else
3057                         SEQ_OPTS_PUTS("dax=always");
3058         } else if (test_opt2(sb, DAX_NEVER)) {
3059                 SEQ_OPTS_PUTS("dax=never");
3060         } else if (test_opt2(sb, DAX_INODE)) {
3061                 SEQ_OPTS_PUTS("dax=inode");
3062         }
3063
3064         if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3065                         !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3066                 SEQ_OPTS_PUTS("mb_optimize_scan=0");
3067         } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3068                         test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3069                 SEQ_OPTS_PUTS("mb_optimize_scan=1");
3070         }
3071
3072         ext4_show_quota_options(seq, sb);
3073         return 0;
3074 }
3075
3076 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3077 {
3078         return _ext4_show_options(seq, root->d_sb, 0);
3079 }
3080
3081 int ext4_seq_options_show(struct seq_file *seq, void *offset)
3082 {
3083         struct super_block *sb = seq->private;
3084         int rc;
3085
3086         seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3087         rc = _ext4_show_options(seq, sb, 1);
3088         seq_puts(seq, "\n");
3089         return rc;
3090 }
3091
3092 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3093                             int read_only)
3094 {
3095         struct ext4_sb_info *sbi = EXT4_SB(sb);
3096         int err = 0;
3097
3098         if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3099                 ext4_msg(sb, KERN_ERR, "revision level too high, "
3100                          "forcing read-only mode");
3101                 err = -EROFS;
3102                 goto done;
3103         }
3104         if (read_only)
3105                 goto done;
3106         if (!(sbi->s_mount_state & EXT4_VALID_FS))
3107                 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3108                          "running e2fsck is recommended");
3109         else if (sbi->s_mount_state & EXT4_ERROR_FS)
3110                 ext4_msg(sb, KERN_WARNING,
3111                          "warning: mounting fs with errors, "
3112                          "running e2fsck is recommended");
3113         else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3114                  le16_to_cpu(es->s_mnt_count) >=
3115                  (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3116                 ext4_msg(sb, KERN_WARNING,
3117                          "warning: maximal mount count reached, "
3118                          "running e2fsck is recommended");
3119         else if (le32_to_cpu(es->s_checkinterval) &&
3120                  (ext4_get_tstamp(es, s_lastcheck) +
3121                   le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3122                 ext4_msg(sb, KERN_WARNING,
3123                          "warning: checktime reached, "
3124                          "running e2fsck is recommended");
3125         if (!sbi->s_journal)
3126                 es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
3127         if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3128                 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3129         le16_add_cpu(&es->s_mnt_count, 1);
3130         ext4_update_tstamp(es, s_mtime);
3131         if (sbi->s_journal) {
3132                 ext4_set_feature_journal_needs_recovery(sb);
3133                 if (ext4_has_feature_orphan_file(sb))
3134                         ext4_set_feature_orphan_present(sb);
3135         }
3136
3137         err = ext4_commit_super(sb);
3138 done:
3139         if (test_opt(sb, DEBUG))
3140                 printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3141                                 "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3142                         sb->s_blocksize,
3143                         sbi->s_groups_count,
3144                         EXT4_BLOCKS_PER_GROUP(sb),
3145                         EXT4_INODES_PER_GROUP(sb),
3146                         sbi->s_mount_opt, sbi->s_mount_opt2);
3147         return err;
3148 }
3149
3150 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3151 {
3152         struct ext4_sb_info *sbi = EXT4_SB(sb);
3153         struct flex_groups **old_groups, **new_groups;
3154         int size, i, j;
3155
3156         if (!sbi->s_log_groups_per_flex)
3157                 return 0;
3158
3159         size = ext4_flex_group(sbi, ngroup - 1) + 1;
3160         if (size <= sbi->s_flex_groups_allocated)
3161                 return 0;
3162
3163         new_groups = kvzalloc(roundup_pow_of_two(size *
3164                               sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3165         if (!new_groups) {
3166                 ext4_msg(sb, KERN_ERR,
3167                          "not enough memory for %d flex group pointers", size);
3168                 return -ENOMEM;
3169         }
3170         for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3171                 new_groups[i] = kvzalloc(roundup_pow_of_two(
3172                                          sizeof(struct flex_groups)),
3173                                          GFP_KERNEL);
3174                 if (!new_groups[i]) {
3175                         for (j = sbi->s_flex_groups_allocated; j < i; j++)
3176                                 kvfree(new_groups[j]);
3177                         kvfree(new_groups);
3178                         ext4_msg(sb, KERN_ERR,
3179                                  "not enough memory for %d flex groups", size);
3180                         return -ENOMEM;
3181                 }
3182         }
3183         rcu_read_lock();
3184         old_groups = rcu_dereference(sbi->s_flex_groups);
3185         if (old_groups)
3186                 memcpy(new_groups, old_groups,
3187                        (sbi->s_flex_groups_allocated *
3188                         sizeof(struct flex_groups *)));
3189         rcu_read_unlock();
3190         rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3191         sbi->s_flex_groups_allocated = size;
3192         if (old_groups)
3193                 ext4_kvfree_array_rcu(old_groups);
3194         return 0;
3195 }
3196
3197 static int ext4_fill_flex_info(struct super_block *sb)
3198 {
3199         struct ext4_sb_info *sbi = EXT4_SB(sb);
3200         struct ext4_group_desc *gdp = NULL;
3201         struct flex_groups *fg;
3202         ext4_group_t flex_group;
3203         int i, err;
3204
3205         sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3206         if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3207                 sbi->s_log_groups_per_flex = 0;
3208                 return 1;
3209         }
3210
3211         err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3212         if (err)
3213                 goto failed;
3214
3215         for (i = 0; i < sbi->s_groups_count; i++) {
3216                 gdp = ext4_get_group_desc(sb, i, NULL);
3217
3218                 flex_group = ext4_flex_group(sbi, i);
3219                 fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3220                 atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3221                 atomic64_add(ext4_free_group_clusters(sb, gdp),
3222                              &fg->free_clusters);
3223                 atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3224         }
3225
3226         return 1;
3227 failed:
3228         return 0;
3229 }
3230
3231 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3232                                    struct ext4_group_desc *gdp)
3233 {
3234         int offset = offsetof(struct ext4_group_desc, bg_checksum);
3235         __u16 crc = 0;
3236         __le32 le_group = cpu_to_le32(block_group);
3237         struct ext4_sb_info *sbi = EXT4_SB(sb);
3238
3239         if (ext4_has_metadata_csum(sbi->s_sb)) {
3240                 /* Use new metadata_csum algorithm */
3241                 __u32 csum32;
3242                 __u16 dummy_csum = 0;
3243
3244                 csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3245                                      sizeof(le_group));
3246                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3247                 csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3248                                      sizeof(dummy_csum));
3249                 offset += sizeof(dummy_csum);
3250                 if (offset < sbi->s_desc_size)
3251                         csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3252                                              sbi->s_desc_size - offset);
3253
3254                 crc = csum32 & 0xFFFF;
3255                 goto out;
3256         }
3257
3258         /* old crc16 code */
3259         if (!ext4_has_feature_gdt_csum(sb))
3260                 return 0;
3261
3262         crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3263         crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3264         crc = crc16(crc, (__u8 *)gdp, offset);
3265         offset += sizeof(gdp->bg_checksum); /* skip checksum */
3266         /* for checksum of struct ext4_group_desc do the rest...*/
3267         if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
3268                 crc = crc16(crc, (__u8 *)gdp + offset,
3269                             sbi->s_desc_size - offset);
3270
3271 out:
3272         return cpu_to_le16(crc);
3273 }
3274
3275 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3276                                 struct ext4_group_desc *gdp)
3277 {
3278         if (ext4_has_group_desc_csum(sb) &&
3279             (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3280                 return 0;
3281
3282         return 1;
3283 }
3284
3285 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3286                               struct ext4_group_desc *gdp)
3287 {
3288         if (!ext4_has_group_desc_csum(sb))
3289                 return;
3290         gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3291 }
3292
3293 /* Called at mount-time, super-block is locked */
3294 static int ext4_check_descriptors(struct super_block *sb,
3295                                   ext4_fsblk_t sb_block,
3296                                   ext4_group_t *first_not_zeroed)
3297 {
3298         struct ext4_sb_info *sbi = EXT4_SB(sb);
3299         ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3300         ext4_fsblk_t last_block;
3301         ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3302         ext4_fsblk_t block_bitmap;
3303         ext4_fsblk_t inode_bitmap;
3304         ext4_fsblk_t inode_table;
3305         int flexbg_flag = 0;
3306         ext4_group_t i, grp = sbi->s_groups_count;
3307
3308         if (ext4_has_feature_flex_bg(sb))
3309                 flexbg_flag = 1;
3310
3311         ext4_debug("Checking group descriptors");
3312
3313         for (i = 0; i < sbi->s_groups_count; i++) {
3314                 struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3315
3316                 if (i == sbi->s_groups_count - 1 || flexbg_flag)
3317                         last_block = ext4_blocks_count(sbi->s_es) - 1;
3318                 else
3319                         last_block = first_block +
3320                                 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
3321
3322                 if ((grp == sbi->s_groups_count) &&
3323                    !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3324                         grp = i;
3325
3326                 block_bitmap = ext4_block_bitmap(sb, gdp);
3327                 if (block_bitmap == sb_block) {
3328                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3329                                  "Block bitmap for group %u overlaps "
3330                                  "superblock", i);
3331                         if (!sb_rdonly(sb))
3332                                 return 0;
3333                 }
3334                 if (block_bitmap >= sb_block + 1 &&
3335                     block_bitmap <= last_bg_block) {
3336                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3337                                  "Block bitmap for group %u overlaps "
3338                                  "block group descriptors", i);
3339                         if (!sb_rdonly(sb))
3340                                 return 0;
3341                 }
3342                 if (block_bitmap < first_block || block_bitmap > last_block) {
3343                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3344                                "Block bitmap for group %u not in group "
3345                                "(block %llu)!", i, block_bitmap);
3346                         return 0;
3347                 }
3348                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
3349                 if (inode_bitmap == sb_block) {
3350                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3351                                  "Inode bitmap for group %u overlaps "
3352                                  "superblock", i);
3353                         if (!sb_rdonly(sb))
3354                                 return 0;
3355                 }
3356                 if (inode_bitmap >= sb_block + 1 &&
3357                     inode_bitmap <= last_bg_block) {
3358                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3359                                  "Inode bitmap for group %u overlaps "
3360                                  "block group descriptors", i);
3361                         if (!sb_rdonly(sb))
3362                                 return 0;
3363                 }
3364                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
3365                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3366                                "Inode bitmap for group %u not in group "
3367                                "(block %llu)!", i, inode_bitmap);
3368                         return 0;
3369                 }
3370                 inode_table = ext4_inode_table(sb, gdp);
3371                 if (inode_table == sb_block) {
3372                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3373                                  "Inode table for group %u overlaps "
3374                                  "superblock", i);
3375                         if (!sb_rdonly(sb))
3376                                 return 0;
3377                 }
3378                 if (inode_table >= sb_block + 1 &&
3379                     inode_table <= last_bg_block) {
3380                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3381                                  "Inode table for group %u overlaps "
3382                                  "block group descriptors", i);
3383                         if (!sb_rdonly(sb))
3384                                 return 0;
3385                 }
3386                 if (inode_table < first_block ||
3387                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
3388                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3389                                "Inode table for group %u not in group "
3390                                "(block %llu)!", i, inode_table);
3391                         return 0;
3392                 }
3393                 ext4_lock_group(sb, i);
3394                 if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3395                         ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3396                                  "Checksum for group %u failed (%u!=%u)",
3397                                  i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3398                                      gdp)), le16_to_cpu(gdp->bg_checksum));
3399                         if (!sb_rdonly(sb)) {
3400                                 ext4_unlock_group(sb, i);
3401                                 return 0;
3402                         }
3403                 }
3404                 ext4_unlock_group(sb, i);
3405                 if (!flexbg_flag)
3406                         first_block += EXT4_BLOCKS_PER_GROUP(sb);
3407         }
3408         if (NULL != first_not_zeroed)
3409                 *first_not_zeroed = grp;
3410         return 1;
3411 }
3412
3413 /*
3414  * Maximal extent format file size.
3415  * Resulting logical blkno at s_maxbytes must fit in our on-disk
3416  * extent format containers, within a sector_t, and within i_blocks
3417  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
3418  * so that won't be a limiting factor.
3419  *
3420  * However there is other limiting factor. We do store extents in the form
3421  * of starting block and length, hence the resulting length of the extent
3422  * covering maximum file size must fit into on-disk format containers as
3423  * well. Given that length is always by 1 unit bigger than max unit (because
3424  * we count 0 as well) we have to lower the s_maxbytes by one fs block.
3425  *
3426  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3427  */
3428 static loff_t ext4_max_size(int blkbits, int has_huge_files)
3429 {
3430         loff_t res;
3431         loff_t upper_limit = MAX_LFS_FILESIZE;
3432
3433         BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3434
3435         if (!has_huge_files) {
3436                 upper_limit = (1LL << 32) - 1;
3437
3438                 /* total blocks in file system block size */
3439                 upper_limit >>= (blkbits - 9);
3440                 upper_limit <<= blkbits;
3441         }
3442
3443         /*
3444          * 32-bit extent-start container, ee_block. We lower the maxbytes
3445          * by one fs block, so ee_len can cover the extent of maximum file
3446          * size
3447          */
3448         res = (1LL << 32) - 1;
3449         res <<= blkbits;
3450
3451         /* Sanity check against vm- & vfs- imposed limits */
3452         if (res > upper_limit)
3453                 res = upper_limit;
3454
3455         return res;
3456 }
3457
3458 /*
3459  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
3460  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3461  * We need to be 1 filesystem block less than the 2^48 sector limit.
3462  */
3463 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3464 {
3465         loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3466         int meta_blocks;
3467         unsigned int ppb = 1 << (bits - 2);
3468
3469         /*
3470          * This is calculated to be the largest file size for a dense, block
3471          * mapped file such that the file's total number of 512-byte sectors,
3472          * including data and all indirect blocks, does not exceed (2^48 - 1).
3473          *
3474          * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
3475          * number of 512-byte sectors of the file.
3476          */
3477         if (!has_huge_files) {
3478                 /*
3479                  * !has_huge_files or implies that the inode i_block field
3480                  * represents total file blocks in 2^32 512-byte sectors ==
3481                  * size of vfs inode i_blocks * 8
3482                  */
3483                 upper_limit = (1LL << 32) - 1;
3484
3485                 /* total blocks in file system block size */
3486                 upper_limit >>= (bits - 9);
3487
3488         } else {
3489                 /*
3490                  * We use 48 bit ext4_inode i_blocks
3491                  * With EXT4_HUGE_FILE_FL set the i_blocks
3492                  * represent total number of blocks in
3493                  * file system block size
3494                  */
3495                 upper_limit = (1LL << 48) - 1;
3496
3497         }
3498
3499         /* Compute how many blocks we can address by block tree */
3500         res += ppb;
3501         res += ppb * ppb;
3502         res += ((loff_t)ppb) * ppb * ppb;
3503         /* Compute how many metadata blocks are needed */
3504         meta_blocks = 1;
3505         meta_blocks += 1 + ppb;
3506         meta_blocks += 1 + ppb + ppb * ppb;
3507         /* Does block tree limit file size? */
3508         if (res + meta_blocks <= upper_limit)
3509                 goto check_lfs;
3510
3511         res = upper_limit;
3512         /* How many metadata blocks are needed for addressing upper_limit? */
3513         upper_limit -= EXT4_NDIR_BLOCKS;
3514         /* indirect blocks */
3515         meta_blocks = 1;
3516         upper_limit -= ppb;
3517         /* double indirect blocks */
3518         if (upper_limit < ppb * ppb) {
3519                 meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3520                 res -= meta_blocks;
3521                 goto check_lfs;
3522         }
3523         meta_blocks += 1 + ppb;
3524         upper_limit -= ppb * ppb;
3525         /* tripple indirect blocks for the rest */
3526         meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3527                 DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3528         res -= meta_blocks;
3529 check_lfs:
3530         res <<= bits;
3531         if (res > MAX_LFS_FILESIZE)
3532                 res = MAX_LFS_FILESIZE;
3533
3534         return res;
3535 }
3536
3537 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3538                                    ext4_fsblk_t logical_sb_block, int nr)
3539 {
3540         struct ext4_sb_info *sbi = EXT4_SB(sb);
3541         ext4_group_t bg, first_meta_bg;
3542         int has_super = 0;
3543
3544         first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3545
3546         if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3547                 return logical_sb_block + nr + 1;
3548         bg = sbi->s_desc_per_block * nr;
3549         if (ext4_bg_has_super(sb, bg))
3550                 has_super = 1;
3551
3552         /*
3553          * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3554          * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
3555          * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3556          * compensate.
3557          */
3558         if (sb->s_blocksize == 1024 && nr == 0 &&
3559             le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3560                 has_super++;
3561
3562         return (has_super + ext4_group_first_block_no(sb, bg));
3563 }
3564
3565 /**
3566  * ext4_get_stripe_size: Get the stripe size.
3567  * @sbi: In memory super block info
3568  *
3569  * If we have specified it via mount option, then
3570  * use the mount option value. If the value specified at mount time is
3571  * greater than the blocks per group use the super block value.
3572  * If the super block value is greater than blocks per group return 0.
3573  * Allocator needs it be less than blocks per group.
3574  *
3575  */
3576 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3577 {
3578         unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3579         unsigned long stripe_width =
3580                         le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3581         int ret;
3582
3583         if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3584                 ret = sbi->s_stripe;
3585         else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3586                 ret = stripe_width;
3587         else if (stride && stride <= sbi->s_blocks_per_group)
3588                 ret = stride;
3589         else
3590                 ret = 0;
3591
3592         /*
3593          * If the stripe width is 1, this makes no sense and
3594          * we set it to 0 to turn off stripe handling code.
3595          */
3596         if (ret <= 1)
3597                 ret = 0;
3598
3599         return ret;
3600 }
3601
3602 /*
3603  * Check whether this filesystem can be mounted based on
3604  * the features present and the RDONLY/RDWR mount requested.
3605  * Returns 1 if this filesystem can be mounted as requested,
3606  * 0 if it cannot be.
3607  */
3608 int ext4_feature_set_ok(struct super_block *sb, int readonly)
3609 {
3610         if (ext4_has_unknown_ext4_incompat_features(sb)) {
3611                 ext4_msg(sb, KERN_ERR,
3612                         "Couldn't mount because of "
3613                         "unsupported optional features (%x)",
3614                         (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3615                         ~EXT4_FEATURE_INCOMPAT_SUPP));
3616                 return 0;
3617         }
3618
3619 #if !IS_ENABLED(CONFIG_UNICODE)
3620         if (ext4_has_feature_casefold(sb)) {
3621                 ext4_msg(sb, KERN_ERR,
3622                          "Filesystem with casefold feature cannot be "
3623                          "mounted without CONFIG_UNICODE");
3624                 return 0;
3625         }
3626 #endif
3627
3628         if (readonly)
3629                 return 1;
3630
3631         if (ext4_has_feature_readonly(sb)) {
3632                 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3633                 sb->s_flags |= SB_RDONLY;
3634                 return 1;
3635         }
3636
3637         /* Check that feature set is OK for a read-write mount */
3638         if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3639                 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3640                          "unsupported optional features (%x)",
3641                          (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3642                                 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3643                 return 0;
3644         }
3645         if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3646                 ext4_msg(sb, KERN_ERR,
3647                          "Can't support bigalloc feature without "
3648                          "extents feature\n");
3649                 return 0;
3650         }
3651
3652 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3653         if (!readonly && (ext4_has_feature_quota(sb) ||
3654                           ext4_has_feature_project(sb))) {
3655                 ext4_msg(sb, KERN_ERR,
3656                          "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3657                 return 0;
3658         }
3659 #endif  /* CONFIG_QUOTA */
3660         return 1;
3661 }
3662
3663 /*
3664  * This function is called once a day if we have errors logged
3665  * on the file system
3666  */
3667 static void print_daily_error_info(struct timer_list *t)
3668 {
3669         struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3670         struct super_block *sb = sbi->s_sb;
3671         struct ext4_super_block *es = sbi->s_es;
3672
3673         if (es->s_error_count)
3674                 /* fsck newer than v1.41.13 is needed to clean this condition. */
3675                 ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3676                          le32_to_cpu(es->s_error_count));
3677         if (es->s_first_error_time) {
3678                 printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3679                        sb->s_id,
3680                        ext4_get_tstamp(es, s_first_error_time),
3681                        (int) sizeof(es->s_first_error_func),
3682                        es->s_first_error_func,
3683                        le32_to_cpu(es->s_first_error_line));
3684                 if (es->s_first_error_ino)
3685                         printk(KERN_CONT ": inode %u",
3686                                le32_to_cpu(es->s_first_error_ino));
3687                 if (es->s_first_error_block)
3688                         printk(KERN_CONT ": block %llu", (unsigned long long)
3689                                le64_to_cpu(es->s_first_error_block));
3690                 printk(KERN_CONT "\n");
3691         }
3692         if (es->s_last_error_time) {
3693                 printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3694                        sb->s_id,
3695                        ext4_get_tstamp(es, s_last_error_time),
3696                        (int) sizeof(es->s_last_error_func),
3697                        es->s_last_error_func,
3698                        le32_to_cpu(es->s_last_error_line));
3699                 if (es->s_last_error_ino)
3700                         printk(KERN_CONT ": inode %u",
3701                                le32_to_cpu(es->s_last_error_ino));
3702                 if (es->s_last_error_block)
3703                         printk(KERN_CONT ": block %llu", (unsigned long long)
3704                                le64_to_cpu(es->s_last_error_block));
3705                 printk(KERN_CONT "\n");
3706         }
3707         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3708 }
3709
3710 /* Find next suitable group and run ext4_init_inode_table */
3711 static int ext4_run_li_request(struct ext4_li_request *elr)
3712 {
3713         struct ext4_group_desc *gdp = NULL;
3714         struct super_block *sb = elr->lr_super;
3715         ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3716         ext4_group_t group = elr->lr_next_group;
3717         unsigned int prefetch_ios = 0;
3718         int ret = 0;
3719         int nr = EXT4_SB(sb)->s_mb_prefetch;
3720         u64 start_time;
3721
3722         if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3723                 elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
3724                 ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
3725                 trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
3726                 if (group >= elr->lr_next_group) {
3727                         ret = 1;
3728                         if (elr->lr_first_not_zeroed != ngroups &&
3729                             !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3730                                 elr->lr_next_group = elr->lr_first_not_zeroed;
3731                                 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3732                                 ret = 0;
3733                         }
3734                 }
3735                 return ret;
3736         }
3737
3738         for (; group < ngroups; group++) {
3739                 gdp = ext4_get_group_desc(sb, group, NULL);
3740                 if (!gdp) {
3741                         ret = 1;
3742                         break;
3743                 }
3744
3745                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3746                         break;
3747         }
3748
3749         if (group >= ngroups)
3750                 ret = 1;
3751
3752         if (!ret) {
3753                 start_time = ktime_get_real_ns();
3754                 ret = ext4_init_inode_table(sb, group,
3755                                             elr->lr_timeout ? 0 : 1);
3756                 trace_ext4_lazy_itable_init(sb, group);
3757                 if (elr->lr_timeout == 0) {
3758                         elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3759                                 EXT4_SB(elr->lr_super)->s_li_wait_mult);
3760                 }
3761                 elr->lr_next_sched = jiffies + elr->lr_timeout;
3762                 elr->lr_next_group = group + 1;
3763         }
3764         return ret;
3765 }
3766
3767 /*
3768  * Remove lr_request from the list_request and free the
3769  * request structure. Should be called with li_list_mtx held
3770  */
3771 static void ext4_remove_li_request(struct ext4_li_request *elr)
3772 {
3773         if (!elr)
3774                 return;
3775
3776         list_del(&elr->lr_request);
3777         EXT4_SB(elr->lr_super)->s_li_request = NULL;
3778         kfree(elr);
3779 }
3780
3781 static void ext4_unregister_li_request(struct super_block *sb)
3782 {
3783         mutex_lock(&ext4_li_mtx);
3784         if (!ext4_li_info) {
3785                 mutex_unlock(&ext4_li_mtx);
3786                 return;
3787         }
3788
3789         mutex_lock(&ext4_li_info->li_list_mtx);
3790         ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3791         mutex_unlock(&ext4_li_info->li_list_mtx);
3792         mutex_unlock(&ext4_li_mtx);
3793 }
3794
/*
 * Value returned by kthread_run() for the "ext4lazyinit" thread.  It is
 * not reset when thread creation fails, so it may hold an ERR_PTR value.
 */
static struct task_struct *ext4_lazyinit_task;
3796
/*
 * This is the function where the ext4lazyinit thread lives. It walks
 * through the request list searching for the next scheduled filesystem.
 * When such a fs is found, it runs the lazy initialization request
 * (ext4_run_li_request) and keeps track of the time spent in that
 * function. Based on that time the next schedule time of the request
 * is computed. When the walk through the list is complete, the thread
 * computes its next wake-up time and puts itself to sleep.
 *
 * @arg: the struct ext4_lazy_init to work on; must not be NULL.
 *
 * The thread exits (returning 0) once the request list is found empty
 * under both ext4_li_mtx and li_list_mtx; it frees ext4_li_info on exit.
 */
static int ext4_lazyinit_thread(void *arg)
{
        struct ext4_lazy_init *eli = arg;
        struct list_head *pos, *n;
        struct ext4_li_request *elr;
        unsigned long next_wakeup, cur;

        BUG_ON(NULL == eli);
        set_freezable();

cont_thread:
        while (true) {
                /* Sentinel meaning "no request has set a wake-up time". */
                next_wakeup = MAX_JIFFY_OFFSET;

                mutex_lock(&eli->li_list_mtx);
                if (list_empty(&eli->li_request_list)) {
                        mutex_unlock(&eli->li_list_mtx);
                        goto exit_thread;
                }
                list_for_each_safe(pos, n, &eli->li_request_list) {
                        int err = 0;
                        int progress = 0;
                        elr = list_entry(pos, struct ext4_li_request,
                                         lr_request);

                        /* Not due yet: only account for its wake-up time. */
                        if (time_before(jiffies, elr->lr_next_sched)) {
                                if (time_before(elr->lr_next_sched, next_wakeup))
                                        next_wakeup = elr->lr_next_sched;
                                continue;
                        }
                        /* Only trylocks here; on failure the request is retried later. */
                        if (down_read_trylock(&elr->lr_super->s_umount)) {
                                if (sb_start_write_trylock(elr->lr_super)) {
                                        progress = 1;
                                        /*
                                         * We hold sb->s_umount, sb can not
                                         * be removed from the list, it is
                                         * now safe to drop li_list_mtx
                                         */
                                        mutex_unlock(&eli->li_list_mtx);
                                        err = ext4_run_li_request(elr);
                                        sb_end_write(elr->lr_super);
                                        mutex_lock(&eli->li_list_mtx);
                                        /*
                                         * The list may have changed while
                                         * li_list_mtx was dropped, so reload
                                         * the successor of this entry.
                                         */
                                        n = pos->next;
                                }
                                up_read((&elr->lr_super->s_umount));
                        }
                        /* error, remove the lazy_init job */
                        if (err) {
                                ext4_remove_li_request(elr);
                                continue;
                        }
                        /* Could not take the locks: retry after a random delay. */
                        if (!progress) {
                                elr->lr_next_sched = jiffies +
                                        get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
                        }
                        if (time_before(elr->lr_next_sched, next_wakeup))
                                next_wakeup = elr->lr_next_sched;
                }
                mutex_unlock(&eli->li_list_mtx);

                try_to_freeze();

                cur = jiffies;
                /*
                 * Wake-up time already reached, or no request set one:
                 * rescan the list right away instead of sleeping.
                 */
                if ((time_after_eq(cur, next_wakeup)) ||
                    (MAX_JIFFY_OFFSET == next_wakeup)) {
                        cond_resched();
                        continue;
                }

                schedule_timeout_interruptible(next_wakeup - cur);

                /* Asked to stop: drop all pending requests and exit. */
                if (kthread_should_stop()) {
                        ext4_clear_request_list();
                        goto exit_thread;
                }
        }

exit_thread:
        /*
         * It looks like the request list is empty, but we need
         * to check it under the li_list_mtx lock, to prevent any
         * additions into it, and of course we should lock ext4_li_mtx
         * to atomically free the list and ext4_li_info, because at
         * this point another ext4 filesystem could be registering
         * new one.
         */
        mutex_lock(&ext4_li_mtx);
        mutex_lock(&eli->li_list_mtx);
        if (!list_empty(&eli->li_request_list)) {
                /* A request was added in the meantime: keep running. */
                mutex_unlock(&eli->li_list_mtx);
                mutex_unlock(&ext4_li_mtx);
                goto cont_thread;
        }
        mutex_unlock(&eli->li_list_mtx);
        kfree(ext4_li_info);
        ext4_li_info = NULL;
        mutex_unlock(&ext4_li_mtx);

        return 0;
}
3906
3907 static void ext4_clear_request_list(void)
3908 {
3909         struct list_head *pos, *n;
3910         struct ext4_li_request *elr;
3911
3912         mutex_lock(&ext4_li_info->li_list_mtx);
3913         list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3914                 elr = list_entry(pos, struct ext4_li_request,
3915                                  lr_request);
3916                 ext4_remove_li_request(elr);
3917         }
3918         mutex_unlock(&ext4_li_info->li_list_mtx);
3919 }
3920
3921 static int ext4_run_lazyinit_thread(void)
3922 {
3923         ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3924                                          ext4_li_info, "ext4lazyinit");
3925         if (IS_ERR(ext4_lazyinit_task)) {
3926                 int err = PTR_ERR(ext4_lazyinit_task);
3927                 ext4_clear_request_list();
3928                 kfree(ext4_li_info);
3929                 ext4_li_info = NULL;
3930                 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3931                                  "initialization thread\n",
3932                                  err);
3933                 return err;
3934         }
3935         ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3936         return 0;
3937 }
3938
3939 /*
3940  * Check whether it make sense to run itable init. thread or not.
3941  * If there is at least one uninitialized inode table, return
3942  * corresponding group number, else the loop goes through all
3943  * groups and return total number of groups.
3944  */
3945 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3946 {
3947         ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3948         struct ext4_group_desc *gdp = NULL;
3949
3950         if (!ext4_has_group_desc_csum(sb))
3951                 return ngroups;
3952
3953         for (group = 0; group < ngroups; group++) {
3954                 gdp = ext4_get_group_desc(sb, group, NULL);
3955                 if (!gdp)
3956                         continue;
3957
3958                 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3959                         break;
3960         }
3961
3962         return group;
3963 }
3964
3965 static int ext4_li_info_new(void)
3966 {
3967         struct ext4_lazy_init *eli = NULL;
3968
3969         eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3970         if (!eli)
3971                 return -ENOMEM;
3972
3973         INIT_LIST_HEAD(&eli->li_request_list);
3974         mutex_init(&eli->li_list_mtx);
3975
3976         eli->li_state |= EXT4_LAZYINIT_QUIT;
3977
3978         ext4_li_info = eli;
3979
3980         return 0;
3981 }
3982
3983 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3984                                             ext4_group_t start)
3985 {
3986         struct ext4_li_request *elr;
3987
3988         elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3989         if (!elr)
3990                 return NULL;
3991
3992         elr->lr_super = sb;
3993         elr->lr_first_not_zeroed = start;
3994         if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3995                 elr->lr_mode = EXT4_LI_MODE_ITABLE;
3996                 elr->lr_next_group = start;
3997         } else {
3998                 elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3999         }
4000
4001         /*
4002          * Randomize first schedule time of the request to
4003          * spread the inode table initialization requests
4004          * better.
4005          */
4006         elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
4007         return elr;
4008 }
4009
4010 int ext4_register_li_request(struct super_block *sb,
4011                              ext4_group_t first_not_zeroed)
4012 {
4013         struct ext4_sb_info *sbi = EXT4_SB(sb);
4014         struct ext4_li_request *elr = NULL;
4015         ext4_group_t ngroups = sbi->s_groups_count;
4016         int ret = 0;
4017
4018         mutex_lock(&ext4_li_mtx);
4019         if (sbi->s_li_request != NULL) {
4020                 /*
4021                  * Reset timeout so it can be computed again, because
4022                  * s_li_wait_mult might have changed.
4023                  */
4024                 sbi->s_li_request->lr_timeout = 0;
4025                 goto out;
4026         }
4027
4028         if (sb_rdonly(sb) ||
4029             (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
4030              (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
4031                 goto out;
4032
4033         elr = ext4_li_request_new(sb, first_not_zeroed);
4034         if (!elr) {
4035                 ret = -ENOMEM;
4036                 goto out;
4037         }
4038
4039         if (NULL == ext4_li_info) {
4040                 ret = ext4_li_info_new();
4041                 if (ret)
4042                         goto out;
4043         }
4044
4045         mutex_lock(&ext4_li_info->li_list_mtx);
4046         list_add(&elr->lr_request, &ext4_li_info->li_request_list);
4047         mutex_unlock(&ext4_li_info->li_list_mtx);
4048
4049         sbi->s_li_request = elr;
4050         /*
4051          * set elr to NULL here since it has been inserted to
4052          * the request_list and the removal and free of it is
4053          * handled by ext4_clear_request_list from now on.
4054          */
4055         elr = NULL;
4056
4057         if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4058                 ret = ext4_run_lazyinit_thread();
4059                 if (ret)
4060                         goto out;
4061         }
4062 out:
4063         mutex_unlock(&ext4_li_mtx);
4064         if (ret)
4065                 kfree(elr);
4066         return ret;
4067 }
4068
4069 /*
4070  * We do not need to lock anything since this is called on
4071  * module unload.
4072  */
4073 static void ext4_destroy_lazyinit_thread(void)
4074 {
4075         /*
4076          * If thread exited earlier
4077          * there's nothing to be done.
4078          */
4079         if (!ext4_li_info || !ext4_lazyinit_task)
4080                 return;
4081
4082         kthread_stop(ext4_lazyinit_task);
4083 }
4084
4085 static int set_journal_csum_feature_set(struct super_block *sb)
4086 {
4087         int ret = 1;
4088         int compat, incompat;
4089         struct ext4_sb_info *sbi = EXT4_SB(sb);
4090
4091         if (ext4_has_metadata_csum(sb)) {
4092                 /* journal checksum v3 */
4093                 compat = 0;
4094                 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4095         } else {
4096                 /* journal checksum v1 */
4097                 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4098                 incompat = 0;
4099         }
4100
4101         jbd2_journal_clear_features(sbi->s_journal,
4102                         JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4103                         JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4104                         JBD2_FEATURE_INCOMPAT_CSUM_V2);
4105         if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4106                 ret = jbd2_journal_set_features(sbi->s_journal,
4107                                 compat, 0,
4108                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4109                                 incompat);
4110         } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4111                 ret = jbd2_journal_set_features(sbi->s_journal,
4112                                 compat, 0,
4113                                 incompat);
4114                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4115                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4116         } else {
4117                 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4118                                 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4119         }
4120
4121         return ret;
4122 }
4123
4124 /*
4125  * Note: calculating the overhead so we can be compatible with
4126  * historical BSD practice is quite difficult in the face of
4127  * clusters/bigalloc.  This is because multiple metadata blocks from
4128  * different block group can end up in the same allocation cluster.
4129  * Calculating the exact overhead in the face of clustered allocation
4130  * requires either O(all block bitmaps) in memory or O(number of block
4131  * groups**2) in time.  We will still calculate the superblock for
4132  * older file systems --- and if we come across with a bigalloc file
4133  * system with zero in s_overhead_clusters the estimate will be close to
4134  * correct especially for very large cluster sizes --- but for newer
4135  * file systems, it's better to calculate this figure once at mkfs
4136  * time, and store it in the superblock.  If the superblock value is
4137  * present (even for non-bigalloc file systems), we will use it.
4138  */
4139 static int count_overhead(struct super_block *sb, ext4_group_t grp,
4140                           char *buf)
4141 {
4142         struct ext4_sb_info     *sbi = EXT4_SB(sb);
4143         struct ext4_group_desc  *gdp;
4144         ext4_fsblk_t            first_block, last_block, b;
4145         ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
4146         int                     s, j, count = 0;
4147         int                     has_super = ext4_bg_has_super(sb, grp);
4148
4149         if (!ext4_has_feature_bigalloc(sb))
4150                 return (has_super + ext4_bg_num_gdb(sb, grp) +
4151                         (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4152                         sbi->s_itb_per_group + 2);
4153
4154         first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4155                 (grp * EXT4_BLOCKS_PER_GROUP(sb));
4156         last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4157         for (i = 0; i < ngroups; i++) {
4158                 gdp = ext4_get_group_desc(sb, i, NULL);
4159                 b = ext4_block_bitmap(sb, gdp);
4160                 if (b >= first_block && b <= last_block) {
4161                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4162                         count++;
4163                 }
4164                 b = ext4_inode_bitmap(sb, gdp);
4165                 if (b >= first_block && b <= last_block) {
4166                         ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4167                         count++;
4168                 }
4169                 b = ext4_inode_table(sb, gdp);
4170                 if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4171                         for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4172                                 int c = EXT4_B2C(sbi, b - first_block);
4173                                 ext4_set_bit(c, buf);
4174                                 count++;
4175                         }
4176                 if (i != grp)
4177                         continue;
4178                 s = 0;
4179                 if (ext4_bg_has_super(sb, grp)) {
4180                         ext4_set_bit(s++, buf);
4181                         count++;
4182                 }
4183                 j = ext4_bg_num_gdb(sb, grp);
4184                 if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4185                         ext4_error(sb, "Invalid number of block group "
4186                                    "descriptor blocks: %d", j);
4187                         j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4188                 }
4189                 count += j;
4190                 for (; j > 0; j--)
4191                         ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4192         }
4193         if (!count)
4194                 return 0;
4195         return EXT4_CLUSTERS_PER_GROUP(sb) -
4196                 ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4197 }
4198
4199 /*
4200  * Compute the overhead and stash it in sbi->s_overhead
4201  */
4202 int ext4_calculate_overhead(struct super_block *sb)
4203 {
4204         struct ext4_sb_info *sbi = EXT4_SB(sb);
4205         struct ext4_super_block *es = sbi->s_es;
4206         struct inode *j_inode;
4207         unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4208         ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4209         ext4_fsblk_t overhead = 0;
4210         char *buf = (char *) get_zeroed_page(GFP_NOFS);
4211
4212         if (!buf)
4213                 return -ENOMEM;
4214
4215         /*
4216          * Compute the overhead (FS structures).  This is constant
4217          * for a given filesystem unless the number of block groups
4218          * changes so we cache the previous value until it does.
4219          */
4220
4221         /*
4222          * All of the blocks before first_data_block are overhead
4223          */
4224         overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4225
4226         /*
4227          * Add the overhead found in each block group
4228          */
4229         for (i = 0; i < ngroups; i++) {
4230                 int blks;
4231
4232                 blks = count_overhead(sb, i, buf);
4233                 overhead += blks;
4234                 if (blks)
4235                         memset(buf, 0, PAGE_SIZE);
4236                 cond_resched();
4237         }
4238
4239         /*
4240          * Add the internal journal blocks whether the journal has been
4241          * loaded or not
4242          */
4243         if (sbi->s_journal && !sbi->s_journal_bdev)
4244                 overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4245         else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4246                 /* j_inum for internal journal is non-zero */
4247                 j_inode = ext4_get_journal_inode(sb, j_inum);
4248                 if (!IS_ERR(j_inode)) {
4249                         j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4250                         overhead += EXT4_NUM_B2C(sbi, j_blocks);
4251                         iput(j_inode);
4252                 } else {
4253                         ext4_msg(sb, KERN_ERR, "can't get journal size");
4254                 }
4255         }
4256         sbi->s_overhead = overhead;
4257         smp_wmb();
4258         free_page((unsigned long) buf);
4259         return 0;
4260 }
4261
4262 static void ext4_set_resv_clusters(struct super_block *sb)
4263 {
4264         ext4_fsblk_t resv_clusters;
4265         struct ext4_sb_info *sbi = EXT4_SB(sb);
4266
4267         /*
4268          * There's no need to reserve anything when we aren't using extents.
4269          * The space estimates are exact, there are no unwritten extents,
4270          * hole punching doesn't need new metadata... This is needed especially
4271          * to keep ext2/3 backward compatibility.
4272          */
4273         if (!ext4_has_feature_extents(sb))
4274                 return;
4275         /*
4276          * By default we reserve 2% or 4096 clusters, whichever is smaller.
4277          * This should cover the situations where we can not afford to run
4278          * out of space like for example punch hole, or converting
4279          * unwritten extents in delalloc path. In most cases such
4280          * allocation would require 1, or 2 blocks, higher numbers are
4281          * very rare.
4282          */
4283         resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4284                          sbi->s_cluster_bits);
4285
4286         do_div(resv_clusters, 50);
4287         resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4288
4289         atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4290 }
4291
/*
 * Return a constant string naming the quota mode of @sb: "disabled" when
 * built without CONFIG_QUOTA, "none" when the fs is not quota capable,
 * "journalled" when a journal is present and quota is journalled, and
 * "writeback" otherwise.
 */
static const char *ext4_quota_mode(struct super_block *sb)
{
#ifdef CONFIG_QUOTA
        if (!ext4_quota_capable(sb))
                return "none";

        return (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) ?
                "journalled" : "writeback";
#else
        return "disabled";
#endif
}
4306
4307 static void ext4_setup_csum_trigger(struct super_block *sb,
4308                                     enum ext4_journal_trigger_type type,
4309                                     void (*trigger)(
4310                                         struct jbd2_buffer_trigger_type *type,
4311                                         struct buffer_head *bh,
4312                                         void *mapped_data,
4313                                         size_t size))
4314 {
4315         struct ext4_sb_info *sbi = EXT4_SB(sb);
4316
4317         sbi->s_journal_triggers[type].sb = sb;
4318         sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4319 }
4320
4321 static void ext4_free_sbi(struct ext4_sb_info *sbi)
4322 {
4323         if (!sbi)
4324                 return;
4325
4326         kfree(sbi->s_blockgroup_lock);
4327         fs_put_dax(sbi->s_daxdev, NULL);
4328         kfree(sbi);
4329 }
4330
4331 static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4332 {
4333         struct ext4_sb_info *sbi;
4334
4335         sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4336         if (!sbi)
4337                 return NULL;
4338
4339         sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4340                                            NULL, NULL);
4341
4342         sbi->s_blockgroup_lock =
4343                 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4344
4345         if (!sbi->s_blockgroup_lock)
4346                 goto err_out;
4347
4348         sb->s_fs_info = sbi;
4349         sbi->s_sb = sb;
4350         return sbi;
4351 err_out:
4352         fs_put_dax(sbi->s_daxdev, NULL);
4353         kfree(sbi);
4354         return NULL;
4355 }
4356
4357 static void ext4_set_def_opts(struct super_block *sb,
4358                               struct ext4_super_block *es)
4359 {
4360         unsigned long def_mount_opts;
4361
4362         /* Set defaults before we parse the mount options */
4363         def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4364         set_opt(sb, INIT_INODE_TABLE);
4365         if (def_mount_opts & EXT4_DEFM_DEBUG)
4366                 set_opt(sb, DEBUG);
4367         if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4368                 set_opt(sb, GRPID);
4369         if (def_mount_opts & EXT4_DEFM_UID16)
4370                 set_opt(sb, NO_UID32);
4371         /* xattr user namespace & acls are now defaulted on */
4372         set_opt(sb, XATTR_USER);
4373 #ifdef CONFIG_EXT4_FS_POSIX_ACL
4374         set_opt(sb, POSIX_ACL);
4375 #endif
4376         if (ext4_has_feature_fast_commit(sb))
4377                 set_opt2(sb, JOURNAL_FAST_COMMIT);
4378         /* don't forget to enable journal_csum when metadata_csum is enabled. */
4379         if (ext4_has_metadata_csum(sb))
4380                 set_opt(sb, JOURNAL_CHECKSUM);
4381
4382         if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4383                 set_opt(sb, JOURNAL_DATA);
4384         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4385                 set_opt(sb, ORDERED_DATA);
4386         else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4387                 set_opt(sb, WRITEBACK_DATA);
4388
4389         if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
4390                 set_opt(sb, ERRORS_PANIC);
4391         else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
4392                 set_opt(sb, ERRORS_CONT);
4393         else
4394                 set_opt(sb, ERRORS_RO);
4395         /* block_validity enabled by default; disable with noblock_validity */
4396         set_opt(sb, BLOCK_VALIDITY);
4397         if (def_mount_opts & EXT4_DEFM_DISCARD)
4398                 set_opt(sb, DISCARD);
4399
4400         if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4401                 set_opt(sb, BARRIER);
4402
4403         /*
4404          * enable delayed allocation by default
4405          * Use -o nodelalloc to turn it off
4406          */
4407         if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4408             ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4409                 set_opt(sb, DELALLOC);
4410
4411         if (sb->s_blocksize == PAGE_SIZE)
4412                 set_opt(sb, DIOREAD_NOLOCK);
4413 }
4414
4415 static int ext4_handle_clustersize(struct super_block *sb)
4416 {
4417         struct ext4_sb_info *sbi = EXT4_SB(sb);
4418         struct ext4_super_block *es = sbi->s_es;
4419         int clustersize;
4420
4421         /* Handle clustersize */
4422         clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4423         if (ext4_has_feature_bigalloc(sb)) {
4424                 if (clustersize < sb->s_blocksize) {
4425                         ext4_msg(sb, KERN_ERR,
4426                                  "cluster size (%d) smaller than "
4427                                  "block size (%lu)", clustersize, sb->s_blocksize);
4428                         return -EINVAL;
4429                 }
4430                 sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4431                         le32_to_cpu(es->s_log_block_size);
4432                 sbi->s_clusters_per_group =
4433                         le32_to_cpu(es->s_clusters_per_group);
4434                 if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
4435                         ext4_msg(sb, KERN_ERR,
4436                                  "#clusters per group too big: %lu",
4437                                  sbi->s_clusters_per_group);
4438                         return -EINVAL;
4439                 }
4440                 if (sbi->s_blocks_per_group !=
4441                     (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
4442                         ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4443                                  "clusters per group (%lu) inconsistent",
4444                                  sbi->s_blocks_per_group,
4445                                  sbi->s_clusters_per_group);
4446                         return -EINVAL;
4447                 }
4448         } else {
4449                 if (clustersize != sb->s_blocksize) {
4450                         ext4_msg(sb, KERN_ERR,
4451                                  "fragment/cluster size (%d) != "
4452                                  "block size (%lu)", clustersize, sb->s_blocksize);
4453                         return -EINVAL;
4454                 }
4455                 if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
4456                         ext4_msg(sb, KERN_ERR,
4457                                  "#blocks per group too big: %lu",
4458                                  sbi->s_blocks_per_group);
4459                         return -EINVAL;
4460                 }
4461                 sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4462                 sbi->s_cluster_bits = 0;
4463         }
4464         sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
4465
4466         /* Do we have standard group size of clustersize * 8 blocks ? */
4467         if (sbi->s_blocks_per_group == clustersize << 3)
4468                 set_opt2(sb, STD_GROUP_SIZE);
4469
4470         return 0;
4471 }
4472
4473 static void ext4_fast_commit_init(struct super_block *sb)
4474 {
4475         struct ext4_sb_info *sbi = EXT4_SB(sb);
4476
4477         /* Initialize fast commit stuff */
4478         atomic_set(&sbi->s_fc_subtid, 0);
4479         INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4480         INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4481         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4482         INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4483         sbi->s_fc_bytes = 0;
4484         ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4485         sbi->s_fc_ineligible_tid = 0;
4486         spin_lock_init(&sbi->s_fc_lock);
4487         memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4488         sbi->s_fc_replay_state.fc_regions = NULL;
4489         sbi->s_fc_replay_state.fc_regions_size = 0;
4490         sbi->s_fc_replay_state.fc_regions_used = 0;
4491         sbi->s_fc_replay_state.fc_regions_valid = 0;
4492         sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4493         sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4494         sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4495 }
4496
4497 static int ext4_inode_info_init(struct super_block *sb,
4498                                 struct ext4_super_block *es)
4499 {
4500         struct ext4_sb_info *sbi = EXT4_SB(sb);
4501
4502         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4503                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4504                 sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4505         } else {
4506                 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4507                 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4508                 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4509                         ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4510                                  sbi->s_first_ino);
4511                         return -EINVAL;
4512                 }
4513                 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4514                     (!is_power_of_2(sbi->s_inode_size)) ||
4515                     (sbi->s_inode_size > sb->s_blocksize)) {
4516                         ext4_msg(sb, KERN_ERR,
4517                                "unsupported inode size: %d",
4518                                sbi->s_inode_size);
4519                         ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
4520                         return -EINVAL;
4521                 }
4522                 /*
4523                  * i_atime_extra is the last extra field available for
4524                  * [acm]times in struct ext4_inode. Checking for that
4525                  * field should suffice to ensure we have extra space
4526                  * for all three.
4527                  */
4528                 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4529                         sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4530                         sb->s_time_gran = 1;
4531                         sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4532                 } else {
4533                         sb->s_time_gran = NSEC_PER_SEC;
4534                         sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4535                 }
4536                 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4537         }
4538
4539         if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4540                 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4541                         EXT4_GOOD_OLD_INODE_SIZE;
4542                 if (ext4_has_feature_extra_isize(sb)) {
4543                         unsigned v, max = (sbi->s_inode_size -
4544                                            EXT4_GOOD_OLD_INODE_SIZE);
4545
4546                         v = le16_to_cpu(es->s_want_extra_isize);
4547                         if (v > max) {
4548                                 ext4_msg(sb, KERN_ERR,
4549                                          "bad s_want_extra_isize: %d", v);
4550                                 return -EINVAL;
4551                         }
4552                         if (sbi->s_want_extra_isize < v)
4553                                 sbi->s_want_extra_isize = v;
4554
4555                         v = le16_to_cpu(es->s_min_extra_isize);
4556                         if (v > max) {
4557                                 ext4_msg(sb, KERN_ERR,
4558                                          "bad s_min_extra_isize: %d", v);
4559                                 return -EINVAL;
4560                         }
4561                         if (sbi->s_want_extra_isize < v)
4562                                 sbi->s_want_extra_isize = v;
4563                 }
4564         }
4565
4566         return 0;
4567 }
4568
4569 #if IS_ENABLED(CONFIG_UNICODE)
4570 static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4571 {
4572         const struct ext4_sb_encodings *encoding_info;
4573         struct unicode_map *encoding;
4574         __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4575
4576         if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
4577                 return 0;
4578
4579         encoding_info = ext4_sb_read_encoding(es);
4580         if (!encoding_info) {
4581                 ext4_msg(sb, KERN_ERR,
4582                         "Encoding requested by superblock is unknown");
4583                 return -EINVAL;
4584         }
4585
4586         encoding = utf8_load(encoding_info->version);
4587         if (IS_ERR(encoding)) {
4588                 ext4_msg(sb, KERN_ERR,
4589                         "can't mount with superblock charset: %s-%u.%u.%u "
4590                         "not supported by the kernel. flags: 0x%x.",
4591                         encoding_info->name,
4592                         unicode_major(encoding_info->version),
4593                         unicode_minor(encoding_info->version),
4594                         unicode_rev(encoding_info->version),
4595                         encoding_flags);
4596                 return -EINVAL;
4597         }
4598         ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4599                 "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4600                 unicode_major(encoding_info->version),
4601                 unicode_minor(encoding_info->version),
4602                 unicode_rev(encoding_info->version),
4603                 encoding_flags);
4604
4605         sb->s_encoding = encoding;
4606         sb->s_encoding_flags = encoding_flags;
4607
4608         return 0;
4609 }
4610 #else
4611 static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4612 {
4613         return 0;
4614 }
4615 #endif
4616
4617 static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
4618 {
4619         struct ext4_sb_info *sbi = EXT4_SB(sb);
4620
4621         /* Warn if metadata_csum and gdt_csum are both set. */
4622         if (ext4_has_feature_metadata_csum(sb) &&
4623             ext4_has_feature_gdt_csum(sb))
4624                 ext4_warning(sb, "metadata_csum and uninit_bg are "
4625                              "redundant flags; please run fsck.");
4626
4627         /* Check for a known checksum algorithm */
4628         if (!ext4_verify_csum_type(sb, es)) {
4629                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4630                          "unknown checksum algorithm.");
4631                 return -EINVAL;
4632         }
4633         ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4634                                 ext4_orphan_file_block_trigger);
4635
4636         /* Load the checksum driver */
4637         sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4638         if (IS_ERR(sbi->s_chksum_driver)) {
4639                 int ret = PTR_ERR(sbi->s_chksum_driver);
4640                 ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
4641                 sbi->s_chksum_driver = NULL;
4642                 return ret;
4643         }
4644
4645         /* Check superblock checksum */
4646         if (!ext4_superblock_csum_verify(sb, es)) {
4647                 ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4648                          "invalid superblock checksum.  Run e2fsck?");
4649                 return -EFSBADCRC;
4650         }
4651
4652         /* Precompute checksum seed for all metadata */
4653         if (ext4_has_feature_csum_seed(sb))
4654                 sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4655         else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
4656                 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4657                                                sizeof(es->s_uuid));
4658         return 0;
4659 }
4660
4661 static int ext4_check_feature_compatibility(struct super_block *sb,
4662                                             struct ext4_super_block *es,
4663                                             int silent)
4664 {
4665         struct ext4_sb_info *sbi = EXT4_SB(sb);
4666
4667         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4668             (ext4_has_compat_features(sb) ||
4669              ext4_has_ro_compat_features(sb) ||
4670              ext4_has_incompat_features(sb)))
4671                 ext4_msg(sb, KERN_WARNING,
4672                        "feature flags set on rev 0 fs, "
4673                        "running e2fsck is recommended");
4674
4675         if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4676                 set_opt2(sb, HURD_COMPAT);
4677                 if (ext4_has_feature_64bit(sb)) {
4678                         ext4_msg(sb, KERN_ERR,
4679                                  "The Hurd can't support 64-bit file systems");
4680                         return -EINVAL;
4681                 }
4682
4683                 /*
4684                  * ea_inode feature uses l_i_version field which is not
4685                  * available in HURD_COMPAT mode.
4686                  */
4687                 if (ext4_has_feature_ea_inode(sb)) {
4688                         ext4_msg(sb, KERN_ERR,
4689                                  "ea_inode feature is not supported for Hurd");
4690                         return -EINVAL;
4691                 }
4692         }
4693
4694         if (IS_EXT2_SB(sb)) {
4695                 if (ext2_feature_set_ok(sb))
4696                         ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4697                                  "using the ext4 subsystem");
4698                 else {
4699                         /*
4700                          * If we're probing be silent, if this looks like
4701                          * it's actually an ext[34] filesystem.
4702                          */
4703                         if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4704                                 return -EINVAL;
4705                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4706                                  "to feature incompatibilities");
4707                         return -EINVAL;
4708                 }
4709         }
4710
4711         if (IS_EXT3_SB(sb)) {
4712                 if (ext3_feature_set_ok(sb))
4713                         ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4714                                  "using the ext4 subsystem");
4715                 else {
4716                         /*
4717                          * If we're probing be silent, if this looks like
4718                          * it's actually an ext4 filesystem.
4719                          */
4720                         if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4721                                 return -EINVAL;
4722                         ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4723                                  "to feature incompatibilities");
4724                         return -EINVAL;
4725                 }
4726         }
4727
4728         /*
4729          * Check feature flags regardless of the revision level, since we
4730          * previously didn't change the revision level when setting the flags,
4731          * so there is a chance incompat flags are set on a rev 0 filesystem.
4732          */
4733         if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4734                 return -EINVAL;
4735
4736         if (sbi->s_daxdev) {
4737                 if (sb->s_blocksize == PAGE_SIZE)
4738                         set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4739                 else
4740                         ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
4741         }
4742
4743         if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4744                 if (ext4_has_feature_inline_data(sb)) {
4745                         ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4746                                         " that may contain inline data");
4747                         return -EINVAL;
4748                 }
4749                 if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4750                         ext4_msg(sb, KERN_ERR,
4751                                 "DAX unsupported by block device.");
4752                         return -EINVAL;
4753                 }
4754         }
4755
4756         if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4757                 ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4758                          es->s_encryption_level);
4759                 return -EINVAL;
4760         }
4761
4762         return 0;
4763 }
4764
4765 static int ext4_check_geometry(struct super_block *sb,
4766                                struct ext4_super_block *es)
4767 {
4768         struct ext4_sb_info *sbi = EXT4_SB(sb);
4769         __u64 blocks_count;
4770         int err;
4771
4772         if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
4773                 ext4_msg(sb, KERN_ERR,
4774                          "Number of reserved GDT blocks insanely large: %d",
4775                          le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4776                 return -EINVAL;
4777         }
4778         /*
4779          * Test whether we have more sectors than will fit in sector_t,
4780          * and whether the max offset is addressable by the page cache.
4781          */
4782         err = generic_check_addressable(sb->s_blocksize_bits,
4783                                         ext4_blocks_count(es));
4784         if (err) {
4785                 ext4_msg(sb, KERN_ERR, "filesystem"
4786                          " too large to mount safely on this system");
4787                 return err;
4788         }
4789
4790         /* check blocks count against device size */
4791         blocks_count = sb_bdev_nr_blocks(sb);
4792         if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4793                 ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4794                        "exceeds size of device (%llu blocks)",
4795                        ext4_blocks_count(es), blocks_count);
4796                 return -EINVAL;
4797         }
4798
4799         /*
4800          * It makes no sense for the first data block to be beyond the end
4801          * of the filesystem.
4802          */
4803         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4804                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4805                          "block %u is beyond end of filesystem (%llu)",
4806                          le32_to_cpu(es->s_first_data_block),
4807                          ext4_blocks_count(es));
4808                 return -EINVAL;
4809         }
4810         if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4811             (sbi->s_cluster_ratio == 1)) {
4812                 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4813                          "block is 0 with a 1k block and cluster size");
4814                 return -EINVAL;
4815         }
4816
4817         blocks_count = (ext4_blocks_count(es) -
4818                         le32_to_cpu(es->s_first_data_block) +
4819                         EXT4_BLOCKS_PER_GROUP(sb) - 1);
4820         do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4821         if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4822                 ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4823                        "(block count %llu, first data block %u, "
4824                        "blocks per group %lu)", blocks_count,
4825                        ext4_blocks_count(es),
4826                        le32_to_cpu(es->s_first_data_block),
4827                        EXT4_BLOCKS_PER_GROUP(sb));
4828                 return -EINVAL;
4829         }
4830         sbi->s_groups_count = blocks_count;
4831         sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4832                         (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4833         if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4834             le32_to_cpu(es->s_inodes_count)) {
4835                 ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4836                          le32_to_cpu(es->s_inodes_count),
4837                          ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4838                 return -EINVAL;
4839         }
4840
4841         return 0;
4842 }
4843
4844 static int ext4_group_desc_init(struct super_block *sb,
4845                                 struct ext4_super_block *es,
4846                                 ext4_fsblk_t logical_sb_block,
4847                                 ext4_group_t *first_not_zeroed)
4848 {
4849         struct ext4_sb_info *sbi = EXT4_SB(sb);
4850         unsigned int db_count;
4851         ext4_fsblk_t block;
4852         int i;
4853
4854         db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4855                    EXT4_DESC_PER_BLOCK(sb);
4856         if (ext4_has_feature_meta_bg(sb)) {
4857                 if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4858                         ext4_msg(sb, KERN_WARNING,
4859                                  "first meta block group too large: %u "
4860                                  "(group descriptor block count %u)",
4861                                  le32_to_cpu(es->s_first_meta_bg), db_count);
4862                         return -EINVAL;
4863                 }
4864         }
4865         rcu_assign_pointer(sbi->s_group_desc,
4866                            kvmalloc_array(db_count,
4867                                           sizeof(struct buffer_head *),
4868                                           GFP_KERNEL));
4869         if (sbi->s_group_desc == NULL) {
4870                 ext4_msg(sb, KERN_ERR, "not enough memory");
4871                 return -ENOMEM;
4872         }
4873
4874         bgl_lock_init(sbi->s_blockgroup_lock);
4875
4876         /* Pre-read the descriptors into the buffer cache */
4877         for (i = 0; i < db_count; i++) {
4878                 block = descriptor_loc(sb, logical_sb_block, i);
4879                 ext4_sb_breadahead_unmovable(sb, block);
4880         }
4881
4882         for (i = 0; i < db_count; i++) {
4883                 struct buffer_head *bh;
4884
4885                 block = descriptor_loc(sb, logical_sb_block, i);
4886                 bh = ext4_sb_bread_unmovable(sb, block);
4887                 if (IS_ERR(bh)) {
4888                         ext4_msg(sb, KERN_ERR,
4889                                "can't read group descriptor %d", i);
4890                         sbi->s_gdb_count = i;
4891                         return PTR_ERR(bh);
4892                 }
4893                 rcu_read_lock();
4894                 rcu_dereference(sbi->s_group_desc)[i] = bh;
4895                 rcu_read_unlock();
4896         }
4897         sbi->s_gdb_count = db_count;
4898         if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
4899                 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4900                 return -EFSCORRUPTED;
4901         }
4902
4903         return 0;
4904 }
4905
4906 static int ext4_load_and_init_journal(struct super_block *sb,
4907                                       struct ext4_super_block *es,
4908                                       struct ext4_fs_context *ctx)
4909 {
4910         struct ext4_sb_info *sbi = EXT4_SB(sb);
4911         int err;
4912
4913         err = ext4_load_journal(sb, es, ctx->journal_devnum);
4914         if (err)
4915                 return err;
4916
4917         if (ext4_has_feature_64bit(sb) &&
4918             !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4919                                        JBD2_FEATURE_INCOMPAT_64BIT)) {
4920                 ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4921                 goto out;
4922         }
4923
4924         if (!set_journal_csum_feature_set(sb)) {
4925                 ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4926                          "feature set");
4927                 goto out;
4928         }
4929
4930         if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4931                 !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4932                                           JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4933                 ext4_msg(sb, KERN_ERR,
4934                         "Failed to set fast commit journal feature");
4935                 goto out;
4936         }
4937
4938         /* We have now updated the journal if required, so we can
4939          * validate the data journaling mode. */
4940         switch (test_opt(sb, DATA_FLAGS)) {
4941         case 0:
4942                 /* No mode set, assume a default based on the journal
4943                  * capabilities: ORDERED_DATA if the journal can
4944                  * cope, else JOURNAL_DATA
4945                  */
4946                 if (jbd2_journal_check_available_features
4947                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4948                         set_opt(sb, ORDERED_DATA);
4949                         sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4950                 } else {
4951                         set_opt(sb, JOURNAL_DATA);
4952                         sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4953                 }
4954                 break;
4955
4956         case EXT4_MOUNT_ORDERED_DATA:
4957         case EXT4_MOUNT_WRITEBACK_DATA:
4958                 if (!jbd2_journal_check_available_features
4959                     (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4960                         ext4_msg(sb, KERN_ERR, "Journal does not support "
4961                                "requested data journaling mode");
4962                         goto out;
4963                 }
4964                 break;
4965         default:
4966                 break;
4967         }
4968
4969         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4970             test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4971                 ext4_msg(sb, KERN_ERR, "can't mount with "
4972                         "journal_async_commit in data=ordered mode");
4973                 goto out;
4974         }
4975
4976         set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
4977
4978         sbi->s_journal->j_submit_inode_data_buffers =
4979                 ext4_journal_submit_inode_data_buffers;
4980         sbi->s_journal->j_finish_inode_data_buffers =
4981                 ext4_journal_finish_inode_data_buffers;
4982
4983         return 0;
4984
4985 out:
4986         /* flush s_sb_upd_work before destroying the journal. */
4987         flush_work(&sbi->s_sb_upd_work);
4988         jbd2_journal_destroy(sbi->s_journal);
4989         sbi->s_journal = NULL;
4990         return -EINVAL;
4991 }
4992
4993 static int ext4_check_journal_data_mode(struct super_block *sb)
4994 {
4995         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4996                 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4997                             "data=journal disables delayed allocation, "
4998                             "dioread_nolock, O_DIRECT and fast_commit support!\n");
4999                 /* can't mount with both data=journal and dioread_nolock. */
5000                 clear_opt(sb, DIOREAD_NOLOCK);
5001                 clear_opt2(sb, JOURNAL_FAST_COMMIT);
5002                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
5003                         ext4_msg(sb, KERN_ERR, "can't mount with "
5004                                  "both data=journal and delalloc");
5005                         return -EINVAL;
5006                 }
5007                 if (test_opt(sb, DAX_ALWAYS)) {
5008                         ext4_msg(sb, KERN_ERR, "can't mount with "
5009                                  "both data=journal and dax");
5010                         return -EINVAL;
5011                 }
5012                 if (ext4_has_feature_encrypt(sb)) {
5013                         ext4_msg(sb, KERN_WARNING,
5014                                  "encrypted files will use data=ordered "
5015                                  "instead of data journaling mode");
5016                 }
5017                 if (test_opt(sb, DELALLOC))
5018                         clear_opt(sb, DELALLOC);
5019         } else {
5020                 sb->s_iflags |= SB_I_CGROUPWB;
5021         }
5022
5023         return 0;
5024 }
5025
5026 static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
5027                            int silent)
5028 {
5029         struct ext4_sb_info *sbi = EXT4_SB(sb);
5030         struct ext4_super_block *es;
5031         ext4_fsblk_t logical_sb_block;
5032         unsigned long offset = 0;
5033         struct buffer_head *bh;
5034         int ret = -EINVAL;
5035         int blocksize;
5036
5037         blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
5038         if (!blocksize) {
5039                 ext4_msg(sb, KERN_ERR, "unable to set blocksize");
5040                 return -EINVAL;
5041         }
5042
5043         /*
5044          * The ext4 superblock will not be buffer aligned for other than 1kB
5045          * block sizes.  We need to calculate the offset from buffer start.
5046          */
5047         if (blocksize != EXT4_MIN_BLOCK_SIZE) {
5048                 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5049                 offset = do_div(logical_sb_block, blocksize);
5050         } else {
5051                 logical_sb_block = sbi->s_sb_block;
5052         }
5053
5054         bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5055         if (IS_ERR(bh)) {
5056                 ext4_msg(sb, KERN_ERR, "unable to read superblock");
5057                 return PTR_ERR(bh);
5058         }
5059         /*
5060          * Note: s_es must be initialized as soon as possible because
5061          *       some ext4 macro-instructions depend on its value
5062          */
5063         es = (struct ext4_super_block *) (bh->b_data + offset);
5064         sbi->s_es = es;
5065         sb->s_magic = le16_to_cpu(es->s_magic);
5066         if (sb->s_magic != EXT4_SUPER_MAGIC) {
5067                 if (!silent)
5068                         ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5069                 goto out;
5070         }
5071
5072         if (le32_to_cpu(es->s_log_block_size) >
5073             (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5074                 ext4_msg(sb, KERN_ERR,
5075                          "Invalid log block size: %u",
5076                          le32_to_cpu(es->s_log_block_size));
5077                 goto out;
5078         }
5079         if (le32_to_cpu(es->s_log_cluster_size) >
5080             (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5081                 ext4_msg(sb, KERN_ERR,
5082                          "Invalid log cluster size: %u",
5083                          le32_to_cpu(es->s_log_cluster_size));
5084                 goto out;
5085         }
5086
5087         blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
5088
5089         /*
5090          * If the default block size is not the same as the real block size,
5091          * we need to reload it.
5092          */
5093         if (sb->s_blocksize == blocksize) {
5094                 *lsb = logical_sb_block;
5095                 sbi->s_sbh = bh;
5096                 return 0;
5097         }
5098
5099         /*
5100          * bh must be released before kill_bdev(), otherwise
5101          * it won't be freed and its page also. kill_bdev()
5102          * is called by sb_set_blocksize().
5103          */
5104         brelse(bh);
5105         /* Validate the filesystem blocksize */
5106         if (!sb_set_blocksize(sb, blocksize)) {
5107                 ext4_msg(sb, KERN_ERR, "bad block size %d",
5108                                 blocksize);
5109                 bh = NULL;
5110                 goto out;
5111         }
5112
5113         logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5114         offset = do_div(logical_sb_block, blocksize);
5115         bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5116         if (IS_ERR(bh)) {
5117                 ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
5118                 ret = PTR_ERR(bh);
5119                 bh = NULL;
5120                 goto out;
5121         }
5122         es = (struct ext4_super_block *)(bh->b_data + offset);
5123         sbi->s_es = es;
5124         if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
5125                 ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
5126                 goto out;
5127         }
5128         *lsb = logical_sb_block;
5129         sbi->s_sbh = bh;
5130         return 0;
5131 out:
5132         brelse(bh);
5133         return ret;
5134 }
5135
5136 static void ext4_hash_info_init(struct super_block *sb)
5137 {
5138         struct ext4_sb_info *sbi = EXT4_SB(sb);
5139         struct ext4_super_block *es = sbi->s_es;
5140         unsigned int i;
5141
5142         for (i = 0; i < 4; i++)
5143                 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
5144
5145         sbi->s_def_hash_version = es->s_def_hash_version;
5146         if (ext4_has_feature_dir_index(sb)) {
5147                 i = le32_to_cpu(es->s_flags);
5148                 if (i & EXT2_FLAGS_UNSIGNED_HASH)
5149                         sbi->s_hash_unsigned = 3;
5150                 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
5151 #ifdef __CHAR_UNSIGNED__
5152                         if (!sb_rdonly(sb))
5153                                 es->s_flags |=
5154                                         cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
5155                         sbi->s_hash_unsigned = 3;
5156 #else
5157                         if (!sb_rdonly(sb))
5158                                 es->s_flags |=
5159                                         cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
5160 #endif
5161                 }
5162         }
5163 }
5164
5165 static int ext4_block_group_meta_init(struct super_block *sb, int silent)
5166 {
5167         struct ext4_sb_info *sbi = EXT4_SB(sb);
5168         struct ext4_super_block *es = sbi->s_es;
5169         int has_huge_files;
5170
5171         has_huge_files = ext4_has_feature_huge_file(sb);
5172         sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
5173                                                       has_huge_files);
5174         sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
5175
5176         sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
5177         if (ext4_has_feature_64bit(sb)) {
5178                 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
5179                     sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
5180                     !is_power_of_2(sbi->s_desc_size)) {
5181                         ext4_msg(sb, KERN_ERR,
5182                                "unsupported descriptor size %lu",
5183                                sbi->s_desc_size);
5184                         return -EINVAL;
5185                 }
5186         } else
5187                 sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
5188
5189         sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
5190         sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
5191
5192         sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
5193         if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
5194                 if (!silent)
5195                         ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5196                 return -EINVAL;
5197         }
5198         if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
5199             sbi->s_inodes_per_group > sb->s_blocksize * 8) {
5200                 ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
5201                          sbi->s_inodes_per_group);
5202                 return -EINVAL;
5203         }
5204         sbi->s_itb_per_group = sbi->s_inodes_per_group /
5205                                         sbi->s_inodes_per_block;
5206         sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
5207         sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
5208         sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
5209         sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
5210
5211         return 0;
5212 }
5213
/*
 * Do the bulk of the work of setting up @sb for an ext4 mount.
 *
 * In order, this loads the on-disk superblock, applies default and
 * caller-supplied mount options, validates features and geometry, reads the
 * group descriptors, loads the journal (unless disabled or absent), creates
 * the xattr caches, reads the root inode and builds the root dentry, and then
 * initialises mballoc, sysfs, orphan handling and quotas.
 *
 * @fc: filesystem context; fc->fs_private holds the parsed ext4_fs_context
 * @sb: superblock being filled in; EXT4_SB(sb) must already be allocated
 *
 * Returns 0 on success.  On failure returns a negative errno after the
 * failed_mount* labels have undone whatever had been set up, and clears
 * sb->s_fs_info.
 */
static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
{
        struct ext4_super_block *es = NULL;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        ext4_fsblk_t logical_sb_block;
        struct inode *root;
        int needs_recovery;
        int err;
        ext4_group_t first_not_zeroed;
        struct ext4_fs_context *ctx = fc->fs_private;
        int silent = fc->sb_flags & SB_SILENT;

        /* Set defaults for the variables that will be set during parsing */
        if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
                ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;

        sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
        sbi->s_sectors_written_start =
                part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);

        err = ext4_load_super(sb, &logical_sb_block, silent);
        if (err)
                goto out_fail;

        es = sbi->s_es;
        sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);

        err = ext4_init_metadata_csum(sb, es);
        if (err)
                goto failed_mount;

        ext4_set_def_opts(sb, es);

        sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
        sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
        sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
        sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
        sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;

        /*
         * set default s_li_wait_mult for lazyinit, for the case there is
         * no mount option specified.
         */
        sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;

        err = ext4_inode_info_init(sb, es);
        if (err)
                goto failed_mount;

        err = parse_apply_sb_mount_options(sb, ctx);
        if (err < 0)
                goto failed_mount;

        /*
         * Snapshot the option words as they stand before ext4_apply_options()
         * applies the options from @fc; the no-journal path below compares
         * against this snapshot to detect an explicit data= option.
         */
        sbi->s_def_mount_opt = sbi->s_mount_opt;
        sbi->s_def_mount_opt2 = sbi->s_mount_opt2;

        err = ext4_check_opt_consistency(fc, sb);
        if (err < 0)
                goto failed_mount;

        ext4_apply_options(fc, sb);

        err = ext4_encoding_init(sb, es);
        if (err)
                goto failed_mount;

        err = ext4_check_journal_data_mode(sb);
        if (err)
                goto failed_mount;

        sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
                (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);

        /* i_version is always enabled now */
        sb->s_flags |= SB_I_VERSION;

        err = ext4_check_feature_compatibility(sb, es, silent);
        if (err)
                goto failed_mount;

        err = ext4_block_group_meta_init(sb, silent);
        if (err)
                goto failed_mount;

        ext4_hash_info_init(sb);

        err = ext4_handle_clustersize(sb);
        if (err)
                goto failed_mount;

        err = ext4_check_geometry(sb, es);
        if (err)
                goto failed_mount;

        /*
         * The error-report timer and the superblock-update work are
         * initialised here; from this point on failures go through
         * failed_mount3, which flushes the work and deletes the timer.
         */
        timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
        spin_lock_init(&sbi->s_error_lock);
        INIT_WORK(&sbi->s_sb_upd_work, update_super_work);

        err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
        if (err)
                goto failed_mount3;

        err = ext4_es_register_shrinker(sbi);
        if (err)
                goto failed_mount3;

        sbi->s_stripe = ext4_get_stripe_size(sbi);
        /*
         * It's hard to get stripe aligned blocks if stripe is not aligned with
         * cluster, just disable stripe and alert user to simplify code and
         * avoid stripe aligned allocation which will rarely succeed.
         */
        if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 &&
            sbi->s_stripe % sbi->s_cluster_ratio != 0) {
                ext4_msg(sb, KERN_WARNING,
                         "stripe (%lu) is not aligned with cluster size (%u), "
                         "stripe is disabled",
                         sbi->s_stripe, sbi->s_cluster_ratio);
                sbi->s_stripe = 0;
        }
        sbi->s_extent_max_zeroout_kb = 32;

        /*
         * set up enough so that it can read an inode
         */
        sb->s_op = &ext4_sops;
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_FS_ENCRYPTION
        sb->s_cop = &ext4_cryptops;
#endif
#ifdef CONFIG_FS_VERITY
        sb->s_vop = &ext4_verityops;
#endif
#ifdef CONFIG_QUOTA
        sb->dq_op = &ext4_quota_operations;
        if (ext4_has_feature_quota(sb))
                sb->s_qcop = &dquot_quotactl_sysfile_ops;
        else
                sb->s_qcop = &ext4_qctl_operations;
        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
        memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));

        INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
        mutex_init(&sbi->s_orphan_lock);

        ext4_fast_commit_init(sb);

        sb->s_root = NULL;

        /*
         * Remember whether there is orphan or journal recovery pending; this
         * decides whether recovery is marked complete at the end of the mount.
         */
        needs_recovery = (es->s_last_orphan != 0 ||
                          ext4_has_feature_orphan_present(sb) ||
                          ext4_has_feature_journal_needs_recovery(sb));

        if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
                err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
                if (err)
                        goto failed_mount3a;
        }

        /*
         * Default error for the "goto failed_mount3a" paths below that do not
         * assign err themselves.
         */
        err = -EINVAL;
        /*
         * The first inode we look at is the journal inode.  Don't try
         * root first: it may be modified in the journal!
         */
        if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
                err = ext4_load_and_init_journal(sb, es, ctx);
                if (err)
                        goto failed_mount3a;
        } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
                   ext4_has_feature_journal_needs_recovery(sb)) {
                ext4_msg(sb, KERN_ERR, "required journal recovery "
                       "suppressed and not mounted read-only");
                goto failed_mount3a;
        } else {
                /* Nojournal mode, all journal mount options are illegal */
                if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "journal_async_commit, fs mounted w/o journal");
                        goto failed_mount3a;
                }

                if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "journal_checksum, fs mounted w/o journal");
                        goto failed_mount3a;
                }
                if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "commit=%lu, fs mounted w/o journal",
                                 sbi->s_commit_interval / HZ);
                        goto failed_mount3a;
                }
                /* Any data-mode bit that differs from the snapshot is rejected. */
                if (EXT4_MOUNT_DATA_FLAGS &
                    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
                                 "data=, fs mounted w/o journal");
                        goto failed_mount3a;
                }
                sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
                clear_opt(sb, JOURNAL_CHECKSUM);
                clear_opt(sb, DATA_FLAGS);
                clear_opt2(sb, JOURNAL_FAST_COMMIT);
                sbi->s_journal = NULL;
                needs_recovery = 0;
        }

        if (!test_opt(sb, NO_MBCACHE)) {
                sbi->s_ea_block_cache = ext4_xattr_create_cache();
                if (!sbi->s_ea_block_cache) {
                        ext4_msg(sb, KERN_ERR,
                                 "Failed to create ea_block_cache");
                        err = -EINVAL;
                        goto failed_mount_wq;
                }

                if (ext4_has_feature_ea_inode(sb)) {
                        sbi->s_ea_inode_cache = ext4_xattr_create_cache();
                        if (!sbi->s_ea_inode_cache) {
                                ext4_msg(sb, KERN_ERR,
                                         "Failed to create ea_inode_cache");
                                err = -EINVAL;
                                goto failed_mount_wq;
                        }
                }
        }

        /*
         * Get the # of file system overhead blocks from the
         * superblock if present.
         */
        sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
        /* ignore the precalculated value if it is ridiculous */
        if (sbi->s_overhead > ext4_blocks_count(es))
                sbi->s_overhead = 0;
        /*
         * If the bigalloc feature is not enabled recalculating the
         * overhead doesn't take long, so we might as well just redo
         * it to make sure we are using the correct value.
         */
        if (!ext4_has_feature_bigalloc(sb))
                sbi->s_overhead = 0;
        if (sbi->s_overhead == 0) {
                err = ext4_calculate_overhead(sb);
                if (err)
                        goto failed_mount_wq;
        }

        /*
         * The maximum number of concurrent works can be high and
         * concurrency isn't really necessary.  Limit it to 1.
         */
        EXT4_SB(sb)->rsv_conversion_wq =
                alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
        if (!EXT4_SB(sb)->rsv_conversion_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
                err = -ENOMEM;
                goto failed_mount4;
        }

        /*
         * The jbd2_journal_load will have done any necessary log recovery,
         * so we can safely mount the rest of the filesystem now.
         */

        root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
        if (IS_ERR(root)) {
                ext4_msg(sb, KERN_ERR, "get root inode failed");
                err = PTR_ERR(root);
                root = NULL;
                goto failed_mount4;
        }
        /* The root must be a non-empty directory with blocks allocated. */
        if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
                ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
                iput(root);
                err = -EFSCORRUPTED;
                goto failed_mount4;
        }

        sb->s_root = d_make_root(root);
        if (!sb->s_root) {
                ext4_msg(sb, KERN_ERR, "get root dentry failed");
                err = -ENOMEM;
                goto failed_mount4;
        }

        /*
         * -EROFS from ext4_setup_super() is not fatal: the mount continues
         * with the superblock flagged read-only.
         */
        err = ext4_setup_super(sb, es, sb_rdonly(sb));
        if (err == -EROFS) {
                sb->s_flags |= SB_RDONLY;
        } else if (err)
                goto failed_mount4a;

        ext4_set_resv_clusters(sb);

        if (test_opt(sb, BLOCK_VALIDITY)) {
                err = ext4_setup_system_zone(sb);
                if (err) {
                        ext4_msg(sb, KERN_ERR, "failed to initialize system "
                                 "zone (%d)", err);
                        goto failed_mount4a;
                }
        }
        ext4_fc_replay_cleanup(sb);

        ext4_ext_init(sb);

        /*
         * Enable optimize_scan if number of groups is > threshold. This can be
         * turned off by passing "mb_optimize_scan=0". This can also be
         * turned on forcefully by passing "mb_optimize_scan=1".
         */
        if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
                if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
                        set_opt2(sb, MB_OPTIMIZE_SCAN);
                else
                        clear_opt2(sb, MB_OPTIMIZE_SCAN);
        }

        err = ext4_mb_init(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
                         err);
                goto failed_mount5;
        }

        /*
         * We can only set up the journal commit callback once
         * mballoc is initialized
         */
        if (sbi->s_journal)
                sbi->s_journal->j_commit_callback =
                        ext4_journal_commit_callback;

        err = ext4_percpu_param_init(sbi);
        if (err)
                goto failed_mount6;

        if (ext4_has_feature_flex_bg(sb))
                if (!ext4_fill_flex_info(sb)) {
                        ext4_msg(sb, KERN_ERR,
                               "unable to initialize "
                               "flex_bg meta info!");
                        err = -ENOMEM;
                        goto failed_mount6;
                }

        err = ext4_register_li_request(sb, first_not_zeroed);
        if (err)
                goto failed_mount6;

        err = ext4_register_sysfs(sb);
        if (err)
                goto failed_mount7;

        err = ext4_init_orphan_info(sb);
        if (err)
                goto failed_mount8;
#ifdef CONFIG_QUOTA
        /* Enable quota usage during mount. */
        if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
                err = ext4_enable_quotas(sb);
                if (err)
                        goto failed_mount9;
        }
#endif  /* CONFIG_QUOTA */

        /*
         * Save the original bdev mapping's wb_err value which could be
         * used to detect the metadata async write error.
         */
        spin_lock_init(&sbi->s_bdev_wb_lock);
        errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
                                 &sbi->s_bdev_wb_err);
        /* EXT4_ORPHAN_FS is set only for the duration of orphan cleanup. */
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
        /*
         * Update the checksum after updating free space/inode counters and
         * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
         * checksum in the buffer cache until it is written out and
         * e2fsprogs programs trying to open a file system immediately
         * after it is mounted can fail.
         */
        ext4_superblock_csum_set(sb);
        if (needs_recovery) {
                ext4_msg(sb, KERN_INFO, "recovery complete");
                err = ext4_mark_recovery_complete(sb, es);
                if (err)
                        goto failed_mount10;
        }

        if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
                ext4_msg(sb, KERN_WARNING,
                         "mounting with \"discard\" option, but the device does not support discard");

        if (es->s_error_count)
                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */

        /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
        ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
        ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
        ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
        atomic_set(&sbi->s_warning_count, 0);
        atomic_set(&sbi->s_msg_count, 0);

        return 0;

        /*
         * Error unwinding.  Each label undoes the setup steps that had
         * completed before the corresponding goto and then falls through to
         * the labels for the earlier steps.
         */
failed_mount10:
        ext4_quotas_off(sb, EXT4_MAXQUOTAS);
failed_mount9: __maybe_unused
        ext4_release_orphan_info(sb);
failed_mount8:
        ext4_unregister_sysfs(sb);
        kobject_put(&sbi->s_kobj);
failed_mount7:
        ext4_unregister_li_request(sb);
failed_mount6:
        ext4_mb_release(sb);
        ext4_flex_groups_free(sbi);
        ext4_percpu_param_destroy(sbi);
failed_mount5:
        ext4_ext_release(sb);
        ext4_release_system_zone(sb);
failed_mount4a:
        dput(sb->s_root);
        sb->s_root = NULL;
failed_mount4:
        ext4_msg(sb, KERN_ERR, "mount failed");
        /* The workqueue allocation itself jumps here, so it may be NULL. */
        if (EXT4_SB(sb)->rsv_conversion_wq)
                destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
        sbi->s_ea_inode_cache = NULL;

        ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
        sbi->s_ea_block_cache = NULL;

        if (sbi->s_journal) {
                /* flush s_sb_upd_work before journal destroy. */
                flush_work(&sbi->s_sb_upd_work);
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
failed_mount3a:
        ext4_es_unregister_shrinker(sbi);
failed_mount3:
        /* flush s_sb_upd_work before sbi destroy */
        flush_work(&sbi->s_sb_upd_work);
        del_timer_sync(&sbi->s_err_report);
        ext4_stop_mmpd(sbi);
        ext4_group_desc_free(sbi);
failed_mount:
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);

#if IS_ENABLED(CONFIG_UNICODE)
        utf8_unload(sb->s_encoding);
#endif

#ifdef CONFIG_QUOTA
        for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(get_qf_name(sb, sbi, i));
#endif
        fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
        brelse(sbi->s_sbh);
        if (sbi->s_journal_bdev) {
                invalidate_bdev(sbi->s_journal_bdev);
                blkdev_put(sbi->s_journal_bdev, sb);
        }
out_fail:
        invalidate_bdev(sb->s_bdev);
        /* sbi itself is not freed here; only the superblock's pointer is cleared. */
        sb->s_fs_info = NULL;
        return err;
}
5689
5690 static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5691 {
5692         struct ext4_fs_context *ctx = fc->fs_private;
5693         struct ext4_sb_info *sbi;
5694         const char *descr;
5695         int ret;
5696
5697         sbi = ext4_alloc_sbi(sb);
5698         if (!sbi)
5699                 return -ENOMEM;
5700
5701         fc->s_fs_info = sbi;
5702
5703         /* Cleanup superblock name */
5704         strreplace(sb->s_id, '/', '!');
5705
5706         sbi->s_sb_block = 1;    /* Default super block location */
5707         if (ctx->spec & EXT4_SPEC_s_sb_block)
5708                 sbi->s_sb_block = ctx->s_sb_block;
5709
5710         ret = __ext4_fill_super(fc, sb);
5711         if (ret < 0)
5712                 goto free_sbi;
5713
5714         if (sbi->s_journal) {
5715                 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5716                         descr = " journalled data mode";
5717                 else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5718                         descr = " ordered data mode";
5719                 else
5720                         descr = " writeback data mode";
5721         } else
5722                 descr = "out journal";
5723
5724         if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5725                 ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
5726                          "Quota mode: %s.", &sb->s_uuid,
5727                          sb_rdonly(sb) ? "ro" : "r/w", descr,
5728                          ext4_quota_mode(sb));
5729
5730         /* Update the s_overhead_clusters if necessary */
5731         ext4_update_overhead(sb, false);
5732         return 0;
5733
5734 free_sbi:
5735         ext4_free_sbi(sbi);
5736         fc->s_fs_info = NULL;
5737         return ret;
5738 }
5739
5740 static int ext4_get_tree(struct fs_context *fc)
5741 {
5742         return get_tree_bdev(fc, ext4_fill_super);
5743 }
5744
5745 /*
5746  * Setup any per-fs journal parameters now.  We'll do this both on
5747  * initial mount, once the journal has been initialised but before we've
5748  * done any recovery; and again on any subsequent remount.
5749  */
5750 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5751 {
5752         struct ext4_sb_info *sbi = EXT4_SB(sb);
5753
5754         journal->j_commit_interval = sbi->s_commit_interval;
5755         journal->j_min_batch_time = sbi->s_min_batch_time;
5756         journal->j_max_batch_time = sbi->s_max_batch_time;
5757         ext4_fc_init(sb, journal);
5758
5759         write_lock(&journal->j_state_lock);
5760         if (test_opt(sb, BARRIER))
5761                 journal->j_flags |= JBD2_BARRIER;
5762         else
5763                 journal->j_flags &= ~JBD2_BARRIER;
5764         if (test_opt(sb, DATA_ERR_ABORT))
5765                 journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5766         else
5767                 journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
5768         /*
5769          * Always enable journal cycle record option, letting the journal
5770          * records log transactions continuously between each mount.
5771          */
5772         journal->j_flags |= JBD2_CYCLE_RECORD;
5773         write_unlock(&journal->j_state_lock);
5774 }
5775
5776 static struct inode *ext4_get_journal_inode(struct super_block *sb,
5777                                              unsigned int journal_inum)
5778 {
5779         struct inode *journal_inode;
5780
5781         /*
5782          * Test for the existence of a valid inode on disk.  Bad things
5783          * happen if we iget() an unused inode, as the subsequent iput()
5784          * will try to delete it.
5785          */
5786         journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5787         if (IS_ERR(journal_inode)) {
5788                 ext4_msg(sb, KERN_ERR, "no journal found");
5789                 return ERR_CAST(journal_inode);
5790         }
5791         if (!journal_inode->i_nlink) {
5792                 make_bad_inode(journal_inode);
5793                 iput(journal_inode);
5794                 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5795                 return ERR_PTR(-EFSCORRUPTED);
5796         }
5797         if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5798                 ext4_msg(sb, KERN_ERR, "invalid journal inode");
5799                 iput(journal_inode);
5800                 return ERR_PTR(-EFSCORRUPTED);
5801         }
5802
5803         ext4_debug("Journal inode found at %p: %lld bytes\n",
5804                   journal_inode, journal_inode->i_size);
5805         return journal_inode;
5806 }
5807
5808 static int ext4_journal_bmap(journal_t *journal, sector_t *block)
5809 {
5810         struct ext4_map_blocks map;
5811         int ret;
5812
5813         if (journal->j_inode == NULL)
5814                 return 0;
5815
5816         map.m_lblk = *block;
5817         map.m_len = 1;
5818         ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
5819         if (ret <= 0) {
5820                 ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
5821                          "journal bmap failed: block %llu ret %d\n",
5822                          *block, ret);
5823                 jbd2_journal_abort(journal, ret ? ret : -EIO);
5824                 return ret;
5825         }
5826         *block = map.m_pblk;
5827         return 0;
5828 }
5829
5830 static journal_t *ext4_open_inode_journal(struct super_block *sb,
5831                                           unsigned int journal_inum)
5832 {
5833         struct inode *journal_inode;
5834         journal_t *journal;
5835
5836         journal_inode = ext4_get_journal_inode(sb, journal_inum);
5837         if (IS_ERR(journal_inode))
5838                 return ERR_CAST(journal_inode);
5839
5840         journal = jbd2_journal_init_inode(journal_inode);
5841         if (IS_ERR(journal)) {
5842                 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5843                 iput(journal_inode);
5844                 return ERR_CAST(journal);
5845         }
5846         journal->j_private = sb;
5847         journal->j_bmap = ext4_journal_bmap;
5848         ext4_init_journal_params(sb, journal);
5849         return journal;
5850 }
5851
5852 static struct block_device *ext4_get_journal_blkdev(struct super_block *sb,
5853                                         dev_t j_dev, ext4_fsblk_t *j_start,
5854                                         ext4_fsblk_t *j_len)
5855 {
5856         struct buffer_head *bh;
5857         struct block_device *bdev;
5858         int hblock, blocksize;
5859         ext4_fsblk_t sb_block;
5860         unsigned long offset;
5861         struct ext4_super_block *es;
5862         int errno;
5863
5864         /* see get_tree_bdev why this is needed and safe */
5865         up_write(&sb->s_umount);
5866         bdev = blkdev_get_by_dev(j_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
5867                                  &fs_holder_ops);
5868         down_write(&sb->s_umount);
5869         if (IS_ERR(bdev)) {
5870                 ext4_msg(sb, KERN_ERR,
5871                          "failed to open journal device unknown-block(%u,%u) %ld",
5872                          MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev));
5873                 return ERR_CAST(bdev);
5874         }
5875
5876         blocksize = sb->s_blocksize;
5877         hblock = bdev_logical_block_size(bdev);
5878         if (blocksize < hblock) {
5879                 ext4_msg(sb, KERN_ERR,
5880                         "blocksize too small for journal device");
5881                 errno = -EINVAL;
5882                 goto out_bdev;
5883         }
5884
5885         sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5886         offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5887         set_blocksize(bdev, blocksize);
5888         bh = __bread(bdev, sb_block, blocksize);
5889         if (!bh) {
5890                 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5891                        "external journal");
5892                 errno = -EINVAL;
5893                 goto out_bdev;
5894         }
5895
5896         es = (struct ext4_super_block *) (bh->b_data + offset);
5897         if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5898             !(le32_to_cpu(es->s_feature_incompat) &
5899               EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5900                 ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
5901                 errno = -EFSCORRUPTED;
5902                 goto out_bh;
5903         }
5904
5905         if ((le32_to_cpu(es->s_feature_ro_compat) &
5906              EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5907             es->s_checksum != ext4_superblock_csum(sb, es)) {
5908                 ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
5909                 errno = -EFSCORRUPTED;
5910                 goto out_bh;
5911         }
5912
5913         if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5914                 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5915                 errno = -EFSCORRUPTED;
5916                 goto out_bh;
5917         }
5918
5919         *j_start = sb_block + 1;
5920         *j_len = ext4_blocks_count(es);
5921         brelse(bh);
5922         return bdev;
5923
5924 out_bh:
5925         brelse(bh);
5926 out_bdev:
5927         blkdev_put(bdev, sb);
5928         return ERR_PTR(errno);
5929 }
5930
5931 static journal_t *ext4_open_dev_journal(struct super_block *sb,
5932                                         dev_t j_dev)
5933 {
5934         journal_t *journal;
5935         ext4_fsblk_t j_start;
5936         ext4_fsblk_t j_len;
5937         struct block_device *journal_bdev;
5938         int errno = 0;
5939
5940         journal_bdev = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
5941         if (IS_ERR(journal_bdev))
5942                 return ERR_CAST(journal_bdev);
5943
5944         journal = jbd2_journal_init_dev(journal_bdev, sb->s_bdev, j_start,
5945                                         j_len, sb->s_blocksize);
5946         if (IS_ERR(journal)) {
5947                 ext4_msg(sb, KERN_ERR, "failed to create device journal");
5948                 errno = PTR_ERR(journal);
5949                 goto out_bdev;
5950         }
5951         if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5952                 ext4_msg(sb, KERN_ERR, "External journal has more than one "
5953                                         "user (unsupported) - %d",
5954                         be32_to_cpu(journal->j_superblock->s_nr_users));
5955                 errno = -EINVAL;
5956                 goto out_journal;
5957         }
5958         journal->j_private = sb;
5959         EXT4_SB(sb)->s_journal_bdev = journal_bdev;
5960         ext4_init_journal_params(sb, journal);
5961         return journal;
5962
5963 out_journal:
5964         jbd2_journal_destroy(journal);
5965 out_bdev:
5966         blkdev_put(journal_bdev, sb);
5967         return ERR_PTR(errno);
5968 }
5969
5970 static int ext4_load_journal(struct super_block *sb,
5971                              struct ext4_super_block *es,
5972                              unsigned long journal_devnum)
5973 {
5974         journal_t *journal;
5975         unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5976         dev_t journal_dev;
5977         int err = 0;
5978         int really_read_only;
5979         int journal_dev_ro;
5980
5981         if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5982                 return -EFSCORRUPTED;
5983
5984         if (journal_devnum &&
5985             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5986                 ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5987                         "numbers have changed");
5988                 journal_dev = new_decode_dev(journal_devnum);
5989         } else
5990                 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5991
5992         if (journal_inum && journal_dev) {
5993                 ext4_msg(sb, KERN_ERR,
5994                          "filesystem has both journal inode and journal device!");
5995                 return -EINVAL;
5996         }
5997
5998         if (journal_inum) {
5999                 journal = ext4_open_inode_journal(sb, journal_inum);
6000                 if (IS_ERR(journal))
6001                         return PTR_ERR(journal);
6002         } else {
6003                 journal = ext4_open_dev_journal(sb, journal_dev);
6004                 if (IS_ERR(journal))
6005                         return PTR_ERR(journal);
6006         }
6007
6008         journal_dev_ro = bdev_read_only(journal->j_dev);
6009         really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
6010
6011         if (journal_dev_ro && !sb_rdonly(sb)) {
6012                 ext4_msg(sb, KERN_ERR,
6013                          "journal device read-only, try mounting with '-o ro'");
6014                 err = -EROFS;
6015                 goto err_out;
6016         }
6017
6018         /*
6019          * Are we loading a blank journal or performing recovery after a
6020          * crash?  For recovery, we need to check in advance whether we
6021          * can get read-write access to the device.
6022          */
6023         if (ext4_has_feature_journal_needs_recovery(sb)) {
6024                 if (sb_rdonly(sb)) {
6025                         ext4_msg(sb, KERN_INFO, "INFO: recovery "
6026                                         "required on readonly filesystem");
6027                         if (really_read_only) {
6028                                 ext4_msg(sb, KERN_ERR, "write access "
6029                                         "unavailable, cannot proceed "
6030                                         "(try mounting with noload)");
6031                                 err = -EROFS;
6032                                 goto err_out;
6033                         }
6034                         ext4_msg(sb, KERN_INFO, "write access will "
6035                                "be enabled during recovery");
6036                 }
6037         }
6038
6039         if (!(journal->j_flags & JBD2_BARRIER))
6040                 ext4_msg(sb, KERN_INFO, "barriers disabled");
6041
6042         if (!ext4_has_feature_journal_needs_recovery(sb))
6043                 err = jbd2_journal_wipe(journal, !really_read_only);
6044         if (!err) {
6045                 char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
6046                 __le16 orig_state;
6047                 bool changed = false;
6048
6049                 if (save)
6050                         memcpy(save, ((char *) es) +
6051                                EXT4_S_ERR_START, EXT4_S_ERR_LEN);
6052                 err = jbd2_journal_load(journal);
6053                 if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
6054                                    save, EXT4_S_ERR_LEN)) {
6055                         memcpy(((char *) es) + EXT4_S_ERR_START,
6056                                save, EXT4_S_ERR_LEN);
6057                         changed = true;
6058                 }
6059                 kfree(save);
6060                 orig_state = es->s_state;
6061                 es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
6062                                            EXT4_ERROR_FS);
6063                 if (orig_state != es->s_state)
6064                         changed = true;
6065                 /* Write out restored error information to the superblock */
6066                 if (changed && !really_read_only) {
6067                         int err2;
6068                         err2 = ext4_commit_super(sb);
6069                         err = err ? : err2;
6070                 }
6071         }
6072
6073         if (err) {
6074                 ext4_msg(sb, KERN_ERR, "error loading journal");
6075                 goto err_out;
6076         }
6077
6078         EXT4_SB(sb)->s_journal = journal;
6079         err = ext4_clear_journal_err(sb, es);
6080         if (err) {
6081                 EXT4_SB(sb)->s_journal = NULL;
6082                 jbd2_journal_destroy(journal);
6083                 return err;
6084         }
6085
6086         if (!really_read_only && journal_devnum &&
6087             journal_devnum != le32_to_cpu(es->s_journal_dev)) {
6088                 es->s_journal_dev = cpu_to_le32(journal_devnum);
6089                 ext4_commit_super(sb);
6090         }
6091         if (!really_read_only && journal_inum &&
6092             journal_inum != le32_to_cpu(es->s_journal_inum)) {
6093                 es->s_journal_inum = cpu_to_le32(journal_inum);
6094                 ext4_commit_super(sb);
6095         }
6096
6097         return 0;
6098
6099 err_out:
6100         jbd2_journal_destroy(journal);
6101         return err;
6102 }
6103
6104 /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
6105 static void ext4_update_super(struct super_block *sb)
6106 {
6107         struct ext4_sb_info *sbi = EXT4_SB(sb);
6108         struct ext4_super_block *es = sbi->s_es;
6109         struct buffer_head *sbh = sbi->s_sbh;
6110
6111         lock_buffer(sbh);
6112         /*
6113          * If the file system is mounted read-only, don't update the
6114          * superblock write time.  This avoids updating the superblock
6115          * write time when we are mounting the root file system
6116          * read/only but we need to replay the journal; at that point,
6117          * for people who are east of GMT and who make their clock
6118          * tick in localtime for Windows bug-for-bug compatibility,
6119          * the clock is set in the future, and this will cause e2fsck
6120          * to complain and force a full file system check.
6121          */
6122         if (!sb_rdonly(sb))
6123                 ext4_update_tstamp(es, s_wtime);
6124         es->s_kbytes_written =
6125                 cpu_to_le64(sbi->s_kbytes_written +
6126                     ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
6127                       sbi->s_sectors_written_start) >> 1));
6128         if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
6129                 ext4_free_blocks_count_set(es,
6130                         EXT4_C2B(sbi, percpu_counter_sum_positive(
6131                                 &sbi->s_freeclusters_counter)));
6132         if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
6133                 es->s_free_inodes_count =
6134                         cpu_to_le32(percpu_counter_sum_positive(
6135                                 &sbi->s_freeinodes_counter));
6136         /* Copy error information to the on-disk superblock */
6137         spin_lock(&sbi->s_error_lock);
6138         if (sbi->s_add_error_count > 0) {
6139                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6140                 if (!es->s_first_error_time && !es->s_first_error_time_hi) {
6141                         __ext4_update_tstamp(&es->s_first_error_time,
6142                                              &es->s_first_error_time_hi,
6143                                              sbi->s_first_error_time);
6144                         strncpy(es->s_first_error_func, sbi->s_first_error_func,
6145                                 sizeof(es->s_first_error_func));
6146                         es->s_first_error_line =
6147                                 cpu_to_le32(sbi->s_first_error_line);
6148                         es->s_first_error_ino =
6149                                 cpu_to_le32(sbi->s_first_error_ino);
6150                         es->s_first_error_block =
6151                                 cpu_to_le64(sbi->s_first_error_block);
6152                         es->s_first_error_errcode =
6153                                 ext4_errno_to_code(sbi->s_first_error_code);
6154                 }
6155                 __ext4_update_tstamp(&es->s_last_error_time,
6156                                      &es->s_last_error_time_hi,
6157                                      sbi->s_last_error_time);
6158                 strncpy(es->s_last_error_func, sbi->s_last_error_func,
6159                         sizeof(es->s_last_error_func));
6160                 es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
6161                 es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
6162                 es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
6163                 es->s_last_error_errcode =
6164                                 ext4_errno_to_code(sbi->s_last_error_code);
6165                 /*
6166                  * Start the daily error reporting function if it hasn't been
6167                  * started already
6168                  */
6169                 if (!es->s_error_count)
6170                         mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
6171                 le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
6172                 sbi->s_add_error_count = 0;
6173         }
6174         spin_unlock(&sbi->s_error_lock);
6175
6176         ext4_superblock_csum_set(sb);
6177         unlock_buffer(sbh);
6178 }
6179
6180 static int ext4_commit_super(struct super_block *sb)
6181 {
6182         struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
6183
6184         if (!sbh)
6185                 return -EINVAL;
6186         if (block_device_ejected(sb))
6187                 return -ENODEV;
6188
6189         ext4_update_super(sb);
6190
6191         lock_buffer(sbh);
6192         /* Buffer got discarded which means block device got invalidated */
6193         if (!buffer_mapped(sbh)) {
6194                 unlock_buffer(sbh);
6195                 return -EIO;
6196         }
6197
6198         if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
6199                 /*
6200                  * Oh, dear.  A previous attempt to write the
6201                  * superblock failed.  This could happen because the
6202                  * USB device was yanked out.  Or it could happen to
6203                  * be a transient write error and maybe the block will
6204                  * be remapped.  Nothing we can do but to retry the
6205                  * write and hope for the best.
6206                  */
6207                 ext4_msg(sb, KERN_ERR, "previous I/O error to "
6208                        "superblock detected");
6209                 clear_buffer_write_io_error(sbh);
6210                 set_buffer_uptodate(sbh);
6211         }
6212         get_bh(sbh);
6213         /* Clear potential dirty bit if it was journalled update */
6214         clear_buffer_dirty(sbh);
6215         sbh->b_end_io = end_buffer_write_sync;
6216         submit_bh(REQ_OP_WRITE | REQ_SYNC |
6217                   (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
6218         wait_on_buffer(sbh);
6219         if (buffer_write_io_error(sbh)) {
6220                 ext4_msg(sb, KERN_ERR, "I/O error while writing "
6221                        "superblock");
6222                 clear_buffer_write_io_error(sbh);
6223                 set_buffer_uptodate(sbh);
6224                 return -EIO;
6225         }
6226         return 0;
6227 }
6228
6229 /*
6230  * Have we just finished recovery?  If so, and if we are mounting (or
6231  * remounting) the filesystem readonly, then we will end up with a
6232  * consistent fs on disk.  Record that fact.
6233  */
6234 static int ext4_mark_recovery_complete(struct super_block *sb,
6235                                        struct ext4_super_block *es)
6236 {
6237         int err;
6238         journal_t *journal = EXT4_SB(sb)->s_journal;
6239
6240         if (!ext4_has_feature_journal(sb)) {
6241                 if (journal != NULL) {
6242                         ext4_error(sb, "Journal got removed while the fs was "
6243                                    "mounted!");
6244                         return -EFSCORRUPTED;
6245                 }
6246                 return 0;
6247         }
6248         jbd2_journal_lock_updates(journal);
6249         err = jbd2_journal_flush(journal, 0);
6250         if (err < 0)
6251                 goto out;
6252
6253         if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6254             ext4_has_feature_orphan_present(sb))) {
6255                 if (!ext4_orphan_file_empty(sb)) {
6256                         ext4_error(sb, "Orphan file not empty on read-only fs.");
6257                         err = -EFSCORRUPTED;
6258                         goto out;
6259                 }
6260                 ext4_clear_feature_journal_needs_recovery(sb);
6261                 ext4_clear_feature_orphan_present(sb);
6262                 ext4_commit_super(sb);
6263         }
6264 out:
6265         jbd2_journal_unlock_updates(journal);
6266         return err;
6267 }
6268
6269 /*
6270  * If we are mounting (or read-write remounting) a filesystem whose journal
6271  * has recorded an error from a previous lifetime, move that error to the
6272  * main filesystem now.
6273  */
6274 static int ext4_clear_journal_err(struct super_block *sb,
6275                                    struct ext4_super_block *es)
6276 {
6277         journal_t *journal;
6278         int j_errno;
6279         const char *errstr;
6280
6281         if (!ext4_has_feature_journal(sb)) {
6282                 ext4_error(sb, "Journal got removed while the fs was mounted!");
6283                 return -EFSCORRUPTED;
6284         }
6285
6286         journal = EXT4_SB(sb)->s_journal;
6287
6288         /*
6289          * Now check for any error status which may have been recorded in the
6290          * journal by a prior ext4_error() or ext4_abort()
6291          */
6292
6293         j_errno = jbd2_journal_errno(journal);
6294         if (j_errno) {
6295                 char nbuf[16];
6296
6297                 errstr = ext4_decode_error(sb, j_errno, nbuf);
6298                 ext4_warning(sb, "Filesystem error recorded "
6299                              "from previous mount: %s", errstr);
6300
6301                 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6302                 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6303                 j_errno = ext4_commit_super(sb);
6304                 if (j_errno)
6305                         return j_errno;
6306                 ext4_warning(sb, "Marked fs in need of filesystem check.");
6307
6308                 jbd2_journal_clear_err(journal);
6309                 jbd2_journal_update_sb_errno(journal);
6310         }
6311         return 0;
6312 }
6313
6314 /*
6315  * Force the running and committing transactions to commit,
6316  * and wait on the commit.
6317  */
6318 int ext4_force_commit(struct super_block *sb)
6319 {
6320         return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
6321 }
6322
6323 static int ext4_sync_fs(struct super_block *sb, int wait)
6324 {
6325         int ret = 0;
6326         tid_t target;
6327         bool needs_barrier = false;
6328         struct ext4_sb_info *sbi = EXT4_SB(sb);
6329
6330         if (unlikely(ext4_forced_shutdown(sb)))
6331                 return 0;
6332
6333         trace_ext4_sync_fs(sb, wait);
6334         flush_workqueue(sbi->rsv_conversion_wq);
6335         /*
6336          * Writeback quota in non-journalled quota case - journalled quota has
6337          * no dirty dquots
6338          */
6339         dquot_writeback_dquots(sb, -1);
6340         /*
6341          * Data writeback is possible w/o journal transaction, so barrier must
6342          * being sent at the end of the function. But we can skip it if
6343          * transaction_commit will do it for us.
6344          */
6345         if (sbi->s_journal) {
6346                 target = jbd2_get_latest_transaction(sbi->s_journal);
6347                 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6348                     !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6349                         needs_barrier = true;
6350
6351                 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6352                         if (wait)
6353                                 ret = jbd2_log_wait_commit(sbi->s_journal,
6354                                                            target);
6355                 }
6356         } else if (wait && test_opt(sb, BARRIER))
6357                 needs_barrier = true;
6358         if (needs_barrier) {
6359                 int err;
6360                 err = blkdev_issue_flush(sb->s_bdev);
6361                 if (!ret)
6362                         ret = err;
6363         }
6364
6365         return ret;
6366 }
6367
6368 /*
6369  * LVM calls this function before a (read-only) snapshot is created.  This
6370  * gives us a chance to flush the journal completely and mark the fs clean.
6371  *
6372  * Note that only this function cannot bring a filesystem to be in a clean
6373  * state independently. It relies on upper layer to stop all data & metadata
6374  * modifications.
6375  */
6376 static int ext4_freeze(struct super_block *sb)
6377 {
6378         int error = 0;
6379         journal_t *journal = EXT4_SB(sb)->s_journal;
6380
6381         if (journal) {
6382                 /* Now we set up the journal barrier. */
6383                 jbd2_journal_lock_updates(journal);
6384
6385                 /*
6386                  * Don't clear the needs_recovery flag if we failed to
6387                  * flush the journal.
6388                  */
6389                 error = jbd2_journal_flush(journal, 0);
6390                 if (error < 0)
6391                         goto out;
6392
6393                 /* Journal blocked and flushed, clear needs_recovery flag. */
6394                 ext4_clear_feature_journal_needs_recovery(sb);
6395                 if (ext4_orphan_file_empty(sb))
6396                         ext4_clear_feature_orphan_present(sb);
6397         }
6398
6399         error = ext4_commit_super(sb);
6400 out:
6401         if (journal)
6402                 /* we rely on upper layer to stop further updates */
6403                 jbd2_journal_unlock_updates(journal);
6404         return error;
6405 }
6406
6407 /*
6408  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
6409  * flag here, even though the filesystem is not technically dirty yet.
6410  */
6411 static int ext4_unfreeze(struct super_block *sb)
6412 {
6413         if (ext4_forced_shutdown(sb))
6414                 return 0;
6415
6416         if (EXT4_SB(sb)->s_journal) {
6417                 /* Reset the needs_recovery flag before the fs is unlocked. */
6418                 ext4_set_feature_journal_needs_recovery(sb);
6419                 if (ext4_has_feature_orphan_file(sb))
6420                         ext4_set_feature_orphan_present(sb);
6421         }
6422
6423         ext4_commit_super(sb);
6424         return 0;
6425 }
6426
6427 /*
6428  * Structure to save mount options for ext4_remount's benefit
6429  */
6430 struct ext4_mount_options {
6431         unsigned long s_mount_opt;
6432         unsigned long s_mount_opt2;
6433         kuid_t s_resuid;
6434         kgid_t s_resgid;
6435         unsigned long s_commit_interval;
6436         u32 s_min_batch_time, s_max_batch_time;
6437 #ifdef CONFIG_QUOTA
6438         int s_jquota_fmt;
6439         char *s_qf_names[EXT4_MAXQUOTAS];
6440 #endif
6441 };
6442
6443 static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6444 {
6445         struct ext4_fs_context *ctx = fc->fs_private;
6446         struct ext4_super_block *es;
6447         struct ext4_sb_info *sbi = EXT4_SB(sb);
6448         unsigned long old_sb_flags;
6449         struct ext4_mount_options old_opts;
6450         ext4_group_t g;
6451         int err = 0;
6452 #ifdef CONFIG_QUOTA
6453         int enable_quota = 0;
6454         int i, j;
6455         char *to_free[EXT4_MAXQUOTAS];
6456 #endif
6457
6458
6459         /* Store the original options */
6460         old_sb_flags = sb->s_flags;
6461         old_opts.s_mount_opt = sbi->s_mount_opt;
6462         old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6463         old_opts.s_resuid = sbi->s_resuid;
6464         old_opts.s_resgid = sbi->s_resgid;
6465         old_opts.s_commit_interval = sbi->s_commit_interval;
6466         old_opts.s_min_batch_time = sbi->s_min_batch_time;
6467         old_opts.s_max_batch_time = sbi->s_max_batch_time;
6468 #ifdef CONFIG_QUOTA
6469         old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
6470         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6471                 if (sbi->s_qf_names[i]) {
6472                         char *qf_name = get_qf_name(sb, sbi, i);
6473
6474                         old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6475                         if (!old_opts.s_qf_names[i]) {
6476                                 for (j = 0; j < i; j++)
6477                                         kfree(old_opts.s_qf_names[j]);
6478                                 return -ENOMEM;
6479                         }
6480                 } else
6481                         old_opts.s_qf_names[i] = NULL;
6482 #endif
6483         if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6484                 if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6485                         ctx->journal_ioprio =
6486                                 sbi->s_journal->j_task->io_context->ioprio;
6487                 else
6488                         ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6489
6490         }
6491
6492         ext4_apply_options(fc, sb);
6493
6494         if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
6495             test_opt(sb, JOURNAL_CHECKSUM)) {
6496                 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
6497                          "during remount not supported; ignoring");
6498                 sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6499         }
6500
6501         if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6502                 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6503                         ext4_msg(sb, KERN_ERR, "can't mount with "
6504                                  "both data=journal and delalloc");
6505                         err = -EINVAL;
6506                         goto restore_opts;
6507                 }
6508                 if (test_opt(sb, DIOREAD_NOLOCK)) {
6509                         ext4_msg(sb, KERN_ERR, "can't mount with "
6510                                  "both data=journal and dioread_nolock");
6511                         err = -EINVAL;
6512                         goto restore_opts;
6513                 }
6514         } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6515                 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6516                         ext4_msg(sb, KERN_ERR, "can't mount with "
6517                                 "journal_async_commit in data=ordered mode");
6518                         err = -EINVAL;
6519                         goto restore_opts;
6520                 }
6521         }
6522
6523         if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6524                 ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6525                 err = -EINVAL;
6526                 goto restore_opts;
6527         }
6528
6529         if (test_opt2(sb, ABORT))
6530                 ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
6531
6532         sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6533                 (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
6534
6535         es = sbi->s_es;
6536
6537         if (sbi->s_journal) {
6538                 ext4_init_journal_params(sb, sbi->s_journal);
6539                 set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
6540         }
6541
6542         /* Flush outstanding errors before changing fs state */
6543         flush_work(&sbi->s_sb_upd_work);
6544
6545         if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
6546                 if (ext4_forced_shutdown(sb)) {
6547                         err = -EROFS;
6548                         goto restore_opts;
6549                 }
6550
6551                 if (fc->sb_flags & SB_RDONLY) {
6552                         err = sync_filesystem(sb);
6553                         if (err < 0)
6554                                 goto restore_opts;
6555                         err = dquot_suspend(sb, -1);
6556                         if (err < 0)
6557                                 goto restore_opts;
6558
6559                         /*
6560                          * First of all, the unconditional stuff we have to do
6561                          * to disable replay of the journal when we next remount
6562                          */
6563                         sb->s_flags |= SB_RDONLY;
6564
6565                         /*
6566                          * OK, test if we are remounting a valid rw partition
6567                          * readonly, and if so set the rdonly flag and then
6568                          * mark the partition as valid again.
6569                          */
6570                         if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6571                             (sbi->s_mount_state & EXT4_VALID_FS))
6572                                 es->s_state = cpu_to_le16(sbi->s_mount_state);
6573
6574                         if (sbi->s_journal) {
6575                                 /*
6576                                  * We let remount-ro finish even if marking fs
6577                                  * as clean failed...
6578                                  */
6579                                 ext4_mark_recovery_complete(sb, es);
6580                         }
6581                 } else {
6582                         /* Make sure we can mount this feature set readwrite */
6583                         if (ext4_has_feature_readonly(sb) ||
6584                             !ext4_feature_set_ok(sb, 0)) {
6585                                 err = -EROFS;
6586                                 goto restore_opts;
6587                         }
6588                         /*
6589                          * Make sure the group descriptor checksums
6590                          * are sane.  If they aren't, refuse to remount r/w.
6591                          */
6592                         for (g = 0; g < sbi->s_groups_count; g++) {
6593                                 struct ext4_group_desc *gdp =
6594                                         ext4_get_group_desc(sb, g, NULL);
6595
6596                                 if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
6597                                         ext4_msg(sb, KERN_ERR,
6598                "ext4_remount: Checksum for group %u failed (%u!=%u)",
6599                 g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
6600                                                le16_to_cpu(gdp->bg_checksum));
6601                                         err = -EFSBADCRC;
6602                                         goto restore_opts;
6603                                 }
6604                         }
6605
6606                         /*
6607                          * If we have an unprocessed orphan list hanging
6608                          * around from a previously readonly bdev mount,
6609                          * require a full umount/remount for now.
6610                          */
6611                         if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
6612                                 ext4_msg(sb, KERN_WARNING, "Couldn't "
6613                                        "remount RDWR because of unprocessed "
6614                                        "orphan inode list.  Please "
6615                                        "umount/remount instead");
6616                                 err = -EINVAL;
6617                                 goto restore_opts;
6618                         }
6619
6620                         /*
6621                          * Mounting a RDONLY partition read-write, so reread
6622                          * and store the current valid flag.  (It may have
6623                          * been changed by e2fsck since we originally mounted
6624                          * the partition.)
6625                          */
6626                         if (sbi->s_journal) {
6627                                 err = ext4_clear_journal_err(sb, es);
6628                                 if (err)
6629                                         goto restore_opts;
6630                         }
6631                         sbi->s_mount_state = (le16_to_cpu(es->s_state) &
6632                                               ~EXT4_FC_REPLAY);
6633
6634                         err = ext4_setup_super(sb, es, 0);
6635                         if (err)
6636                                 goto restore_opts;
6637
6638                         sb->s_flags &= ~SB_RDONLY;
6639                         if (ext4_has_feature_mmp(sb)) {
6640                                 err = ext4_multi_mount_protect(sb,
6641                                                 le64_to_cpu(es->s_mmp_block));
6642                                 if (err)
6643                                         goto restore_opts;
6644                         }
6645 #ifdef CONFIG_QUOTA
6646                         enable_quota = 1;
6647 #endif
6648                 }
6649         }
6650
6651         /*
6652          * Handle creation of system zone data early because it can fail.
6653          * Releasing of existing data is done when we are sure remount will
6654          * succeed.
6655          */
6656         if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6657                 err = ext4_setup_system_zone(sb);
6658                 if (err)
6659                         goto restore_opts;
6660         }
6661
6662         if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6663                 err = ext4_commit_super(sb);
6664                 if (err)
6665                         goto restore_opts;
6666         }
6667
6668 #ifdef CONFIG_QUOTA
6669         if (enable_quota) {
6670                 if (sb_any_quota_suspended(sb))
6671                         dquot_resume(sb, -1);
6672                 else if (ext4_has_feature_quota(sb)) {
6673                         err = ext4_enable_quotas(sb);
6674                         if (err)
6675                                 goto restore_opts;
6676                 }
6677         }
6678         /* Release old quota file names */
6679         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6680                 kfree(old_opts.s_qf_names[i]);
6681 #endif
6682         if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6683                 ext4_release_system_zone(sb);
6684
6685         /*
6686          * Reinitialize lazy itable initialization thread based on
6687          * current settings
6688          */
6689         if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
6690                 ext4_unregister_li_request(sb);
6691         else {
6692                 ext4_group_t first_not_zeroed;
6693                 first_not_zeroed = ext4_has_uninit_itable(sb);
6694                 ext4_register_li_request(sb, first_not_zeroed);
6695         }
6696
6697         if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6698                 ext4_stop_mmpd(sbi);
6699
6700         return 0;
6701
6702 restore_opts:
6703         /*
6704          * If there was a failing r/w to ro transition, we may need to
6705          * re-enable quota
6706          */
6707         if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
6708             sb_any_quota_suspended(sb))
6709                 dquot_resume(sb, -1);
6710         sb->s_flags = old_sb_flags;
6711         sbi->s_mount_opt = old_opts.s_mount_opt;
6712         sbi->s_mount_opt2 = old_opts.s_mount_opt2;
6713         sbi->s_resuid = old_opts.s_resuid;
6714         sbi->s_resgid = old_opts.s_resgid;
6715         sbi->s_commit_interval = old_opts.s_commit_interval;
6716         sbi->s_min_batch_time = old_opts.s_min_batch_time;
6717         sbi->s_max_batch_time = old_opts.s_max_batch_time;
6718         if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6719                 ext4_release_system_zone(sb);
6720 #ifdef CONFIG_QUOTA
6721         sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
6722         for (i = 0; i < EXT4_MAXQUOTAS; i++) {
6723                 to_free[i] = get_qf_name(sb, sbi, i);
6724                 rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
6725         }
6726         synchronize_rcu();
6727         for (i = 0; i < EXT4_MAXQUOTAS; i++)
6728                 kfree(to_free[i]);
6729 #endif
6730         if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6731                 ext4_stop_mmpd(sbi);
6732         return err;
6733 }
6734
6735 static int ext4_reconfigure(struct fs_context *fc)
6736 {
6737         struct super_block *sb = fc->root->d_sb;
6738         int ret;
6739
6740         fc->s_fs_info = EXT4_SB(sb);
6741
6742         ret = ext4_check_opt_consistency(fc, sb);
6743         if (ret < 0)
6744                 return ret;
6745
6746         ret = __ext4_remount(fc, sb);
6747         if (ret < 0)
6748                 return ret;
6749
6750         ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
6751                  &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
6752                  ext4_quota_mode(sb));
6753
6754         return 0;
6755 }
6756
6757 #ifdef CONFIG_QUOTA
6758 static int ext4_statfs_project(struct super_block *sb,
6759                                kprojid_t projid, struct kstatfs *buf)
6760 {
6761         struct kqid qid;
6762         struct dquot *dquot;
6763         u64 limit;
6764         u64 curblock;
6765
6766         qid = make_kqid_projid(projid);
6767         dquot = dqget(sb, qid);
6768         if (IS_ERR(dquot))
6769                 return PTR_ERR(dquot);
6770         spin_lock(&dquot->dq_dqb_lock);
6771
6772         limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6773                              dquot->dq_dqb.dqb_bhardlimit);
6774         limit >>= sb->s_blocksize_bits;
6775
6776         if (limit && buf->f_blocks > limit) {
6777                 curblock = (dquot->dq_dqb.dqb_curspace +
6778                             dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
6779                 buf->f_blocks = limit;
6780                 buf->f_bfree = buf->f_bavail =
6781                         (buf->f_blocks > curblock) ?
6782                          (buf->f_blocks - curblock) : 0;
6783         }
6784
6785         limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6786                              dquot->dq_dqb.dqb_ihardlimit);
6787         if (limit && buf->f_files > limit) {
6788                 buf->f_files = limit;
6789                 buf->f_ffree =
6790                         (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
6791                          (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
6792         }
6793
6794         spin_unlock(&dquot->dq_dqb_lock);
6795         dqput(dquot);
6796         return 0;
6797 }
6798 #endif
6799
6800 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6801 {
6802         struct super_block *sb = dentry->d_sb;
6803         struct ext4_sb_info *sbi = EXT4_SB(sb);
6804         struct ext4_super_block *es = sbi->s_es;
6805         ext4_fsblk_t overhead = 0, resv_blocks;
6806         s64 bfree;
6807         resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6808
6809         if (!test_opt(sb, MINIX_DF))
6810                 overhead = sbi->s_overhead;
6811
6812         buf->f_type = EXT4_SUPER_MAGIC;
6813         buf->f_bsize = sb->s_blocksize;
6814         buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6815         bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6816                 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6817         /* prevent underflow in case that few free space is available */
6818         buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6819         buf->f_bavail = buf->f_bfree -
6820                         (ext4_r_blocks_count(es) + resv_blocks);
6821         if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6822                 buf->f_bavail = 0;
6823         buf->f_files = le32_to_cpu(es->s_inodes_count);
6824         buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6825         buf->f_namelen = EXT4_NAME_LEN;
6826         buf->f_fsid = uuid_to_fsid(es->s_uuid);
6827
6828 #ifdef CONFIG_QUOTA
6829         if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6830             sb_has_quota_limits_enabled(sb, PRJQUOTA))
6831                 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6832 #endif
6833         return 0;
6834 }
6835
6836
6837 #ifdef CONFIG_QUOTA
6838
6839 /*
6840  * Helper functions so that transaction is started before we acquire dqio_sem
6841  * to keep correct lock ordering of transaction > dqio_sem
6842  */
6843 static inline struct inode *dquot_to_inode(struct dquot *dquot)
6844 {
6845         return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
6846 }
6847
6848 static int ext4_write_dquot(struct dquot *dquot)
6849 {
6850         int ret, err;
6851         handle_t *handle;
6852         struct inode *inode;
6853
6854         inode = dquot_to_inode(dquot);
6855         handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
6856                                     EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
6857         if (IS_ERR(handle))
6858                 return PTR_ERR(handle);
6859         ret = dquot_commit(dquot);
6860         err = ext4_journal_stop(handle);
6861         if (!ret)
6862                 ret = err;
6863         return ret;
6864 }
6865
6866 static int ext4_acquire_dquot(struct dquot *dquot)
6867 {
6868         int ret, err;
6869         handle_t *handle;
6870
6871         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6872                                     EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
6873         if (IS_ERR(handle))
6874                 return PTR_ERR(handle);
6875         ret = dquot_acquire(dquot);
6876         err = ext4_journal_stop(handle);
6877         if (!ret)
6878                 ret = err;
6879         return ret;
6880 }
6881
6882 static int ext4_release_dquot(struct dquot *dquot)
6883 {
6884         int ret, err;
6885         handle_t *handle;
6886
6887         handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6888                                     EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
6889         if (IS_ERR(handle)) {
6890                 /* Release dquot anyway to avoid endless cycle in dqput() */
6891                 dquot_release(dquot);
6892                 return PTR_ERR(handle);
6893         }
6894         ret = dquot_release(dquot);
6895         err = ext4_journal_stop(handle);
6896         if (!ret)
6897                 ret = err;
6898         return ret;
6899 }
6900
6901 static int ext4_mark_dquot_dirty(struct dquot *dquot)
6902 {
6903         struct super_block *sb = dquot->dq_sb;
6904
6905         if (ext4_is_quota_journalled(sb)) {
6906                 dquot_mark_dquot_dirty(dquot);
6907                 return ext4_write_dquot(dquot);
6908         } else {
6909                 return dquot_mark_dquot_dirty(dquot);
6910         }
6911 }
6912
6913 static int ext4_write_info(struct super_block *sb, int type)
6914 {
6915         int ret, err;
6916         handle_t *handle;
6917
6918         /* Data block + inode block */
6919         handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
6920         if (IS_ERR(handle))
6921                 return PTR_ERR(handle);
6922         ret = dquot_commit_info(sb, type);
6923         err = ext4_journal_stop(handle);
6924         if (!ret)
6925                 ret = err;
6926         return ret;
6927 }
6928
     /*
      * Set the lockdep subclass of @inode's i_data_sem to @subclass.
      *
      * Callers in this file pass I_DATA_SEM_QUOTA when an inode starts being
      * used as a quota file and I_DATA_SEM_NORMAL to revert that.
      */
6929 static void lockdep_set_quota_inode(struct inode *inode, int subclass)
6930 {
6931         struct ext4_inode_info *ei = EXT4_I(inode);
6932
6933         /* The first argument of lockdep_set_subclass has to be
6934          * *exactly* the same as the argument to init_rwsem() --- in
6935          * this case, in init_once() --- or lockdep gets unhappy
6936          * because the name of the lock is set using the
6937          * stringification of the argument to init_rwsem().
6938          */
6939         (void) ei;      /* shut up clang warning if !CONFIG_LOCKDEP */
6940         lockdep_set_subclass(&ei->i_data_sem, subclass);
6941 }
6942
6943 /*
6944  * Standard function to be called on quota_on
      *
      * Turns quota of @type on using the quota file at @path.
      *
      * Returns -EINVAL when the QUOTA mount option is not set, -EXDEV when
      * @path lives on a different superblock, -EBUSY when the file is already
      * flagged IS_NOQUOTA, otherwise the result of dquot_quota_on() or, if that
      * succeeded, of marking the quota inode dirty.  On any error the inode's
      * i_data_sem lockdep subclass is reset to I_DATA_SEM_NORMAL.
6945  */
6946 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
6947                          const struct path *path)
6948 {
6949         int err;
6950
6951         if (!test_opt(sb, QUOTA))
6952                 return -EINVAL;
6953
6954         /* Quotafile not on the same filesystem? */
6955         if (path->dentry->d_sb != sb)
6956                 return -EXDEV;
6957
6958         /* Quota already enabled for this file? */
6959         if (IS_NOQUOTA(d_inode(path->dentry)))
6960                 return -EBUSY;
6961
6962         /* Journaling quota? */
6963         if (EXT4_SB(sb)->s_qf_names[type]) {
6964                 /* Quotafile not in fs root? */
6965                 if (path->dentry->d_parent != sb->s_root)
6966                         ext4_msg(sb, KERN_WARNING,
6967                                 "Quota file not on filesystem root. "
6968                                 "Journaled quota will not work");
6969                 sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
6970         } else {
6971                 /*
6972                  * Clear the flag just in case mount options changed since
6973                  * last time.
6974                  */
6975                 sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
6976         }
6977
6978         lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
6979         err = dquot_quota_on(sb, type, format_id, path);
6980         if (!err) {
6981                 struct inode *inode = d_inode(path->dentry);
6982                 handle_t *handle;
6983
6984                 /*
6985                  * Set inode flags to prevent userspace from messing with quota
6986                  * files. If the handle cannot be started, err stays 0 and success
6987                  * is returned; a failed ext4_mark_inode_dirty() turns quota off.
6988                  */
6989                 inode_lock(inode);
6990                 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6991                 if (IS_ERR(handle))
6992                         goto unlock_inode;
6993                 EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
6994                 inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
6995                                 S_NOATIME | S_IMMUTABLE);
6996                 err = ext4_mark_inode_dirty(handle, inode);
6997                 ext4_journal_stop(handle);
6998         unlock_inode:
6999                 inode_unlock(inode);
7000                 if (err)
7001                         dquot_quota_off(sb, type);
7002         }
7003         if (err)
7004                 lockdep_set_quota_inode(path->dentry->d_inode,
7005                                              I_DATA_SEM_NORMAL);
7006         return err;
7007 }
7008
7009 static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
7010 {
7011         switch (type) {
7012         case USRQUOTA:
7013                 return qf_inum == EXT4_USR_QUOTA_INO;
7014         case GRPQUOTA:
7015                 return qf_inum == EXT4_GRP_QUOTA_INO;
7016         case PRJQUOTA:
7017                 return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
7018         default:
7019                 BUG();
7020         }
7021 }
7022
7023 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
7024                              unsigned int flags)
7025 {
7026         int err;
7027         struct inode *qf_inode;
7028         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7029                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7030                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7031                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7032         };
7033
7034         BUG_ON(!ext4_has_feature_quota(sb));
7035
7036         if (!qf_inums[type])
7037                 return -EPERM;
7038
7039         if (!ext4_check_quota_inum(type, qf_inums[type])) {
7040                 ext4_error(sb, "Bad quota inum: %lu, type: %d",
7041                                 qf_inums[type], type);
7042                 return -EUCLEAN;
7043         }
7044
7045         qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
7046         if (IS_ERR(qf_inode)) {
7047                 ext4_error(sb, "Bad quota inode: %lu, type: %d",
7048                                 qf_inums[type], type);
7049                 return PTR_ERR(qf_inode);
7050         }
7051
7052         /* Don't account quota for quota files to avoid recursion */
7053         qf_inode->i_flags |= S_NOQUOTA;
7054         lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
7055         err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
7056         if (err)
7057                 lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
7058         iput(qf_inode);
7059
7060         return err;
7061 }
7062
7063 /* Enable usage tracking for all quota types. */
7064 int ext4_enable_quotas(struct super_block *sb)
7065 {
7066         int type, err = 0;
7067         unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7068                 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7069                 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7070                 le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7071         };
7072         bool quota_mopt[EXT4_MAXQUOTAS] = {
7073                 test_opt(sb, USRQUOTA),
7074                 test_opt(sb, GRPQUOTA),
7075                 test_opt(sb, PRJQUOTA),
7076         };
7077
7078         sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
7079         for (type = 0; type < EXT4_MAXQUOTAS; type++) {
7080                 if (qf_inums[type]) {
7081                         err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
7082                                 DQUOT_USAGE_ENABLED |
7083                                 (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
7084                         if (err) {
7085                                 ext4_warning(sb,
7086                                         "Failed to enable quota tracking "
7087                                         "(type=%d, err=%d, ino=%lu). "
7088                                         "Please run e2fsck to fix.", type,
7089                                         err, qf_inums[type]);
7090
7091                                 ext4_quotas_off(sb, type);
7092                                 return err;
7093                         }
7094                 }
7095         }
7096         return 0;
7097 }
7098
     /*
      * Turn quota of @type off on @sb.
      *
      * With DELALLOC the filesystem is synced first.  If there is no quota
      * inode for @type, or it cannot be grabbed, this is just
      * dquot_quota_off().  Otherwise quota is turned off and, unless that
      * failed, the filesystem has the quota feature or is read-only, the
      * NOATIME/IMMUTABLE flags are cleared from the quota inode and its
      * mtime/ctime are updated under a one-credit handle.  In every path that
      * grabbed the inode, its lockdep subclass is reset and the reference is
      * dropped.
      *
      * Returns the result of dquot_quota_off(), or the error from starting the
      * handle / marking the inode dirty when the flag cleanup was attempted.
      */
7099 static int ext4_quota_off(struct super_block *sb, int type)
7100 {
7101         struct inode *inode = sb_dqopt(sb)->files[type];
7102         handle_t *handle;
7103         int err;
7104
7105         /* Force all delayed allocation blocks to be allocated.
7106          * Caller already holds s_umount sem */
7107         if (test_opt(sb, DELALLOC))
7108                 sync_filesystem(sb);
7109
7110         if (!inode || !igrab(inode))
7111                 goto out;
7112
7113         err = dquot_quota_off(sb, type);
7114         if (err || ext4_has_feature_quota(sb))
7115                 goto out_put;
7116         /*
7117          * When the filesystem was remounted read-only first, we cannot cleanup
7118          * inode flags here. Bad luck but people should be using QUOTA feature
7119          * these days anyway.
7120          */
7121         if (sb_rdonly(sb))
7122                 goto out_put;
7123
7124         inode_lock(inode);
7125         /*
7126          * Update modification times of quota files when userspace can
7127          * start looking at them. Quotas are already disabled at this point,
7128          * but a failure here is still returned to the caller through err.
7129          */
7130         handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
7131         if (IS_ERR(handle)) {
7132                 err = PTR_ERR(handle);
7133                 goto out_unlock;
7134         }
7135         EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
7136         inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
7137         inode->i_mtime = inode_set_ctime_current(inode);
7138         err = ext4_mark_inode_dirty(handle, inode);
7139         ext4_journal_stop(handle);
7140 out_unlock:
7141         inode_unlock(inode);
7142 out_put:
7143         lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
7144         iput(inode);
7145         return err;
7146 out:
             /* No usable quota inode: nothing of ours to clean up. */
7147         return dquot_quota_off(sb, type);
7148 }
7149
7150 /* Read data from quotafile - avoid pagecache and such because we cannot afford
7151  * acquiring the locks... As quota files are never truncated and quota code
7152  * itself serializes the operations (and no one else should touch the files)
7153  * we don't have to be afraid of races */
7154 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
7155                                size_t len, loff_t off)
7156 {
7157         struct inode *inode = sb_dqopt(sb)->files[type];
7158         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7159         int offset = off & (sb->s_blocksize - 1);
7160         int tocopy;
7161         size_t toread;
7162         struct buffer_head *bh;
7163         loff_t i_size = i_size_read(inode);
7164
7165         if (off > i_size)
7166                 return 0;
7167         if (off+len > i_size)
7168                 len = i_size-off;
7169         toread = len;
7170         while (toread > 0) {
7171                 tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
7172                 bh = ext4_bread(NULL, inode, blk, 0);
7173                 if (IS_ERR(bh))
7174                         return PTR_ERR(bh);
7175                 if (!bh)        /* A hole? */
7176                         memset(data, 0, tocopy);
7177                 else
7178                         memcpy(data, bh->b_data+offset, tocopy);
7179                 brelse(bh);
7180                 offset = 0;
7181                 toread -= tocopy;
7182                 data += tocopy;
7183                 blk++;
7184         }
7185         return len;
7186 }
7187
7188 /* Write to quotafile (we know the transaction is already started and has
7189  * enough credits)
      *
      * Copies @len bytes from @data into the quota file of @type at byte
      * offset @off, using the handle returned by journal_current_handle().
      * The range must fit inside a single block.
      *
      * Returns @len on success or a negative errno: -EIO when no handle is
      * running or the range crosses a block boundary, otherwise the error from
      * ext4_bread(), ext4_journal_get_write_access(),
      * ext4_handle_dirty_metadata() or ext4_mark_inode_dirty().
      */
7190 static ssize_t ext4_quota_write(struct super_block *sb, int type,
7191                                 const char *data, size_t len, loff_t off)
7192 {
7193         struct inode *inode = sb_dqopt(sb)->files[type];
7194         ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7195         int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
7196         int retries = 0;
7197         struct buffer_head *bh;
7198         handle_t *handle = journal_current_handle();
7199
7200         if (!handle) {
7201                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7202                         " cancelled because transaction is not started",
7203                         (unsigned long long)off, (unsigned long long)len);
7204                 return -EIO;
7205         }
7206         /*
7207          * Since we account only one data block in transaction credits,
7208          * then it is impossible to cross a block boundary.
7209          */
7210         if (sb->s_blocksize - offset < len) {
7211                 ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7212                         " cancelled because not block aligned",
7213                         (unsigned long long)off, (unsigned long long)len);
7214                 return -EIO;
7215         }
7216
             /*
              * Get (creating if needed) the target block; retry for as long as
              * the failure is -ENOSPC and ext4_should_retry_alloc() agrees.
              */
7217         do {
7218                 bh = ext4_bread(handle, inode, blk,
7219                                 EXT4_GET_BLOCKS_CREATE |
7220                                 EXT4_GET_BLOCKS_METADATA_NOFAIL);
7221         } while (PTR_ERR(bh) == -ENOSPC &&
7222                  ext4_should_retry_alloc(inode->i_sb, &retries));
7223         if (IS_ERR(bh))
7224                 return PTR_ERR(bh);
             /* No buffer and no error: skip the copy, still update the size. */
7225         if (!bh)
7226                 goto out;
7227         BUFFER_TRACE(bh, "get write access");
7228         err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
7229         if (err) {
7230                 brelse(bh);
7231                 return err;
7232         }
7233         lock_buffer(bh);
7234         memcpy(bh->b_data+offset, data, len);
7235         flush_dcache_page(bh->b_page);
7236         unlock_buffer(bh);
7237         err = ext4_handle_dirty_metadata(handle, NULL, bh);
7238         brelse(bh);
7239 out:
             /* Grow i_size/i_disksize when the write ends past the current size. */
7240         if (inode->i_size < off + len) {
7241                 i_size_write(inode, off + len);
7242                 EXT4_I(inode)->i_disksize = inode->i_size;
7243                 err2 = ext4_mark_inode_dirty(handle, inode);
                     /* Keep the earlier error if there was one. */
7244                 if (unlikely(err2 && !err))
7245                         err = err2;
7246         }
7247         return err ? err : len;
7248 }
7249 #endif
7250
/*
 * ext2 compatibility helpers.  They are real only when no native ext2 driver
 * is configured and CONFIG_EXT4_USE_FOR_EXT2 is set; otherwise they are stubs
 * and ext2_feature_set_ok() always reports "not ok".
 */
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/* Register ext2_fs_type; a failure is only logged. */
static inline void register_as_ext2(void)
{
        int err = register_filesystem(&ext2_fs_type);

        if (!err)
                return;
        printk(KERN_WARNING
               "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

/* Undo register_as_ext2(). */
static inline void unregister_as_ext2(void)
{
        unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 when @sb has no incompat features unknown to ext2 and is either
 * read-only or free of unknown ro_compat features; 0 otherwise.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
        if (ext4_has_unknown_ext2_incompat_features(sb))
                return 0;

        return sb_rdonly(sb) ||
               !ext4_has_unknown_ext2_ro_compat_features(sb);
}
#else
static inline void register_as_ext2(void)
{
}

static inline void unregister_as_ext2(void)
{
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
        return 0;
}
#endif
7280
7281 static inline void register_as_ext3(void)
7282 {
7283         int err = register_filesystem(&ext3_fs_type);
7284         if (err)
7285                 printk(KERN_WARNING
7286                        "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7287 }
7288
     /* Undo register_as_ext3(): unregister ext3_fs_type. */
7289 static inline void unregister_as_ext3(void)
7290 {
7291         unregister_filesystem(&ext3_fs_type);
7292 }
7293
/*
 * Return 1 when @sb has the journal feature, no incompat features unknown to
 * ext3, and is either read-only or free of unknown ro_compat features;
 * 0 otherwise.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
        if (ext4_has_unknown_ext3_incompat_features(sb))
                return 0;
        if (!ext4_has_feature_journal(sb))
                return 0;

        return sb_rdonly(sb) ||
               !ext4_has_unknown_ext3_ro_compat_features(sb);
}
7306
7307 static void ext4_kill_sb(struct super_block *sb)
7308 {
7309         struct ext4_sb_info *sbi = EXT4_SB(sb);
7310         struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL;
7311
7312         kill_block_super(sb);
7313
7314         if (journal_bdev)
7315                 blkdev_put(journal_bdev, sb);
7316 }
7317
7318 static struct file_system_type ext4_fs_type = {
7319         .owner                  = THIS_MODULE,
7320         .name                   = "ext4",
7321         .init_fs_context        = ext4_init_fs_context,
7322         .parameters             = ext4_param_specs,
7323         .kill_sb                = ext4_kill_sb,
7324         .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
7325 };
7326 MODULE_ALIAS_FS("ext4");
7327
7328 /*
      * Shared across all ext4 file systems.  Each of the EXT4_WQ_HASH_SZ wait
      * queue heads is initialized once by ext4_init_fs().
      */
7329 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
7330
     /*
      * Module init: set up ext4's global state and register the filesystem
      * types.
      *
      * The ext4_init_*() steps run in a fixed order; when one fails, control
      * jumps to the label that tears down everything set up before it, in
      * reverse order.  register_as_ext3()/register_as_ext2() return nothing, so
      * only a failure to register ext4_fs_type itself triggers the full unwind.
      *
      * Returns 0 on success or the error of the step that failed.
      */
7331 static int __init ext4_init_fs(void)
7332 {
7333         int i, err;
7334
7335         ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
7336         ext4_li_info = NULL;
7337
7338         /* Build-time check for flags consistency */
7339         ext4_check_flag_values();
7340
7341         for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
7342                 init_waitqueue_head(&ext4__ioend_wq[i]);
7343
             /* Nothing to undo yet, so the first failure returns directly. */
7344         err = ext4_init_es();
7345         if (err)
7346                 return err;
7347
7348         err = ext4_init_pending();
7349         if (err)
7350                 goto out7;
7351
7352         err = ext4_init_post_read_processing();
7353         if (err)
7354                 goto out6;
7355
7356         err = ext4_init_pageio();
7357         if (err)
7358                 goto out5;
7359
7360         err = ext4_init_system_zone();
7361         if (err)
7362                 goto out4;
7363
7364         err = ext4_init_sysfs();
7365         if (err)
7366                 goto out3;
7367
7368         err = ext4_init_mballoc();
7369         if (err)
7370                 goto out2;
7371         err = init_inodecache();
7372         if (err)
7373                 goto out1;
7374
7375         err = ext4_fc_init_dentry_cache();
7376         if (err)
7377                 goto out05;
7378
7379         register_as_ext3();
7380         register_as_ext2();
7381         err = register_filesystem(&ext4_fs_type);
7382         if (err)
7383                 goto out;
7384
7385         return 0;
             /* Unwind: each label undoes the step above the one that failed. */
7386 out:
7387         unregister_as_ext2();
7388         unregister_as_ext3();
7389         ext4_fc_destroy_dentry_cache();
7390 out05:
7391         destroy_inodecache();
7392 out1:
7393         ext4_exit_mballoc();
7394 out2:
7395         ext4_exit_sysfs();
7396 out3:
7397         ext4_exit_system_zone();
7398 out4:
7399         ext4_exit_pageio();
7400 out5:
7401         ext4_exit_post_read_processing();
7402 out6:
7403         ext4_exit_pending();
7404 out7:
7405         ext4_exit_es();
7406
7407         return err;
7408 }
7409
7410 static void __exit ext4_exit_fs(void)
7411 {
7412         ext4_destroy_lazyinit_thread();
7413         unregister_as_ext2();
7414         unregister_as_ext3();
7415         unregister_filesystem(&ext4_fs_type);
7416         ext4_fc_destroy_dentry_cache();
7417         destroy_inodecache();
7418         ext4_exit_mballoc();
7419         ext4_exit_sysfs();
7420         ext4_exit_system_zone();
7421         ext4_exit_pageio();
7422         ext4_exit_post_read_processing();
7423         ext4_exit_es();
7424         ext4_exit_pending();
7425 }
7426
     /* Module metadata, followed by the module's init and exit entry points. */
7427 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
7428 MODULE_DESCRIPTION("Fourth Extended Filesystem");
7429 MODULE_LICENSE("GPL");
7430 MODULE_SOFTDEP("pre: crc32c");
7431 module_init(ext4_init_fs)
7432 module_exit(ext4_exit_fs)