Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Apr 2022 01:18:27 +0000 (18:18 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Apr 2022 01:18:27 +0000 (18:18 -0700)
Pull ext4 fixes from Ted Ts'o:
 "Fix some syzbot-detected bugs, as well as other bugs found by I/O
  injection testing.

  Change ext4's fallocate to consistently drop set[ug]id bits when an
  fallocate operation might possibly change the user-visible contents of
  a file.

 Also, improve handling of potentially invalid values in the
  s_overhead_cluster superblock field to avoid ext4 returning a negative
  number of free blocks"
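
For context, a hedged sketch of the pattern behind the last patch in the
series ("ext4: fix fallocate to use file_modified to update permissions
consistently"): every fallocate-family operation that can change the
user-visible contents of a file calls file_modified(), which drops the
set[ug]id bits and updates the timestamps before any blocks are touched.
The code below is illustrative only; example_punch_hole is a made-up
name, not the verbatim ext4_punch_hole shown in the diff further down.

	/* Sketch: drop set[ug]id bits before a content-changing fallocate op. */
	static long example_punch_hole(struct file *file, loff_t offset, loff_t len)
	{
		struct inode *inode = file_inode(file);
		int ret;

		inode_lock(inode);

		/* Wait for in-flight direct I/O before touching the mapping. */
		inode_dio_wait(inode);

		/*
		 * file_modified() strips the set[ug]id bits and updates
		 * m/ctime, so an unprivileged writer punching a hole cannot
		 * leave a stale setuid binary behind.
		 */
		ret = file_modified(file);
		if (ret)
			goto out;

		/* ... remove the blocks and zero the partial edges here ... */
	out:
		inode_unlock(inode);
		return ret;
	}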

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  jbd2: fix a potential race while discarding reserved buffers after an abort
  ext4: update the cached overhead value in the superblock
  ext4: force overhead calculation if the s_overhead_cluster makes no sense
  ext4: fix overhead calculation to account for the reserved gdt blocks
  ext4, doc: fix incorrect h_reserved size
  ext4: limit length to bitmap_maxbytes - blocksize in punch_hole
  ext4: fix use-after-free in ext4_search_dir
  ext4: fix bug_on in start_this_handle during umount filesystem
  ext4: fix symlink file size not match to file content
  ext4: fix fallocate to use file_modified to update permissions consistently
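
The three overhead patches above close a statfs corner case:
ext4_statfs() subtracts the cached overhead from the cluster count, so a
corrupt or overlarge s_overhead_clusters value makes the reported free
count go negative. A rough, simplified sketch of the arithmetic follows;
it is not the verbatim ext4_statfs(), and example_free_clusters is an
illustrative name.

	/* Simplified statfs arithmetic: why a bogus cached overhead matters. */
	static u64 example_free_clusters(u64 total, u64 overhead, u64 used)
	{
		/*
		 * f_blocks = total - overhead and f_bfree = f_blocks - used.
		 * If overhead > total (an insane s_overhead_clusters), the
		 * unsigned subtraction wraps and userspace sees a negative
		 * free count -- hence the sanity check added in super.c
		 * below, which throws the cached value away and recomputes.
		 */
		return total - overhead - used;
	}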

fs/ext4/inode.c
fs/ext4/page-io.c
fs/ext4/super.c

diff --combined fs/ext4/inode.c
index 13740f2d0e6109a88bae9f83c0f1ff08bad99bce,d815502cc97cfca92edf7b567c3d414366030604..646ece9b3455ffc04007f330974e3f2284e01bc1
@@@ -137,6 -137,8 +137,6 @@@ static inline int ext4_begin_ordered_tr
                                                   new_size);
  }
  
 -static void ext4_invalidatepage(struct page *page, unsigned int offset,
 -                              unsigned int length);
  static int __ext4_journalled_writepage(struct page *page, unsigned int len);
  static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
                                  int pextents);
@@@ -184,7 -186,7 +184,7 @@@ void ext4_evict_inode(struct inode *ino
                 * journal. So although mm thinks everything is clean and
                 * ready for reaping the inode might still have some pages to
                 * write in the running transaction or waiting to be
 -               * checkpointed. Thus calling jbd2_journal_invalidatepage()
 +               * checkpointed. Thus calling jbd2_journal_invalidate_folio()
                 * (via truncate_inode_pages()) to discard these buffers can
                 * cause data loss. Also even if we did not discard these
                 * buffers, we would have no way to find them after the inode
@@@ -1569,18 -1571,16 +1569,18 @@@ static void mpage_release_unused_pages(
                        break;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 +                      struct folio *folio = page_folio(page);
  
 -                      BUG_ON(!PageLocked(page));
 -                      BUG_ON(PageWriteback(page));
 +                      BUG_ON(!folio_test_locked(folio));
 +                      BUG_ON(folio_test_writeback(folio));
                        if (invalidate) {
 -                              if (page_mapped(page))
 -                                      clear_page_dirty_for_io(page);
 -                              block_invalidatepage(page, 0, PAGE_SIZE);
 -                              ClearPageUptodate(page);
 +                              if (folio_mapped(folio))
 +                                      folio_clear_dirty_for_io(folio);
 +                              block_invalidate_folio(folio, 0,
 +                                              folio_size(folio));
 +                              folio_clear_uptodate(folio);
                        }
 -                      unlock_page(page);
 +                      folio_unlock(folio);
                }
                pagevec_release(&pvec);
        }
@@@ -1971,7 -1971,6 +1971,7 @@@ out_no_pagelock
  static int ext4_writepage(struct page *page,
                          struct writeback_control *wbc)
  {
 +      struct folio *folio = page_folio(page);
        int ret = 0;
        loff_t size;
        unsigned int len;
        bool keep_towrite = false;
  
        if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
 -              inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
 -              unlock_page(page);
 +              folio_invalidate(folio, 0, folio_size(folio));
 +              folio_unlock(folio);
                return -EIO;
        }
  
@@@ -3208,39 -3207,40 +3208,39 @@@ static void ext4_readahead(struct reada
        ext4_mpage_readpages(inode, rac, NULL);
  }
  
 -static void ext4_invalidatepage(struct page *page, unsigned int offset,
 -                              unsigned int length)
 +static void ext4_invalidate_folio(struct folio *folio, size_t offset,
 +                              size_t length)
  {
 -      trace_ext4_invalidatepage(page, offset, length);
 +      trace_ext4_invalidate_folio(folio, offset, length);
  
        /* No journalling happens on data buffers when this function is used */
 -      WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page)));
 +      WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio)));
  
 -      block_invalidatepage(page, offset, length);
 +      block_invalidate_folio(folio, offset, length);
  }
  
 -static int __ext4_journalled_invalidatepage(struct page *page,
 -                                          unsigned int offset,
 -                                          unsigned int length)
 +static int __ext4_journalled_invalidate_folio(struct folio *folio,
 +                                          size_t offset, size_t length)
  {
 -      journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 +      journal_t *journal = EXT4_JOURNAL(folio->mapping->host);
  
 -      trace_ext4_journalled_invalidatepage(page, offset, length);
 +      trace_ext4_journalled_invalidate_folio(folio, offset, length);
  
        /*
         * If it's a full truncate we just forget about the pending dirtying
         */
 -      if (offset == 0 && length == PAGE_SIZE)
 -              ClearPageChecked(page);
 +      if (offset == 0 && length == folio_size(folio))
 +              folio_clear_checked(folio);
  
 -      return jbd2_journal_invalidatepage(journal, page, offset, length);
 +      return jbd2_journal_invalidate_folio(journal, folio, offset, length);
  }
  
  /* Wrapper for aops... */
 -static void ext4_journalled_invalidatepage(struct page *page,
 -                                         unsigned int offset,
 -                                         unsigned int length)
 +static void ext4_journalled_invalidate_folio(struct folio *folio,
 +                                         size_t offset,
 +                                         size_t length)
  {
 -      WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0);
 +      WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0);
  }
  
  static int ext4_releasepage(struct page *page, gfp_t wait)
@@@ -3434,13 -3434,6 +3434,13 @@@ static int ext4_iomap_begin(struct inod
        if (ret < 0)
                return ret;
  out:
 +      /*
 +       * When inline encryption is enabled, sometimes I/O to an encrypted file
 +       * has to be broken up to guarantee DUN contiguity.  Handle this by
 +       * limiting the length of the mapping returned.
 +       */
 +      map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
 +
        ext4_set_iomap(inode, iomap, &map, offset, length, flags);
  
        return 0;
@@@ -3573,32 -3566,31 +3573,32 @@@ const struct iomap_ops ext4_iomap_repor
  };
  
  /*
 - * Whenever the page is being dirtied, corresponding buffers should already be
 - * attached to the transaction (we take care of this in ext4_page_mkwrite() and
 - * ext4_write_begin()). However we cannot move buffers to dirty transaction
 - * lists here because ->set_page_dirty is called under VFS locks and the page
 + * Whenever the folio is being dirtied, corresponding buffers should already
 + * be attached to the transaction (we take care of this in ext4_page_mkwrite()
 + * and ext4_write_begin()). However we cannot move buffers to dirty transaction
 + * lists here because ->dirty_folio is called under VFS locks and the folio
   * is not necessarily locked.
   *
 - * We cannot just dirty the page and leave attached buffers clean, because the
 + * We cannot just dirty the folio and leave attached buffers clean, because the
   * buffers' dirty state is "definitive".  We cannot just set the buffers dirty
   * or jbddirty because all the journalling code will explode.
   *
 - * So what we do is to mark the page "pending dirty" and next time writepage
 + * So what we do is to mark the folio "pending dirty" and next time writepage
   * is called, propagate that into the buffers appropriately.
   */
 -static int ext4_journalled_set_page_dirty(struct page *page)
 +static bool ext4_journalled_dirty_folio(struct address_space *mapping,
 +              struct folio *folio)
  {
 -      WARN_ON_ONCE(!page_has_buffers(page));
 -      SetPageChecked(page);
 -      return __set_page_dirty_nobuffers(page);
 +      WARN_ON_ONCE(!folio_buffers(folio));
 +      folio_set_checked(folio);
 +      return filemap_dirty_folio(mapping, folio);
  }
  
 -static int ext4_set_page_dirty(struct page *page)
 +static bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio)
  {
 -      WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
 -      WARN_ON_ONCE(!page_has_buffers(page));
 -      return __set_page_dirty_buffers(page);
 +      WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio));
 +      WARN_ON_ONCE(!folio_buffers(folio));
 +      return block_dirty_folio(mapping, folio);
  }
  
  static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
@@@ -3615,9 -3607,9 +3615,9 @@@ static const struct address_space_opera
        .writepages             = ext4_writepages,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_write_end,
 -      .set_page_dirty         = ext4_set_page_dirty,
 +      .dirty_folio            = ext4_dirty_folio,
        .bmap                   = ext4_bmap,
 -      .invalidatepage         = ext4_invalidatepage,
 +      .invalidate_folio       = ext4_invalidate_folio,
        .releasepage            = ext4_releasepage,
        .direct_IO              = noop_direct_IO,
        .migratepage            = buffer_migrate_page,
@@@ -3633,9 -3625,9 +3633,9 @@@ static const struct address_space_opera
        .writepages             = ext4_writepages,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_journalled_write_end,
 -      .set_page_dirty         = ext4_journalled_set_page_dirty,
 +      .dirty_folio            = ext4_journalled_dirty_folio,
        .bmap                   = ext4_bmap,
 -      .invalidatepage         = ext4_journalled_invalidatepage,
 +      .invalidate_folio       = ext4_journalled_invalidate_folio,
        .releasepage            = ext4_releasepage,
        .direct_IO              = noop_direct_IO,
        .is_partially_uptodate  = block_is_partially_uptodate,
@@@ -3650,9 -3642,9 +3650,9 @@@ static const struct address_space_opera
        .writepages             = ext4_writepages,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
 -      .set_page_dirty         = ext4_set_page_dirty,
 +      .dirty_folio            = ext4_dirty_folio,
        .bmap                   = ext4_bmap,
 -      .invalidatepage         = ext4_invalidatepage,
 +      .invalidate_folio       = ext4_invalidate_folio,
        .releasepage            = ext4_releasepage,
        .direct_IO              = noop_direct_IO,
        .migratepage            = buffer_migrate_page,
  static const struct address_space_operations ext4_dax_aops = {
        .writepages             = ext4_dax_writepages,
        .direct_IO              = noop_direct_IO,
 -      .set_page_dirty         = __set_page_dirty_no_writeback,
 +      .dirty_folio            = noop_dirty_folio,
        .bmap                   = ext4_bmap,
 -      .invalidatepage         = noop_invalidatepage,
        .swap_activate          = ext4_iomap_swap_activate,
  };
  
@@@ -3953,12 -3946,14 +3953,14 @@@ int ext4_break_layouts(struct inode *in
   * Returns: 0 on success or negative on failure
   */
  
- int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
+ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  {
+       struct inode *inode = file_inode(file);
        struct super_block *sb = inode->i_sb;
        ext4_lblk_t first_block, stop_block;
        struct address_space *mapping = inode->i_mapping;
-       loff_t first_block_offset, last_block_offset;
+       loff_t first_block_offset, last_block_offset, max_length;
+       struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        handle_t *handle;
        unsigned int credits;
        int ret = 0, ret2 = 0;
                   offset;
        }
  
+       /*
+        * For punch hole the length + offset needs to be within one block
+        * before last range. Adjust the length if it goes beyond that limit.
+        */
+       max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
+       if (offset + length > max_length)
+               length = max_length - offset;
        if (offset & (sb->s_blocksize - 1) ||
            (offset + length) & (sb->s_blocksize - 1)) {
                /*
        /* Wait all existing dio workers, newcomers will block on i_rwsem */
        inode_dio_wait(inode);
  
+       ret = file_modified(file);
+       if (ret)
+               goto out_mutex;
        /*
         * Prevent page faults from reinstantiating pages we have released from
         * page cache.
@@@ -5238,12 -5245,13 +5252,12 @@@ int ext4_write_inode(struct inode *inod
  }
  
  /*
 - * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate
 - * buffers that are attached to a page stradding i_size and are undergoing
 + * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate
 + * buffers that are attached to a folio straddling i_size and are undergoing
   * commit. In that case we have to wait for commit to finish and try again.
   */
  static void ext4_wait_for_tail_page_commit(struct inode *inode)
  {
 -      struct page *page;
        unsigned offset;
        journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
        tid_t commit_tid = 0;
  
        offset = inode->i_size & (PAGE_SIZE - 1);
        /*
 -       * If the page is fully truncated, we don't need to wait for any commit
 -       * (and we even should not as __ext4_journalled_invalidatepage() may
 -       * strip all buffers from the page but keep the page dirty which can then
 -       * confuse e.g. concurrent ext4_writepage() seeing dirty page without
 +       * If the folio is fully truncated, we don't need to wait for any commit
 +       * (and we even should not as __ext4_journalled_invalidate_folio() may
 +       * strip all buffers from the folio but keep the folio dirty which can then
 +       * confuse e.g. concurrent ext4_writepage() seeing dirty folio without
         * buffers). Also we don't need to wait for any commit if all buffers in
 -       * the page remain valid. This is most beneficial for the common case of
 +       * the folio remain valid. This is most beneficial for the common case of
         * blocksize == PAGESIZE.
         */
        if (!offset || offset > (PAGE_SIZE - i_blocksize(inode)))
                return;
        while (1) {
 -              page = find_lock_page(inode->i_mapping,
 +              struct folio *folio = filemap_lock_folio(inode->i_mapping,
                                      inode->i_size >> PAGE_SHIFT);
 -              if (!page)
 +              if (!folio)
                        return;
 -              ret = __ext4_journalled_invalidatepage(page, offset,
 -                                              PAGE_SIZE - offset);
 -              unlock_page(page);
 -              put_page(page);
 +              ret = __ext4_journalled_invalidate_folio(folio, offset,
 +                                              folio_size(folio) - offset);
 +              folio_unlock(folio);
 +              folio_put(folio);
                if (ret != -EBUSY)
                        return;
                commit_tid = 0;
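
The inode.c hunks above belong to the v5.18 folio conversion: the
page-based address_space_operations (->set_page_dirty, ->invalidatepage)
become folio-based (->dirty_folio, ->invalidate_folio), and the offset
arguments widen from unsigned int to size_t because a folio can span
multiple pages. A minimal sketch of the shape of the new hooks for a
hypothetical buffer-head-backed filesystem; the example_* names are
illustrative, not from the patch.

	/* Sketch: folio-based aops for a buffer-head-backed filesystem. */
	static bool example_dirty_folio(struct address_space *mapping,
					struct folio *folio)
	{
		/* Dirty the folio and its attached buffers together. */
		return block_dirty_folio(mapping, folio);
	}

	static void example_invalidate_folio(struct folio *folio, size_t offset,
					     size_t length)
	{
		/* Drop the affected buffers when (part of) the folio goes away. */
		block_invalidate_folio(folio, offset, length);
	}

	static const struct address_space_operations example_aops = {
		.dirty_folio		= example_dirty_folio,
		.invalidate_folio	= example_invalidate_folio,
		/* read/write hooks elided */
	};
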
diff --combined fs/ext4/page-io.c
index 495ce59fb4ad7781bc44bd8ca0ca5649a63915cd,40b7d8485b44560a7cc91f4aa999216c8cfc4089..14695e2b5042ba5e6dbb594ecff5238899c8d955
@@@ -134,8 -134,10 +134,10 @@@ static void ext4_finish_bio(struct bio 
                                continue;
                        }
                        clear_buffer_async_write(bh);
-                       if (bio->bi_status)
+                       if (bio->bi_status) {
+                               set_buffer_write_io_error(bh);
                                buffer_io_error(bh);
+                       }
                } while ((bh = bh->b_this_page) != head);
                spin_unlock_irqrestore(&head->b_uptodate_lock, flags);
                if (!under_io) {
@@@ -323,9 -325,10 +325,9 @@@ static void ext4_end_bio(struct bio *bi
  {
        ext4_io_end_t *io_end = bio->bi_private;
        sector_t bi_sector = bio->bi_iter.bi_sector;
 -      char b[BDEVNAME_SIZE];
  
 -      if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
 -                    bio_devname(bio, b),
 +      if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n",
 +                    bio->bi_bdev,
                      (long long) bio->bi_iter.bi_sector,
                      (unsigned) bio_sectors(bio),
                      bio->bi_status)) {
@@@ -371,8 -374,10 +373,8 @@@ void ext4_io_submit(struct ext4_io_subm
        struct bio *bio = io->io_bio;
  
        if (bio) {
 -              int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ?
 -                                REQ_SYNC : 0;
 -              io->io_bio->bi_write_hint = io->io_end->inode->i_write_hint;
 -              bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);
 +              if (io->io_wbc->sync_mode == WB_SYNC_ALL)
 +                      io->io_bio->bi_opf |= REQ_SYNC;
                submit_bio(io->io_bio);
        }
        io->io_bio = NULL;
@@@ -395,9 -400,10 +397,9 @@@ static void io_submit_init_bio(struct e
         * bio_alloc will _always_ be able to allocate a bio if
         * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
         */
 -      bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
 +      bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO);
        fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 -      bio_set_dev(bio, bh->b_bdev);
        bio->bi_end_io = ext4_end_bio;
        bio->bi_private = ext4_get_io_end(io->io_end);
        io->io_bio = bio;
@@@ -417,8 -423,10 +419,8 @@@ static void io_submit_add_bh(struct ext
  submit_and_retry:
                ext4_io_submit(io);
        }
 -      if (io->io_bio == NULL) {
 +      if (io->io_bio == NULL)
                io_submit_init_bio(io, bh);
 -              io->io_bio->bi_write_hint = inode->i_write_hint;
 -      }
        ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
        if (ret != bh->b_size)
                goto submit_and_retry;
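
The io_submit_init_bio() hunk above also picks up the v5.18 block-layer
API change: bio_alloc() now takes the target device, the vector count,
and the operation up front, replacing the separate bio_set_dev() and
bio_set_op_attrs() calls. A small before/after fragment (assuming a
buffer head bh, as in the patch):

	/* Pre-5.18: allocate first, then attach the device and operation. */
	bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
	bio_set_dev(bio, bh->b_bdev);
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

	/* 5.18+: device, max vectors, opf and gfp mask in a single call. */
	bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO);
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
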
diff --combined fs/ext4/super.c
index 81749eaddf4c1212ee3650a3bcf021fbcd532826,1847b46af808375c559e65bb2d1a642de9953996..1466fbdbc8e345974b07c48c05aee1de79f0a1de
@@@ -1199,20 -1199,25 +1199,25 @@@ static void ext4_put_super(struct super
        int aborted = 0;
        int i, err;
  
-       ext4_unregister_li_request(sb);
-       ext4_quota_off_umount(sb);
-       flush_work(&sbi->s_error_work);
-       destroy_workqueue(sbi->rsv_conversion_wq);
-       ext4_release_orphan_info(sb);
        /*
         * Unregister sysfs before destroying jbd2 journal.
         * Since we could still access attr_journal_task attribute via sysfs
         * path which could have sbi->s_journal->j_task as NULL
+        * Unregister sysfs before flush sbi->s_error_work.
+        * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
+        * read metadata verify failed then will queue error work.
+        * flush_stashed_error_work will call start_this_handle may trigger
+        * BUG_ON.
         */
        ext4_unregister_sysfs(sb);
  
+       ext4_unregister_li_request(sb);
+       ext4_quota_off_umount(sb);
+       flush_work(&sbi->s_error_work);
+       destroy_workqueue(sbi->rsv_conversion_wq);
+       ext4_release_orphan_info(sb);
        if (sbi->s_journal) {
                aborted = is_journal_aborted(sbi->s_journal);
                err = jbd2_journal_destroy(sbi->s_journal);
@@@ -1316,7 -1321,7 +1321,7 @@@ static struct inode *ext4_alloc_inode(s
  {
        struct ext4_inode_info *ei;
  
 -      ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
 +      ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
        if (!ei)
                return NULL;
  
@@@ -4172,9 -4177,11 +4177,11 @@@ static int count_overhead(struct super_
        ext4_fsblk_t            first_block, last_block, b;
        ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
        int                     s, j, count = 0;
+       int                     has_super = ext4_bg_has_super(sb, grp);
  
        if (!ext4_has_feature_bigalloc(sb))
-               return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+               return (has_super + ext4_bg_num_gdb(sb, grp) +
+                       (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
                        sbi->s_itb_per_group + 2);
  
        first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
@@@ -5282,9 -5289,18 +5289,18 @@@ no_journal
         * Get the # of file system overhead blocks from the
         * superblock if present.
         */
-       if (es->s_overhead_clusters)
-               sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
-       else {
+       sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+       /* ignore the precalculated value if it is ridiculous */
+       if (sbi->s_overhead > ext4_blocks_count(es))
+               sbi->s_overhead = 0;
+       /*
+        * If the bigalloc feature is not enabled recalculating the
+        * overhead doesn't take long, so we might as well just redo
+        * it to make sure we are using the correct value.
+        */
+       if (!ext4_has_feature_bigalloc(sb))
+               sbi->s_overhead = 0;
+       if (sbi->s_overhead == 0) {
                err = ext4_calculate_overhead(sb);
                if (err)
                        goto failed_mount_wq;
@@@ -5602,6 -5618,8 +5618,8 @@@ static int ext4_fill_super(struct super
                ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
                         "Quota mode: %s.", descr, ext4_quota_mode(sb));
  
+       /* Update the s_overhead_clusters if necessary */
+       ext4_update_overhead(sb);
        return 0;
  
  free_sbi: