Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <[email protected]>
Sat, 3 Jul 2021 18:30:04 +0000 (11:30 -0700)
committer Linus Torvalds <[email protected]>
Sat, 3 Jul 2021 18:30:04 +0000 (11:30 -0700)
Pull iov_iter updates from Al Viro:
 "iov_iter cleanups and fixes.

  There are followups, but this is what had sat in -next this cycle. IMO
  the macro forest in there became much thinner and easier to follow..."

* 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (37 commits)
  csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller
  clean up copy_mc_pipe_to_iter()
  pipe_zero(): we don't need no stinkin' kmap_atomic()...
  iov_iter: clean csum_and_copy_...() primitives up a bit
  copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases
  copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases
  iterate_xarray(): only of the first iteration we might get offset != 0
  pull handling of ->iov_offset into iterate_{iovec,bvec,xarray}
  iov_iter: make iterator callbacks use base and len instead of iovec
  iov_iter: make the amount already copied available to iterator callbacks
  iov_iter: get rid of separate bvec and xarray callbacks
  iov_iter: teach iterate_{bvec,xarray}() about possible short copies
  iterate_bvec(): expand bvec.h macro forest, massage a bit
  iov_iter: unify iterate_iovec and iterate_kvec
  iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec
  iterate_and_advance(): get rid of magic in case when n is 0
  csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter()
  iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant
  [xarray] iov_iter_npages(): just use DIV_ROUND_UP()
  iov_iter_npages(): don't bother with iterate_all_kinds()
  ...

fs/btrfs/file.c
fs/iomap/buffered-io.c
mm/filemap.c
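
The three combined diffs below all follow the calling convention introduced by the "iterator-advancing variant" commits: copy_page_from_iter_atomic() advances the iov_iter as it copies, so a caller that decides not to accept the data calls iov_iter_revert() instead of doing a separate iov_iter_advance() afterwards. A minimal userspace sketch of that convention (the toy_* names are made up for illustration and are not the kernel API):

/*
 * Minimal userspace sketch, not kernel code: the copy helper advances the
 * iterator by however much it copied, and a caller that rejects the data
 * puts it back with a revert instead of advancing by hand afterwards.
 */
#include <stdio.h>
#include <string.h>

struct toy_iter {
	const char *buf;	/* stands in for the iovec/bvec machinery */
	size_t count;		/* bytes left in the iterator */
};

/* Copy up to @len bytes into @dst and advance the iterator, the way
 * copy_page_from_iter_atomic() now advances the iov_iter it is given. */
static size_t toy_copy_and_advance(char *dst, size_t len, struct toy_iter *i)
{
	size_t n = len < i->count ? len : i->count;

	memcpy(dst, i->buf, n);
	i->buf += n;
	i->count -= n;
	return n;
}

/* Undo part of an earlier advance, the way iov_iter_revert() does. */
static void toy_revert(struct toy_iter *i, size_t n)
{
	i->buf -= n;
	i->count += n;
}

int main(void)
{
	char page[32];
	struct toy_iter it = { .buf = "hello, iov_iter", .count = 15 };
	size_t want = sizeof(page);
	size_t copied = toy_copy_and_advance(page, want, &it);

	if (copied < want) {
		/* Mirrors the btrfs hunk below: the caller decides not to
		 * accept a short copy, so it reverts rather than keeping it. */
		toy_revert(&it, copied);
		copied = 0;
	}
	printf("accepted %zu bytes, %zu left in the iterator\n",
	       copied, it.count);
	return 0;
}

All three file diffs use this shape: copy (which advances the iterator), then revert whatever the consumer did not accept, with no separate advance step.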

diff --combined fs/btrfs/file.c
index 28a05ba47060e66544eff306834d92534331a2c1,78cb8f9eaa6b42710214b64065fc4f22302690c5..ee34497500e169415efdb47817c5af65260aebfd
@@@ -28,7 -28,6 +28,7 @@@
  #include "compression.h"
  #include "delalloc-space.h"
  #include "reflink.h"
 +#include "subpage.h"
  
  static struct kmem_cache *btrfs_inode_defrag_cachep;
  /*
@@@ -399,7 -398,7 +399,7 @@@ static noinline int btrfs_copy_from_use
                /*
                 * Copy data from userspace to the current page
                 */
-               copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+               copied = copy_page_from_iter_atomic(page, offset, count, i);
  
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
                 * The rest of the btrfs_file_write code will fall
                 * back to page at a time copies after we return 0.
                 */
-               if (!PageUptodate(page) && copied < count)
-                       copied = 0;
+               if (unlikely(copied < count)) {
+                       if (!PageUptodate(page)) {
+                               iov_iter_revert(i, copied);
+                               copied = 0;
+                       }
+                       if (!copied)
+                               break;
+               }
  
-               iov_iter_advance(i, copied);
                write_bytes -= copied;
                total_copied += copied;
-               /* Return to btrfs_file_write_iter to fault page */
-               if (unlikely(copied == 0))
-                       break;
-               if (copied < PAGE_SIZE - offset) {
-                       offset += copied;
-               } else {
+               offset += copied;
+               if (offset == PAGE_SIZE) {
                        pg++;
                        offset = 0;
                }
@@@ -483,7 -481,6 +482,7 @@@ int btrfs_dirty_pages(struct btrfs_inod
        start_pos = round_down(pos, fs_info->sectorsize);
        num_bytes = round_up(write_bytes + pos - start_pos,
                             fs_info->sectorsize);
 +      ASSERT(num_bytes <= U32_MAX);
  
        end_of_last_block = start_pos + num_bytes - 1;
  
  
        for (i = 0; i < num_pages; i++) {
                struct page *p = pages[i];
 -              SetPageUptodate(p);
 +
 +              btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
                ClearPageChecked(p);
 -              set_page_dirty(p);
 +              btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
        }
  
        /*
@@@ -1097,7 -1093,7 +1096,7 @@@ int btrfs_mark_extent_written(struct bt
        int del_nr = 0;
        int del_slot = 0;
        int recow;
 -      int ret;
 +      int ret = 0;
        u64 ino = btrfs_ino(inode);
  
        path = btrfs_alloc_path();
@@@ -1318,7 -1314,7 +1317,7 @@@ again
        }
  out:
        btrfs_free_path(path);
 -      return 0;
 +      return ret;
  }
  
  /*
@@@ -2070,30 -2066,6 +2069,30 @@@ static int start_ordered_ops(struct ino
        return ret;
  }
  
 +static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
 +{
 +      struct btrfs_inode *inode = BTRFS_I(ctx->inode);
 +      struct btrfs_fs_info *fs_info = inode->root->fs_info;
 +
 +      if (btrfs_inode_in_log(inode, fs_info->generation) &&
 +          list_empty(&ctx->ordered_extents))
 +              return true;
 +
 +      /*
 +       * If we are doing a fast fsync we can not bail out if the inode's
 +       * last_trans is <= then the last committed transaction, because we only
 +       * update the last_trans of the inode during ordered extent completion,
 +       * and for a fast fsync we don't wait for that, we only wait for the
 +       * writeback to complete.
 +       */
 +      if (inode->last_trans <= fs_info->last_trans_committed &&
 +          (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
 +           list_empty(&ctx->ordered_extents)))
 +              return true;
 +
 +      return false;
 +}
 +
  /*
   * fsync call for both files and directories.  This logs the inode into
   * the tree log instead of forcing full commits whenever possible.
@@@ -2212,8 -2184,17 +2211,8 @@@ int btrfs_sync_file(struct file *file, 
  
        atomic_inc(&root->log_batch);
  
 -      /*
 -       * If we are doing a fast fsync we can not bail out if the inode's
 -       * last_trans is <= then the last committed transaction, because we only
 -       * update the last_trans of the inode during ordered extent completion,
 -       * and for a fast fsync we don't wait for that, we only wait for the
 -       * writeback to complete.
 -       */
        smp_mb();
 -      if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
 -          (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
 -           (full_sync || list_empty(&ctx.ordered_extents)))) {
 +      if (skip_inode_logging(&ctx)) {
                /*
                 * We've had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
@@@ -2486,17 -2467,6 +2485,17 @@@ static int btrfs_punch_hole_lock_range(
                                       const u64 lockend,
                                       struct extent_state **cached_state)
  {
 +      /*
 +       * For subpage case, if the range is not at page boundary, we could
 +       * have pages at the leading/tailing part of the range.
 +       * This could lead to dead loop since filemap_range_has_page()
 +       * will always return true.
 +       * So here we need to do extra page alignment for
 +       * filemap_range_has_page().
 +       */
 +      const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
 +      const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
 +
        while (1) {
                struct btrfs_ordered_extent *ordered;
                int ret;
                    (ordered->file_offset + ordered->num_bytes <= lockstart ||
                     ordered->file_offset > lockend)) &&
                     !filemap_range_has_page(inode->i_mapping,
 -                                           lockstart, lockend)) {
 +                                           page_lockstart, page_lockend)) {
                        if (ordered)
                                btrfs_put_ordered_extent(ordered);
                        break;
@@@ -3048,20 -3018,22 +3047,20 @@@ struct falloc_range 
   */
  static int add_falloc_range(struct list_head *head, u64 start, u64 len)
  {
 -      struct falloc_range *prev = NULL;
        struct falloc_range *range = NULL;
  
 -      if (list_empty(head))
 -              goto insert;
 -
 -      /*
 -       * As fallocate iterate by bytenr order, we only need to check
 -       * the last range.
 -       */
 -      prev = list_entry(head->prev, struct falloc_range, list);
 -      if (prev->start + prev->len == start) {
 -              prev->len += len;
 -              return 0;
 +      if (!list_empty(head)) {
 +              /*
 +               * As fallocate iterates by bytenr order, we only need to check
 +               * the last range.
 +               */
 +              range = list_last_entry(head, struct falloc_range, list);
 +              if (range->start + range->len == start) {
 +                      range->len += len;
 +                      return 0;
 +              }
        }
 -insert:
 +
        range = kmalloc(sizeof(*range), GFP_KERNEL);
        if (!range)
                return -ENOMEM;
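
The add_falloc_range() change above replaces the prev/goto-insert pattern with a check of the list tail only; that is enough because fallocate iterates the file in increasing bytenr order, so only the most recently added range can be contiguous with a new one. A stand-alone sketch of that coalescing idea (plain pointers and made-up names rather than the kernel's list_head API; error handling and freeing are omitted):

/*
 * Userspace illustration of last-entry coalescing: ranges arrive in
 * increasing offset order, so only the tail can be extended.
 */
#include <stdio.h>
#include <stdlib.h>

struct range {
	unsigned long long start;
	unsigned long long len;
	struct range *next;
};

/* Append @start/@len, merging it into the tail when contiguous. */
static int add_range(struct range **headp, struct range **tailp,
		     unsigned long long start, unsigned long long len)
{
	struct range *r;

	if (*tailp && (*tailp)->start + (*tailp)->len == start) {
		(*tailp)->len += len;		/* contiguous: extend the tail */
		return 0;
	}
	r = malloc(sizeof(*r));
	if (!r)
		return -1;
	r->start = start;
	r->len = len;
	r->next = NULL;
	if (*tailp)
		(*tailp)->next = r;
	else
		*headp = r;
	*tailp = r;
	return 0;
}

int main(void)
{
	struct range *head = NULL, *tail = NULL, *r;

	add_range(&head, &tail, 0, 4096);
	add_range(&head, &tail, 4096, 4096);	/* merges into the first range */
	add_range(&head, &tail, 16384, 4096);	/* gap: becomes a new entry */

	for (r = head; r; r = r->next)
		printf("range [%llu, %llu)\n", r->start, r->start + r->len);
	return 0;
}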
diff --combined fs/iomap/buffered-io.c
index 0065781935c7e30966e11b12fde49d5819ce01be,c5ff13e0e7cfbb10382e15450462fd27c6f7cb3b..41da4f14c00bb515b67acc28fbb0acef3662a9a6
@@@ -394,7 -394,7 +394,7 @@@ void iomap_readahead(struct readahead_c
  {
        struct inode *inode = rac->mapping->host;
        loff_t pos = readahead_pos(rac);
 -      loff_t length = readahead_length(rac);
 +      size_t length = readahead_length(rac);
        struct iomap_readpage_ctx ctx = {
                .rac    = rac,
        };
        trace_iomap_readahead(inode, readahead_count(rac));
  
        while (length > 0) {
 -              loff_t ret = iomap_apply(inode, pos, length, 0, ops,
 +              ssize_t ret = iomap_apply(inode, pos, length, 0, ops,
                                &ctx, iomap_readahead_actor);
                if (ret <= 0) {
                        WARN_ON_ONCE(ret == 0);
@@@ -640,6 -640,31 +640,6 @@@ out_no_page
        return status;
  }
  
 -int
 -iomap_set_page_dirty(struct page *page)
 -{
 -      struct address_space *mapping = page_mapping(page);
 -      int newly_dirty;
 -
 -      if (unlikely(!mapping))
 -              return !TestSetPageDirty(page);
 -
 -      /*
 -       * Lock out page's memcg migration to keep PageDirty
 -       * synchronized with per-memcg dirty page counters.
 -       */
 -      lock_page_memcg(page);
 -      newly_dirty = !TestSetPageDirty(page);
 -      if (newly_dirty)
 -              __set_page_dirty(page, mapping, 0);
 -      unlock_page_memcg(page);
 -
 -      if (newly_dirty)
 -              __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 -      return newly_dirty;
 -}
 -EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
 -
  static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
                size_t copied, struct page *page)
  {
        if (unlikely(copied < len && !PageUptodate(page)))
                return 0;
        iomap_set_range_uptodate(page, offset_in_page(pos), len);
 -      iomap_set_page_dirty(page);
 +      __set_page_dirty_nobuffers(page);
        return copied;
  }
  
@@@ -746,10 -771,6 +746,6 @@@ again
                 * Otherwise there's a nasty deadlock on copying from the
                 * same page as we're writing to, without it being marked
                 * up-to-date.
-                *
-                * Not only is this an optimisation, but it is also required
-                * to check that the address is actually valid, when atomic
-                * usercopies are used, below.
                 */
                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                        status = -EFAULT;
                if (mapping_writably_mapped(inode->i_mapping))
                        flush_dcache_page(page);
  
-               copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+               copied = copy_page_from_iter_atomic(page, offset, bytes, i);
  
-               copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
+               status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
                                srcmap);
  
-               cond_resched();
+               if (unlikely(copied != status))
+                       iov_iter_revert(i, copied - status);
  
-               iov_iter_advance(i, copied);
-               if (unlikely(copied == 0)) {
+               cond_resched();
+               if (unlikely(status == 0)) {
                        /*
-                        * If we were unable to copy any data at all, we must
-                        * fall back to a single segment length write.
-                        *
-                        * If we didn't fallback here, we could livelock
-                        * because not all segments in the iov can be copied at
-                        * once without a pagefault.
+                        * A short copy made iomap_write_end() reject the
+                        * thing entirely.  Might be memory poisoning
+                        * halfway through, might be a race with munmap,
+                        * might be severe memory pressure.
                         */
-                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
-                                               iov_iter_single_seg_count(i));
+                       if (copied)
+                               bytes = copied;
                        goto again;
                }
-               pos += copied;
-               written += copied;
-               length -= copied;
+               pos += status;
+               written += status;
+               length -= status;
  
                balance_dirty_pages_ratelimited(inode->i_mapping);
        } while (iov_iter_count(i) && length);
diff --combined mm/filemap.c
index ac82a93d4f38c5d6f134284a72230583738453ab,cf9de790f49391264d35aa90a1db9e67f885bddc..d1458ecf2f51ec2100eae25e3c53801e96acc0e5
@@@ -872,7 -872,7 +872,7 @@@ noinline int __add_to_page_cache_locked
        page->index = offset;
  
        if (!huge) {
 -              error = mem_cgroup_charge(page, current->mm, gfp);
 +              error = mem_cgroup_charge(page, NULL, gfp);
                if (error)
                        goto error;
                charged = true;
@@@ -3642,10 -3642,6 +3642,6 @@@ again
                 * Otherwise there's a nasty deadlock on copying from the
                 * same page as we're writing to, without it being marked
                 * up-to-date.
-                *
-                * Not only is this an optimisation, but it is also required
-                * to check that the address is actually valid, when atomic
-                * usercopies are used, below.
                 */
                if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
                        status = -EFAULT;
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
  
-               copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+               copied = copy_page_from_iter_atomic(page, offset, bytes, i);
                flush_dcache_page(page);
  
                status = a_ops->write_end(file, mapping, pos, bytes, copied,
                                                page, fsdata);
-               if (unlikely(status < 0))
-                       break;
-               copied = status;
+               if (unlikely(status != copied)) {
+                       iov_iter_revert(i, copied - max(status, 0L));
+                       if (unlikely(status < 0))
+                               break;
+               }
                cond_resched();
  
-               iov_iter_advance(i, copied);
-               if (unlikely(copied == 0)) {
+               if (unlikely(status == 0)) {
                        /*
-                        * If we were unable to copy any data at all, we must
-                        * fall back to a single segment length write.
-                        *
-                        * If we didn't fallback here, we could livelock
-                        * because not all segments in the iov can be copied at
-                        * once without a pagefault.
+                        * A short copy made ->write_end() reject the
+                        * thing entirely.  Might be memory poisoning
+                        * halfway through, might be a race with munmap,
+                        * might be severe memory pressure.
                         */
-                       bytes = min_t(unsigned long, PAGE_SIZE - offset,
-                                               iov_iter_single_seg_count(i));
+                       if (copied)
+                               bytes = copied;
                        goto again;
                }
-               pos += copied;
-               written += copied;
+               pos += status;
+               written += status;
  
                balance_dirty_pages_ratelimited(mapping);
        } while (iov_iter_count(i));
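
The iomap loop earlier and the mm/filemap.c loop in this last diff handle a rejected short copy the same way: revert the bytes that iomap_write_end() or ->write_end() did not accept, and when nothing was accepted retry with exactly the amount that did copy instead of the old iov_iter_single_seg_count() fallback. A self-contained toy model of that retry logic (all toy_* names are invented; the real loops also fault the user pages in up front and bail out if that keeps failing, which this sketch omits):

/*
 * Toy model of revert-and-retry after a rejected short copy.
 * Not kernel code; every name here is made up for illustration.
 */
#include <stdio.h>
#include <string.h>

struct toy_iter { const char *buf; size_t count; };

static size_t toy_copy_and_advance(char *dst, size_t len, struct toy_iter *i)
{
	size_t n = len < i->count ? len : i->count;

	memcpy(dst, i->buf, n);
	i->buf += n;
	i->count -= n;
	return n;
}

static void toy_revert(struct toy_iter *i, size_t n)
{
	i->buf -= n;
	i->count += n;
}

/* Pretend ->write_end(): rejects the first short copy it sees. */
static size_t toy_write_end(size_t requested, size_t copied)
{
	static int rejected_once;

	if (copied < requested && !rejected_once) {
		rejected_once = 1;
		return 0;
	}
	return copied;
}

int main(void)
{
	char page[16];
	struct toy_iter it = { .buf = "short-copy retry demo", .count = 21 };
	size_t written = 0;

	while (it.count) {
		size_t bytes = sizeof(page);
		size_t copied, status;
again:
		copied = toy_copy_and_advance(page, bytes, &it);
		status = toy_write_end(bytes, copied);
		if (status != copied)
			toy_revert(&it, copied - status);	/* give back what was not accepted */
		if (status == 0) {
			if (copied)
				bytes = copied;	/* retry just the part known to copy */
			goto again;
		}
		written += status;
	}
	printf("wrote %zu bytes\n", written);
	return 0;
}

The made-up toy_write_end() rejects the first short copy it sees, so running the program exercises the revert-and-retry path exactly once before the loop completes.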