Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

author Linus Torvalds <[email protected]>
Sat, 8 Aug 2020 04:14:30 +0000 (21:14 -0700)
committer Linus Torvalds <[email protected]>
Sat, 8 Aug 2020 04:14:30 +0000 (21:14 -0700)
diff --combined drivers/misc/uacce/uacce.c

index aa91f69a5fa96e7066d8fb6786d02f9aa4dca1f1,e45bfd409cc548e5a542bc412770a4a9f2fd7fa0..a5b8dab80c76be213f2d5cc9e84764292056f153
--- 1/drivers/misc/uacce/uacce.c
--- 2/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@@ -4,6 -4,7 +4,7 @@@
   #include <linux/iommu.h>
   #include <linux/module.h>
   #include <linux/poll.h>
+ #include <linux/slab.h>
   #include <linux/uacce.h>
   
   static struct class *uacce_class;
@@@ -179,6 -180,14 +180,6 @@@ static int uacce_fops_release(struct in
         return 0;
   }
   
- -static vm_fault_t uacce_vma_fault(struct vm_fault *vmf)
- -{
- -      if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE))
- -              return VM_FAULT_SIGBUS;
- -
- -      return 0;
- -}
- -
   static void uacce_vma_close(struct vm_area_struct *vma)
   {
         struct uacce_queue *q = vma->vm_private_data;
@@@ -191,6 -200,7 +192,6 @@@
   }
   
   static const struct vm_operations_struct uacce_vm_ops = {
- -      .fault = uacce_vma_fault,
         .close = uacce_vma_close,
   };
   
diff --combined drivers/soc/qcom/pdr_interface.c

index 4c9225f15c4e6282f7dd94e4b912a8ad94e8efe1,a90d707da68943ef6eb7f6ef2a62ea2fe6d2bb7a..088dc99f77f3fe30ac54b8cfd56cc42722ce3426
--- 1/drivers/soc/qcom/pdr_interface.c
--- 2/drivers/soc/qcom/pdr_interface.c
+++ b/drivers/soc/qcom/pdr_interface.c
@@@ -5,6 -5,7 +5,7 @@@
   
   #include <linux/kernel.h>
   #include <linux/module.h>
+ #include <linux/slab.h>
   #include <linux/string.h>
   #include <linux/workqueue.h>
   
@@@ -278,15 -279,13 +279,15 @@@ static void pdr_indack_work(struct work
   
         list_for_each_entry_safe(ind, tmp, &pdr->indack_list, node) {
                 pds = ind->pds;
- -              pdr_send_indack_msg(pdr, pds, ind->transaction_id);
   
                 mutex_lock(&pdr->status_lock);
                 pds->state = ind->curr_state;
                 pdr->status(pds->state, pds->service_path, pdr->priv);
                 mutex_unlock(&pdr->status_lock);
   
+ +              /* Ack the indication after clients release the PD resources */
+ +              pdr_send_indack_msg(pdr, pds, ind->transaction_id);
+ +
                 mutex_lock(&pdr->list_lock);
                 list_del(&ind->node);
                 mutex_unlock(&pdr->list_lock);
diff --combined fs/btrfs/inode.c

index 611b3412fbfdc2bcd86d1b223bcd1151b7d29a6e,d901d53e4f03baeb6ab610bbc6dfe75f16c19dec..6dc03bab0c9d1b51c3f8572d046c8ac4b49560c3
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -3,6 -3,7 +3,7 @@@
    * Copyright (C) 2007 Oracle.  All rights reserved.
    */
   
+ #include <crypto/hash.h>
   #include <linux/kernel.h>
   #include <linux/bio.h>
   #include <linux/buffer_head.h>
@@@ -80,17 -81,17 +81,17 @@@ struct kmem_cache *btrfs_free_space_bit
   static int btrfs_setsize(struct inode *inode, struct iattr *attr);
   static int btrfs_truncate(struct inode *inode, bool skip_writeback);
   static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
- -static noinline int cow_file_range(struct inode *inode,
+ +static noinline int cow_file_range(struct btrfs_inode *inode,
                                    struct page *locked_page,
                                    u64 start, u64 end, int *page_started,
                                    unsigned long *nr_written, int unlock);
- -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
- -                                     u64 orig_start, u64 block_start,
+ +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+ +                                     u64 len, u64 orig_start, u64 block_start,
                                        u64 block_len, u64 orig_block_len,
                                        u64 ram_bytes, int compress_type,
                                        int type);
   
- -static void __endio_write_update_ordered(struct inode *inode,
+ +static void __endio_write_update_ordered(struct btrfs_inode *inode,
                                          const u64 offset, const u64 bytes,
                                          const bool uptodate);
   
@@@ -104,7 -105,7 +105,7 @@@
    * to be released, which we want to happen only when finishing the ordered
    * extent (btrfs_finish_ordered_io()).
    */
- -static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
+ +static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
                                                  struct page *locked_page,
                                                  u64 offset, u64 bytes)
   {
@@@ -116,7 -117,7 +117,7 @@@
         struct page *page;
   
         while (index <= end_index) {
- -              page = find_get_page(inode->i_mapping, index);
+ +              page = find_get_page(inode->vfs_inode.i_mapping, index);
                 index++;
                 if (!page)
                         continue;
@@@ -274,15 -275,15 +275,15 @@@ fail
    * does the checks required to make sure the data is small enough
    * to fit as an inline extent.
    */
- -static noinline int cow_file_range_inline(struct inode *inode, u64 start,
+ +static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
                                           u64 end, size_t compressed_size,
                                           int compress_type,
                                           struct page **compressed_pages)
   {
- -      struct btrfs_root *root = BTRFS_I(inode)->root;
+ +      struct btrfs_root *root = inode->root;
         struct btrfs_fs_info *fs_info = root->fs_info;
         struct btrfs_trans_handle *trans;
- -      u64 isize = i_size_read(inode);
+ +      u64 isize = i_size_read(&inode->vfs_inode);
         u64 actual_end = min(end + 1, isize);
         u64 inline_len = actual_end - start;
         u64 aligned_end = ALIGN(end, fs_info->sectorsize);
@@@ -314,7 -315,7 +315,7 @@@
                 btrfs_free_path(path);
                 return PTR_ERR(trans);
         }
- -      trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+ +      trans->block_rsv = &inode->block_rsv;
   
         if (compressed_size && compressed_pages)
                 extent_item_size = btrfs_file_extent_calc_inline_size(
@@@ -323,9 -324,9 +324,9 @@@
                 extent_item_size = btrfs_file_extent_calc_inline_size(
                     inline_len);
   
- -      ret = __btrfs_drop_extents(trans, root, inode, path,
- -                                 start, aligned_end, NULL,
- -                                 1, 1, extent_item_size, &extent_inserted);
+ +      ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
+ +                                 NULL, 1, 1, extent_item_size,
+ +                                 &extent_inserted);
         if (ret) {
                 btrfs_abort_transaction(trans, ret);
                 goto out;
@@@ -334,7 -335,7 +335,7 @@@
         if (isize > actual_end)
                 inline_len = min_t(u64, isize, actual_end);
         ret = insert_inline_extent(trans, path, extent_inserted,
- -                                 root, inode, start,
+ +                                 root, &inode->vfs_inode, start,
                                    inline_len, compressed_size,
                                    compress_type, compressed_pages);
         if (ret && ret != -ENOSPC) {
@@@ -345,8 -346,8 +346,8 @@@
                 goto out;
         }
   
- -      set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
- -      btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
+ +      set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+ +      btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
   out:
         /*
          * Don't forget to free the reserved space, as for inlined extent
@@@ -412,10 -413,10 +413,10 @@@ static noinline int add_async_extent(st
   /*
    * Check if the inode has flags compatible with compression
    */
- -static inline bool inode_can_compress(struct inode *inode)
+ +static inline bool inode_can_compress(struct btrfs_inode *inode)
   {
- -      if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
- -          BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ +      if (inode->flags & BTRFS_INODE_NODATACOW ||
+ +          inode->flags & BTRFS_INODE_NODATASUM)
                 return false;
         return true;
   }
@@@ -424,30 -425,29 +425,30 @@@
    * Check if the inode needs to be submitted to compression, based on mount
    * options, defragmentation, properties or heuristics.
    */
- -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
+ +static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
+ +                                    u64 end)
   {
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ +      struct btrfs_fs_info *fs_info = inode->root->fs_info;
   
         if (!inode_can_compress(inode)) {
                 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
                         KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
- -                      btrfs_ino(BTRFS_I(inode)));
+ +                      btrfs_ino(inode));
                 return 0;
         }
         /* force compress */
         if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
                 return 1;
         /* defrag ioctl */
- -      if (BTRFS_I(inode)->defrag_compress)
+ +      if (inode->defrag_compress)
                 return 1;
         /* bad compression ratios */
- -      if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
+ +      if (inode->flags & BTRFS_INODE_NOCOMPRESS)
                 return 0;
         if (btrfs_test_opt(fs_info, COMPRESS) ||
- -          BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
- -          BTRFS_I(inode)->prop_compress)
- -              return btrfs_compress_heuristic(inode, start, end);
+ +          inode->flags & BTRFS_INODE_COMPRESS ||
+ +          inode->prop_compress)
+ +              return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
         return 0;
   }
   
@@@ -553,7 -553,7 +554,7 @@@ again
          * inode has not been flagged as nocompress.  This flag can
          * change at any time if we discover bad compression ratios.
          */
- -      if (inode_need_compress(inode, start, end)) {
+ +      if (inode_need_compress(BTRFS_I(inode), start, end)) {
                 WARN_ON(pages);
                 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
                 if (!pages) {
@@@ -617,12 -617,11 +618,12 @@@ cont
                         /* we didn't compress the entire range, try
                          * to make an uncompressed inline extent.
                          */
- -                      ret = cow_file_range_inline(inode, start, end, 0,
- -                                                  BTRFS_COMPRESS_NONE, NULL);
+ +                      ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+ +                                                  0, BTRFS_COMPRESS_NONE,
+ +                                                  NULL);
                 } else {
                         /* try making a compressed inline extent */
- -                      ret = cow_file_range_inline(inode, start, end,
+ +                      ret = cow_file_range_inline(BTRFS_I(inode), start, end,
                                                     total_compressed,
                                                     compress_type, pages);
                 }
@@@ -644,8 -643,7 +645,8 @@@
                          * our outstanding extent for clearing delalloc for this
                          * range.
                          */
- -                      extent_clear_unlock_delalloc(inode, start, end, NULL,
+ +                      extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
+ +                                                   NULL,
                                                      clear_flags,
                                                      PAGE_UNLOCK |
                                                      PAGE_CLEAR_DIRTY |
@@@ -765,14 -763,14 +766,14 @@@ static void free_async_extent_pages(str
    */
   static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
   {
- -      struct inode *inode = async_chunk->inode;
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ +      struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
+ +      struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct async_extent *async_extent;
         u64 alloc_hint = 0;
         struct btrfs_key ins;
         struct extent_map *em;
- -      struct btrfs_root *root = BTRFS_I(inode)->root;
- -      struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ +      struct btrfs_root *root = inode->root;
+ +      struct extent_io_tree *io_tree = &inode->io_tree;
         int ret = 0;
   
   again:
@@@ -805,7 -803,7 +806,7 @@@ retry
                          * all those pages down to the drive.
                          */
                         if (!page_started && !ret)
- -                              extent_write_locked_range(inode,
+ +                              extent_write_locked_range(&inode->vfs_inode,
                                                   async_extent->start,
                                                   async_extent->start +
                                                   async_extent->ram_size - 1,
@@@ -835,7 -833,7 +836,7 @@@
                                  * will not submit these pages down to lower
                                  * layers.
                                  */
- -                              extent_range_redirty_for_io(inode,
+ +                              extent_range_redirty_for_io(&inode->vfs_inode,
                                                 async_extent->start,
                                                 async_extent->start +
                                                 async_extent->ram_size - 1);
@@@ -870,7 -868,8 +871,7 @@@
                                                 BTRFS_ORDERED_COMPRESSED,
                                                 async_extent->compress_type);
                 if (ret) {
- -                      btrfs_drop_extent_cache(BTRFS_I(inode),
- -                                              async_extent->start,
+ +                      btrfs_drop_extent_cache(inode, async_extent->start,
                                                 async_extent->start +
                                                 async_extent->ram_size - 1, 0);
                         goto out_free_reserve;
@@@ -886,7 -885,8 +887,7 @@@
                                 NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
                                 PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
                                 PAGE_SET_WRITEBACK);
- -              if (btrfs_submit_compressed_write(inode,
- -                                  async_extent->start,
+ +              if (btrfs_submit_compressed_write(inode, async_extent->start,
                                     async_extent->ram_size,
                                     ins.objectid,
                                     ins.offset, async_extent->pages,
@@@ -897,11 -897,12 +898,11 @@@
                         const u64 start = async_extent->start;
                         const u64 end = start + async_extent->ram_size - 1;
   
- -                      p->mapping = inode->i_mapping;
+ +                      p->mapping = inode->vfs_inode.i_mapping;
                         btrfs_writepage_endio_finish_ordered(p, start, end, 0);
   
                         p->mapping = NULL;
- -                      extent_clear_unlock_delalloc(inode, start, end,
- -                                                   NULL, 0,
+ +                      extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
                                                      PAGE_END_WRITEBACK |
                                                      PAGE_SET_ERROR);
                         free_async_extent_pages(async_extent);
@@@ -929,10 -930,10 +930,10 @@@ out_free
         goto again;
   }
   
- -static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+ +static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
                                       u64 num_bytes)
   {
- -      struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ +      struct extent_map_tree *em_tree = &inode->extent_tree;
         struct extent_map *em;
         u64 alloc_hint = 0;
   
@@@ -974,18 -975,17 +975,18 @@@
    * required to start IO on it.  It may be clean and already done with
    * IO when we return.
    */
- -static noinline int cow_file_range(struct inode *inode,
+ +static noinline int cow_file_range(struct btrfs_inode *inode,
                                    struct page *locked_page,
                                    u64 start, u64 end, int *page_started,
                                    unsigned long *nr_written, int unlock)
   {
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- -      struct btrfs_root *root = BTRFS_I(inode)->root;
+ +      struct btrfs_root *root = inode->root;
+ +      struct btrfs_fs_info *fs_info = root->fs_info;
         u64 alloc_hint = 0;
         u64 num_bytes;
         unsigned long ram_size;
         u64 cur_alloc_size = 0;
+ +      u64 min_alloc_size;
         u64 blocksize = fs_info->sectorsize;
         struct btrfs_key ins;
         struct extent_map *em;
@@@ -994,7 -994,7 +995,7 @@@
         bool extent_reserved = false;
         int ret = 0;
   
- -      if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ +      if (btrfs_is_free_space_inode(inode)) {
                 WARN_ON_ONCE(1);
                 ret = -EINVAL;
                 goto out_unlock;
@@@ -1004,7 -1004,7 +1005,7 @@@
         num_bytes = max(blocksize,  num_bytes);
         ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
   
- -      inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
+ +      inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
   
         if (start == 0) {
                 /* lets try to make an inline extent */
@@@ -1033,28 -1033,13 +1034,28 @@@
         }
   
         alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
- -      btrfs_drop_extent_cache(BTRFS_I(inode), start,
- -                      start + num_bytes - 1, 0);
+ +      btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
+ +
+ +      /*
+ +       * Relocation relies on the relocated extents to have exactly the same
+ +       * size as the original extents. Normally writeback for relocation data
+ +       * extents follows a NOCOW path because relocation preallocates the
+ +       * extents. However, due to an operation such as scrub turning a block
+ +       * group to RO mode, it may fallback to COW mode, so we must make sure
+ +       * an extent allocated during COW has exactly the requested size and can
+ +       * not be split into smaller extents, otherwise relocation breaks and
+ +       * fails during the stage where it updates the bytenr of file extent
+ +       * items.
+ +       */
+ +      if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ +              min_alloc_size = num_bytes;
+ +      else
+ +              min_alloc_size = fs_info->sectorsize;
   
         while (num_bytes > 0) {
                 cur_alloc_size = num_bytes;
                 ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- -                                         fs_info->sectorsize, 0, alloc_hint,
+ +                                         min_alloc_size, 0, alloc_hint,
                                            &ins, 1, 1);
                 if (ret < 0)
                         goto out_unlock;
@@@ -1097,7 -1082,7 +1098,7 @@@
                          * skip current ordered extent.
                          */
                         if (ret)
- -                              btrfs_drop_extent_cache(BTRFS_I(inode), start,
+ +                              btrfs_drop_extent_cache(inode, start,
                                                 start + ram_size - 1, 0);
                 }
   
@@@ -1113,7 -1098,8 +1114,7 @@@
                 page_ops = unlock ? PAGE_UNLOCK : 0;
                 page_ops |= PAGE_SET_PRIVATE2;
   
- -              extent_clear_unlock_delalloc(inode, start,
- -                                           start + ram_size - 1,
+ +              extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
                                              locked_page,
                                              EXTENT_LOCKED | EXTENT_DELALLOC,
                                              page_ops);
@@@ -1137,7 -1123,7 +1138,7 @@@ out
         return ret;
   
   out_drop_extent_cache:
- -      btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
+ +      btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
   out_reserve:
         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
         btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
@@@ -1234,13 -1220,13 +1235,13 @@@ static noinline void async_cow_free(str
                 kvfree(async_chunk->pending);
   }
   
- -static int cow_file_range_async(struct inode *inode,
+ +static int cow_file_range_async(struct btrfs_inode *inode,
                                 struct writeback_control *wbc,
                                 struct page *locked_page,
                                 u64 start, u64 end, int *page_started,
                                 unsigned long *nr_written)
   {
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ +      struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
         struct async_cow *ctx;
         struct async_chunk *async_chunk;
@@@ -1252,9 -1238,9 +1253,9 @@@
         unsigned nofs_flag;
         const unsigned int write_flags = wbc_to_write_flags(wbc);
   
- -      unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+ +      unlock_extent(&inode->io_tree, start, end);
   
- -      if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+ +      if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
             !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
                 num_chunks = 1;
                 should_compress = false;
@@@ -1292,9 -1278,9 +1293,9 @@@
                  * igrab is called higher up in the call chain, take only the
                  * lightweight reference for the callback lifetime
                  */
- -              ihold(inode);
+ +              ihold(&inode->vfs_inode);
                 async_chunk[i].pending = &ctx->num_chunks;
- -              async_chunk[i].inode = inode;
+ +              async_chunk[i].inode = &inode->vfs_inode;
                 async_chunk[i].start = start;
                 async_chunk[i].end = cur_end;
                 async_chunk[i].write_flags = write_flags;
@@@ -1371,15 -1357,13 +1372,15 @@@ static noinline int csum_exist_in_range
         return 1;
   }
   
- -static int fallback_to_cow(struct inode *inode, struct page *locked_page,
+ +static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
                            const u64 start, const u64 end,
                            int *page_started, unsigned long *nr_written)
   {
- -      const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ +      const bool is_space_ino = btrfs_is_free_space_inode(inode);
+ +      const bool is_reloc_ino = (inode->root->root_key.objectid ==
+ +                                 BTRFS_DATA_RELOC_TREE_OBJECTID);
         const u64 range_bytes = end + 1 - start;
- -      struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ +      struct extent_io_tree *io_tree = &inode->io_tree;
         u64 range_start = start;
         u64 count;
   
@@@ -1408,23 -1392,18 +1409,23 @@@
          *    data space info, which we incremented in the step above.
          *
          * If we need to fallback to cow and the inode corresponds to a free
- -       * space cache inode, we must also increment bytes_may_use of the data
- -       * space_info for the same reason. Space caches always get a prealloc
+ +       * space cache inode or an inode of the data relocation tree, we must
+ +       * also increment bytes_may_use of the data space_info for the same
+ +       * reason. Space caches and relocated data extents always get a prealloc
          * extent for them, however scrub or balance may have set the block
- -       * group that contains that extent to RO mode.
+ +       * group that contains that extent to RO mode and therefore force COW
+ +       * when starting writeback.
          */
         count = count_range_bits(io_tree, &range_start, end, range_bytes,
                                  EXTENT_NORESERVE, 0);
- -      if (count > 0 || is_space_ino) {
- -              const u64 bytes = is_space_ino ? range_bytes : count;
- -              struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ +      if (count > 0 || is_space_ino || is_reloc_ino) {
+ +              u64 bytes = count;
+ +              struct btrfs_fs_info *fs_info = inode->root->fs_info;
                 struct btrfs_space_info *sinfo = fs_info->data_sinfo;
   
+ +              if (is_space_ino || is_reloc_ino)
+ +                      bytes = range_bytes;
+ +
                 spin_lock(&sinfo->lock);
                 btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
                 spin_unlock(&sinfo->lock);
@@@ -1445,21 -1424,21 +1446,21 @@@
    * If no cow copies or snapshots exist, we write directly to the existing
    * blocks on disk
    */
- -static noinline int run_delalloc_nocow(struct inode *inode,
+ +static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
                                        struct page *locked_page,
                                        const u64 start, const u64 end,
                                        int *page_started, int force,
                                        unsigned long *nr_written)
   {
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- -      struct btrfs_root *root = BTRFS_I(inode)->root;
+ +      struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ +      struct btrfs_root *root = inode->root;
         struct btrfs_path *path;
         u64 cow_start = (u64)-1;
         u64 cur_offset = start;
         int ret;
         bool check_prev = true;
- -      const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
- -      u64 ino = btrfs_ino(BTRFS_I(inode));
+ +      const bool freespace_inode = btrfs_is_free_space_inode(inode);
+ +      u64 ino = btrfs_ino(inode);
         bool nocow = false;
         u64 disk_bytenr = 0;
   
@@@ -1685,11 -1664,15 +1686,11 @@@ out_check
                  * NOCOW, following one which needs to be COW'ed
                  */
                 if (cow_start != (u64)-1) {
- -                      ret = fallback_to_cow(inode, locked_page, cow_start,
- -                                            found_key.offset - 1,
+ +                      ret = fallback_to_cow(inode, locked_page,
+ +                                            cow_start, found_key.offset - 1,
                                               page_started, nr_written);
- -                      if (ret) {
- -                              if (nocow)
- -                                      btrfs_dec_nocow_writers(fs_info,
- -                                                              disk_bytenr);
+ +                      if (ret)
                                 goto error;
- -                      }
                         cow_start = (u64)-1;
                 }
   
@@@ -1705,6 -1688,9 +1706,6 @@@
                                           ram_bytes, BTRFS_COMPRESS_NONE,
                                           BTRFS_ORDERED_PREALLOC);
                         if (IS_ERR(em)) {
- -                              if (nocow)
- -                                      btrfs_dec_nocow_writers(fs_info,
- -                                                              disk_bytenr);
                                 ret = PTR_ERR(em);
                                 goto error;
                         }
@@@ -1714,7 -1700,8 +1715,7 @@@
                                                        num_bytes,
                                                        BTRFS_ORDERED_PREALLOC);
                         if (ret) {
- -                              btrfs_drop_extent_cache(BTRFS_I(inode),
- -                                                      cur_offset,
+ +                              btrfs_drop_extent_cache(inode, cur_offset,
                                                         cur_offset + num_bytes - 1,
                                                         0);
                                 goto error;
@@@ -1790,11 -1777,11 +1791,11 @@@ error
         return ret;
   }
   
- -static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
+ +static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
   {
   
- -      if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
- -          !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
+ +      if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
+ +          !(inode->flags & BTRFS_INODE_PREALLOC))
                 return 0;
   
         /*
@@@ -1802,8 -1789,9 +1803,8 @@@
          * if is not zero, it means the file is defragging.
          * Force cow if given extent needs to be defragged.
          */
- -      if (BTRFS_I(inode)->defrag_bytes &&
- -          test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
- -                         EXTENT_DEFRAG, 0, NULL))
+ +      if (inode->defrag_bytes &&
+ +          test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
                 return 1;
   
         return 0;
@@@ -1813,25 -1801,26 +1814,25 @@@
    * Function to process delayed allocation (create CoW) for ranges which are
    * being touched for the first time.
    */
- -int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
+ +int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
                 u64 start, u64 end, int *page_started, unsigned long *nr_written,
                 struct writeback_control *wbc)
   {
         int ret;
         int force_cow = need_force_cow(inode, start, end);
   
- -      if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+ +      if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
                 ret = run_delalloc_nocow(inode, locked_page, start, end,
                                          page_started, 1, nr_written);
- -      } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+ +      } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
                 ret = run_delalloc_nocow(inode, locked_page, start, end,
                                          page_started, 0, nr_written);
         } else if (!inode_can_compress(inode) ||
                    !inode_need_compress(inode, start, end)) {
                 ret = cow_file_range(inode, locked_page, start, end,
- -                                    page_started, nr_written, 1);
+ +                                   page_started, nr_written, 1);
         } else {
- -              set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- -                      &BTRFS_I(inode)->runtime_flags);
+ +              set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
                 ret = cow_file_range_async(inode, wbc, locked_page, start, end,
                                            page_started, nr_written);
         }
@@@ -2080,7 -2069,9 +2081,7 @@@ void btrfs_clear_delalloc_extent(struc
                 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
                     do_list && !(state->state & EXTENT_NORESERVE) &&
                     (*bits & EXTENT_CLEAR_DATA_RESV))
- -                      btrfs_free_reserved_data_space_noquota(
- -                                      &inode->vfs_inode,
- -                                      state->start, len);
+ +                      btrfs_free_reserved_data_space_noquota(fs_info, len);
   
                 percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
                                          fs_info->delalloc_batch);
@@@ -2156,7 -2147,7 +2157,7 @@@ static blk_status_t btrfs_submit_bio_st
         struct inode *inode = private_data;
         blk_status_t ret = 0;
   
- -      ret = btrfs_csum_one_bio(inode, bio, 0, 0);
+ +      ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
         BUG_ON(ret); /* -ENOMEM */
         return 0;
   }
@@@ -2221,7 -2212,7 +2222,7 @@@ static blk_status_t btrfs_submit_bio_ho
                                           0, inode, btrfs_submit_bio_start);
                 goto out;
         } else if (!skip_sum) {
- -              ret = btrfs_csum_one_bio(inode, bio, 0, 0);
+ +              ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
                 if (ret)
                         goto out;
         }
@@@ -2258,13 -2249,13 +2259,13 @@@ static noinline int add_pending_csums(s
         return 0;
   }
   
- -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+ +int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                               unsigned int extra_bits,
                               struct extent_state **cached_state)
   {
         WARN_ON(PAGE_ALIGNED(end));
- -      return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
- -                                 extra_bits, cached_state);
+ +      return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
+ +                                 cached_state);
   }
   
   /* see btrfs_writepage_start_hook for details on why this is required */
@@@ -2281,7 -2272,7 +2282,7 @@@ static void btrfs_writepage_fixup_worke
         struct extent_state *cached_state = NULL;
         struct extent_changeset *data_reserved = NULL;
         struct page *page;
- -      struct inode *inode;
+ +      struct btrfs_inode *inode;
         u64 page_start;
         u64 page_end;
         int ret = 0;
@@@ -2289,7 -2280,7 +2290,7 @@@
   
         fixup = container_of(work, struct btrfs_writepage_fixup, work);
         page = fixup->page;
- -      inode = fixup->inode;
+ +      inode = BTRFS_I(fixup->inode);
         page_start = page_offset(page);
         page_end = page_offset(page) + PAGE_SIZE - 1;
   
@@@ -2326,7 -2317,8 +2327,7 @@@ again
                  *    when the page was already properly dealt with.
                  */
                 if (!ret) {
- -                      btrfs_delalloc_release_extents(BTRFS_I(inode),
- -                                                     PAGE_SIZE);
+ +                      btrfs_delalloc_release_extents(inode, PAGE_SIZE);
                         btrfs_delalloc_release_space(inode, data_reserved,
                                                      page_start, PAGE_SIZE,
                                                      true);
@@@ -2342,18 -2334,20 +2343,18 @@@
         if (ret)
                 goto out_page;
   
- -      lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
- -                       &cached_state);
+ +      lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
   
         /* already ordered? We're done */
         if (PagePrivate2(page))
                 goto out_reserved;
   
- -      ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
- -                                      PAGE_SIZE);
+ +      ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
         if (ordered) {
- -              unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
- -                                   page_end, &cached_state);
+ +              unlock_extent_cached(&inode->io_tree, page_start, page_end,
+ +                                   &cached_state);
                 unlock_page(page);
- -              btrfs_start_ordered_extent(inode, ordered, 1);
+ +              btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
                 btrfs_put_ordered_extent(ordered);
                 goto again;
         }
@@@ -2373,11 -2367,11 +2374,11 @@@
         BUG_ON(!PageDirty(page));
         free_delalloc_space = false;
   out_reserved:
- -      btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ +      btrfs_delalloc_release_extents(inode, PAGE_SIZE);
         if (free_delalloc_space)
                 btrfs_delalloc_release_space(inode, data_reserved, page_start,
                                              PAGE_SIZE, true);
- -      unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ +      unlock_extent_cached(&inode->io_tree, page_start, page_end,
                              &cached_state);
   out_page:
         if (ret) {
@@@ -2400,7 -2394,7 +2401,7 @@@
          * that could need flushing space. Recursing back to fixup worker would
          * deadlock.
          */
- -      btrfs_add_delayed_iput(inode);
+ +      btrfs_add_delayed_iput(&inode->vfs_inode);
   }
   
   /*
@@@ -2456,18 -2450,18 +2457,18 @@@ int btrfs_writepage_cow_fixup(struct pa
   }
   
   static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
- -                                     struct inode *inode, u64 file_pos,
- -                                     u64 disk_bytenr, u64 disk_num_bytes,
- -                                     u64 num_bytes, u64 ram_bytes,
- -                                     u8 compression, u8 encryption,
- -                                     u16 other_encoding, int extent_type)
+ +                                     struct btrfs_inode *inode, u64 file_pos,
+ +                                     struct btrfs_file_extent_item *stack_fi,
+ +                                     u64 qgroup_reserved)
   {
- -      struct btrfs_root *root = BTRFS_I(inode)->root;
- -      struct btrfs_file_extent_item *fi;
+ +      struct btrfs_root *root = inode->root;
         struct btrfs_path *path;
         struct extent_buffer *leaf;
         struct btrfs_key ins;
- -      u64 qg_released;
+ +      u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
+ +      u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
+ +      u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
+ +      u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
         int extent_inserted = 0;
         int ret;
   
@@@ -2486,42 -2480,60 +2487,42 @@@
          */
         ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
                                    file_pos + num_bytes, NULL, 0,
- -                                 1, sizeof(*fi), &extent_inserted);
+ +                                 1, sizeof(*stack_fi), &extent_inserted);
         if (ret)
                 goto out;
   
         if (!extent_inserted) {
- -              ins.objectid = btrfs_ino(BTRFS_I(inode));
+ +              ins.objectid = btrfs_ino(inode);
                 ins.offset = file_pos;
                 ins.type = BTRFS_EXTENT_DATA_KEY;
   
                 path->leave_spinning = 1;
                 ret = btrfs_insert_empty_item(trans, root, path, &ins,
- -                                            sizeof(*fi));
+ +                                            sizeof(*stack_fi));
                 if (ret)
                         goto out;
         }
         leaf = path->nodes[0];
- -      fi = btrfs_item_ptr(leaf, path->slots[0],
- -                          struct btrfs_file_extent_item);
- -      btrfs_set_file_extent_generation(leaf, fi, trans->transid);
- -      btrfs_set_file_extent_type(leaf, fi, extent_type);
- -      btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
- -      btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
- -      btrfs_set_file_extent_offset(leaf, fi, 0);
- -      btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
- -      btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
- -      btrfs_set_file_extent_compression(leaf, fi, compression);
- -      btrfs_set_file_extent_encryption(leaf, fi, encryption);
- -      btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+ +      btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
+ +      write_extent_buffer(leaf, stack_fi,
+ +                      btrfs_item_ptr_offset(leaf, path->slots[0]),
+ +                      sizeof(struct btrfs_file_extent_item));
   
         btrfs_mark_buffer_dirty(leaf);
         btrfs_release_path(path);
   
- -      inode_add_bytes(inode, num_bytes);
+ +      inode_add_bytes(&inode->vfs_inode, num_bytes);
   
         ins.objectid = disk_bytenr;
         ins.offset = disk_num_bytes;
         ins.type = BTRFS_EXTENT_ITEM_KEY;
   
- -      ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
- -                                              ram_bytes);
+ +      ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
         if (ret)
                 goto out;
   
- -      /*
- -       * Release the reserved range from inode dirty range map, as it is
- -       * already moved into delayed_ref_head
- -       */
- -      ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
- -      if (ret < 0)
- -              goto out;
- -      qg_released = ret;
- -      ret = btrfs_alloc_reserved_file_extent(trans, root,
- -                                             btrfs_ino(BTRFS_I(inode)),
- -                                             file_pos, qg_released, &ins);
+ +      ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
+ +                                             file_pos, qgroup_reserved, &ins);
   out:
         btrfs_free_path(path);
   
@@@ -2543,33 -2555,7 +2544,33 @@@ static void btrfs_release_delalloc_byte
         btrfs_put_block_group(cache);
   }
   
- -/* as ordered data IO finishes, this gets called so we can finish
+ +static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
+ +                                           struct inode *inode,
+ +                                           struct btrfs_ordered_extent *oe)
+ +{
+ +      struct btrfs_file_extent_item stack_fi;
+ +      u64 logical_len;
+ +
+ +      memset(&stack_fi, 0, sizeof(stack_fi));
+ +      btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
+ +      btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
+ +      btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
+ +                                                 oe->disk_num_bytes);
+ +      if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
+ +              logical_len = oe->truncated_len;
+ +      else
+ +              logical_len = oe->num_bytes;
+ +      btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
+ +      btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
+ +      btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
+ +      /* Encryption and other encoding is reserved and all 0 */
+ +
+ +      return insert_reserved_file_extent(trans, BTRFS_I(inode), oe->file_offset,
+ +                                         &stack_fi, oe->qgroup_rsv);
+ +}
+ +
+ +/*
+ + * As ordered data IO finishes, this gets called so we can finish
    * an ordered extent if the range of bytes in the file it covers are
    * fully written.
    */
@@@ -2620,6 -2606,13 +2621,6 @@@ static int btrfs_finish_ordered_io(stru
         if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                 BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
   
- -              /*
- -               * For mwrite(mmap + memset to write) case, we still reserve
- -               * space for NOCOW range.
- -               * As NOCOW won't cause a new delayed ref, just free the space
- -               */
- -              btrfs_qgroup_free_data(inode, NULL, start,
- -                                     ordered_extent->num_bytes);
                 btrfs_inode_safe_disk_i_size_write(inode, 0);
                 if (freespace_inode)
                         trans = btrfs_join_transaction_spacecache(root);
@@@ -2656,14 -2649,20 +2657,14 @@@
                 compress_type = ordered_extent->compress_type;
         if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
                 BUG_ON(compress_type);
- -              btrfs_qgroup_free_data(inode, NULL, start,
- -                                     ordered_extent->num_bytes);
                 ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
                                                 ordered_extent->file_offset,
                                                 ordered_extent->file_offset +
                                                 logical_len);
         } else {
                 BUG_ON(root == fs_info->tree_root);
- -              ret = insert_reserved_file_extent(trans, inode, start,
- -                                              ordered_extent->disk_bytenr,
- -                                              ordered_extent->disk_num_bytes,
- -                                              logical_len, logical_len,
- -                                              compress_type, 0, 0,
- -                                              BTRFS_FILE_EXTENT_REG);
+ +              ret = insert_ordered_extent_file_extent(trans, inode,
+ +                                                      ordered_extent);
                 if (!ret) {
                         clear_reserved_extent = false;
                         btrfs_release_delalloc_bytes(fs_info,
@@@ -2815,9 -2814,6 +2816,9 @@@ static int check_data_csum(struct inod
   zeroit:
         btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
                                     io_bio->mirror_num);
+ +      if (io_bio->device)
+ +              btrfs_dev_stat_inc_and_print(io_bio->device,
+ +                                           BTRFS_DEV_STAT_CORRUPTION_ERRS);
         memset(kaddr + pgoff, 1, len);
         flush_dcache_page(page);
         kunmap_atomic(kaddr);
@@@ -3336,14 -3332,6 +3337,14 @@@ cache_index
          */
         BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
   
+ +      /*
+ +       * Same logic as for last_unlink_trans. We don't persist the generation
+ +       * of the last transaction where this inode was used for a reflink
+ +       * operation, so after eviction and reloading the inode we must be
+ +       * pessimistic and assume the last transaction that modified the inode.
+ +       */
+ +      BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
+ +
         path->slots[0]++;
         if (inode->i_nlink != 1 ||
             path->slots[0] >= btrfs_header_nritems(leaf))
@@@ -3492,7 -3480,7 +3493,7 @@@ static noinline int btrfs_update_inode_
   
         fill_inode_item(trans, leaf, inode_item, inode);
         btrfs_mark_buffer_dirty(leaf);
- -      btrfs_set_inode_last_trans(trans, inode);
+ +      btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
         ret = 0;
   failed:
         btrfs_free_path(path);
@@@ -3522,7 -3510,7 +3523,7 @@@ noinline int btrfs_update_inode(struct 
   
                 ret = btrfs_delayed_update_inode(trans, root, inode);
                 if (!ret)
- -                      btrfs_set_inode_last_trans(trans, inode);
+ +                      btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
                 return ret;
         }
   
@@@ -4037,8 -4025,6 +4038,8 @@@ int btrfs_delete_subvolume(struct inod
                 }
         }
   
+ +      free_anon_bdev(dest->anon_dev);
+ +      dest->anon_dev = 0;
   out_end_trans:
         trans->block_rsv = NULL;
         trans->bytes_reserved = 0;
@@@ -4509,13 -4495,11 +4510,13 @@@ int btrfs_truncate_block(struct inode *
         struct extent_state *cached_state = NULL;
         struct extent_changeset *data_reserved = NULL;
         char *kaddr;
+ +      bool only_release_metadata = false;
         u32 blocksize = fs_info->sectorsize;
         pgoff_t index = from >> PAGE_SHIFT;
         unsigned offset = from & (blocksize - 1);
         struct page *page;
         gfp_t mask = btrfs_alloc_write_mask(mapping);
+ +      size_t write_bytes = blocksize;
         int ret = 0;
         u64 block_start;
         u64 block_end;
@@@ -4527,28 -4511,15 +4528,28 @@@
         block_start = round_down(from, blocksize);
         block_end = block_start + blocksize - 1;
   
- -      ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
- -                                         block_start, blocksize);
- -      if (ret)
+ +      ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
+ +                                        block_start, blocksize);
+ +      if (ret < 0) {
+ +              if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
+ +                                         &write_bytes) > 0) {
+ +                      /* For nocow case, no need to reserve data space */
+ +                      only_release_metadata = true;
+ +              } else {
+ +                      goto out;
+ +              }
+ +      }
+ +      ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+ +      if (ret < 0) {
+ +              if (!only_release_metadata)
+ +                      btrfs_free_reserved_data_space(BTRFS_I(inode),
+ +                                      data_reserved, block_start, blocksize);
                 goto out;
- -
+ +      }
   again:
         page = find_or_create_page(mapping, index, mask);
         if (!page) {
- -              btrfs_delalloc_release_space(inode, data_reserved,
+ +              btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
                                              block_start, blocksize, true);
                 btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
                 ret = -ENOMEM;
@@@ -4573,7 -4544,7 +4574,7 @@@
         lock_extent_bits(io_tree, block_start, block_end, &cached_state);
         set_page_extent_mapped(page);
   
- -      ordered = btrfs_lookup_ordered_extent(inode, block_start);
+ +      ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
         if (ordered) {
                 unlock_extent_cached(io_tree, block_start, block_end,
                                      &cached_state);
@@@ -4588,7 -4559,7 +4589,7 @@@
                          EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                          0, 0, &cached_state);
   
- -      ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
+ +      ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
                                         &cached_state);
         if (ret) {
                 unlock_extent_cached(io_tree, block_start, block_end,
@@@ -4613,26 -4584,14 +4614,26 @@@
         set_page_dirty(page);
         unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
   
+ +      if (only_release_metadata)
+ +              set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
+ +                              block_end, EXTENT_NORESERVE, NULL, NULL,
+ +                              GFP_NOFS);
+ +
   out_unlock:
- -      if (ret)
- -              btrfs_delalloc_release_space(inode, data_reserved, block_start,
- -                                           blocksize, true);
+ +      if (ret) {
+ +              if (only_release_metadata)
+ +                      btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ +                                      blocksize, true);
+ +              else
+ +                      btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+ +                                      block_start, blocksize, true);
+ +      }
         btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
         unlock_page(page);
         put_page(page);
   out:
+ +      if (only_release_metadata)
+ +              btrfs_check_nocow_unlock(BTRFS_I(inode));
         extent_changeset_free(data_reserved);
         return ret;
   }
@@@ -4990,8 -4949,7 +4991,8 @@@ static void evict_inode_truncate_pages(
                  * Note, end is the bytenr of last byte, so we need + 1 here.
                  */
                 if (state_flags & EXTENT_DELALLOC)
- -                      btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
+ +                      btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
+ +                                             end - start + 1);
   
                 clear_extent_bit(io_tree, start, end,
                                  EXTENT_LOCKED | EXTENT_DELALLOC |
@@@ -6066,7 -6024,7 +6067,7 @@@ static struct inode *btrfs_new_inode(st
         inode_tree_add(inode);
   
         trace_btrfs_inode_new(inode);
- -      btrfs_set_inode_last_trans(trans, inode);
+ +      btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
   
         btrfs_update_root_times(trans, root);
   
@@@ -6875,7 -6833,7 +6876,7 @@@ out
         return em;
   }
   
- -static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
+ +static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
                                                   const u64 start,
                                                   const u64 len,
                                                   const u64 orig_start,
@@@ -6889,19 -6847,21 +6890,19 @@@
         int ret;
   
         if (type != BTRFS_ORDERED_NOCOW) {
- -              em = create_io_em(inode, start, len, orig_start,
- -                                block_start, block_len, orig_block_len,
- -                                ram_bytes,
+ +              em = create_io_em(inode, start, len, orig_start, block_start,
+ +                                block_len, orig_block_len, ram_bytes,
                                   BTRFS_COMPRESS_NONE, /* compress_type */
                                   type);
                 if (IS_ERR(em))
                         goto out;
         }
- -      ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
- -                                         len, block_len, type);
+ +      ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
+ +                                         block_len, type);
         if (ret) {
                 if (em) {
                         free_extent_map(em);
- -                      btrfs_drop_extent_cache(BTRFS_I(inode), start,
- -                                              start + len - 1, 0);
+ +                      btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
                 }
                 em = ERR_PTR(ret);
         }
@@@ -6910,11 -6870,11 +6911,11 @@@
         return em;
   }
   
- -static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+ +static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
                                                   u64 start, u64 len)
   {
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- -      struct btrfs_root *root = BTRFS_I(inode)->root;
+ +      struct btrfs_root *root = inode->root;
+ +      struct btrfs_fs_info *fs_info = root->fs_info;
         struct extent_map *em;
         struct btrfs_key ins;
         u64 alloc_hint;
@@@ -6931,32 -6891,15 +6932,32 @@@
                                      ins.offset, BTRFS_ORDERED_REGULAR);
         btrfs_dec_block_group_reservations(fs_info, ins.objectid);
         if (IS_ERR(em))
- -              btrfs_free_reserved_extent(fs_info, ins.objectid,
- -                                         ins.offset, 1);
+ +              btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
+ +                                         1);
   
         return em;
   }
   
   /*
- - * returns 1 when the nocow is safe, < 1 on error, 0 if the
- - * block must be cow'd
+ + * Check if we can do nocow write into the range [@offset, @offset + @len)
+ + *
+ + * @offset:   File offset
+ + * @len:      The length to write, will be updated to the nocow writeable
+ + *            range
+ + * @orig_start:       (optional) Return the original file offset of the file extent
+ + * @orig_len: (optional) Return the original on-disk length of the file extent
+ + * @ram_bytes:        (optional) Return the ram_bytes of the file extent
+ + *
+ + * This function will flush ordered extents in the range to ensure proper
+ + * nocow checks for (nowait == false) case.
+ + *
+ + * Return:
+ + * >0 and update @len if we can do nocow write
+ + *  0 if we can't do nocow write
+ + * <0 if error happened
+ + *
+ + * NOTE: This only checks the file extents, caller is responsible to wait for
+ + *     any ordered extents.
    */
   noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                               u64 *orig_start, u64 *orig_block_len,
@@@ -7183,8 -7126,8 +7184,8 @@@ static int lock_extent_direct(struct in
   }
   
   /* The callers of this must take lock_extent() */
- -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
- -                                     u64 orig_start, u64 block_start,
+ +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+ +                                     u64 len, u64 orig_start, u64 block_start,
                                        u64 block_len, u64 orig_block_len,
                                        u64 ram_bytes, int compress_type,
                                        int type)
@@@ -7198,7 -7141,7 +7199,7 @@@
                type == BTRFS_ORDERED_NOCOW ||
                type == BTRFS_ORDERED_REGULAR);
   
- -      em_tree = &BTRFS_I(inode)->extent_tree;
+ +      em_tree = &inode->extent_tree;
         em = alloc_extent_map();
         if (!em)
                 return ERR_PTR(-ENOMEM);
@@@ -7220,8 -7163,8 +7221,8 @@@
         }
   
         do {
- -              btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
- -                              em->start + em->len - 1, 0);
+ +              btrfs_drop_extent_cache(inode, em->start,
+ +                                      em->start + em->len - 1, 0);
                 write_lock(&em_tree->lock);
                 ret = add_extent_mapping(em_tree, em, 1);
                 write_unlock(&em_tree->lock);
@@@ -7300,7 -7243,7 +7301,7 @@@ static int btrfs_get_blocks_direct_writ
                     btrfs_inc_nocow_writers(fs_info, block_start)) {
                         struct extent_map *em2;
   
- -                      em2 = btrfs_create_dio_extent(inode, start, len,
+ +                      em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
                                                       orig_start, block_start,
                                                       len, orig_block_len,
                                                       ram_bytes, type);
@@@ -7319,7 -7262,8 +7320,7 @@@
                          * use the existing or preallocated extent, so does not
                          * need to adjust btrfs_space_info's bytes_may_use.
                          */
- -                      btrfs_free_reserved_data_space_noquota(inode, start,
- -                                                             len);
+ +                      btrfs_free_reserved_data_space_noquota(fs_info, len);
                         goto skip_cow;
                 }
         }
@@@ -7327,7 -7271,7 +7328,7 @@@
         /* this will cow the extent */
         len = bh_result->b_size;
         free_extent_map(em);
- -      *map = em = btrfs_new_extent_direct(inode, start, len);
+ +      *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
         if (IS_ERR(em)) {
                 ret = PTR_ERR(em);
                 goto out;
@@@ -7478,8 -7422,7 +7479,8 @@@ static void btrfs_dio_private_put(struc
                 return;
   
         if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
- -              __endio_write_update_ordered(dip->inode, dip->logical_offset,
+ +              __endio_write_update_ordered(BTRFS_I(dip->inode),
+ +                                           dip->logical_offset,
                                              dip->bytes,
                                              !dip->dio_bio->bi_status);
         } else {
@@@ -7565,18 -7508,18 +7566,18 @@@ static blk_status_t btrfs_check_read_di
         return err;
   }
   
- -static void __endio_write_update_ordered(struct inode *inode,
+ +static void __endio_write_update_ordered(struct btrfs_inode *inode,
                                          const u64 offset, const u64 bytes,
                                          const bool uptodate)
   {
- -      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ +      struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct btrfs_ordered_extent *ordered = NULL;
         struct btrfs_workqueue *wq;
         u64 ordered_offset = offset;
         u64 ordered_bytes = bytes;
         u64 last_offset;
   
- -      if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+ +      if (btrfs_is_free_space_inode(inode))
                 wq = fs_info->endio_freespace_worker;
         else
                 wq = fs_info->endio_write_workers;
@@@ -7584,9 -7527,9 +7585,9 @@@
         while (ordered_offset < offset + bytes) {
                 last_offset = ordered_offset;
                 if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
- -                                                         &ordered_offset,
- -                                                         ordered_bytes,
- -                                                         uptodate)) {
+ +                                                       &ordered_offset,
+ +                                                       ordered_bytes,
+ +                                                       uptodate)) {
                         btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
                                         NULL);
                         btrfs_queue_work(wq, &ordered->work);
@@@ -7613,7 -7556,7 +7614,7 @@@ static blk_status_t btrfs_submit_bio_st
   {
         struct inode *inode = private_data;
         blk_status_t ret;
- -      ret = btrfs_csum_one_bio(inode, bio, offset, 1);
+ +      ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
         BUG_ON(ret); /* -ENOMEM */
         return 0;
   }
@@@ -7674,7 -7617,7 +7675,7 @@@ static inline blk_status_t btrfs_submit
                  * If we aren't doing async submit, calculate the csum of the
                  * bio now.
                  */
- -              ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
+ +              ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
                 if (ret)
                         goto err;
         } else {
@@@ -7923,8 -7866,11 +7924,8 @@@ static ssize_t btrfs_direct_IO(struct k
                         dio_data.overwrite = 1;
                         inode_unlock(inode);
                         relock = true;
- -              } else if (iocb->ki_flags & IOCB_NOWAIT) {
- -                      ret = -EAGAIN;
- -                      goto out;
                 }
- -              ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ +              ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
                                                    offset, count);
                 if (ret)
                         goto out;
@@@ -7956,9 -7902,8 +7957,9 @@@
                 current->journal_info = NULL;
                 if (ret < 0 && ret != -EIOCBQUEUED) {
                         if (dio_data.reserve)
- -                              btrfs_delalloc_release_space(inode, data_reserved,
- -                                      offset, dio_data.reserve, true);
+ +                              btrfs_delalloc_release_space(BTRFS_I(inode),
+ +                                      data_reserved, offset, dio_data.reserve,
+ +                                      true);
                         /*
                          * On error we might have left some ordered extents
                          * without submitting corresponding bios for them, so
@@@ -7967,13 -7912,13 +7968,13 @@@
                          */
                         if (dio_data.unsubmitted_oe_range_start <
                             dio_data.unsubmitted_oe_range_end)
- -                              __endio_write_update_ordered(inode,
+ +                              __endio_write_update_ordered(BTRFS_I(inode),
                                         dio_data.unsubmitted_oe_range_start,
                                         dio_data.unsubmitted_oe_range_end -
                                         dio_data.unsubmitted_oe_range_start,
                                         false);
                 } else if (ret >= 0 && (size_t)ret < count)
- -                      btrfs_delalloc_release_space(inode, data_reserved,
+ +                      btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
                                         offset, count - (size_t)ret, true);
                 btrfs_delalloc_release_extents(BTRFS_I(inode), count);
         }
@@@ -7988,7 -7933,7 +7989,7 @@@ out
   }
   
   static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- -              __u64 start, __u64 len)
+ +                      u64 start, u64 len)
   {
         int     ret;
   
@@@ -8165,17 -8110,20 +8166,17 @@@ again
         /*
          * Qgroup reserved space handler
          * Page here will be either
- -       * 1) Already written to disk
- -       *    In this case, its reserved space is released from data rsv map
- -       *    and will be freed by delayed_ref handler finally.
- -       *    So even we call qgroup_free_data(), it won't decrease reserved
- -       *    space.
- -       * 2) Not written to disk
- -       *    This means the reserved space should be freed here. However,
- -       *    if a truncate invalidates the page (by clearing PageDirty)
- -       *    and the page is accounted for while allocating extent
- -       *    in btrfs_check_data_free_space() we let delayed_ref to
- -       *    free the entire extent.
+ +       * 1) Already written to disk or ordered extent already submitted
+ +       *    Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ +       *    Qgroup will be handled by its qgroup_record then.
+ +       *    btrfs_qgroup_free_data() call will do nothing here.
+ +       *
+ +       * 2) Not written to disk yet
+ +       *    Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ +       *    bit of its io_tree, and free the qgroup reserved data space.
+ +       *    Since the IO will never happen for this page.
          */
- -      if (PageDirty(page))
- -              btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ +      btrfs_qgroup_free_data(BTRFS_I(inode), NULL, page_start, PAGE_SIZE);
         if (!inode_evicting) {
                 clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
                                  EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
@@@ -8239,8 -8187,8 +8240,8 @@@ vm_fault_t btrfs_page_mkwrite(struct vm
          * end up waiting indefinitely to get a lock on the page currently
          * being processed by btrfs_page_mkwrite() function.
          */
- -      ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
- -                                         reserved_space);
+ +      ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
+ +                                          page_start, reserved_space);
         if (!ret2) {
                 ret2 = file_update_time(vmf->vma->vm_file);
                 reserved = 1;
@@@ -8287,9 -8235,9 +8288,9 @@@ again
                                           fs_info->sectorsize);
                 if (reserved_space < PAGE_SIZE) {
                         end = page_start + reserved_space - 1;
- -                      btrfs_delalloc_release_space(inode, data_reserved,
- -                                      page_start, PAGE_SIZE - reserved_space,
- -                                      true);
+ +                      btrfs_delalloc_release_space(BTRFS_I(inode),
+ +                                      data_reserved, page_start,
+ +                                      PAGE_SIZE - reserved_space, true);
                 }
         }
   
@@@ -8304,7 -8252,7 +8305,7 @@@
                           EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
                           EXTENT_DEFRAG, 0, 0, &cached_state);
   
- -      ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
+ +      ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
                                         &cached_state);
         if (ret2) {
                 unlock_extent_cached(io_tree, page_start, page_end,
@@@ -8344,7 -8292,7 +8345,7 @@@ out_unlock
         unlock_page(page);
   out:
         btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- -      btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ +      btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
                                      reserved_space, (ret != 0));
   out_noreserve:
         sb_end_pagefault(inode->i_sb);
@@@ -8558,7 -8506,6 +8559,7 @@@ struct inode *btrfs_alloc_inode(struct 
         ei->index_cnt = (u64)-1;
         ei->dir_index = 0;
         ei->last_unlink_trans = 0;
+ +      ei->last_reflink_trans = 0;
         ei->last_log_commit = 0;
   
         spin_lock_init(&ei->lock);
@@@ -8645,7 -8592,7 +8646,7 @@@ void btrfs_destroy_inode(struct inode *
                         btrfs_put_ordered_extent(ordered);
                 }
         }
- -      btrfs_qgroup_check_reserved_leak(inode);
+ +      btrfs_qgroup_check_reserved_leak(BTRFS_I(inode));
         inode_tree_del(inode);
         btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
         btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
@@@ -9627,31 -9574,6 +9628,31 @@@ out_unlock
         return err;
   }
   
+ +static int insert_prealloc_file_extent(struct btrfs_trans_handle *trans,
+ +                                     struct inode *inode, struct btrfs_key *ins,
+ +                                     u64 file_offset)
+ +{
+ +      struct btrfs_file_extent_item stack_fi;
+ +      u64 start = ins->objectid;
+ +      u64 len = ins->offset;
+ +      int ret;
+ +
+ +      memset(&stack_fi, 0, sizeof(stack_fi));
+ +
+ +      btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
+ +      btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
+ +      btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
+ +      btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
+ +      btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
+ +      btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
+ +      /* Encryption and other encoding is reserved and all 0 */
+ +
+ +      ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
+ +      if (ret < 0)
+ +              return ret;
+ +      return insert_reserved_file_extent(trans, BTRFS_I(inode), file_offset,
+ +                                         &stack_fi, ret);
+ +}
   static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                                        u64 start, u64 num_bytes, u64 min_size,
                                        loff_t actual_len, u64 *alloc_hint,
@@@ -9710,7 -9632,11 +9711,7 @@@
                 btrfs_dec_block_group_reservations(fs_info, ins.objectid);
   
                 last_alloc = ins.offset;
- -              ret = insert_reserved_file_extent(trans, inode,
- -                                                cur_offset, ins.objectid,
- -                                                ins.offset, ins.offset,
- -                                                ins.offset, 0, 0, 0,
- -                                                BTRFS_FILE_EXTENT_PREALLOC);
+ +              ret = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
                 if (ret) {
                         btrfs_free_reserved_extent(fs_info, ins.objectid,
                                                    ins.offset, 0);
@@@ -9783,7 -9709,7 +9784,7 @@@ next
                         btrfs_end_transaction(trans);
         }
         if (clear_offset < end)
- -              btrfs_free_reserved_data_space(inode, NULL, clear_offset,
+ +              btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
                         end - clear_offset + 1);
         return ret;
   }
diff --combined include/linux/fs.h

index 2df72def1f594c51f19be836d177a5f7049d6212,9bf7a32f29328abaaea59d1234bc46e9f7c50b96..407881ebeab1cef6d346fe48092057a8c4a8b5a4
--- 1/include/linux/fs.h
--- 2/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -175,9 -175,6 +175,9 @@@ typedef int (dio_iodone_t)(struct kioc
   /* File does not contribute to nr_files count */
   #define FMODE_NOACCOUNT               ((__force fmode_t)0x20000000)
   
+ +/* File supports async buffered reads */
+ +#define FMODE_BUF_RASYNC      ((__force fmode_t)0x40000000)
+ +
   /*
    * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
    * that indicates that they should check the contents of the iovec are
@@@ -318,9 -315,6 +318,9 @@@ enum rw_hint 
   #define IOCB_SYNC             (1 << 5)
   #define IOCB_WRITE            (1 << 6)
   #define IOCB_NOWAIT           (1 << 7)
+ +/* iocb->ki_waitq is valid */
+ +#define IOCB_WAITQ            (1 << 8)
+ +#define IOCB_NOIO             (1 << 9)
   
   struct kiocb {
         struct file             *ki_filp;
@@@ -334,10 -328,7 +334,10 @@@
         int                     ki_flags;
         u16                     ki_hint;
         u16                     ki_ioprio; /* See linux/ioprio.h */
- -      unsigned int            ki_cookie; /* for ->iopoll */
+ +      union {
+ +              unsigned int            ki_cookie; /* for ->iopoll */
+ +              struct wait_page_queue  *ki_waitq; /* for async buffered IO */
+ +      };
   
         randomized_struct_fields_end
   };
@@@ -479,6 -470,45 +479,6 @@@ struct address_space 
          * must be enforced here for CRIS, to let the least significant bit
          * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
          */
- -struct request_queue;
- -
- -struct block_device {
- -      dev_t                   bd_dev;  /* not a kdev_t - it's a search key */
- -      int                     bd_openers;
- -      struct inode *          bd_inode;       /* will die */
- -      struct super_block *    bd_super;
- -      struct mutex            bd_mutex;       /* open/close mutex */
- -      void *                  bd_claiming;
- -      void *                  bd_holder;
- -      int                     bd_holders;
- -      bool                    bd_write_holder;
- -#ifdef CONFIG_SYSFS
- -      struct list_head        bd_holder_disks;
- -#endif
- -      struct block_device *   bd_contains;
- -      unsigned                bd_block_size;
- -      u8                      bd_partno;
- -      struct hd_struct *      bd_part;
- -      /* number of times partitions within this device have been opened. */
- -      unsigned                bd_part_count;
- -      int                     bd_invalidated;
- -      struct gendisk *        bd_disk;
- -      struct request_queue *  bd_queue;
- -      struct backing_dev_info *bd_bdi;
- -      struct list_head        bd_list;
- -      /*
- -       * Private data.  You must have bd_claim'ed the block_device
- -       * to use this.  NOTE:  bd_claim allows an owner to claim
- -       * the same device multiple times, the owner must take special
- -       * care to not mess up bd_private for that case.
- -       */
- -      unsigned long           bd_private;
- -
- -      /* The counter of freeze processes */
- -      int                     bd_fsfreeze_count;
- -      /* Mutex for freeze */
- -      struct mutex            bd_fsfreeze_mutex;
- -} __randomize_layout;
   
   /* XArray tags, for tagging dirty and writeback pages in the pagecache. */
   #define PAGECACHE_TAG_DIRTY   XA_MARK_0
@@@ -528,7 -558,7 +528,7 @@@ static inline int mapping_mapped(struc
   
   /*
    * Might pages of this file have been modified in userspace?
- - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
+ + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
    * marks vma as VM_SHARED if it is shared, and the file was opened for
    * writing i.e. vma may be mprotected writable even if now readonly.
    *
@@@ -877,6 -907,8 +877,6 @@@ static inline unsigned imajor(const str
         return MAJOR(inode->i_rdev);
   }
   
- -extern struct block_device *I_BDEV(struct inode *inode);
- -
   struct fown_struct {
         rwlock_t lock;          /* protects pid, uid, euid fields */
         struct pid *pid;        /* pid or -pgrp where SIGIO should be sent */
@@@ -1348,7 -1380,6 +1348,7 @@@ extern int send_sigurg(struct fown_stru
   #define SB_NODIRATIME 2048    /* Do not update directory access times */
   #define SB_SILENT     32768
   #define SB_POSIXACL   (1<<16) /* VFS does not apply the umask */
+ +#define SB_INLINECRYPT        (1<<17) /* Use blk-crypto for encrypted files */
   #define SB_KERNMOUNT  (1<<22) /* this is a kern_mount call */
   #define SB_I_VERSION  (1<<23) /* Update inode I_version field */
   #define SB_LAZYTIME   (1<<25) /* Update the on-disk [acm]times lazily */
@@@ -1712,10 -1743,6 +1712,10 @@@ int vfs_mkobj(struct dentry *, umode_t
                 int (*f)(struct dentry *, umode_t, void *),
                 void *);
   
+ +int vfs_fchown(struct file *file, uid_t user, gid_t group);
+ +int vfs_fchmod(struct file *file, umode_t mode);
+ +int vfs_utimes(const struct path *path, struct timespec64 *times);
+ +
   extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
   
   #ifdef CONFIG_COMPAT
@@@ -1747,6 -1774,14 +1747,6 @@@ struct dir_context 
         loff_t pos;
   };
   
- -struct block_device_operations;
- -
- -/* These macros are for out of kernel modules to test that
- - * the kernel supports the unlocked_ioctl and compat_ioctl
- - * fields in struct file_operations. */
- -#define HAVE_COMPAT_IOCTL 1
- -#define HAVE_UNLOCKED_IOCTL 1
- -
   /*
    * These flags let !MMU mmap() govern direct device mapping vs immediate
    * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@@ -1882,6 -1917,7 +1882,6 @@@ ssize_t rw_copy_check_uvector(int type
                               struct iovec *fast_pointer,
                               struct iovec **ret_pointer);
   
- -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
   extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
   extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@@ -1946,27 -1982,27 +1946,27 @@@ struct super_operations 
   /*
    * Inode flags - they have no relation to superblock flags now
    */
- #define S_SYNC                1       /* Writes are synced at once */
- #define S_NOATIME     2       /* Do not update access times */
- #define S_APPEND      4       /* Append-only file */
- #define S_IMMUTABLE   8       /* Immutable file */
- #define S_DEAD                16      /* removed, but still open directory */
- #define S_NOQUOTA     32      /* Inode is not counted to quota */
- #define S_DIRSYNC     64      /* Directory modifications are synchronous */
- #define S_NOCMTIME    128     /* Do not update file c/mtime */
- #define S_SWAPFILE    256     /* Do not truncate: swapon got its bmaps */
- #define S_PRIVATE     512     /* Inode is fs-internal */
- #define S_IMA         1024    /* Inode has an associated IMA struct */
- #define S_AUTOMOUNT   2048    /* Automount/referral quasi-directory */
- #define S_NOSEC               4096    /* no suid or xattr security attributes */
+ #define S_SYNC                (1 << 0)  /* Writes are synced at once */
+ #define S_NOATIME     (1 << 1)  /* Do not update access times */
+ #define S_APPEND      (1 << 2)  /* Append-only file */
+ #define S_IMMUTABLE   (1 << 3)  /* Immutable file */
+ #define S_DEAD                (1 << 4)  /* removed, but still open directory */
+ #define S_NOQUOTA     (1 << 5)  /* Inode is not counted to quota */
+ #define S_DIRSYNC     (1 << 6)  /* Directory modifications are synchronous */
+ #define S_NOCMTIME    (1 << 7)  /* Do not update file c/mtime */
+ #define S_SWAPFILE    (1 << 8)  /* Do not truncate: swapon got its bmaps */
+ #define S_PRIVATE     (1 << 9)  /* Inode is fs-internal */
+ #define S_IMA         (1 << 10) /* Inode has an associated IMA struct */
+ #define S_AUTOMOUNT   (1 << 11) /* Automount/referral quasi-directory */
+ #define S_NOSEC               (1 << 12) /* no suid or xattr security attributes */
   #ifdef CONFIG_FS_DAX
- #define S_DAX         8192    /* Direct Access, avoiding the page cache */
+ #define S_DAX         (1 << 13) /* Direct Access, avoiding the page cache */
   #else
- #define S_DAX         0       /* Make all the DAX code disappear */
+ #define S_DAX         0         /* Make all the DAX code disappear */
   #endif
- #define S_ENCRYPTED   16384   /* Encrypted file (using fs/crypto/) */
- #define S_CASEFOLD    32768   /* Casefolded file */
- #define S_VERITY      65536   /* Verity file (using fs/verity/) */
+ #define S_ENCRYPTED   (1 << 14) /* Encrypted file (using fs/crypto/) */
+ #define S_CASEFOLD    (1 << 15) /* Casefolded file */
+ #define S_VERITY      (1 << 16) /* Verity file (using fs/verity/) */
   
   /*
    * Note that nosuid etc flags are inode-specific: setting some file-system
@@@ -2228,9 -2264,18 +2228,9 @@@ struct file_system_type 
   
   #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
   
- -#ifdef CONFIG_BLOCK
   extern struct dentry *mount_bdev(struct file_system_type *fs_type,
         int flags, const char *dev_name, void *data,
         int (*fill_super)(struct super_block *, void *, int));
- -#else
- -static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
- -      int flags, const char *dev_name, void *data,
- -      int (*fill_super)(struct super_block *, void *, int))
- -{
- -      return ERR_PTR(-ENODEV);
- -}
- -#endif
   extern struct dentry *mount_single(struct file_system_type *fs_type,
         int flags, void *data,
         int (*fill_super)(struct super_block *, void *, int));
@@@ -2239,7 -2284,14 +2239,7 @@@ extern struct dentry *mount_nodev(struc
         int (*fill_super)(struct super_block *, void *, int));
   extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
   void generic_shutdown_super(struct super_block *sb);
- -#ifdef CONFIG_BLOCK
   void kill_block_super(struct super_block *sb);
- -#else
- -static inline void kill_block_super(struct super_block *sb)
- -{
- -      BUG();
- -}
- -#endif
   void kill_anon_super(struct super_block *sb);
   void kill_litter_super(struct super_block *sb);
   void deactivate_super(struct super_block *sb);
@@@ -2529,16 -2581,95 +2529,16 @@@ extern struct kmem_cache *names_cachep
   #define __getname()           kmem_cache_alloc(names_cachep, GFP_KERNEL)
   #define __putname(name)               kmem_cache_free(names_cachep, (void *)(name))
   
- -#ifdef CONFIG_BLOCK
- -extern int register_blkdev(unsigned int, const char *);
- -extern void unregister_blkdev(unsigned int, const char *);
- -extern struct block_device *bdget(dev_t);
- -extern struct block_device *bdgrab(struct block_device *bdev);
- -extern void bd_set_size(struct block_device *, loff_t size);
- -extern void bd_forget(struct inode *inode);
- -extern void bdput(struct block_device *);
- -extern void invalidate_bdev(struct block_device *);
- -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
- -extern int sync_blockdev(struct block_device *bdev);
- -extern void kill_bdev(struct block_device *);
- -extern struct super_block *freeze_bdev(struct block_device *);
- -extern void emergency_thaw_all(void);
- -extern void emergency_thaw_bdev(struct super_block *sb);
- -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
- -extern int fsync_bdev(struct block_device *);
- -
   extern struct super_block *blockdev_superblock;
- -
   static inline bool sb_is_blkdev_sb(struct super_block *sb)
   {
- -      return sb == blockdev_superblock;
- -}
- -#else
- -static inline void bd_forget(struct inode *inode) {}
- -static inline int sync_blockdev(struct block_device *bdev) { return 0; }
- -static inline void kill_bdev(struct block_device *bdev) {}
- -static inline void invalidate_bdev(struct block_device *bdev) {}
- -
- -static inline struct super_block *freeze_bdev(struct block_device *sb)
- -{
- -      return NULL;
- -}
- -
- -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
- -{
- -      return 0;
- -}
- -
- -static inline int emergency_thaw_bdev(struct super_block *sb)
- -{
- -      return 0;
+ +      return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
   }
   
- -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
- -{
- -}
- -
- -static inline bool sb_is_blkdev_sb(struct super_block *sb)
- -{
- -      return false;
- -}
- -#endif
+ +void emergency_thaw_all(void);
   extern int sync_filesystem(struct super_block *);
   extern const struct file_operations def_blk_fops;
   extern const struct file_operations def_chr_fops;
- -#ifdef CONFIG_BLOCK
- -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
- -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
- -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
- -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- -                                             void *holder);
- -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
- -                                            void *holder);
- -extern struct block_device *bd_start_claiming(struct block_device *bdev,
- -                                            void *holder);
- -extern void bd_finish_claiming(struct block_device *bdev,
- -                             struct block_device *whole, void *holder);
- -extern void bd_abort_claiming(struct block_device *bdev,
- -                            struct block_device *whole, void *holder);
- -extern void blkdev_put(struct block_device *bdev, fmode_t mode);
- -
- -#ifdef CONFIG_SYSFS
- -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
- -extern void bd_unlink_disk_holder(struct block_device *bdev,
- -                                struct gendisk *disk);
- -#else
- -static inline int bd_link_disk_holder(struct block_device *bdev,
- -                                    struct gendisk *disk)
- -{
- -      return 0;
- -}
- -static inline void bd_unlink_disk_holder(struct block_device *bdev,
- -                                       struct gendisk *disk)
- -{
- -}
- -#endif
- -#endif
   
   /* fs/char_dev.c */
   #define CHRDEV_MAJOR_MAX 512
@@@ -2569,12 -2700,31 +2569,12 @@@ static inline void unregister_chrdev(un
         __unregister_chrdev(major, 0, 256, name);
   }
   
- -/* fs/block_dev.c */
- -#define BDEVNAME_SIZE 32      /* Largest string for a blockdev identifier */
- -#define BDEVT_SIZE    10      /* Largest string for MAJ:MIN for blkdev */
- -
- -#ifdef CONFIG_BLOCK
- -#define BLKDEV_MAJOR_MAX      512
- -extern const char *bdevname(struct block_device *bdev, char *buffer);
- -extern struct block_device *lookup_bdev(const char *);
- -extern void blkdev_show(struct seq_file *,off_t);
- -
- -#else
- -#define BLKDEV_MAJOR_MAX      0
- -#endif
- -
   extern void init_special_inode(struct inode *, umode_t, dev_t);
   
   /* Invalid inode operations -- fs/bad_inode.c */
   extern void make_bad_inode(struct inode *);
   extern bool is_bad_inode(struct inode *);
   
- -#ifdef CONFIG_BLOCK
- -extern int revalidate_disk(struct gendisk *);
- -extern int check_disk_change(struct block_device *);
- -extern int __invalidate_device(struct block_device *, bool);
- -#endif
   unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                         pgoff_t start, pgoff_t end);
   
@@@ -2679,7 -2829,7 +2679,7 @@@ static inline errseq_t filemap_sample_w
   
   /**
    * file_sample_sb_err - sample the current errseq_t to test for later errors
- - * @mapping: mapping to be sampled
+ + * @file: file pointer to be sampled
    *
    * Grab the most current superblock-level errseq_t value for the given
    * struct file.
@@@ -2885,7 -3035,6 +2885,7 @@@ extern int kernel_read_file_from_path_i
   extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
                                     enum kernel_read_file_id);
   extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
+ +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
   extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
   extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
   extern struct file * open_exec(const char *);
@@@ -2950,21 -3099,6 +2950,21 @@@ extern void discard_new_inode(struct in
   extern unsigned int get_next_ino(void);
   extern void evict_inodes(struct super_block *sb);
   
+ +/*
+ + * Userspace may rely on the inode number being non-zero. For example, glibc
+ + * simply ignores files with zero i_ino in unlink() and other places.
+ + *
+ + * As an additional complication, if userspace was compiled with
+ + * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
+ + * lower 32 bits, so we need to check that those aren't zero explicitly. With
+ + * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
+ + * better safe than sorry.
+ + */
+ +static inline bool is_zero_ino(ino_t ino)
+ +{
+ +      return (u32)ino == 0;
+ +}
+ +
   extern void __iget(struct inode * inode);
   extern void iget_failed(struct inode *);
   extern void clear_inode(struct inode *);
@@@ -2990,6 -3124,10 +2990,6 @@@ static inline void remove_inode_hash(st
   
   extern void inode_sb_list_add(struct inode *inode);
   
- -#ifdef CONFIG_BLOCK
- -extern int bdev_read_only(struct block_device *);
- -#endif
- -extern int set_blocksize(struct block_device *, int);
   extern int sb_set_blocksize(struct super_block *, int);
   extern int sb_min_blocksize(struct super_block *, int);
   
@@@ -3302,28 -3440,22 +3302,28 @@@ static inline int iocb_flags(struct fil
   
   static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
   {
+ +      int kiocb_flags = 0;
+ +
+ +      if (!flags)
+ +              return 0;
         if (unlikely(flags & ~RWF_SUPPORTED))
                 return -EOPNOTSUPP;
   
         if (flags & RWF_NOWAIT) {
                 if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
                         return -EOPNOTSUPP;
- -              ki->ki_flags |= IOCB_NOWAIT;
+ +              kiocb_flags |= IOCB_NOWAIT;
         }
         if (flags & RWF_HIPRI)
- -              ki->ki_flags |= IOCB_HIPRI;
+ +              kiocb_flags |= IOCB_HIPRI;
         if (flags & RWF_DSYNC)
- -              ki->ki_flags |= IOCB_DSYNC;
+ +              kiocb_flags |= IOCB_DSYNC;
         if (flags & RWF_SYNC)
- -              ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ +              kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
         if (flags & RWF_APPEND)
- -              ki->ki_flags |= IOCB_APPEND;
+ +              kiocb_flags |= IOCB_APPEND;
+ +
+ +      ki->ki_flags |= kiocb_flags;
         return 0;
   }
   
diff --combined include/linux/skbuff.h

index 3ad65d4ce085ca9cafa3ed706008b38705cbc0e7,1530e81a6cce730c14867c5fe2a7bfcf8048ce2b..46881d9021241c27ca39e2cf16665cb1e18a0cfa
--- 1/include/linux/skbuff.h
--- 2/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -238,6 -238,7 +238,7 @@@
                          SKB_DATA_ALIGN(sizeof(struct sk_buff)) +       \
                          SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
   
+ struct ahash_request;
   struct net_device;
   struct scatterlist;
   struct pipe_inode_info;
@@@ -283,7 -284,6 +284,7 @@@ struct nf_bridge_info 
    */
   struct tc_skb_ext {
         __u32 chain;
+ +      __u16 mru;
   };
   #endif
   
@@@ -1329,7 -1329,7 +1330,7 @@@ void skb_flow_dissect_meta(const struc
                            void *target_container);
   
   /* Gets a skb connection tracking info, ctinfo map should be a
- - * a map of mapsize to translate enum ip_conntrack_info states
+ + * map of mapsize to translate enum ip_conntrack_info states
    * to user states.
    */
   void
@@@ -1343,10 -1343,6 +1344,10 @@@ skb_flow_dissect_tunnel_info(const stru
                              struct flow_dissector *flow_dissector,
                              void *target_container);
   
+ +void skb_flow_dissect_hash(const struct sk_buff *skb,
+ +                         struct flow_dissector *flow_dissector,
+ +                         void *target_container);
+ +
   static inline __u32 skb_get_hash(struct sk_buff *skb)
   {
         if (!skb->l4_hash && !skb->sw_hash)
@@@ -3817,7 -3813,7 +3818,7 @@@ static inline bool skb_defer_rx_timesta
    * must call this function to return the skb back to the stack with a
    * timestamp.
    *
- - * @skb: clone of the the original outgoing packet
+ + * @skb: clone of the original outgoing packet
    * @hwtstamps: hardware time stamps
    *
    */
author	Linus Torvalds <[email protected]>
	Sat, 8 Aug 2020 04:14:30 +0000 (21:14 -0700)
committer	Linus Torvalds <[email protected]>
	Sat, 8 Aug 2020 04:14:30 +0000 (21:14 -0700)
		1	2
drivers/misc/uacce/uacce.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/soc/qcom/pdr_interface.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/fs.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/skbuff.h	patch \|	diff1 \|	diff2 \|	blob \| history