From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 8 Aug 2020 04:14:30 +0000 (-0700)
Subject: Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
X-Git-Tag: v5.9-rc1~83
X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/b79675e15a754ca51b9fc631e0961ccdd4ec3fc7?hp=-c

Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull misc vfs updates from Al Viro:
 "No common topic whatsoever in those, sorry"

* 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  fs: define inode flags using bit numbers
  iov_iter: Move unnecessary inclusion of crypto/hash.h
  dlmfs: clean up dlmfs_file_{read,write}() a bit
---

b79675e15a754ca51b9fc631e0961ccdd4ec3fc7
diff --combined drivers/misc/uacce/uacce.c
index aa91f69a5fa9,e45bfd409cc5..a5b8dab80c76
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@@ -4,6 -4,7 +4,7 @@@
  #include <linux/iommu.h>
  #include <linux/module.h>
  #include <linux/poll.h>
+ #include <linux/slab.h>
  #include <linux/uacce.h>
  
  static struct class *uacce_class;
@@@ -179,6 -180,14 +180,6 @@@ static int uacce_fops_release(struct in
  	return 0;
  }
  
 -static vm_fault_t uacce_vma_fault(struct vm_fault *vmf)
 -{
 -	if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE))
 -		return VM_FAULT_SIGBUS;
 -
 -	return 0;
 -}
 -
  static void uacce_vma_close(struct vm_area_struct *vma)
  {
  	struct uacce_queue *q = vma->vm_private_data;
@@@ -191,6 -200,7 +192,6 @@@
  }
  
  static const struct vm_operations_struct uacce_vm_ops = {
 -	.fault = uacce_vma_fault,
  	.close = uacce_vma_close,
  };
  
diff --combined drivers/soc/qcom/pdr_interface.c
index 4c9225f15c4e,a90d707da689..088dc99f77f3
--- a/drivers/soc/qcom/pdr_interface.c
+++ b/drivers/soc/qcom/pdr_interface.c
@@@ -5,6 -5,7 +5,7 @@@
  
  #include <linux/kernel.h>
  #include <linux/module.h>
+ #include <linux/slab.h>
  #include <linux/string.h>
  #include <linux/workqueue.h>
  
@@@ -278,15 -279,13 +279,15 @@@ static void pdr_indack_work(struct work
  
  	list_for_each_entry_safe(ind, tmp, &pdr->indack_list, node) {
  		pds = ind->pds;
 -		pdr_send_indack_msg(pdr, pds, ind->transaction_id);
  
  		mutex_lock(&pdr->status_lock);
  		pds->state = ind->curr_state;
  		pdr->status(pds->state, pds->service_path, pdr->priv);
  		mutex_unlock(&pdr->status_lock);
  
 +		/* Ack the indication after clients release the PD resources */
 +		pdr_send_indack_msg(pdr, pds, ind->transaction_id);
 +
  		mutex_lock(&pdr->list_lock);
  		list_del(&ind->node);
  		mutex_unlock(&pdr->list_lock);
diff --combined fs/btrfs/inode.c
index 611b3412fbfd,d901d53e4f03..6dc03bab0c9d
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -3,6 -3,7 +3,7 @@@
   * Copyright (C) 2007 Oracle.  All rights reserved.
   */
  
+ #include <crypto/hash.h>
  #include <linux/kernel.h>
  #include <linux/bio.h>
  #include <linux/buffer_head.h>
@@@ -80,17 -81,17 +81,17 @@@ struct kmem_cache *btrfs_free_space_bit
  static int btrfs_setsize(struct inode *inode, struct iattr *attr);
  static int btrfs_truncate(struct inode *inode, bool skip_writeback);
  static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
 -static noinline int cow_file_range(struct inode *inode,
 +static noinline int cow_file_range(struct btrfs_inode *inode,
  				   struct page *locked_page,
  				   u64 start, u64 end, int *page_started,
  				   unsigned long *nr_written, int unlock);
 -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
 -				       u64 orig_start, u64 block_start,
 +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 +				       u64 len, u64 orig_start, u64 block_start,
  				       u64 block_len, u64 orig_block_len,
  				       u64 ram_bytes, int compress_type,
  				       int type);
  
 -static void __endio_write_update_ordered(struct inode *inode,
 +static void __endio_write_update_ordered(struct btrfs_inode *inode,
  					 const u64 offset, const u64 bytes,
  					 const bool uptodate);
  
@@@ -104,7 -105,7 +105,7 @@@
   * to be released, which we want to happen only when finishing the ordered
   * extent (btrfs_finish_ordered_io()).
   */
 -static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
 +static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
  						 struct page *locked_page,
  						 u64 offset, u64 bytes)
  {
@@@ -116,7 -117,7 +117,7 @@@
  	struct page *page;
  
  	while (index <= end_index) {
 -		page = find_get_page(inode->i_mapping, index);
 +		page = find_get_page(inode->vfs_inode.i_mapping, index);
  		index++;
  		if (!page)
  			continue;
@@@ -274,15 -275,15 +275,15 @@@ fail
   * does the checks required to make sure the data is small enough
   * to fit as an inline extent.
   */
 -static noinline int cow_file_range_inline(struct inode *inode, u64 start,
 +static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
  					  u64 end, size_t compressed_size,
  					  int compress_type,
  					  struct page **compressed_pages)
  {
 -	struct btrfs_root *root = BTRFS_I(inode)->root;
 +	struct btrfs_root *root = inode->root;
  	struct btrfs_fs_info *fs_info = root->fs_info;
  	struct btrfs_trans_handle *trans;
 -	u64 isize = i_size_read(inode);
 +	u64 isize = i_size_read(&inode->vfs_inode);
  	u64 actual_end = min(end + 1, isize);
  	u64 inline_len = actual_end - start;
  	u64 aligned_end = ALIGN(end, fs_info->sectorsize);
@@@ -314,7 -315,7 +315,7 @@@
  		btrfs_free_path(path);
  		return PTR_ERR(trans);
  	}
 -	trans->block_rsv = &BTRFS_I(inode)->block_rsv;
 +	trans->block_rsv = &inode->block_rsv;
  
  	if (compressed_size && compressed_pages)
  		extent_item_size = btrfs_file_extent_calc_inline_size(
@@@ -323,9 -324,9 +324,9 @@@
  		extent_item_size = btrfs_file_extent_calc_inline_size(
  		    inline_len);
  
 -	ret = __btrfs_drop_extents(trans, root, inode, path,
 -				   start, aligned_end, NULL,
 -				   1, 1, extent_item_size, &extent_inserted);
 +	ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
 +				   NULL, 1, 1, extent_item_size,
 +				   &extent_inserted);
  	if (ret) {
  		btrfs_abort_transaction(trans, ret);
  		goto out;
@@@ -334,7 -335,7 +335,7 @@@
  	if (isize > actual_end)
  		inline_len = min_t(u64, isize, actual_end);
  	ret = insert_inline_extent(trans, path, extent_inserted,
 -				   root, inode, start,
 +				   root, &inode->vfs_inode, start,
  				   inline_len, compressed_size,
  				   compress_type, compressed_pages);
  	if (ret && ret != -ENOSPC) {
@@@ -345,8 -346,8 +346,8 @@@
  		goto out;
  	}
  
 -	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
 -	btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
 +	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
 +	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
  out:
  	/*
  	 * Don't forget to free the reserved space, as for inlined extent
@@@ -412,10 -413,10 +413,10 @@@ static noinline int add_async_extent(st
  /*
   * Check if the inode has flags compatible with compression
   */
 -static inline bool inode_can_compress(struct inode *inode)
 +static inline bool inode_can_compress(struct btrfs_inode *inode)
  {
 -	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
 -	    BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
 +	if (inode->flags & BTRFS_INODE_NODATACOW ||
 +	    inode->flags & BTRFS_INODE_NODATASUM)
  		return false;
  	return true;
  }
@@@ -424,30 -425,29 +425,30 @@@
   * Check if the inode needs to be submitted to compression, based on mount
   * options, defragmentation, properties or heuristics.
   */
 -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
 +static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
 +				      u64 end)
  {
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
  
  	if (!inode_can_compress(inode)) {
  		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
  			KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
 -			btrfs_ino(BTRFS_I(inode)));
 +			btrfs_ino(inode));
  		return 0;
  	}
  	/* force compress */
  	if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
  		return 1;
  	/* defrag ioctl */
 -	if (BTRFS_I(inode)->defrag_compress)
 +	if (inode->defrag_compress)
  		return 1;
  	/* bad compression ratios */
 -	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
 +	if (inode->flags & BTRFS_INODE_NOCOMPRESS)
  		return 0;
  	if (btrfs_test_opt(fs_info, COMPRESS) ||
 -	    BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
 -	    BTRFS_I(inode)->prop_compress)
 -		return btrfs_compress_heuristic(inode, start, end);
 +	    inode->flags & BTRFS_INODE_COMPRESS ||
 +	    inode->prop_compress)
 +		return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
  	return 0;
  }
  
@@@ -553,7 -553,7 +554,7 @@@ again
  	 * inode has not been flagged as nocompress.  This flag can
  	 * change at any time if we discover bad compression ratios.
  	 */
 -	if (inode_need_compress(inode, start, end)) {
 +	if (inode_need_compress(BTRFS_I(inode), start, end)) {
  		WARN_ON(pages);
  		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
  		if (!pages) {
@@@ -617,12 -617,11 +618,12 @@@ cont
  			/* we didn't compress the entire range, try
  			 * to make an uncompressed inline extent.
  			 */
 -			ret = cow_file_range_inline(inode, start, end, 0,
 -						    BTRFS_COMPRESS_NONE, NULL);
 +			ret = cow_file_range_inline(BTRFS_I(inode), start, end,
 +						    0, BTRFS_COMPRESS_NONE,
 +						    NULL);
  		} else {
  			/* try making a compressed inline extent */
 -			ret = cow_file_range_inline(inode, start, end,
 +			ret = cow_file_range_inline(BTRFS_I(inode), start, end,
  						    total_compressed,
  						    compress_type, pages);
  		}
@@@ -644,8 -643,7 +645,8 @@@
  			 * our outstanding extent for clearing delalloc for this
  			 * range.
  			 */
 -			extent_clear_unlock_delalloc(inode, start, end, NULL,
 +			extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
 +						     NULL,
  						     clear_flags,
  						     PAGE_UNLOCK |
  						     PAGE_CLEAR_DIRTY |
@@@ -765,14 -763,14 +766,14 @@@ static void free_async_extent_pages(str
   */
  static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
  {
 -	struct inode *inode = async_chunk->inode;
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 +	struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
 +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
  	struct async_extent *async_extent;
  	u64 alloc_hint = 0;
  	struct btrfs_key ins;
  	struct extent_map *em;
 -	struct btrfs_root *root = BTRFS_I(inode)->root;
 -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 +	struct btrfs_root *root = inode->root;
 +	struct extent_io_tree *io_tree = &inode->io_tree;
  	int ret = 0;
  
  again:
@@@ -805,7 -803,7 +806,7 @@@ retry
  			 * all those pages down to the drive.
  			 */
  			if (!page_started && !ret)
 -				extent_write_locked_range(inode,
 +				extent_write_locked_range(&inode->vfs_inode,
  						  async_extent->start,
  						  async_extent->start +
  						  async_extent->ram_size - 1,
@@@ -835,7 -833,7 +836,7 @@@
  				 * will not submit these pages down to lower
  				 * layers.
  				 */
 -				extent_range_redirty_for_io(inode,
 +				extent_range_redirty_for_io(&inode->vfs_inode,
  						async_extent->start,
  						async_extent->start +
  						async_extent->ram_size - 1);
@@@ -870,7 -868,8 +871,7 @@@
  						BTRFS_ORDERED_COMPRESSED,
  						async_extent->compress_type);
  		if (ret) {
 -			btrfs_drop_extent_cache(BTRFS_I(inode),
 -						async_extent->start,
 +			btrfs_drop_extent_cache(inode, async_extent->start,
  						async_extent->start +
  						async_extent->ram_size - 1, 0);
  			goto out_free_reserve;
@@@ -886,7 -885,8 +887,7 @@@
  				NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
  				PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
  				PAGE_SET_WRITEBACK);
 -		if (btrfs_submit_compressed_write(inode,
 -				    async_extent->start,
 +		if (btrfs_submit_compressed_write(inode, async_extent->start,
  				    async_extent->ram_size,
  				    ins.objectid,
  				    ins.offset, async_extent->pages,
@@@ -897,11 -897,12 +898,11 @@@
  			const u64 start = async_extent->start;
  			const u64 end = start + async_extent->ram_size - 1;
  
 -			p->mapping = inode->i_mapping;
 +			p->mapping = inode->vfs_inode.i_mapping;
  			btrfs_writepage_endio_finish_ordered(p, start, end, 0);
  
  			p->mapping = NULL;
 -			extent_clear_unlock_delalloc(inode, start, end,
 -						     NULL, 0,
 +			extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
  						     PAGE_END_WRITEBACK |
  						     PAGE_SET_ERROR);
  			free_async_extent_pages(async_extent);
@@@ -929,10 -930,10 +930,10 @@@ out_free
  	goto again;
  }
  
 -static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
 +static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
  				      u64 num_bytes)
  {
 -	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 +	struct extent_map_tree *em_tree = &inode->extent_tree;
  	struct extent_map *em;
  	u64 alloc_hint = 0;
  
@@@ -974,18 -975,17 +975,18 @@@
   * required to start IO on it.  It may be clean and already done with
   * IO when we return.
   */
 -static noinline int cow_file_range(struct inode *inode,
 +static noinline int cow_file_range(struct btrfs_inode *inode,
  				   struct page *locked_page,
  				   u64 start, u64 end, int *page_started,
  				   unsigned long *nr_written, int unlock)
  {
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 -	struct btrfs_root *root = BTRFS_I(inode)->root;
 +	struct btrfs_root *root = inode->root;
 +	struct btrfs_fs_info *fs_info = root->fs_info;
  	u64 alloc_hint = 0;
  	u64 num_bytes;
  	unsigned long ram_size;
  	u64 cur_alloc_size = 0;
 +	u64 min_alloc_size;
  	u64 blocksize = fs_info->sectorsize;
  	struct btrfs_key ins;
  	struct extent_map *em;
@@@ -994,7 -994,7 +995,7 @@@
  	bool extent_reserved = false;
  	int ret = 0;
  
 -	if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
 +	if (btrfs_is_free_space_inode(inode)) {
  		WARN_ON_ONCE(1);
  		ret = -EINVAL;
  		goto out_unlock;
@@@ -1004,7 -1004,7 +1005,7 @@@
  	num_bytes = max(blocksize,  num_bytes);
  	ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
  
 -	inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
 +	inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
  
  	if (start == 0) {
  		/* lets try to make an inline extent */
@@@ -1033,28 -1033,13 +1034,28 @@@
  	}
  
  	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
 -	btrfs_drop_extent_cache(BTRFS_I(inode), start,
 -			start + num_bytes - 1, 0);
 +	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 +
 +	/*
 +	 * Relocation relies on the relocated extents to have exactly the same
 +	 * size as the original extents. Normally writeback for relocation data
 +	 * extents follows a NOCOW path because relocation preallocates the
 +	 * extents. However, due to an operation such as scrub turning a block
 +	 * group to RO mode, it may fallback to COW mode, so we must make sure
 +	 * an extent allocated during COW has exactly the requested size and can
 +	 * not be split into smaller extents, otherwise relocation breaks and
 +	 * fails during the stage where it updates the bytenr of file extent
 +	 * items.
 +	 */
 +	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
 +		min_alloc_size = num_bytes;
 +	else
 +		min_alloc_size = fs_info->sectorsize;
  
  	while (num_bytes > 0) {
  		cur_alloc_size = num_bytes;
  		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
 -					   fs_info->sectorsize, 0, alloc_hint,
 +					   min_alloc_size, 0, alloc_hint,
  					   &ins, 1, 1);
  		if (ret < 0)
  			goto out_unlock;
@@@ -1097,7 -1082,7 +1098,7 @@@
  			 * skip current ordered extent.
  			 */
  			if (ret)
 -				btrfs_drop_extent_cache(BTRFS_I(inode), start,
 +				btrfs_drop_extent_cache(inode, start,
  						start + ram_size - 1, 0);
  		}
  
@@@ -1113,7 -1098,8 +1114,7 @@@
  		page_ops = unlock ? PAGE_UNLOCK : 0;
  		page_ops |= PAGE_SET_PRIVATE2;
  
 -		extent_clear_unlock_delalloc(inode, start,
 -					     start + ram_size - 1,
 +		extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
  					     locked_page,
  					     EXTENT_LOCKED | EXTENT_DELALLOC,
  					     page_ops);
@@@ -1137,7 -1123,7 +1138,7 @@@ out
  	return ret;
  
  out_drop_extent_cache:
 -	btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
 +	btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
  out_reserve:
  	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
  	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
@@@ -1234,13 -1220,13 +1235,13 @@@ static noinline void async_cow_free(str
  		kvfree(async_chunk->pending);
  }
  
 -static int cow_file_range_async(struct inode *inode,
 +static int cow_file_range_async(struct btrfs_inode *inode,
  				struct writeback_control *wbc,
  				struct page *locked_page,
  				u64 start, u64 end, int *page_started,
  				unsigned long *nr_written)
  {
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
  	struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
  	struct async_cow *ctx;
  	struct async_chunk *async_chunk;
@@@ -1252,9 -1238,9 +1253,9 @@@
  	unsigned nofs_flag;
  	const unsigned int write_flags = wbc_to_write_flags(wbc);
  
 -	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
 +	unlock_extent(&inode->io_tree, start, end);
  
 -	if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
 +	if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
  	    !btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
  		num_chunks = 1;
  		should_compress = false;
@@@ -1292,9 -1278,9 +1293,9 @@@
  		 * igrab is called higher up in the call chain, take only the
  		 * lightweight reference for the callback lifetime
  		 */
 -		ihold(inode);
 +		ihold(&inode->vfs_inode);
  		async_chunk[i].pending = &ctx->num_chunks;
 -		async_chunk[i].inode = inode;
 +		async_chunk[i].inode = &inode->vfs_inode;
  		async_chunk[i].start = start;
  		async_chunk[i].end = cur_end;
  		async_chunk[i].write_flags = write_flags;
@@@ -1371,15 -1357,13 +1372,15 @@@ static noinline int csum_exist_in_range
  	return 1;
  }
  
 -static int fallback_to_cow(struct inode *inode, struct page *locked_page,
 +static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
  			   const u64 start, const u64 end,
  			   int *page_started, unsigned long *nr_written)
  {
 -	const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
 +	const bool is_space_ino = btrfs_is_free_space_inode(inode);
 +	const bool is_reloc_ino = (inode->root->root_key.objectid ==
 +				   BTRFS_DATA_RELOC_TREE_OBJECTID);
  	const u64 range_bytes = end + 1 - start;
 -	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 +	struct extent_io_tree *io_tree = &inode->io_tree;
  	u64 range_start = start;
  	u64 count;
  
@@@ -1408,23 -1392,18 +1409,23 @@@
  	 *    data space info, which we incremented in the step above.
  	 *
  	 * If we need to fallback to cow and the inode corresponds to a free
 -	 * space cache inode, we must also increment bytes_may_use of the data
 -	 * space_info for the same reason. Space caches always get a prealloc
 +	 * space cache inode or an inode of the data relocation tree, we must
 +	 * also increment bytes_may_use of the data space_info for the same
 +	 * reason. Space caches and relocated data extents always get a prealloc
  	 * extent for them, however scrub or balance may have set the block
 -	 * group that contains that extent to RO mode.
 +	 * group that contains that extent to RO mode and therefore force COW
 +	 * when starting writeback.
  	 */
  	count = count_range_bits(io_tree, &range_start, end, range_bytes,
  				 EXTENT_NORESERVE, 0);
 -	if (count > 0 || is_space_ino) {
 -		const u64 bytes = is_space_ino ? range_bytes : count;
 -		struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 +	if (count > 0 || is_space_ino || is_reloc_ino) {
 +		u64 bytes = count;
 +		struct btrfs_fs_info *fs_info = inode->root->fs_info;
  		struct btrfs_space_info *sinfo = fs_info->data_sinfo;
  
 +		if (is_space_ino || is_reloc_ino)
 +			bytes = range_bytes;
 +
  		spin_lock(&sinfo->lock);
  		btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
  		spin_unlock(&sinfo->lock);
@@@ -1445,21 -1424,21 +1446,21 @@@
   * If no cow copies or snapshots exist, we write directly to the existing
   * blocks on disk
   */
 -static noinline int run_delalloc_nocow(struct inode *inode,
 +static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
  				       struct page *locked_page,
  				       const u64 start, const u64 end,
  				       int *page_started, int force,
  				       unsigned long *nr_written)
  {
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 -	struct btrfs_root *root = BTRFS_I(inode)->root;
 +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 +	struct btrfs_root *root = inode->root;
  	struct btrfs_path *path;
  	u64 cow_start = (u64)-1;
  	u64 cur_offset = start;
  	int ret;
  	bool check_prev = true;
 -	const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
 -	u64 ino = btrfs_ino(BTRFS_I(inode));
 +	const bool freespace_inode = btrfs_is_free_space_inode(inode);
 +	u64 ino = btrfs_ino(inode);
  	bool nocow = false;
  	u64 disk_bytenr = 0;
  
@@@ -1685,11 -1664,15 +1686,11 @@@ out_check
  		 * NOCOW, following one which needs to be COW'ed
  		 */
  		if (cow_start != (u64)-1) {
 -			ret = fallback_to_cow(inode, locked_page, cow_start,
 -					      found_key.offset - 1,
 +			ret = fallback_to_cow(inode, locked_page,
 +					      cow_start, found_key.offset - 1,
  					      page_started, nr_written);
 -			if (ret) {
 -				if (nocow)
 -					btrfs_dec_nocow_writers(fs_info,
 -								disk_bytenr);
 +			if (ret)
  				goto error;
 -			}
  			cow_start = (u64)-1;
  		}
  
@@@ -1705,6 -1688,9 +1706,6 @@@
  					  ram_bytes, BTRFS_COMPRESS_NONE,
  					  BTRFS_ORDERED_PREALLOC);
  			if (IS_ERR(em)) {
 -				if (nocow)
 -					btrfs_dec_nocow_writers(fs_info,
 -								disk_bytenr);
  				ret = PTR_ERR(em);
  				goto error;
  			}
@@@ -1714,7 -1700,8 +1715,7 @@@
  						       num_bytes,
  						       BTRFS_ORDERED_PREALLOC);
  			if (ret) {
 -				btrfs_drop_extent_cache(BTRFS_I(inode),
 -							cur_offset,
 +				btrfs_drop_extent_cache(inode, cur_offset,
  							cur_offset + num_bytes - 1,
  							0);
  				goto error;
@@@ -1790,11 -1777,11 +1791,11 @@@ error
  	return ret;
  }
  
 -static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
 +static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
  {
  
 -	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
 -	    !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
 +	if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
 +	    !(inode->flags & BTRFS_INODE_PREALLOC))
  		return 0;
  
  	/*
@@@ -1802,8 -1789,9 +1803,8 @@@
  	 * if is not zero, it means the file is defragging.
  	 * Force cow if given extent needs to be defragged.
  	 */
 -	if (BTRFS_I(inode)->defrag_bytes &&
 -	    test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
 -			   EXTENT_DEFRAG, 0, NULL))
 +	if (inode->defrag_bytes &&
 +	    test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
  		return 1;
  
  	return 0;
@@@ -1813,25 -1801,26 +1814,25 @@@
   * Function to process delayed allocation (create CoW) for ranges which are
   * being touched for the first time.
   */
 -int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
 +int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
  		u64 start, u64 end, int *page_started, unsigned long *nr_written,
  		struct writeback_control *wbc)
  {
  	int ret;
  	int force_cow = need_force_cow(inode, start, end);
  
 -	if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
 +	if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
  		ret = run_delalloc_nocow(inode, locked_page, start, end,
  					 page_started, 1, nr_written);
 -	} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
 +	} else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
  		ret = run_delalloc_nocow(inode, locked_page, start, end,
  					 page_started, 0, nr_written);
  	} else if (!inode_can_compress(inode) ||
  		   !inode_need_compress(inode, start, end)) {
  		ret = cow_file_range(inode, locked_page, start, end,
 -				      page_started, nr_written, 1);
 +				     page_started, nr_written, 1);
  	} else {
 -		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
 -			&BTRFS_I(inode)->runtime_flags);
 +		set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
  		ret = cow_file_range_async(inode, wbc, locked_page, start, end,
  					   page_started, nr_written);
  	}
@@@ -2080,7 -2069,9 +2081,7 @@@ void btrfs_clear_delalloc_extent(struc
  		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
  		    do_list && !(state->state & EXTENT_NORESERVE) &&
  		    (*bits & EXTENT_CLEAR_DATA_RESV))
 -			btrfs_free_reserved_data_space_noquota(
 -					&inode->vfs_inode,
 -					state->start, len);
 +			btrfs_free_reserved_data_space_noquota(fs_info, len);
  
  		percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
  					 fs_info->delalloc_batch);
@@@ -2156,7 -2147,7 +2157,7 @@@ static blk_status_t btrfs_submit_bio_st
  	struct inode *inode = private_data;
  	blk_status_t ret = 0;
  
 -	ret = btrfs_csum_one_bio(inode, bio, 0, 0);
 +	ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
  	BUG_ON(ret); /* -ENOMEM */
  	return 0;
  }
@@@ -2221,7 -2212,7 +2222,7 @@@ static blk_status_t btrfs_submit_bio_ho
  					  0, inode, btrfs_submit_bio_start);
  		goto out;
  	} else if (!skip_sum) {
 -		ret = btrfs_csum_one_bio(inode, bio, 0, 0);
 +		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
  		if (ret)
  			goto out;
  	}
@@@ -2258,13 -2249,13 +2259,13 @@@ static noinline int add_pending_csums(s
  	return 0;
  }
  
 -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 +int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
  			      unsigned int extra_bits,
  			      struct extent_state **cached_state)
  {
  	WARN_ON(PAGE_ALIGNED(end));
 -	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
 -				   extra_bits, cached_state);
 +	return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
 +				   cached_state);
  }
  
  /* see btrfs_writepage_start_hook for details on why this is required */
@@@ -2281,7 -2272,7 +2282,7 @@@ static void btrfs_writepage_fixup_worke
  	struct extent_state *cached_state = NULL;
  	struct extent_changeset *data_reserved = NULL;
  	struct page *page;
 -	struct inode *inode;
 +	struct btrfs_inode *inode;
  	u64 page_start;
  	u64 page_end;
  	int ret = 0;
@@@ -2289,7 -2280,7 +2290,7 @@@
  
  	fixup = container_of(work, struct btrfs_writepage_fixup, work);
  	page = fixup->page;
 -	inode = fixup->inode;
 +	inode = BTRFS_I(fixup->inode);
  	page_start = page_offset(page);
  	page_end = page_offset(page) + PAGE_SIZE - 1;
  
@@@ -2326,7 -2317,8 +2327,7 @@@ again
  		 *    when the page was already properly dealt with.
  		 */
  		if (!ret) {
 -			btrfs_delalloc_release_extents(BTRFS_I(inode),
 -						       PAGE_SIZE);
 +			btrfs_delalloc_release_extents(inode, PAGE_SIZE);
  			btrfs_delalloc_release_space(inode, data_reserved,
  						     page_start, PAGE_SIZE,
  						     true);
@@@ -2342,18 -2334,20 +2343,18 @@@
  	if (ret)
  		goto out_page;
  
 -	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
 -			 &cached_state);
 +	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
  
  	/* already ordered? We're done */
  	if (PagePrivate2(page))
  		goto out_reserved;
  
 -	ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
 -					PAGE_SIZE);
 +	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
  	if (ordered) {
 -		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
 -				     page_end, &cached_state);
 +		unlock_extent_cached(&inode->io_tree, page_start, page_end,
 +				     &cached_state);
  		unlock_page(page);
 -		btrfs_start_ordered_extent(inode, ordered, 1);
 +		btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
  		btrfs_put_ordered_extent(ordered);
  		goto again;
  	}
@@@ -2373,11 -2367,11 +2374,11 @@@
  	BUG_ON(!PageDirty(page));
  	free_delalloc_space = false;
  out_reserved:
 -	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 +	btrfs_delalloc_release_extents(inode, PAGE_SIZE);
  	if (free_delalloc_space)
  		btrfs_delalloc_release_space(inode, data_reserved, page_start,
  					     PAGE_SIZE, true);
 -	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
 +	unlock_extent_cached(&inode->io_tree, page_start, page_end,
  			     &cached_state);
  out_page:
  	if (ret) {
@@@ -2400,7 -2394,7 +2401,7 @@@
  	 * that could need flushing space. Recursing back to fixup worker would
  	 * deadlock.
  	 */
 -	btrfs_add_delayed_iput(inode);
 +	btrfs_add_delayed_iput(&inode->vfs_inode);
  }
  
  /*
@@@ -2456,18 -2450,18 +2457,18 @@@ int btrfs_writepage_cow_fixup(struct pa
  }
  
  static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 -				       struct inode *inode, u64 file_pos,
 -				       u64 disk_bytenr, u64 disk_num_bytes,
 -				       u64 num_bytes, u64 ram_bytes,
 -				       u8 compression, u8 encryption,
 -				       u16 other_encoding, int extent_type)
 +				       struct btrfs_inode *inode, u64 file_pos,
 +				       struct btrfs_file_extent_item *stack_fi,
 +				       u64 qgroup_reserved)
  {
 -	struct btrfs_root *root = BTRFS_I(inode)->root;
 -	struct btrfs_file_extent_item *fi;
 +	struct btrfs_root *root = inode->root;
  	struct btrfs_path *path;
  	struct extent_buffer *leaf;
  	struct btrfs_key ins;
 -	u64 qg_released;
 +	u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
 +	u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
 +	u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
 +	u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
  	int extent_inserted = 0;
  	int ret;
  
@@@ -2486,42 -2480,60 +2487,42 @@@
  	 */
  	ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
  				   file_pos + num_bytes, NULL, 0,
 -				   1, sizeof(*fi), &extent_inserted);
 +				   1, sizeof(*stack_fi), &extent_inserted);
  	if (ret)
  		goto out;
  
  	if (!extent_inserted) {
 -		ins.objectid = btrfs_ino(BTRFS_I(inode));
 +		ins.objectid = btrfs_ino(inode);
  		ins.offset = file_pos;
  		ins.type = BTRFS_EXTENT_DATA_KEY;
  
  		path->leave_spinning = 1;
  		ret = btrfs_insert_empty_item(trans, root, path, &ins,
 -					      sizeof(*fi));
 +					      sizeof(*stack_fi));
  		if (ret)
  			goto out;
  	}
  	leaf = path->nodes[0];
 -	fi = btrfs_item_ptr(leaf, path->slots[0],
 -			    struct btrfs_file_extent_item);
 -	btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 -	btrfs_set_file_extent_type(leaf, fi, extent_type);
 -	btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
 -	btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
 -	btrfs_set_file_extent_offset(leaf, fi, 0);
 -	btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
 -	btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
 -	btrfs_set_file_extent_compression(leaf, fi, compression);
 -	btrfs_set_file_extent_encryption(leaf, fi, encryption);
 -	btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
 +	btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
 +	write_extent_buffer(leaf, stack_fi,
 +			btrfs_item_ptr_offset(leaf, path->slots[0]),
 +			sizeof(struct btrfs_file_extent_item));
  
  	btrfs_mark_buffer_dirty(leaf);
  	btrfs_release_path(path);
  
 -	inode_add_bytes(inode, num_bytes);
 +	inode_add_bytes(&inode->vfs_inode, num_bytes);
  
  	ins.objectid = disk_bytenr;
  	ins.offset = disk_num_bytes;
  	ins.type = BTRFS_EXTENT_ITEM_KEY;
  
 -	ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
 -						ram_bytes);
 +	ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
  	if (ret)
  		goto out;
  
 -	/*
 -	 * Release the reserved range from inode dirty range map, as it is
 -	 * already moved into delayed_ref_head
 -	 */
 -	ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
 -	if (ret < 0)
 -		goto out;
 -	qg_released = ret;
 -	ret = btrfs_alloc_reserved_file_extent(trans, root,
 -					       btrfs_ino(BTRFS_I(inode)),
 -					       file_pos, qg_released, &ins);
 +	ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
 +					       file_pos, qgroup_reserved, &ins);
  out:
  	btrfs_free_path(path);
  
@@@ -2543,33 -2555,7 +2544,33 @@@ static void btrfs_release_delalloc_byte
  	btrfs_put_block_group(cache);
  }
  
 -/* as ordered data IO finishes, this gets called so we can finish
 +static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
 +					     struct inode *inode,
 +					     struct btrfs_ordered_extent *oe)
 +{
 +	struct btrfs_file_extent_item stack_fi;
 +	u64 logical_len;
 +
 +	memset(&stack_fi, 0, sizeof(stack_fi));
 +	btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
 +	btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
 +	btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
 +						   oe->disk_num_bytes);
 +	if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
 +		logical_len = oe->truncated_len;
 +	else
 +		logical_len = oe->num_bytes;
 +	btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
 +	btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
 +	btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
 +	/* Encryption and other_encoding are reserved; memset() left them 0 */
 +
 +	return insert_reserved_file_extent(trans, BTRFS_I(inode), oe->file_offset,
 +					   &stack_fi, oe->qgroup_rsv);
 +}
 +
 +/*
 + * As ordered data IO finishes, this gets called so we can finish
   * an ordered extent if the range of bytes in the file it covers are
   * fully written.
   */
@@@ -2620,6 -2606,13 +2621,6 @@@ static int btrfs_finish_ordered_io(stru
  	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
  		BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
  
 -		/*
 -		 * For mwrite(mmap + memset to write) case, we still reserve
 -		 * space for NOCOW range.
 -		 * As NOCOW won't cause a new delayed ref, just free the space
 -		 */
 -		btrfs_qgroup_free_data(inode, NULL, start,
 -				       ordered_extent->num_bytes);
  		btrfs_inode_safe_disk_i_size_write(inode, 0);
  		if (freespace_inode)
  			trans = btrfs_join_transaction_spacecache(root);
@@@ -2656,14 -2649,20 +2657,14 @@@
  		compress_type = ordered_extent->compress_type;
  	if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
  		BUG_ON(compress_type);
 -		btrfs_qgroup_free_data(inode, NULL, start,
 -				       ordered_extent->num_bytes);
  		ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
  						ordered_extent->file_offset,
  						ordered_extent->file_offset +
  						logical_len);
  	} else {
  		BUG_ON(root == fs_info->tree_root);
 -		ret = insert_reserved_file_extent(trans, inode, start,
 -						ordered_extent->disk_bytenr,
 -						ordered_extent->disk_num_bytes,
 -						logical_len, logical_len,
 -						compress_type, 0, 0,
 -						BTRFS_FILE_EXTENT_REG);
 +		ret = insert_ordered_extent_file_extent(trans, inode,
 +							ordered_extent);
  		if (!ret) {
  			clear_reserved_extent = false;
  			btrfs_release_delalloc_bytes(fs_info,
@@@ -2815,9 -2814,6 +2816,9 @@@ static int check_data_csum(struct inod
  zeroit:
  	btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
  				    io_bio->mirror_num);
 +	if (io_bio->device)
 +		btrfs_dev_stat_inc_and_print(io_bio->device,
 +					     BTRFS_DEV_STAT_CORRUPTION_ERRS);
  	memset(kaddr + pgoff, 1, len);
  	flush_dcache_page(page);
  	kunmap_atomic(kaddr);
@@@ -3336,14 -3332,6 +3337,14 @@@ cache_index
  	 */
  	BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
  
 +	/*
 +	 * Same logic as for last_unlink_trans. We don't persist the generation
 +	 * of the last transaction where this inode was used for a reflink
 +	 * operation, so after eviction and reloading the inode we must be
 +	 * pessimistic and assume the last transaction that modified the inode.
 +	 */
 +	BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
 +
  	path->slots[0]++;
  	if (inode->i_nlink != 1 ||
  	    path->slots[0] >= btrfs_header_nritems(leaf))
@@@ -3492,7 -3480,7 +3493,7 @@@ static noinline int btrfs_update_inode_
  
  	fill_inode_item(trans, leaf, inode_item, inode);
  	btrfs_mark_buffer_dirty(leaf);
 -	btrfs_set_inode_last_trans(trans, inode);
 +	btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
  	ret = 0;
  failed:
  	btrfs_free_path(path);
@@@ -3522,7 -3510,7 +3523,7 @@@ noinline int btrfs_update_inode(struct 
  
  		ret = btrfs_delayed_update_inode(trans, root, inode);
  		if (!ret)
 -			btrfs_set_inode_last_trans(trans, inode);
 +			btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
  		return ret;
  	}
  
@@@ -4037,8 -4025,6 +4038,8 @@@ int btrfs_delete_subvolume(struct inod
  		}
  	}
  
 +	free_anon_bdev(dest->anon_dev);
 +	dest->anon_dev = 0;
  out_end_trans:
  	trans->block_rsv = NULL;
  	trans->bytes_reserved = 0;
@@@ -4509,13 -4495,11 +4510,13 @@@ int btrfs_truncate_block(struct inode *
  	struct extent_state *cached_state = NULL;
  	struct extent_changeset *data_reserved = NULL;
  	char *kaddr;
 +	bool only_release_metadata = false;
  	u32 blocksize = fs_info->sectorsize;
  	pgoff_t index = from >> PAGE_SHIFT;
  	unsigned offset = from & (blocksize - 1);
  	struct page *page;
  	gfp_t mask = btrfs_alloc_write_mask(mapping);
 +	size_t write_bytes = blocksize;
  	int ret = 0;
  	u64 block_start;
  	u64 block_end;
@@@ -4527,28 -4511,15 +4528,28 @@@
  	block_start = round_down(from, blocksize);
  	block_end = block_start + blocksize - 1;
  
 -	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
 -					   block_start, blocksize);
 -	if (ret)
 +	ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
 +					  block_start, blocksize);
 +	if (ret < 0) {
 +		if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
 +					   &write_bytes) > 0) {
 +			/* For nocow case, no need to reserve data space */
 +			only_release_metadata = true;
 +		} else {
 +			goto out;
 +		}
 +	}
 +	ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
 +	if (ret < 0) {
 +		if (!only_release_metadata)
 +			btrfs_free_reserved_data_space(BTRFS_I(inode),
 +					data_reserved, block_start, blocksize);
  		goto out;
 -
 +	}
  again:
  	page = find_or_create_page(mapping, index, mask);
  	if (!page) {
 -		btrfs_delalloc_release_space(inode, data_reserved,
 +		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
  					     block_start, blocksize, true);
  		btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
  		ret = -ENOMEM;
@@@ -4573,7 -4544,7 +4574,7 @@@
  	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
  	set_page_extent_mapped(page);
  
 -	ordered = btrfs_lookup_ordered_extent(inode, block_start);
 +	ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
  	if (ordered) {
  		unlock_extent_cached(io_tree, block_start, block_end,
  				     &cached_state);
@@@ -4588,7 -4559,7 +4589,7 @@@
  			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
  			 0, 0, &cached_state);
  
 -	ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
 +	ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
  					&cached_state);
  	if (ret) {
  		unlock_extent_cached(io_tree, block_start, block_end,
@@@ -4613,26 -4584,14 +4614,26 @@@
  	set_page_dirty(page);
  	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
  
 +	if (only_release_metadata)
 +		set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
 +				block_end, EXTENT_NORESERVE, NULL, NULL,
 +				GFP_NOFS);
 +
  out_unlock:
 -	if (ret)
 -		btrfs_delalloc_release_space(inode, data_reserved, block_start,
 -					     blocksize, true);
 +	if (ret) {
 +		if (only_release_metadata)
 +			btrfs_delalloc_release_metadata(BTRFS_I(inode),
 +					blocksize, true);
 +		else
 +			btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
 +					block_start, blocksize, true);
 +	}
  	btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
  	unlock_page(page);
  	put_page(page);
  out:
 +	if (only_release_metadata)
 +		btrfs_check_nocow_unlock(BTRFS_I(inode));
  	extent_changeset_free(data_reserved);
  	return ret;
  }
@@@ -4990,8 -4949,7 +4991,8 @@@ static void evict_inode_truncate_pages(
  		 * Note, end is the bytenr of last byte, so we need + 1 here.
  		 */
  		if (state_flags & EXTENT_DELALLOC)
 -			btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
 +			btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
 +					       end - start + 1);
  
  		clear_extent_bit(io_tree, start, end,
  				 EXTENT_LOCKED | EXTENT_DELALLOC |
@@@ -6066,7 -6024,7 +6067,7 @@@ static struct inode *btrfs_new_inode(st
  	inode_tree_add(inode);
  
  	trace_btrfs_inode_new(inode);
 -	btrfs_set_inode_last_trans(trans, inode);
 +	btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
  
  	btrfs_update_root_times(trans, root);
  
@@@ -6875,7 -6833,7 +6876,7 @@@ out
  	return em;
  }
  
 -static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
 +static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
  						  const u64 start,
  						  const u64 len,
  						  const u64 orig_start,
@@@ -6889,19 -6847,21 +6890,19 @@@
  	int ret;
  
  	if (type != BTRFS_ORDERED_NOCOW) {
 -		em = create_io_em(inode, start, len, orig_start,
 -				  block_start, block_len, orig_block_len,
 -				  ram_bytes,
 +		em = create_io_em(inode, start, len, orig_start, block_start,
 +				  block_len, orig_block_len, ram_bytes,
  				  BTRFS_COMPRESS_NONE, /* compress_type */
  				  type);
  		if (IS_ERR(em))
  			goto out;
  	}
 -	ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
 -					   len, block_len, type);
 +	ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
 +					   block_len, type);
  	if (ret) {
  		if (em) {
  			free_extent_map(em);
 -			btrfs_drop_extent_cache(BTRFS_I(inode), start,
 -						start + len - 1, 0);
 +			btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
  		}
  		em = ERR_PTR(ret);
  	}
@@@ -6910,11 -6870,11 +6911,11 @@@
  	return em;
  }
  
 -static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 +static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
  						  u64 start, u64 len)
  {
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 -	struct btrfs_root *root = BTRFS_I(inode)->root;
 +	struct btrfs_root *root = inode->root;
 +	struct btrfs_fs_info *fs_info = root->fs_info;
  	struct extent_map *em;
  	struct btrfs_key ins;
  	u64 alloc_hint;
@@@ -6931,32 -6891,15 +6932,32 @@@
  				     ins.offset, BTRFS_ORDERED_REGULAR);
  	btrfs_dec_block_group_reservations(fs_info, ins.objectid);
  	if (IS_ERR(em))
 -		btrfs_free_reserved_extent(fs_info, ins.objectid,
 -					   ins.offset, 1);
 +		btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
 +					   1);
  
  	return em;
  }
  
  /*
 - * returns 1 when the nocow is safe, < 1 on error, 0 if the
 - * block must be cow'd
 + * Check if we can do nocow write into the range [@offset, @offset + @len)
 + *
 + * @offset:	File offset
 + * @len:	The length to write, will be updated to the nocow writeable
 + *		range
 + * @orig_start:	(optional) Return the original file offset of the file extent
 + * @orig_len:	(optional) Return the original on-disk length of the file extent
 + * @ram_bytes:	(optional) Return the ram_bytes of the file extent
 + *
 + * This function will flush ordered extents in the range to ensure proper
 + * nocow checks for (nowait == false) case.
 + *
 + * Return:
 + * >0	and update @len if we can do nocow write
 + *  0	if we can't do nocow write
 + * <0	if error happened
 + *
 + * NOTE: This only checks the file extents, caller is responsible to wait for
 + *	 any ordered extents.
   */
  noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
  			      u64 *orig_start, u64 *orig_block_len,
@@@ -7183,8 -7126,8 +7184,8 @@@ static int lock_extent_direct(struct in
  }
  
  /* The callers of this must take lock_extent() */
 -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
 -				       u64 orig_start, u64 block_start,
 +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
 +				       u64 len, u64 orig_start, u64 block_start,
  				       u64 block_len, u64 orig_block_len,
  				       u64 ram_bytes, int compress_type,
  				       int type)
@@@ -7198,7 -7141,7 +7199,7 @@@
  	       type == BTRFS_ORDERED_NOCOW ||
  	       type == BTRFS_ORDERED_REGULAR);
  
 -	em_tree = &BTRFS_I(inode)->extent_tree;
 +	em_tree = &inode->extent_tree;
  	em = alloc_extent_map();
  	if (!em)
  		return ERR_PTR(-ENOMEM);
@@@ -7220,8 -7163,8 +7221,8 @@@
  	}
  
  	do {
 -		btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
 -				em->start + em->len - 1, 0);
 +		btrfs_drop_extent_cache(inode, em->start,
 +					em->start + em->len - 1, 0);
  		write_lock(&em_tree->lock);
  		ret = add_extent_mapping(em_tree, em, 1);
  		write_unlock(&em_tree->lock);
@@@ -7300,7 -7243,7 +7301,7 @@@ static int btrfs_get_blocks_direct_writ
  		    btrfs_inc_nocow_writers(fs_info, block_start)) {
  			struct extent_map *em2;
  
 -			em2 = btrfs_create_dio_extent(inode, start, len,
 +			em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
  						      orig_start, block_start,
  						      len, orig_block_len,
  						      ram_bytes, type);
@@@ -7319,7 -7262,8 +7320,7 @@@
  			 * use the existing or preallocated extent, so does not
  			 * need to adjust btrfs_space_info's bytes_may_use.
  			 */
 -			btrfs_free_reserved_data_space_noquota(inode, start,
 -							       len);
 +			btrfs_free_reserved_data_space_noquota(fs_info, len);
  			goto skip_cow;
  		}
  	}
@@@ -7327,7 -7271,7 +7328,7 @@@
  	/* this will cow the extent */
  	len = bh_result->b_size;
  	free_extent_map(em);
 -	*map = em = btrfs_new_extent_direct(inode, start, len);
 +	*map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
  	if (IS_ERR(em)) {
  		ret = PTR_ERR(em);
  		goto out;
@@@ -7478,8 -7422,7 +7479,8 @@@ static void btrfs_dio_private_put(struc
  		return;
  
  	if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
 -		__endio_write_update_ordered(dip->inode, dip->logical_offset,
 +		__endio_write_update_ordered(BTRFS_I(dip->inode),
 +					     dip->logical_offset,
  					     dip->bytes,
  					     !dip->dio_bio->bi_status);
  	} else {
@@@ -7565,18 -7508,18 +7566,18 @@@ static blk_status_t btrfs_check_read_di
  	return err;
  }
  
 -static void __endio_write_update_ordered(struct inode *inode,
 +static void __endio_write_update_ordered(struct btrfs_inode *inode,
  					 const u64 offset, const u64 bytes,
  					 const bool uptodate)
  {
 -	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
  	struct btrfs_ordered_extent *ordered = NULL;
  	struct btrfs_workqueue *wq;
  	u64 ordered_offset = offset;
  	u64 ordered_bytes = bytes;
  	u64 last_offset;
  
 -	if (btrfs_is_free_space_inode(BTRFS_I(inode)))
 +	if (btrfs_is_free_space_inode(inode))
  		wq = fs_info->endio_freespace_worker;
  	else
  		wq = fs_info->endio_write_workers;
@@@ -7584,9 -7527,9 +7585,9 @@@
  	while (ordered_offset < offset + bytes) {
  		last_offset = ordered_offset;
  		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
 -							   &ordered_offset,
 -							   ordered_bytes,
 -							   uptodate)) {
 +							 &ordered_offset,
 +							 ordered_bytes,
 +							 uptodate)) {
  			btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
  					NULL);
  			btrfs_queue_work(wq, &ordered->work);
@@@ -7613,7 -7556,7 +7614,7 @@@ static blk_status_t btrfs_submit_bio_st
  {
  	struct inode *inode = private_data;
  	blk_status_t ret;
 -	ret = btrfs_csum_one_bio(inode, bio, offset, 1);
 +	ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
  	BUG_ON(ret); /* -ENOMEM */
  	return 0;
  }
@@@ -7674,7 -7617,7 +7675,7 @@@ static inline blk_status_t btrfs_submit
  		 * If we aren't doing async submit, calculate the csum of the
  		 * bio now.
  		 */
 -		ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
 +		ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
  		if (ret)
  			goto err;
  	} else {
@@@ -7923,8 -7866,11 +7924,8 @@@ static ssize_t btrfs_direct_IO(struct k
  			dio_data.overwrite = 1;
  			inode_unlock(inode);
  			relock = true;
 -		} else if (iocb->ki_flags & IOCB_NOWAIT) {
 -			ret = -EAGAIN;
 -			goto out;
  		}
 -		ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
 +		ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
  						   offset, count);
  		if (ret)
  			goto out;
@@@ -7956,9 -7902,8 +7957,9 @@@
  		current->journal_info = NULL;
  		if (ret < 0 && ret != -EIOCBQUEUED) {
  			if (dio_data.reserve)
 -				btrfs_delalloc_release_space(inode, data_reserved,
 -					offset, dio_data.reserve, true);
 +				btrfs_delalloc_release_space(BTRFS_I(inode),
 +					data_reserved, offset, dio_data.reserve,
 +					true);
  			/*
  			 * On error we might have left some ordered extents
  			 * without submitting corresponding bios for them, so
@@@ -7967,13 -7912,13 +7968,13 @@@
  			 */
  			if (dio_data.unsubmitted_oe_range_start <
  			    dio_data.unsubmitted_oe_range_end)
 -				__endio_write_update_ordered(inode,
 +				__endio_write_update_ordered(BTRFS_I(inode),
  					dio_data.unsubmitted_oe_range_start,
  					dio_data.unsubmitted_oe_range_end -
  					dio_data.unsubmitted_oe_range_start,
  					false);
  		} else if (ret >= 0 && (size_t)ret < count)
 -			btrfs_delalloc_release_space(inode, data_reserved,
 +			btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
  					offset, count - (size_t)ret, true);
  		btrfs_delalloc_release_extents(BTRFS_I(inode), count);
  	}
@@@ -7988,7 -7933,7 +7989,7 @@@ out
  }
  
  static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 -		__u64 start, __u64 len)
 +			u64 start, u64 len)
  {
  	int	ret;
  
@@@ -8165,17 -8110,20 +8166,17 @@@ again
  	/*
  	 * Qgroup reserved space handler
  	 * Page here will be either
 -	 * 1) Already written to disk
 -	 *    In this case, its reserved space is released from data rsv map
 -	 *    and will be freed by delayed_ref handler finally.
 -	 *    So even we call qgroup_free_data(), it won't decrease reserved
 -	 *    space.
 -	 * 2) Not written to disk
 -	 *    This means the reserved space should be freed here. However,
 -	 *    if a truncate invalidates the page (by clearing PageDirty)
 -	 *    and the page is accounted for while allocating extent
 -	 *    in btrfs_check_data_free_space() we let delayed_ref to
 -	 *    free the entire extent.
 +	 * 1) Already written to disk or ordered extent already submitted
 +	 *    Then its QGROUP_RESERVED bit in io_tree is already cleared.
 +	 *    Qgroup will be handled by its qgroup_record then.
 +	 *    btrfs_qgroup_free_data() call will do nothing here.
 +	 *
 +	 * 2) Not written to disk yet
 +	 *    Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
 +	 *    bit of its io_tree, and free the qgroup reserved data space,
 +	 *    since the IO will never happen for this page.
  	 */
 -	if (PageDirty(page))
 -		btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
 +	btrfs_qgroup_free_data(BTRFS_I(inode), NULL, page_start, PAGE_SIZE);
  	if (!inode_evicting) {
  		clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
  				 EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
@@@ -8239,8 -8187,8 +8240,8 @@@ vm_fault_t btrfs_page_mkwrite(struct vm
  	 * end up waiting indefinitely to get a lock on the page currently
  	 * being processed by btrfs_page_mkwrite() function.
  	 */
 -	ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
 -					   reserved_space);
 +	ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
 +					    page_start, reserved_space);
  	if (!ret2) {
  		ret2 = file_update_time(vmf->vma->vm_file);
  		reserved = 1;
@@@ -8287,9 -8235,9 +8288,9 @@@ again
  					  fs_info->sectorsize);
  		if (reserved_space < PAGE_SIZE) {
  			end = page_start + reserved_space - 1;
 -			btrfs_delalloc_release_space(inode, data_reserved,
 -					page_start, PAGE_SIZE - reserved_space,
 -					true);
 +			btrfs_delalloc_release_space(BTRFS_I(inode),
 +					data_reserved, page_start,
 +					PAGE_SIZE - reserved_space, true);
  		}
  	}
  
@@@ -8304,7 -8252,7 +8305,7 @@@
  			  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
  			  EXTENT_DEFRAG, 0, 0, &cached_state);
  
 -	ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
 +	ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
  					&cached_state);
  	if (ret2) {
  		unlock_extent_cached(io_tree, page_start, page_end,
@@@ -8344,7 -8292,7 +8345,7 @@@ out_unlock
  	unlock_page(page);
  out:
  	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
 -	btrfs_delalloc_release_space(inode, data_reserved, page_start,
 +	btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
  				     reserved_space, (ret != 0));
  out_noreserve:
  	sb_end_pagefault(inode->i_sb);
@@@ -8558,7 -8506,6 +8559,7 @@@ struct inode *btrfs_alloc_inode(struct 
  	ei->index_cnt = (u64)-1;
  	ei->dir_index = 0;
  	ei->last_unlink_trans = 0;
 +	ei->last_reflink_trans = 0;
  	ei->last_log_commit = 0;
  
  	spin_lock_init(&ei->lock);
@@@ -8645,7 -8592,7 +8646,7 @@@ void btrfs_destroy_inode(struct inode *
  			btrfs_put_ordered_extent(ordered);
  		}
  	}
 -	btrfs_qgroup_check_reserved_leak(inode);
 +	btrfs_qgroup_check_reserved_leak(BTRFS_I(inode));
  	inode_tree_del(inode);
  	btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
  	btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
@@@ -9627,31 -9574,6 +9628,31 @@@ out_unlock
  	return err;
  }
  
 +static int insert_prealloc_file_extent(struct btrfs_trans_handle *trans,
 +				       struct inode *inode, struct btrfs_key *ins,
 +				       u64 file_offset)
 +{
 +	struct btrfs_file_extent_item stack_fi;
 +	u64 start = ins->objectid;
 +	u64 len = ins->offset;
 +	int ret;
 +
 +	memset(&stack_fi, 0, sizeof(stack_fi));
 +
 +	btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
 +	btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
 +	btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
 +	btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
 +	btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
 +	btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
 +	/* Encryption and other_encoding are reserved; memset() left them 0 */
 +
 +	ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
 +	if (ret < 0)
 +		return ret;
 +	return insert_reserved_file_extent(trans, BTRFS_I(inode), file_offset,
 +					   &stack_fi, ret);
 +}
  static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
  				       u64 start, u64 num_bytes, u64 min_size,
  				       loff_t actual_len, u64 *alloc_hint,
@@@ -9710,7 -9632,11 +9711,7 @@@
  		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
  
  		last_alloc = ins.offset;
 -		ret = insert_reserved_file_extent(trans, inode,
 -						  cur_offset, ins.objectid,
 -						  ins.offset, ins.offset,
 -						  ins.offset, 0, 0, 0,
 -						  BTRFS_FILE_EXTENT_PREALLOC);
 +		ret = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
  		if (ret) {
  			btrfs_free_reserved_extent(fs_info, ins.objectid,
  						   ins.offset, 0);
@@@ -9783,7 -9709,7 +9784,7 @@@ next
  			btrfs_end_transaction(trans);
  	}
  	if (clear_offset < end)
 -		btrfs_free_reserved_data_space(inode, NULL, clear_offset,
 +		btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
  			end - clear_offset + 1);
  	return ret;
  }
diff --combined include/linux/fs.h
index 2df72def1f59,9bf7a32f2932..407881ebeab1
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -175,9 -175,6 +175,9 @@@ typedef int (dio_iodone_t)(struct kioc
  /* File does not contribute to nr_files count */
  #define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)
  
 +/* File supports async buffered reads */
 +#define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
 +
  /*
   * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
   * that indicates that they should check the contents of the iovec are
@@@ -318,9 -315,6 +318,9 @@@ enum rw_hint 
  #define IOCB_SYNC		(1 << 5)
  #define IOCB_WRITE		(1 << 6)
  #define IOCB_NOWAIT		(1 << 7)
 +/* iocb->ki_waitq is valid */
 +#define IOCB_WAITQ		(1 << 8)
 +#define IOCB_NOIO		(1 << 9)
  
  struct kiocb {
  	struct file		*ki_filp;
@@@ -334,10 -328,7 +334,10 @@@
  	int			ki_flags;
  	u16			ki_hint;
  	u16			ki_ioprio; /* See linux/ioprio.h */
 -	unsigned int		ki_cookie; /* for ->iopoll */
 +	union {
 +		unsigned int		ki_cookie; /* for ->iopoll */
 +		struct wait_page_queue	*ki_waitq; /* for async buffered IO */
 +	};
  
  	randomized_struct_fields_end
  };
@@@ -479,6 -470,45 +479,6 @@@ struct address_space 
  	 * must be enforced here for CRIS, to let the least significant bit
  	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
  	 */
 -struct request_queue;
 -
 -struct block_device {
 -	dev_t			bd_dev;  /* not a kdev_t - it's a search key */
 -	int			bd_openers;
 -	struct inode *		bd_inode;	/* will die */
 -	struct super_block *	bd_super;
 -	struct mutex		bd_mutex;	/* open/close mutex */
 -	void *			bd_claiming;
 -	void *			bd_holder;
 -	int			bd_holders;
 -	bool			bd_write_holder;
 -#ifdef CONFIG_SYSFS
 -	struct list_head	bd_holder_disks;
 -#endif
 -	struct block_device *	bd_contains;
 -	unsigned		bd_block_size;
 -	u8			bd_partno;
 -	struct hd_struct *	bd_part;
 -	/* number of times partitions within this device have been opened. */
 -	unsigned		bd_part_count;
 -	int			bd_invalidated;
 -	struct gendisk *	bd_disk;
 -	struct request_queue *  bd_queue;
 -	struct backing_dev_info *bd_bdi;
 -	struct list_head	bd_list;
 -	/*
 -	 * Private data.  You must have bd_claim'ed the block_device
 -	 * to use this.  NOTE:  bd_claim allows an owner to claim
 -	 * the same device multiple times, the owner must take special
 -	 * care to not mess up bd_private for that case.
 -	 */
 -	unsigned long		bd_private;
 -
 -	/* The counter of freeze processes */
 -	int			bd_fsfreeze_count;
 -	/* Mutex for freeze */
 -	struct mutex		bd_fsfreeze_mutex;
 -} __randomize_layout;
  
  /* XArray tags, for tagging dirty and writeback pages in the pagecache. */
  #define PAGECACHE_TAG_DIRTY	XA_MARK_0
@@@ -528,7 -558,7 +528,7 @@@ static inline int mapping_mapped(struc
  
  /*
   * Might pages of this file have been modified in userspace?
 - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
 + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
   * marks vma as VM_SHARED if it is shared, and the file was opened for
   * writing i.e. vma may be mprotected writable even if now readonly.
   *
@@@ -877,6 -907,8 +877,6 @@@ static inline unsigned imajor(const str
  	return MAJOR(inode->i_rdev);
  }
  
 -extern struct block_device *I_BDEV(struct inode *inode);
 -
  struct fown_struct {
  	rwlock_t lock;          /* protects pid, uid, euid fields */
  	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
@@@ -1348,7 -1380,6 +1348,7 @@@ extern int send_sigurg(struct fown_stru
  #define SB_NODIRATIME	2048	/* Do not update directory access times */
  #define SB_SILENT	32768
  #define SB_POSIXACL	(1<<16)	/* VFS does not apply the umask */
 +#define SB_INLINECRYPT	(1<<17)	/* Use blk-crypto for encrypted files */
  #define SB_KERNMOUNT	(1<<22) /* this is a kern_mount call */
  #define SB_I_VERSION	(1<<23) /* Update inode I_version field */
  #define SB_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
@@@ -1712,10 -1743,6 +1712,10 @@@ int vfs_mkobj(struct dentry *, umode_t
  		int (*f)(struct dentry *, umode_t, void *),
  		void *);
  
 +int vfs_fchown(struct file *file, uid_t user, gid_t group);
 +int vfs_fchmod(struct file *file, umode_t mode);
 +int vfs_utimes(const struct path *path, struct timespec64 *times);
 +
  extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
  
  #ifdef CONFIG_COMPAT
@@@ -1747,6 -1774,14 +1747,6 @@@ struct dir_context 
  	loff_t pos;
  };
  
 -struct block_device_operations;
 -
 -/* These macros are for out of kernel modules to test that
 - * the kernel supports the unlocked_ioctl and compat_ioctl
 - * fields in struct file_operations. */
 -#define HAVE_COMPAT_IOCTL 1
 -#define HAVE_UNLOCKED_IOCTL 1
 -
  /*
   * These flags let !MMU mmap() govern direct device mapping vs immediate
   * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
@@@ -1882,6 -1917,7 +1882,6 @@@ ssize_t rw_copy_check_uvector(int type
  			      struct iovec *fast_pointer,
  			      struct iovec **ret_pointer);
  
 -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
  extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
  extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
@@@ -1946,27 -1982,27 +1946,27 @@@ struct super_operations 
  /*
   * Inode flags - they have no relation to superblock flags now
   */
- #define S_SYNC		1	/* Writes are synced at once */
- #define S_NOATIME	2	/* Do not update access times */
- #define S_APPEND	4	/* Append-only file */
- #define S_IMMUTABLE	8	/* Immutable file */
- #define S_DEAD		16	/* removed, but still open directory */
- #define S_NOQUOTA	32	/* Inode is not counted to quota */
- #define S_DIRSYNC	64	/* Directory modifications are synchronous */
- #define S_NOCMTIME	128	/* Do not update file c/mtime */
- #define S_SWAPFILE	256	/* Do not truncate: swapon got its bmaps */
- #define S_PRIVATE	512	/* Inode is fs-internal */
- #define S_IMA		1024	/* Inode has an associated IMA struct */
- #define S_AUTOMOUNT	2048	/* Automount/referral quasi-directory */
- #define S_NOSEC		4096	/* no suid or xattr security attributes */
+ #define S_SYNC		(1 << 0)  /* Writes are synced at once */
+ #define S_NOATIME	(1 << 1)  /* Do not update access times */
+ #define S_APPEND	(1 << 2)  /* Append-only file */
+ #define S_IMMUTABLE	(1 << 3)  /* Immutable file */
+ #define S_DEAD		(1 << 4)  /* removed, but still open directory */
+ #define S_NOQUOTA	(1 << 5)  /* Inode is not counted to quota */
+ #define S_DIRSYNC	(1 << 6)  /* Directory modifications are synchronous */
+ #define S_NOCMTIME	(1 << 7)  /* Do not update file c/mtime */
+ #define S_SWAPFILE	(1 << 8)  /* Do not truncate: swapon got its bmaps */
+ #define S_PRIVATE	(1 << 9)  /* Inode is fs-internal */
+ #define S_IMA		(1 << 10) /* Inode has an associated IMA struct */
+ #define S_AUTOMOUNT	(1 << 11) /* Automount/referral quasi-directory */
+ #define S_NOSEC		(1 << 12) /* no suid or xattr security attributes */
  #ifdef CONFIG_FS_DAX
- #define S_DAX		8192	/* Direct Access, avoiding the page cache */
+ #define S_DAX		(1 << 13) /* Direct Access, avoiding the page cache */
  #else
- #define S_DAX		0	/* Make all the DAX code disappear */
+ #define S_DAX		0	  /* Make all the DAX code disappear */
  #endif
- #define S_ENCRYPTED	16384	/* Encrypted file (using fs/crypto/) */
- #define S_CASEFOLD	32768	/* Casefolded file */
- #define S_VERITY	65536	/* Verity file (using fs/verity/) */
+ #define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
+ #define S_CASEFOLD	(1 << 15) /* Casefolded file */
+ #define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
  
  /*
   * Note that nosuid etc flags are inode-specific: setting some file-system
@@@ -2228,9 -2264,18 +2228,9 @@@ struct file_system_type 
  
  #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
  
 -#ifdef CONFIG_BLOCK
  extern struct dentry *mount_bdev(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data,
  	int (*fill_super)(struct super_block *, void *, int));
 -#else
 -static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
 -	int flags, const char *dev_name, void *data,
 -	int (*fill_super)(struct super_block *, void *, int))
 -{
 -	return ERR_PTR(-ENODEV);
 -}
 -#endif
  extern struct dentry *mount_single(struct file_system_type *fs_type,
  	int flags, void *data,
  	int (*fill_super)(struct super_block *, void *, int));
@@@ -2239,7 -2284,14 +2239,7 @@@ extern struct dentry *mount_nodev(struc
  	int (*fill_super)(struct super_block *, void *, int));
  extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
  void generic_shutdown_super(struct super_block *sb);
 -#ifdef CONFIG_BLOCK
  void kill_block_super(struct super_block *sb);
 -#else
 -static inline void kill_block_super(struct super_block *sb)
 -{
 -	BUG();
 -}
 -#endif
  void kill_anon_super(struct super_block *sb);
  void kill_litter_super(struct super_block *sb);
  void deactivate_super(struct super_block *sb);
@@@ -2529,16 -2581,95 +2529,16 @@@ extern struct kmem_cache *names_cachep
  #define __getname()		kmem_cache_alloc(names_cachep, GFP_KERNEL)
  #define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
  
 -#ifdef CONFIG_BLOCK
 -extern int register_blkdev(unsigned int, const char *);
 -extern void unregister_blkdev(unsigned int, const char *);
 -extern struct block_device *bdget(dev_t);
 -extern struct block_device *bdgrab(struct block_device *bdev);
 -extern void bd_set_size(struct block_device *, loff_t size);
 -extern void bd_forget(struct inode *inode);
 -extern void bdput(struct block_device *);
 -extern void invalidate_bdev(struct block_device *);
 -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
 -extern int sync_blockdev(struct block_device *bdev);
 -extern void kill_bdev(struct block_device *);
 -extern struct super_block *freeze_bdev(struct block_device *);
 -extern void emergency_thaw_all(void);
 -extern void emergency_thaw_bdev(struct super_block *sb);
 -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 -extern int fsync_bdev(struct block_device *);
 -
  extern struct super_block *blockdev_superblock;
 -
  static inline bool sb_is_blkdev_sb(struct super_block *sb)
  {
 -	return sb == blockdev_superblock;
 -}
 -#else
 -static inline void bd_forget(struct inode *inode) {}
 -static inline int sync_blockdev(struct block_device *bdev) { return 0; }
 -static inline void kill_bdev(struct block_device *bdev) {}
 -static inline void invalidate_bdev(struct block_device *bdev) {}
 -
 -static inline struct super_block *freeze_bdev(struct block_device *sb)
 -{
 -	return NULL;
 -}
 -
 -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 -{
 -	return 0;
 -}
 -
 -static inline int emergency_thaw_bdev(struct super_block *sb)
 -{
 -	return 0;
 +	return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
  }
  
 -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
 -{
 -}
 -
 -static inline bool sb_is_blkdev_sb(struct super_block *sb)
 -{
 -	return false;
 -}
 -#endif
 +void emergency_thaw_all(void);
  extern int sync_filesystem(struct super_block *);
  extern const struct file_operations def_blk_fops;
  extern const struct file_operations def_chr_fops;
 -#ifdef CONFIG_BLOCK
 -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
 -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
 -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
 -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 -					       void *holder);
 -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
 -					      void *holder);
 -extern struct block_device *bd_start_claiming(struct block_device *bdev,
 -					      void *holder);
 -extern void bd_finish_claiming(struct block_device *bdev,
 -			       struct block_device *whole, void *holder);
 -extern void bd_abort_claiming(struct block_device *bdev,
 -			      struct block_device *whole, void *holder);
 -extern void blkdev_put(struct block_device *bdev, fmode_t mode);
 -
 -#ifdef CONFIG_SYSFS
 -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
 -extern void bd_unlink_disk_holder(struct block_device *bdev,
 -				  struct gendisk *disk);
 -#else
 -static inline int bd_link_disk_holder(struct block_device *bdev,
 -				      struct gendisk *disk)
 -{
 -	return 0;
 -}
 -static inline void bd_unlink_disk_holder(struct block_device *bdev,
 -					 struct gendisk *disk)
 -{
 -}
 -#endif
 -#endif
  
  /* fs/char_dev.c */
  #define CHRDEV_MAJOR_MAX 512
@@@ -2569,12 -2700,31 +2569,12 @@@ static inline void unregister_chrdev(un
  	__unregister_chrdev(major, 0, 256, name);
  }
  
 -/* fs/block_dev.c */
 -#define BDEVNAME_SIZE	32	/* Largest string for a blockdev identifier */
 -#define BDEVT_SIZE	10	/* Largest string for MAJ:MIN for blkdev */
 -
 -#ifdef CONFIG_BLOCK
 -#define BLKDEV_MAJOR_MAX	512
 -extern const char *bdevname(struct block_device *bdev, char *buffer);
 -extern struct block_device *lookup_bdev(const char *);
 -extern void blkdev_show(struct seq_file *,off_t);
 -
 -#else
 -#define BLKDEV_MAJOR_MAX	0
 -#endif
 -
  extern void init_special_inode(struct inode *, umode_t, dev_t);
  
  /* Invalid inode operations -- fs/bad_inode.c */
  extern void make_bad_inode(struct inode *);
  extern bool is_bad_inode(struct inode *);
  
 -#ifdef CONFIG_BLOCK
 -extern int revalidate_disk(struct gendisk *);
 -extern int check_disk_change(struct block_device *);
 -extern int __invalidate_device(struct block_device *, bool);
 -#endif
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
  					pgoff_t start, pgoff_t end);
  
@@@ -2679,7 -2829,7 +2679,7 @@@ static inline errseq_t filemap_sample_w
  
  /**
   * file_sample_sb_err - sample the current errseq_t to test for later errors
 - * @mapping: mapping to be sampled
 + * @file: file pointer to be sampled
   *
   * Grab the most current superblock-level errseq_t value for the given
   * struct file.
@@@ -2885,7 -3035,6 +2885,7 @@@ extern int kernel_read_file_from_path_i
  extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
  				    enum kernel_read_file_id);
  extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
 +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
  extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
  extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
  extern struct file * open_exec(const char *);
@@@ -2950,21 -3099,6 +2950,21 @@@ extern void discard_new_inode(struct in
  extern unsigned int get_next_ino(void);
  extern void evict_inodes(struct super_block *sb);
  
 +/*
 + * Userspace may rely on the inode number being non-zero. For example, glibc
 + * simply ignores files with zero i_ino in unlink() and other places.
 + *
 + * As an additional complication, if userspace was compiled with
 + * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
 + * lower 32 bits, so we need to check that those aren't zero explicitly. With
 + * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
 + * better safe than sorry.
 + */
 +static inline bool is_zero_ino(ino_t ino)
 +{
 +	return (u32)ino == 0;
 +}
 +
  extern void __iget(struct inode * inode);
  extern void iget_failed(struct inode *);
  extern void clear_inode(struct inode *);
@@@ -2990,6 -3124,10 +2990,6 @@@ static inline void remove_inode_hash(st
  
  extern void inode_sb_list_add(struct inode *inode);
  
 -#ifdef CONFIG_BLOCK
 -extern int bdev_read_only(struct block_device *);
 -#endif
 -extern int set_blocksize(struct block_device *, int);
  extern int sb_set_blocksize(struct super_block *, int);
  extern int sb_min_blocksize(struct super_block *, int);
  
@@@ -3302,28 -3440,22 +3302,28 @@@ static inline int iocb_flags(struct fil
  
  static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
  {
 +	int kiocb_flags = 0;
 +
 +	if (!flags)
 +		return 0;
  	if (unlikely(flags & ~RWF_SUPPORTED))
  		return -EOPNOTSUPP;
  
  	if (flags & RWF_NOWAIT) {
  		if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
  			return -EOPNOTSUPP;
 -		ki->ki_flags |= IOCB_NOWAIT;
 +		kiocb_flags |= IOCB_NOWAIT;
  	}
  	if (flags & RWF_HIPRI)
 -		ki->ki_flags |= IOCB_HIPRI;
 +		kiocb_flags |= IOCB_HIPRI;
  	if (flags & RWF_DSYNC)
 -		ki->ki_flags |= IOCB_DSYNC;
 +		kiocb_flags |= IOCB_DSYNC;
  	if (flags & RWF_SYNC)
 -		ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
 +		kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
  	if (flags & RWF_APPEND)
 -		ki->ki_flags |= IOCB_APPEND;
 +		kiocb_flags |= IOCB_APPEND;
 +
 +	ki->ki_flags |= kiocb_flags;
  	return 0;
  }
  
diff --combined include/linux/skbuff.h
index 3ad65d4ce085,1530e81a6cce..46881d902124
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -238,6 -238,7 +238,7 @@@
  			 SKB_DATA_ALIGN(sizeof(struct sk_buff)) +	\
  			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
  
+ struct ahash_request;
  struct net_device;
  struct scatterlist;
  struct pipe_inode_info;
@@@ -283,7 -284,6 +284,7 @@@ struct nf_bridge_info 
   */
  struct tc_skb_ext {
  	__u32 chain;
 +	__u16 mru;
  };
  #endif
  
@@@ -1329,7 -1329,7 +1330,7 @@@ void skb_flow_dissect_meta(const struc
  			   void *target_container);
  
  /* Gets a skb connection tracking info, ctinfo map should be a
 - * a map of mapsize to translate enum ip_conntrack_info states
 + * map of mapsize to translate enum ip_conntrack_info states
   * to user states.
   */
  void
@@@ -1343,10 -1343,6 +1344,10 @@@ skb_flow_dissect_tunnel_info(const stru
  			     struct flow_dissector *flow_dissector,
  			     void *target_container);
  
 +void skb_flow_dissect_hash(const struct sk_buff *skb,
 +			   struct flow_dissector *flow_dissector,
 +			   void *target_container);
 +
  static inline __u32 skb_get_hash(struct sk_buff *skb)
  {
  	if (!skb->l4_hash && !skb->sw_hash)
@@@ -3817,7 -3813,7 +3818,7 @@@ static inline bool skb_defer_rx_timesta
   * must call this function to return the skb back to the stack with a
   * timestamp.
   *
 - * @skb: clone of the the original outgoing packet
 + * @skb: clone of the original outgoing packet
   * @hwtstamps: hardware time stamps
   *
   */