From: Linus Torvalds Date: Sat, 8 Aug 2020 04:14:30 +0000 (-0700) Subject: Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs X-Git-Tag: v5.9-rc1~83 X-Git-Url: https://repo.jachan.dev/linux.git/commitdiff_plain/b79675e15a754ca51b9fc631e0961ccdd4ec3fc7?hp=-c Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs Pull misc vfs updates from Al Viro: "No common topic whatsoever in those, sorry" * 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: fs: define inode flags using bit numbers iov_iter: Move unnecessary inclusion of crypto/hash.h dlmfs: clean up dlmfs_file_{read,write}() a bit --- b79675e15a754ca51b9fc631e0961ccdd4ec3fc7 diff --combined drivers/misc/uacce/uacce.c index aa91f69a5fa9,e45bfd409cc5..a5b8dab80c76 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@@ -4,6 -4,7 +4,7 @@@ #include #include #include + #include #include static struct class *uacce_class; @@@ -179,6 -180,14 +180,6 @@@ static int uacce_fops_release(struct in return 0; } -static vm_fault_t uacce_vma_fault(struct vm_fault *vmf) -{ - if (vmf->flags & (FAULT_FLAG_MKWRITE | FAULT_FLAG_WRITE)) - return VM_FAULT_SIGBUS; - - return 0; -} - static void uacce_vma_close(struct vm_area_struct *vma) { struct uacce_queue *q = vma->vm_private_data; @@@ -191,6 -200,7 +192,6 @@@ } static const struct vm_operations_struct uacce_vm_ops = { - .fault = uacce_vma_fault, .close = uacce_vma_close, }; diff --combined drivers/soc/qcom/pdr_interface.c index 4c9225f15c4e,a90d707da689..088dc99f77f3 --- a/drivers/soc/qcom/pdr_interface.c +++ b/drivers/soc/qcom/pdr_interface.c @@@ -5,6 -5,7 +5,7 @@@ #include #include + #include #include #include @@@ -278,15 -279,13 +279,15 @@@ static void pdr_indack_work(struct work list_for_each_entry_safe(ind, tmp, &pdr->indack_list, node) { pds = ind->pds; - pdr_send_indack_msg(pdr, pds, ind->transaction_id); mutex_lock(&pdr->status_lock); pds->state = ind->curr_state; pdr->status(pds->state, pds->service_path, pdr->priv); mutex_unlock(&pdr->status_lock); + /* Ack the indication after clients release the PD resources */ + pdr_send_indack_msg(pdr, pds, ind->transaction_id); + mutex_lock(&pdr->list_lock); list_del(&ind->node); mutex_unlock(&pdr->list_lock); diff --combined fs/btrfs/inode.c index 611b3412fbfd,d901d53e4f03..6dc03bab0c9d --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@@ -3,6 -3,7 +3,7 @@@ * Copyright (C) 2007 Oracle. All rights reserved. */ + #include #include #include #include @@@ -80,17 -81,17 +81,17 @@@ struct kmem_cache *btrfs_free_space_bit static int btrfs_setsize(struct inode *inode, struct iattr *attr); static int btrfs_truncate(struct inode *inode, bool skip_writeback); static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); -static noinline int cow_file_range(struct inode *inode, +static noinline int cow_file_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, - u64 orig_start, u64 block_start, +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, + u64 len, u64 orig_start, u64 block_start, u64 block_len, u64 orig_block_len, u64 ram_bytes, int compress_type, int type); -static void __endio_write_update_ordered(struct inode *inode, +static void __endio_write_update_ordered(struct btrfs_inode *inode, const u64 offset, const u64 bytes, const bool uptodate); @@@ -104,7 -105,7 +105,7 @@@ * to be released, which we want to happen only when finishing the ordered * extent (btrfs_finish_ordered_io()). */ -static inline void btrfs_cleanup_ordered_extents(struct inode *inode, +static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, struct page *locked_page, u64 offset, u64 bytes) { @@@ -116,7 -117,7 +117,7 @@@ struct page *page; while (index <= end_index) { - page = find_get_page(inode->i_mapping, index); + page = find_get_page(inode->vfs_inode.i_mapping, index); index++; if (!page) continue; @@@ -274,15 -275,15 +275,15 @@@ fail * does the checks required to make sure the data is small enough * to fit as an inline extent. */ -static noinline int cow_file_range_inline(struct inode *inode, u64 start, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start, u64 end, size_t compressed_size, int compress_type, struct page **compressed_pages) { - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; - u64 isize = i_size_read(inode); + u64 isize = i_size_read(&inode->vfs_inode); u64 actual_end = min(end + 1, isize); u64 inline_len = actual_end - start; u64 aligned_end = ALIGN(end, fs_info->sectorsize); @@@ -314,7 -315,7 +315,7 @@@ btrfs_free_path(path); return PTR_ERR(trans); } - trans->block_rsv = &BTRFS_I(inode)->block_rsv; + trans->block_rsv = &inode->block_rsv; if (compressed_size && compressed_pages) extent_item_size = btrfs_file_extent_calc_inline_size( @@@ -323,9 -324,9 +324,9 @@@ extent_item_size = btrfs_file_extent_calc_inline_size( inline_len); - ret = __btrfs_drop_extents(trans, root, inode, path, - start, aligned_end, NULL, - 1, 1, extent_item_size, &extent_inserted); + ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end, + NULL, 1, 1, extent_item_size, + &extent_inserted); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@@ -334,7 -335,7 +335,7 @@@ if (isize > actual_end) inline_len = min_t(u64, isize, actual_end); ret = insert_inline_extent(trans, path, extent_inserted, - root, inode, start, + root, &inode->vfs_inode, start, inline_len, compressed_size, compress_type, compressed_pages); if (ret && ret != -ENOSPC) { @@@ -345,8 -346,8 +346,8 @@@ goto out; } - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); - btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); + btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); out: /* * Don't forget to free the reserved space, as for inlined extent @@@ -412,10 -413,10 +413,10 @@@ static noinline int add_async_extent(st /* * Check if the inode has flags compatible with compression */ -static inline bool inode_can_compress(struct inode *inode) +static inline bool inode_can_compress(struct btrfs_inode *inode) { - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || - BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + if (inode->flags & BTRFS_INODE_NODATACOW || + inode->flags & BTRFS_INODE_NODATASUM) return false; return true; } @@@ -424,30 -425,29 +425,30 @@@ * Check if the inode needs to be submitted to compression, based on mount * options, defragmentation, properties or heuristics. */ -static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) +static inline int inode_need_compress(struct btrfs_inode *inode, u64 start, + u64 end) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; if (!inode_can_compress(inode)) { WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), KERN_ERR "BTRFS: unexpected compression for ino %llu\n", - btrfs_ino(BTRFS_I(inode))); + btrfs_ino(inode)); return 0; } /* force compress */ if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) return 1; /* defrag ioctl */ - if (BTRFS_I(inode)->defrag_compress) + if (inode->defrag_compress) return 1; /* bad compression ratios */ - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) + if (inode->flags & BTRFS_INODE_NOCOMPRESS) return 0; if (btrfs_test_opt(fs_info, COMPRESS) || - BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS || - BTRFS_I(inode)->prop_compress) - return btrfs_compress_heuristic(inode, start, end); + inode->flags & BTRFS_INODE_COMPRESS || + inode->prop_compress) + return btrfs_compress_heuristic(&inode->vfs_inode, start, end); return 0; } @@@ -553,7 -553,7 +554,7 @@@ again * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. */ - if (inode_need_compress(inode, start, end)) { + if (inode_need_compress(BTRFS_I(inode), start, end)) { WARN_ON(pages); pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { @@@ -617,12 -617,11 +618,12 @@@ cont /* we didn't compress the entire range, try * to make an uncompressed inline extent. */ - ret = cow_file_range_inline(inode, start, end, 0, - BTRFS_COMPRESS_NONE, NULL); + ret = cow_file_range_inline(BTRFS_I(inode), start, end, + 0, BTRFS_COMPRESS_NONE, + NULL); } else { /* try making a compressed inline extent */ - ret = cow_file_range_inline(inode, start, end, + ret = cow_file_range_inline(BTRFS_I(inode), start, end, total_compressed, compress_type, pages); } @@@ -644,8 -643,7 +645,8 @@@ * our outstanding extent for clearing delalloc for this * range. */ - extent_clear_unlock_delalloc(inode, start, end, NULL, + extent_clear_unlock_delalloc(BTRFS_I(inode), start, end, + NULL, clear_flags, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | @@@ -765,14 -763,14 +766,14 @@@ static void free_async_extent_pages(str */ static noinline void submit_compressed_extents(struct async_chunk *async_chunk) { - struct inode *inode = async_chunk->inode; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_inode *inode = BTRFS_I(async_chunk->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct async_extent *async_extent; u64 alloc_hint = 0; struct btrfs_key ins; struct extent_map *em; - struct btrfs_root *root = BTRFS_I(inode)->root; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct btrfs_root *root = inode->root; + struct extent_io_tree *io_tree = &inode->io_tree; int ret = 0; again: @@@ -805,7 -803,7 +806,7 @@@ retry * all those pages down to the drive. */ if (!page_started && !ret) - extent_write_locked_range(inode, + extent_write_locked_range(&inode->vfs_inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, @@@ -835,7 -833,7 +836,7 @@@ * will not submit these pages down to lower * layers. */ - extent_range_redirty_for_io(inode, + extent_range_redirty_for_io(&inode->vfs_inode, async_extent->start, async_extent->start + async_extent->ram_size - 1); @@@ -870,7 -868,8 +871,7 @@@ BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); if (ret) { - btrfs_drop_extent_cache(BTRFS_I(inode), - async_extent->start, + btrfs_drop_extent_cache(inode, async_extent->start, async_extent->start + async_extent->ram_size - 1, 0); goto out_free_reserve; @@@ -886,7 -885,8 +887,7 @@@ NULL, EXTENT_LOCKED | EXTENT_DELALLOC, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK); - if (btrfs_submit_compressed_write(inode, - async_extent->start, + if (btrfs_submit_compressed_write(inode, async_extent->start, async_extent->ram_size, ins.objectid, ins.offset, async_extent->pages, @@@ -897,11 -897,12 +898,11 @@@ const u64 start = async_extent->start; const u64 end = start + async_extent->ram_size - 1; - p->mapping = inode->i_mapping; + p->mapping = inode->vfs_inode.i_mapping; btrfs_writepage_endio_finish_ordered(p, start, end, 0); p->mapping = NULL; - extent_clear_unlock_delalloc(inode, start, end, - NULL, 0, + extent_clear_unlock_delalloc(inode, start, end, NULL, 0, PAGE_END_WRITEBACK | PAGE_SET_ERROR); free_async_extent_pages(async_extent); @@@ -929,10 -930,10 +930,10 @@@ out_free goto again; } -static u64 get_extent_allocation_hint(struct inode *inode, u64 start, +static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start, u64 num_bytes) { - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; u64 alloc_hint = 0; @@@ -974,18 -975,17 +975,18 @@@ * required to start IO on it. It may be clean and already done with * IO when we return. */ -static noinline int cow_file_range(struct inode *inode, +static noinline int cow_file_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; u64 alloc_hint = 0; u64 num_bytes; unsigned long ram_size; u64 cur_alloc_size = 0; + u64 min_alloc_size; u64 blocksize = fs_info->sectorsize; struct btrfs_key ins; struct extent_map *em; @@@ -994,7 -994,7 +995,7 @@@ bool extent_reserved = false; int ret = 0; - if (btrfs_is_free_space_inode(BTRFS_I(inode))) { + if (btrfs_is_free_space_inode(inode)) { WARN_ON_ONCE(1); ret = -EINVAL; goto out_unlock; @@@ -1004,7 -1004,7 +1005,7 @@@ num_bytes = max(blocksize, num_bytes); ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy)); - inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K); + inode_should_defrag(inode, start, end, num_bytes, SZ_64K); if (start == 0) { /* lets try to make an inline extent */ @@@ -1033,28 -1033,13 +1034,28 @@@ } alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); - btrfs_drop_extent_cache(BTRFS_I(inode), start, - start + num_bytes - 1, 0); + btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); + + /* + * Relocation relies on the relocated extents to have exactly the same + * size as the original extents. Normally writeback for relocation data + * extents follows a NOCOW path because relocation preallocates the + * extents. However, due to an operation such as scrub turning a block + * group to RO mode, it may fallback to COW mode, so we must make sure + * an extent allocated during COW has exactly the requested size and can + * not be split into smaller extents, otherwise relocation breaks and + * fails during the stage where it updates the bytenr of file extent + * items. + */ + if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) + min_alloc_size = num_bytes; + else + min_alloc_size = fs_info->sectorsize; while (num_bytes > 0) { cur_alloc_size = num_bytes; ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, - fs_info->sectorsize, 0, alloc_hint, + min_alloc_size, 0, alloc_hint, &ins, 1, 1); if (ret < 0) goto out_unlock; @@@ -1097,7 -1082,7 +1098,7 @@@ * skip current ordered extent. */ if (ret) - btrfs_drop_extent_cache(BTRFS_I(inode), start, + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); } @@@ -1113,7 -1098,8 +1114,7 @@@ page_ops = unlock ? PAGE_UNLOCK : 0; page_ops |= PAGE_SET_PRIVATE2; - extent_clear_unlock_delalloc(inode, start, - start + ram_size - 1, + extent_clear_unlock_delalloc(inode, start, start + ram_size - 1, locked_page, EXTENT_LOCKED | EXTENT_DELALLOC, page_ops); @@@ -1137,7 -1123,7 +1138,7 @@@ out return ret; out_drop_extent_cache: - btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0); + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); out_reserve: btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); @@@ -1234,13 -1220,13 +1235,13 @@@ static noinline void async_cow_free(str kvfree(async_chunk->pending); } -static int cow_file_range_async(struct inode *inode, +static int cow_file_range_async(struct btrfs_inode *inode, struct writeback_control *wbc, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc); struct async_cow *ctx; struct async_chunk *async_chunk; @@@ -1252,9 -1238,9 +1253,9 @@@ unsigned nofs_flag; const unsigned int write_flags = wbc_to_write_flags(wbc); - unlock_extent(&BTRFS_I(inode)->io_tree, start, end); + unlock_extent(&inode->io_tree, start, end); - if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS && + if (inode->flags & BTRFS_INODE_NOCOMPRESS && !btrfs_test_opt(fs_info, FORCE_COMPRESS)) { num_chunks = 1; should_compress = false; @@@ -1292,9 -1278,9 +1293,9 @@@ * igrab is called higher up in the call chain, take only the * lightweight reference for the callback lifetime */ - ihold(inode); + ihold(&inode->vfs_inode); async_chunk[i].pending = &ctx->num_chunks; - async_chunk[i].inode = inode; + async_chunk[i].inode = &inode->vfs_inode; async_chunk[i].start = start; async_chunk[i].end = cur_end; async_chunk[i].write_flags = write_flags; @@@ -1371,15 -1357,13 +1372,15 @@@ static noinline int csum_exist_in_range return 1; } -static int fallback_to_cow(struct inode *inode, struct page *locked_page, +static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, const u64 start, const u64 end, int *page_started, unsigned long *nr_written) { - const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode)); + const bool is_space_ino = btrfs_is_free_space_inode(inode); + const bool is_reloc_ino = (inode->root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID); const u64 range_bytes = end + 1 - start; - struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_io_tree *io_tree = &inode->io_tree; u64 range_start = start; u64 count; @@@ -1408,23 -1392,18 +1409,23 @@@ * data space info, which we incremented in the step above. * * If we need to fallback to cow and the inode corresponds to a free - * space cache inode, we must also increment bytes_may_use of the data - * space_info for the same reason. Space caches always get a prealloc + * space cache inode or an inode of the data relocation tree, we must + * also increment bytes_may_use of the data space_info for the same + * reason. Space caches and relocated data extents always get a prealloc * extent for them, however scrub or balance may have set the block - * group that contains that extent to RO mode. + * group that contains that extent to RO mode and therefore force COW + * when starting writeback. */ count = count_range_bits(io_tree, &range_start, end, range_bytes, EXTENT_NORESERVE, 0); - if (count > 0 || is_space_ino) { - const u64 bytes = is_space_ino ? range_bytes : count; - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + if (count > 0 || is_space_ino || is_reloc_ino) { + u64 bytes = count; + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_space_info *sinfo = fs_info->data_sinfo; + if (is_space_ino || is_reloc_ino) + bytes = range_bytes; + spin_lock(&sinfo->lock); btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes); spin_unlock(&sinfo->lock); @@@ -1445,21 -1424,21 +1446,21 @@@ * If no cow copies or snapshots exist, we write directly to the existing * blocks on disk */ -static noinline int run_delalloc_nocow(struct inode *inode, +static noinline int run_delalloc_nocow(struct btrfs_inode *inode, struct page *locked_page, const u64 start, const u64 end, int *page_started, int force, unsigned long *nr_written) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_root *root = inode->root; struct btrfs_path *path; u64 cow_start = (u64)-1; u64 cur_offset = start; int ret; bool check_prev = true; - const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode)); - u64 ino = btrfs_ino(BTRFS_I(inode)); + const bool freespace_inode = btrfs_is_free_space_inode(inode); + u64 ino = btrfs_ino(inode); bool nocow = false; u64 disk_bytenr = 0; @@@ -1685,11 -1664,15 +1686,11 @@@ out_check * NOCOW, following one which needs to be COW'ed */ if (cow_start != (u64)-1) { - ret = fallback_to_cow(inode, locked_page, cow_start, - found_key.offset - 1, + ret = fallback_to_cow(inode, locked_page, + cow_start, found_key.offset - 1, page_started, nr_written); - if (ret) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); + if (ret) goto error; - } cow_start = (u64)-1; } @@@ -1705,6 -1688,9 +1706,6 @@@ ram_bytes, BTRFS_COMPRESS_NONE, BTRFS_ORDERED_PREALLOC); if (IS_ERR(em)) { - if (nocow) - btrfs_dec_nocow_writers(fs_info, - disk_bytenr); ret = PTR_ERR(em); goto error; } @@@ -1714,7 -1700,8 +1715,7 @@@ num_bytes, BTRFS_ORDERED_PREALLOC); if (ret) { - btrfs_drop_extent_cache(BTRFS_I(inode), - cur_offset, + btrfs_drop_extent_cache(inode, cur_offset, cur_offset + num_bytes - 1, 0); goto error; @@@ -1790,11 -1777,11 +1791,11 @@@ error return ret; } -static inline int need_force_cow(struct inode *inode, u64 start, u64 end) +static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end) { - if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && - !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)) + if (!(inode->flags & BTRFS_INODE_NODATACOW) && + !(inode->flags & BTRFS_INODE_PREALLOC)) return 0; /* @@@ -1802,8 -1789,9 +1803,8 @@@ * if is not zero, it means the file is defragging. * Force cow if given extent needs to be defragged. */ - if (BTRFS_I(inode)->defrag_bytes && - test_range_bit(&BTRFS_I(inode)->io_tree, start, end, - EXTENT_DEFRAG, 0, NULL)) + if (inode->defrag_bytes && + test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL)) return 1; return 0; @@@ -1813,25 -1801,26 +1814,25 @@@ * Function to process delayed allocation (create CoW) for ranges which are * being touched for the first time. */ -int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, +int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, struct writeback_control *wbc) { int ret; int force_cow = need_force_cow(inode, start, end); - if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) { + if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 1, nr_written); - } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { + } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); } else if (!inode_can_compress(inode) || !inode_need_compress(inode, start, end)) { ret = cow_file_range(inode, locked_page, start, end, - page_started, nr_written, 1); + page_started, nr_written, 1); } else { - set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags); + set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags); ret = cow_file_range_async(inode, wbc, locked_page, start, end, page_started, nr_written); } @@@ -2080,7 -2069,9 +2081,7 @@@ void btrfs_clear_delalloc_extent(struc if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID && do_list && !(state->state & EXTENT_NORESERVE) && (*bits & EXTENT_CLEAR_DATA_RESV)) - btrfs_free_reserved_data_space_noquota( - &inode->vfs_inode, - state->start, len); + btrfs_free_reserved_data_space_noquota(fs_info, len); percpu_counter_add_batch(&fs_info->delalloc_bytes, -len, fs_info->delalloc_batch); @@@ -2156,7 -2147,7 +2157,7 @@@ static blk_status_t btrfs_submit_bio_st struct inode *inode = private_data; blk_status_t ret = 0; - ret = btrfs_csum_one_bio(inode, bio, 0, 0); + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); BUG_ON(ret); /* -ENOMEM */ return 0; } @@@ -2221,7 -2212,7 +2222,7 @@@ static blk_status_t btrfs_submit_bio_ho 0, inode, btrfs_submit_bio_start); goto out; } else if (!skip_sum) { - ret = btrfs_csum_one_bio(inode, bio, 0, 0); + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0); if (ret) goto out; } @@@ -2258,13 -2249,13 +2259,13 @@@ static noinline int add_pending_csums(s return 0; } -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, +int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, unsigned int extra_bits, struct extent_state **cached_state) { WARN_ON(PAGE_ALIGNED(end)); - return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - extra_bits, cached_state); + return set_extent_delalloc(&inode->io_tree, start, end, extra_bits, + cached_state); } /* see btrfs_writepage_start_hook for details on why this is required */ @@@ -2281,7 -2272,7 +2282,7 @@@ static void btrfs_writepage_fixup_worke struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; struct page *page; - struct inode *inode; + struct btrfs_inode *inode; u64 page_start; u64 page_end; int ret = 0; @@@ -2289,7 -2280,7 +2290,7 @@@ fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; - inode = fixup->inode; + inode = BTRFS_I(fixup->inode); page_start = page_offset(page); page_end = page_offset(page) + PAGE_SIZE - 1; @@@ -2326,7 -2317,8 +2327,7 @@@ again * when the page was already properly dealt with. */ if (!ret) { - btrfs_delalloc_release_extents(BTRFS_I(inode), - PAGE_SIZE); + btrfs_delalloc_release_extents(inode, PAGE_SIZE); btrfs_delalloc_release_space(inode, data_reserved, page_start, PAGE_SIZE, true); @@@ -2342,18 -2334,20 +2343,18 @@@ if (ret) goto out_page; - lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, - &cached_state); + lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state); /* already ordered? We're done */ if (PagePrivate2(page)) goto out_reserved; - ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, - PAGE_SIZE); + ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE); if (ordered) { - unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, - page_end, &cached_state); + unlock_extent_cached(&inode->io_tree, page_start, page_end, + &cached_state); unlock_page(page); - btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); btrfs_put_ordered_extent(ordered); goto again; } @@@ -2373,11 -2367,11 +2374,11 @@@ BUG_ON(!PageDirty(page)); free_delalloc_space = false; out_reserved: - btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); + btrfs_delalloc_release_extents(inode, PAGE_SIZE); if (free_delalloc_space) btrfs_delalloc_release_space(inode, data_reserved, page_start, PAGE_SIZE, true); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, + unlock_extent_cached(&inode->io_tree, page_start, page_end, &cached_state); out_page: if (ret) { @@@ -2400,7 -2394,7 +2401,7 @@@ * that could need flushing space. Recursing back to fixup worker would * deadlock. */ - btrfs_add_delayed_iput(inode); + btrfs_add_delayed_iput(&inode->vfs_inode); } /* @@@ -2456,18 -2450,18 +2457,18 @@@ int btrfs_writepage_cow_fixup(struct pa } static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, - struct inode *inode, u64 file_pos, - u64 disk_bytenr, u64 disk_num_bytes, - u64 num_bytes, u64 ram_bytes, - u8 compression, u8 encryption, - u16 other_encoding, int extent_type) + struct btrfs_inode *inode, u64 file_pos, + struct btrfs_file_extent_item *stack_fi, + u64 qgroup_reserved) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_file_extent_item *fi; + struct btrfs_root *root = inode->root; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_key ins; - u64 qg_released; + u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi); + u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi); + u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi); + u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi); int extent_inserted = 0; int ret; @@@ -2486,42 -2480,60 +2487,42 @@@ */ ret = __btrfs_drop_extents(trans, root, inode, path, file_pos, file_pos + num_bytes, NULL, 0, - 1, sizeof(*fi), &extent_inserted); + 1, sizeof(*stack_fi), &extent_inserted); if (ret) goto out; if (!extent_inserted) { - ins.objectid = btrfs_ino(BTRFS_I(inode)); + ins.objectid = btrfs_ino(inode); ins.offset = file_pos; ins.type = BTRFS_EXTENT_DATA_KEY; path->leave_spinning = 1; ret = btrfs_insert_empty_item(trans, root, path, &ins, - sizeof(*fi)); + sizeof(*stack_fi)); if (ret) goto out; } leaf = path->nodes[0]; - fi = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - btrfs_set_file_extent_generation(leaf, fi, trans->transid); - btrfs_set_file_extent_type(leaf, fi, extent_type); - btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr); - btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes); - btrfs_set_file_extent_offset(leaf, fi, 0); - btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes); - btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes); - btrfs_set_file_extent_compression(leaf, fi, compression); - btrfs_set_file_extent_encryption(leaf, fi, encryption); - btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); + btrfs_set_stack_file_extent_generation(stack_fi, trans->transid); + write_extent_buffer(leaf, stack_fi, + btrfs_item_ptr_offset(leaf, path->slots[0]), + sizeof(struct btrfs_file_extent_item)); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - inode_add_bytes(inode, num_bytes); + inode_add_bytes(&inode->vfs_inode, num_bytes); ins.objectid = disk_bytenr; ins.offset = disk_num_bytes; ins.type = BTRFS_EXTENT_ITEM_KEY; - ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos, - ram_bytes); + ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes); if (ret) goto out; - /* - * Release the reserved range from inode dirty range map, as it is - * already moved into delayed_ref_head - */ - ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes); - if (ret < 0) - goto out; - qg_released = ret; - ret = btrfs_alloc_reserved_file_extent(trans, root, - btrfs_ino(BTRFS_I(inode)), - file_pos, qg_released, &ins); + ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode), + file_pos, qgroup_reserved, &ins); out: btrfs_free_path(path); @@@ -2543,33 -2555,7 +2544,33 @@@ static void btrfs_release_delalloc_byte btrfs_put_block_group(cache); } -/* as ordered data IO finishes, this gets called so we can finish +static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans, + struct inode *inode, + struct btrfs_ordered_extent *oe) +{ + struct btrfs_file_extent_item stack_fi; + u64 logical_len; + + memset(&stack_fi, 0, sizeof(stack_fi)); + btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG); + btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr); + btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, + oe->disk_num_bytes); + if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags)) + logical_len = oe->truncated_len; + else + logical_len = oe->num_bytes; + btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len); + btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len); + btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type); + /* Encryption and other encoding is reserved and all 0 */ + + return insert_reserved_file_extent(trans, BTRFS_I(inode), oe->file_offset, + &stack_fi, oe->qgroup_rsv); +} + +/* + * As ordered data IO finishes, this gets called so we can finish * an ordered extent if the range of bytes in the file it covers are * fully written. */ @@@ -2620,6 -2606,13 +2621,6 @@@ static int btrfs_finish_ordered_io(stru if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ - /* - * For mwrite(mmap + memset to write) case, we still reserve - * space for NOCOW range. - * As NOCOW won't cause a new delayed ref, just free the space - */ - btrfs_qgroup_free_data(inode, NULL, start, - ordered_extent->num_bytes); btrfs_inode_safe_disk_i_size_write(inode, 0); if (freespace_inode) trans = btrfs_join_transaction_spacecache(root); @@@ -2656,14 -2649,20 +2657,14 @@@ compress_type = ordered_extent->compress_type; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compress_type); - btrfs_qgroup_free_data(inode, NULL, start, - ordered_extent->num_bytes); ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), ordered_extent->file_offset, ordered_extent->file_offset + logical_len); } else { BUG_ON(root == fs_info->tree_root); - ret = insert_reserved_file_extent(trans, inode, start, - ordered_extent->disk_bytenr, - ordered_extent->disk_num_bytes, - logical_len, logical_len, - compress_type, 0, 0, - BTRFS_FILE_EXTENT_REG); + ret = insert_ordered_extent_file_extent(trans, inode, + ordered_extent); if (!ret) { clear_reserved_extent = false; btrfs_release_delalloc_bytes(fs_info, @@@ -2815,9 -2814,6 +2816,9 @@@ static int check_data_csum(struct inod zeroit: btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, io_bio->mirror_num); + if (io_bio->device) + btrfs_dev_stat_inc_and_print(io_bio->device, + BTRFS_DEV_STAT_CORRUPTION_ERRS); memset(kaddr + pgoff, 1, len); flush_dcache_page(page); kunmap_atomic(kaddr); @@@ -3336,14 -3332,6 +3337,14 @@@ cache_index */ BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans; + /* + * Same logic as for last_unlink_trans. We don't persist the generation + * of the last transaction where this inode was used for a reflink + * operation, so after eviction and reloading the inode we must be + * pessimistic and assume the last transaction that modified the inode. + */ + BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans; + path->slots[0]++; if (inode->i_nlink != 1 || path->slots[0] >= btrfs_header_nritems(leaf)) @@@ -3492,7 -3480,7 +3493,7 @@@ static noinline int btrfs_update_inode_ fill_inode_item(trans, leaf, inode_item, inode); btrfs_mark_buffer_dirty(leaf); - btrfs_set_inode_last_trans(trans, inode); + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); ret = 0; failed: btrfs_free_path(path); @@@ -3522,7 -3510,7 +3523,7 @@@ noinline int btrfs_update_inode(struct ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) - btrfs_set_inode_last_trans(trans, inode); + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); return ret; } @@@ -4037,8 -4025,6 +4038,8 @@@ int btrfs_delete_subvolume(struct inod } } + free_anon_bdev(dest->anon_dev); + dest->anon_dev = 0; out_end_trans: trans->block_rsv = NULL; trans->bytes_reserved = 0; @@@ -4509,13 -4495,11 +4510,13 @@@ int btrfs_truncate_block(struct inode * struct extent_state *cached_state = NULL; struct extent_changeset *data_reserved = NULL; char *kaddr; + bool only_release_metadata = false; u32 blocksize = fs_info->sectorsize; pgoff_t index = from >> PAGE_SHIFT; unsigned offset = from & (blocksize - 1); struct page *page; gfp_t mask = btrfs_alloc_write_mask(mapping); + size_t write_bytes = blocksize; int ret = 0; u64 block_start; u64 block_end; @@@ -4527,28 -4511,15 +4528,28 @@@ block_start = round_down(from, blocksize); block_end = block_start + blocksize - 1; - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, - block_start, blocksize); - if (ret) + ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, + block_start, blocksize); + if (ret < 0) { + if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start, + &write_bytes) > 0) { + /* For nocow case, no need to reserve data space */ + only_release_metadata = true; + } else { + goto out; + } + } + ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize); + if (ret < 0) { + if (!only_release_metadata) + btrfs_free_reserved_data_space(BTRFS_I(inode), + data_reserved, block_start, blocksize); goto out; - + } again: page = find_or_create_page(mapping, index, mask); if (!page) { - btrfs_delalloc_release_space(inode, data_reserved, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, block_start, blocksize, true); btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); ret = -ENOMEM; @@@ -4573,7 -4544,7 +4574,7 @@@ lock_extent_bits(io_tree, block_start, block_end, &cached_state); set_page_extent_mapped(page); - ordered = btrfs_lookup_ordered_extent(inode, block_start); + ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start); if (ordered) { unlock_extent_cached(io_tree, block_start, block_end, &cached_state); @@@ -4588,7 -4559,7 +4589,7 @@@ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state); - ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0, + ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0, &cached_state); if (ret) { unlock_extent_cached(io_tree, block_start, block_end, @@@ -4613,26 -4584,14 +4614,26 @@@ set_page_dirty(page); unlock_extent_cached(io_tree, block_start, block_end, &cached_state); + if (only_release_metadata) + set_extent_bit(&BTRFS_I(inode)->io_tree, block_start, + block_end, EXTENT_NORESERVE, NULL, NULL, + GFP_NOFS); + out_unlock: - if (ret) - btrfs_delalloc_release_space(inode, data_reserved, block_start, - blocksize, true); + if (ret) { + if (only_release_metadata) + btrfs_delalloc_release_metadata(BTRFS_I(inode), + blocksize, true); + else + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, + block_start, blocksize, true); + } btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); unlock_page(page); put_page(page); out: + if (only_release_metadata) + btrfs_check_nocow_unlock(BTRFS_I(inode)); extent_changeset_free(data_reserved); return ret; } @@@ -4990,8 -4949,7 +4991,8 @@@ static void evict_inode_truncate_pages( * Note, end is the bytenr of last byte, so we need + 1 here. */ if (state_flags & EXTENT_DELALLOC) - btrfs_qgroup_free_data(inode, NULL, start, end - start + 1); + btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start, + end - start + 1); clear_extent_bit(io_tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC | @@@ -6066,7 -6024,7 +6067,7 @@@ static struct inode *btrfs_new_inode(st inode_tree_add(inode); trace_btrfs_inode_new(inode); - btrfs_set_inode_last_trans(trans, inode); + btrfs_set_inode_last_trans(trans, BTRFS_I(inode)); btrfs_update_root_times(trans, root); @@@ -6875,7 -6833,7 +6876,7 @@@ out return em; } -static struct extent_map *btrfs_create_dio_extent(struct inode *inode, +static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode, const u64 start, const u64 len, const u64 orig_start, @@@ -6889,19 -6847,21 +6890,19 @@@ int ret; if (type != BTRFS_ORDERED_NOCOW) { - em = create_io_em(inode, start, len, orig_start, - block_start, block_len, orig_block_len, - ram_bytes, + em = create_io_em(inode, start, len, orig_start, block_start, + block_len, orig_block_len, ram_bytes, BTRFS_COMPRESS_NONE, /* compress_type */ type); if (IS_ERR(em)) goto out; } - ret = btrfs_add_ordered_extent_dio(inode, start, block_start, - len, block_len, type); + ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len, + block_len, type); if (ret) { if (em) { free_extent_map(em); - btrfs_drop_extent_cache(BTRFS_I(inode), start, - start + len - 1, 0); + btrfs_drop_extent_cache(inode, start, start + len - 1, 0); } em = ERR_PTR(ret); } @@@ -6910,11 -6870,11 +6911,11 @@@ return em; } -static struct extent_map *btrfs_new_extent_direct(struct inode *inode, +static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, u64 start, u64 len) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_map *em; struct btrfs_key ins; u64 alloc_hint; @@@ -6931,32 -6891,15 +6932,32 @@@ ins.offset, BTRFS_ORDERED_REGULAR); btrfs_dec_block_group_reservations(fs_info, ins.objectid); if (IS_ERR(em)) - btrfs_free_reserved_extent(fs_info, ins.objectid, - ins.offset, 1); + btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, + 1); return em; } /* - * returns 1 when the nocow is safe, < 1 on error, 0 if the - * block must be cow'd + * Check if we can do nocow write into the range [@offset, @offset + @len) + * + * @offset: File offset + * @len: The length to write, will be updated to the nocow writeable + * range + * @orig_start: (optional) Return the original file offset of the file extent + * @orig_len: (optional) Return the original on-disk length of the file extent + * @ram_bytes: (optional) Return the ram_bytes of the file extent + * + * This function will flush ordered extents in the range to ensure proper + * nocow checks for (nowait == false) case. + * + * Return: + * >0 and update @len if we can do nocow write + * 0 if we can't do nocow write + * <0 if error happened + * + * NOTE: This only checks the file extents, caller is responsible to wait for + * any ordered extents. */ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, @@@ -7183,8 -7126,8 +7184,8 @@@ static int lock_extent_direct(struct in } /* The callers of this must take lock_extent() */ -static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, - u64 orig_start, u64 block_start, +static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, + u64 len, u64 orig_start, u64 block_start, u64 block_len, u64 orig_block_len, u64 ram_bytes, int compress_type, int type) @@@ -7198,7 -7141,7 +7199,7 @@@ type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_REGULAR); - em_tree = &BTRFS_I(inode)->extent_tree; + em_tree = &inode->extent_tree; em = alloc_extent_map(); if (!em) return ERR_PTR(-ENOMEM); @@@ -7220,8 -7163,8 +7221,8 @@@ } do { - btrfs_drop_extent_cache(BTRFS_I(inode), em->start, - em->start + em->len - 1, 0); + btrfs_drop_extent_cache(inode, em->start, + em->start + em->len - 1, 0); write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 1); write_unlock(&em_tree->lock); @@@ -7300,7 -7243,7 +7301,7 @@@ static int btrfs_get_blocks_direct_writ btrfs_inc_nocow_writers(fs_info, block_start)) { struct extent_map *em2; - em2 = btrfs_create_dio_extent(inode, start, len, + em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len, orig_start, block_start, len, orig_block_len, ram_bytes, type); @@@ -7319,7 -7262,8 +7320,7 @@@ * use the existing or preallocated extent, so does not * need to adjust btrfs_space_info's bytes_may_use. */ - btrfs_free_reserved_data_space_noquota(inode, start, - len); + btrfs_free_reserved_data_space_noquota(fs_info, len); goto skip_cow; } } @@@ -7327,7 -7271,7 +7328,7 @@@ /* this will cow the extent */ len = bh_result->b_size; free_extent_map(em); - *map = em = btrfs_new_extent_direct(inode, start, len); + *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len); if (IS_ERR(em)) { ret = PTR_ERR(em); goto out; @@@ -7478,8 -7422,7 +7479,8 @@@ static void btrfs_dio_private_put(struc return; if (bio_op(dip->dio_bio) == REQ_OP_WRITE) { - __endio_write_update_ordered(dip->inode, dip->logical_offset, + __endio_write_update_ordered(BTRFS_I(dip->inode), + dip->logical_offset, dip->bytes, !dip->dio_bio->bi_status); } else { @@@ -7565,18 -7508,18 +7566,18 @@@ static blk_status_t btrfs_check_read_di return err; } -static void __endio_write_update_ordered(struct inode *inode, +static void __endio_write_update_ordered(struct btrfs_inode *inode, const u64 offset, const u64 bytes, const bool uptodate) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_ordered_extent *ordered = NULL; struct btrfs_workqueue *wq; u64 ordered_offset = offset; u64 ordered_bytes = bytes; u64 last_offset; - if (btrfs_is_free_space_inode(BTRFS_I(inode))) + if (btrfs_is_free_space_inode(inode)) wq = fs_info->endio_freespace_worker; else wq = fs_info->endio_write_workers; @@@ -7584,9 -7527,9 +7585,9 @@@ while (ordered_offset < offset + bytes) { last_offset = ordered_offset; if (btrfs_dec_test_first_ordered_pending(inode, &ordered, - &ordered_offset, - ordered_bytes, - uptodate)) { + &ordered_offset, + ordered_bytes, + uptodate)) { btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &ordered->work); @@@ -7613,7 -7556,7 +7614,7 @@@ static blk_status_t btrfs_submit_bio_st { struct inode *inode = private_data; blk_status_t ret; - ret = btrfs_csum_one_bio(inode, bio, offset, 1); + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1); BUG_ON(ret); /* -ENOMEM */ return 0; } @@@ -7674,7 -7617,7 +7675,7 @@@ static inline blk_status_t btrfs_submit * If we aren't doing async submit, calculate the csum of the * bio now. */ - ret = btrfs_csum_one_bio(inode, bio, file_offset, 1); + ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1); if (ret) goto err; } else { @@@ -7923,8 -7866,11 +7924,8 @@@ static ssize_t btrfs_direct_IO(struct k dio_data.overwrite = 1; inode_unlock(inode); relock = true; - } else if (iocb->ki_flags & IOCB_NOWAIT) { - ret = -EAGAIN; - goto out; } - ret = btrfs_delalloc_reserve_space(inode, &data_reserved, + ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, offset, count); if (ret) goto out; @@@ -7956,9 -7902,8 +7957,9 @@@ current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { if (dio_data.reserve) - btrfs_delalloc_release_space(inode, data_reserved, - offset, dio_data.reserve, true); + btrfs_delalloc_release_space(BTRFS_I(inode), + data_reserved, offset, dio_data.reserve, + true); /* * On error we might have left some ordered extents * without submitting corresponding bios for them, so @@@ -7967,13 -7912,13 +7968,13 @@@ */ if (dio_data.unsubmitted_oe_range_start < dio_data.unsubmitted_oe_range_end) - __endio_write_update_ordered(inode, + __endio_write_update_ordered(BTRFS_I(inode), dio_data.unsubmitted_oe_range_start, dio_data.unsubmitted_oe_range_end - dio_data.unsubmitted_oe_range_start, false); } else if (ret >= 0 && (size_t)ret < count) - btrfs_delalloc_release_space(inode, data_reserved, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, offset, count - (size_t)ret, true); btrfs_delalloc_release_extents(BTRFS_I(inode), count); } @@@ -7988,7 -7933,7 +7989,7 @@@ out } static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - __u64 start, __u64 len) + u64 start, u64 len) { int ret; @@@ -8165,17 -8110,20 +8166,17 @@@ again /* * Qgroup reserved space handler * Page here will be either - * 1) Already written to disk - * In this case, its reserved space is released from data rsv map - * and will be freed by delayed_ref handler finally. - * So even we call qgroup_free_data(), it won't decrease reserved - * space. - * 2) Not written to disk - * This means the reserved space should be freed here. However, - * if a truncate invalidates the page (by clearing PageDirty) - * and the page is accounted for while allocating extent - * in btrfs_check_data_free_space() we let delayed_ref to - * free the entire extent. + * 1) Already written to disk or ordered extent already submitted + * Then its QGROUP_RESERVED bit in io_tree is already cleaned. + * Qgroup will be handled by its qgroup_record then. + * btrfs_qgroup_free_data() call will do nothing here. + * + * 2) Not written to disk yet + * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED + * bit of its io_tree, and free the qgroup reserved data space. + * Since the IO will never happen for this page. */ - if (PageDirty(page)) - btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE); + btrfs_qgroup_free_data(BTRFS_I(inode), NULL, page_start, PAGE_SIZE); if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | @@@ -8239,8 -8187,8 +8240,8 @@@ vm_fault_t btrfs_page_mkwrite(struct vm * end up waiting indefinitely to get a lock on the page currently * being processed by btrfs_page_mkwrite() function. */ - ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start, - reserved_space); + ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, + page_start, reserved_space); if (!ret2) { ret2 = file_update_time(vmf->vma->vm_file); reserved = 1; @@@ -8287,9 -8235,9 +8288,9 @@@ again fs_info->sectorsize); if (reserved_space < PAGE_SIZE) { end = page_start + reserved_space - 1; - btrfs_delalloc_release_space(inode, data_reserved, - page_start, PAGE_SIZE - reserved_space, - true); + btrfs_delalloc_release_space(BTRFS_I(inode), + data_reserved, page_start, + PAGE_SIZE - reserved_space, true); } } @@@ -8304,7 -8252,7 +8305,7 @@@ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, &cached_state); - ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0, + ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0, &cached_state); if (ret2) { unlock_extent_cached(io_tree, page_start, page_end, @@@ -8344,7 -8292,7 +8345,7 @@@ out_unlock unlock_page(page); out: btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); - btrfs_delalloc_release_space(inode, data_reserved, page_start, + btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start, reserved_space, (ret != 0)); out_noreserve: sb_end_pagefault(inode->i_sb); @@@ -8558,7 -8506,6 +8559,7 @@@ struct inode *btrfs_alloc_inode(struct ei->index_cnt = (u64)-1; ei->dir_index = 0; ei->last_unlink_trans = 0; + ei->last_reflink_trans = 0; ei->last_log_commit = 0; spin_lock_init(&ei->lock); @@@ -8645,7 -8592,7 +8646,7 @@@ void btrfs_destroy_inode(struct inode * btrfs_put_ordered_extent(ordered); } } - btrfs_qgroup_check_reserved_leak(inode); + btrfs_qgroup_check_reserved_leak(BTRFS_I(inode)); inode_tree_del(inode); btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1); @@@ -9627,31 -9574,6 +9628,31 @@@ out_unlock return err; } +static int insert_prealloc_file_extent(struct btrfs_trans_handle *trans, + struct inode *inode, struct btrfs_key *ins, + u64 file_offset) +{ + struct btrfs_file_extent_item stack_fi; + u64 start = ins->objectid; + u64 len = ins->offset; + int ret; + + memset(&stack_fi, 0, sizeof(stack_fi)); + + btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC); + btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start); + btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len); + btrfs_set_stack_file_extent_num_bytes(&stack_fi, len); + btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len); + btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE); + /* Encryption and other encoding is reserved and all 0 */ + + ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len); + if (ret < 0) + return ret; + return insert_reserved_file_extent(trans, BTRFS_I(inode), file_offset, + &stack_fi, ret); +} static int __btrfs_prealloc_file_range(struct inode *inode, int mode, u64 start, u64 num_bytes, u64 min_size, loff_t actual_len, u64 *alloc_hint, @@@ -9710,7 -9632,11 +9711,7 @@@ btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; - ret = insert_reserved_file_extent(trans, inode, - cur_offset, ins.objectid, - ins.offset, ins.offset, - ins.offset, 0, 0, 0, - BTRFS_FILE_EXTENT_PREALLOC); + ret = insert_prealloc_file_extent(trans, inode, &ins, cur_offset); if (ret) { btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); @@@ -9783,7 -9709,7 +9784,7 @@@ next btrfs_end_transaction(trans); } if (clear_offset < end) - btrfs_free_reserved_data_space(inode, NULL, clear_offset, + btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset, end - clear_offset + 1); return ret; } diff --combined include/linux/fs.h index 2df72def1f59,9bf7a32f2932..407881ebeab1 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@@ -175,9 -175,6 +175,9 @@@ typedef int (dio_iodone_t)(struct kioc /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +/* File supports async buffered reads */ +#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are @@@ -318,9 -315,6 +318,9 @@@ enum rw_hint #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +/* iocb->ki_waitq is valid */ +#define IOCB_WAITQ (1 << 8) +#define IOCB_NOIO (1 << 9) struct kiocb { struct file *ki_filp; @@@ -334,10 -328,7 +334,10 @@@ int ki_flags; u16 ki_hint; u16 ki_ioprio; /* See linux/ioprio.h */ - unsigned int ki_cookie; /* for ->iopoll */ + union { + unsigned int ki_cookie; /* for ->iopoll */ + struct wait_page_queue *ki_waitq; /* for async buffered IO */ + }; randomized_struct_fields_end }; @@@ -479,6 -470,45 +479,6 @@@ struct address_space * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. */ -struct request_queue; - -struct block_device { - dev_t bd_dev; /* not a kdev_t - it's a search key */ - int bd_openers; - struct inode * bd_inode; /* will die */ - struct super_block * bd_super; - struct mutex bd_mutex; /* open/close mutex */ - void * bd_claiming; - void * bd_holder; - int bd_holders; - bool bd_write_holder; -#ifdef CONFIG_SYSFS - struct list_head bd_holder_disks; -#endif - struct block_device * bd_contains; - unsigned bd_block_size; - u8 bd_partno; - struct hd_struct * bd_part; - /* number of times partitions within this device have been opened. */ - unsigned bd_part_count; - int bd_invalidated; - struct gendisk * bd_disk; - struct request_queue * bd_queue; - struct backing_dev_info *bd_bdi; - struct list_head bd_list; - /* - * Private data. You must have bd_claim'ed the block_device - * to use this. NOTE: bd_claim allows an owner to claim - * the same device multiple times, the owner must take special - * care to not mess up bd_private for that case. - */ - unsigned long bd_private; - - /* The counter of freeze processes */ - int bd_fsfreeze_count; - /* Mutex for freeze */ - struct mutex bd_fsfreeze_mutex; -} __randomize_layout; /* XArray tags, for tagging dirty and writeback pages in the pagecache. */ #define PAGECACHE_TAG_DIRTY XA_MARK_0 @@@ -528,7 -558,7 +528,7 @@@ static inline int mapping_mapped(struc /* * Might pages of this file have been modified in userspace? - * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff + * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap * marks vma as VM_SHARED if it is shared, and the file was opened for * writing i.e. vma may be mprotected writable even if now readonly. * @@@ -877,6 -907,8 +877,6 @@@ static inline unsigned imajor(const str return MAJOR(inode->i_rdev); } -extern struct block_device *I_BDEV(struct inode *inode); - struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ @@@ -1348,7 -1380,6 +1348,7 @@@ extern int send_sigurg(struct fown_stru #define SB_NODIRATIME 2048 /* Do not update directory access times */ #define SB_SILENT 32768 #define SB_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */ #define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define SB_I_VERSION (1<<23) /* Update inode I_version field */ #define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ @@@ -1712,10 -1743,6 +1712,10 @@@ int vfs_mkobj(struct dentry *, umode_t int (*f)(struct dentry *, umode_t, void *), void *); +int vfs_fchown(struct file *file, uid_t user, gid_t group); +int vfs_fchmod(struct file *file, umode_t mode); +int vfs_utimes(const struct path *path, struct timespec64 *times); + extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT @@@ -1747,6 -1774,14 +1747,6 @@@ struct dir_context loff_t pos; }; -struct block_device_operations; - -/* These macros are for out of kernel modules to test that - * the kernel supports the unlocked_ioctl and compat_ioctl - * fields in struct file_operations. */ -#define HAVE_COMPAT_IOCTL 1 -#define HAVE_UNLOCKED_IOCTL 1 - /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. @@@ -1882,6 -1917,7 +1882,6 @@@ ssize_t rw_copy_check_uvector(int type struct iovec *fast_pointer, struct iovec **ret_pointer); -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, @@@ -1946,27 -1982,27 +1946,27 @@@ struct super_operations /* * Inode flags - they have no relation to superblock flags now */ - #define S_SYNC 1 /* Writes are synced at once */ - #define S_NOATIME 2 /* Do not update access times */ - #define S_APPEND 4 /* Append-only file */ - #define S_IMMUTABLE 8 /* Immutable file */ - #define S_DEAD 16 /* removed, but still open directory */ - #define S_NOQUOTA 32 /* Inode is not counted to quota */ - #define S_DIRSYNC 64 /* Directory modifications are synchronous */ - #define S_NOCMTIME 128 /* Do not update file c/mtime */ - #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ - #define S_PRIVATE 512 /* Inode is fs-internal */ - #define S_IMA 1024 /* Inode has an associated IMA struct */ - #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ - #define S_NOSEC 4096 /* no suid or xattr security attributes */ + #define S_SYNC (1 << 0) /* Writes are synced at once */ + #define S_NOATIME (1 << 1) /* Do not update access times */ + #define S_APPEND (1 << 2) /* Append-only file */ + #define S_IMMUTABLE (1 << 3) /* Immutable file */ + #define S_DEAD (1 << 4) /* removed, but still open directory */ + #define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */ + #define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */ + #define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */ + #define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */ + #define S_PRIVATE (1 << 9) /* Inode is fs-internal */ + #define S_IMA (1 << 10) /* Inode has an associated IMA struct */ + #define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */ + #define S_NOSEC (1 << 12) /* no suid or xattr security attributes */ #ifdef CONFIG_FS_DAX - #define S_DAX 8192 /* Direct Access, avoiding the page cache */ + #define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */ #else - #define S_DAX 0 /* Make all the DAX code disappear */ + #define S_DAX 0 /* Make all the DAX code disappear */ #endif - #define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ - #define S_CASEFOLD 32768 /* Casefolded file */ - #define S_VERITY 65536 /* Verity file (using fs/verity/) */ + #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */ + #define S_CASEFOLD (1 << 15) /* Casefolded file */ + #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@@ -2228,9 -2264,18 +2228,9 @@@ struct file_system_type #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME) -#ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); -#else -static inline struct dentry *mount_bdev(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) -{ - return ERR_PTR(-ENODEV); -} -#endif extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@@ -2239,7 -2284,14 +2239,7 @@@ extern struct dentry *mount_nodev(struc int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); -#ifdef CONFIG_BLOCK void kill_block_super(struct super_block *sb); -#else -static inline void kill_block_super(struct super_block *sb) -{ - BUG(); -} -#endif void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); @@@ -2529,16 -2581,95 +2529,16 @@@ extern struct kmem_cache *names_cachep #define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL) #define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) -#ifdef CONFIG_BLOCK -extern int register_blkdev(unsigned int, const char *); -extern void unregister_blkdev(unsigned int, const char *); -extern struct block_device *bdget(dev_t); -extern struct block_device *bdgrab(struct block_device *bdev); -extern void bd_set_size(struct block_device *, loff_t size); -extern void bd_forget(struct inode *inode); -extern void bdput(struct block_device *); -extern void invalidate_bdev(struct block_device *); -extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); -extern int sync_blockdev(struct block_device *bdev); -extern void kill_bdev(struct block_device *); -extern struct super_block *freeze_bdev(struct block_device *); -extern void emergency_thaw_all(void); -extern void emergency_thaw_bdev(struct super_block *sb); -extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); -extern int fsync_bdev(struct block_device *); - extern struct super_block *blockdev_superblock; - static inline bool sb_is_blkdev_sb(struct super_block *sb) { - return sb == blockdev_superblock; -} -#else -static inline void bd_forget(struct inode *inode) {} -static inline int sync_blockdev(struct block_device *bdev) { return 0; } -static inline void kill_bdev(struct block_device *bdev) {} -static inline void invalidate_bdev(struct block_device *bdev) {} - -static inline struct super_block *freeze_bdev(struct block_device *sb) -{ - return NULL; -} - -static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) -{ - return 0; -} - -static inline int emergency_thaw_bdev(struct super_block *sb) -{ - return 0; + return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock; } -static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) -{ -} - -static inline bool sb_is_blkdev_sb(struct super_block *sb) -{ - return false; -} -#endif +void emergency_thaw_all(void); extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; extern const struct file_operations def_chr_fops; -#ifdef CONFIG_BLOCK -extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); -extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); -extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, - void *holder); -extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, - void *holder); -extern struct block_device *bd_start_claiming(struct block_device *bdev, - void *holder); -extern void bd_finish_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void bd_abort_claiming(struct block_device *bdev, - struct block_device *whole, void *holder); -extern void blkdev_put(struct block_device *bdev, fmode_t mode); - -#ifdef CONFIG_SYSFS -extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); -extern void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk); -#else -static inline int bd_link_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ - return 0; -} -static inline void bd_unlink_disk_holder(struct block_device *bdev, - struct gendisk *disk) -{ -} -#endif -#endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_MAX 512 @@@ -2569,12 -2700,31 +2569,12 @@@ static inline void unregister_chrdev(un __unregister_chrdev(major, 0, 256, name); } -/* fs/block_dev.c */ -#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ -#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ - -#ifdef CONFIG_BLOCK -#define BLKDEV_MAJOR_MAX 512 -extern const char *bdevname(struct block_device *bdev, char *buffer); -extern struct block_device *lookup_bdev(const char *); -extern void blkdev_show(struct seq_file *,off_t); - -#else -#define BLKDEV_MAJOR_MAX 0 -#endif - extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); -#ifdef CONFIG_BLOCK -extern int revalidate_disk(struct gendisk *); -extern int check_disk_change(struct block_device *); -extern int __invalidate_device(struct block_device *, bool); -#endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); @@@ -2679,7 -2829,7 +2679,7 @@@ static inline errseq_t filemap_sample_w /** * file_sample_sb_err - sample the current errseq_t to test for later errors - * @mapping: mapping to be sampled + * @file: file pointer to be sampled * * Grab the most current superblock-level errseq_t value for the given * struct file. @@@ -2885,7 -3035,6 +2885,7 @@@ extern int kernel_read_file_from_path_i extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); @@@ -2950,21 -3099,6 +2950,21 @@@ extern void discard_new_inode(struct in extern unsigned int get_next_ino(void); extern void evict_inodes(struct super_block *sb); +/* + * Userspace may rely on the the inode number being non-zero. For example, glibc + * simply ignores files with zero i_ino in unlink() and other places. + * + * As an additional complication, if userspace was compiled with + * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the + * lower 32 bits, so we need to check that those aren't zero explicitly. With + * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but + * better safe than sorry. + */ +static inline bool is_zero_ino(ino_t ino) +{ + return (u32)ino == 0; +} + extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); @@@ -2990,6 -3124,10 +2990,6 @@@ static inline void remove_inode_hash(st extern void inode_sb_list_add(struct inode *inode); -#ifdef CONFIG_BLOCK -extern int bdev_read_only(struct block_device *); -#endif -extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@@ -3302,28 -3440,22 +3302,28 @@@ static inline int iocb_flags(struct fil static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) { + int kiocb_flags = 0; + + if (!flags) + return 0; if (unlikely(flags & ~RWF_SUPPORTED)) return -EOPNOTSUPP; if (flags & RWF_NOWAIT) { if (!(ki->ki_filp->f_mode & FMODE_NOWAIT)) return -EOPNOTSUPP; - ki->ki_flags |= IOCB_NOWAIT; + kiocb_flags |= IOCB_NOWAIT; } if (flags & RWF_HIPRI) - ki->ki_flags |= IOCB_HIPRI; + kiocb_flags |= IOCB_HIPRI; if (flags & RWF_DSYNC) - ki->ki_flags |= IOCB_DSYNC; + kiocb_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) - ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC); if (flags & RWF_APPEND) - ki->ki_flags |= IOCB_APPEND; + kiocb_flags |= IOCB_APPEND; + + ki->ki_flags |= kiocb_flags; return 0; } diff --combined include/linux/skbuff.h index 3ad65d4ce085,1530e81a6cce..46881d902124 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -238,6 -238,7 +238,7 @@@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + struct ahash_request; struct net_device; struct scatterlist; struct pipe_inode_info; @@@ -283,7 -284,6 +284,7 @@@ struct nf_bridge_info */ struct tc_skb_ext { __u32 chain; + __u16 mru; }; #endif @@@ -1329,7 -1329,7 +1330,7 @@@ void skb_flow_dissect_meta(const struc void *target_container); /* Gets a skb connection tracking info, ctinfo map should be a - * a map of mapsize to translate enum ip_conntrack_info states + * map of mapsize to translate enum ip_conntrack_info states * to user states. */ void @@@ -1343,10 -1343,6 +1344,10 @@@ skb_flow_dissect_tunnel_info(const stru struct flow_dissector *flow_dissector, void *target_container); +void skb_flow_dissect_hash(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) @@@ -3817,7 -3813,7 +3818,7 @@@ static inline bool skb_defer_rx_timesta * must call this function to return the skb back to the stack with a * timestamp. * - * @skb: clone of the the original outgoing packet + * @skb: clone of the original outgoing packet * @hwtstamps: hardware time stamps * */