* Copyright (C) 2007 Oracle. All rights reserved.
*/
+ #include <crypto/hash.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode, bool skip_writeback);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
-static noinline int cow_file_range(struct inode *inode,
+static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
-static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
- u64 orig_start, u64 block_start,
+static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+ u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
u64 ram_bytes, int compress_type,
int type);
-static void __endio_write_update_ordered(struct inode *inode,
+static void __endio_write_update_ordered(struct btrfs_inode *inode,
const u64 offset, const u64 bytes,
const bool uptodate);
* to be released, which we want to happen only when finishing the ordered
* extent (btrfs_finish_ordered_io()).
*/
-static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
+static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
struct page *locked_page,
u64 offset, u64 bytes)
{
struct page *page;
while (index <= end_index) {
- page = find_get_page(inode->i_mapping, index);
+ page = find_get_page(inode->vfs_inode.i_mapping, index);
index++;
if (!page)
continue;
* does the checks required to make sure the data is small enough
* to fit as an inline extent.
*/
-static noinline int cow_file_range_inline(struct inode *inode, u64 start,
+static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 start,
u64 end, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
- u64 isize = i_size_read(inode);
+ u64 isize = i_size_read(&inode->vfs_inode);
u64 actual_end = min(end + 1, isize);
u64 inline_len = actual_end - start;
u64 aligned_end = ALIGN(end, fs_info->sectorsize);
btrfs_free_path(path);
return PTR_ERR(trans);
}
- trans->block_rsv = &BTRFS_I(inode)->block_rsv;
+ trans->block_rsv = &inode->block_rsv;
if (compressed_size && compressed_pages)
extent_item_size = btrfs_file_extent_calc_inline_size(
extent_item_size = btrfs_file_extent_calc_inline_size(
inline_len);
- ret = __btrfs_drop_extents(trans, root, inode, path,
- start, aligned_end, NULL,
- 1, 1, extent_item_size, &extent_inserted);
+ ret = __btrfs_drop_extents(trans, root, inode, path, start, aligned_end,
+ NULL, 1, 1, extent_item_size,
+ &extent_inserted);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
ret = insert_inline_extent(trans, path, extent_inserted,
- root, inode, start,
+ root, &inode->vfs_inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
goto out;
}
- set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
- btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0);
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+ btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
/*
* Don't forget to free the reserved space, as for inlined extent
/*
* Check if the inode has flags compatible with compression
*/
-static inline bool inode_can_compress(struct inode *inode)
+static inline bool inode_can_compress(struct btrfs_inode *inode)
{
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
- BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ if (inode->flags & BTRFS_INODE_NODATACOW ||
+ inode->flags & BTRFS_INODE_NODATASUM)
return false;
return true;
}
* Check if the inode needs to be submitted to compression, based on mount
* options, defragmentation, properties or heuristics.
*/
-static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
+static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
+ u64 end)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
if (!inode_can_compress(inode)) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
- btrfs_ino(BTRFS_I(inode)));
+ btrfs_ino(inode));
return 0;
}
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
/* defrag ioctl */
- if (BTRFS_I(inode)->defrag_compress)
+ if (inode->defrag_compress)
return 1;
/* bad compression ratios */
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
+ if (inode->flags & BTRFS_INODE_NOCOMPRESS)
return 0;
if (btrfs_test_opt(fs_info, COMPRESS) ||
- BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
- BTRFS_I(inode)->prop_compress)
- return btrfs_compress_heuristic(inode, start, end);
+ inode->flags & BTRFS_INODE_COMPRESS ||
+ inode->prop_compress)
+ return btrfs_compress_heuristic(&inode->vfs_inode, start, end);
return 0;
}
* inode has not been flagged as nocompress. This flag can
* change at any time if we discover bad compression ratios.
*/
- if (inode_need_compress(inode, start, end)) {
+ if (inode_need_compress(BTRFS_I(inode), start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
/* we didn't compress the entire range, try
* to make an uncompressed inline extent.
*/
- ret = cow_file_range_inline(inode, start, end, 0,
- BTRFS_COMPRESS_NONE, NULL);
+ ret = cow_file_range_inline(BTRFS_I(inode), start, end,
+ 0, BTRFS_COMPRESS_NONE,
+ NULL);
} else {
/* try making a compressed inline extent */
- ret = cow_file_range_inline(inode, start, end,
+ ret = cow_file_range_inline(BTRFS_I(inode), start, end,
total_compressed,
compress_type, pages);
}
* our outstanding extent for clearing delalloc for this
* range.
*/
- extent_clear_unlock_delalloc(inode, start, end, NULL,
+ extent_clear_unlock_delalloc(BTRFS_I(inode), start, end,
+ NULL,
clear_flags,
PAGE_UNLOCK |
PAGE_CLEAR_DIRTY |
*/
static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
{
- struct inode *inode = async_chunk->inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_inode *inode = BTRFS_I(async_chunk->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct async_extent *async_extent;
u64 alloc_hint = 0;
struct btrfs_key ins;
struct extent_map *em;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_root *root = inode->root;
+ struct extent_io_tree *io_tree = &inode->io_tree;
int ret = 0;
again:
* all those pages down to the drive.
*/
if (!page_started && !ret)
- extent_write_locked_range(inode,
+ extent_write_locked_range(&inode->vfs_inode,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
* will not submit these pages down to lower
* layers.
*/
- extent_range_redirty_for_io(inode,
+ extent_range_redirty_for_io(&inode->vfs_inode,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
if (ret) {
- btrfs_drop_extent_cache(BTRFS_I(inode),
- async_extent->start,
+ btrfs_drop_extent_cache(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1, 0);
goto out_free_reserve;
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
PAGE_SET_WRITEBACK);
- if (btrfs_submit_compressed_write(inode,
- async_extent->start,
+ if (btrfs_submit_compressed_write(inode, async_extent->start,
async_extent->ram_size,
ins.objectid,
ins.offset, async_extent->pages,
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
- p->mapping = inode->i_mapping;
+ p->mapping = inode->vfs_inode.i_mapping;
btrfs_writepage_endio_finish_ordered(p, start, end, 0);
p->mapping = NULL;
- extent_clear_unlock_delalloc(inode, start, end,
- NULL, 0,
+ extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
PAGE_END_WRITEBACK |
PAGE_SET_ERROR);
free_async_extent_pages(async_extent);
goto again;
}
-static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
u64 num_bytes)
{
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
u64 alloc_hint = 0;
* required to start IO on it. It may be clean and already done with
* IO when we return.
*/
-static noinline int cow_file_range(struct inode *inode,
+static noinline int cow_file_range(struct btrfs_inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
bool extent_reserved = false;
int ret = 0;
- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (btrfs_is_free_space_inode(inode)) {
WARN_ON_ONCE(1);
ret = -EINVAL;
goto out_unlock;
num_bytes = max(blocksize, num_bytes);
ASSERT(num_bytes <= btrfs_super_total_bytes(fs_info->super_copy));
- inode_should_defrag(BTRFS_I(inode), start, end, num_bytes, SZ_64K);
+ inode_should_defrag(inode, start, end, num_bytes, SZ_64K);
if (start == 0) {
/* lets try to make an inline extent */
}
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
- btrfs_drop_extent_cache(BTRFS_I(inode), start,
- start + num_bytes - 1, 0);
+ btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
+
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
* skip current ordered extent.
*/
if (ret)
- btrfs_drop_extent_cache(BTRFS_I(inode), start,
+ btrfs_drop_extent_cache(inode, start,
start + ram_size - 1, 0);
}
page_ops = unlock ? PAGE_UNLOCK : 0;
page_ops |= PAGE_SET_PRIVATE2;
- extent_clear_unlock_delalloc(inode, start,
- start + ram_size - 1,
+ extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
locked_page,
EXTENT_LOCKED | EXTENT_DELALLOC,
page_ops);
return ret;
out_drop_extent_cache:
- btrfs_drop_extent_cache(BTRFS_I(inode), start, start + ram_size - 1, 0);
+ btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
kvfree(async_chunk->pending);
}
-static int cow_file_range_async(struct inode *inode,
+static int cow_file_range_async(struct btrfs_inode *inode,
struct writeback_control *wbc,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned nofs_flag;
const unsigned int write_flags = wbc_to_write_flags(wbc);
- unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
+ unlock_extent(&inode->io_tree, start, end);
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS &&
+ if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
!btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
num_chunks = 1;
should_compress = false;
* igrab is called higher up in the call chain, take only the
* lightweight reference for the callback lifetime
*/
- ihold(inode);
+ ihold(&inode->vfs_inode);
async_chunk[i].pending = &ctx->num_chunks;
- async_chunk[i].inode = inode;
+ async_chunk[i].inode = &inode->vfs_inode;
async_chunk[i].start = start;
async_chunk[i].end = cur_end;
async_chunk[i].write_flags = write_flags;
return 1;
}
-static int fallback_to_cow(struct inode *inode, struct page *locked_page,
+static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
const u64 start, const u64 end,
int *page_started, unsigned long *nr_written)
{
- const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_space_ino = btrfs_is_free_space_inode(inode);
+ const bool is_reloc_ino = (inode->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_io_tree *io_tree = &inode->io_tree;
u64 range_start = start;
u64 count;
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
* If no cow copies or snapshots exist, we write directly to the existing
* blocks on disk
*/
-static noinline int run_delalloc_nocow(struct inode *inode,
+static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
struct page *locked_page,
const u64 start, const u64 end,
int *page_started, int force,
unsigned long *nr_written)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_root *root = inode->root;
struct btrfs_path *path;
u64 cow_start = (u64)-1;
u64 cur_offset = start;
int ret;
bool check_prev = true;
- const bool freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
- u64 ino = btrfs_ino(BTRFS_I(inode));
+ const bool freespace_inode = btrfs_is_free_space_inode(inode);
+ u64 ino = btrfs_ino(inode);
bool nocow = false;
u64 disk_bytenr = 0;
* NOCOW, following one which needs to be COW'ed
*/
if (cow_start != (u64)-1) {
- ret = fallback_to_cow(inode, locked_page, cow_start,
- found_key.offset - 1,
+ ret = fallback_to_cow(inode, locked_page,
+ cow_start, found_key.offset - 1,
page_started, nr_written);
- if (ret) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
ret = PTR_ERR(em);
goto error;
}
num_bytes,
BTRFS_ORDERED_PREALLOC);
if (ret) {
- btrfs_drop_extent_cache(BTRFS_I(inode),
- cur_offset,
+ btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + num_bytes - 1,
0);
goto error;
return ret;
}
-static inline int need_force_cow(struct inode *inode, u64 start, u64 end)
+static inline int need_force_cow(struct btrfs_inode *inode, u64 start, u64 end)
{
- if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
- !(BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC))
+ if (!(inode->flags & BTRFS_INODE_NODATACOW) &&
+ !(inode->flags & BTRFS_INODE_PREALLOC))
return 0;
/*
* if is not zero, it means the file is defragging.
* Force cow if given extent needs to be defragged.
*/
- if (BTRFS_I(inode)->defrag_bytes &&
- test_range_bit(&BTRFS_I(inode)->io_tree, start, end,
- EXTENT_DEFRAG, 0, NULL))
+ if (inode->defrag_bytes &&
+ test_range_bit(&inode->io_tree, start, end, EXTENT_DEFRAG, 0, NULL))
return 1;
return 0;
* Function to process delayed allocation (create CoW) for ranges which are
* being touched for the first time.
*/
-int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
+int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc)
{
int ret;
int force_cow = need_force_cow(inode, start, end);
- if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
+ if (inode->flags & BTRFS_INODE_NODATACOW && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 1, nr_written);
- } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
+ } else if (inode->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
} else if (!inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1);
+ page_started, nr_written, 1);
} else {
- set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags);
+ set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written);
}
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID &&
do_list && !(state->state & EXTENT_NORESERVE) &&
(*bits & EXTENT_CLEAR_DATA_RESV))
- btrfs_free_reserved_data_space_noquota(
- &inode->vfs_inode,
- state->start, len);
+ btrfs_free_reserved_data_space_noquota(fs_info, len);
percpu_counter_add_batch(&fs_info->delalloc_bytes, -len,
fs_info->delalloc_batch);
struct inode *inode = private_data;
blk_status_t ret = 0;
- ret = btrfs_csum_one_bio(inode, bio, 0, 0);
+ ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
BUG_ON(ret); /* -ENOMEM */
return 0;
}
0, inode, btrfs_submit_bio_start);
goto out;
} else if (!skip_sum) {
- ret = btrfs_csum_one_bio(inode, bio, 0, 0);
+ ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
if (ret)
goto out;
}
return 0;
}
-int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state)
{
WARN_ON(PAGE_ALIGNED(end));
- return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
- extra_bits, cached_state);
+ return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
+ cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
struct page *page;
- struct inode *inode;
+ struct btrfs_inode *inode;
u64 page_start;
u64 page_end;
int ret = 0;
fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page;
- inode = fixup->inode;
+ inode = BTRFS_I(fixup->inode);
page_start = page_offset(page);
page_end = page_offset(page) + PAGE_SIZE - 1;
* when the page was already properly dealt with.
*/
if (!ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE);
+ btrfs_delalloc_release_extents(inode, PAGE_SIZE);
btrfs_delalloc_release_space(inode, data_reserved,
page_start, PAGE_SIZE,
true);
if (ret)
goto out_page;
- lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
- &cached_state);
+ lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
/* already ordered? We're done */
if (PagePrivate2(page))
goto out_reserved;
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
- PAGE_SIZE);
+ ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
if (ordered) {
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
- page_end, &cached_state);
+ unlock_extent_cached(&inode->io_tree, page_start, page_end,
+ &cached_state);
unlock_page(page);
- btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
goto again;
}
BUG_ON(!PageDirty(page));
free_delalloc_space = false;
out_reserved:
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ btrfs_delalloc_release_extents(inode, PAGE_SIZE);
if (free_delalloc_space)
btrfs_delalloc_release_space(inode, data_reserved, page_start,
PAGE_SIZE, true);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ unlock_extent_cached(&inode->io_tree, page_start, page_end,
&cached_state);
out_page:
if (ret) {
* that could need flushing space. Recursing back to fixup worker would
* deadlock.
*/
- btrfs_add_delayed_iput(inode);
+ btrfs_add_delayed_iput(&inode->vfs_inode);
}
/*
}
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 file_pos,
- u64 disk_bytenr, u64 disk_num_bytes,
- u64 num_bytes, u64 ram_bytes,
- u8 compression, u8 encryption,
- u16 other_encoding, int extent_type)
+ struct btrfs_inode *inode, u64 file_pos,
+ struct btrfs_file_extent_item *stack_fi,
+ u64 qgroup_reserved)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_file_extent_item *fi;
+ struct btrfs_root *root = inode->root;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
- u64 qg_released;
+ u64 disk_num_bytes = btrfs_stack_file_extent_disk_num_bytes(stack_fi);
+ u64 disk_bytenr = btrfs_stack_file_extent_disk_bytenr(stack_fi);
+ u64 num_bytes = btrfs_stack_file_extent_num_bytes(stack_fi);
+ u64 ram_bytes = btrfs_stack_file_extent_ram_bytes(stack_fi);
int extent_inserted = 0;
int ret;
*/
ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
file_pos + num_bytes, NULL, 0,
- 1, sizeof(*fi), &extent_inserted);
+ 1, sizeof(*stack_fi), &extent_inserted);
if (ret)
goto out;
if (!extent_inserted) {
- ins.objectid = btrfs_ino(BTRFS_I(inode));
+ ins.objectid = btrfs_ino(inode);
ins.offset = file_pos;
ins.type = BTRFS_EXTENT_DATA_KEY;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &ins,
- sizeof(*fi));
+ sizeof(*stack_fi));
if (ret)
goto out;
}
leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, fi, trans->transid);
- btrfs_set_file_extent_type(leaf, fi, extent_type);
- btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_num_bytes);
- btrfs_set_file_extent_offset(leaf, fi, 0);
- btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
- btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
- btrfs_set_file_extent_compression(leaf, fi, compression);
- btrfs_set_file_extent_encryption(leaf, fi, encryption);
- btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
+ btrfs_set_stack_file_extent_generation(stack_fi, trans->transid);
+ write_extent_buffer(leaf, stack_fi,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ sizeof(struct btrfs_file_extent_item));
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- inode_add_bytes(inode, num_bytes);
+ inode_add_bytes(&inode->vfs_inode, num_bytes);
ins.objectid = disk_bytenr;
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
- ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
- ram_bytes);
+ ret = btrfs_inode_set_file_extent_range(inode, file_pos, ram_bytes);
if (ret)
goto out;
- /*
- * Release the reserved range from inode dirty range map, as it is
- * already moved into delayed_ref_head
- */
- ret = btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
- if (ret < 0)
- goto out;
- qg_released = ret;
- ret = btrfs_alloc_reserved_file_extent(trans, root,
- btrfs_ino(BTRFS_I(inode)),
- file_pos, qg_released, &ins);
+ ret = btrfs_alloc_reserved_file_extent(trans, root, btrfs_ino(inode),
+ file_pos, qgroup_reserved, &ins);
out:
btrfs_free_path(path);
btrfs_put_block_group(cache);
}
-/* as ordered data IO finishes, this gets called so we can finish
+static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct btrfs_ordered_extent *oe)
+{
+ struct btrfs_file_extent_item stack_fi;
+ u64 logical_len;
+
+ memset(&stack_fi, 0, sizeof(stack_fi));
+ btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, oe->disk_bytenr);
+ btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi,
+ oe->disk_num_bytes);
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &oe->flags))
+ logical_len = oe->truncated_len;
+ else
+ logical_len = oe->num_bytes;
+ btrfs_set_stack_file_extent_num_bytes(&stack_fi, logical_len);
+ btrfs_set_stack_file_extent_ram_bytes(&stack_fi, logical_len);
+ btrfs_set_stack_file_extent_compression(&stack_fi, oe->compress_type);
+ /* Encryption and other encoding is reserved and all 0 */
+
+ return insert_reserved_file_extent(trans, BTRFS_I(inode), oe->file_offset,
+ &stack_fi, oe->qgroup_rsv);
+}
+
+/*
+ * As ordered data IO finishes, this gets called so we can finish
* an ordered extent if the range of bytes in the file it covers are
* fully written.
*/
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
- /*
- * For mwrite(mmap + memset to write) case, we still reserve
- * space for NOCOW range.
- * As NOCOW won't cause a new delayed ref, just free the space
- */
- btrfs_qgroup_free_data(inode, NULL, start,
- ordered_extent->num_bytes);
btrfs_inode_safe_disk_i_size_write(inode, 0);
if (freespace_inode)
trans = btrfs_join_transaction_spacecache(root);
compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
- btrfs_qgroup_free_data(inode, NULL, start,
- ordered_extent->num_bytes);
ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
ordered_extent->file_offset,
ordered_extent->file_offset +
logical_len);
} else {
BUG_ON(root == fs_info->tree_root);
- ret = insert_reserved_file_extent(trans, inode, start,
- ordered_extent->disk_bytenr,
- ordered_extent->disk_num_bytes,
- logical_len, logical_len,
- compress_type, 0, 0,
- BTRFS_FILE_EXTENT_REG);
+ ret = insert_ordered_extent_file_extent(trans, inode,
+ ordered_extent);
if (!ret) {
clear_reserved_extent = false;
btrfs_release_delalloc_bytes(fs_info,
zeroit:
btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
io_bio->mirror_num);
+ if (io_bio->device)
+ btrfs_dev_stat_inc_and_print(io_bio->device,
+ BTRFS_DEV_STAT_CORRUPTION_ERRS);
memset(kaddr + pgoff, 1, len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
*/
BTRFS_I(inode)->last_unlink_trans = BTRFS_I(inode)->last_trans;
+ /*
+ * Same logic as for last_unlink_trans. We don't persist the generation
+ * of the last transaction where this inode was used for a reflink
+ * operation, so after eviction and reloading the inode we must be
+ * pessimistic and assume the last transaction that modified the inode.
+ */
+ BTRFS_I(inode)->last_reflink_trans = BTRFS_I(inode)->last_trans;
+
path->slots[0]++;
if (inode->i_nlink != 1 ||
path->slots[0] >= btrfs_header_nritems(leaf))
fill_inode_item(trans, leaf, inode_item, inode);
btrfs_mark_buffer_dirty(leaf);
- btrfs_set_inode_last_trans(trans, inode);
+ btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
ret = 0;
failed:
btrfs_free_path(path);
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
- btrfs_set_inode_last_trans(trans, inode);
+ btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
return ret;
}
}
}
+ free_anon_bdev(dest->anon_dev);
+ dest->anon_dev = 0;
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
char *kaddr;
+ bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (blocksize - 1);
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping);
+ size_t write_bytes = blocksize;
int ret = 0;
u64 block_start;
u64 block_end;
block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1;
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
- block_start, blocksize);
- if (ret)
+ ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
+ block_start, blocksize);
+ if (ret < 0) {
+ if (btrfs_check_nocow_lock(BTRFS_I(inode), block_start,
+ &write_bytes) > 0) {
+ /* For nocow case, no need to reserve data space */
+ only_release_metadata = true;
+ } else {
+ goto out;
+ }
+ }
+ ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
+ if (ret < 0) {
+ if (!only_release_metadata)
+ btrfs_free_reserved_data_space(BTRFS_I(inode),
+ data_reserved, block_start, blocksize);
goto out;
-
+ }
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
- btrfs_delalloc_release_space(inode, data_reserved,
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
block_start, blocksize, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
ret = -ENOMEM;
lock_extent_bits(io_tree, block_start, block_end, &cached_state);
set_page_extent_mapped(page);
- ordered = btrfs_lookup_ordered_extent(inode, block_start);
+ ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), block_start);
if (ordered) {
unlock_extent_cached(io_tree, block_start, block_end,
&cached_state);
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state);
- ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
+ ret = btrfs_set_extent_delalloc(BTRFS_I(inode), block_start, block_end, 0,
&cached_state);
if (ret) {
unlock_extent_cached(io_tree, block_start, block_end,
set_page_dirty(page);
unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
+ if (only_release_metadata)
+ set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
+ block_end, EXTENT_NORESERVE, NULL, NULL,
+ GFP_NOFS);
+
out_unlock:
- if (ret)
- btrfs_delalloc_release_space(inode, data_reserved, block_start,
- blocksize, true);
+ if (ret) {
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(BTRFS_I(inode),
+ blocksize, true);
+ else
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
+ block_start, blocksize, true);
+ }
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page);
put_page(page);
out:
+ if (only_release_metadata)
+ btrfs_check_nocow_unlock(BTRFS_I(inode));
extent_changeset_free(data_reserved);
return ret;
}
* Note, end is the bytenr of last byte, so we need + 1 here.
*/
if (state_flags & EXTENT_DELALLOC)
- btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
+ btrfs_qgroup_free_data(BTRFS_I(inode), NULL, start,
+ end - start + 1);
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DELALLOC |
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
- btrfs_set_inode_last_trans(trans, inode);
+ btrfs_set_inode_last_trans(trans, BTRFS_I(inode));
btrfs_update_root_times(trans, root);
return em;
}
-static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
+static struct extent_map *btrfs_create_dio_extent(struct btrfs_inode *inode,
const u64 start,
const u64 len,
const u64 orig_start,
int ret;
if (type != BTRFS_ORDERED_NOCOW) {
- em = create_io_em(inode, start, len, orig_start,
- block_start, block_len, orig_block_len,
- ram_bytes,
+ em = create_io_em(inode, start, len, orig_start, block_start,
+ block_len, orig_block_len, ram_bytes,
BTRFS_COMPRESS_NONE, /* compress_type */
type);
if (IS_ERR(em))
goto out;
}
- ret = btrfs_add_ordered_extent_dio(inode, start, block_start,
- len, block_len, type);
+ ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len,
+ block_len, type);
if (ret) {
if (em) {
free_extent_map(em);
- btrfs_drop_extent_cache(BTRFS_I(inode), start,
- start + len - 1, 0);
+ btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
}
em = ERR_PTR(ret);
}
return em;
}
-static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode,
u64 start, u64 len)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
ins.offset, BTRFS_ORDERED_REGULAR);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
if (IS_ERR(em))
- btrfs_free_reserved_extent(fs_info, ins.objectid,
- ins.offset, 1);
+ btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset,
+ 1);
return em;
}
/*
- * returns 1 when the nocow is safe, < 1 on error, 0 if the
- * block must be cow'd
+ * Check if we can do nocow write into the range [@offset, @offset + @len)
+ *
+ * @offset: File offset
+ * @len: The length to write, will be updated to the nocow writeable
+ * range
+ * @orig_start: (optional) Return the original file offset of the file extent
+ * @orig_len: (optional) Return the original on-disk length of the file extent
+ * @ram_bytes: (optional) Return the ram_bytes of the file extent
+ *
+ * This function will flush ordered extents in the range to ensure proper
+ * nocow checks for (nowait == false) case.
+ *
+ * Return:
+ * >0 and update @len if we can do nocow write
+ * 0 if we can't do nocow write
+ * <0 if error happened
+ *
+ * NOTE: This only checks the file extents, caller is responsible to wait for
+ * any ordered extents.
*/
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
}
/* The callers of this must take lock_extent() */
-static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
- u64 orig_start, u64 block_start,
+static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+ u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
u64 ram_bytes, int compress_type,
int type)
type == BTRFS_ORDERED_NOCOW ||
type == BTRFS_ORDERED_REGULAR);
- em_tree = &BTRFS_I(inode)->extent_tree;
+ em_tree = &inode->extent_tree;
em = alloc_extent_map();
if (!em)
return ERR_PTR(-ENOMEM);
}
do {
- btrfs_drop_extent_cache(BTRFS_I(inode), em->start,
- em->start + em->len - 1, 0);
+ btrfs_drop_extent_cache(inode, em->start,
+ em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
btrfs_inc_nocow_writers(fs_info, block_start)) {
struct extent_map *em2;
- em2 = btrfs_create_dio_extent(inode, start, len,
+ em2 = btrfs_create_dio_extent(BTRFS_I(inode), start, len,
orig_start, block_start,
len, orig_block_len,
ram_bytes, type);
* use the existing or preallocated extent, so does not
* need to adjust btrfs_space_info's bytes_may_use.
*/
- btrfs_free_reserved_data_space_noquota(inode, start,
- len);
+ btrfs_free_reserved_data_space_noquota(fs_info, len);
goto skip_cow;
}
}
/* this will cow the extent */
len = bh_result->b_size;
free_extent_map(em);
- *map = em = btrfs_new_extent_direct(inode, start, len);
+ *map = em = btrfs_new_extent_direct(BTRFS_I(inode), start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
return;
if (bio_op(dip->dio_bio) == REQ_OP_WRITE) {
- __endio_write_update_ordered(dip->inode, dip->logical_offset,
+ __endio_write_update_ordered(BTRFS_I(dip->inode),
+ dip->logical_offset,
dip->bytes,
!dip->dio_bio->bi_status);
} else {
return err;
}
-static void __endio_write_update_ordered(struct inode *inode,
+static void __endio_write_update_ordered(struct btrfs_inode *inode,
const u64 offset, const u64 bytes,
const bool uptodate)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_extent *ordered = NULL;
struct btrfs_workqueue *wq;
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
u64 last_offset;
- if (btrfs_is_free_space_inode(BTRFS_I(inode)))
+ if (btrfs_is_free_space_inode(inode))
wq = fs_info->endio_freespace_worker;
else
wq = fs_info->endio_write_workers;
while (ordered_offset < offset + bytes) {
last_offset = ordered_offset;
if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
- &ordered_offset,
- ordered_bytes,
- uptodate)) {
+ &ordered_offset,
+ ordered_bytes,
+ uptodate)) {
btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
NULL);
btrfs_queue_work(wq, &ordered->work);
{
struct inode *inode = private_data;
blk_status_t ret;
- ret = btrfs_csum_one_bio(inode, bio, offset, 1);
+ ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, offset, 1);
BUG_ON(ret); /* -ENOMEM */
return 0;
}
* If we aren't doing async submit, calculate the csum of the
* bio now.
*/
- ret = btrfs_csum_one_bio(inode, bio, file_offset, 1);
+ ret = btrfs_csum_one_bio(BTRFS_I(inode), bio, file_offset, 1);
if (ret)
goto err;
} else {
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
- ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
+ ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
offset, count);
if (ret)
goto out;
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
- btrfs_delalloc_release_space(inode, data_reserved,
- offset, dio_data.reserve, true);
+ btrfs_delalloc_release_space(BTRFS_I(inode),
+ data_reserved, offset, dio_data.reserve,
+ true);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
*/
if (dio_data.unsubmitted_oe_range_start <
dio_data.unsubmitted_oe_range_end)
- __endio_write_update_ordered(inode,
+ __endio_write_update_ordered(BTRFS_I(inode),
dio_data.unsubmitted_oe_range_start,
dio_data.unsubmitted_oe_range_end -
dio_data.unsubmitted_oe_range_start,
false);
} else if (ret >= 0 && (size_t)ret < count)
- btrfs_delalloc_release_space(inode, data_reserved,
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
offset, count - (size_t)ret, true);
btrfs_delalloc_release_extents(BTRFS_I(inode), count);
}
}
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len)
+ u64 start, u64 len)
{
int ret;
/*
* Qgroup reserved space handler
* Page here will be either
- * 1) Already written to disk
- * In this case, its reserved space is released from data rsv map
- * and will be freed by delayed_ref handler finally.
- * So even we call qgroup_free_data(), it won't decrease reserved
- * space.
- * 2) Not written to disk
- * This means the reserved space should be freed here. However,
- * if a truncate invalidates the page (by clearing PageDirty)
- * and the page is accounted for while allocating extent
- * in btrfs_check_data_free_space() we let delayed_ref to
- * free the entire extent.
+ * 1) Already written to disk or ordered extent already submitted
+ * Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ * Qgroup will be handled by its qgroup_record then.
+ * btrfs_qgroup_free_data() call will do nothing here.
+ *
+ * 2) Not written to disk yet
+ * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ * bit of its io_tree, and free the qgroup reserved data space.
+ * Since the IO will never happen for this page.
*/
- if (PageDirty(page))
- btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(BTRFS_I(inode), NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
* end up waiting indefinitely to get a lock on the page currently
* being processed by btrfs_page_mkwrite() function.
*/
- ret2 = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
- reserved_space);
+ ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
+ page_start, reserved_space);
if (!ret2) {
ret2 = file_update_time(vmf->vma->vm_file);
reserved = 1;
fs_info->sectorsize);
if (reserved_space < PAGE_SIZE) {
end = page_start + reserved_space - 1;
- btrfs_delalloc_release_space(inode, data_reserved,
- page_start, PAGE_SIZE - reserved_space,
- true);
+ btrfs_delalloc_release_space(BTRFS_I(inode),
+ data_reserved, page_start,
+ PAGE_SIZE - reserved_space, true);
}
}
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 0, 0, &cached_state);
- ret2 = btrfs_set_extent_delalloc(inode, page_start, end, 0,
+ ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
&cached_state);
if (ret2) {
unlock_extent_cached(io_tree, page_start, page_end,
unlock_page(page);
out:
btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
- btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
reserved_space, (ret != 0));
out_noreserve:
sb_end_pagefault(inode->i_sb);
ei->index_cnt = (u64)-1;
ei->dir_index = 0;
ei->last_unlink_trans = 0;
+ ei->last_reflink_trans = 0;
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
btrfs_put_ordered_extent(ordered);
}
}
- btrfs_qgroup_check_reserved_leak(inode);
+ btrfs_qgroup_check_reserved_leak(BTRFS_I(inode));
inode_tree_del(inode);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
return err;
}
+static int insert_prealloc_file_extent(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct btrfs_key *ins,
+ u64 file_offset)
+{
+ struct btrfs_file_extent_item stack_fi;
+ u64 start = ins->objectid;
+ u64 len = ins->offset;
+ int ret;
+
+ memset(&stack_fi, 0, sizeof(stack_fi));
+
+ btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_PREALLOC);
+ btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, start);
+ btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, len);
+ btrfs_set_stack_file_extent_num_bytes(&stack_fi, len);
+ btrfs_set_stack_file_extent_ram_bytes(&stack_fi, len);
+ btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
+ /* Encryption and other encoding is reserved and all 0 */
+
+ ret = btrfs_qgroup_release_data(BTRFS_I(inode), file_offset, len);
+ if (ret < 0)
+ return ret;
+ return insert_reserved_file_extent(trans, BTRFS_I(inode), file_offset,
+ &stack_fi, ret);
+}
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint,
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
last_alloc = ins.offset;
- ret = insert_reserved_file_extent(trans, inode,
- cur_offset, ins.objectid,
- ins.offset, ins.offset,
- ins.offset, 0, 0, 0,
- BTRFS_FILE_EXTENT_PREALLOC);
+ ret = insert_prealloc_file_extent(trans, inode, &ins, cur_offset);
if (ret) {
btrfs_free_reserved_extent(fs_info, ins.objectid,
ins.offset, 0);
btrfs_end_transaction(trans);
}
if (clear_offset < end)
- btrfs_free_reserved_data_space(inode, NULL, clear_offset,
+ btrfs_free_reserved_data_space(BTRFS_I(inode), NULL, clear_offset,
end - clear_offset + 1);
return ret;
}
/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT ((__force fmode_t)0x20000000)
+/* File supports async buffered reads */
+#define FMODE_BUF_RASYNC ((__force fmode_t)0x40000000)
+
/*
* Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
* that indicates that they should check the contents of the iovec are
#define IOCB_SYNC (1 << 5)
#define IOCB_WRITE (1 << 6)
#define IOCB_NOWAIT (1 << 7)
+/* iocb->ki_waitq is valid */
+#define IOCB_WAITQ (1 << 8)
+#define IOCB_NOIO (1 << 9)
struct kiocb {
struct file *ki_filp;
int ki_flags;
u16 ki_hint;
u16 ki_ioprio; /* See linux/ioprio.h */
- unsigned int ki_cookie; /* for ->iopoll */
+ union {
+ unsigned int ki_cookie; /* for ->iopoll */
+ struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ };
randomized_struct_fields_end
};
* must be enforced here for CRIS, to let the least significant bit
* of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
*/
-struct request_queue;
-
-struct block_device {
- dev_t bd_dev; /* not a kdev_t - it's a search key */
- int bd_openers;
- struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
- struct mutex bd_mutex; /* open/close mutex */
- void * bd_claiming;
- void * bd_holder;
- int bd_holders;
- bool bd_write_holder;
-#ifdef CONFIG_SYSFS
- struct list_head bd_holder_disks;
-#endif
- struct block_device * bd_contains;
- unsigned bd_block_size;
- u8 bd_partno;
- struct hd_struct * bd_part;
- /* number of times partitions within this device have been opened. */
- unsigned bd_part_count;
- int bd_invalidated;
- struct gendisk * bd_disk;
- struct request_queue * bd_queue;
- struct backing_dev_info *bd_bdi;
- struct list_head bd_list;
- /*
- * Private data. You must have bd_claim'ed the block_device
- * to use this. NOTE: bd_claim allows an owner to claim
- * the same device multiple times, the owner must take special
- * care to not mess up bd_private for that case.
- */
- unsigned long bd_private;
-
- /* The counter of freeze processes */
- int bd_fsfreeze_count;
- /* Mutex for freeze */
- struct mutex bd_fsfreeze_mutex;
-} __randomize_layout;
/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
#define PAGECACHE_TAG_DIRTY XA_MARK_0
/*
* Might pages of this file have been modified in userspace?
- * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
+ * Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
*
return MAJOR(inode->i_rdev);
}
-extern struct block_device *I_BDEV(struct inode *inode);
-
struct fown_struct {
rwlock_t lock; /* protects pid, uid, euid fields */
struct pid *pid; /* pid or -pgrp where SIGIO should be sent */
#define SB_NODIRATIME 2048 /* Do not update directory access times */
#define SB_SILENT 32768
#define SB_POSIXACL (1<<16) /* VFS does not apply the umask */
+#define SB_INLINECRYPT (1<<17) /* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define SB_I_VERSION (1<<23) /* Update inode I_version field */
#define SB_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
int (*f)(struct dentry *, umode_t, void *),
void *);
+int vfs_fchown(struct file *file, uid_t user, gid_t group);
+int vfs_fchmod(struct file *file, umode_t mode);
+int vfs_utimes(const struct path *path, struct timespec64 *times);
+
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
loff_t pos;
};
-struct block_device_operations;
-
-/* These macros are for out of kernel modules to test that
- * the kernel supports the unlocked_ioctl and compat_ioctl
- * fields in struct file_operations. */
-#define HAVE_COMPAT_IOCTL 1
-#define HAVE_UNLOCKED_IOCTL 1
-
/*
* These flags let !MMU mmap() govern direct device mapping vs immediate
* copying more easily for MAP_PRIVATE, especially for ROM filesystems.
struct iovec *fast_pointer,
struct iovec **ret_pointer);
-extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
/*
* Inode flags - they have no relation to superblock flags now
*/
- #define S_SYNC 1 /* Writes are synced at once */
- #define S_NOATIME 2 /* Do not update access times */
- #define S_APPEND 4 /* Append-only file */
- #define S_IMMUTABLE 8 /* Immutable file */
- #define S_DEAD 16 /* removed, but still open directory */
- #define S_NOQUOTA 32 /* Inode is not counted to quota */
- #define S_DIRSYNC 64 /* Directory modifications are synchronous */
- #define S_NOCMTIME 128 /* Do not update file c/mtime */
- #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
- #define S_PRIVATE 512 /* Inode is fs-internal */
- #define S_IMA 1024 /* Inode has an associated IMA struct */
- #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
- #define S_NOSEC 4096 /* no suid or xattr security attributes */
+ #define S_SYNC (1 << 0) /* Writes are synced at once */
+ #define S_NOATIME (1 << 1) /* Do not update access times */
+ #define S_APPEND (1 << 2) /* Append-only file */
+ #define S_IMMUTABLE (1 << 3) /* Immutable file */
+ #define S_DEAD (1 << 4) /* removed, but still open directory */
+ #define S_NOQUOTA (1 << 5) /* Inode is not counted to quota */
+ #define S_DIRSYNC (1 << 6) /* Directory modifications are synchronous */
+ #define S_NOCMTIME (1 << 7) /* Do not update file c/mtime */
+ #define S_SWAPFILE (1 << 8) /* Do not truncate: swapon got its bmaps */
+ #define S_PRIVATE (1 << 9) /* Inode is fs-internal */
+ #define S_IMA (1 << 10) /* Inode has an associated IMA struct */
+ #define S_AUTOMOUNT (1 << 11) /* Automount/referral quasi-directory */
+ #define S_NOSEC (1 << 12) /* no suid or xattr security attributes */
#ifdef CONFIG_FS_DAX
- #define S_DAX 8192 /* Direct Access, avoiding the page cache */
+ #define S_DAX (1 << 13) /* Direct Access, avoiding the page cache */
#else
- #define S_DAX 0 /* Make all the DAX code disappear */
+ #define S_DAX 0 /* Make all the DAX code disappear */
#endif
- #define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */
- #define S_CASEFOLD 32768 /* Casefolded file */
- #define S_VERITY 65536 /* Verity file (using fs/verity/) */
+ #define S_ENCRYPTED (1 << 14) /* Encrypted file (using fs/crypto/) */
+ #define S_CASEFOLD (1 << 15) /* Casefolded file */
+ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
-#ifdef CONFIG_BLOCK
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
-#else
-static inline struct dentry *mount_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int))
-{
- return ERR_PTR(-ENODEV);
-}
-#endif
extern struct dentry *mount_single(struct file_system_type *fs_type,
int flags, void *data,
int (*fill_super)(struct super_block *, void *, int));
int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
void generic_shutdown_super(struct super_block *sb);
-#ifdef CONFIG_BLOCK
void kill_block_super(struct super_block *sb);
-#else
-static inline void kill_block_super(struct super_block *sb)
-{
- BUG();
-}
-#endif
void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
-#ifdef CONFIG_BLOCK
-extern int register_blkdev(unsigned int, const char *);
-extern void unregister_blkdev(unsigned int, const char *);
-extern struct block_device *bdget(dev_t);
-extern struct block_device *bdgrab(struct block_device *bdev);
-extern void bd_set_size(struct block_device *, loff_t size);
-extern void bd_forget(struct inode *inode);
-extern void bdput(struct block_device *);
-extern void invalidate_bdev(struct block_device *);
-extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
-extern int sync_blockdev(struct block_device *bdev);
-extern void kill_bdev(struct block_device *);
-extern struct super_block *freeze_bdev(struct block_device *);
-extern void emergency_thaw_all(void);
-extern void emergency_thaw_bdev(struct super_block *sb);
-extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
-extern int fsync_bdev(struct block_device *);
-
extern struct super_block *blockdev_superblock;
-
static inline bool sb_is_blkdev_sb(struct super_block *sb)
{
- return sb == blockdev_superblock;
-}
-#else
-static inline void bd_forget(struct inode *inode) {}
-static inline int sync_blockdev(struct block_device *bdev) { return 0; }
-static inline void kill_bdev(struct block_device *bdev) {}
-static inline void invalidate_bdev(struct block_device *bdev) {}
-
-static inline struct super_block *freeze_bdev(struct block_device *sb)
-{
- return NULL;
-}
-
-static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
- return 0;
-}
-
-static inline int emergency_thaw_bdev(struct super_block *sb)
-{
- return 0;
+ return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
}
-static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
-{
-}
-
-static inline bool sb_is_blkdev_sb(struct super_block *sb)
-{
- return false;
-}
-#endif
+void emergency_thaw_all(void);
extern int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
-#ifdef CONFIG_BLOCK
-extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
-extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
-extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder);
-extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
- void *holder);
-extern struct block_device *bd_start_claiming(struct block_device *bdev,
- void *holder);
-extern void bd_finish_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder);
-extern void bd_abort_claiming(struct block_device *bdev,
- struct block_device *whole, void *holder);
-extern void blkdev_put(struct block_device *bdev, fmode_t mode);
-
-#ifdef CONFIG_SYSFS
-extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
-extern void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk);
-#else
-static inline int bd_link_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
- return 0;
-}
-static inline void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk)
-{
-}
-#endif
-#endif
/* fs/char_dev.c */
#define CHRDEV_MAJOR_MAX 512
__unregister_chrdev(major, 0, 256, name);
}
-/* fs/block_dev.c */
-#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
-#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
-
-#ifdef CONFIG_BLOCK
-#define BLKDEV_MAJOR_MAX 512
-extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
-extern void blkdev_show(struct seq_file *,off_t);
-
-#else
-#define BLKDEV_MAJOR_MAX 0
-#endif
-
extern void init_special_inode(struct inode *, umode_t, dev_t);
/* Invalid inode operations -- fs/bad_inode.c */
extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
-#ifdef CONFIG_BLOCK
-extern int revalidate_disk(struct gendisk *);
-extern int check_disk_change(struct block_device *);
-extern int __invalidate_device(struct block_device *, bool);
-#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
/**
* file_sample_sb_err - sample the current errseq_t to test for later errors
- * @mapping: mapping to be sampled
+ * @file: file pointer to be sampled
*
* Grab the most current superblock-level errseq_t value for the given
* struct file.
extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t,
enum kernel_read_file_id);
extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
+ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
extern struct file * open_exec(const char *);
extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb);
+/*
+ * Userspace may rely on the the inode number being non-zero. For example, glibc
+ * simply ignores files with zero i_ino in unlink() and other places.
+ *
+ * As an additional complication, if userspace was compiled with
+ * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
+ * lower 32 bits, so we need to check that those aren't zero explicitly. With
+ * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
+ * better safe than sorry.
+ */
+static inline bool is_zero_ino(ino_t ino)
+{
+ return (u32)ino == 0;
+}
+
extern void __iget(struct inode * inode);
extern void iget_failed(struct inode *);
extern void clear_inode(struct inode *);
extern void inode_sb_list_add(struct inode *inode);
-#ifdef CONFIG_BLOCK
-extern int bdev_read_only(struct block_device *);
-#endif
-extern int set_blocksize(struct block_device *, int);
extern int sb_set_blocksize(struct super_block *, int);
extern int sb_min_blocksize(struct super_block *, int);
static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags)
{
+ int kiocb_flags = 0;
+
+ if (!flags)
+ return 0;
if (unlikely(flags & ~RWF_SUPPORTED))
return -EOPNOTSUPP;
if (flags & RWF_NOWAIT) {
if (!(ki->ki_filp->f_mode & FMODE_NOWAIT))
return -EOPNOTSUPP;
- ki->ki_flags |= IOCB_NOWAIT;
+ kiocb_flags |= IOCB_NOWAIT;
}
if (flags & RWF_HIPRI)
- ki->ki_flags |= IOCB_HIPRI;
+ kiocb_flags |= IOCB_HIPRI;
if (flags & RWF_DSYNC)
- ki->ki_flags |= IOCB_DSYNC;
+ kiocb_flags |= IOCB_DSYNC;
if (flags & RWF_SYNC)
- ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC);
+ kiocb_flags |= (IOCB_DSYNC | IOCB_SYNC);
if (flags & RWF_APPEND)
- ki->ki_flags |= IOCB_APPEND;
+ kiocb_flags |= IOCB_APPEND;
+
+ ki->ki_flags |= kiocb_flags;
return 0;
}