Git Repo - linux.git/commitdiff
Merge branch 'chandan/prep-subpage-blocksize' into for-chris-4.6
author David Sterba <[email protected]>
Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
committer David Sterba <[email protected]>
Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
# Conflicts:
# fs/btrfs/file.c

1  2 
fs/btrfs/ctree.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c

diff --combined fs/btrfs/ctree.h
index bfe4a337fb4d13a058446265b7baf4a1437aa602,42ab58250d9ec5e9fc001302d75a759dfcfd8ec5..5f5c4fbd7a3c9880d56c86a5ddd16df03cb35f5a
@@@ -2353,6 -2353,9 +2353,9 @@@ struct btrfs_map_token 
        unsigned long offset;
  };
  
+ #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
+                               ((bytes) >> (fs_info)->sb->s_blocksize_bits)
  static inline void btrfs_init_map_token (struct btrfs_map_token *token)
  {
        token->kaddr = NULL;
@@@ -4027,7 -4030,7 +4030,7 @@@ int btrfs_unlink_subvol(struct btrfs_tr
                        struct btrfs_root *root,
                        struct inode *dir, u64 objectid,
                        const char *name, int name_len);
- int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
                        int front);
  int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root,
@@@ -4099,8 -4102,7 +4102,8 @@@ void btrfs_get_block_group_info(struct 
                                struct btrfs_ioctl_space_info *space);
  void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
                               struct btrfs_ioctl_balance_args *bargs);
 -
 +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 +                         struct file *dst_file, u64 dst_loff);
  
  /* file.c */
  int btrfs_auto_defrag_init(void);
@@@ -4131,11 -4133,6 +4134,11 @@@ int btrfs_dirty_pages(struct btrfs_roo
                      loff_t pos, size_t write_bytes,
                      struct extent_state **cached);
  int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
 +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
 +                            struct file *file_out, loff_t pos_out,
 +                            size_t len, unsigned int flags);
 +int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
 +                         struct file *file_out, loff_t pos_out, u64 len);
  
  /* tree-defrag.c */
  int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --combined fs/btrfs/file.c
index 098bb8f690c992e1ebd01270f49ff2d37e6658bd,953f0ad1780272408f6d8070792023b245df268d..5a58e292bdadc7d586086102f42c540e5f52fb98
@@@ -498,7 -498,7 +498,7 @@@ int btrfs_dirty_pages(struct btrfs_roo
        loff_t isize = i_size_read(inode);
  
        start_pos = pos & ~((u64)root->sectorsize - 1);
-       num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
+       num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
  
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@@ -1379,16 -1379,19 +1379,19 @@@ fail
  static noinline int
  lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
                                size_t num_pages, loff_t pos,
+                               size_t write_bytes,
                                u64 *lockstart, u64 *lockend,
                                struct extent_state **cached_state)
  {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 start_pos;
        u64 last_pos;
        int i;
        int ret = 0;
  
-       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
-       last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+       start_pos = round_down(pos, root->sectorsize);
+       last_pos = start_pos
+               + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
  
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
@@@ -1503,6 -1506,7 +1506,7 @@@ static noinline ssize_t __btrfs_buffere
  
        while (iov_iter_count(i) > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+               size_t sector_offset;
                size_t write_bytes = min(iov_iter_count(i),
                                         nrptrs * (size_t)PAGE_CACHE_SIZE -
                                         offset);
                size_t reserve_bytes;
                size_t dirty_pages;
                size_t copied;
+               size_t dirty_sectors;
+               size_t num_sectors;
  
                WARN_ON(num_pages > nrptrs);
  
                        break;
                }
  
-               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+               sector_offset = pos & (root->sectorsize - 1);
+               reserve_bytes = round_up(write_bytes + sector_offset,
+                               root->sectorsize);
  
                if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
                                             BTRFS_INODE_PREALLOC)) {
                                 */
                                num_pages = DIV_ROUND_UP(write_bytes + offset,
                                                         PAGE_CACHE_SIZE);
-                               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+                               reserve_bytes = round_up(write_bytes
+                                                       + sector_offset,
+                                                       root->sectorsize);
                                goto reserve_metadata;
                        }
                }
@@@ -1576,8 -1586,8 +1586,8 @@@ again
                        break;
  
                ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
-                                                     pos, &lockstart, &lockend,
-                                                     &cached_state);
+                                               pos, write_bytes, &lockstart,
+                                               &lockend, &cached_state);
                if (ret < 0) {
                        if (ret == -EAGAIN)
                                goto again;
                 * we still have an outstanding extent for the chunk we actually
                 * managed to copy.
                 */
-               if (num_pages > dirty_pages) {
-                       release_bytes = (num_pages - dirty_pages) <<
-                               PAGE_CACHE_SHIFT;
+               num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               reserve_bytes);
+               dirty_sectors = round_up(copied + sector_offset,
+                                       root->sectorsize);
+               dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               dirty_sectors);
+               if (num_sectors > dirty_sectors) {
+                       release_bytes = (write_bytes - copied)
+                               & ~((u64)root->sectorsize - 1);
                        if (copied > 0) {
                                spin_lock(&BTRFS_I(inode)->lock);
                                BTRFS_I(inode)->outstanding_extents++;
                        }
                }
  
-               release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
+               release_bytes = round_up(copied + sector_offset,
+                                       root->sectorsize);
  
                if (copied > 0)
                        ret = btrfs_dirty_pages(root, inode, pages,
  
                if (only_release_metadata && copied > 0) {
                        lockstart = round_down(pos, root->sectorsize);
-                       lockend = lockstart +
-                               (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+                       lockend = round_up(pos + copied, root->sectorsize) - 1;
  
                        set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                       lockend, EXTENT_NORESERVE, NULL,
@@@ -1761,18 -1778,20 +1778,20 @@@ static ssize_t btrfs_file_write_iter(st
        ssize_t err;
        loff_t pos;
        size_t count;
+       loff_t oldsize;
+       int clean_page = 0;
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
        err = generic_write_checks(iocb, from);
        if (err <= 0) {
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
                return err;
        }
  
        current->backing_dev_info = inode_to_bdi(inode);
        err = file_remove_privs(file);
        if (err) {
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
                goto out;
        }
  
         * to stop this write operation to ensure FS consistency.
         */
        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
                err = -EROFS;
                goto out;
        }
        pos = iocb->ki_pos;
        count = iov_iter_count(from);
        start_pos = round_down(pos, root->sectorsize);
-       if (start_pos > i_size_read(inode)) {
+       oldsize = i_size_read(inode);
+       if (start_pos > oldsize) {
                /* Expand hole size to cover write data, preventing empty gap */
                end_pos = round_up(pos + count, root->sectorsize);
-               err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+               err = btrfs_cont_expand(inode, oldsize, end_pos);
                if (err) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        goto out;
                }
+               if (start_pos > round_up(oldsize, root->sectorsize))
+                       clean_page = 1;
        }
  
        if (sync)
                num_written = __btrfs_buffered_write(file, from, pos);
                if (num_written > 0)
                        iocb->ki_pos = pos + num_written;
+               if (clean_page)
+                       pagecache_isize_extended(inode, oldsize,
+                                               i_size_read(inode));
        }
  
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
  
        /*
         * We also have to set last_sub_trans to the current log transid,
@@@ -1909,7 -1934,7 +1934,7 @@@ int btrfs_sync_file(struct file *file, 
        if (ret)
                return ret;
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
        atomic_inc(&root->log_batch);
        full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                             &BTRFS_I(inode)->runtime_flags);
                ret = start_ordered_ops(inode, start, end);
        }
        if (ret) {
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
                goto out;
        }
        atomic_inc(&root->log_batch);
                 */
                clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                          &BTRFS_I(inode)->runtime_flags);
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
                goto out;
        }
  
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
                goto out;
        }
        trans->sync = true;
         * file again, but that will end up using the synchronization
         * inside btrfs_sync_log to keep things safe.
         */
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
  
        /*
         * If any of the ordered extents had an error, just return it to user
@@@ -2293,18 -2318,18 +2318,18 @@@ static int btrfs_punch_hole(struct inod
        int ret = 0;
        int err = 0;
        unsigned int rsv_count;
-       bool same_page;
+       bool same_block;
        bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
        u64 ino_size;
-       bool truncated_page = false;
+       bool truncated_block = false;
        bool updated_inode = false;
  
        ret = btrfs_wait_ordered_range(inode, offset, len);
        if (ret)
                return ret;
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
-       ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+       ino_size = round_up(inode->i_size, root->sectorsize);
        ret = find_first_non_hole(inode, &offset, &len);
        if (ret < 0)
                goto out_only_mutex;
        lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
        lockend = round_down(offset + len,
                             BTRFS_I(inode)->root->sectorsize) - 1;
-       same_page = ((offset >> PAGE_CACHE_SHIFT) ==
-                   ((offset + len - 1) >> PAGE_CACHE_SHIFT));
+       same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
+               == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
        /*
-        * We needn't truncate any page which is beyond the end of the file
+        * We needn't truncate any block which is beyond the end of the file
         * because we are sure there is no data there.
         */
        /*
-        * Only do this if we are in the same page and we aren't doing the
-        * entire page.
+        * Only do this if we are in the same block and we aren't doing the
+        * entire block.
         */
-       if (same_page && len < PAGE_CACHE_SIZE) {
+       if (same_block && len < root->sectorsize) {
                if (offset < ino_size) {
-                       truncated_page = true;
-                       ret = btrfs_truncate_page(inode, offset, len, 0);
+                       truncated_block = true;
+                       ret = btrfs_truncate_block(inode, offset, len, 0);
                } else {
                        ret = 0;
                }
                goto out_only_mutex;
        }
  
-       /* zero back part of the first page */
+       /* zero back part of the first block */
        if (offset < ino_size) {
-               truncated_page = true;
-               ret = btrfs_truncate_page(inode, offset, 0, 0);
+               truncated_block = true;
+               ret = btrfs_truncate_block(inode, offset, 0, 0);
                if (ret) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        return ret;
                }
        }
                if (!ret) {
                        /* zero the front end of the last page */
                        if (tail_start + tail_len < ino_size) {
-                               truncated_page = true;
-                               ret = btrfs_truncate_page(inode,
-                                               tail_start + tail_len, 0, 1);
+                               truncated_block = true;
+                               ret = btrfs_truncate_block(inode,
+                                                       tail_start + tail_len,
+                                                       0, 1);
                                if (ret)
                                        goto out_only_mutex;
                        }
                ret = btrfs_wait_ordered_range(inode, lockstart,
                                               lockend - lockstart + 1);
                if (ret) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        return ret;
                }
        }
@@@ -2558,7 -2583,7 +2583,7 @@@ out
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                             &cached_state, GFP_NOFS);
  out_only_mutex:
-       if (!updated_inode && truncated_page && !ret && !err) {
+       if (!updated_inode && truncated_block && !ret && !err) {
                /*
                 * If we only end up zeroing part of a page, we still need to
                 * update the inode item, so that all the time fields are
                        ret = btrfs_end_transaction(trans, root);
                }
        }
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
        if (ret && !err)
                err = ret;
        return err;
@@@ -2658,7 -2683,7 +2683,7 @@@ static long btrfs_fallocate(struct fil
        if (ret < 0)
                return ret;
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
        ret = inode_newsize_ok(inode, alloc_end);
        if (ret)
                goto out;
        } else if (offset + len > inode->i_size) {
                /*
                 * If we are fallocating from the end of the file onward we
-                * need to zero out the end of the page if i_size lands in the
-                * middle of a page.
+                * need to zero out the end of the block if i_size lands in the
+                * middle of a block.
                 */
-               ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+               ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
                if (ret)
                        goto out;
        }
@@@ -2816,7 -2841,7 +2841,7 @@@ out
         * So this is completely used as cleanup.
         */
        btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_start,
                                       alloc_end - alloc_start);
@@@ -2892,7 -2917,7 +2917,7 @@@ static loff_t btrfs_file_llseek(struct 
        struct inode *inode = file->f_mapping->host;
        int ret;
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
        switch (whence) {
        case SEEK_END:
        case SEEK_CUR:
        case SEEK_DATA:
        case SEEK_HOLE:
                if (offset >= i_size_read(inode)) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        return -ENXIO;
                }
  
                ret = find_desired_extent(inode, &offset, whence);
                if (ret) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        return ret;
                }
        }
  
        offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
  out:
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
        return offset;
  }
  
@@@ -2932,9 -2957,6 +2957,9 @@@ const struct file_operations btrfs_file
  #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_ioctl,
  #endif
 +      .copy_file_range = btrfs_copy_file_range,
 +      .clone_file_range = btrfs_clone_file_range,
 +      .dedupe_file_range = btrfs_dedupe_file_range,
  };
  
  void btrfs_auto_defrag_exit(void)
diff --combined fs/btrfs/inode.c
index 5f06eb1f43843055c0373daeb9ad98648865150f,7d4b2bf2f44f42d4340d0746fabae9fe661214f5..3e0d4151151723446ef4d4cee6dfd7959ddd53d7
@@@ -263,7 -263,7 +263,7 @@@ static noinline int cow_file_range_inli
                data_len = compressed_size;
  
        if (start > 0 ||
-           actual_end > PAGE_CACHE_SIZE ||
+           actual_end > root->sectorsize ||
            data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
            (!compressed_size &&
            (actual_end & (root->sectorsize - 1)) == 0) ||
@@@ -2002,7 -2002,8 +2002,8 @@@ again
        if (PagePrivate2(page))
                goto out;
  
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_range(inode, page_start,
+                                       PAGE_CACHE_SIZE);
        if (ordered) {
                unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
                                     page_end, &cached_state, GFP_NOFS);
@@@ -3546,10 -3547,10 +3547,10 @@@ static noinline int acls_after_inode_it
        int scanned = 0;
  
        if (!xattr_access) {
 -              xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
 -                                      strlen(POSIX_ACL_XATTR_ACCESS));
 -              xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
 -                                      strlen(POSIX_ACL_XATTR_DEFAULT));
 +              xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
 +                                      strlen(XATTR_NAME_POSIX_ACL_ACCESS));
 +              xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
 +                                      strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
        }
  
        slot++;
@@@ -3770,7 -3771,6 +3771,7 @@@ cache_acl
                break;
        case S_IFLNK:
                inode->i_op = &btrfs_symlink_inode_operations;
 +              inode_nohighmem(inode);
                inode->i_mapping->a_ops = &btrfs_symlink_aops;
                break;
        default:
@@@ -4248,7 -4248,8 +4249,8 @@@ static int truncate_inline_extent(struc
                 * read the extent item from disk (data not in the page cache).
                 */
                btrfs_release_path(path);
-               return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+               return btrfs_truncate_block(inode, offset, page_end - offset,
+                                       0);
        }
  
        btrfs_set_file_extent_ram_bytes(leaf, fi, size);
@@@ -4601,17 -4602,17 +4603,17 @@@ error
  }
  
  /*
-  * btrfs_truncate_page - read, zero a chunk and write a page
+  * btrfs_truncate_block - read, zero a chunk and write a block
   * @inode - inode that we're zeroing
   * @from - the offset to start zeroing
   * @len - the length to zero, 0 to zero the entire range respective to the
   *    offset
   * @front - zero up to the offset instead of from the offset on
   *
-  * This will find the page for the "from" offset and cow the page and zero the
+  * This will find the block for the "from" offset and cow the block and zero the
   * part we want to zero.  This is used with truncate and hole punching.
   */
- int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
                        int front)
  {
        struct address_space *mapping = inode->i_mapping;
        char *kaddr;
        u32 blocksize = root->sectorsize;
        pgoff_t index = from >> PAGE_CACHE_SHIFT;
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned offset = from & (blocksize - 1);
        struct page *page;
        gfp_t mask = btrfs_alloc_write_mask(mapping);
        int ret = 0;
-       u64 page_start;
-       u64 page_end;
+       u64 block_start;
+       u64 block_end;
  
        if ((offset & (blocksize - 1)) == 0 &&
            (!len || ((len & (blocksize - 1)) == 0)))
                goto out;
        ret = btrfs_delalloc_reserve_space(inode,
-                       round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE);
+                       round_down(from, blocksize), blocksize);
        if (ret)
                goto out;
  
@@@ -4641,14 -4643,14 +4644,14 @@@ again
        page = find_or_create_page(mapping, index, mask);
        if (!page) {
                btrfs_delalloc_release_space(inode,
-                               round_down(from, PAGE_CACHE_SIZE),
-                               PAGE_CACHE_SIZE);
+                               round_down(from, blocksize),
+                               blocksize);
                ret = -ENOMEM;
                goto out;
        }
  
-       page_start = page_offset(page);
-       page_end = page_start + PAGE_CACHE_SIZE - 1;
+       block_start = round_down(from, blocksize);
+       block_end = block_start + blocksize - 1;
  
        if (!PageUptodate(page)) {
                ret = btrfs_readpage(NULL, page);
        }
        wait_on_page_writeback(page);
  
-       lock_extent_bits(io_tree, page_start, page_end, &cached_state);
+       lock_extent_bits(io_tree, block_start, block_end, &cached_state);
        set_page_extent_mapped(page);
  
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_extent(inode, block_start);
        if (ordered) {
-               unlock_extent_cached(io_tree, page_start, page_end,
+               unlock_extent_cached(io_tree, block_start, block_end,
                                     &cached_state, GFP_NOFS);
                unlock_page(page);
                page_cache_release(page);
                goto again;
        }
  
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
                          EXTENT_DIRTY | EXTENT_DELALLOC |
                          EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                          0, 0, &cached_state, GFP_NOFS);
  
-       ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+       ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
                                        &cached_state);
        if (ret) {
-               unlock_extent_cached(io_tree, page_start, page_end,
+               unlock_extent_cached(io_tree, block_start, block_end,
                                     &cached_state, GFP_NOFS);
                goto out_unlock;
        }
  
-       if (offset != PAGE_CACHE_SIZE) {
+       if (offset != blocksize) {
                if (!len)
-                       len = PAGE_CACHE_SIZE - offset;
+                       len = blocksize - offset;
                kaddr = kmap(page);
                if (front)
-                       memset(kaddr, 0, offset);
+                       memset(kaddr + (block_start - page_offset(page)),
+                               0, offset);
                else
-                       memset(kaddr + offset, 0, len);
+                       memset(kaddr + (block_start - page_offset(page)) +  offset,
+                               0, len);
                flush_dcache_page(page);
                kunmap(page);
        }
        ClearPageChecked(page);
        set_page_dirty(page);
-       unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
+       unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
                             GFP_NOFS);
  
  out_unlock:
        if (ret)
-               btrfs_delalloc_release_space(inode, page_start,
-                                            PAGE_CACHE_SIZE);
+               btrfs_delalloc_release_space(inode, block_start,
+                                            blocksize);
        unlock_page(page);
        page_cache_release(page);
  out:
@@@ -4782,11 -4786,11 +4787,11 @@@ int btrfs_cont_expand(struct inode *ino
        int err = 0;
  
        /*
-        * If our size started in the middle of a page we need to zero out the
-        * rest of the page before we expand the i_size, otherwise we could
+        * If our size started in the middle of a block we need to zero out the
+        * rest of the block before we expand the i_size, otherwise we could
         * expose stale data.
         */
-       err = btrfs_truncate_page(inode, oldsize, 0, 0);
+       err = btrfs_truncate_block(inode, oldsize, 0, 0);
        if (err)
                return err;
  
@@@ -4895,7 -4899,6 +4900,6 @@@ static int btrfs_setsize(struct inode *
        }
  
        if (newsize > oldsize) {
-               truncate_pagecache(inode, newsize);
                /*
                 * Don't do an expanding truncate while snapshoting is ongoing.
                 * This is to ensure the snapshot captures a fully consistent
  
                i_size_write(inode, newsize);
                btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+               pagecache_isize_extended(inode, oldsize, newsize);
                ret = btrfs_update_inode(trans, root, inode);
                btrfs_end_write_no_snapshoting(root);
                btrfs_end_transaction(trans, root);
@@@ -7752,9 -7756,9 +7757,9 @@@ static int btrfs_check_dio_repairable(s
  }
  
  static int dio_read_error(struct inode *inode, struct bio *failed_bio,
-                         struct page *page, u64 start, u64 end,
-                         int failed_mirror, bio_end_io_t *repair_endio,
-                         void *repair_arg)
+                       struct page *page, unsigned int pgoff,
+                       u64 start, u64 end, int failed_mirror,
+                       bio_end_io_t *repair_endio, void *repair_arg)
  {
        struct io_failure_record *failrec;
        struct bio *bio;
                return -EIO;
        }
  
-       if (failed_bio->bi_vcnt > 1)
+       if ((failed_bio->bi_vcnt > 1)
+               || (failed_bio->bi_io_vec->bv_len
+                       > BTRFS_I(inode)->root->sectorsize))
                read_mode = READ_SYNC | REQ_FAILFAST_DEV;
        else
                read_mode = READ_SYNC;
        isector = start - btrfs_io_bio(failed_bio)->logical;
        isector >>= inode->i_sb->s_blocksize_bits;
        bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
-                                     0, isector, repair_endio, repair_arg);
+                               pgoff, isector, repair_endio, repair_arg);
        if (!bio) {
                free_io_failure(inode, failrec);
                return -EIO;
@@@ -7813,12 -7819,17 +7820,17 @@@ struct btrfs_retry_complete 
  static void btrfs_retry_endio_nocsum(struct bio *bio)
  {
        struct btrfs_retry_complete *done = bio->bi_private;
+       struct inode *inode;
        struct bio_vec *bvec;
        int i;
  
        if (bio->bi_error)
                goto end;
  
+       ASSERT(bio->bi_vcnt == 1);
+       inode = bio->bi_io_vec->bv_page->mapping->host;
+       ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
        done->uptodate = 1;
        bio_for_each_segment_all(bvec, bio, i)
                clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
  static int __btrfs_correct_data_nocsum(struct inode *inode,
                                       struct btrfs_io_bio *io_bio)
  {
+       struct btrfs_fs_info *fs_info;
        struct bio_vec *bvec;
        struct btrfs_retry_complete done;
        u64 start;
+       unsigned int pgoff;
+       u32 sectorsize;
+       int nr_sectors;
        int i;
        int ret;
  
+       fs_info = BTRFS_I(inode)->root->fs_info;
+       sectorsize = BTRFS_I(inode)->root->sectorsize;
        start = io_bio->logical;
        done.inode = inode;
  
        bio_for_each_segment_all(bvec, &io_bio->bio, i) {
- try_again:
+               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+               pgoff = bvec->bv_offset;
+ next_block_or_try_again:
                done.uptodate = 0;
                done.start = start;
                init_completion(&done.done);
  
-               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
-                                    start + bvec->bv_len - 1,
-                                    io_bio->mirror_num,
-                                    btrfs_retry_endio_nocsum, &done);
+               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+                               pgoff, start, start + sectorsize - 1,
+                               io_bio->mirror_num,
+                               btrfs_retry_endio_nocsum, &done);
                if (ret)
                        return ret;
  
  
                if (!done.uptodate) {
                        /* We might have another mirror, so try again */
-                       goto try_again;
+                       goto next_block_or_try_again;
                }
  
-               start += bvec->bv_len;
+               start += sectorsize;
+               if (nr_sectors--) {
+                       pgoff += sectorsize;
+                       goto next_block_or_try_again;
+               }
        }
  
        return 0;
@@@ -7869,7 -7895,9 +7896,9 @@@ static void btrfs_retry_endio(struct bi
  {
        struct btrfs_retry_complete *done = bio->bi_private;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       struct inode *inode;
        struct bio_vec *bvec;
+       u64 start;
        int uptodate;
        int ret;
        int i;
                goto end;
  
        uptodate = 1;
+       start = done->start;
+       ASSERT(bio->bi_vcnt == 1);
+       inode = bio->bi_io_vec->bv_page->mapping->host;
+       ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
        bio_for_each_segment_all(bvec, bio, i) {
                ret = __readpage_endio_check(done->inode, io_bio, i,
-                                            bvec->bv_page, 0,
-                                            done->start, bvec->bv_len);
+                                       bvec->bv_page, bvec->bv_offset,
+                                       done->start, bvec->bv_len);
                if (!ret)
                        clean_io_failure(done->inode, done->start,
-                                        bvec->bv_page, 0);
+                                       bvec->bv_page, bvec->bv_offset);
                else
                        uptodate = 0;
        }
  static int __btrfs_subio_endio_read(struct inode *inode,
                                    struct btrfs_io_bio *io_bio, int err)
  {
+       struct btrfs_fs_info *fs_info;
        struct bio_vec *bvec;
        struct btrfs_retry_complete done;
        u64 start;
        u64 offset = 0;
+       u32 sectorsize;
+       int nr_sectors;
+       unsigned int pgoff;
+       int csum_pos;
        int i;
        int ret;
  
+       fs_info = BTRFS_I(inode)->root->fs_info;
+       sectorsize = BTRFS_I(inode)->root->sectorsize;
        err = 0;
        start = io_bio->logical;
        done.inode = inode;
  
        bio_for_each_segment_all(bvec, &io_bio->bio, i) {
-               ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
-                                            0, start, bvec->bv_len);
+               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+               pgoff = bvec->bv_offset;
+ next_block:
+               csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
+               ret = __readpage_endio_check(inode, io_bio, csum_pos,
+                                       bvec->bv_page, pgoff, start,
+                                       sectorsize);
                if (likely(!ret))
                        goto next;
  try_again:
                done.start = start;
                init_completion(&done.done);
  
-               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
-                                    start + bvec->bv_len - 1,
-                                    io_bio->mirror_num,
-                                    btrfs_retry_endio, &done);
+               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+                               pgoff, start, start + sectorsize - 1,
+                               io_bio->mirror_num,
+                               btrfs_retry_endio, &done);
                if (ret) {
                        err = ret;
                        goto next;
                        goto try_again;
                }
  next:
-               offset += bvec->bv_len;
-               start += bvec->bv_len;
+               offset += sectorsize;
+               start += sectorsize;
+               ASSERT(nr_sectors);
+               if (--nr_sectors) {
+                       pgoff += sectorsize;
+                       goto next_block;
+               }
        }
  
        return err;
@@@ -8188,9 -8244,11 +8245,11 @@@ static int btrfs_submit_direct_hook(in
        u64 file_offset = dip->logical_offset;
        u64 submit_len = 0;
        u64 map_length;
-       int nr_pages = 0;
-       int ret;
+       u32 blocksize = root->sectorsize;
        int async_submit = 0;
+       int nr_sectors;
+       int ret;
+       int i;
  
        map_length = orig_bio->bi_iter.bi_size;
        ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
        atomic_inc(&dip->pending_bios);
  
        while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
-               if (map_length < submit_len + bvec->bv_len ||
-                   bio_add_page(bio, bvec->bv_page, bvec->bv_len,
-                                bvec->bv_offset) < bvec->bv_len) {
+               nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
+               i = 0;
+ next_block:
+               if (unlikely(map_length < submit_len + blocksize ||
+                   bio_add_page(bio, bvec->bv_page, blocksize,
+                           bvec->bv_offset + (i * blocksize)) < blocksize)) {
                        /*
                         * inc the count before we submit the bio so
                         * we know the end IO handler won't happen before
                        file_offset += submit_len;
  
                        submit_len = 0;
-                       nr_pages = 0;
  
                        bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
                                                  start_sector, GFP_NOFS);
                                bio_put(bio);
                                goto out_err;
                        }
+                       goto next_block;
                } else {
-                       submit_len += bvec->bv_len;
-                       nr_pages++;
+                       submit_len += blocksize;
+                       if (--nr_sectors) {
+                               i++;
+                               goto next_block;
+                       }
                        bvec++;
                }
        }
@@@ -8467,7 -8532,7 +8533,7 @@@ static ssize_t btrfs_direct_IO(struct k
                 * not unlock the i_mutex at this case.
                 */
                if (offset + count <= inode->i_size) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        relock = true;
                }
                ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@@ -8524,7 -8589,7 +8590,7 @@@ out
        if (wakeup)
                inode_dio_end(inode);
        if (relock)
 -              mutex_lock(&inode->i_mutex);
 +              inode_lock(inode);
  
        return ret;
  }
@@@ -8628,6 -8693,8 +8694,8 @@@ static void btrfs_invalidatepage(struc
        struct extent_state *cached_state = NULL;
        u64 page_start = page_offset(page);
        u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       u64 start;
+       u64 end;
        int inode_evicting = inode->i_state & I_FREEING;
  
        /*
  
        if (!inode_evicting)
                lock_extent_bits(tree, page_start, page_end, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ again:
+       start = page_start;
+       ordered = btrfs_lookup_ordered_range(inode, start,
+                                       page_end - start + 1);
        if (ordered) {
+               end = min(page_end, ordered->file_offset + ordered->len - 1);
                /*
                 * IO on this page will never be started, so we need
                 * to account for any ordered extents now
                 */
                if (!inode_evicting)
-                       clear_extent_bit(tree, page_start, page_end,
+                       clear_extent_bit(tree, start, end,
                                         EXTENT_DIRTY | EXTENT_DELALLOC |
                                         EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
                                         EXTENT_DEFRAG, 1, 0, &cached_state,
  
                        spin_lock_irq(&tree->lock);
                        set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
-                       new_len = page_start - ordered->file_offset;
+                       new_len = start - ordered->file_offset;
                        if (new_len < ordered->truncated_len)
                                ordered->truncated_len = new_len;
                        spin_unlock_irq(&tree->lock);
  
                        if (btrfs_dec_test_ordered_pending(inode, &ordered,
-                                                          page_start,
-                                                          PAGE_CACHE_SIZE, 1))
+                                                          start,
+                                                          end - start + 1, 1))
                                btrfs_finish_ordered_io(ordered);
                }
                btrfs_put_ordered_extent(ordered);
                if (!inode_evicting) {
                        cached_state = NULL;
-                       lock_extent_bits(tree, page_start, page_end,
+                       lock_extent_bits(tree, start, end,
                                         &cached_state);
                }
+               start = end + 1;
+               if (start < page_end)
+                       goto again;
        }
  
        /*
@@@ -8747,15 -8822,28 +8823,28 @@@ int btrfs_page_mkwrite(struct vm_area_s
        loff_t size;
        int ret;
        int reserved = 0;
+       u64 reserved_space;
        u64 page_start;
        u64 page_end;
+       u64 end;
+       reserved_space = PAGE_CACHE_SIZE;
  
        sb_start_pagefault(inode->i_sb);
        page_start = page_offset(page);
        page_end = page_start + PAGE_CACHE_SIZE - 1;
+       end = page_end;
  
+       /*
+        * Reserving delalloc space after obtaining the page lock can lead to
+        * deadlock. For example, if a dirty page is locked by this function
+        * and the call to btrfs_delalloc_reserve_space() ends up triggering
+        * dirty page write out, then the btrfs_writepage() function could
+        * end up waiting indefinitely to get a lock on the page currently
+        * being processed by btrfs_page_mkwrite() function.
+        */
        ret = btrfs_delalloc_reserve_space(inode, page_start,
-                                          PAGE_CACHE_SIZE);
+                                          reserved_space);
        if (!ret) {
                ret = file_update_time(vma->vm_file);
                reserved = 1;
@@@ -8789,7 -8877,7 +8878,7 @@@ again
         * we can't set the delalloc bits if there are pending ordered
         * extents.  Drop our locks and wait for them to finish
         */
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
        if (ordered) {
                unlock_extent_cached(io_tree, page_start, page_end,
                                     &cached_state, GFP_NOFS);
                goto again;
        }
  
+       if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
+               reserved_space = round_up(size - page_start, root->sectorsize);
+               if (reserved_space < PAGE_CACHE_SIZE) {
+                       end = page_start + reserved_space - 1;
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+                       btrfs_delalloc_release_space(inode, page_start,
+                                               PAGE_CACHE_SIZE - reserved_space);
+               }
+       }
        /*
         * XXX - page_mkwrite gets called every time the page is dirtied, even
         * if it was already dirty, so for space accounting reasons we need to
         * is probably a better way to do this, but for now keep consistent with
         * prepare_pages in the normal write path.
         */
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
                          EXTENT_DIRTY | EXTENT_DELALLOC |
                          EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                          0, 0, &cached_state, GFP_NOFS);
  
-       ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+       ret = btrfs_set_extent_delalloc(inode, page_start, end,
                                        &cached_state);
        if (ret) {
                unlock_extent_cached(io_tree, page_start, page_end,
@@@ -8850,7 -8950,7 +8951,7 @@@ out_unlock
        }
        unlock_page(page);
  out:
-       btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE);
+       btrfs_delalloc_release_space(inode, page_start, reserved_space);
  out_noreserve:
        sb_end_pagefault(inode->i_sb);
        return ret;
@@@ -9192,8 -9292,7 +9293,8 @@@ int btrfs_init_cachep(void
  {
        btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
                        sizeof(struct btrfs_inode), 0,
 -                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
 +                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
 +                      init_once);
        if (!btrfs_inode_cachep)
                goto fail;
  
@@@ -9236,7 -9335,6 +9337,6 @@@ static int btrfs_getattr(struct vfsmoun
  
        generic_fillattr(inode, stat);
        stat->dev = BTRFS_I(inode)->root->anon_dev;
-       stat->blksize = PAGE_CACHE_SIZE;
  
        spin_lock(&BTRFS_I(inode)->lock);
        delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
@@@ -9724,7 -9822,6 +9824,7 @@@ static int btrfs_symlink(struct inode *
        btrfs_free_path(path);
  
        inode->i_op = &btrfs_symlink_inode_operations;
 +      inode_nohighmem(inode);
        inode->i_mapping->a_ops = &btrfs_symlink_aops;
        inode_set_bytes(inode, name_len);
        btrfs_i_size_write(inode, name_len);
@@@ -10021,7 -10118,7 +10121,7 @@@ static const struct inode_operations bt
        .setattr        = btrfs_setattr,
        .mknod          = btrfs_mknod,
        .setxattr       = btrfs_setxattr,
 -      .getxattr       = btrfs_getxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
@@@ -10098,7 -10195,7 +10198,7 @@@ static const struct inode_operations bt
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
        .setxattr       = btrfs_setxattr,
 -      .getxattr       = btrfs_getxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
@@@ -10112,7 -10209,7 +10212,7 @@@ static const struct inode_operations bt
        .setattr        = btrfs_setattr,
        .permission     = btrfs_permission,
        .setxattr       = btrfs_setxattr,
 -      .getxattr       = btrfs_getxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .get_acl        = btrfs_get_acl,
  };
  static const struct inode_operations btrfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
 -      .follow_link    = page_follow_link_light,
 -      .put_link       = page_put_link,
 +      .get_link       = page_get_link,
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
        .permission     = btrfs_permission,
        .setxattr       = btrfs_setxattr,
 -      .getxattr       = btrfs_getxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .update_time    = btrfs_update_time,
diff --combined fs/btrfs/ioctl.c
index 952172ca7e455633c28a79292d18ebbfd68c4d18,709419c98ca5e4e7874dd2aeb2bbe2dda908e3b6..93e7832d1d1b22cc93b484ef34ca030c919ab74b
@@@ -240,7 -240,7 +240,7 @@@ static int btrfs_ioctl_setflags(struct 
        if (ret)
                return ret;
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
  
        ip_oldflags = ip->flags;
        i_oldflags = inode->i_flags;
        }
  
   out_unlock:
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
        mnt_drop_write_file(file);
        return ret;
  }
@@@ -881,7 -881,7 +881,7 @@@ out_up_read
  out_dput:
        dput(dentry);
  out_unlock:
 -      mutex_unlock(&dir->i_mutex);
 +      inode_unlock(dir);
        return error;
  }
  
@@@ -1393,18 -1393,18 +1393,18 @@@ int btrfs_defrag_file(struct inode *ino
                        ra_index += cluster;
                }
  
 -              mutex_lock(&inode->i_mutex);
 +              inode_lock(inode);
                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                        BTRFS_I(inode)->force_compress = compress_type;
                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                if (ret < 0) {
 -                      mutex_unlock(&inode->i_mutex);
 +                      inode_unlock(inode);
                        goto out_ra;
                }
  
                defrag_count += ret;
                balance_dirty_pages_ratelimited(inode->i_mapping);
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
  
                if (newer_than) {
                        if (newer_off == (u64)-1)
  
  out_ra:
        if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
 -              mutex_lock(&inode->i_mutex);
 +              inode_lock(inode);
                BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
 -              mutex_unlock(&inode->i_mutex);
 +              inode_unlock(inode);
        }
        if (!file)
                kfree(ra);
@@@ -2430,7 -2430,7 +2430,7 @@@ static noinline int btrfs_ioctl_snap_de
                goto out_dput;
        }
  
 -      mutex_lock(&inode->i_mutex);
 +      inode_lock(inode);
  
        /*
         * Don't allow to delete a subvolume with send in progress. This is
@@@ -2543,7 -2543,7 +2543,7 @@@ out_up_write
                spin_unlock(&dest->root_item_lock);
        }
  out_unlock_inode:
 -      mutex_unlock(&inode->i_mutex);
 +      inode_unlock(inode);
        if (!err) {
                d_invalidate(dentry);
                btrfs_invalidate_inodes(dest);
  out_dput:
        dput(dentry);
  out_unlock_dir:
 -      mutex_unlock(&dir->i_mutex);
 +      inode_unlock(dir);
  out_drop_write:
        mnt_drop_write_file(file);
  out:
@@@ -2857,8 -2857,8 +2857,8 @@@ static inline void lock_extent_range(st
  
  static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
  {
 -      mutex_unlock(&inode1->i_mutex);
 -      mutex_unlock(&inode2->i_mutex);
 +      inode_unlock(inode1);
 +      inode_unlock(inode2);
  }
  
  static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
        if (inode1 < inode2)
                swap(inode1, inode2);
  
 -      mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
 -      mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
 +      inode_lock_nested(inode1, I_MUTEX_PARENT);
 +      inode_lock_nested(inode2, I_MUTEX_CHILD);
  }
  
  static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@@ -2974,7 -2974,7 +2974,7 @@@ static int btrfs_cmp_data(struct inode 
                flush_dcache_page(dst_page);
  
                if (memcmp(addr, dst_addr, cmp_len))
 -                      ret = BTRFS_SAME_DATA_DIFFERS;
 +                      ret = -EBADE;
  
                kunmap_atomic(addr);
                kunmap_atomic(dst_addr);
@@@ -3026,7 -3026,7 +3026,7 @@@ static int btrfs_extent_same(struct ino
                return 0;
  
        if (same_inode) {
 -              mutex_lock(&src->i_mutex);
 +              inode_lock(src);
  
                ret = extent_same_check_offsets(src, loff, &len, olen);
                if (ret)
        btrfs_cmp_data_free(&cmp);
  out_unlock:
        if (same_inode)
 -              mutex_unlock(&src->i_mutex);
 +              inode_unlock(src);
        else
                btrfs_double_inode_unlock(src, dst);
  
  
  #define BTRFS_MAX_DEDUPE_LEN  SZ_16M
  
 -static long btrfs_ioctl_file_extent_same(struct file *file,
 -                      struct btrfs_ioctl_same_args __user *argp)
 +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
 +                              struct file *dst_file, u64 dst_loff)
  {
 -      struct btrfs_ioctl_same_args *same = NULL;
 -      struct btrfs_ioctl_same_extent_info *info;
 -      struct inode *src = file_inode(file);
 -      u64 off;
 -      u64 len;
 -      int i;
 -      int ret;
 -      unsigned long size;
 +      struct inode *src = file_inode(src_file);
 +      struct inode *dst = file_inode(dst_file);
        u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
 -      bool is_admin = capable(CAP_SYS_ADMIN);
 -      u16 count;
 -
 -      if (!(file->f_mode & FMODE_READ))
 -              return -EINVAL;
 -
 -      ret = mnt_want_write_file(file);
 -      if (ret)
 -              return ret;
 -
 -      if (get_user(count, &argp->dest_count)) {
 -              ret = -EFAULT;
 -              goto out;
 -      }
 -
 -      size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
 -
 -      same = memdup_user(argp, size);
 -
 -      if (IS_ERR(same)) {
 -              ret = PTR_ERR(same);
 -              same = NULL;
 -              goto out;
 -      }
 +      ssize_t res;
  
 -      off = same->logical_offset;
 -      len = same->length;
 -
 -      /*
 -       * Limit the total length we will dedupe for each operation.
 -       * This is intended to bound the total time spent in this
 -       * ioctl to something sane.
 -       */
 -      if (len > BTRFS_MAX_DEDUPE_LEN)
 -              len = BTRFS_MAX_DEDUPE_LEN;
 +      if (olen > BTRFS_MAX_DEDUPE_LEN)
 +              olen = BTRFS_MAX_DEDUPE_LEN;
  
        if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
                /*
                 * result, btrfs_cmp_data() won't correctly handle
                 * this situation without an update.
                 */
 -              ret = -EINVAL;
 -              goto out;
 -      }
 -
 -      ret = -EISDIR;
 -      if (S_ISDIR(src->i_mode))
 -              goto out;
 -
 -      ret = -EACCES;
 -      if (!S_ISREG(src->i_mode))
 -              goto out;
 -
 -      /* pre-format output fields to sane values */
 -      for (i = 0; i < count; i++) {
 -              same->info[i].bytes_deduped = 0ULL;
 -              same->info[i].status = 0;
 -      }
 -
 -      for (i = 0, info = same->info; i < count; i++, info++) {
 -              struct inode *dst;
 -              struct fd dst_file = fdget(info->fd);
 -              if (!dst_file.file) {
 -                      info->status = -EBADF;
 -                      continue;
 -              }
 -              dst = file_inode(dst_file.file);
 -
 -              if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
 -                      info->status = -EINVAL;
 -              } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
 -                      info->status = -EXDEV;
 -              } else if (S_ISDIR(dst->i_mode)) {
 -                      info->status = -EISDIR;
 -              } else if (!S_ISREG(dst->i_mode)) {
 -                      info->status = -EACCES;
 -              } else {
 -                      info->status = btrfs_extent_same(src, off, len, dst,
 -                                                      info->logical_offset);
 -                      if (info->status == 0)
 -                              info->bytes_deduped += len;
 -              }
 -              fdput(dst_file);
 +              return -EINVAL;
        }
  
 -      ret = copy_to_user(argp, same, size);
 -      if (ret)
 -              ret = -EFAULT;
 -
 -out:
 -      mnt_drop_write_file(file);
 -      kfree(same);
 -      return ret;
 +      res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
 +      if (res)
 +              return res;
 +      return olen;
  }
  
  static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
        return ret;
  }
  
 -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 -                                     u64 off, u64 olen, u64 destoff)
 +static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 +                                      u64 off, u64 olen, u64 destoff)
  {
        struct inode *inode = file_inode(file);
 +      struct inode *src = file_inode(file_src);
        struct btrfs_root *root = BTRFS_I(inode)->root;
 -      struct fd src_file;
 -      struct inode *src;
        int ret;
        u64 len = olen;
        u64 bs = root->fs_info->sb->s_blocksize;
 -      int same_inode = 0;
 +      int same_inode = src == inode;
  
        /*
         * TODO:
         *   be either compressed or non-compressed.
         */
  
 -      /* the destination must be opened for writing */
 -      if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
 -              return -EINVAL;
 -
        if (btrfs_root_readonly(root))
                return -EROFS;
  
 -      ret = mnt_want_write_file(file);
 -      if (ret)
 -              return ret;
 -
 -      src_file = fdget(srcfd);
 -      if (!src_file.file) {
 -              ret = -EBADF;
 -              goto out_drop_write;
 -      }
 -
 -      ret = -EXDEV;
 -      if (src_file.file->f_path.mnt != file->f_path.mnt)
 -              goto out_fput;
 -
 -      src = file_inode(src_file.file);
 -
 -      ret = -EINVAL;
 -      if (src == inode)
 -              same_inode = 1;
 -
 -      /* the src must be open for reading */
 -      if (!(src_file.file->f_mode & FMODE_READ))
 -              goto out_fput;
 +      if (file_src->f_path.mnt != file->f_path.mnt ||
 +          src->i_sb != inode->i_sb)
 +              return -EXDEV;
  
        /* don't make the dst file partly checksummed */
        if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
            (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
 -              goto out_fput;
 +              return -EINVAL;
  
 -      ret = -EISDIR;
        if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
 -              goto out_fput;
 -
 -      ret = -EXDEV;
 -      if (src->i_sb != inode->i_sb)
 -              goto out_fput;
 +              return -EISDIR;
  
        if (!same_inode) {
                btrfs_double_inode_lock(src, inode);
        } else {
 -              mutex_lock(&src->i_mutex);
 +              inode_lock(src);
        }
  
        /* determine range to clone */
         * Truncate page cache pages so that future reads will see the cloned
         * data immediately and not the previous data.
         */
-       truncate_inode_pages_range(&inode->i_data, destoff,
-                                  PAGE_CACHE_ALIGN(destoff + len) - 1);
+       truncate_inode_pages_range(&inode->i_data,
+                               round_down(destoff, PAGE_CACHE_SIZE),
+                               round_up(destoff + len, PAGE_CACHE_SIZE) - 1);
  out_unlock:
        if (!same_inode)
                btrfs_double_inode_unlock(src, inode);
        else
 -              mutex_unlock(&src->i_mutex);
 -out_fput:
 -      fdput(src_file);
 -out_drop_write:
 -      mnt_drop_write_file(file);
 +              inode_unlock(src);
        return ret;
  }
  
 -static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
 +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
 +                            struct file *file_out, loff_t pos_out,
 +                            size_t len, unsigned int flags)
  {
 -      struct btrfs_ioctl_clone_range_args args;
 +      ssize_t ret;
  
 -      if (copy_from_user(&args, argp, sizeof(args)))
 -              return -EFAULT;
 -      return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
 -                               args.src_length, args.dest_offset);
 +      ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
 +      if (ret == 0)
 +              ret = len;
 +      return ret;
 +}
 +
 +int btrfs_clone_file_range(struct file *src_file, loff_t off,
 +              struct file *dst_file, loff_t destoff, u64 len)
 +{
 +      return btrfs_clone_files(dst_file, src_file, off, len, destoff);
  }
  
  /*
@@@ -5389,6 -5498,10 +5390,6 @@@ long btrfs_ioctl(struct file *file, uns
                return btrfs_ioctl_dev_info(root, argp);
        case BTRFS_IOC_BALANCE:
                return btrfs_ioctl_balance(file, NULL);
 -      case BTRFS_IOC_CLONE:
 -              return btrfs_ioctl_clone(file, arg, 0, 0, 0);
 -      case BTRFS_IOC_CLONE_RANGE:
 -              return btrfs_ioctl_clone_range(file, argp);
        case BTRFS_IOC_TRANS_START:
                return btrfs_ioctl_trans_start(file);
        case BTRFS_IOC_TRANS_END:
                return btrfs_ioctl_get_fslabel(file, argp);
        case BTRFS_IOC_SET_FSLABEL:
                return btrfs_ioctl_set_fslabel(file, argp);
 -      case BTRFS_IOC_FILE_EXTENT_SAME:
 -              return btrfs_ioctl_file_extent_same(file, argp);
        case BTRFS_IOC_GET_SUPPORTED_FEATURES:
                return btrfs_ioctl_get_supported_features(file, argp);
        case BTRFS_IOC_GET_FEATURES:
This page took 0.139171 seconds and 4 git commands to generate.