Merge branch 'chandan/prep-subpage-blocksize' into for-chris-4.6

author David Sterba <[email protected]>

Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)

committer David Sterba <[email protected]>

Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
author David Sterba <[email protected]>
Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
committer David Sterba <[email protected]>
Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
diff --combined fs/btrfs/ctree.h

index bfe4a337fb4d13a058446265b7baf4a1437aa602,42ab58250d9ec5e9fc001302d75a759dfcfd8ec5..5f5c4fbd7a3c9880d56c86a5ddd16df03cb35f5a
--- 1/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@@ -2353,6 -2353,9 +2353,9 @@@ struct btrfs_map_token 
         unsigned long offset;
   };
   
+ #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
+                               ((bytes) >> (fs_info)->sb->s_blocksize_bits)
+ 
   static inline void btrfs_init_map_token (struct btrfs_map_token *token)
   {
         token->kaddr = NULL;
@@@ -4027,7 -4030,7 +4030,7 @@@ int btrfs_unlink_subvol(struct btrfs_tr
                         struct btrfs_root *root,
                         struct inode *dir, u64 objectid,
                         const char *name, int name_len);
- int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
                         int front);
   int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
@@@ -4099,8 -4102,7 +4102,8 @@@ void btrfs_get_block_group_info(struct 
                                 struct btrfs_ioctl_space_info *space);
   void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
                                struct btrfs_ioctl_balance_args *bargs);
- -
+ +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+ +                         struct file *dst_file, u64 dst_loff);
   
   /* file.c */
   int btrfs_auto_defrag_init(void);
@@@ -4131,11 -4133,6 +4134,11 @@@ int btrfs_dirty_pages(struct btrfs_roo
                       loff_t pos, size_t write_bytes,
                       struct extent_state **cached);
   int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
+ +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ +                            struct file *file_out, loff_t pos_out,
+ +                            size_t len, unsigned int flags);
+ +int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ +                         struct file *file_out, loff_t pos_out, u64 len);
   
   /* tree-defrag.c */
   int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --combined fs/btrfs/file.c

index 098bb8f690c992e1ebd01270f49ff2d37e6658bd,953f0ad1780272408f6d8070792023b245df268d..5a58e292bdadc7d586086102f42c540e5f52fb98
--- 1/fs/btrfs/file.c
--- 2/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@@ -498,7 -498,7 +498,7 @@@ int btrfs_dirty_pages(struct btrfs_roo
         loff_t isize = i_size_read(inode);
   
         start_pos = pos & ~((u64)root->sectorsize - 1);
-       num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
+       num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
   
         end_of_last_block = start_pos + num_bytes - 1;
         err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@@ -1379,16 -1379,19 +1379,19 @@@ fail
   static noinline int
   lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
                                 size_t num_pages, loff_t pos,
+                               size_t write_bytes,
                                 u64 *lockstart, u64 *lockend,
                                 struct extent_state **cached_state)
   {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 start_pos;
         u64 last_pos;
         int i;
         int ret = 0;
   
-       start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
-       last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+       start_pos = round_down(pos, root->sectorsize);
+       last_pos = start_pos
+               + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
   
         if (start_pos < inode->i_size) {
                 struct btrfs_ordered_extent *ordered;
@@@ -1503,6 -1506,7 +1506,7 @@@ static noinline ssize_t __btrfs_buffere
   
         while (iov_iter_count(i) > 0) {
                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+               size_t sector_offset;
                 size_t write_bytes = min(iov_iter_count(i),
                                          nrptrs * (size_t)PAGE_CACHE_SIZE -
                                          offset);
@@@ -1511,6 -1515,8 +1515,8 @@@
                 size_t reserve_bytes;
                 size_t dirty_pages;
                 size_t copied;
+               size_t dirty_sectors;
+               size_t num_sectors;
   
                 WARN_ON(num_pages > nrptrs);
   
@@@ -1523,7 -1529,9 +1529,9 @@@
                         break;
                 }
   
-               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+               sector_offset = pos & (root->sectorsize - 1);
+               reserve_bytes = round_up(write_bytes + sector_offset,
+                               root->sectorsize);
   
                 if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
                                              BTRFS_INODE_PREALLOC)) {
@@@ -1542,7 -1550,9 +1550,9 @@@
                                  */
                                 num_pages = DIV_ROUND_UP(write_bytes + offset,
                                                          PAGE_CACHE_SIZE);
-                               reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+                               reserve_bytes = round_up(write_bytes
+                                                       + sector_offset,
+                                                       root->sectorsize);
                                 goto reserve_metadata;
                         }
                 }
@@@ -1576,8 -1586,8 +1586,8 @@@ again
                         break;
   
                 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
-                                                     pos, &lockstart, &lockend,
-                                                     &cached_state);
+                                               pos, write_bytes, &lockstart,
+                                               &lockend, &cached_state);
                 if (ret < 0) {
                         if (ret == -EAGAIN)
                                 goto again;
@@@ -1612,9 -1622,16 +1622,16 @@@
                  * we still have an outstanding extent for the chunk we actually
                  * managed to copy.
                  */
-               if (num_pages > dirty_pages) {
-                       release_bytes = (num_pages - dirty_pages) <<
-                               PAGE_CACHE_SHIFT;
+               num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               reserve_bytes);
+               dirty_sectors = round_up(copied + sector_offset,
+                                       root->sectorsize);
+               dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+                                               dirty_sectors);
+ 
+               if (num_sectors > dirty_sectors) {
+                       release_bytes = (write_bytes - copied)
+                               & ~((u64)root->sectorsize - 1);
                         if (copied > 0) {
                                 spin_lock(&BTRFS_I(inode)->lock);
                                 BTRFS_I(inode)->outstanding_extents++;
@@@ -1633,7 -1650,8 +1650,8 @@@
                         }
                 }
   
-               release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
+               release_bytes = round_up(copied + sector_offset,
+                                       root->sectorsize);
   
                 if (copied > 0)
                         ret = btrfs_dirty_pages(root, inode, pages,
@@@ -1654,8 -1672,7 +1672,7 @@@
   
                 if (only_release_metadata && copied > 0) {
                         lockstart = round_down(pos, root->sectorsize);
-                       lockend = lockstart +
-                               (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+                       lockend = round_up(pos + copied, root->sectorsize) - 1;
   
                         set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
                                        lockend, EXTENT_NORESERVE, NULL,
@@@ -1761,18 -1778,20 +1778,20 @@@ static ssize_t btrfs_file_write_iter(st
         ssize_t err;
         loff_t pos;
         size_t count;
+       loff_t oldsize;
+       int clean_page = 0;
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
         err = generic_write_checks(iocb, from);
         if (err <= 0) {
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
                 return err;
         }
   
         current->backing_dev_info = inode_to_bdi(inode);
         err = file_remove_privs(file);
         if (err) {
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
                 goto out;
         }
   
@@@ -1783,7 -1802,7 +1802,7 @@@
          * to stop this write operation to ensure FS consistency.
          */
         if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
                 err = -EROFS;
                 goto out;
         }
@@@ -1799,14 -1818,17 +1818,17 @@@
         pos = iocb->ki_pos;
         count = iov_iter_count(from);
         start_pos = round_down(pos, root->sectorsize);
-       if (start_pos > i_size_read(inode)) {
+       oldsize = i_size_read(inode);
+       if (start_pos > oldsize) {
                 /* Expand hole size to cover write data, preventing empty gap */
                 end_pos = round_up(pos + count, root->sectorsize);
-               err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+               err = btrfs_cont_expand(inode, oldsize, end_pos);
                 if (err) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         goto out;
                 }
+               if (start_pos > round_up(oldsize, root->sectorsize))
+                       clean_page = 1;
         }
   
         if (sync)
@@@ -1818,9 -1840,12 +1840,12 @@@
                 num_written = __btrfs_buffered_write(file, from, pos);
                 if (num_written > 0)
                         iocb->ki_pos = pos + num_written;
+               if (clean_page)
+                       pagecache_isize_extended(inode, oldsize,
+                                               i_size_read(inode));
         }
   
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
   
         /*
          * We also have to set last_sub_trans to the current log transid,
@@@ -1909,7 -1934,7 +1934,7 @@@ int btrfs_sync_file(struct file *file, 
         if (ret)
                 return ret;
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
         atomic_inc(&root->log_batch);
         full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                              &BTRFS_I(inode)->runtime_flags);
@@@ -1961,7 -1986,7 +1986,7 @@@
                 ret = start_ordered_ops(inode, start, end);
         }
         if (ret) {
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
                 goto out;
         }
         atomic_inc(&root->log_batch);
@@@ -2007,7 -2032,7 +2032,7 @@@
                  */
                 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                           &BTRFS_I(inode)->runtime_flags);
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
                 goto out;
         }
   
@@@ -2031,7 -2056,7 +2056,7 @@@
         trans = btrfs_start_transaction(root, 0);
         if (IS_ERR(trans)) {
                 ret = PTR_ERR(trans);
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
                 goto out;
         }
         trans->sync = true;
@@@ -2054,7 -2079,7 +2079,7 @@@
          * file again, but that will end up using the synchronization
          * inside btrfs_sync_log to keep things safe.
          */
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
   
         /*
          * If any of the ordered extents had an error, just return it to user
@@@ -2293,18 -2318,18 +2318,18 @@@ static int btrfs_punch_hole(struct inod
         int ret = 0;
         int err = 0;
         unsigned int rsv_count;
-       bool same_page;
+       bool same_block;
         bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
         u64 ino_size;
-       bool truncated_page = false;
+       bool truncated_block = false;
         bool updated_inode = false;
   
         ret = btrfs_wait_ordered_range(inode, offset, len);
         if (ret)
                 return ret;
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
-       ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+       ino_size = round_up(inode->i_size, root->sectorsize);
         ret = find_first_non_hole(inode, &offset, &len);
         if (ret < 0)
                 goto out_only_mutex;
@@@ -2317,33 -2342,32 +2342,32 @@@
         lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
         lockend = round_down(offset + len,
                              BTRFS_I(inode)->root->sectorsize) - 1;
-       same_page = ((offset >> PAGE_CACHE_SHIFT) ==
-                   ((offset + len - 1) >> PAGE_CACHE_SHIFT));
- 
+       same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
+               == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
         /*
-        * We needn't truncate any page which is beyond the end of the file
+        * We needn't truncate any block which is beyond the end of the file
          * because we are sure there is no data there.
          */
         /*
-        * Only do this if we are in the same page and we aren't doing the
-        * entire page.
+        * Only do this if we are in the same block and we aren't doing the
+        * entire block.
          */
-       if (same_page && len < PAGE_CACHE_SIZE) {
+       if (same_block && len < root->sectorsize) {
                 if (offset < ino_size) {
-                       truncated_page = true;
-                       ret = btrfs_truncate_page(inode, offset, len, 0);
+                       truncated_block = true;
+                       ret = btrfs_truncate_block(inode, offset, len, 0);
                 } else {
                         ret = 0;
                 }
                 goto out_only_mutex;
         }
   
-       /* zero back part of the first page */
+       /* zero back part of the first block */
         if (offset < ino_size) {
-               truncated_page = true;
-               ret = btrfs_truncate_page(inode, offset, 0, 0);
+               truncated_block = true;
+               ret = btrfs_truncate_block(inode, offset, 0, 0);
                 if (ret) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         return ret;
                 }
         }
@@@ -2376,9 -2400,10 +2400,10 @@@
                 if (!ret) {
                         /* zero the front end of the last page */
                         if (tail_start + tail_len < ino_size) {
-                               truncated_page = true;
-                               ret = btrfs_truncate_page(inode,
-                                               tail_start + tail_len, 0, 1);
+                               truncated_block = true;
+                               ret = btrfs_truncate_block(inode,
+                                                       tail_start + tail_len,
+                                                       0, 1);
                                 if (ret)
                                         goto out_only_mutex;
                         }
@@@ -2419,7 -2444,7 +2444,7 @@@
                 ret = btrfs_wait_ordered_range(inode, lockstart,
                                                lockend - lockstart + 1);
                 if (ret) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         return ret;
                 }
         }
@@@ -2558,7 -2583,7 +2583,7 @@@ out
         unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                              &cached_state, GFP_NOFS);
   out_only_mutex:
-       if (!updated_inode && truncated_page && !ret && !err) {
+       if (!updated_inode && truncated_block && !ret && !err) {
                 /*
                  * If we only end up zeroing part of a page, we still need to
                  * update the inode item, so that all the time fields are
@@@ -2574,7 -2599,7 +2599,7 @@@
                         ret = btrfs_end_transaction(trans, root);
                 }
         }
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
         if (ret && !err)
                 err = ret;
         return err;
@@@ -2658,7 -2683,7 +2683,7 @@@ static long btrfs_fallocate(struct fil
         if (ret < 0)
                 return ret;
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
         ret = inode_newsize_ok(inode, alloc_end);
         if (ret)
                 goto out;
@@@ -2678,10 -2703,10 +2703,10 @@@
         } else if (offset + len > inode->i_size) {
                 /*
                  * If we are fallocating from the end of the file onward we
-                * need to zero out the end of the page if i_size lands in the
-                * middle of a page.
+                * need to zero out the end of the block if i_size lands in the
+                * middle of a block.
                  */
-               ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+               ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
                 if (ret)
                         goto out;
         }
@@@ -2816,7 -2841,7 +2841,7 @@@ out
          * So this is completely used as cleanup.
          */
         btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
         /* Let go of our reservation. */
         btrfs_free_reserved_data_space(inode, alloc_start,
                                        alloc_end - alloc_start);
@@@ -2892,7 -2917,7 +2917,7 @@@ static loff_t btrfs_file_llseek(struct 
         struct inode *inode = file->f_mapping->host;
         int ret;
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
         switch (whence) {
         case SEEK_END:
         case SEEK_CUR:
@@@ -2901,20 -2926,20 +2926,20 @@@
         case SEEK_DATA:
         case SEEK_HOLE:
                 if (offset >= i_size_read(inode)) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         return -ENXIO;
                 }
   
                 ret = find_desired_extent(inode, &offset, whence);
                 if (ret) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         return ret;
                 }
         }
   
         offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
   out:
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
         return offset;
   }
   
@@@ -2932,9 -2957,6 +2957,9 @@@ const struct file_operations btrfs_file
   #ifdef CONFIG_COMPAT
         .compat_ioctl   = btrfs_ioctl,
   #endif
+ +      .copy_file_range = btrfs_copy_file_range,
+ +      .clone_file_range = btrfs_clone_file_range,
+ +      .dedupe_file_range = btrfs_dedupe_file_range,
   };
   
   void btrfs_auto_defrag_exit(void)
diff --combined fs/btrfs/inode.c

index 5f06eb1f43843055c0373daeb9ad98648865150f,7d4b2bf2f44f42d4340d0746fabae9fe661214f5..3e0d4151151723446ef4d4cee6dfd7959ddd53d7
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -263,7 -263,7 +263,7 @@@ static noinline int cow_file_range_inli
                 data_len = compressed_size;
   
         if (start > 0 ||
-           actual_end > PAGE_CACHE_SIZE ||
+           actual_end > root->sectorsize ||
             data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
             (!compressed_size &&
             (actual_end & (root->sectorsize - 1)) == 0) ||
@@@ -2002,7 -2002,8 +2002,8 @@@ again
         if (PagePrivate2(page))
                 goto out;
   
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_range(inode, page_start,
+                                       PAGE_CACHE_SIZE);
         if (ordered) {
                 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
                                      page_end, &cached_state, GFP_NOFS);
@@@ -3546,10 -3547,10 +3547,10 @@@ static noinline int acls_after_inode_it
         int scanned = 0;
   
         if (!xattr_access) {
- -              xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
- -                                      strlen(POSIX_ACL_XATTR_ACCESS));
- -              xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
- -                                      strlen(POSIX_ACL_XATTR_DEFAULT));
+ +              xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
+ +                                      strlen(XATTR_NAME_POSIX_ACL_ACCESS));
+ +              xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
+ +                                      strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
         }
   
         slot++;
@@@ -3770,7 -3771,6 +3771,7 @@@ cache_acl
                 break;
         case S_IFLNK:
                 inode->i_op = &btrfs_symlink_inode_operations;
+ +              inode_nohighmem(inode);
                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
                 break;
         default:
@@@ -4248,7 -4248,8 +4249,8 @@@ static int truncate_inline_extent(struc
                  * read the extent item from disk (data not in the page cache).
                  */
                 btrfs_release_path(path);
-               return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+               return btrfs_truncate_block(inode, offset, page_end - offset,
+                                       0);
         }
   
         btrfs_set_file_extent_ram_bytes(leaf, fi, size);
@@@ -4601,17 -4602,17 +4603,17 @@@ error
   }
   
   /*
-  * btrfs_truncate_page - read, zero a chunk and write a page
+  * btrfs_truncate_block - read, zero a chunk and write a block
    * @inode - inode that we're zeroing
    * @from - the offset to start zeroing
    * @len - the length to zero, 0 to zero the entire range respective to the
    *    offset
    * @front - zero up to the offset instead of from the offset on
    *
-  * This will find the page for the "from" offset and cow the page and zero the
+  * This will find the block for the "from" offset and cow the block and zero the
    * part we want to zero.  This is used with truncate and hole punching.
    */
- int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
                         int front)
   {
         struct address_space *mapping = inode->i_mapping;
@@@ -4622,18 -4623,19 +4624,19 @@@
         char *kaddr;
         u32 blocksize = root->sectorsize;
         pgoff_t index = from >> PAGE_CACHE_SHIFT;
-       unsigned offset = from & (PAGE_CACHE_SIZE-1);
+       unsigned offset = from & (blocksize - 1);
         struct page *page;
         gfp_t mask = btrfs_alloc_write_mask(mapping);
         int ret = 0;
-       u64 page_start;
-       u64 page_end;
+       u64 block_start;
+       u64 block_end;
   
         if ((offset & (blocksize - 1)) == 0 &&
             (!len || ((len & (blocksize - 1)) == 0)))
                 goto out;
+ 
         ret = btrfs_delalloc_reserve_space(inode,
-                       round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE);
+                       round_down(from, blocksize), blocksize);
         if (ret)
                 goto out;
   
@@@ -4641,14 -4643,14 +4644,14 @@@ again
         page = find_or_create_page(mapping, index, mask);
         if (!page) {
                 btrfs_delalloc_release_space(inode,
-                               round_down(from, PAGE_CACHE_SIZE),
-                               PAGE_CACHE_SIZE);
+                               round_down(from, blocksize),
+                               blocksize);
                 ret = -ENOMEM;
                 goto out;
         }
   
-       page_start = page_offset(page);
-       page_end = page_start + PAGE_CACHE_SIZE - 1;
+       block_start = round_down(from, blocksize);
+       block_end = block_start + blocksize - 1;
   
         if (!PageUptodate(page)) {
                 ret = btrfs_readpage(NULL, page);
@@@ -4665,12 -4667,12 +4668,12 @@@
         }
         wait_on_page_writeback(page);
   
-       lock_extent_bits(io_tree, page_start, page_end, &cached_state);
+       lock_extent_bits(io_tree, block_start, block_end, &cached_state);
         set_page_extent_mapped(page);
   
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_extent(inode, block_start);
         if (ordered) {
-               unlock_extent_cached(io_tree, page_start, page_end,
+               unlock_extent_cached(io_tree, block_start, block_end,
                                      &cached_state, GFP_NOFS);
                 unlock_page(page);
                 page_cache_release(page);
@@@ -4679,39 -4681,41 +4682,41 @@@
                 goto again;
         }
   
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
                           EXTENT_DIRTY | EXTENT_DELALLOC |
                           EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                           0, 0, &cached_state, GFP_NOFS);
   
-       ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+       ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
                                         &cached_state);
         if (ret) {
-               unlock_extent_cached(io_tree, page_start, page_end,
+               unlock_extent_cached(io_tree, block_start, block_end,
                                      &cached_state, GFP_NOFS);
                 goto out_unlock;
         }
   
-       if (offset != PAGE_CACHE_SIZE) {
+       if (offset != blocksize) {
                 if (!len)
-                       len = PAGE_CACHE_SIZE - offset;
+                       len = blocksize - offset;
                 kaddr = kmap(page);
                 if (front)
-                       memset(kaddr, 0, offset);
+                       memset(kaddr + (block_start - page_offset(page)),
+                               0, offset);
                 else
-                       memset(kaddr + offset, 0, len);
+                       memset(kaddr + (block_start - page_offset(page)) +  offset,
+                               0, len);
                 flush_dcache_page(page);
                 kunmap(page);
         }
         ClearPageChecked(page);
         set_page_dirty(page);
-       unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
+       unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
                              GFP_NOFS);
   
   out_unlock:
         if (ret)
-               btrfs_delalloc_release_space(inode, page_start,
-                                            PAGE_CACHE_SIZE);
+               btrfs_delalloc_release_space(inode, block_start,
+                                            blocksize);
         unlock_page(page);
         page_cache_release(page);
   out:
@@@ -4782,11 -4786,11 +4787,11 @@@ int btrfs_cont_expand(struct inode *ino
         int err = 0;
   
         /*
-        * If our size started in the middle of a page we need to zero out the
-        * rest of the page before we expand the i_size, otherwise we could
+        * If our size started in the middle of a block we need to zero out the
+        * rest of the block before we expand the i_size, otherwise we could
          * expose stale data.
          */
-       err = btrfs_truncate_page(inode, oldsize, 0, 0);
+       err = btrfs_truncate_block(inode, oldsize, 0, 0);
         if (err)
                 return err;
   
@@@ -4895,7 -4899,6 +4900,6 @@@ static int btrfs_setsize(struct inode *
         }
   
         if (newsize > oldsize) {
-               truncate_pagecache(inode, newsize);
                 /*
                  * Don't do an expanding truncate while snapshoting is ongoing.
                  * This is to ensure the snapshot captures a fully consistent
@@@ -4918,6 -4921,7 +4922,7 @@@
   
                 i_size_write(inode, newsize);
                 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+               pagecache_isize_extended(inode, oldsize, newsize);
                 ret = btrfs_update_inode(trans, root, inode);
                 btrfs_end_write_no_snapshoting(root);
                 btrfs_end_transaction(trans, root);
@@@ -7752,9 -7756,9 +7757,9 @@@ static int btrfs_check_dio_repairable(s
   }
   
   static int dio_read_error(struct inode *inode, struct bio *failed_bio,
-                         struct page *page, u64 start, u64 end,
-                         int failed_mirror, bio_end_io_t *repair_endio,
-                         void *repair_arg)
+                       struct page *page, unsigned int pgoff,
+                       u64 start, u64 end, int failed_mirror,
+                       bio_end_io_t *repair_endio, void *repair_arg)
   {
         struct io_failure_record *failrec;
         struct bio *bio;
@@@ -7775,7 -7779,9 +7780,9 @@@
                 return -EIO;
         }
   
-       if (failed_bio->bi_vcnt > 1)
+       if ((failed_bio->bi_vcnt > 1)
+               || (failed_bio->bi_io_vec->bv_len
+                       > BTRFS_I(inode)->root->sectorsize))
                 read_mode = READ_SYNC | REQ_FAILFAST_DEV;
         else
                 read_mode = READ_SYNC;
@@@ -7783,7 -7789,7 +7790,7 @@@
         isector = start - btrfs_io_bio(failed_bio)->logical;
         isector >>= inode->i_sb->s_blocksize_bits;
         bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
-                                     0, isector, repair_endio, repair_arg);
+                               pgoff, isector, repair_endio, repair_arg);
         if (!bio) {
                 free_io_failure(inode, failrec);
                 return -EIO;
@@@ -7813,12 -7819,17 +7820,17 @@@ struct btrfs_retry_complete 
   static void btrfs_retry_endio_nocsum(struct bio *bio)
   {
         struct btrfs_retry_complete *done = bio->bi_private;
+       struct inode *inode;
         struct bio_vec *bvec;
         int i;
   
         if (bio->bi_error)
                 goto end;
   
+       ASSERT(bio->bi_vcnt == 1);
+       inode = bio->bi_io_vec->bv_page->mapping->host;
+       ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
+ 
         done->uptodate = 1;
         bio_for_each_segment_all(bvec, bio, i)
                 clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
@@@ -7830,25 -7841,35 +7842,35 @@@ end
   static int __btrfs_correct_data_nocsum(struct inode *inode,
                                        struct btrfs_io_bio *io_bio)
   {
+       struct btrfs_fs_info *fs_info;
         struct bio_vec *bvec;
         struct btrfs_retry_complete done;
         u64 start;
+       unsigned int pgoff;
+       u32 sectorsize;
+       int nr_sectors;
         int i;
         int ret;
   
+       fs_info = BTRFS_I(inode)->root->fs_info;
+       sectorsize = BTRFS_I(inode)->root->sectorsize;
+ 
         start = io_bio->logical;
         done.inode = inode;
   
         bio_for_each_segment_all(bvec, &io_bio->bio, i) {
- try_again:
+               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+               pgoff = bvec->bv_offset;
+ 
+ next_block_or_try_again:
                 done.uptodate = 0;
                 done.start = start;
                 init_completion(&done.done);
   
-               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
-                                    start + bvec->bv_len - 1,
-                                    io_bio->mirror_num,
-                                    btrfs_retry_endio_nocsum, &done);
+               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+                               pgoff, start, start + sectorsize - 1,
+                               io_bio->mirror_num,
+                               btrfs_retry_endio_nocsum, &done);
                 if (ret)
                         return ret;
   
@@@ -7856,10 -7877,15 +7878,15 @@@
   
                 if (!done.uptodate) {
                         /* We might have another mirror, so try again */
-                       goto try_again;
+                       goto next_block_or_try_again;
                 }
   
-               start += bvec->bv_len;
+               start += sectorsize;
+ 
+               if (nr_sectors--) {
+                       pgoff += sectorsize;
+                       goto next_block_or_try_again;
+               }
         }
   
         return 0;
@@@ -7869,7 -7895,9 +7896,9 @@@ static void btrfs_retry_endio(struct bi
   {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       struct inode *inode;
         struct bio_vec *bvec;
+       u64 start;
         int uptodate;
         int ret;
         int i;
@@@ -7878,13 -7906,20 +7907,20 @@@
                 goto end;
   
         uptodate = 1;
+ 
+       start = done->start;
+ 
+       ASSERT(bio->bi_vcnt == 1);
+       inode = bio->bi_io_vec->bv_page->mapping->host;
+       ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
+ 
         bio_for_each_segment_all(bvec, bio, i) {
                 ret = __readpage_endio_check(done->inode, io_bio, i,
-                                            bvec->bv_page, 0,
-                                            done->start, bvec->bv_len);
+                                       bvec->bv_page, bvec->bv_offset,
+                                       done->start, bvec->bv_len);
                 if (!ret)
                         clean_io_failure(done->inode, done->start,
-                                        bvec->bv_page, 0);
+                                       bvec->bv_page, bvec->bv_offset);
                 else
                         uptodate = 0;
         }
@@@ -7898,20 -7933,34 +7934,34 @@@ end
   static int __btrfs_subio_endio_read(struct inode *inode,
                                     struct btrfs_io_bio *io_bio, int err)
   {
+       struct btrfs_fs_info *fs_info;
         struct bio_vec *bvec;
         struct btrfs_retry_complete done;
         u64 start;
         u64 offset = 0;
+       u32 sectorsize;
+       int nr_sectors;
+       unsigned int pgoff;
+       int csum_pos;
         int i;
         int ret;
   
+       fs_info = BTRFS_I(inode)->root->fs_info;
+       sectorsize = BTRFS_I(inode)->root->sectorsize;
+ 
         err = 0;
         start = io_bio->logical;
         done.inode = inode;
   
         bio_for_each_segment_all(bvec, &io_bio->bio, i) {
-               ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
-                                            0, start, bvec->bv_len);
+               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+ 
+               pgoff = bvec->bv_offset;
+ next_block:
+               csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
+               ret = __readpage_endio_check(inode, io_bio, csum_pos,
+                                       bvec->bv_page, pgoff, start,
+                                       sectorsize);
                 if (likely(!ret))
                         goto next;
   try_again:
@@@ -7919,10 -7968,10 +7969,10 @@@
                 done.start = start;
                 init_completion(&done.done);
   
-               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
-                                    start + bvec->bv_len - 1,
-                                    io_bio->mirror_num,
-                                    btrfs_retry_endio, &done);
+               ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+                               pgoff, start, start + sectorsize - 1,
+                               io_bio->mirror_num,
+                               btrfs_retry_endio, &done);
                 if (ret) {
                         err = ret;
                         goto next;
@@@ -7935,8 -7984,15 +7985,15 @@@
                         goto try_again;
                 }
   next:
-               offset += bvec->bv_len;
-               start += bvec->bv_len;
+               offset += sectorsize;
+               start += sectorsize;
+ 
+               ASSERT(nr_sectors);
+ 
+               if (--nr_sectors) {
+                       pgoff += sectorsize;
+                       goto next_block;
+               }
         }
   
         return err;
@@@ -8188,9 -8244,11 +8245,11 @@@ static int btrfs_submit_direct_hook(in
         u64 file_offset = dip->logical_offset;
         u64 submit_len = 0;
         u64 map_length;
-       int nr_pages = 0;
-       int ret;
+       u32 blocksize = root->sectorsize;
         int async_submit = 0;
+       int nr_sectors;
+       int ret;
+       int i;
   
         map_length = orig_bio->bi_iter.bi_size;
         ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
@@@ -8220,9 -8278,12 +8279,12 @@@
         atomic_inc(&dip->pending_bios);
   
         while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
-               if (map_length < submit_len + bvec->bv_len ||
-                   bio_add_page(bio, bvec->bv_page, bvec->bv_len,
-                                bvec->bv_offset) < bvec->bv_len) {
+               nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
+               i = 0;
+ next_block:
+               if (unlikely(map_length < submit_len + blocksize ||
+                   bio_add_page(bio, bvec->bv_page, blocksize,
+                           bvec->bv_offset + (i * blocksize)) < blocksize)) {
                         /*
                          * inc the count before we submit the bio so
                          * we know the end IO handler won't happen before
@@@ -8243,7 -8304,6 +8305,6 @@@
                         file_offset += submit_len;
   
                         submit_len = 0;
-                       nr_pages = 0;
   
                         bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
                                                   start_sector, GFP_NOFS);
@@@ -8261,9 -8321,14 +8322,14 @@@
                                 bio_put(bio);
                                 goto out_err;
                         }
+ 
+                       goto next_block;
                 } else {
-                       submit_len += bvec->bv_len;
-                       nr_pages++;
+                       submit_len += blocksize;
+                       if (--nr_sectors) {
+                               i++;
+                               goto next_block;
+                       }
                         bvec++;
                 }
         }
@@@ -8467,7 -8532,7 +8533,7 @@@ static ssize_t btrfs_direct_IO(struct k
                  * not unlock the i_mutex at this case.
                  */
                 if (offset + count <= inode->i_size) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         relock = true;
                 }
                 ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@@ -8524,7 -8589,7 +8590,7 @@@ out
         if (wakeup)
                 inode_dio_end(inode);
         if (relock)
- -              mutex_lock(&inode->i_mutex);
+ +              inode_lock(inode);
   
         return ret;
   }
@@@ -8628,6 -8693,8 +8694,8 @@@ static void btrfs_invalidatepage(struc
         struct extent_state *cached_state = NULL;
         u64 page_start = page_offset(page);
         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       u64 start;
+       u64 end;
         int inode_evicting = inode->i_state & I_FREEING;
   
         /*
@@@ -8647,14 -8714,18 +8715,18 @@@
   
         if (!inode_evicting)
                 lock_extent_bits(tree, page_start, page_end, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ again:
+       start = page_start;
+       ordered = btrfs_lookup_ordered_range(inode, start,
+                                       page_end - start + 1);
         if (ordered) {
+               end = min(page_end, ordered->file_offset + ordered->len - 1);
                 /*
                  * IO on this page will never be started, so we need
                  * to account for any ordered extents now
                  */
                 if (!inode_evicting)
-                       clear_extent_bit(tree, page_start, page_end,
+                       clear_extent_bit(tree, start, end,
                                          EXTENT_DIRTY | EXTENT_DELALLOC |
                                          EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
                                          EXTENT_DEFRAG, 1, 0, &cached_state,
@@@ -8671,22 -8742,26 +8743,26 @@@
   
                         spin_lock_irq(&tree->lock);
                         set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
-                       new_len = page_start - ordered->file_offset;
+                       new_len = start - ordered->file_offset;
                         if (new_len < ordered->truncated_len)
                                 ordered->truncated_len = new_len;
                         spin_unlock_irq(&tree->lock);
   
                         if (btrfs_dec_test_ordered_pending(inode, &ordered,
-                                                          page_start,
-                                                          PAGE_CACHE_SIZE, 1))
+                                                          start,
+                                                          end - start + 1, 1))
                                 btrfs_finish_ordered_io(ordered);
                 }
                 btrfs_put_ordered_extent(ordered);
                 if (!inode_evicting) {
                         cached_state = NULL;
-                       lock_extent_bits(tree, page_start, page_end,
+                       lock_extent_bits(tree, start, end,
                                          &cached_state);
                 }
+ 
+               start = end + 1;
+               if (start < page_end)
+                       goto again;
         }
   
         /*
@@@ -8747,15 -8822,28 +8823,28 @@@ int btrfs_page_mkwrite(struct vm_area_s
         loff_t size;
         int ret;
         int reserved = 0;
+       u64 reserved_space;
         u64 page_start;
         u64 page_end;
+       u64 end;
+ 
+       reserved_space = PAGE_CACHE_SIZE;
   
         sb_start_pagefault(inode->i_sb);
         page_start = page_offset(page);
         page_end = page_start + PAGE_CACHE_SIZE - 1;
+       end = page_end;
   
+       /*
+        * Reserving delalloc space after obtaining the page lock can lead to
+        * deadlock. For example, if a dirty page is locked by this function
+        * and the call to btrfs_delalloc_reserve_space() ends up triggering
+        * dirty page write out, then the btrfs_writepage() function could
+        * end up waiting indefinitely to get a lock on the page currently
+        * being processed by btrfs_page_mkwrite() function.
+        */
         ret = btrfs_delalloc_reserve_space(inode, page_start,
-                                          PAGE_CACHE_SIZE);
+                                          reserved_space);
         if (!ret) {
                 ret = file_update_time(vma->vm_file);
                 reserved = 1;
@@@ -8789,7 -8877,7 +8878,7 @@@ again
          * we can't set the delalloc bits if there are pending ordered
          * extents.  Drop our locks and wait for them to finish
          */
-       ordered = btrfs_lookup_ordered_extent(inode, page_start);
+       ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
         if (ordered) {
                 unlock_extent_cached(io_tree, page_start, page_end,
                                      &cached_state, GFP_NOFS);
@@@ -8799,6 -8887,18 +8888,18 @@@
                 goto again;
         }
   
+       if (page->index == ((size - 1) >> PAGE_CACHE_SHIFT)) {
+               reserved_space = round_up(size - page_start, root->sectorsize);
+               if (reserved_space < PAGE_CACHE_SIZE) {
+                       end = page_start + reserved_space - 1;
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+                       btrfs_delalloc_release_space(inode, page_start,
+                                               PAGE_CACHE_SIZE - reserved_space);
+               }
+       }
+ 
         /*
          * XXX - page_mkwrite gets called every time the page is dirtied, even
          * if it was already dirty, so for space accounting reasons we need to
@@@ -8806,12 -8906,12 +8907,12 @@@
          * is probably a better way to do this, but for now keep consistent with
          * prepare_pages in the normal write path.
          */
-       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+       clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
                           EXTENT_DIRTY | EXTENT_DELALLOC |
                           EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
                           0, 0, &cached_state, GFP_NOFS);
   
-       ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+       ret = btrfs_set_extent_delalloc(inode, page_start, end,
                                         &cached_state);
         if (ret) {
                 unlock_extent_cached(io_tree, page_start, page_end,
@@@ -8850,7 -8950,7 +8951,7 @@@ out_unlock
         }
         unlock_page(page);
   out:
-       btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE);
+       btrfs_delalloc_release_space(inode, page_start, reserved_space);
   out_noreserve:
         sb_end_pagefault(inode->i_sb);
         return ret;
@@@ -9192,8 -9292,7 +9293,8 @@@ int btrfs_init_cachep(void
   {
         btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
                         sizeof(struct btrfs_inode), 0,
- -                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
+ +                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ +                      init_once);
         if (!btrfs_inode_cachep)
                 goto fail;
   
@@@ -9236,7 -9335,6 +9337,6 @@@ static int btrfs_getattr(struct vfsmoun
   
         generic_fillattr(inode, stat);
         stat->dev = BTRFS_I(inode)->root->anon_dev;
-       stat->blksize = PAGE_CACHE_SIZE;
   
         spin_lock(&BTRFS_I(inode)->lock);
         delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
@@@ -9724,7 -9822,6 +9824,7 @@@ static int btrfs_symlink(struct inode *
         btrfs_free_path(path);
   
         inode->i_op = &btrfs_symlink_inode_operations;
+ +      inode_nohighmem(inode);
         inode->i_mapping->a_ops = &btrfs_symlink_aops;
         inode_set_bytes(inode, name_len);
         btrfs_i_size_write(inode, name_len);
@@@ -10021,7 -10118,7 +10121,7 @@@ static const struct inode_operations bt
         .setattr        = btrfs_setattr,
         .mknod          = btrfs_mknod,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .permission     = btrfs_permission,
@@@ -10098,7 -10195,7 +10198,7 @@@ static const struct inode_operations bt
         .getattr        = btrfs_getattr,
         .setattr        = btrfs_setattr,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .permission     = btrfs_permission,
@@@ -10112,7 -10209,7 +10212,7 @@@ static const struct inode_operations bt
         .setattr        = btrfs_setattr,
         .permission     = btrfs_permission,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .get_acl        = btrfs_get_acl,
@@@ -10121,12 -10218,13 +10221,12 @@@
   };
   static const struct inode_operations btrfs_symlink_inode_operations = {
         .readlink       = generic_readlink,
- -      .follow_link    = page_follow_link_light,
- -      .put_link       = page_put_link,
+ +      .get_link       = page_get_link,
         .getattr        = btrfs_getattr,
         .setattr        = btrfs_setattr,
         .permission     = btrfs_permission,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .update_time    = btrfs_update_time,
diff --combined fs/btrfs/ioctl.c

index 952172ca7e455633c28a79292d18ebbfd68c4d18,709419c98ca5e4e7874dd2aeb2bbe2dda908e3b6..93e7832d1d1b22cc93b484ef34ca030c919ab74b
--- 1/fs/btrfs/ioctl.c
--- 2/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@@ -240,7 -240,7 +240,7 @@@ static int btrfs_ioctl_setflags(struct 
         if (ret)
                 return ret;
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
   
         ip_oldflags = ip->flags;
         i_oldflags = inode->i_flags;
@@@ -358,7 -358,7 +358,7 @@@
         }
   
    out_unlock:
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
         mnt_drop_write_file(file);
         return ret;
   }
@@@ -881,7 -881,7 +881,7 @@@ out_up_read
   out_dput:
         dput(dentry);
   out_unlock:
- -      mutex_unlock(&dir->i_mutex);
+ +      inode_unlock(dir);
         return error;
   }
   
@@@ -1393,18 -1393,18 +1393,18 @@@ int btrfs_defrag_file(struct inode *ino
                         ra_index += cluster;
                 }
   
- -              mutex_lock(&inode->i_mutex);
+ +              inode_lock(inode);
                 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                         BTRFS_I(inode)->force_compress = compress_type;
                 ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                 if (ret < 0) {
- -                      mutex_unlock(&inode->i_mutex);
+ +                      inode_unlock(inode);
                         goto out_ra;
                 }
   
                 defrag_count += ret;
                 balance_dirty_pages_ratelimited(inode->i_mapping);
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
   
                 if (newer_than) {
                         if (newer_off == (u64)-1)
@@@ -1465,9 -1465,9 +1465,9 @@@
   
   out_ra:
         if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
- -              mutex_lock(&inode->i_mutex);
+ +              inode_lock(inode);
                 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- -              mutex_unlock(&inode->i_mutex);
+ +              inode_unlock(inode);
         }
         if (!file)
                 kfree(ra);
@@@ -2430,7 -2430,7 +2430,7 @@@ static noinline int btrfs_ioctl_snap_de
                 goto out_dput;
         }
   
- -      mutex_lock(&inode->i_mutex);
+ +      inode_lock(inode);
   
         /*
          * Don't allow to delete a subvolume with send in progress. This is
@@@ -2543,7 -2543,7 +2543,7 @@@ out_up_write
                 spin_unlock(&dest->root_item_lock);
         }
   out_unlock_inode:
- -      mutex_unlock(&inode->i_mutex);
+ +      inode_unlock(inode);
         if (!err) {
                 d_invalidate(dentry);
                 btrfs_invalidate_inodes(dest);
@@@ -2559,7 -2559,7 +2559,7 @@@
   out_dput:
         dput(dentry);
   out_unlock_dir:
- -      mutex_unlock(&dir->i_mutex);
+ +      inode_unlock(dir);
   out_drop_write:
         mnt_drop_write_file(file);
   out:
@@@ -2857,8 -2857,8 +2857,8 @@@ static inline void lock_extent_range(st
   
   static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
   {
- -      mutex_unlock(&inode1->i_mutex);
- -      mutex_unlock(&inode2->i_mutex);
+ +      inode_unlock(inode1);
+ +      inode_unlock(inode2);
   }
   
   static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
@@@ -2866,8 -2866,8 +2866,8 @@@
         if (inode1 < inode2)
                 swap(inode1, inode2);
   
- -      mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- -      mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ +      inode_lock_nested(inode1, I_MUTEX_PARENT);
+ +      inode_lock_nested(inode2, I_MUTEX_CHILD);
   }
   
   static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@@ -2974,7 -2974,7 +2974,7 @@@ static int btrfs_cmp_data(struct inode 
                 flush_dcache_page(dst_page);
   
                 if (memcmp(addr, dst_addr, cmp_len))
- -                      ret = BTRFS_SAME_DATA_DIFFERS;
+ +                      ret = -EBADE;
   
                 kunmap_atomic(addr);
                 kunmap_atomic(dst_addr);
@@@ -3026,7 -3026,7 +3026,7 @@@ static int btrfs_extent_same(struct ino
                 return 0;
   
         if (same_inode) {
- -              mutex_lock(&src->i_mutex);
+ +              inode_lock(src);
   
                 ret = extent_same_check_offsets(src, loff, &len, olen);
                 if (ret)
@@@ -3101,7 -3101,7 +3101,7 @@@
         btrfs_cmp_data_free(&cmp);
   out_unlock:
         if (same_inode)
- -              mutex_unlock(&src->i_mutex);
+ +              inode_unlock(src);
         else
                 btrfs_double_inode_unlock(src, dst);
   
@@@ -3110,16 -3110,53 +3110,16 @@@
   
   #define BTRFS_MAX_DEDUPE_LEN  SZ_16M
   
- -static long btrfs_ioctl_file_extent_same(struct file *file,
- -                      struct btrfs_ioctl_same_args __user *argp)
+ +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+ +                              struct file *dst_file, u64 dst_loff)
   {
- -      struct btrfs_ioctl_same_args *same = NULL;
- -      struct btrfs_ioctl_same_extent_info *info;
- -      struct inode *src = file_inode(file);
- -      u64 off;
- -      u64 len;
- -      int i;
- -      int ret;
- -      unsigned long size;
+ +      struct inode *src = file_inode(src_file);
+ +      struct inode *dst = file_inode(dst_file);
         u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
- -      bool is_admin = capable(CAP_SYS_ADMIN);
- -      u16 count;
- -
- -      if (!(file->f_mode & FMODE_READ))
- -              return -EINVAL;
- -
- -      ret = mnt_want_write_file(file);
- -      if (ret)
- -              return ret;
- -
- -      if (get_user(count, &argp->dest_count)) {
- -              ret = -EFAULT;
- -              goto out;
- -      }
- -
- -      size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
- -
- -      same = memdup_user(argp, size);
- -
- -      if (IS_ERR(same)) {
- -              ret = PTR_ERR(same);
- -              same = NULL;
- -              goto out;
- -      }
+ +      ssize_t res;
   
- -      off = same->logical_offset;
- -      len = same->length;
- -
- -      /*
- -       * Limit the total length we will dedupe for each operation.
- -       * This is intended to bound the total time spent in this
- -       * ioctl to something sane.
- -       */
- -      if (len > BTRFS_MAX_DEDUPE_LEN)
- -              len = BTRFS_MAX_DEDUPE_LEN;
+ +      if (olen > BTRFS_MAX_DEDUPE_LEN)
+ +              olen = BTRFS_MAX_DEDUPE_LEN;
   
         if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
                 /*
@@@ -3127,13 -3164,58 +3127,13 @@@
                  * result, btrfs_cmp_data() won't correctly handle
                  * this situation without an update.
                  */
- -              ret = -EINVAL;
- -              goto out;
- -      }
- -
- -      ret = -EISDIR;
- -      if (S_ISDIR(src->i_mode))
- -              goto out;
- -
- -      ret = -EACCES;
- -      if (!S_ISREG(src->i_mode))
- -              goto out;
- -
- -      /* pre-format output fields to sane values */
- -      for (i = 0; i < count; i++) {
- -              same->info[i].bytes_deduped = 0ULL;
- -              same->info[i].status = 0;
- -      }
- -
- -      for (i = 0, info = same->info; i < count; i++, info++) {
- -              struct inode *dst;
- -              struct fd dst_file = fdget(info->fd);
- -              if (!dst_file.file) {
- -                      info->status = -EBADF;
- -                      continue;
- -              }
- -              dst = file_inode(dst_file.file);
- -
- -              if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
- -                      info->status = -EINVAL;
- -              } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
- -                      info->status = -EXDEV;
- -              } else if (S_ISDIR(dst->i_mode)) {
- -                      info->status = -EISDIR;
- -              } else if (!S_ISREG(dst->i_mode)) {
- -                      info->status = -EACCES;
- -              } else {
- -                      info->status = btrfs_extent_same(src, off, len, dst,
- -                                                      info->logical_offset);
- -                      if (info->status == 0)
- -                              info->bytes_deduped += len;
- -              }
- -              fdput(dst_file);
+ +              return -EINVAL;
         }
   
- -      ret = copy_to_user(argp, same, size);
- -      if (ret)
- -              ret = -EFAULT;
- -
- -out:
- -      mnt_drop_write_file(file);
- -      kfree(same);
- -      return ret;
+ +      res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
+ +      if (res)
+ +              return res;
+ +      return olen;
   }
   
   static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
@@@ -3709,16 -3791,17 +3709,16 @@@ out
         return ret;
   }
   
- -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- -                                     u64 off, u64 olen, u64 destoff)
+ +static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
+ +                                      u64 off, u64 olen, u64 destoff)
   {
         struct inode *inode = file_inode(file);
+ +      struct inode *src = file_inode(file_src);
         struct btrfs_root *root = BTRFS_I(inode)->root;
- -      struct fd src_file;
- -      struct inode *src;
         int ret;
         u64 len = olen;
         u64 bs = root->fs_info->sb->s_blocksize;
- -      int same_inode = 0;
+ +      int same_inode = src == inode;
   
         /*
          * TODO:
@@@ -3731,25 -3814,54 +3731,25 @@@
          *   be either compressed or non-compressed.
          */
   
- -      /* the destination must be opened for writing */
- -      if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
- -              return -EINVAL;
- -
         if (btrfs_root_readonly(root))
                 return -EROFS;
   
- -      ret = mnt_want_write_file(file);
- -      if (ret)
- -              return ret;
- -
- -      src_file = fdget(srcfd);
- -      if (!src_file.file) {
- -              ret = -EBADF;
- -              goto out_drop_write;
- -      }
- -
- -      ret = -EXDEV;
- -      if (src_file.file->f_path.mnt != file->f_path.mnt)
- -              goto out_fput;
- -
- -      src = file_inode(src_file.file);
- -
- -      ret = -EINVAL;
- -      if (src == inode)
- -              same_inode = 1;
- -
- -      /* the src must be open for reading */
- -      if (!(src_file.file->f_mode & FMODE_READ))
- -              goto out_fput;
+ +      if (file_src->f_path.mnt != file->f_path.mnt ||
+ +          src->i_sb != inode->i_sb)
+ +              return -EXDEV;
   
         /* don't make the dst file partly checksummed */
         if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
             (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- -              goto out_fput;
+ +              return -EINVAL;
   
- -      ret = -EISDIR;
         if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- -              goto out_fput;
- -
- -      ret = -EXDEV;
- -      if (src->i_sb != inode->i_sb)
- -              goto out_fput;
+ +              return -EISDIR;
   
         if (!same_inode) {
                 btrfs_double_inode_lock(src, inode);
         } else {
- -              mutex_lock(&src->i_mutex);
+ +              inode_lock(src);
         }
   
         /* determine range to clone */
@@@ -3814,32 -3926,29 +3814,33 @@@
          * Truncate page cache pages so that future reads will see the cloned
          * data immediately and not the previous data.
          */
-       truncate_inode_pages_range(&inode->i_data, destoff,
-                                  PAGE_CACHE_ALIGN(destoff + len) - 1);
+       truncate_inode_pages_range(&inode->i_data,
+                               round_down(destoff, PAGE_CACHE_SIZE),
+                               round_up(destoff + len, PAGE_CACHE_SIZE) - 1);
   out_unlock:
         if (!same_inode)
                 btrfs_double_inode_unlock(src, inode);
         else
- -              mutex_unlock(&src->i_mutex);
- -out_fput:
- -      fdput(src_file);
- -out_drop_write:
- -      mnt_drop_write_file(file);
+ +              inode_unlock(src);
         return ret;
   }
   
- -static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
+ +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ +                            struct file *file_out, loff_t pos_out,
+ +                            size_t len, unsigned int flags)
   {
- -      struct btrfs_ioctl_clone_range_args args;
+ +      ssize_t ret;
   
- -      if (copy_from_user(&args, argp, sizeof(args)))
- -              return -EFAULT;
- -      return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
- -                               args.src_length, args.dest_offset);
+ +      ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
+ +      if (ret == 0)
+ +              ret = len;
+ +      return ret;
+ +}
+ +
+ +int btrfs_clone_file_range(struct file *src_file, loff_t off,
+ +              struct file *dst_file, loff_t destoff, u64 len)
+ +{
+ +      return btrfs_clone_files(dst_file, src_file, off, len, destoff);
   }
   
   /*
@@@ -5389,6 -5498,10 +5390,6 @@@ long btrfs_ioctl(struct file *file, uns
                 return btrfs_ioctl_dev_info(root, argp);
         case BTRFS_IOC_BALANCE:
                 return btrfs_ioctl_balance(file, NULL);
- -      case BTRFS_IOC_CLONE:
- -              return btrfs_ioctl_clone(file, arg, 0, 0, 0);
- -      case BTRFS_IOC_CLONE_RANGE:
- -              return btrfs_ioctl_clone_range(file, argp);
         case BTRFS_IOC_TRANS_START:
                 return btrfs_ioctl_trans_start(file);
         case BTRFS_IOC_TRANS_END:
@@@ -5466,6 -5579,8 +5467,6 @@@
                 return btrfs_ioctl_get_fslabel(file, argp);
         case BTRFS_IOC_SET_FSLABEL:
                 return btrfs_ioctl_set_fslabel(file, argp);
- -      case BTRFS_IOC_FILE_EXTENT_SAME:
- -              return btrfs_ioctl_file_extent_same(file, argp);
         case BTRFS_IOC_GET_SUPPORTED_FEATURES:
                 return btrfs_ioctl_get_supported_features(file, argp);
         case BTRFS_IOC_GET_FEATURES:
author	David Sterba <[email protected]>
	Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
committer	David Sterba <[email protected]>
	Fri, 26 Feb 2016 14:38:28 +0000 (15:38 +0100)
		1	2
fs/btrfs/ctree.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history