Merge tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <[email protected]>

Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)

committer Linus Torvalds <[email protected]>

Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
author Linus Torvalds <[email protected]>
Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
committer Linus Torvalds <[email protected]>
Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
diff --combined fs/direct-io.c

index 86aa79859d4d347b59467316b6806389717bd5c8,19aa448fde6aa07853a4b729d8d75447e166d223..aeae8c06345155e35e6f9d1567d004c6d073ea66
--- 1/fs/direct-io.c
--- 2/fs/direct-io.c
+++ b/fs/direct-io.c
@@@ -457,7 -457,7 +457,7 @@@ static struct bio *dio_await_one(struc
                 dio->waiter = current;
                 spin_unlock_irqrestore(&dio->bio_lock, flags);
                 if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
- -                  !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+ +                  !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
                         io_schedule();
                 /* wake up sets us TASK_RUNNING */
                 spin_lock_irqsave(&dio->bio_lock, flags);
@@@ -554,7 -554,7 +554,7 @@@ static inline int dio_bio_reap(struct d
    * filesystems that don't need it and also allows us to create the workqueue
    * late enough so that we can include s_id in the name of the workqueue.
    */
- static int sb_init_dio_done_wq(struct super_block *sb)
+ int sb_init_dio_done_wq(struct super_block *sb)
   {
         struct workqueue_struct *old;
         struct workqueue_struct *wq = alloc_workqueue("dio/%s",
@@@ -842,6 -842,24 +842,6 @@@ out
         return ret;
   }
   
- -/*
- - * Clean any dirty buffers in the blockdev mapping which alias newly-created
- - * file blocks.  Only called for S_ISREG files - blockdevs do not set
- - * buffer_new
- - */
- -static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
- -{
- -      unsigned i;
- -      unsigned nblocks;
- -
- -      nblocks = map_bh->b_size >> dio->inode->i_blkbits;
- -
- -      for (i = 0; i < nblocks; i++) {
- -              unmap_underlying_metadata(map_bh->b_bdev,
- -                                        map_bh->b_blocknr + i);
- -      }
- -}
- -
   /*
    * If we are not writing the entire block and get_block() allocated
    * the block for us, we need to fill-in the unused portion of the
@@@ -942,15 -960,11 +942,15 @@@ static int do_direct_IO(struct dio *dio
                                         goto do_holes;
   
                                 sdio->blocks_available =
- -                                              map_bh->b_size >> sdio->blkbits;
+ +                                              map_bh->b_size >> blkbits;
                                 sdio->next_block_for_io =
                                         map_bh->b_blocknr << sdio->blkfactor;
- -                              if (buffer_new(map_bh))
- -                                      clean_blockdev_aliases(dio, map_bh);
+ +                              if (buffer_new(map_bh)) {
+ +                                      clean_bdev_aliases(
+ +                                              map_bh->b_bdev,
+ +                                              map_bh->b_blocknr,
+ +                                              map_bh->b_size >> blkbits);
+ +                              }
   
                                 if (!sdio->blkfactor)
                                         goto do_holes;
@@@ -1195,7 -1209,7 +1195,7 @@@ do_blockdev_direct_IO(struct kiocb *ioc
         dio->inode = inode;
         if (iov_iter_rw(iter) == WRITE) {
                 dio->op = REQ_OP_WRITE;
- -              dio->op_flags = WRITE_ODIRECT;
+ +              dio->op_flags = REQ_SYNC | REQ_IDLE;
         } else {
                 dio->op = REQ_OP_READ;
         }
diff --combined fs/iomap.c

index 13dd413b2b9c6a52e4ff0a966aff6902fc29df9a,fc244624293540221f2994c89754412ba3e1ce8d..354a123f170e534a016f74ca7006458e3b823ef8
--- 1/fs/iomap.c
--- 2/fs/iomap.c
+++ b/fs/iomap.c
@@@ -24,6 -24,7 +24,7 @@@
   #include <linux/uio.h>
   #include <linux/backing-dev.h>
   #include <linux/buffer_head.h>
+ #include <linux/task_io_accounting_ops.h>
   #include <linux/dax.h>
   #include "internal.h"
   
@@@ -584,3 -585,375 +585,375 @@@ int iomap_fiemap(struct inode *inode, s
         return 0;
   }
   EXPORT_SYMBOL_GPL(iomap_fiemap);
- -      bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
+ 
+ /*
+  * Private flags for iomap_dio, must not overlap with the public ones in
+  * iomap.h:
+  */
+ #define IOMAP_DIO_WRITE               (1 << 30)
+ #define IOMAP_DIO_DIRTY               (1 << 31)
+ 
+ struct iomap_dio {
+       struct kiocb            *iocb;
+       iomap_dio_end_io_t      *end_io;
+       loff_t                  i_size;
+       loff_t                  size;
+       atomic_t                ref;
+       unsigned                flags;
+       int                     error;
+ 
+       union {
+               /* used during submission and for synchronous completion: */
+               struct {
+                       struct iov_iter         *iter;
+                       struct task_struct      *waiter;
+                       struct request_queue    *last_queue;
+                       blk_qc_t                cookie;
+               } submit;
+ 
+               /* used for aio completion: */
+               struct {
+                       struct work_struct      work;
+               } aio;
+       };
+ };
+ 
+ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ {
+       struct kiocb *iocb = dio->iocb;
+       ssize_t ret;
+ 
+       if (dio->end_io) {
+               ret = dio->end_io(iocb,
+                               dio->error ? dio->error : dio->size,
+                               dio->flags);
+       } else {
+               ret = dio->error;
+       }
+ 
+       if (likely(!ret)) {
+               ret = dio->size;
+               /* check for short read */
+               if (iocb->ki_pos + ret > dio->i_size &&
+                   !(dio->flags & IOMAP_DIO_WRITE))
+                       ret = dio->i_size - iocb->ki_pos;
+               iocb->ki_pos += ret;
+       }
+ 
+       inode_dio_end(file_inode(iocb->ki_filp));
+       kfree(dio);
+ 
+       return ret;
+ }
+ 
+ static void iomap_dio_complete_work(struct work_struct *work)
+ {
+       struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
+       struct kiocb *iocb = dio->iocb;
+       bool is_write = (dio->flags & IOMAP_DIO_WRITE);
+       ssize_t ret;
+ 
+       ret = iomap_dio_complete(dio);
+       if (is_write && ret > 0)
+               ret = generic_write_sync(iocb, ret);
+       iocb->ki_complete(iocb, ret, 0);
+ }
+ 
+ /*
+  * Set an error in the dio if none is set yet.  We have to use cmpxchg
+  * as the submission context and the completion context(s) can race to
+  * update the error.
+  */
+ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
+ {
+       cmpxchg(&dio->error, 0, ret);
+ }
+ 
+ static void iomap_dio_bio_end_io(struct bio *bio)
+ {
+       struct iomap_dio *dio = bio->bi_private;
+       bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+ 
+       if (bio->bi_error)
+               iomap_dio_set_error(dio, bio->bi_error);
+ 
+       if (atomic_dec_and_test(&dio->ref)) {
+               if (is_sync_kiocb(dio->iocb)) {
+                       struct task_struct *waiter = dio->submit.waiter;
+ 
+                       WRITE_ONCE(dio->submit.waiter, NULL);
+                       wake_up_process(waiter);
+               } else if (dio->flags & IOMAP_DIO_WRITE) {
+                       struct inode *inode = file_inode(dio->iocb->ki_filp);
+ 
+                       INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+                       queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
+               } else {
+                       iomap_dio_complete_work(&dio->aio.work);
+               }
+       }
+ 
+       if (should_dirty) {
+               bio_check_pages_dirty(bio);
+       } else {
+               struct bio_vec *bvec;
+               int i;
+ 
+               bio_for_each_segment_all(bvec, bio, i)
+                       put_page(bvec->bv_page);
+               bio_put(bio);
+       }
+ }
+ 
+ static blk_qc_t
+ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
+               unsigned len)
+ {
+       struct page *page = ZERO_PAGE(0);
+       struct bio *bio;
+ 
+       bio = bio_alloc(GFP_KERNEL, 1);
+       bio->bi_bdev = iomap->bdev;
+       bio->bi_iter.bi_sector =
+               iomap->blkno + ((pos - iomap->offset) >> 9);
+       bio->bi_private = dio;
+       bio->bi_end_io = iomap_dio_bio_end_io;
+ 
+       get_page(page);
+       if (bio_add_page(bio, page, len, 0) != len)
+               BUG();
- -                      bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
++      bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+ 
+       atomic_inc(&dio->ref);
+       return submit_bio(bio);
+ }
+ 
+ static loff_t
+ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+               void *data, struct iomap *iomap)
+ {
+       struct iomap_dio *dio = data;
+       unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+       unsigned fs_block_size = (1 << inode->i_blkbits), pad;
+       unsigned align = iov_iter_alignment(dio->submit.iter);
+       struct iov_iter iter;
+       struct bio *bio;
+       bool need_zeroout = false;
+       int nr_pages, ret;
+ 
+       if ((pos | length | align) & ((1 << blkbits) - 1))
+               return -EINVAL;
+ 
+       switch (iomap->type) {
+       case IOMAP_HOLE:
+               if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+                       return -EIO;
+               /*FALLTHRU*/
+       case IOMAP_UNWRITTEN:
+               if (!(dio->flags & IOMAP_DIO_WRITE)) {
+                       iov_iter_zero(length, dio->submit.iter);
+                       dio->size += length;
+                       return length;
+               }
+               dio->flags |= IOMAP_DIO_UNWRITTEN;
+               need_zeroout = true;
+               break;
+       case IOMAP_MAPPED:
+               if (iomap->flags & IOMAP_F_SHARED)
+                       dio->flags |= IOMAP_DIO_COW;
+               if (iomap->flags & IOMAP_F_NEW)
+                       need_zeroout = true;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return -EIO;
+       }
+ 
+       /*
+        * Operate on a partial iter trimmed to the extent we were called for.
+        * We'll update the iter in the dio once we're done with this extent.
+        */
+       iter = *dio->submit.iter;
+       iov_iter_truncate(&iter, length);
+ 
+       nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+       if (nr_pages <= 0)
+               return nr_pages;
+ 
+       if (need_zeroout) {
+               /* zero out from the start of the block to the write offset */
+               pad = pos & (fs_block_size - 1);
+               if (pad)
+                       iomap_dio_zero(dio, iomap, pos - pad, pad);
+       }
+ 
+       do {
+               if (dio->error)
+                       return 0;
+ 
+               bio = bio_alloc(GFP_KERNEL, nr_pages);
+               bio->bi_bdev = iomap->bdev;
+               bio->bi_iter.bi_sector =
+                       iomap->blkno + ((pos - iomap->offset) >> 9);
+               bio->bi_private = dio;
+               bio->bi_end_io = iomap_dio_bio_end_io;
+ 
+               ret = bio_iov_iter_get_pages(bio, &iter);
+               if (unlikely(ret)) {
+                       bio_put(bio);
+                       return ret;
+               }
+ 
+               if (dio->flags & IOMAP_DIO_WRITE) {
- -                          !blk_poll(dio->submit.last_queue,
- -                                      dio->submit.cookie))
++                      bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+                       task_io_account_write(bio->bi_iter.bi_size);
+               } else {
+                       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+                       if (dio->flags & IOMAP_DIO_DIRTY)
+                               bio_set_pages_dirty(bio);
+               }
+ 
+               dio->size += bio->bi_iter.bi_size;
+               pos += bio->bi_iter.bi_size;
+ 
+               nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+ 
+               atomic_inc(&dio->ref);
+ 
+               dio->submit.last_queue = bdev_get_queue(iomap->bdev);
+               dio->submit.cookie = submit_bio(bio);
+       } while (nr_pages);
+ 
+       if (need_zeroout) {
+               /* zero out from the end of the write to the end of the block */
+               pad = pos & (fs_block_size - 1);
+               if (pad)
+                       iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
+       }
+ 
+       iov_iter_advance(dio->submit.iter, length);
+       return length;
+ }
+ 
+ ssize_t
+ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
+               iomap_dio_end_io_t end_io)
+ {
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
+       struct inode *inode = file_inode(iocb->ki_filp);
+       size_t count = iov_iter_count(iter);
+       loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+       unsigned int flags = IOMAP_DIRECT;
+       struct blk_plug plug;
+       struct iomap_dio *dio;
+ 
+       lockdep_assert_held(&inode->i_rwsem);
+ 
+       if (!count)
+               return 0;
+ 
+       dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+       if (!dio)
+               return -ENOMEM;
+ 
+       dio->iocb = iocb;
+       atomic_set(&dio->ref, 1);
+       dio->size = 0;
+       dio->i_size = i_size_read(inode);
+       dio->end_io = end_io;
+       dio->error = 0;
+       dio->flags = 0;
+ 
+       dio->submit.iter = iter;
+       if (is_sync_kiocb(iocb)) {
+               dio->submit.waiter = current;
+               dio->submit.cookie = BLK_QC_T_NONE;
+               dio->submit.last_queue = NULL;
+       }
+ 
+       if (iov_iter_rw(iter) == READ) {
+               if (pos >= dio->i_size)
+                       goto out_free_dio;
+ 
+               if (iter->type == ITER_IOVEC)
+                       dio->flags |= IOMAP_DIO_DIRTY;
+       } else {
+               dio->flags |= IOMAP_DIO_WRITE;
+               flags |= IOMAP_WRITE;
+       }
+ 
+       if (mapping->nrpages) {
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+               if (ret)
+                       goto out_free_dio;
+ 
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+               ret = 0;
+       }
+ 
+       inode_dio_begin(inode);
+ 
+       blk_start_plug(&plug);
+       do {
+               ret = iomap_apply(inode, pos, count, flags, ops, dio,
+                               iomap_dio_actor);
+               if (ret <= 0) {
+                       /* magic error code to fall back to buffered I/O */
+                       if (ret == -ENOTBLK)
+                               ret = 0;
+                       break;
+               }
+               pos += ret;
+       } while ((count = iov_iter_count(iter)) > 0);
+       blk_finish_plug(&plug);
+ 
+       if (ret < 0)
+               iomap_dio_set_error(dio, ret);
+ 
+       if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+                       !inode->i_sb->s_dio_done_wq) {
+               ret = sb_init_dio_done_wq(inode->i_sb);
+               if (ret < 0)
+                       iomap_dio_set_error(dio, ret);
+       }
+ 
+       if (!atomic_dec_and_test(&dio->ref)) {
+               if (!is_sync_kiocb(iocb))
+                       return -EIOCBQUEUED;
+ 
+               for (;;) {
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       if (!READ_ONCE(dio->submit.waiter))
+                               break;
+ 
+                       if (!(iocb->ki_flags & IOCB_HIPRI) ||
+                           !dio->submit.last_queue ||
++                          !blk_mq_poll(dio->submit.last_queue,
++                                       dio->submit.cookie))
+                               io_schedule();
+               }
+               __set_current_state(TASK_RUNNING);
+       }
+ 
+       /*
+        * Try again to invalidate clean pages which might have been cached by
+        * non-direct readahead, or faulted in by get_user_pages() if the source
+        * of the write was an mmap'ed region of the file we're writing.  Either
+        * one is a pretty crazy thing to do, so we don't support it 100%.  If
+        * this invalidation fails, tough, the write still worked...
+        */
+       if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+       }
+ 
+       return iomap_dio_complete(dio);
+ 
+ out_free_dio:
+       kfree(dio);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(iomap_dio_rw);
diff --combined fs/xfs/xfs_aops.c

index 38755ca96c7a6d884c0c13421ab1d0b08fbc1f4b,265000a093277ed2c681d40170e316aa1381884e..0f56fcd3a5d51517b93c391bb3d97a58f205a544
--- 1/fs/xfs/xfs_aops.c
--- 2/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@@ -37,11 -37,6 +37,6 @@@
   #include <linux/pagevec.h>
   #include <linux/writeback.h>
   
- /* flags for direct write completions */
- #define XFS_DIO_FLAG_UNWRITTEN        (1 << 0)
- #define XFS_DIO_FLAG_APPEND   (1 << 1)
- #define XFS_DIO_FLAG_COW      (1 << 2)
- 
   /*
    * structure owned by writepages passed to individual writepage calls
    */
@@@ -495,8 -490,8 +490,8 @@@ xfs_submit_ioend
   
         ioend->io_bio->bi_private = ioend;
         ioend->io_bio->bi_end_io = xfs_end_bio;
- -      bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- -                       (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ +      ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ +
         /*
          * If we are failing the IO now, just mark the ioend with an
          * error and finish it. This will run IO completion immediately
@@@ -567,7 -562,8 +562,7 @@@ xfs_chain_bio
   
         bio_chain(ioend->io_bio, new);
         bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
- -      bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
- -                        (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
+ +      ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
         submit_bio(ioend->io_bio);
         ioend->io_bio = new;
   }
@@@ -776,7 -772,7 +771,7 @@@ xfs_map_cow
   {
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_bmbt_irec    imap;
-       bool                    is_cow = false, need_alloc = false;
+       bool                    is_cow = false;
         int                     error;
   
         /*
@@@ -794,7 -790,7 +789,7 @@@
          * Else we need to check if there is a COW mapping at this offset.
          */
         xfs_ilock(ip, XFS_ILOCK_SHARED);
-       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
+       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
   
         if (!is_cow)
@@@ -804,7 -800,7 +799,7 @@@
          * And if the COW mapping has a delayed extent here we need to
          * allocate real space for it now.
          */
-       if (need_alloc) {
+       if (isnullstartblock(imap.br_startblock)) {
                 error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
                                 &imap);
                 if (error)
@@@ -1174,45 -1170,6 +1169,6 @@@ xfs_vm_releasepage
         return try_to_free_buffers(page);
   }
   
- /*
-  * When we map a DIO buffer, we may need to pass flags to
-  * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
-  *
-  * Note that for DIO, an IO to the highest supported file block offset (i.e.
-  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
-  * bit variable. Hence if we see this overflow, we have to assume that the IO is
-  * extending the file size. We won't know for sure until IO completion is run
-  * and the actual max write offset is communicated to the IO completion
-  * routine.
-  */
- static void
- xfs_map_direct(
-       struct inode            *inode,
-       struct buffer_head      *bh_result,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset,
-       bool                    is_cow)
- {
-       uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
-       xfs_off_t               size = bh_result->b_size;
- 
-       trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
-               ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
-               XFS_IO_OVERWRITE, imap);
- 
-       if (ISUNWRITTEN(imap)) {
-               *flags |= XFS_DIO_FLAG_UNWRITTEN;
-               set_buffer_defer_completion(bh_result);
-       } else if (is_cow) {
-               *flags |= XFS_DIO_FLAG_COW;
-               set_buffer_defer_completion(bh_result);
-       }
-       if (offset + size > i_size_read(inode) || offset + size < 0) {
-               *flags |= XFS_DIO_FLAG_APPEND;
-               set_buffer_defer_completion(bh_result);
-       }
- }
- 
   /*
    * If this is O_DIRECT or the mpage code calling tell them how large the mapping
    * is, so that we can avoid repeated get_blocks calls.
@@@ -1253,51 -1210,12 +1209,12 @@@ xfs_map_trim_size
         bh_result->b_size = mapping_size;
   }
   
- /* Bounce unaligned directio writes to the page cache. */
   static int
- xfs_bounce_unaligned_dio_write(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           offset_fsb,
-       struct xfs_bmbt_irec    *imap)
- {
-       struct xfs_bmbt_irec    irec;
-       xfs_fileoff_t           delta;
-       bool                    shared;
-       bool                    x;
-       int                     error;
- 
-       irec = *imap;
-       if (offset_fsb > irec.br_startoff) {
-               delta = offset_fsb - irec.br_startoff;
-               irec.br_blockcount -= delta;
-               irec.br_startblock += delta;
-               irec.br_startoff = offset_fsb;
-       }
-       error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
-       if (error)
-               return error;
- 
-       /*
-        * We're here because we're trying to do a directio write to a
-        * region that isn't aligned to a filesystem block.  If any part
-        * of the extent is shared, fall back to buffered mode to handle
-        * the RMW.  This is done by returning -EREMCHG ("remote addr
-        * changed"), which is caught further up the call stack.
-        */
-       if (shared) {
-               trace_xfs_reflink_bounce_dio_write(ip, imap);
-               return -EREMCHG;
-       }
-       return 0;
- }
- 
- STATIC int
- __xfs_get_blocks(
+ xfs_get_blocks(
         struct inode            *inode,
         sector_t                iblock,
         struct buffer_head      *bh_result,
-       int                     create,
-       bool                    direct)
+       int                     create)
   {
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
@@@ -1308,11 -1226,8 +1225,8 @@@
         int                     nimaps = 1;
         xfs_off_t               offset;
         ssize_t                 size;
-       int                     new = 0;
-       bool                    is_cow = false;
-       bool                    need_alloc = false;
   
-       BUG_ON(create && !direct);
+       BUG_ON(create);
   
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
@@@ -1321,7 -1236,7 +1235,7 @@@
         ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
         size = bh_result->b_size;
   
-       if (!create && offset >= i_size_read(inode))
+       if (offset >= i_size_read(inode))
                 return 0;
   
         /*
@@@ -1336,52 -1251,12 +1250,12 @@@
         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
         offset_fsb = XFS_B_TO_FSBT(mp, offset);
   
-       if (create && direct && xfs_is_reflink_inode(ip))
-               is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
-                                       &need_alloc);
-       if (!is_cow) {
-               error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-                                       &imap, &nimaps, XFS_BMAPI_ENTIRE);
-               /*
-                * Truncate an overwrite extent if there's a pending CoW
-                * reservation before the end of this extent.  This
-                * forces us to come back to get_blocks to take care of
-                * the CoW.
-                */
-               if (create && direct && nimaps &&
-                   imap.br_startblock != HOLESTARTBLOCK &&
-                   imap.br_startblock != DELAYSTARTBLOCK &&
-                   !ISUNWRITTEN(&imap))
-                       xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
-                                       &imap);
-       }
-       ASSERT(!need_alloc);
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+                               &imap, &nimaps, XFS_BMAPI_ENTIRE);
         if (error)
                 goto out_unlock;
   
-       /* for DAX, we convert unwritten extents directly */
-       if (create &&
-           (!nimaps ||
-            (imap.br_startblock == HOLESTARTBLOCK ||
-             imap.br_startblock == DELAYSTARTBLOCK) ||
-            (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-               /*
-                * xfs_iomap_write_direct() expects the shared lock. It
-                * is unlocked on return.
-                */
-               if (lockmode == XFS_ILOCK_EXCL)
-                       xfs_ilock_demote(ip, lockmode);
- 
-               error = xfs_iomap_write_direct(ip, offset, size,
-                                              &imap, nimaps);
-               if (error)
-                       return error;
-               new = 1;
- 
-               trace_xfs_get_blocks_alloc(ip, offset, size,
-                               ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
-                                                  : XFS_IO_DELALLOC, &imap);
-       } else if (nimaps) {
+       if (nimaps) {
                 trace_xfs_get_blocks_found(ip, offset, size,
                                 ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
                                                    : XFS_IO_OVERWRITE, &imap);
@@@ -1391,12 -1266,6 +1265,6 @@@
                 goto out_unlock;
         }
   
-       if (IS_DAX(inode) && create) {
-               ASSERT(!ISUNWRITTEN(&imap));
-               /* zeroing is not needed at a higher layer */
-               new = 0;
-       }
- 
         /* trim mapping down to size requested */
         xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
   
@@@ -1406,45 -1275,14 +1274,14 @@@
          */
         if (imap.br_startblock != HOLESTARTBLOCK &&
             imap.br_startblock != DELAYSTARTBLOCK &&
-           (create || !ISUNWRITTEN(&imap))) {
-               if (create && direct && !is_cow) {
-                       error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
-                                       &imap);
-                       if (error)
-                               return error;
-               }
- 
+           !ISUNWRITTEN(&imap))
                 xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (ISUNWRITTEN(&imap))
-                       set_buffer_unwritten(bh_result);
-               /* direct IO needs special help */
-               if (create)
-                       xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
-       }
   
         /*
          * If this is a realtime file, data may be on a different device
          * to that pointed to from the buffer_head b_bdev currently.
          */
         bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
- 
-       /*
-        * If we previously allocated a block out beyond eof and we are now
-        * coming back to use it then we will need to flag it as new even if it
-        * has a disk address.
-        *
-        * With sub-block writes into unwritten extents we also need to mark
-        * the buffer as new so that the unwritten parts of the buffer gets
-        * correctly zeroed.
-        */
-       if (create &&
-           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-            (offset >= i_size_read(inode)) ||
-            (new || ISUNWRITTEN(&imap))))
-               set_buffer_new(bh_result);
- 
-       BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
- 
         return 0;
   
   out_unlock:
@@@ -1452,100 -1290,6 +1289,6 @@@
         return error;
   }
   
- int
- xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
- {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, false);
- }
- 
- int
- xfs_get_blocks_direct(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
- {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, true);
- }
- 
- /*
-  * Complete a direct I/O write request.
-  *
-  * xfs_map_direct passes us some flags in the private data to tell us what to
-  * do.  If no flags are set, then the write IO is an overwrite wholly within
-  * the existing allocated file size and so there is nothing for us to do.
-  *
-  * Note that in this case the completion can be called in interrupt context,
-  * whereas if we have flags set we will always be called in task context
-  * (i.e. from a workqueue).
-  */
- int
- xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private)
- {
-       struct inode            *inode = file_inode(iocb->ki_filp);
-       struct xfs_inode        *ip = XFS_I(inode);
-       uintptr_t               flags = (uintptr_t)private;
-       int                     error = 0;
- 
-       trace_xfs_end_io_direct_write(ip, offset, size);
- 
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
- 
-       if (size <= 0)
-               return size;
- 
-       /*
-        * The flags tell us whether we are doing unwritten extent conversions
-        * or an append transaction that updates the on-disk file size. These
-        * cases are the only cases where we should *potentially* be needing
-        * to update the VFS inode size.
-        */
-       if (flags == 0) {
-               ASSERT(offset + size <= i_size_read(inode));
-               return 0;
-       }
- 
-       /*
-        * We need to update the in-core inode size here so that we don't end up
-        * with the on-disk inode size being outside the in-core inode size. We
-        * have no other method of updating EOF for AIO, so always do it here
-        * if necessary.
-        *
-        * We need to lock the test/set EOF update as we can be racing with
-        * other IO completions here to update the EOF. Failing to serialise
-        * here can result in EOF moving backwards and Bad Things Happen when
-        * that occurs.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (offset + size > i_size_read(inode))
-               i_size_write(inode, offset + size);
-       spin_unlock(&ip->i_flags_lock);
- 
-       if (flags & XFS_DIO_FLAG_COW)
-               error = xfs_reflink_end_cow(ip, offset, size);
-       if (flags & XFS_DIO_FLAG_UNWRITTEN) {
-               trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
- 
-               error = xfs_iomap_write_unwritten(ip, offset, size);
-       }
-       if (flags & XFS_DIO_FLAG_APPEND) {
-               trace_xfs_end_io_direct_write_append(ip, offset, size);
- 
-               error = xfs_setfilesize(ip, offset, size);
-       }
- 
-       return error;
- }
- 
   STATIC ssize_t
   xfs_vm_direct_IO(
         struct kiocb            *iocb,
@@@ -1566,7 -1310,6 +1309,6 @@@ xfs_vm_bmap
         struct xfs_inode        *ip = XFS_I(inode);
   
         trace_xfs_vm_bmap(XFS_I(inode));
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
   
         /*
          * The swap code (ab-)uses ->bmap to get a block mapping and then
@@@ -1574,12 -1317,10 +1316,10 @@@
          * that on reflinks inodes, so we have to skip out here.  And yes,
          * 0 is the magic code for a bmap error..
          */
-       if (xfs_is_reflink_inode(ip)) {
-               xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+       if (xfs_is_reflink_inode(ip))
                 return 0;
-       }
+ 
         filemap_write_and_wait(mapping);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
         return generic_block_bmap(mapping, block, xfs_get_blocks);
   }
   
diff --combined fs/xfs/xfs_buf.c

index 33c435f3316c6fe895bdd0a7e3794e8fb5268932,509dd6cc1331faf2d4dd46c9ba4a7baff3583d53..7f0a01f7b592d20932649d1f8a705a836d86ca02
--- 1/fs/xfs/xfs_buf.c
--- 2/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@@ -219,7 -219,6 +219,6 @@@ _xfs_buf_alloc
         init_completion(&bp->b_iowait);
         INIT_LIST_HEAD(&bp->b_lru);
         INIT_LIST_HEAD(&bp->b_list);
-       RB_CLEAR_NODE(&bp->b_rbnode);
         sema_init(&bp->b_sema, 0); /* held, no waiters */
         spin_lock_init(&bp->b_lock);
         XB_SET_OWNER(bp);
@@@ -473,6 -472,62 +472,62 @@@ _xfs_buf_map_pages
   /*
    *    Finding and Reading Buffers
    */
+ static int
+ _xfs_buf_obj_cmp(
+       struct rhashtable_compare_arg   *arg,
+       const void                      *obj)
+ {
+       const struct xfs_buf_map        *map = arg->key;
+       const struct xfs_buf            *bp = obj;
+ 
+       /*
+        * The key hashing in the lookup path depends on the key being the
+        * first element of the compare_arg, make sure to assert this.
+        */
+       BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
+ 
+       if (bp->b_bn != map->bm_bn)
+               return 1;
+ 
+       if (unlikely(bp->b_length != map->bm_len)) {
+               /*
+                * found a block number match. If the range doesn't
+                * match, the only way this is allowed is if the buffer
+                * in the cache is stale and the transaction that made
+                * it stale has not yet committed. i.e. we are
+                * reallocating a busy extent. Skip this buffer and
+                * continue searching for an exact match.
+                */
+               ASSERT(bp->b_flags & XBF_STALE);
+               return 1;
+       }
+       return 0;
+ }
+ 
+ static const struct rhashtable_params xfs_buf_hash_params = {
+       .min_size               = 32,   /* empty AGs have minimal footprint */
+       .nelem_hint             = 16,
+       .key_len                = sizeof(xfs_daddr_t),
+       .key_offset             = offsetof(struct xfs_buf, b_bn),
+       .head_offset            = offsetof(struct xfs_buf, b_rhash_head),
+       .automatic_shrinking    = true,
+       .obj_cmpfn              = _xfs_buf_obj_cmp,
+ };
+ 
+ int
+ xfs_buf_hash_init(
+       struct xfs_perag        *pag)
+ {
+       spin_lock_init(&pag->pag_buf_lock);
+       return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+ }
+ 
+ void
+ xfs_buf_hash_destroy(
+       struct xfs_perag        *pag)
+ {
+       rhashtable_destroy(&pag->pag_buf_hash);
+ }
   
   /*
    *    Look up, and creates if absent, a lockable buffer for
@@@ -488,27 -543,24 +543,24 @@@ _xfs_buf_find
         xfs_buf_t               *new_bp)
   {
         struct xfs_perag        *pag;
-       struct rb_node          **rbp;
-       struct rb_node          *parent;
         xfs_buf_t               *bp;
-       xfs_daddr_t             blkno = map[0].bm_bn;
+       struct xfs_buf_map      cmap = { .bm_bn = map[0].bm_bn };
         xfs_daddr_t             eofs;
-       int                     numblks = 0;
         int                     i;
   
         for (i = 0; i < nmaps; i++)
-               numblks += map[i].bm_len;
+               cmap.bm_len += map[i].bm_len;
   
         /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize));
-       ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
+       ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
+       ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
   
         /*
          * Corrupted block numbers can get through to here, unfortunately, so we
          * have to check that the buffer falls within the filesystem bounds.
          */
         eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
-       if (blkno < 0 || blkno >= eofs) {
+       if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
                 /*
                  * XXX (dgc): we should really be returning -EFSCORRUPTED here,
                  * but none of the higher level infrastructure supports
@@@ -516,53 -568,29 +568,29 @@@
                  */
                 xfs_alert(btp->bt_mount,
                           "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
-                         __func__, blkno, eofs);
+                         __func__, cmap.bm_bn, eofs);
                 WARN_ON(1);
                 return NULL;
         }
   
-       /* get tree root */
         pag = xfs_perag_get(btp->bt_mount,
-                               xfs_daddr_to_agno(btp->bt_mount, blkno));
+                           xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
   
-       /* walk tree */
         spin_lock(&pag->pag_buf_lock);
-       rbp = &pag->pag_buf_tree.rb_node;
-       parent = NULL;
-       bp = NULL;
-       while (*rbp) {
-               parent = *rbp;
-               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
- 
-               if (blkno < bp->b_bn)
-                       rbp = &(*rbp)->rb_left;
-               else if (blkno > bp->b_bn)
-                       rbp = &(*rbp)->rb_right;
-               else {
-                       /*
-                        * found a block number match. If the range doesn't
-                        * match, the only way this is allowed is if the buffer
-                        * in the cache is stale and the transaction that made
-                        * it stale has not yet committed. i.e. we are
-                        * reallocating a busy extent. Skip this buffer and
-                        * continue searching to the right for an exact match.
-                        */
-                       if (bp->b_length != numblks) {
-                               ASSERT(bp->b_flags & XBF_STALE);
-                               rbp = &(*rbp)->rb_right;
-                               continue;
-                       }
-                       atomic_inc(&bp->b_hold);
-                       goto found;
-               }
+       bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
+                                   xfs_buf_hash_params);
+       if (bp) {
+               atomic_inc(&bp->b_hold);
+               goto found;
         }
   
         /* No match found */
         if (new_bp) {
-               rb_link_node(&new_bp->b_rbnode, parent, rbp);
-               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
                 /* the buffer keeps the perag reference until it is freed */
                 new_bp->b_pag = pag;
+               rhashtable_insert_fast(&pag->pag_buf_hash,
+                                      &new_bp->b_rhash_head,
+                                      xfs_buf_hash_params);
                 spin_unlock(&pag->pag_buf_lock);
         } else {
                 XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
@@@ -930,7 -958,6 +958,6 @@@ xfs_buf_rele
   
         if (!pag) {
                 ASSERT(list_empty(&bp->b_lru));
-               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
                 if (atomic_dec_and_test(&bp->b_hold)) {
                         xfs_buf_ioacct_dec(bp);
                         xfs_buf_free(bp);
@@@ -938,8 -965,6 +965,6 @@@
                 return;
         }
   
-       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
- 
         ASSERT(atomic_read(&bp->b_hold) > 0);
   
         release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
@@@ -983,7 -1008,8 +1008,8 @@@
                 }
   
                 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
-               rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+               rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
+                                      xfs_buf_hash_params);
                 spin_unlock(&pag->pag_buf_lock);
                 xfs_perag_put(pag);
                 freebuf = true;
@@@ -1304,7 -1330,7 +1330,7 @@@ _xfs_buf_ioapply
         if (bp->b_flags & XBF_WRITE) {
                 op = REQ_OP_WRITE;
                 if (bp->b_flags & XBF_SYNCIO)
- -                      op_flags = WRITE_SYNC;
+ +                      op_flags = REQ_SYNC;
                 if (bp->b_flags & XBF_FUA)
                         op_flags |= REQ_FUA;
                 if (bp->b_flags & XBF_FLUSH)
@@@ -1711,8 -1737,7 +1737,7 @@@ xfs_free_buftarg
         percpu_counter_destroy(&btp->bt_io_count);
         list_lru_destroy(&btp->bt_lru);
   
-       if (mp->m_flags & XFS_MOUNT_BARRIER)
-               xfs_blkdev_issue_flush(btp);
+       xfs_blkdev_issue_flush(btp);
   
         kmem_free(btp);
   }
diff --combined kernel/locking/lockdep.c

index 7bd265f6b0984a612f20d57aeb3c59647cc383d8,cff580a6edf965eda424a50709e1bc809d674975..7c38f8f3d97b7b172dcd61f04df382548cf7ac15
--- 1/kernel/locking/lockdep.c
--- 2/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@@ -506,13 -506,13 +506,13 @@@ static void __print_lock_name(struct lo
         name = class->name;
         if (!name) {
                 name = __get_key_name(class->key, str);
- -              printk("%s", name);
+ +              printk(KERN_CONT "%s", name);
         } else {
- -              printk("%s", name);
+ +              printk(KERN_CONT "%s", name);
                 if (class->name_version > 1)
- -                      printk("#%d", class->name_version);
+ +                      printk(KERN_CONT "#%d", class->name_version);
                 if (class->subclass)
- -                      printk("/%d", class->subclass);
+ +                      printk(KERN_CONT "/%d", class->subclass);
         }
   }
   
@@@ -522,9 -522,9 +522,9 @@@ static void print_lock_name(struct lock
   
         get_usage_chars(class, usage);
   
- -      printk(" (");
+ +      printk(KERN_CONT " (");
         __print_lock_name(class);
- -      printk("){%s}", usage);
+ +      printk(KERN_CONT "){%s}", usage);
   }
   
   static void print_lockdep_cache(struct lockdep_map *lock)
@@@ -536,7 -536,7 +536,7 @@@
         if (!name)
                 name = __get_key_name(lock->key->subkeys, str);
   
- -      printk("%s", name);
+ +      printk(KERN_CONT "%s", name);
   }
   
   static void print_lock(struct held_lock *hlock)
@@@ -551,13 -551,13 +551,13 @@@
         barrier();
   
         if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) {
- -              printk("<RELEASED>\n");
+ +              printk(KERN_CONT "<RELEASED>\n");
                 return;
         }
   
         print_lock_name(lock_classes + class_idx - 1);
- -      printk(", at: ");
- -      print_ip_sym(hlock->acquire_ip);
+ +      printk(KERN_CONT ", at: [<%p>] %pS\n",
+ +              (void *)hlock->acquire_ip, (void *)hlock->acquire_ip);
   }
   
   static void lockdep_print_held_locks(struct task_struct *curr)
@@@ -792,8 -792,8 +792,8 @@@ register_lock_class(struct lockdep_map 
   
                 printk("\nnew class %p: %s", class->key, class->name);
                 if (class->name_version > 1)
- -                      printk("#%d", class->name_version);
- -              printk("\n");
+ +                      printk(KERN_CONT "#%d", class->name_version);
+ +              printk(KERN_CONT "\n");
                 dump_stack();
   
                 if (!graph_lock()) {
@@@ -840,9 -840,9 +840,9 @@@ static struct lock_list *alloc_list_ent
   /*
    * Add a new dependency to the head of the list:
    */
- -static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
- -                          struct list_head *head, unsigned long ip,
- -                          int distance, struct stack_trace *trace)
+ +static int add_lock_to_list(struct lock_class *this, struct list_head *head,
+ +                          unsigned long ip, int distance,
+ +                          struct stack_trace *trace)
   {
         struct lock_list *entry;
         /*
@@@ -1071,7 -1071,7 +1071,7 @@@ print_circular_bug_entry(struct lock_li
                 return 0;
         printk("\n-> #%u", depth);
         print_lock_name(target->class);
- -      printk(":\n");
+ +      printk(KERN_CONT ":\n");
         print_stack_trace(&target->trace, 6);
   
         return 0;
@@@ -1102,11 -1102,11 +1102,11 @@@ print_circular_lock_scenario(struct hel
         if (parent != source) {
                 printk("Chain exists of:\n  ");
                 __print_lock_name(source);
- -              printk(" --> ");
+ +              printk(KERN_CONT " --> ");
                 __print_lock_name(parent);
- -              printk(" --> ");
+ +              printk(KERN_CONT " --> ");
                 __print_lock_name(target);
- -              printk("\n\n");
+ +              printk(KERN_CONT "\n\n");
         }
   
         printk(" Possible unsafe locking scenario:\n\n");
@@@ -1114,16 -1114,16 +1114,16 @@@
         printk("       ----                    ----\n");
         printk("  lock(");
         __print_lock_name(target);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("                               lock(");
         __print_lock_name(parent);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("                               lock(");
         __print_lock_name(target);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("  lock(");
         __print_lock_name(source);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("\n *** DEADLOCK ***\n\n");
   }
   
@@@ -1359,22 -1359,22 +1359,22 @@@ static void print_lock_class_header(str
   
         printk("%*s->", depth, "");
         print_lock_name(class);
- -      printk(" ops: %lu", class->ops);
- -      printk(" {\n");
+ +      printk(KERN_CONT " ops: %lu", class->ops);
+ +      printk(KERN_CONT " {\n");
   
         for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
                 if (class->usage_mask & (1 << bit)) {
                         int len = depth;
   
                         len += printk("%*s   %s", depth, "", usage_str[bit]);
- -                      len += printk(" at:\n");
+ +                      len += printk(KERN_CONT " at:\n");
                         print_stack_trace(class->usage_traces + bit, len);
                 }
         }
         printk("%*s }\n", depth, "");
   
- -      printk("%*s ... key      at: ",depth,"");
- -      print_ip_sym((unsigned long)class->key);
+ +      printk("%*s ... key      at: [<%p>] %pS\n",
+ +              depth, "", class->key, class->key);
   }
   
   /*
@@@ -1437,11 -1437,11 +1437,11 @@@ print_irq_lock_scenario(struct lock_lis
         if (middle_class != unsafe_class) {
                 printk("Chain exists of:\n  ");
                 __print_lock_name(safe_class);
- -              printk(" --> ");
+ +              printk(KERN_CONT " --> ");
                 __print_lock_name(middle_class);
- -              printk(" --> ");
+ +              printk(KERN_CONT " --> ");
                 __print_lock_name(unsafe_class);
- -              printk("\n\n");
+ +              printk(KERN_CONT "\n\n");
         }
   
         printk(" Possible interrupt unsafe locking scenario:\n\n");
@@@ -1449,18 -1449,18 +1449,18 @@@
         printk("       ----                    ----\n");
         printk("  lock(");
         __print_lock_name(unsafe_class);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("                               local_irq_disable();\n");
         printk("                               lock(");
         __print_lock_name(safe_class);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("                               lock(");
         __print_lock_name(middle_class);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("  <Interrupt>\n");
         printk("    lock(");
         __print_lock_name(safe_class);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("\n *** DEADLOCK ***\n\n");
   }
   
@@@ -1497,9 -1497,9 +1497,9 @@@ print_bad_irq_dependency(struct task_st
         print_lock(prev);
         printk("which would create a new lock dependency:\n");
         print_lock_name(hlock_class(prev));
- -      printk(" ->");
+ +      printk(KERN_CONT " ->");
         print_lock_name(hlock_class(next));
- -      printk("\n");
+ +      printk(KERN_CONT "\n");
   
         printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
                 irqclass);
@@@ -1521,7 -1521,8 +1521,7 @@@
   
         lockdep_print_held_locks(curr);
   
- -      printk("\nthe dependencies between %s-irq-safe lock", irqclass);
- -      printk(" and the holding lock:\n");
+ +      printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
         if (!save_trace(&prev_root->trace))
                 return 0;
         print_shortest_lock_dependencies(backwards_entry, prev_root);
@@@ -1693,10 -1694,10 +1693,10 @@@ print_deadlock_scenario(struct held_loc
         printk("       ----\n");
         printk("  lock(");
         __print_lock_name(prev);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("  lock(");
         __print_lock_name(next);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("\n *** DEADLOCK ***\n\n");
         printk(" May be due to missing lock nesting notation\n\n");
   }
@@@ -1868,14 -1869,14 +1868,14 @@@ check_prev_add(struct task_struct *curr
          * Ok, all validations passed, add the new lock
          * to the previous lock's dependency list:
          */
- -      ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
+ +      ret = add_lock_to_list(hlock_class(next),
                                &hlock_class(prev)->locks_after,
                                next->acquire_ip, distance, &trace);
   
         if (!ret)
                 return 0;
   
- -      ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
+ +      ret = add_lock_to_list(hlock_class(prev),
                                &hlock_class(next)->locks_before,
                                next->acquire_ip, distance, &trace);
         if (!ret)
@@@ -1890,9 -1891,9 +1890,9 @@@
                 graph_unlock();
                 printk("\n new dependency: ");
                 print_lock_name(hlock_class(prev));
- -              printk(" => ");
+ +              printk(KERN_CONT " => ");
                 print_lock_name(hlock_class(next));
- -              printk("\n");
+ +              printk(KERN_CONT "\n");
                 dump_stack();
                 return graph_lock();
         }
@@@ -2342,11 -2343,11 +2342,11 @@@ print_usage_bug_scenario(struct held_lo
         printk("       ----\n");
         printk("  lock(");
         __print_lock_name(class);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("  <Interrupt>\n");
         printk("    lock(");
         __print_lock_name(class);
- -      printk(");\n");
+ +      printk(KERN_CONT ");\n");
         printk("\n *** DEADLOCK ***\n\n");
   }
   
@@@ -2521,18 -2522,14 +2521,18 @@@ check_usage_backwards(struct task_struc
   void print_irqtrace_events(struct task_struct *curr)
   {
         printk("irq event stamp: %u\n", curr->irq_events);
- -      printk("hardirqs last  enabled at (%u): ", curr->hardirq_enable_event);
- -      print_ip_sym(curr->hardirq_enable_ip);
- -      printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
- -      print_ip_sym(curr->hardirq_disable_ip);
- -      printk("softirqs last  enabled at (%u): ", curr->softirq_enable_event);
- -      print_ip_sym(curr->softirq_enable_ip);
- -      printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
- -      print_ip_sym(curr->softirq_disable_ip);
+ +      printk("hardirqs last  enabled at (%u): [<%p>] %pS\n",
+ +              curr->hardirq_enable_event, (void *)curr->hardirq_enable_ip,
+ +              (void *)curr->hardirq_enable_ip);
+ +      printk("hardirqs last disabled at (%u): [<%p>] %pS\n",
+ +              curr->hardirq_disable_event, (void *)curr->hardirq_disable_ip,
+ +              (void *)curr->hardirq_disable_ip);
+ +      printk("softirqs last  enabled at (%u): [<%p>] %pS\n",
+ +              curr->softirq_enable_event, (void *)curr->softirq_enable_ip,
+ +              (void *)curr->softirq_enable_ip);
+ +      printk("softirqs last disabled at (%u): [<%p>] %pS\n",
+ +              curr->softirq_disable_event, (void *)curr->softirq_disable_ip,
+ +              (void *)curr->softirq_disable_ip);
   }
   
   static int HARDIRQ_verbose(struct lock_class *class)
@@@ -3191,7 -3188,7 +3191,7 @@@ print_lock_nested_lock_not_held(struct 
         return 0;
   }
   
- static int __lock_is_held(struct lockdep_map *lock);
+ static int __lock_is_held(struct lockdep_map *lock, int read);
   
   /*
    * This gets called for every mutex_lock*()/spin_lock*() operation.
@@@ -3238,8 -3235,8 +3238,8 @@@ static int __lock_acquire(struct lockde
         if (very_verbose(class)) {
                 printk("\nacquire class [%p] %s", class->key, class->name);
                 if (class->name_version > 1)
- -                      printk("#%d", class->name_version);
- -              printk("\n");
+ +                      printk(KERN_CONT "#%d", class->name_version);
+ +              printk(KERN_CONT "\n");
                 dump_stack();
         }
   
@@@ -3332,7 -3329,7 +3332,7 @@@
         }
         chain_key = iterate_chain_key(chain_key, class_idx);
   
-       if (nest_lock && !__lock_is_held(nest_lock))
+       if (nest_lock && !__lock_is_held(nest_lock, -1))
                 return print_lock_nested_lock_not_held(curr, hlock, ip);
   
         if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
@@@ -3381,7 -3378,7 +3381,7 @@@ print_unlock_imbalance_bug(struct task_
         printk("%s/%d is trying to release lock (",
                 curr->comm, task_pid_nr(curr));
         print_lockdep_cache(lock);
- -      printk(") at:\n");
+ +      printk(KERN_CONT ") at:\n");
         print_ip_sym(ip);
         printk("but there are no more locks to release!\n");
         printk("\nother info that might help us debug this:\n");
@@@ -3579,7 -3576,7 +3579,7 @@@ found_it
         return 1;
   }
   
- static int __lock_is_held(struct lockdep_map *lock)
+ static int __lock_is_held(struct lockdep_map *lock, int read)
   {
         struct task_struct *curr = current;
         int i;
@@@ -3587,8 -3584,12 +3587,12 @@@
         for (i = 0; i < curr->lockdep_depth; i++) {
                 struct held_lock *hlock = curr->held_locks + i;
   
-               if (match_held_lock(hlock, lock))
-                       return 1;
+               if (match_held_lock(hlock, lock)) {
+                       if (read == -1 || hlock->read == read)
+                               return 1;
+ 
+                       return 0;
+               }
         }
   
         return 0;
@@@ -3772,7 -3773,7 +3776,7 @@@ void lock_release(struct lockdep_map *l
   }
   EXPORT_SYMBOL_GPL(lock_release);
   
- int lock_is_held(struct lockdep_map *lock)
+ int lock_is_held_type(struct lockdep_map *lock, int read)
   {
         unsigned long flags;
         int ret = 0;
@@@ -3784,13 -3785,13 +3788,13 @@@
         check_flags(flags);
   
         current->lockdep_recursion = 1;
-       ret = __lock_is_held(lock);
+       ret = __lock_is_held(lock, read);
         current->lockdep_recursion = 0;
         raw_local_irq_restore(flags);
   
         return ret;
   }
- EXPORT_SYMBOL_GPL(lock_is_held);
+ EXPORT_SYMBOL_GPL(lock_is_held_type);
   
   struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
   {
@@@ -3874,7 -3875,7 +3878,7 @@@ print_lock_contention_bug(struct task_s
         printk("%s/%d is trying to contend lock (",
                 curr->comm, task_pid_nr(curr));
         print_lockdep_cache(lock);
- -      printk(") at:\n");
+ +      printk(KERN_CONT ") at:\n");
         print_ip_sym(ip);
         printk("but there are no locks held!\n");
         printk("\nother info that might help us debug this:\n");
author	Linus Torvalds <[email protected]>
	Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
committer	Linus Torvalds <[email protected]>
	Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
		1	2
fs/direct-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/iomap.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_aops.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_buf.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/locking/lockdep.c	patch \|	diff1 \|	diff2 \|	blob \| history