Git Repo - linux.git/commitdiff
Merge tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Linus Torvalds <[email protected]>
Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
committer Linus Torvalds <[email protected]>
Thu, 15 Dec 2016 05:35:31 +0000 (21:35 -0800)
Pull xfs updates from Dave Chinner:
 "There is quite a varied bunch of stuff in this update, and some of it
  you will have already merged through the ext4 tree which imported the
  dax-4.10-iomap-pmd topic branch from the XFS tree.

  There is also a new direct IO implementation that uses the iomap
  infrastructure. It's much simpler, faster, and has lower IO latency
  than the existing direct IO infrastructure.

  Summary:
   - DAX PMD faults via iomap infrastructure
   - Direct-io support in iomap infrastructure
   - removal of now-redundant XFS inode iolock, replaced with VFS
     i_rwsem
   - synchronisation with fixes and changes in userspace libxfs code
   - extent tree lookup helpers
   - lots of little corruption detection improvements to verifiers
   - optimised CRC calculations
   - faster buffer cache lookups
   - deprecation of barrier/nobarrier mount options - we always use
     REQ_FUA/REQ_FLUSH where appropriate for data integrity now
   - cleanups to speculative preallocation
   - miscellaneous minor bug fixes and cleanups"
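For context, the direct IO rework called out above is driven by one new entry point, iomap_dio_rw(), added in fs/iomap.c below. The following is a minimal, hedged sketch of the calling convention only; the function and ops names are illustrative and not part of this merge, while the i_rwsem requirement comes from the lockdep_assert_held() check inside iomap_dio_rw():

/*
 * Illustrative caller of the new iomap-based direct IO path.  Only
 * iomap_dio_rw() and the "caller holds i_rwsem" rule are taken from this
 * merge; the ops structure and function below are assumptions.
 */
static struct iomap_ops example_iomap_ops;	/* would supply ->iomap_begin/->iomap_end */

static ssize_t
example_file_dio_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	ssize_t			ret;

	inode_lock_shared(inode);	/* i_rwsem, which replaces the XFS iolock */
	ret = iomap_dio_rw(iocb, to, &example_iomap_ops, NULL);
	inode_unlock_shared(inode);
	return ret;
}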

* tag 'xfs-for-linus-4.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (63 commits)
  xfs: nuke unused tracepoint definitions
  xfs: use GFP_NOFS when allocating btree cursors
  xfs: use xfs_vn_setattr_size to check on new size
  xfs: deprecate barrier/nobarrier mount option
  xfs: Always flush caches when integrity is required
  xfs: ignore leaf attr ichdr.count in verifier during log replay
  xfs: use rhashtable to track buffer cache
  xfs: optimise CRC updates
  xfs: make xfs btree stats less huge
  xfs: don't cap maximum dedupe request length
  xfs: don't allow di_size with high bit set
  xfs: error out if trying to add attrs and anextents > 0
  xfs: don't crash if reading a directory results in an unexpected hole
  xfs: complain if we don't get nextents bmap records
  xfs: check for bogus values in btree block headers
  xfs: forbid AG btrees with level == 0
  xfs: several xattr functions can be void
  xfs: handle cow fork in xfs_bmap_trace_exlist
  xfs: pass state not whichfork to trace_xfs_extlist
  xfs: Move AGI buffer type setting to xfs_read_agi
  ...

fs/direct-io.c
fs/iomap.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_buf.c
kernel/locking/lockdep.c

diff --combined fs/direct-io.c
index 86aa79859d4d347b59467316b6806389717bd5c8,19aa448fde6aa07853a4b729d8d75447e166d223..aeae8c06345155e35e6f9d1567d004c6d073ea66
@@@ -457,7 -457,7 +457,7 @@@ static struct bio *dio_await_one(struc
                dio->waiter = current;
                spin_unlock_irqrestore(&dio->bio_lock, flags);
                if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
 -                  !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
 +                  !blk_mq_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
                        io_schedule();
                /* wake up sets us TASK_RUNNING */
                spin_lock_irqsave(&dio->bio_lock, flags);
@@@ -554,7 -554,7 +554,7 @@@ static inline int dio_bio_reap(struct d
   * filesystems that don't need it and also allows us to create the workqueue
   * late enough so that we can include s_id in the name of the workqueue.
   */
- static int sb_init_dio_done_wq(struct super_block *sb)
+ int sb_init_dio_done_wq(struct super_block *sb)
  {
        struct workqueue_struct *old;
        struct workqueue_struct *wq = alloc_workqueue("dio/%s",
@@@ -842,6 -842,24 +842,6 @@@ out
        return ret;
  }
  
 -/*
 - * Clean any dirty buffers in the blockdev mapping which alias newly-created
 - * file blocks.  Only called for S_ISREG files - blockdevs do not set
 - * buffer_new
 - */
 -static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
 -{
 -      unsigned i;
 -      unsigned nblocks;
 -
 -      nblocks = map_bh->b_size >> dio->inode->i_blkbits;
 -
 -      for (i = 0; i < nblocks; i++) {
 -              unmap_underlying_metadata(map_bh->b_bdev,
 -                                        map_bh->b_blocknr + i);
 -      }
 -}
 -
  /*
   * If we are not writing the entire block and get_block() allocated
   * the block for us, we need to fill-in the unused portion of the
@@@ -942,15 -960,11 +942,15 @@@ static int do_direct_IO(struct dio *dio
                                        goto do_holes;
  
                                sdio->blocks_available =
 -                                              map_bh->b_size >> sdio->blkbits;
 +                                              map_bh->b_size >> blkbits;
                                sdio->next_block_for_io =
                                        map_bh->b_blocknr << sdio->blkfactor;
 -                              if (buffer_new(map_bh))
 -                                      clean_blockdev_aliases(dio, map_bh);
 +                              if (buffer_new(map_bh)) {
 +                                      clean_bdev_aliases(
 +                                              map_bh->b_bdev,
 +                                              map_bh->b_blocknr,
 +                                              map_bh->b_size >> blkbits);
 +                              }
  
                                if (!sdio->blkfactor)
                                        goto do_holes;
@@@ -1195,7 -1209,7 +1195,7 @@@ do_blockdev_direct_IO(struct kiocb *ioc
        dio->inode = inode;
        if (iov_iter_rw(iter) == WRITE) {
                dio->op = REQ_OP_WRITE;
 -              dio->op_flags = WRITE_ODIRECT;
 +              dio->op_flags = REQ_SYNC | REQ_IDLE;
        } else {
                dio->op = REQ_OP_READ;
        }
diff --combined fs/iomap.c
index 13dd413b2b9c6a52e4ff0a966aff6902fc29df9a,fc244624293540221f2994c89754412ba3e1ce8d..354a123f170e534a016f74ca7006458e3b823ef8
@@@ -24,6 -24,7 +24,7 @@@
  #include <linux/uio.h>
  #include <linux/backing-dev.h>
  #include <linux/buffer_head.h>
+ #include <linux/task_io_accounting_ops.h>
  #include <linux/dax.h>
  #include "internal.h"
  
@@@ -584,3 -585,375 +585,375 @@@ int iomap_fiemap(struct inode *inode, s
        return 0;
  }
  EXPORT_SYMBOL_GPL(iomap_fiemap);
 -      bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
+ /*
+  * Private flags for iomap_dio, must not overlap with the public ones in
+  * iomap.h:
+  */
+ #define IOMAP_DIO_WRITE               (1 << 30)
+ #define IOMAP_DIO_DIRTY               (1 << 31)
+ struct iomap_dio {
+       struct kiocb            *iocb;
+       iomap_dio_end_io_t      *end_io;
+       loff_t                  i_size;
+       loff_t                  size;
+       atomic_t                ref;
+       unsigned                flags;
+       int                     error;
+       union {
+               /* used during submission and for synchronous completion: */
+               struct {
+                       struct iov_iter         *iter;
+                       struct task_struct      *waiter;
+                       struct request_queue    *last_queue;
+                       blk_qc_t                cookie;
+               } submit;
+               /* used for aio completion: */
+               struct {
+                       struct work_struct      work;
+               } aio;
+       };
+ };
+ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ {
+       struct kiocb *iocb = dio->iocb;
+       ssize_t ret;
+       if (dio->end_io) {
+               ret = dio->end_io(iocb,
+                               dio->error ? dio->error : dio->size,
+                               dio->flags);
+       } else {
+               ret = dio->error;
+       }
+       if (likely(!ret)) {
+               ret = dio->size;
+               /* check for short read */
+               if (iocb->ki_pos + ret > dio->i_size &&
+                   !(dio->flags & IOMAP_DIO_WRITE))
+                       ret = dio->i_size - iocb->ki_pos;
+               iocb->ki_pos += ret;
+       }
+       inode_dio_end(file_inode(iocb->ki_filp));
+       kfree(dio);
+       return ret;
+ }
+ static void iomap_dio_complete_work(struct work_struct *work)
+ {
+       struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
+       struct kiocb *iocb = dio->iocb;
+       bool is_write = (dio->flags & IOMAP_DIO_WRITE);
+       ssize_t ret;
+       ret = iomap_dio_complete(dio);
+       if (is_write && ret > 0)
+               ret = generic_write_sync(iocb, ret);
+       iocb->ki_complete(iocb, ret, 0);
+ }
+ /*
+  * Set an error in the dio if none is set yet.  We have to use cmpxchg
+  * as the submission context and the completion context(s) can race to
+  * update the error.
+  */
+ static inline void iomap_dio_set_error(struct iomap_dio *dio, int ret)
+ {
+       cmpxchg(&dio->error, 0, ret);
+ }
+ static void iomap_dio_bio_end_io(struct bio *bio)
+ {
+       struct iomap_dio *dio = bio->bi_private;
+       bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+       if (bio->bi_error)
+               iomap_dio_set_error(dio, bio->bi_error);
+       if (atomic_dec_and_test(&dio->ref)) {
+               if (is_sync_kiocb(dio->iocb)) {
+                       struct task_struct *waiter = dio->submit.waiter;
+                       WRITE_ONCE(dio->submit.waiter, NULL);
+                       wake_up_process(waiter);
+               } else if (dio->flags & IOMAP_DIO_WRITE) {
+                       struct inode *inode = file_inode(dio->iocb->ki_filp);
+                       INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+                       queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
+               } else {
+                       iomap_dio_complete_work(&dio->aio.work);
+               }
+       }
+       if (should_dirty) {
+               bio_check_pages_dirty(bio);
+       } else {
+               struct bio_vec *bvec;
+               int i;
+               bio_for_each_segment_all(bvec, bio, i)
+                       put_page(bvec->bv_page);
+               bio_put(bio);
+       }
+ }
+ static blk_qc_t
+ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
+               unsigned len)
+ {
+       struct page *page = ZERO_PAGE(0);
+       struct bio *bio;
+       bio = bio_alloc(GFP_KERNEL, 1);
+       bio->bi_bdev = iomap->bdev;
+       bio->bi_iter.bi_sector =
+               iomap->blkno + ((pos - iomap->offset) >> 9);
+       bio->bi_private = dio;
+       bio->bi_end_io = iomap_dio_bio_end_io;
+       get_page(page);
+       if (bio_add_page(bio, page, len, 0) != len)
+               BUG();
 -                      bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT);
++      bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+       atomic_inc(&dio->ref);
+       return submit_bio(bio);
+ }
+ static loff_t
+ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
+               void *data, struct iomap *iomap)
+ {
+       struct iomap_dio *dio = data;
+       unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev));
+       unsigned fs_block_size = (1 << inode->i_blkbits), pad;
+       unsigned align = iov_iter_alignment(dio->submit.iter);
+       struct iov_iter iter;
+       struct bio *bio;
+       bool need_zeroout = false;
+       int nr_pages, ret;
+       if ((pos | length | align) & ((1 << blkbits) - 1))
+               return -EINVAL;
+       switch (iomap->type) {
+       case IOMAP_HOLE:
+               if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
+                       return -EIO;
+               /*FALLTHRU*/
+       case IOMAP_UNWRITTEN:
+               if (!(dio->flags & IOMAP_DIO_WRITE)) {
+                       iov_iter_zero(length, dio->submit.iter);
+                       dio->size += length;
+                       return length;
+               }
+               dio->flags |= IOMAP_DIO_UNWRITTEN;
+               need_zeroout = true;
+               break;
+       case IOMAP_MAPPED:
+               if (iomap->flags & IOMAP_F_SHARED)
+                       dio->flags |= IOMAP_DIO_COW;
+               if (iomap->flags & IOMAP_F_NEW)
+                       need_zeroout = true;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return -EIO;
+       }
+       /*
+        * Operate on a partial iter trimmed to the extent we were called for.
+        * We'll update the iter in the dio once we're done with this extent.
+        */
+       iter = *dio->submit.iter;
+       iov_iter_truncate(&iter, length);
+       nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+       if (nr_pages <= 0)
+               return nr_pages;
+       if (need_zeroout) {
+               /* zero out from the start of the block to the write offset */
+               pad = pos & (fs_block_size - 1);
+               if (pad)
+                       iomap_dio_zero(dio, iomap, pos - pad, pad);
+       }
+       do {
+               if (dio->error)
+                       return 0;
+               bio = bio_alloc(GFP_KERNEL, nr_pages);
+               bio->bi_bdev = iomap->bdev;
+               bio->bi_iter.bi_sector =
+                       iomap->blkno + ((pos - iomap->offset) >> 9);
+               bio->bi_private = dio;
+               bio->bi_end_io = iomap_dio_bio_end_io;
+               ret = bio_iov_iter_get_pages(bio, &iter);
+               if (unlikely(ret)) {
+                       bio_put(bio);
+                       return ret;
+               }
+               if (dio->flags & IOMAP_DIO_WRITE) {
 -                          !blk_poll(dio->submit.last_queue,
 -                                      dio->submit.cookie))
++                      bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE);
+                       task_io_account_write(bio->bi_iter.bi_size);
+               } else {
+                       bio_set_op_attrs(bio, REQ_OP_READ, 0);
+                       if (dio->flags & IOMAP_DIO_DIRTY)
+                               bio_set_pages_dirty(bio);
+               }
+               dio->size += bio->bi_iter.bi_size;
+               pos += bio->bi_iter.bi_size;
+               nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
+               atomic_inc(&dio->ref);
+               dio->submit.last_queue = bdev_get_queue(iomap->bdev);
+               dio->submit.cookie = submit_bio(bio);
+       } while (nr_pages);
+       if (need_zeroout) {
+               /* zero out from the end of the write to the end of the block */
+               pad = pos & (fs_block_size - 1);
+               if (pad)
+                       iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
+       }
+       iov_iter_advance(dio->submit.iter, length);
+       return length;
+ }
+ ssize_t
+ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct iomap_ops *ops,
+               iomap_dio_end_io_t end_io)
+ {
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
+       struct inode *inode = file_inode(iocb->ki_filp);
+       size_t count = iov_iter_count(iter);
+       loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0;
+       unsigned int flags = IOMAP_DIRECT;
+       struct blk_plug plug;
+       struct iomap_dio *dio;
+       lockdep_assert_held(&inode->i_rwsem);
+       if (!count)
+               return 0;
+       dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+       if (!dio)
+               return -ENOMEM;
+       dio->iocb = iocb;
+       atomic_set(&dio->ref, 1);
+       dio->size = 0;
+       dio->i_size = i_size_read(inode);
+       dio->end_io = end_io;
+       dio->error = 0;
+       dio->flags = 0;
+       dio->submit.iter = iter;
+       if (is_sync_kiocb(iocb)) {
+               dio->submit.waiter = current;
+               dio->submit.cookie = BLK_QC_T_NONE;
+               dio->submit.last_queue = NULL;
+       }
+       if (iov_iter_rw(iter) == READ) {
+               if (pos >= dio->i_size)
+                       goto out_free_dio;
+               if (iter->type == ITER_IOVEC)
+                       dio->flags |= IOMAP_DIO_DIRTY;
+       } else {
+               dio->flags |= IOMAP_DIO_WRITE;
+               flags |= IOMAP_WRITE;
+       }
+       if (mapping->nrpages) {
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+               if (ret)
+                       goto out_free_dio;
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+               ret = 0;
+       }
+       inode_dio_begin(inode);
+       blk_start_plug(&plug);
+       do {
+               ret = iomap_apply(inode, pos, count, flags, ops, dio,
+                               iomap_dio_actor);
+               if (ret <= 0) {
+                       /* magic error code to fall back to buffered I/O */
+                       if (ret == -ENOTBLK)
+                               ret = 0;
+                       break;
+               }
+               pos += ret;
+       } while ((count = iov_iter_count(iter)) > 0);
+       blk_finish_plug(&plug);
+       if (ret < 0)
+               iomap_dio_set_error(dio, ret);
+       if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+                       !inode->i_sb->s_dio_done_wq) {
+               ret = sb_init_dio_done_wq(inode->i_sb);
+               if (ret < 0)
+                       iomap_dio_set_error(dio, ret);
+       }
+       if (!atomic_dec_and_test(&dio->ref)) {
+               if (!is_sync_kiocb(iocb))
+                       return -EIOCBQUEUED;
+               for (;;) {
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       if (!READ_ONCE(dio->submit.waiter))
+                               break;
+                       if (!(iocb->ki_flags & IOCB_HIPRI) ||
+                           !dio->submit.last_queue ||
++                          !blk_mq_poll(dio->submit.last_queue,
++                                       dio->submit.cookie))
+                               io_schedule();
+               }
+               __set_current_state(TASK_RUNNING);
+       }
+       /*
+        * Try again to invalidate clean pages which might have been cached by
+        * non-direct readahead, or faulted in by get_user_pages() if the source
+        * of the write was an mmap'ed region of the file we're writing.  Either
+        * one is a pretty crazy thing to do, so we don't support it 100%.  If
+        * this invalidation fails, tough, the write still worked...
+        */
+       if (iov_iter_rw(iter) == WRITE && mapping->nrpages) {
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+       }
+       return iomap_dio_complete(dio);
+ out_free_dio:
+       kfree(dio);
+       return ret;
+ }
+ EXPORT_SYMBOL_GPL(iomap_dio_rw);
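One detail of the fs/iomap.c code worth calling out: the end_io hook threaded through struct iomap_dio is how a filesystem finishes per-write work (unwritten extent conversion, COW remapping) after the bios complete. Below is a hedged sketch of such a callback, based only on how iomap_dio_complete() invokes it above; the exact typedef lives in include/linux/iomap.h, which is not part of this diff, and the function name is illustrative:

/*
 * Invoked from iomap_dio_complete() with either the accumulated error or
 * the number of bytes transferred, plus the IOMAP_DIO_* flags set during
 * submission.  The helpers named in the comments are the XFS routines the
 * removed buffer_head completion path in fs/xfs/xfs_aops.c (below) used.
 */
static int
example_dio_write_end_io(struct kiocb *iocb, ssize_t size, unsigned int flags)
{
	if (size <= 0)
		return size;
	if (flags & IOMAP_DIO_COW) {
		/* e.g. xfs_reflink_end_cow() over the just-written range */
	}
	if (flags & IOMAP_DIO_UNWRITTEN) {
		/* e.g. xfs_iomap_write_unwritten() over the just-written range */
	}
	return 0;
}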
diff --combined fs/xfs/xfs_aops.c
index 38755ca96c7a6d884c0c13421ab1d0b08fbc1f4b,265000a093277ed2c681d40170e316aa1381884e..0f56fcd3a5d51517b93c391bb3d97a58f205a544
  #include <linux/pagevec.h>
  #include <linux/writeback.h>
  
- /* flags for direct write completions */
- #define XFS_DIO_FLAG_UNWRITTEN        (1 << 0)
- #define XFS_DIO_FLAG_APPEND   (1 << 1)
- #define XFS_DIO_FLAG_COW      (1 << 2)
  /*
   * structure owned by writepages passed to individual writepage calls
   */
@@@ -495,8 -490,8 +490,8 @@@ xfs_submit_ioend
  
        ioend->io_bio->bi_private = ioend;
        ioend->io_bio->bi_end_io = xfs_end_bio;
 -      bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
 -                       (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
 +      ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
 +
        /*
         * If we are failing the IO now, just mark the ioend with an
         * error and finish it. This will run IO completion immediately
@@@ -567,7 -562,8 +562,7 @@@ xfs_chain_bio
  
        bio_chain(ioend->io_bio, new);
        bio_get(ioend->io_bio);         /* for xfs_destroy_ioend */
 -      bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
 -                        (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
 +      ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
        submit_bio(ioend->io_bio);
        ioend->io_bio = new;
  }
@@@ -776,7 -772,7 +771,7 @@@ xfs_map_cow
  {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_bmbt_irec    imap;
-       bool                    is_cow = false, need_alloc = false;
+       bool                    is_cow = false;
        int                     error;
  
        /*
         * Else we need to check if there is a COW mapping at this offset.
         */
        xfs_ilock(ip, XFS_ILOCK_SHARED);
-       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
+       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
  
        if (!is_cow)
         * And if the COW mapping has a delayed extent here we need to
         * allocate real space for it now.
         */
-       if (need_alloc) {
+       if (isnullstartblock(imap.br_startblock)) {
                error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
                                &imap);
                if (error)
@@@ -1174,45 -1170,6 +1169,6 @@@ xfs_vm_releasepage
        return try_to_free_buffers(page);
  }
  
- /*
-  * When we map a DIO buffer, we may need to pass flags to
-  * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
-  *
-  * Note that for DIO, an IO to the highest supported file block offset (i.e.
-  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
-  * bit variable. Hence if we see this overflow, we have to assume that the IO is
-  * extending the file size. We won't know for sure until IO completion is run
-  * and the actual max write offset is communicated to the IO completion
-  * routine.
-  */
- static void
- xfs_map_direct(
-       struct inode            *inode,
-       struct buffer_head      *bh_result,
-       struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset,
-       bool                    is_cow)
- {
-       uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
-       xfs_off_t               size = bh_result->b_size;
-       trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
-               ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
-               XFS_IO_OVERWRITE, imap);
-       if (ISUNWRITTEN(imap)) {
-               *flags |= XFS_DIO_FLAG_UNWRITTEN;
-               set_buffer_defer_completion(bh_result);
-       } else if (is_cow) {
-               *flags |= XFS_DIO_FLAG_COW;
-               set_buffer_defer_completion(bh_result);
-       }
-       if (offset + size > i_size_read(inode) || offset + size < 0) {
-               *flags |= XFS_DIO_FLAG_APPEND;
-               set_buffer_defer_completion(bh_result);
-       }
- }
  /*
   * If this is O_DIRECT or the mpage code calling tell them how large the mapping
   * is, so that we can avoid repeated get_blocks calls.
@@@ -1253,51 -1210,12 +1209,12 @@@ xfs_map_trim_size
        bh_result->b_size = mapping_size;
  }
  
- /* Bounce unaligned directio writes to the page cache. */
  static int
- xfs_bounce_unaligned_dio_write(
-       struct xfs_inode        *ip,
-       xfs_fileoff_t           offset_fsb,
-       struct xfs_bmbt_irec    *imap)
- {
-       struct xfs_bmbt_irec    irec;
-       xfs_fileoff_t           delta;
-       bool                    shared;
-       bool                    x;
-       int                     error;
-       irec = *imap;
-       if (offset_fsb > irec.br_startoff) {
-               delta = offset_fsb - irec.br_startoff;
-               irec.br_blockcount -= delta;
-               irec.br_startblock += delta;
-               irec.br_startoff = offset_fsb;
-       }
-       error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
-       if (error)
-               return error;
-       /*
-        * We're here because we're trying to do a directio write to a
-        * region that isn't aligned to a filesystem block.  If any part
-        * of the extent is shared, fall back to buffered mode to handle
-        * the RMW.  This is done by returning -EREMCHG ("remote addr
-        * changed"), which is caught further up the call stack.
-        */
-       if (shared) {
-               trace_xfs_reflink_bounce_dio_write(ip, imap);
-               return -EREMCHG;
-       }
-       return 0;
- }
- STATIC int
- __xfs_get_blocks(
+ xfs_get_blocks(
        struct inode            *inode,
        sector_t                iblock,
        struct buffer_head      *bh_result,
-       int                     create,
-       bool                    direct)
+       int                     create)
  {
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        int                     nimaps = 1;
        xfs_off_t               offset;
        ssize_t                 size;
-       int                     new = 0;
-       bool                    is_cow = false;
-       bool                    need_alloc = false;
  
-       BUG_ON(create && !direct);
+       BUG_ON(create);
  
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
        ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
        size = bh_result->b_size;
  
-       if (!create && offset >= i_size_read(inode))
+       if (offset >= i_size_read(inode))
                return 0;
  
        /*
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
  
-       if (create && direct && xfs_is_reflink_inode(ip))
-               is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
-                                       &need_alloc);
-       if (!is_cow) {
-               error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-                                       &imap, &nimaps, XFS_BMAPI_ENTIRE);
-               /*
-                * Truncate an overwrite extent if there's a pending CoW
-                * reservation before the end of this extent.  This
-                * forces us to come back to get_blocks to take care of
-                * the CoW.
-                */
-               if (create && direct && nimaps &&
-                   imap.br_startblock != HOLESTARTBLOCK &&
-                   imap.br_startblock != DELAYSTARTBLOCK &&
-                   !ISUNWRITTEN(&imap))
-                       xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
-                                       &imap);
-       }
-       ASSERT(!need_alloc);
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+                               &imap, &nimaps, XFS_BMAPI_ENTIRE);
        if (error)
                goto out_unlock;
  
-       /* for DAX, we convert unwritten extents directly */
-       if (create &&
-           (!nimaps ||
-            (imap.br_startblock == HOLESTARTBLOCK ||
-             imap.br_startblock == DELAYSTARTBLOCK) ||
-            (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-               /*
-                * xfs_iomap_write_direct() expects the shared lock. It
-                * is unlocked on return.
-                */
-               if (lockmode == XFS_ILOCK_EXCL)
-                       xfs_ilock_demote(ip, lockmode);
-               error = xfs_iomap_write_direct(ip, offset, size,
-                                              &imap, nimaps);
-               if (error)
-                       return error;
-               new = 1;
-               trace_xfs_get_blocks_alloc(ip, offset, size,
-                               ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
-                                                  : XFS_IO_DELALLOC, &imap);
-       } else if (nimaps) {
+       if (nimaps) {
                trace_xfs_get_blocks_found(ip, offset, size,
                                ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
                                                   : XFS_IO_OVERWRITE, &imap);
                goto out_unlock;
        }
  
-       if (IS_DAX(inode) && create) {
-               ASSERT(!ISUNWRITTEN(&imap));
-               /* zeroing is not needed at a higher layer */
-               new = 0;
-       }
        /* trim mapping down to size requested */
        xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
  
         */
        if (imap.br_startblock != HOLESTARTBLOCK &&
            imap.br_startblock != DELAYSTARTBLOCK &&
-           (create || !ISUNWRITTEN(&imap))) {
-               if (create && direct && !is_cow) {
-                       error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
-                                       &imap);
-                       if (error)
-                               return error;
-               }
+           !ISUNWRITTEN(&imap))
                xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (ISUNWRITTEN(&imap))
-                       set_buffer_unwritten(bh_result);
-               /* direct IO needs special help */
-               if (create)
-                       xfs_map_direct(inode, bh_result, &imap, offset, is_cow);
-       }
  
        /*
         * If this is a realtime file, data may be on a different device.
         * to that pointed to from the buffer_head b_bdev currently.
         */
        bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-       /*
-        * If we previously allocated a block out beyond eof and we are now
-        * coming back to use it then we will need to flag it as new even if it
-        * has a disk address.
-        *
-        * With sub-block writes into unwritten extents we also need to mark
-        * the buffer as new so that the unwritten parts of the buffer gets
-        * correctly zeroed.
-        */
-       if (create &&
-           ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
-            (offset >= i_size_read(inode)) ||
-            (new || ISUNWRITTEN(&imap))))
-               set_buffer_new(bh_result);
-       BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
        return 0;
  
  out_unlock:
        return error;
  }
  
- int
- xfs_get_blocks(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
- {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, false);
- }
- int
- xfs_get_blocks_direct(
-       struct inode            *inode,
-       sector_t                iblock,
-       struct buffer_head      *bh_result,
-       int                     create)
- {
-       return __xfs_get_blocks(inode, iblock, bh_result, create, true);
- }
- /*
-  * Complete a direct I/O write request.
-  *
-  * xfs_map_direct passes us some flags in the private data to tell us what to
-  * do.  If no flags are set, then the write IO is an overwrite wholly within
-  * the existing allocated file size and so there is nothing for us to do.
-  *
-  * Note that in this case the completion can be called in interrupt context,
-  * whereas if we have flags set we will always be called in task context
-  * (i.e. from a workqueue).
-  */
- int
- xfs_end_io_direct_write(
-       struct kiocb            *iocb,
-       loff_t                  offset,
-       ssize_t                 size,
-       void                    *private)
- {
-       struct inode            *inode = file_inode(iocb->ki_filp);
-       struct xfs_inode        *ip = XFS_I(inode);
-       uintptr_t               flags = (uintptr_t)private;
-       int                     error = 0;
-       trace_xfs_end_io_direct_write(ip, offset, size);
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               return -EIO;
-       if (size <= 0)
-               return size;
-       /*
-        * The flags tell us whether we are doing unwritten extent conversions
-        * or an append transaction that updates the on-disk file size. These
-        * cases are the only cases where we should *potentially* be needing
-        * to update the VFS inode size.
-        */
-       if (flags == 0) {
-               ASSERT(offset + size <= i_size_read(inode));
-               return 0;
-       }
-       /*
-        * We need to update the in-core inode size here so that we don't end up
-        * with the on-disk inode size being outside the in-core inode size. We
-        * have no other method of updating EOF for AIO, so always do it here
-        * if necessary.
-        *
-        * We need to lock the test/set EOF update as we can be racing with
-        * other IO completions here to update the EOF. Failing to serialise
-        * here can result in EOF moving backwards and Bad Things Happen when
-        * that occurs.
-        */
-       spin_lock(&ip->i_flags_lock);
-       if (offset + size > i_size_read(inode))
-               i_size_write(inode, offset + size);
-       spin_unlock(&ip->i_flags_lock);
-       if (flags & XFS_DIO_FLAG_COW)
-               error = xfs_reflink_end_cow(ip, offset, size);
-       if (flags & XFS_DIO_FLAG_UNWRITTEN) {
-               trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-               error = xfs_iomap_write_unwritten(ip, offset, size);
-       }
-       if (flags & XFS_DIO_FLAG_APPEND) {
-               trace_xfs_end_io_direct_write_append(ip, offset, size);
-               error = xfs_setfilesize(ip, offset, size);
-       }
-       return error;
- }
  STATIC ssize_t
  xfs_vm_direct_IO(
        struct kiocb            *iocb,
@@@ -1566,7 -1310,6 +1309,6 @@@ xfs_vm_bmap
        struct xfs_inode        *ip = XFS_I(inode);
  
        trace_xfs_vm_bmap(XFS_I(inode));
-       xfs_ilock(ip, XFS_IOLOCK_SHARED);
  
        /*
         * The swap code (ab-)uses ->bmap to get a block mapping and then
         * that on reflinks inodes, so we have to skip out here.  And yes,
         * 0 is the magic code for a bmap error..
         */
-       if (xfs_is_reflink_inode(ip)) {
-               xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+       if (xfs_is_reflink_inode(ip))
                return 0;
-       }
        filemap_write_and_wait(mapping);
-       xfs_iunlock(ip, XFS_IOLOCK_SHARED);
        return generic_block_bmap(mapping, block, xfs_get_blocks);
  }
  
diff --combined fs/xfs/xfs_buf.c
index 33c435f3316c6fe895bdd0a7e3794e8fb5268932,509dd6cc1331faf2d4dd46c9ba4a7baff3583d53..7f0a01f7b592d20932649d1f8a705a836d86ca02
@@@ -219,7 -219,6 +219,6 @@@ _xfs_buf_alloc
        init_completion(&bp->b_iowait);
        INIT_LIST_HEAD(&bp->b_lru);
        INIT_LIST_HEAD(&bp->b_list);
-       RB_CLEAR_NODE(&bp->b_rbnode);
        sema_init(&bp->b_sema, 0); /* held, no waiters */
        spin_lock_init(&bp->b_lock);
        XB_SET_OWNER(bp);
@@@ -473,6 -472,62 +472,62 @@@ _xfs_buf_map_pages
  /*
   *    Finding and Reading Buffers
   */
+ static int
+ _xfs_buf_obj_cmp(
+       struct rhashtable_compare_arg   *arg,
+       const void                      *obj)
+ {
+       const struct xfs_buf_map        *map = arg->key;
+       const struct xfs_buf            *bp = obj;
+       /*
+        * The key hashing in the lookup path depends on the key being the
+        * first element of the compare_arg, make sure to assert this.
+        */
+       BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0);
+       if (bp->b_bn != map->bm_bn)
+               return 1;
+       if (unlikely(bp->b_length != map->bm_len)) {
+               /*
+                * found a block number match. If the range doesn't
+                * match, the only way this is allowed is if the buffer
+                * in the cache is stale and the transaction that made
+                * it stale has not yet committed. i.e. we are
+                * reallocating a busy extent. Skip this buffer and
+                * continue searching for an exact match.
+                */
+               ASSERT(bp->b_flags & XBF_STALE);
+               return 1;
+       }
+       return 0;
+ }
+ static const struct rhashtable_params xfs_buf_hash_params = {
+       .min_size               = 32,   /* empty AGs have minimal footprint */
+       .nelem_hint             = 16,
+       .key_len                = sizeof(xfs_daddr_t),
+       .key_offset             = offsetof(struct xfs_buf, b_bn),
+       .head_offset            = offsetof(struct xfs_buf, b_rhash_head),
+       .automatic_shrinking    = true,
+       .obj_cmpfn              = _xfs_buf_obj_cmp,
+ };
+ int
+ xfs_buf_hash_init(
+       struct xfs_perag        *pag)
+ {
+       spin_lock_init(&pag->pag_buf_lock);
+       return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+ }
+ void
+ xfs_buf_hash_destroy(
+       struct xfs_perag        *pag)
+ {
+       rhashtable_destroy(&pag->pag_buf_hash);
+ }
  
  /*
   *    Look up, and creates if absent, a lockable buffer for
@@@ -488,27 -543,24 +543,24 @@@ _xfs_buf_find
        xfs_buf_t               *new_bp)
  {
        struct xfs_perag        *pag;
-       struct rb_node          **rbp;
-       struct rb_node          *parent;
        xfs_buf_t               *bp;
-       xfs_daddr_t             blkno = map[0].bm_bn;
+       struct xfs_buf_map      cmap = { .bm_bn = map[0].bm_bn };
        xfs_daddr_t             eofs;
-       int                     numblks = 0;
        int                     i;
  
        for (i = 0; i < nmaps; i++)
-               numblks += map[i].bm_len;
+               cmap.bm_len += map[i].bm_len;
  
        /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize));
-       ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
+       ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize));
+       ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask));
  
        /*
         * Corrupted block numbers can get through to here, unfortunately, so we
         * have to check that the buffer falls within the filesystem bounds.
         */
        eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
-       if (blkno < 0 || blkno >= eofs) {
+       if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) {
                /*
                 * XXX (dgc): we should really be returning -EFSCORRUPTED here,
                 * but none of the higher level infrastructure supports
                 */
                xfs_alert(btp->bt_mount,
                          "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
-                         __func__, blkno, eofs);
+                         __func__, cmap.bm_bn, eofs);
                WARN_ON(1);
                return NULL;
        }
  
-       /* get tree root */
        pag = xfs_perag_get(btp->bt_mount,
-                               xfs_daddr_to_agno(btp->bt_mount, blkno));
+                           xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
  
-       /* walk tree */
        spin_lock(&pag->pag_buf_lock);
-       rbp = &pag->pag_buf_tree.rb_node;
-       parent = NULL;
-       bp = NULL;
-       while (*rbp) {
-               parent = *rbp;
-               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-               if (blkno < bp->b_bn)
-                       rbp = &(*rbp)->rb_left;
-               else if (blkno > bp->b_bn)
-                       rbp = &(*rbp)->rb_right;
-               else {
-                       /*
-                        * found a block number match. If the range doesn't
-                        * match, the only way this is allowed is if the buffer
-                        * in the cache is stale and the transaction that made
-                        * it stale has not yet committed. i.e. we are
-                        * reallocating a busy extent. Skip this buffer and
-                        * continue searching to the right for an exact match.
-                        */
-                       if (bp->b_length != numblks) {
-                               ASSERT(bp->b_flags & XBF_STALE);
-                               rbp = &(*rbp)->rb_right;
-                               continue;
-                       }
-                       atomic_inc(&bp->b_hold);
-                       goto found;
-               }
+       bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap,
+                                   xfs_buf_hash_params);
+       if (bp) {
+               atomic_inc(&bp->b_hold);
+               goto found;
        }
  
        /* No match found */
        if (new_bp) {
-               rb_link_node(&new_bp->b_rbnode, parent, rbp);
-               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
                /* the buffer keeps the perag reference until it is freed */
                new_bp->b_pag = pag;
+               rhashtable_insert_fast(&pag->pag_buf_hash,
+                                      &new_bp->b_rhash_head,
+                                      xfs_buf_hash_params);
                spin_unlock(&pag->pag_buf_lock);
        } else {
                XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
@@@ -930,7 -958,6 +958,6 @@@ xfs_buf_rele
  
        if (!pag) {
                ASSERT(list_empty(&bp->b_lru));
-               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
                if (atomic_dec_and_test(&bp->b_hold)) {
                        xfs_buf_ioacct_dec(bp);
                        xfs_buf_free(bp);
                return;
        }
  
-       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
        ASSERT(atomic_read(&bp->b_hold) > 0);
  
        release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
                }
  
                ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
-               rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+               rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
+                                      xfs_buf_hash_params);
                spin_unlock(&pag->pag_buf_lock);
                xfs_perag_put(pag);
                freebuf = true;
@@@ -1304,7 -1330,7 +1330,7 @@@ _xfs_buf_ioapply
        if (bp->b_flags & XBF_WRITE) {
                op = REQ_OP_WRITE;
                if (bp->b_flags & XBF_SYNCIO)
 -                      op_flags = WRITE_SYNC;
 +                      op_flags = REQ_SYNC;
                if (bp->b_flags & XBF_FUA)
                        op_flags |= REQ_FUA;
                if (bp->b_flags & XBF_FLUSH)
@@@ -1711,8 -1737,7 +1737,7 @@@ xfs_free_buftarg
        percpu_counter_destroy(&btp->bt_io_count);
        list_lru_destroy(&btp->bt_lru);
  
-       if (mp->m_flags & XFS_MOUNT_BARRIER)
-               xfs_blkdev_issue_flush(btp);
+       xfs_blkdev_issue_flush(btp);
  
        kmem_free(btp);
  }
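The fs/xfs/xfs_buf.c changes above implement the "faster buffer cache lookups" item from the summary: the per-AG rb-tree is replaced by an rhashtable keyed on the disk address, with xfs_buf_hash_params describing the key layout and _xfs_buf_obj_cmp() breaking ties against stale buffers. A condensed, hedged restatement of the lookup/insert sequence the new _xfs_buf_find() performs (names come from the hunks above; reference counting and error handling are trimmed, and the wrapper function itself is illustrative):

/*
 * Look up a buffer by its composite map key under the per-AG lock,
 * inserting the caller-supplied buffer on a miss -- the same sequence
 * the rhashtable-based _xfs_buf_find() above follows.
 */
static struct xfs_buf *
example_buf_find_or_insert(struct xfs_perag *pag, struct xfs_buf_map *cmap,
			   struct xfs_buf *new_bp)
{
	struct xfs_buf	*bp;

	spin_lock(&pag->pag_buf_lock);
	bp = rhashtable_lookup_fast(&pag->pag_buf_hash, cmap,
				    xfs_buf_hash_params);
	if (bp)
		atomic_inc(&bp->b_hold);	/* cache hit: take a hold */
	else if (new_bp)
		rhashtable_insert_fast(&pag->pag_buf_hash,
				       &new_bp->b_rhash_head,
				       xfs_buf_hash_params);
	spin_unlock(&pag->pag_buf_lock);
	return bp;
}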
diff --combined kernel/locking/lockdep.c
index 7bd265f6b0984a612f20d57aeb3c59647cc383d8,cff580a6edf965eda424a50709e1bc809d674975..7c38f8f3d97b7b172dcd61f04df382548cf7ac15
@@@ -506,13 -506,13 +506,13 @@@ static void __print_lock_name(struct lo
        name = class->name;
        if (!name) {
                name = __get_key_name(class->key, str);
 -              printk("%s", name);
 +              printk(KERN_CONT "%s", name);
        } else {
 -              printk("%s", name);
 +              printk(KERN_CONT "%s", name);
                if (class->name_version > 1)
 -                      printk("#%d", class->name_version);
 +                      printk(KERN_CONT "#%d", class->name_version);
                if (class->subclass)
 -                      printk("/%d", class->subclass);
 +                      printk(KERN_CONT "/%d", class->subclass);
        }
  }
  
@@@ -522,9 -522,9 +522,9 @@@ static void print_lock_name(struct lock
  
        get_usage_chars(class, usage);
  
 -      printk(" (");
 +      printk(KERN_CONT " (");
        __print_lock_name(class);
 -      printk("){%s}", usage);
 +      printk(KERN_CONT "){%s}", usage);
  }
  
  static void print_lockdep_cache(struct lockdep_map *lock)
        if (!name)
                name = __get_key_name(lock->key->subkeys, str);
  
 -      printk("%s", name);
 +      printk(KERN_CONT "%s", name);
  }
  
  static void print_lock(struct held_lock *hlock)
        barrier();
  
        if (!class_idx || (class_idx - 1) >= MAX_LOCKDEP_KEYS) {
 -              printk("<RELEASED>\n");
 +              printk(KERN_CONT "<RELEASED>\n");
                return;
        }
  
        print_lock_name(lock_classes + class_idx - 1);
 -      printk(", at: ");
 -      print_ip_sym(hlock->acquire_ip);
 +      printk(KERN_CONT ", at: [<%p>] %pS\n",
 +              (void *)hlock->acquire_ip, (void *)hlock->acquire_ip);
  }
  
  static void lockdep_print_held_locks(struct task_struct *curr)
@@@ -792,8 -792,8 +792,8 @@@ register_lock_class(struct lockdep_map 
  
                printk("\nnew class %p: %s", class->key, class->name);
                if (class->name_version > 1)
 -                      printk("#%d", class->name_version);
 -              printk("\n");
 +                      printk(KERN_CONT "#%d", class->name_version);
 +              printk(KERN_CONT "\n");
                dump_stack();
  
                if (!graph_lock()) {
@@@ -840,9 -840,9 +840,9 @@@ static struct lock_list *alloc_list_ent
  /*
   * Add a new dependency to the head of the list:
   */
 -static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 -                          struct list_head *head, unsigned long ip,
 -                          int distance, struct stack_trace *trace)
 +static int add_lock_to_list(struct lock_class *this, struct list_head *head,
 +                          unsigned long ip, int distance,
 +                          struct stack_trace *trace)
  {
        struct lock_list *entry;
        /*
@@@ -1071,7 -1071,7 +1071,7 @@@ print_circular_bug_entry(struct lock_li
                return 0;
        printk("\n-> #%u", depth);
        print_lock_name(target->class);
 -      printk(":\n");
 +      printk(KERN_CONT ":\n");
        print_stack_trace(&target->trace, 6);
  
        return 0;
@@@ -1102,11 -1102,11 +1102,11 @@@ print_circular_lock_scenario(struct hel
        if (parent != source) {
                printk("Chain exists of:\n  ");
                __print_lock_name(source);
 -              printk(" --> ");
 +              printk(KERN_CONT " --> ");
                __print_lock_name(parent);
 -              printk(" --> ");
 +              printk(KERN_CONT " --> ");
                __print_lock_name(target);
 -              printk("\n\n");
 +              printk(KERN_CONT "\n\n");
        }
  
        printk(" Possible unsafe locking scenario:\n\n");
        printk("       ----                    ----\n");
        printk("  lock(");
        __print_lock_name(target);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("                               lock(");
        __print_lock_name(parent);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("                               lock(");
        __print_lock_name(target);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("  lock(");
        __print_lock_name(source);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("\n *** DEADLOCK ***\n\n");
  }
  
@@@ -1359,22 -1359,22 +1359,22 @@@ static void print_lock_class_header(str
  
        printk("%*s->", depth, "");
        print_lock_name(class);
 -      printk(" ops: %lu", class->ops);
 -      printk(" {\n");
 +      printk(KERN_CONT " ops: %lu", class->ops);
 +      printk(KERN_CONT " {\n");
  
        for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
                if (class->usage_mask & (1 << bit)) {
                        int len = depth;
  
                        len += printk("%*s   %s", depth, "", usage_str[bit]);
 -                      len += printk(" at:\n");
 +                      len += printk(KERN_CONT " at:\n");
                        print_stack_trace(class->usage_traces + bit, len);
                }
        }
        printk("%*s }\n", depth, "");
  
 -      printk("%*s ... key      at: ",depth,"");
 -      print_ip_sym((unsigned long)class->key);
 +      printk("%*s ... key      at: [<%p>] %pS\n",
 +              depth, "", class->key, class->key);
  }
  
  /*
@@@ -1437,11 -1437,11 +1437,11 @@@ print_irq_lock_scenario(struct lock_lis
        if (middle_class != unsafe_class) {
                printk("Chain exists of:\n  ");
                __print_lock_name(safe_class);
 -              printk(" --> ");
 +              printk(KERN_CONT " --> ");
                __print_lock_name(middle_class);
 -              printk(" --> ");
 +              printk(KERN_CONT " --> ");
                __print_lock_name(unsafe_class);
 -              printk("\n\n");
 +              printk(KERN_CONT "\n\n");
        }
  
        printk(" Possible interrupt unsafe locking scenario:\n\n");
        printk("       ----                    ----\n");
        printk("  lock(");
        __print_lock_name(unsafe_class);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("                               local_irq_disable();\n");
        printk("                               lock(");
        __print_lock_name(safe_class);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("                               lock(");
        __print_lock_name(middle_class);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("  <Interrupt>\n");
        printk("    lock(");
        __print_lock_name(safe_class);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("\n *** DEADLOCK ***\n\n");
  }
  
@@@ -1497,9 -1497,9 +1497,9 @@@ print_bad_irq_dependency(struct task_st
        print_lock(prev);
        printk("which would create a new lock dependency:\n");
        print_lock_name(hlock_class(prev));
 -      printk(" ->");
 +      printk(KERN_CONT " ->");
        print_lock_name(hlock_class(next));
 -      printk("\n");
 +      printk(KERN_CONT "\n");
  
        printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
                irqclass);
  
        lockdep_print_held_locks(curr);
  
 -      printk("\nthe dependencies between %s-irq-safe lock", irqclass);
 -      printk(" and the holding lock:\n");
 +      printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
        if (!save_trace(&prev_root->trace))
                return 0;
        print_shortest_lock_dependencies(backwards_entry, prev_root);
@@@ -1693,10 -1694,10 +1693,10 @@@ print_deadlock_scenario(struct held_loc
        printk("       ----\n");
        printk("  lock(");
        __print_lock_name(prev);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("  lock(");
        __print_lock_name(next);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("\n *** DEADLOCK ***\n\n");
        printk(" May be due to missing lock nesting notation\n\n");
  }
@@@ -1868,14 -1869,14 +1868,14 @@@ check_prev_add(struct task_struct *curr
         * Ok, all validations passed, add the new lock
         * to the previous lock's dependency list:
         */
 -      ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
 +      ret = add_lock_to_list(hlock_class(next),
                               &hlock_class(prev)->locks_after,
                               next->acquire_ip, distance, &trace);
  
        if (!ret)
                return 0;
  
 -      ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
 +      ret = add_lock_to_list(hlock_class(prev),
                               &hlock_class(next)->locks_before,
                               next->acquire_ip, distance, &trace);
        if (!ret)
                graph_unlock();
                printk("\n new dependency: ");
                print_lock_name(hlock_class(prev));
 -              printk(" => ");
 +              printk(KERN_CONT " => ");
                print_lock_name(hlock_class(next));
 -              printk("\n");
 +              printk(KERN_CONT "\n");
                dump_stack();
                return graph_lock();
        }
@@@ -2342,11 -2343,11 +2342,11 @@@ print_usage_bug_scenario(struct held_lo
        printk("       ----\n");
        printk("  lock(");
        __print_lock_name(class);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("  <Interrupt>\n");
        printk("    lock(");
        __print_lock_name(class);
 -      printk(");\n");
 +      printk(KERN_CONT ");\n");
        printk("\n *** DEADLOCK ***\n\n");
  }
  
@@@ -2521,18 -2522,14 +2521,18 @@@ check_usage_backwards(struct task_struc
  void print_irqtrace_events(struct task_struct *curr)
  {
        printk("irq event stamp: %u\n", curr->irq_events);
 -      printk("hardirqs last  enabled at (%u): ", curr->hardirq_enable_event);
 -      print_ip_sym(curr->hardirq_enable_ip);
 -      printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
 -      print_ip_sym(curr->hardirq_disable_ip);
 -      printk("softirqs last  enabled at (%u): ", curr->softirq_enable_event);
 -      print_ip_sym(curr->softirq_enable_ip);
 -      printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
 -      print_ip_sym(curr->softirq_disable_ip);
 +      printk("hardirqs last  enabled at (%u): [<%p>] %pS\n",
 +              curr->hardirq_enable_event, (void *)curr->hardirq_enable_ip,
 +              (void *)curr->hardirq_enable_ip);
 +      printk("hardirqs last disabled at (%u): [<%p>] %pS\n",
 +              curr->hardirq_disable_event, (void *)curr->hardirq_disable_ip,
 +              (void *)curr->hardirq_disable_ip);
 +      printk("softirqs last  enabled at (%u): [<%p>] %pS\n",
 +              curr->softirq_enable_event, (void *)curr->softirq_enable_ip,
 +              (void *)curr->softirq_enable_ip);
 +      printk("softirqs last disabled at (%u): [<%p>] %pS\n",
 +              curr->softirq_disable_event, (void *)curr->softirq_disable_ip,
 +              (void *)curr->softirq_disable_ip);
  }
  
  static int HARDIRQ_verbose(struct lock_class *class)
@@@ -3191,7 -3188,7 +3191,7 @@@ print_lock_nested_lock_not_held(struct 
        return 0;
  }
  
- static int __lock_is_held(struct lockdep_map *lock);
+ static int __lock_is_held(struct lockdep_map *lock, int read);
  
  /*
   * This gets called for every mutex_lock*()/spin_lock*() operation.
@@@ -3238,8 -3235,8 +3238,8 @@@ static int __lock_acquire(struct lockde
        if (very_verbose(class)) {
                printk("\nacquire class [%p] %s", class->key, class->name);
                if (class->name_version > 1)
 -                      printk("#%d", class->name_version);
 -              printk("\n");
 +                      printk(KERN_CONT "#%d", class->name_version);
 +              printk(KERN_CONT "\n");
                dump_stack();
        }
  
        }
        chain_key = iterate_chain_key(chain_key, class_idx);
  
-       if (nest_lock && !__lock_is_held(nest_lock))
+       if (nest_lock && !__lock_is_held(nest_lock, -1))
                return print_lock_nested_lock_not_held(curr, hlock, ip);
  
        if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
@@@ -3381,7 -3378,7 +3381,7 @@@ print_unlock_imbalance_bug(struct task_
        printk("%s/%d is trying to release lock (",
                curr->comm, task_pid_nr(curr));
        print_lockdep_cache(lock);
 -      printk(") at:\n");
 +      printk(KERN_CONT ") at:\n");
        print_ip_sym(ip);
        printk("but there are no more locks to release!\n");
        printk("\nother info that might help us debug this:\n");
@@@ -3579,7 -3576,7 +3579,7 @@@ found_it
        return 1;
  }
  
- static int __lock_is_held(struct lockdep_map *lock)
+ static int __lock_is_held(struct lockdep_map *lock, int read)
  {
        struct task_struct *curr = current;
        int i;
        for (i = 0; i < curr->lockdep_depth; i++) {
                struct held_lock *hlock = curr->held_locks + i;
  
-               if (match_held_lock(hlock, lock))
-                       return 1;
+               if (match_held_lock(hlock, lock)) {
+                       if (read == -1 || hlock->read == read)
+                               return 1;
+                       return 0;
+               }
        }
  
        return 0;
@@@ -3772,7 -3773,7 +3776,7 @@@ void lock_release(struct lockdep_map *l
  }
  EXPORT_SYMBOL_GPL(lock_release);
  
- int lock_is_held(struct lockdep_map *lock)
+ int lock_is_held_type(struct lockdep_map *lock, int read)
  {
        unsigned long flags;
        int ret = 0;
        check_flags(flags);
  
        current->lockdep_recursion = 1;
-       ret = __lock_is_held(lock);
+       ret = __lock_is_held(lock, read);
        current->lockdep_recursion = 0;
        raw_local_irq_restore(flags);
  
        return ret;
  }
- EXPORT_SYMBOL_GPL(lock_is_held);
+ EXPORT_SYMBOL_GPL(lock_is_held_type);
  
  struct pin_cookie lock_pin_lock(struct lockdep_map *lock)
  {
@@@ -3874,7 -3875,7 +3878,7 @@@ print_lock_contention_bug(struct task_s
        printk("%s/%d is trying to contend lock (",
                curr->comm, task_pid_nr(curr));
        print_lockdep_cache(lock);
 -      printk(") at:\n");
 +      printk(KERN_CONT ") at:\n");
        print_ip_sym(ip);
        printk("but there are no locks held!\n");
        printk("\nother info that might help us debug this:\n");