Git Repo - linux.git/commitdiff
Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
author Linus Torvalds <[email protected]>
Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
committer Linus Torvalds <[email protected]>
Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
Pull core block updates from Jens Axboe:
 "This first core part of the block IO changes contains:

   - Cleanup of the bio IO error signaling from Christoph.  We used to
     rely on the uptodate bit and passing an error around; now we store
     the error in the bio itself (the driver-side effect is sketched
     just below this quoted message).

   - Improvement of the above from myself, by shrinking the bio size
     down again to fit in two cachelines on x86-64.

   - Revert of the max_hw_sectors cap removal from an earlier revision,
     from Jeff Moyer.  The removal caused performance regressions in
     various tests, so reinstate the limit and bump it to a more
     reasonable size instead.

   - Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me.
     Most devices have huge trim limits, which can cause nasty latencies
     when deleting files.  Enable the admin to configure the size down.
     We will look into having a more sane default instead of UINT_MAX
     sectors.

   - Improvement of the SG gaps logic from Keith Busch.

   - Enable the block core to handle arbitrarily sized bios, which
     enables a nice simplification of bio_add_page() (which is an IO hot
     path).  From Kent.

   - Improvements to the partition IO stats accounting, making it
     faster.  From Ming Lei.

   - Also from Ming Lei, a fix for a buffer overflow when reading the
     sysfs 'pending' file in blk-mq, as well as a fix for a blk-mq
     timeout race condition.

   - Ming Lin has been carrying Kent's above-mentioned patches forward
     for a while, and testing them.  Ming also did a few fixes around
     that.

   - Sasha Levin found and fixed a use-after-free problem introduced by
     the bio->bi_error changes from Christoph.

   - Small blk cgroup cleanup from Viresh Kumar"

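For reference, the bi_error conversion called out in the first bullet (and visible in the end_block_io_op and split_bio_end hunks further down) changes driver completion callbacks roughly as follows. This is a minimal illustrative sketch with hypothetical my_* names, not code taken from the merge:

  /* before: the core passed the error as a second argument */
  static void my_endio_old(struct bio *bio, int error)
  {
          struct my_request *req = bio->bi_private;   /* hypothetical type */

          my_complete(req, error);                    /* hypothetical helper */
          bio_put(bio);
  }

  /* after: the error travels in bio->bi_error */
  static void my_endio(struct bio *bio)
  {
          struct my_request *req = bio->bi_private;

          my_complete(req, bio->bi_error);
          bio_put(bio);
  }
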
* 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits)
  blk: Fix bio_io_vec index when checking bvec gaps
  block: Replace SG_GAPS with new queue limits mask
  block: bump BLK_DEF_MAX_SECTORS to 2560
  Revert "block: remove artifical max_hw_sectors cap"
  blk-mq: fix race between timeout and freeing request
  blk-mq: fix buffer overflow when reading sysfs file of 'pending'
  Documentation: update notes in biovecs about arbitrarily sized bios
  block: remove bio_get_nr_vecs()
  fs: use helper bio_add_page() instead of open coding on bi_io_vec
  block: kill merge_bvec_fn() completely
  md/raid5: get rid of bio_fits_rdev()
  md/raid5: split bio for chunk_aligned_read
  block: remove split code in blkdev_issue_{discard,write_same}
  btrfs: remove bio splitting and merge_bvec_fn() calls
  bcache: remove driver private bio splitting code
  block: simplify bio_add_page()
  block: make generic_make_request handle arbitrarily sized bios
  blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL)
  block: don't access bio->bi_error after bio_put()
  block: shrink struct bio down to 2 cache lines again
  ...

21 files changed:
block/bio.c
block/blk-settings.c
drivers/block/null_blk.c
drivers/block/rbd.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkfront.c
drivers/block/zram/zram_drv.c
drivers/md/dm-cache-target.c
drivers/md/dm-thin.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/mtd/mtd_blkdevs.c
drivers/s390/block/dcssblk.c
drivers/scsi/sd.c
drivers/staging/lustre/lustre/llite/lloop.c
fs/btrfs/disk-io.c
fs/btrfs/inode.c
fs/f2fs/data.c

diff --combined block/bio.c
index d6e5ba3399f0ae151ea040e2ec1fd1df1c3dba6a,b1f198f9a317b1f01d288c45e24cda56a1e1f778..515b5434fe2de84f0fe9db306fc16adf7711ae5e
@@@ -269,7 -269,6 +269,6 @@@ static void bio_free(struct bio *bio
  void bio_init(struct bio *bio)
  {
        memset(bio, 0, sizeof(*bio));
-       bio->bi_flags = 1 << BIO_UPTODATE;
        atomic_set(&bio->__bi_remaining, 1);
        atomic_set(&bio->__bi_cnt, 1);
  }
@@@ -292,14 -291,17 +291,17 @@@ void bio_reset(struct bio *bio
        __bio_free(bio);
  
        memset(bio, 0, BIO_RESET_BYTES);
-       bio->bi_flags = flags | (1 << BIO_UPTODATE);
+       bio->bi_flags = flags;
        atomic_set(&bio->__bi_remaining, 1);
  }
  EXPORT_SYMBOL(bio_reset);
  
- static void bio_chain_endio(struct bio *bio, int error)
+ static void bio_chain_endio(struct bio *bio)
  {
-       bio_endio(bio->bi_private, error);
+       struct bio *parent = bio->bi_private;
+       parent->bi_error = bio->bi_error;
+       bio_endio(parent);
        bio_put(bio);
  }
  
   */
  static inline void bio_inc_remaining(struct bio *bio)
  {
-       bio->bi_flags |= (1 << BIO_CHAIN);
+       bio_set_flag(bio, BIO_CHAIN);
        smp_mb__before_atomic();
        atomic_inc(&bio->__bi_remaining);
  }
@@@ -493,7 -495,7 +495,7 @@@ struct bio *bio_alloc_bioset(gfp_t gfp_
                if (unlikely(!bvl))
                        goto err_free;
  
-               bio->bi_flags |= 1 << BIO_OWNS_VEC;
+               bio_set_flag(bio, BIO_OWNS_VEC);
        } else if (nr_iovecs) {
                bvl = bio->bi_inline_vecs;
        }
@@@ -578,7 -580,7 +580,7 @@@ void __bio_clone_fast(struct bio *bio, 
         * so we don't set nor calculate new physical/hw segment counts here
         */
        bio->bi_bdev = bio_src->bi_bdev;
-       bio->bi_flags |= 1 << BIO_CLONED;
+       bio_set_flag(bio, BIO_CLONED);
        bio->bi_rw = bio_src->bi_rw;
        bio->bi_iter = bio_src->bi_iter;
        bio->bi_io_vec = bio_src->bi_io_vec;
@@@ -692,31 -694,22 +694,22 @@@ integrity_clone
  EXPORT_SYMBOL(bio_clone_bioset);
  
  /**
-  *    bio_get_nr_vecs         - return approx number of vecs
-  *    @bdev:  I/O target
+  *    bio_add_pc_page -       attempt to add page to bio
+  *    @q: the target queue
+  *    @bio: destination bio
+  *    @page: page to add
+  *    @len: vec entry length
+  *    @offset: vec entry offset
   *
-  *    Return the approximate number of pages we can send to this target.
-  *    There's no guarantee that you will be able to fit this number of pages
-  *    into a bio, it does not account for dynamic restrictions that vary
-  *    on offset.
+  *    Attempt to add a page to the bio_vec maplist. This can fail for a
+  *    number of reasons, such as the bio being full or target block device
+  *    limitations. The target block device must allow bio's up to PAGE_SIZE,
+  *    so it is always possible to add a single page to an empty bio.
+  *
+  *    This should only be used by REQ_PC bios.
   */
- int bio_get_nr_vecs(struct block_device *bdev)
- {
-       struct request_queue *q = bdev_get_queue(bdev);
-       int nr_pages;
-       nr_pages = min_t(unsigned,
-                    queue_max_segments(q),
-                    queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
-       return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
- }
- EXPORT_SYMBOL(bio_get_nr_vecs);
- static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
-                         *page, unsigned int len, unsigned int offset,
-                         unsigned int max_sectors)
+ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+                   *page, unsigned int len, unsigned int offset)
  {
        int retried_segments = 0;
        struct bio_vec *bvec;
        if (unlikely(bio_flagged(bio, BIO_CLONED)))
                return 0;
  
-       if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
+       if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
                return 0;
  
        /*
  
                if (page == prev->bv_page &&
                    offset == prev->bv_offset + prev->bv_len) {
-                       unsigned int prev_bv_len = prev->bv_len;
                        prev->bv_len += len;
-                       if (q->merge_bvec_fn) {
-                               struct bvec_merge_data bvm = {
-                                       /* prev_bvec is already charged in
-                                          bi_size, discharge it in order to
-                                          simulate merging updated prev_bvec
-                                          as new bvec. */
-                                       .bi_bdev = bio->bi_bdev,
-                                       .bi_sector = bio->bi_iter.bi_sector,
-                                       .bi_size = bio->bi_iter.bi_size -
-                                               prev_bv_len,
-                                       .bi_rw = bio->bi_rw,
-                               };
-                               if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
-                                       prev->bv_len -= len;
-                                       return 0;
-                               }
-                       }
                        bio->bi_iter.bi_size += len;
                        goto done;
                }
                 * If the queue doesn't support SG gaps and adding this
                 * offset would create a gap, disallow it.
                 */
-               if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
-                   bvec_gap_to_prev(prev, offset))
+               if (bvec_gap_to_prev(q, prev, offset))
                        return 0;
        }
  
                blk_recount_segments(q, bio);
        }
  
-       /*
-        * if queue has other restrictions (eg varying max sector size
-        * depending on offset), it can specify a merge_bvec_fn in the
-        * queue to get further control
-        */
-       if (q->merge_bvec_fn) {
-               struct bvec_merge_data bvm = {
-                       .bi_bdev = bio->bi_bdev,
-                       .bi_sector = bio->bi_iter.bi_sector,
-                       .bi_size = bio->bi_iter.bi_size - len,
-                       .bi_rw = bio->bi_rw,
-               };
-               /*
-                * merge_bvec_fn() returns number of bytes it can accept
-                * at this offset
-                */
-               if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
-                       goto failed;
-       }
        /* If we may be able to merge these biovecs, force a recount */
        if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
-               bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+               bio_clear_flag(bio, BIO_SEG_VALID);
  
   done:
        return len;
        blk_recount_segments(q, bio);
        return 0;
  }
- /**
-  *    bio_add_pc_page -       attempt to add page to bio
-  *    @q: the target queue
-  *    @bio: destination bio
-  *    @page: page to add
-  *    @len: vec entry length
-  *    @offset: vec entry offset
-  *
-  *    Attempt to add a page to the bio_vec maplist. This can fail for a
-  *    number of reasons, such as the bio being full or target block device
-  *    limitations. The target block device must allow bio's up to PAGE_SIZE,
-  *    so it is always possible to add a single page to an empty bio.
-  *
-  *    This should only be used by REQ_PC bios.
-  */
- int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
-                   unsigned int len, unsigned int offset)
- {
-       return __bio_add_page(q, bio, page, len, offset,
-                             queue_max_hw_sectors(q));
- }
  EXPORT_SYMBOL(bio_add_pc_page);
  
  /**
   *    @len: vec entry length
   *    @offset: vec entry offset
   *
-  *    Attempt to add a page to the bio_vec maplist. This can fail for a
-  *    number of reasons, such as the bio being full or target block device
-  *    limitations. The target block device must allow bio's up to PAGE_SIZE,
-  *    so it is always possible to add a single page to an empty bio.
+  *    Attempt to add a page to the bio_vec maplist. This will only fail
+  *    if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
   */
- int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
-                unsigned int offset)
+ int bio_add_page(struct bio *bio, struct page *page,
+                unsigned int len, unsigned int offset)
  {
-       struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-       unsigned int max_sectors;
+       struct bio_vec *bv;
+       /*
+        * cloned bio must not modify vec list
+        */
+       if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+               return 0;
  
-       max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
-       if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
-               max_sectors = len >> 9;
+       /*
+        * For filesystems with a blocksize smaller than the pagesize
+        * we will often be called with the same page as last time and
+        * a consecutive offset.  Optimize this special case.
+        */
+       if (bio->bi_vcnt > 0) {
+               bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
  
-       return __bio_add_page(q, bio, page, len, offset, max_sectors);
+               if (page == bv->bv_page &&
+                   offset == bv->bv_offset + bv->bv_len) {
+                       bv->bv_len += len;
+                       goto done;
+               }
+       }
+       if (bio->bi_vcnt >= bio->bi_max_vecs)
+               return 0;
+       bv              = &bio->bi_io_vec[bio->bi_vcnt];
+       bv->bv_page     = page;
+       bv->bv_len      = len;
+       bv->bv_offset   = offset;
+       bio->bi_vcnt++;
+ done:
+       bio->bi_iter.bi_size += len;
+       return len;
  }
  EXPORT_SYMBOL(bio_add_page);
  
@@@ -896,11 -849,11 +849,11 @@@ struct submit_bio_ret 
        int error;
  };
  
- static void submit_bio_wait_endio(struct bio *bio, int error)
+ static void submit_bio_wait_endio(struct bio *bio)
  {
        struct submit_bio_ret *ret = bio->bi_private;
  
-       ret->error = error;
+       ret->error = bio->bi_error;
        complete(&ret->event);
  }
  
@@@ -1388,7 -1341,7 +1341,7 @@@ struct bio *bio_map_user_iov(struct req
        if (iter->type & WRITE)
                bio->bi_rw |= REQ_WRITE;
  
-       bio->bi_flags |= (1 << BIO_USER_MAPPED);
+       bio_set_flag(bio, BIO_USER_MAPPED);
  
        /*
         * subtle -- if __bio_map_user() ended up bouncing a bio,
@@@ -1445,7 -1398,7 +1398,7 @@@ void bio_unmap_user(struct bio *bio
  }
  EXPORT_SYMBOL(bio_unmap_user);
  
- static void bio_map_kern_endio(struct bio *bio, int err)
+ static void bio_map_kern_endio(struct bio *bio)
  {
        bio_put(bio);
  }
@@@ -1501,13 -1454,13 +1454,13 @@@ struct bio *bio_map_kern(struct request
  }
  EXPORT_SYMBOL(bio_map_kern);
  
- static void bio_copy_kern_endio(struct bio *bio, int err)
+ static void bio_copy_kern_endio(struct bio *bio)
  {
        bio_free_pages(bio);
        bio_put(bio);
  }
  
- static void bio_copy_kern_endio_read(struct bio *bio, int err)
+ static void bio_copy_kern_endio_read(struct bio *bio)
  {
        char *p = bio->bi_private;
        struct bio_vec *bvec;
                p += bvec->bv_len;
        }
  
-       bio_copy_kern_endio(bio, err);
+       bio_copy_kern_endio(bio);
  }
  
  /**
@@@ -1768,7 -1721,7 +1721,7 @@@ static inline bool bio_remaining_done(s
        BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
  
        if (atomic_dec_and_test(&bio->__bi_remaining)) {
-               clear_bit(BIO_CHAIN, &bio->bi_flags);
+               bio_clear_flag(bio, BIO_CHAIN);
                return true;
        }
  
  /**
   * bio_endio - end I/O on a bio
   * @bio:      bio
-  * @error:    error, if any
   *
   * Description:
-  *   bio_endio() will end I/O on the whole bio. bio_endio() is the
-  *   preferred way to end I/O on a bio, it takes care of clearing
-  *   BIO_UPTODATE on error. @error is 0 on success, and and one of the
-  *   established -Exxxx (-EIO, for instance) error values in case
-  *   something went wrong. No one should call bi_end_io() directly on a
-  *   bio unless they own it and thus know that it has an end_io
-  *   function.
+  *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
+  *   way to end I/O on a bio. No one should call bi_end_io() directly on a
+  *   bio unless they own it and thus know that it has an end_io function.
   **/
- void bio_endio(struct bio *bio, int error)
+ void bio_endio(struct bio *bio)
  {
        while (bio) {
-               if (error)
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
-               else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-                       error = -EIO;
                if (unlikely(!bio_remaining_done(bio)))
                        break;
  
                 */
                if (bio->bi_end_io == bio_chain_endio) {
                        struct bio *parent = bio->bi_private;
+                       parent->bi_error = bio->bi_error;
                        bio_put(bio);
                        bio = parent;
                } else {
                        if (bio->bi_end_io)
-                               bio->bi_end_io(bio, error);
+                               bio->bi_end_io(bio);
                        bio = NULL;
                }
        }
@@@ -1831,9 -1775,8 +1775,9 @@@ EXPORT_SYMBOL(bio_endio)
   * Allocates and returns a new bio which represents @sectors from the start of
   * @bio, and updates @bio to represent the remaining sectors.
   *
 - * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
 - * responsibility to ensure that @bio is not freed before the split.
 + * Unless this is a discard request the newly allocated bio will point
 + * to @bio's bi_io_vec; it is the caller's responsibility to ensure that
 + * @bio is not freed before the split.
   */
  struct bio *bio_split(struct bio *bio, int sectors,
                      gfp_t gfp, struct bio_set *bs)
        BUG_ON(sectors <= 0);
        BUG_ON(sectors >= bio_sectors(bio));
  
 -      split = bio_clone_fast(bio, gfp, bs);
 +      /*
 +       * Discards need a mutable bio_vec to accommodate the payload
 +       * required by the DSM TRIM and UNMAP commands.
 +       */
 +      if (bio->bi_rw & REQ_DISCARD)
 +              split = bio_clone_bioset(bio, gfp, bs);
 +      else
 +              split = bio_clone_fast(bio, gfp, bs);
 +
        if (!split)
                return NULL;
  
@@@ -1882,7 -1817,7 +1826,7 @@@ void bio_trim(struct bio *bio, int offs
        if (offset == 0 && size == bio->bi_iter.bi_size)
                return;
  
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
  
        bio_advance(bio, offset << 9);
  
@@@ -2018,7 -1953,6 +1962,7 @@@ int bio_associate_blkcg(struct bio *bio
        bio->bi_css = blkcg_css;
        return 0;
  }
 +EXPORT_SYMBOL_GPL(bio_associate_blkcg);
  
  /**
   * bio_associate_current - associate a bio with %current
@@@ -2049,7 -1983,6 +1993,7 @@@ int bio_associate_current(struct bio *b
        bio->bi_css = task_get_css(current, blkio_cgrp_id);
        return 0;
  }
 +EXPORT_SYMBOL_GPL(bio_associate_current);
  
  /**
   * bio_disassociate_task - undo bio_associate_current()
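
With the simplified bio_add_page() above, adding a page can only fail once the vec table is full (or the bio is cloned), and generic_make_request() now splits oversized bios for the driver, so callers no longer need bio_get_nr_vecs() or any merge_bvec_fn awareness. A rough sketch of the resulting calling pattern, using hypothetical names and the 4.2-era submit_bio_wait(rw, bio) signature, not code from this merge:

  #include <linux/bio.h>
  #include <linux/blkdev.h>

  /* Read nr_pages pages starting at 'sector'; illustrative only. */
  static int my_read_pages(struct block_device *bdev, sector_t sector,
                           struct page **pages, unsigned int nr_pages)
  {
          struct bio *bio;
          unsigned int i;
          int ret;

          bio = bio_alloc(GFP_NOIO, nr_pages);
          if (!bio)
                  return -ENOMEM;

          bio->bi_bdev = bdev;
          bio->bi_iter.bi_sector = sector;

          for (i = 0; i < nr_pages; i++) {
                  /* only fails once bi_vcnt reaches bi_max_vecs */
                  if (!bio_add_page(bio, pages[i], PAGE_SIZE, 0))
                          break;
          }

          /* the block core splits the bio if it exceeds queue limits */
          ret = submit_bio_wait(READ, bio);
          bio_put(bio);
          return ret;
  }
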
diff --combined block/blk-settings.c
index e0057d035200c4dd5e42d191f0395a7769489905,f96c72116931e7f6ae4bacac207048490c7dcacc..7d8f129a1516b408d8ebd827e65ffd6d688b2df8
@@@ -53,28 -53,6 +53,6 @@@ void blk_queue_unprep_rq(struct request
  }
  EXPORT_SYMBOL(blk_queue_unprep_rq);
  
- /**
-  * blk_queue_merge_bvec - set a merge_bvec function for queue
-  * @q:                queue
-  * @mbfn:     merge_bvec_fn
-  *
-  * Usually queues have static limitations on the max sectors or segments that
-  * we can put in a request. Stacking drivers may have some settings that
-  * are dynamic, and thus we have to query the queue whether it is ok to
-  * add a new bio_vec to a bio at a given offset or not. If the block device
-  * has such limitations, it needs to register a merge_bvec_fn to control
-  * the size of bio's sent to it. Note that a block device *must* allow a
-  * single page to be added to an empty bio. The block device driver may want
-  * to use the bio_split() function to deal with these bio's. By default
-  * no merge_bvec_fn is defined for a queue, and only the fixed limits are
-  * honored.
-  */
- void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
- {
-       q->merge_bvec_fn = mbfn;
- }
- EXPORT_SYMBOL(blk_queue_merge_bvec);
  void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
  {
        q->softirq_done_fn = fn;
@@@ -111,11 -89,13 +89,13 @@@ void blk_set_default_limits(struct queu
        lim->max_segments = BLK_MAX_SEGMENTS;
        lim->max_integrity_segments = 0;
        lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+       lim->virt_boundary_mask = 0;
        lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
        lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
        lim->chunk_sectors = 0;
        lim->max_write_same_sectors = 0;
        lim->max_discard_sectors = 0;
+       lim->max_hw_discard_sectors = 0;
        lim->discard_granularity = 0;
        lim->discard_alignment = 0;
        lim->discard_misaligned = 0;
@@@ -241,8 -221,8 +221,8 @@@ EXPORT_SYMBOL(blk_queue_bounce_limit)
   * Description:
   *    Enables a low level driver to set a hard upper limit,
   *    max_hw_sectors, on the size of requests.  max_hw_sectors is set by
 - *    the device driver based upon the combined capabilities of I/O
 - *    controller and storage device.
 + *    the device driver based upon the capabilities of the I/O
 + *    controller.
   *
   *    max_sectors is a soft limit imposed by the block layer for
   *    filesystem type requests.  This value can be overridden on a
@@@ -257,7 -237,9 +237,9 @@@ void blk_limits_max_hw_sectors(struct q
                       __func__, max_hw_sectors);
        }
  
-       limits->max_sectors = limits->max_hw_sectors = max_hw_sectors;
+       limits->max_hw_sectors = max_hw_sectors;
+       limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+                                   BLK_DEF_MAX_SECTORS);
  }
  EXPORT_SYMBOL(blk_limits_max_hw_sectors);
  
@@@ -303,6 -285,7 +285,7 @@@ EXPORT_SYMBOL(blk_queue_chunk_sectors)
  void blk_queue_max_discard_sectors(struct request_queue *q,
                unsigned int max_discard_sectors)
  {
+       q->limits.max_hw_discard_sectors = max_discard_sectors;
        q->limits.max_discard_sectors = max_discard_sectors;
  }
  EXPORT_SYMBOL(blk_queue_max_discard_sectors);
@@@ -550,6 -533,8 +533,8 @@@ int blk_stack_limits(struct queue_limit
  
        t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
                                            b->seg_boundary_mask);
+       t->virt_boundary_mask = min_not_zero(t->virt_boundary_mask,
+                                           b->virt_boundary_mask);
  
        t->max_segments = min_not_zero(t->max_segments, b->max_segments);
        t->max_integrity_segments = min_not_zero(t->max_integrity_segments,
  
                t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
                                                      b->max_discard_sectors);
+               t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
+                                                        b->max_hw_discard_sectors);
                t->discard_granularity = max(t->discard_granularity,
                                             b->discard_granularity);
                t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
@@@ -787,6 -774,17 +774,17 @@@ void blk_queue_segment_boundary(struct 
  }
  EXPORT_SYMBOL(blk_queue_segment_boundary);
  
+ /**
+  * blk_queue_virt_boundary - set boundary rules for bio merging
+  * @q:  the request queue for the device
+  * @mask:  the memory boundary mask
+  **/
+ void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
+ {
+       q->limits.virt_boundary_mask = mask;
+ }
+ EXPORT_SYMBOL(blk_queue_virt_boundary);
  /**
   * blk_queue_dma_alignment - set dma length and memory alignment
   * @q:     the request queue for the device
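
The new blk_queue_virt_boundary() above replaces the old QUEUE_FLAG_SG_GAPS flag with a per-queue mask, so the gap restriction participates in normal limit stacking and bio splitting. A hypothetical driver with page-aligned SG constraints would now describe them roughly like this (names and values are illustrative, not from the merge):

  #include <linux/blkdev.h>

  static void my_init_queue_limits(struct request_queue *q)
  {
          /* was: queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, q); */
          blk_queue_virt_boundary(q, PAGE_SIZE - 1);

          /* unrelated example limits, just for context */
          blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
          blk_queue_max_segments(q, 128);
  }
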
diff --combined drivers/block/null_blk.c
index 3177b245d2bdf63e821a12a4c0f18cbab1b16229,016a59afcf24eb55bc9b8b94ac5d7c5792e6087c..17269a3b85f282fd33df0c1f750559ea6c749ad7
@@@ -222,7 -222,7 +222,7 @@@ static void end_cmd(struct nullb_cmd *c
                blk_end_request_all(cmd->rq, 0);
                break;
        case NULL_Q_BIO:
-               bio_endio(cmd->bio, 0);
+               bio_endio(cmd->bio);
                break;
        }
  
@@@ -240,19 -240,19 +240,19 @@@ static enum hrtimer_restart null_cmd_ti
        while ((entry = llist_del_all(&cq->list)) != NULL) {
                entry = llist_reverse_order(entry);
                do {
 +                      struct request_queue *q = NULL;
 +
                        cmd = container_of(entry, struct nullb_cmd, ll_list);
                        entry = entry->next;
 +                      if (cmd->rq)
 +                              q = cmd->rq->q;
                        end_cmd(cmd);
  
 -                      if (cmd->rq) {
 -                              struct request_queue *q = cmd->rq->q;
 -
 -                              if (!q->mq_ops && blk_queue_stopped(q)) {
 -                                      spin_lock(q->queue_lock);
 -                                      if (blk_queue_stopped(q))
 -                                              blk_start_queue(q);
 -                                      spin_unlock(q->queue_lock);
 -                              }
 +                      if (q && !q->mq_ops && blk_queue_stopped(q)) {
 +                              spin_lock(q->queue_lock);
 +                              if (blk_queue_stopped(q))
 +                                      blk_start_queue(q);
 +                              spin_unlock(q->queue_lock);
                        }
                } while (entry);
        }
diff --combined drivers/block/rbd.c
index bc67a93aa4f4749f10d1a219789b21661c01ee21,71dd061a7e11154e1c044e009c73de734cf389da..698f761037ce54a6c94be1aeaf0a6179e4c9735b
@@@ -523,7 -523,6 +523,7 @@@ void rbd_warn(struct rbd_device *rbd_de
  #  define rbd_assert(expr)    ((void) 0)
  #endif /* !RBD_DEBUG */
  
 +static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request);
  static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
  static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
  static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
@@@ -1819,16 -1818,6 +1819,16 @@@ static void rbd_osd_stat_callback(struc
        obj_request_done_set(obj_request);
  }
  
 +static void rbd_osd_call_callback(struct rbd_obj_request *obj_request)
 +{
 +      dout("%s: obj %p\n", __func__, obj_request);
 +
 +      if (obj_request_img_data_test(obj_request))
 +              rbd_osd_copyup_callback(obj_request);
 +      else
 +              obj_request_done_set(obj_request);
 +}
 +
  static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
                                struct ceph_msg *msg)
  {
                rbd_osd_discard_callback(obj_request);
                break;
        case CEPH_OSD_OP_CALL:
 +              rbd_osd_call_callback(obj_request);
 +              break;
        case CEPH_OSD_OP_NOTIFY_ACK:
        case CEPH_OSD_OP_WATCH:
                rbd_osd_trivial_callback(obj_request);
@@@ -2543,15 -2530,13 +2543,15 @@@ out_unwind
  }
  
  static void
 -rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
 +rbd_osd_copyup_callback(struct rbd_obj_request *obj_request)
  {
        struct rbd_img_request *img_request;
        struct rbd_device *rbd_dev;
        struct page **pages;
        u32 page_count;
  
 +      dout("%s: obj %p\n", __func__, obj_request);
 +
        rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
                obj_request->type == OBJ_REQUEST_NODATA);
        rbd_assert(obj_request_img_data_test(obj_request));
        if (!obj_request->result)
                obj_request->xferred = obj_request->length;
  
 -      /* Finish up with the normal image object callback */
 -
 -      rbd_img_obj_callback(obj_request);
 +      obj_request_done_set(obj_request);
  }
  
  static void
@@@ -2663,6 -2650,7 +2663,6 @@@ rbd_img_obj_parent_read_full_callback(s
  
        /* All set, send it off. */
  
 -      orig_request->callback = rbd_img_obj_copyup_callback;
        osdc = &rbd_dev->rbd_client->client->osdc;
        img_result = rbd_obj_request_submit(osdc, orig_request);
        if (!img_result)
@@@ -3474,52 -3462,6 +3474,6 @@@ static int rbd_queue_rq(struct blk_mq_h
        return BLK_MQ_RQ_QUEUE_OK;
  }
  
- /*
-  * a queue callback. Makes sure that we don't create a bio that spans across
-  * multiple osd objects. One exception would be with a single page bios,
-  * which we handle later at bio_chain_clone_range()
-  */
- static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
-                         struct bio_vec *bvec)
- {
-       struct rbd_device *rbd_dev = q->queuedata;
-       sector_t sector_offset;
-       sector_t sectors_per_obj;
-       sector_t obj_sector_offset;
-       int ret;
-       /*
-        * Find how far into its rbd object the partition-relative
-        * bio start sector is to offset relative to the enclosing
-        * device.
-        */
-       sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
-       sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
-       obj_sector_offset = sector_offset & (sectors_per_obj - 1);
-       /*
-        * Compute the number of bytes from that offset to the end
-        * of the object.  Account for what's already used by the bio.
-        */
-       ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
-       if (ret > bmd->bi_size)
-               ret -= bmd->bi_size;
-       else
-               ret = 0;
-       /*
-        * Don't send back more than was asked for.  And if the bio
-        * was empty, let the whole thing through because:  "Note
-        * that a block device *must* allow a single page to be
-        * added to an empty bio."
-        */
-       rbd_assert(bvec->bv_len <= PAGE_SIZE);
-       if (ret > (int) bvec->bv_len || !bmd->bi_size)
-               ret = (int) bvec->bv_len;
-       return ret;
- }
  static void rbd_free_disk(struct rbd_device *rbd_dev)
  {
        struct gendisk *disk = rbd_dev->disk;
@@@ -3815,10 -3757,9 +3769,9 @@@ static int rbd_init_disk(struct rbd_dev
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
        q->limits.discard_granularity = segment_size;
        q->limits.discard_alignment = segment_size;
-       q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+       blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
        q->limits.discard_zeroes_data = 1;
  
-       blk_queue_merge_bvec(q, rbd_merge_bvec);
        disk->queue = q;
  
        q->queuedata = rbd_dev;
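
As the rbd hunk above shows, drivers now go through blk_queue_max_discard_sectors() instead of writing q->limits.max_discard_sectors directly, so the new max_hw_discard_sectors field stays populated and discard_max_bytes can be lowered from sysfs without losing the hardware cap. A minimal sketch of that setup pattern (hypothetical helper, values illustrative):

  #include <linux/blkdev.h>

  static void my_setup_discard(struct request_queue *q, unsigned int granularity)
  {
          queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
          q->limits.discard_granularity = granularity;

          /* records both max_discard_sectors and max_hw_discard_sectors */
          blk_queue_max_discard_sectors(q, UINT_MAX);
  }
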
diff --combined drivers/block/xen-blkback/blkback.c
index 954c0029fb3babc49d1a1f490f9d420934701e30,662648e08596d04bfa5fd6b768e796a0e62133d0..6a685aec6994c2becc628df3fc9eb76f928f69a4
@@@ -369,8 -369,8 +369,8 @@@ static void purge_persistent_gnt(struc
                return;
        }
  
 -      if (work_pending(&blkif->persistent_purge_work)) {
 -              pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n");
 +      if (work_busy(&blkif->persistent_purge_work)) {
 +              pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
                return;
        }
  
@@@ -1078,9 -1078,9 +1078,9 @@@ static void __end_block_io_op(struct pe
  /*
   * bio callback.
   */
- static void end_block_io_op(struct bio *bio, int error)
+ static void end_block_io_op(struct bio *bio)
  {
-       __end_block_io_op(bio->bi_private, error);
+       __end_block_io_op(bio->bi_private, bio->bi_error);
        bio_put(bio);
  }
  
diff --combined drivers/block/xen-blkfront.c
index 7a8a73f1fc0462feab5bad706573ff6eb4536ef7,d542db7a6c7337e0c82375366e1658ef93e8e701..5f6b3be0a93cc0ba82c105f1c57a5b3ba4382aea
@@@ -82,7 -82,6 +82,6 @@@ struct blk_shadow 
  struct split_bio {
        struct bio *bio;
        atomic_t pending;
-       int err;
  };
  
  static DEFINE_MUTEX(blkfront_mutex);
@@@ -179,7 -178,6 +178,7 @@@ static DEFINE_SPINLOCK(minor_lock)
        ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
  
  static int blkfront_setup_indirect(struct blkfront_info *info);
 +static int blkfront_gather_backend_features(struct blkfront_info *info);
  
  static int get_id_from_freelist(struct blkfront_info *info)
  {
@@@ -1129,10 -1127,8 +1128,10 @@@ static void blkif_completion(struct blk
                                 * Add the used indirect page back to the list of
                                 * available pages for indirect grefs.
                                 */
 -                              indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
 -                              list_add(&indirect_page->lru, &info->indirect_pages);
 +                              if (!info->feature_persistent) {
 +                                      indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
 +                                      list_add(&indirect_page->lru, &info->indirect_pages);
 +                              }
                                s->indirect_grants[i]->gref = GRANT_INVALID_REF;
                                list_add_tail(&s->indirect_grants[i]->node, &info->grants);
                        }
@@@ -1481,16 -1477,14 +1480,14 @@@ static int blkfront_probe(struct xenbus
        return 0;
  }
  
- static void split_bio_end(struct bio *bio, int error)
+ static void split_bio_end(struct bio *bio)
  {
        struct split_bio *split_bio = bio->bi_private;
  
-       if (error)
-               split_bio->err = error;
        if (atomic_dec_and_test(&split_bio->pending)) {
                split_bio->bio->bi_phys_segments = 0;
-               bio_endio(split_bio->bio, split_bio->err);
+               split_bio->bio->bi_error = bio->bi_error;
+               bio_endio(split_bio->bio);
                kfree(split_bio);
        }
        bio_put(bio);
@@@ -1522,7 -1516,7 +1519,7 @@@ static int blkif_recover(struct blkfron
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
  
 -      rc = blkfront_setup_indirect(info);
 +      rc = blkfront_gather_backend_features(info);
        if (rc) {
                kfree(copy);
                return rc;
@@@ -1723,13 -1717,20 +1720,13 @@@ static void blkfront_setup_discard(stru
  
  static int blkfront_setup_indirect(struct blkfront_info *info)
  {
 -      unsigned int indirect_segments, segs;
 +      unsigned int segs;
        int err, i;
  
 -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 -                          "feature-max-indirect-segments", "%u", &indirect_segments,
 -                          NULL);
 -      if (err) {
 -              info->max_indirect_segments = 0;
 +      if (info->max_indirect_segments == 0)
                segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
 -      } else {
 -              info->max_indirect_segments = min(indirect_segments,
 -                                                xen_blkif_max_segments);
 +      else
                segs = info->max_indirect_segments;
 -      }
  
        err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
        if (err)
@@@ -1792,68 -1793,6 +1789,68 @@@ out_of_memory
        return -ENOMEM;
  }
  
 +/*
 + * Gather all backend feature-*
 + */
 +static int blkfront_gather_backend_features(struct blkfront_info *info)
 +{
 +      int err;
 +      int barrier, flush, discard, persistent;
 +      unsigned int indirect_segments;
 +
 +      info->feature_flush = 0;
 +
 +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 +                      "feature-barrier", "%d", &barrier,
 +                      NULL);
 +
 +      /*
 +       * If there's no "feature-barrier" defined, then it means
 +       * we're dealing with a very old backend which writes
 +       * synchronously; nothing to do.
 +       *
 +       * If there are barriers, then we use flush.
 +       */
 +      if (!err && barrier)
 +              info->feature_flush = REQ_FLUSH | REQ_FUA;
 +      /*
 +       * And if there is "feature-flush-cache" use that above
 +       * barriers.
 +       */
 +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 +                      "feature-flush-cache", "%d", &flush,
 +                      NULL);
 +
 +      if (!err && flush)
 +              info->feature_flush = REQ_FLUSH;
 +
 +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 +                      "feature-discard", "%d", &discard,
 +                      NULL);
 +
 +      if (!err && discard)
 +              blkfront_setup_discard(info);
 +
 +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 +                      "feature-persistent", "%u", &persistent,
 +                      NULL);
 +      if (err)
 +              info->feature_persistent = 0;
 +      else
 +              info->feature_persistent = persistent;
 +
 +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 +                          "feature-max-indirect-segments", "%u", &indirect_segments,
 +                          NULL);
 +      if (err)
 +              info->max_indirect_segments = 0;
 +      else
 +              info->max_indirect_segments = min(indirect_segments,
 +                                                xen_blkif_max_segments);
 +
 +      return blkfront_setup_indirect(info);
 +}
 +
  /*
   * Invoked when the backend is finally 'ready' (and has told produced
   * the details about the physical device - #sectors, size, etc).
@@@ -1865,6 -1804,7 +1862,6 @@@ static void blkfront_connect(struct blk
        unsigned int physical_sector_size;
        unsigned int binfo;
        int err;
 -      int barrier, flush, discard, persistent;
  
        switch (info->connected) {
        case BLKIF_STATE_CONNECTED:
        if (err != 1)
                physical_sector_size = sector_size;
  
 -      info->feature_flush = 0;
 -
 -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 -                          "feature-barrier", "%d", &barrier,
 -                          NULL);
 -
 -      /*
 -       * If there's no "feature-barrier" defined, then it means
 -       * we're dealing with a very old backend which writes
 -       * synchronously; nothing to do.
 -       *
 -       * If there are barriers, then we use flush.
 -       */
 -      if (!err && barrier)
 -              info->feature_flush = REQ_FLUSH | REQ_FUA;
 -      /*
 -       * And if there is "feature-flush-cache" use that above
 -       * barriers.
 -       */
 -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 -                          "feature-flush-cache", "%d", &flush,
 -                          NULL);
 -
 -      if (!err && flush)
 -              info->feature_flush = REQ_FLUSH;
 -
 -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 -                          "feature-discard", "%d", &discard,
 -                          NULL);
 -
 -      if (!err && discard)
 -              blkfront_setup_discard(info);
 -
 -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
 -                          "feature-persistent", "%u", &persistent,
 -                          NULL);
 -      if (err)
 -              info->feature_persistent = 0;
 -      else
 -              info->feature_persistent = persistent;
 -
 -      err = blkfront_setup_indirect(info);
 +      err = blkfront_gather_backend_features(info);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
                                 info->xbdev->otherend);
diff --combined drivers/block/zram/zram_drv.c
index 763301c7828c72650f2abaa1c723425bdd3c73f4,aec781acee9d597f41c78cb9903adf7a609eb522..9c01f5bfa33fc9a0494a1868b0b6ca7dc79b50b9
@@@ -496,9 -496,10 +496,9 @@@ static void zram_meta_free(struct zram_
        kfree(meta);
  }
  
 -static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
 +static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
  {
        size_t num_pages;
 -      char pool_name[8];
        struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
  
        if (!meta)
                goto out_error;
        }
  
 -      snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
        meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
        if (!meta->mem_pool) {
                pr_err("Error creating memory pool\n");
@@@ -848,7 -850,7 +848,7 @@@ static void __zram_make_request(struct 
  
        if (unlikely(bio->bi_rw & REQ_DISCARD)) {
                zram_bio_discard(zram, index, offset, bio);
-               bio_endio(bio, 0);
+               bio_endio(bio);
                return;
        }
  
                update_position(&index, &offset, &bvec);
        }
  
-       set_bit(BIO_UPTODATE, &bio->bi_flags);
-       bio_endio(bio, 0);
+       bio_endio(bio);
        return;
  
  out:
@@@ -899,6 -900,8 +898,8 @@@ static void zram_make_request(struct re
        if (unlikely(!zram_meta_get(zram)))
                goto error;
  
+       blk_queue_split(queue, &bio, queue->bio_split);
        if (!valid_io_request(zram, bio->bi_iter.bi_sector,
                                        bio->bi_iter.bi_size)) {
                atomic64_inc(&zram->stats.invalid_io);
@@@ -1029,7 -1032,7 +1030,7 @@@ static ssize_t disksize_store(struct de
                return -EINVAL;
  
        disksize = PAGE_ALIGN(disksize);
 -      meta = zram_meta_alloc(zram->disk->first_minor, disksize);
 +      meta = zram_meta_alloc(zram->disk->disk_name, disksize);
        if (!meta)
                return -ENOMEM;
  
@@@ -1242,7 -1245,7 +1243,7 @@@ static int zram_add(void
        blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
        blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
        zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
-       zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+       blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
        /*
         * zram_bio_discard() will clear all logical blocks if logical block
         * size is identical with physical block size(PAGE_SIZE). But if it is
diff --combined drivers/md/dm-cache-target.c
index 1fe93cfea7d309a659d79fe2b953b5f2dbe7b466,d2b5dfbb30cfb92e92940cfc9098f55527a11399..7245071778dba2f28bac62057f7b2bb444fc56e3
@@@ -919,14 -919,14 +919,14 @@@ static void defer_writethrough_bio(stru
        wake_worker(cache);
  }
  
- static void writethrough_endio(struct bio *bio, int err)
+ static void writethrough_endio(struct bio *bio)
  {
        struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
  
        dm_unhook_bio(&pb->hook_info, bio);
  
-       if (err) {
-               bio_endio(bio, err);
+       if (bio->bi_error) {
+               bio_endio(bio);
                return;
        }
  
@@@ -1231,7 -1231,7 +1231,7 @@@ static void migration_success_post_comm
                         * The block was promoted via an overwrite, so it's dirty.
                         */
                        set_dirty(cache, mg->new_oblock, mg->cblock);
-                       bio_endio(mg->new_ocell->holder, 0);
+                       bio_endio(mg->new_ocell->holder);
                        cell_defer(cache, mg->new_ocell, false);
                }
                free_io_migration(mg);
@@@ -1284,7 -1284,7 +1284,7 @@@ static void issue_copy(struct dm_cache_
        }
  }
  
- static void overwrite_endio(struct bio *bio, int err)
+ static void overwrite_endio(struct bio *bio)
  {
        struct dm_cache_migration *mg = bio->bi_private;
        struct cache *cache = mg->cache;
  
        dm_unhook_bio(&pb->hook_info, bio);
  
-       if (err)
+       if (bio->bi_error)
                mg->err = true;
  
        mg->requeue_holder = false;
@@@ -1358,7 -1358,7 +1358,7 @@@ static void issue_discard(struct dm_cac
                b = to_dblock(from_dblock(b) + 1);
        }
  
-       bio_endio(bio, 0);
+       bio_endio(bio);
        cell_defer(mg->cache, mg->new_ocell, false);
        free_migration(mg);
  }
@@@ -1631,7 -1631,7 +1631,7 @@@ static void process_discard_bio(struct 
  
        calc_discard_block_range(cache, bio, &b, &e);
        if (b == e) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                return;
        }
  
@@@ -1947,7 -1947,6 +1947,7 @@@ static int commit_if_needed(struct cach
  
  static void process_deferred_bios(struct cache *cache)
  {
 +      bool prealloc_used = false;
        unsigned long flags;
        struct bio_list bios;
        struct bio *bio;
                 * this bio might require one, we pause until there are some
                 * prepared mappings to process.
                 */
 +              prealloc_used = true;
                if (prealloc_data_structs(cache, &structs)) {
                        spin_lock_irqsave(&cache->lock, flags);
                        bio_list_merge(&cache->deferred_bios, &bios);
                        process_bio(cache, &structs, bio);
        }
  
 -      prealloc_free_structs(cache, &structs);
 +      if (prealloc_used)
 +              prealloc_free_structs(cache, &structs);
  }
  
  static void process_deferred_cells(struct cache *cache)
  {
 +      bool prealloc_used = false;
        unsigned long flags;
        struct dm_bio_prison_cell *cell, *tmp;
        struct list_head cells;
                 * this bio might require one, we pause until there are some
                 * prepared mappings to process.
                 */
 +              prealloc_used = true;
                if (prealloc_data_structs(cache, &structs)) {
                        spin_lock_irqsave(&cache->lock, flags);
                        list_splice(&cells, &cache->deferred_cells);
                process_cell(cache, &structs, cell);
        }
  
 -      prealloc_free_structs(cache, &structs);
 +      if (prealloc_used)
 +              prealloc_free_structs(cache, &structs);
  }
  
  static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
@@@ -2068,7 -2062,7 +2068,7 @@@ static void process_deferred_writethrou
  
  static void writeback_some_dirty_blocks(struct cache *cache)
  {
 -      int r = 0;
 +      bool prealloc_used = false;
        dm_oblock_t oblock;
        dm_cblock_t cblock;
        struct prealloc structs;
        memset(&structs, 0, sizeof(structs));
  
        while (spare_migration_bandwidth(cache)) {
 -              if (prealloc_data_structs(cache, &structs))
 -                      break;
 -
 -              r = policy_writeback_work(cache->policy, &oblock, &cblock, busy);
 -              if (r)
 -                      break;
 +              if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
 +                      break; /* no work to do */
  
 -              r = get_cell(cache, oblock, &structs, &old_ocell);
 -              if (r) {
 +              prealloc_used = true;
 +              if (prealloc_data_structs(cache, &structs) ||
 +                  get_cell(cache, oblock, &structs, &old_ocell)) {
                        policy_set_dirty(cache->policy, oblock);
                        break;
                }
                writeback(cache, &structs, oblock, cblock, old_ocell);
        }
  
 -      prealloc_free_structs(cache, &structs);
 +      if (prealloc_used)
 +              prealloc_free_structs(cache, &structs);
  }
  
  /*----------------------------------------------------------------
@@@ -2217,8 -2213,10 +2217,10 @@@ static void requeue_deferred_bios(struc
        bio_list_merge(&bios, &cache->deferred_bios);
        bio_list_init(&cache->deferred_bios);
  
-       while ((bio = bio_list_pop(&bios)))
-               bio_endio(bio, DM_ENDIO_REQUEUE);
+       while ((bio = bio_list_pop(&bios))) {
+               bio->bi_error = DM_ENDIO_REQUEUE;
+               bio_endio(bio);
+       }
  }
  
  static int more_work(struct cache *cache)
@@@ -3123,7 -3121,7 +3125,7 @@@ static int cache_map(struct dm_target *
                         * This is a duplicate writethrough io that is no
                         * longer needed because the block has been demoted.
                         */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                        // FIXME: remap everything as a miss
                        cell_defer(cache, cell, false);
                        r = DM_MAPIO_SUBMITTED;
@@@ -3500,7 -3498,7 +3502,7 @@@ static void cache_resume(struct dm_targ
   * <#demotions> <#promotions> <#dirty>
   * <#features> <features>*
   * <#core args> <core args>
 - * <policy name> <#policy args> <policy args>* <cache metadata mode>
 + * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
   */
  static void cache_status(struct dm_target *ti, status_type_t type,
                         unsigned status_flags, char *result, unsigned maxlen)
                else
                        DMEMIT("rw ");
  
 +              if (dm_cache_metadata_needs_check(cache->cmd))
 +                      DMEMIT("needs_check ");
 +              else
 +                      DMEMIT("- ");
 +
                break;
  
        case STATUSTYPE_TABLE:
@@@ -3778,26 -3771,6 +3780,6 @@@ static int cache_iterate_devices(struc
        return r;
  }
  
- /*
-  * We assume I/O is going to the origin (which is the volume
-  * more likely to have restrictions e.g. by being striped).
-  * (Looking up the exact location of the data would be expensive
-  * and could always be out of date by the time the bio is submitted.)
-  */
- static int cache_bvec_merge(struct dm_target *ti,
-                           struct bvec_merge_data *bvm,
-                           struct bio_vec *biovec, int max_size)
- {
-       struct cache *cache = ti->private;
-       struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);
-       if (!q->merge_bvec_fn)
-               return max_size;
-       bvm->bi_bdev = cache->origin_dev->bdev;
-       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
- }
  static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
  {
        /*
@@@ -3829,7 -3802,7 +3811,7 @@@ static void cache_io_hints(struct dm_ta
  
  static struct target_type cache_target = {
        .name = "cache",
 -      .version = {1, 7, 0},
 +      .version = {1, 8, 0},
        .module = THIS_MODULE,
        .ctr = cache_ctr,
        .dtr = cache_dtr,
        .status = cache_status,
        .message = cache_message,
        .iterate_devices = cache_iterate_devices,
-       .merge = cache_bvec_merge,
        .io_hints = cache_io_hints,
  };
  
diff --combined drivers/md/dm-thin.c
index d2bbe8cc1e9786b66af798df9d8666d3fb96223c,f352e4990998314f4d6b6128a042168e561b9302..271a6624936313863a753aa6ddde802a41bc27be
@@@ -18,7 -18,6 +18,7 @@@
  #include <linux/init.h>
  #include <linux/module.h>
  #include <linux/slab.h>
 +#include <linux/vmalloc.h>
  #include <linux/sort.h>
  #include <linux/rbtree.h>
  
@@@ -269,7 -268,7 +269,7 @@@ struct pool 
        process_mapping_fn process_prepared_mapping;
        process_mapping_fn process_prepared_discard;
  
 -      struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
 +      struct dm_bio_prison_cell **cell_sort_array;
  };
  
  static enum pool_mode get_pool_mode(struct pool *pool);
@@@ -615,8 -614,10 +615,10 @@@ static void error_bio_list(struct bio_l
  {
        struct bio *bio;
  
-       while ((bio = bio_list_pop(bios)))
-               bio_endio(bio, error);
+       while ((bio = bio_list_pop(bios))) {
+               bio->bi_error = error;
+               bio_endio(bio);
+       }
  }
  
  static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
@@@ -666,21 -667,16 +668,21 @@@ static void requeue_io(struct thin_c *t
        requeue_deferred_cells(tc);
  }
  
 -static void error_retry_list(struct pool *pool)
 +static void error_retry_list_with_code(struct pool *pool, int error)
  {
        struct thin_c *tc;
  
        rcu_read_lock();
        list_for_each_entry_rcu(tc, &pool->active_thins, list)
 -              error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO);
 +              error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
        rcu_read_unlock();
  }
  
 +static void error_retry_list(struct pool *pool)
 +{
 +      return error_retry_list_with_code(pool, -EIO);
 +}
 +
  /*
   * This section of code contains the logic for processing a thin device's IO.
   * Much of the code depends on pool object resources (lists, workqueues, etc)
@@@ -870,14 -866,14 +872,14 @@@ static void copy_complete(int read_err
        complete_mapping_preparation(m);
  }
  
- static void overwrite_endio(struct bio *bio, int err)
+ static void overwrite_endio(struct bio *bio)
  {
        struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
        struct dm_thin_new_mapping *m = h->overwrite_mapping;
  
        bio->bi_end_io = m->saved_bi_end_io;
  
-       m->err = err;
+       m->err = bio->bi_error;
        complete_mapping_preparation(m);
  }
  
@@@ -1002,7 -998,7 +1004,7 @@@ static void process_prepared_mapping(st
         */
        if (bio) {
                inc_remap_and_issue_cell(tc, m->cell, m->data_block);
-               bio_endio(bio, 0);
+               bio_endio(bio);
        } else {
                inc_all_io_entry(tc->pool, m->cell->holder);
                remap_and_issue(tc, m->cell->holder, m->data_block);
@@@ -1032,7 -1028,7 +1034,7 @@@ static void process_prepared_discard_fa
  
  static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
  {
-       bio_endio(m->bio, 0);
+       bio_endio(m->bio);
        free_discard_mapping(m);
  }
  
@@@ -1046,7 -1042,7 +1048,7 @@@ static void process_prepared_discard_no
                metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
                bio_io_error(m->bio);
        } else
-               bio_endio(m->bio, 0);
+               bio_endio(m->bio);
  
        cell_defer_no_holder(tc, m->cell);
        mempool_free(m, tc->pool->mapping_pool);
@@@ -1117,7 -1113,8 +1119,8 @@@ static void process_prepared_discard_pa
         * Even if r is set, there could be sub discards in flight that we
         * need to wait for.
         */
-       bio_endio(m->bio, r);
+       m->bio->bi_error = r;
+       bio_endio(m->bio);
        cell_defer_no_holder(tc, m->cell);
        mempool_free(m, pool->mapping_pool);
  }
@@@ -1493,9 -1490,10 +1496,10 @@@ static void handle_unserviceable_bio(st
  {
        int error = should_error_unserviceable_bio(pool);
  
-       if (error)
-               bio_endio(bio, error);
-       else
+       if (error) {
+               bio->bi_error = error;
+               bio_endio(bio);
+       } else
                retry_on_resume(bio);
  }
  
@@@ -1631,7 -1629,7 +1635,7 @@@ static void process_discard_cell_passdo
         * will prevent completion until the sub range discards have
         * completed.
         */
-       bio_endio(bio, 0);
+       bio_endio(bio);
  }
  
  static void process_discard_bio(struct thin_c *tc, struct bio *bio)
                /*
                 * The discard covers less than a block.
                 */
-               bio_endio(bio, 0);
+               bio_endio(bio);
                return;
        }
  
@@@ -1790,7 -1788,7 +1794,7 @@@ static void provision_block(struct thin
        if (bio_data_dir(bio) == READ) {
                zero_fill_bio(bio);
                cell_defer_no_holder(tc, cell);
-               bio_endio(bio, 0);
+               bio_endio(bio);
                return;
        }
  
@@@ -1855,7 -1853,7 +1859,7 @@@ static void process_cell(struct thin_c 
  
                        } else {
                                zero_fill_bio(bio);
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                        }
                } else
                        provision_block(tc, bio, block, cell);
@@@ -1926,7 -1924,7 +1930,7 @@@ static void __process_bio_read_only(str
                }
  
                zero_fill_bio(bio);
-               bio_endio(bio, 0);
+               bio_endio(bio);
                break;
  
        default:
@@@ -1951,7 -1949,7 +1955,7 @@@ static void process_cell_read_only(stru
  
  static void process_bio_success(struct thin_c *tc, struct bio *bio)
  {
-       bio_endio(bio, 0);
+       bio_endio(bio);
  }
  
  static void process_bio_fail(struct thin_c *tc, struct bio *bio)
@@@ -2287,23 -2285,18 +2291,23 @@@ static void do_waker(struct work_struc
        queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
  }
  
 +static void notify_of_pool_mode_change_to_oods(struct pool *pool);
 +
  /*
   * We're holding onto IO to allow userland time to react.  After the
   * timeout either the pool will have been resized (and thus back in
 - * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
 + * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
   */
  static void do_no_space_timeout(struct work_struct *ws)
  {
        struct pool *pool = container_of(to_delayed_work(ws), struct pool,
                                         no_space_timeout);
  
 -      if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
 -              set_pool_mode(pool, PM_READ_ONLY);
 +      if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
 +              pool->pf.error_if_no_space = true;
 +              notify_of_pool_mode_change_to_oods(pool);
 +              error_retry_list_with_code(pool, -ENOSPC);
 +      }
  }
  
  /*----------------------------------------------------------------*/
@@@ -2381,14 -2374,6 +2385,14 @@@ static void notify_of_pool_mode_change(
               dm_device_name(pool->pool_md), new_mode);
  }
  
 +static void notify_of_pool_mode_change_to_oods(struct pool *pool)
 +{
 +      if (!pool->pf.error_if_no_space)
 +              notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
 +      else
 +              notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
 +}
 +
  static bool passdown_enabled(struct pool_c *pt)
  {
        return pt->adjusted_pf.discard_passdown;
@@@ -2473,7 -2458,7 +2477,7 @@@ static void set_pool_mode(struct pool *
                 * frequently seeing this mode.
                 */
                if (old_mode != new_mode)
 -                      notify_of_pool_mode_change(pool, "out-of-data-space");
 +                      notify_of_pool_mode_change_to_oods(pool);
                pool->process_bio = process_bio_read_only;
                pool->process_discard = process_discard_bio;
                pool->process_cell = process_cell_read_only;
@@@ -2600,7 -2585,8 +2604,8 @@@ static int thin_bio_map(struct dm_targe
        thin_hook_bio(tc, bio);
  
        if (tc->requeue_mode) {
-               bio_endio(bio, DM_ENDIO_REQUEUE);
+               bio->bi_error = DM_ENDIO_REQUEUE;
+               bio_endio(bio);
                return DM_MAPIO_SUBMITTED;
        }
  
@@@ -2796,7 -2782,6 +2801,7 @@@ static void __pool_destroy(struct pool 
  {
        __pool_table_remove(pool);
  
 +      vfree(pool->cell_sort_array);
        if (dm_pool_metadata_close(pool->pmd) < 0)
                DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
  
@@@ -2909,13 -2894,6 +2914,13 @@@ static struct pool *pool_create(struct 
                goto bad_mapping_pool;
        }
  
 +      pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
 +      if (!pool->cell_sort_array) {
 +              *error = "Error allocating cell sort array";
 +              err_p = ERR_PTR(-ENOMEM);
 +              goto bad_sort_array;
 +      }
 +
        pool->ref_count = 1;
        pool->last_commit_jiffies = jiffies;
        pool->pool_md = pool_md;
  
        return pool;
  
 +bad_sort_array:
 +      mempool_destroy(pool->mapping_pool);
  bad_mapping_pool:
        dm_deferred_set_destroy(pool->all_io_ds);
  bad_all_io_ds:
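
The cell sort array introduced above is plain vmalloc()/vfree() memory tied to the pool's lifetime: allocated last in pool_create(), unwound via the new bad_sort_array label if anything fails, and released in __pool_destroy() on normal teardown. A minimal sketch of that allocate/release pairing, with invented names and no connection to dm-thin's actual structures:

#include <linux/errno.h>
#include <linux/vmalloc.h>

struct sort_buf {
        void **entries;                 /* one slot per cell to be sorted */
};

static int sort_buf_init(struct sort_buf *buf, size_t nr_entries)
{
        buf->entries = vmalloc(sizeof(*buf->entries) * nr_entries);
        return buf->entries ? 0 : -ENOMEM;
}

static void sort_buf_exit(struct sort_buf *buf)
{
        vfree(buf->entries);            /* vfree(NULL) is a no-op */
}
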
@@@ -3743,7 -3719,6 +3748,7 @@@ static void emit_flags(struct pool_feat
   * Status line is:
   *    <transaction id> <used metadata sectors>/<total metadata sectors>
   *    <used data sectors>/<total data sectors> <held metadata root>
 + *    <pool mode> <discard config> <no space config> <needs_check>
   */
  static void pool_status(struct dm_target *ti, status_type_t type,
                        unsigned status_flags, char *result, unsigned maxlen)
                else
                        DMEMIT("queue_if_no_space ");
  
 +              if (dm_pool_metadata_needs_check(pool->pmd))
 +                      DMEMIT("needs_check ");
 +              else
 +                      DMEMIT("- ");
 +
                break;
  
        case STATUSTYPE_TABLE:
@@@ -3875,20 -3845,6 +3880,6 @@@ static int pool_iterate_devices(struct 
        return fn(ti, pt->data_dev, 0, ti->len, data);
  }
  
- static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
-                     struct bio_vec *biovec, int max_size)
- {
-       struct pool_c *pt = ti->private;
-       struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
-       if (!q->merge_bvec_fn)
-               return max_size;
-       bvm->bi_bdev = pt->data_dev->bdev;
-       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
- }
  static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
  {
        struct pool_c *pt = ti->private;
@@@ -3953,7 -3909,7 +3944,7 @@@ static struct target_type pool_target 
        .name = "thin-pool",
        .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
                    DM_TARGET_IMMUTABLE,
 -      .version = {1, 15, 0},
 +      .version = {1, 16, 0},
        .module = THIS_MODULE,
        .ctr = pool_ctr,
        .dtr = pool_dtr,
        .resume = pool_resume,
        .message = pool_message,
        .status = pool_status,
-       .merge = pool_merge,
        .iterate_devices = pool_iterate_devices,
        .io_hints = pool_io_hints,
  };
@@@ -4292,21 -4247,6 +4282,6 @@@ err
        DMEMIT("Error");
  }
  
- static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
-                     struct bio_vec *biovec, int max_size)
- {
-       struct thin_c *tc = ti->private;
-       struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev);
-       if (!q->merge_bvec_fn)
-               return max_size;
-       bvm->bi_bdev = tc->pool_dev->bdev;
-       bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector);
-       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
- }
  static int thin_iterate_devices(struct dm_target *ti,
                                iterate_devices_callout_fn fn, void *data)
  {
@@@ -4340,7 -4280,7 +4315,7 @@@ static void thin_io_hints(struct dm_tar
  
  static struct target_type thin_target = {
        .name = "thin",
 -      .version = {1, 15, 0},
 +      .version = {1, 16, 0},
        .module = THIS_MODULE,
        .ctr = thin_ctr,
        .dtr = thin_dtr,
        .presuspend = thin_presuspend,
        .postsuspend = thin_postsuspend,
        .status = thin_status,
-       .merge = thin_merge,
        .iterate_devices = thin_iterate_devices,
        .io_hints = thin_io_hints,
  };
diff --combined drivers/md/dm.c
index 0d7ab20c58dffc40d5c56c9427b7dd7f090c8bd3,8bb1ebb6ca7b5ea5836824d6bf281232143cec48..6ffc01bb85f2a8ee6a127cf3c651bebd3cdf6a32
@@@ -124,9 -124,8 +124,8 @@@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo)
  #define DMF_FREEING 3
  #define DMF_DELETING 4
  #define DMF_NOFLUSH_SUSPENDING 5
- #define DMF_MERGE_IS_OPTIONAL 6
- #define DMF_DEFERRED_REMOVE 7
- #define DMF_SUSPENDED_INTERNALLY 8
+ #define DMF_DEFERRED_REMOVE 6
+ #define DMF_SUSPENDED_INTERNALLY 7
  
  /*
   * A dummy definition to make RCU happy.
@@@ -944,7 -943,8 +943,8 @@@ static void dec_pending(struct dm_io *i
                } else {
                        /* done with normal IO or empty flush */
                        trace_block_bio_complete(md->queue, bio, io_error);
-                       bio_endio(bio, io_error);
+                       bio->bi_error = io_error;
+                       bio_endio(bio);
                }
        }
  }
@@@ -957,17 -957,15 +957,15 @@@ static void disable_write_same(struct m
        limits->max_write_same_sectors = 0;
  }
  
- static void clone_endio(struct bio *bio, int error)
+ static void clone_endio(struct bio *bio)
  {
+       int error = bio->bi_error;
        int r = error;
        struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
        struct dm_io *io = tio->io;
        struct mapped_device *md = tio->io->md;
        dm_endio_fn endio = tio->ti->type->end_io;
  
-       if (!bio_flagged(bio, BIO_UPTODATE) && !error)
-               error = -EIO;
        if (endio) {
                r = endio(tio->ti, bio, error);
                if (r < 0 || r == DM_ENDIO_REQUEUE)
  /*
   * Partial completion handling for request-based dm
   */
- static void end_clone_bio(struct bio *clone, int error)
+ static void end_clone_bio(struct bio *clone)
  {
        struct dm_rq_clone_bio_info *info =
                container_of(clone, struct dm_rq_clone_bio_info, clone);
                 * the remainder.
                 */
                return;
-       else if (error) {
+       else if (bio->bi_error) {
                /*
                 * Don't notice the error to the upper layer yet.
                 * The error handling decision is made by the target driver,
                 * when the request is completed.
                 */
-               tio->error = error;
+               tio->error = bio->bi_error;
                return;
        }
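
clone_endio() and end_clone_bio() show the other half of the interface change: ->bi_end_io callbacks lose their int error parameter and read the status from bio->bi_error themselves. A bare-bones sketch of the new callback shape; my_end_io() is an invented name:

#include <linux/bio.h>
#include <linux/printk.h>

static void my_end_io(struct bio *bio)
{
        int error = bio->bi_error;      /* 0 on success */

        if (error)
                pr_debug("I/O completed with error %d\n", error);
        bio_put(bio);                   /* drop the submitter's reference */
}

/* wired up before submission:  bio->bi_end_io = my_end_io;  */
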
  
@@@ -1067,10 -1065,13 +1065,10 @@@ static void rq_end_stats(struct mapped_
   */
  static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
  {
 -      int nr_requests_pending;
 -
        atomic_dec(&md->pending[rw]);
  
        /* nudge anyone waiting on suspend queue */
 -      nr_requests_pending = md_in_flight(md);
 -      if (!nr_requests_pending)
 +      if (!md_in_flight(md))
                wake_up(&md->wait);
  
        /*
        if (run_queue) {
                if (md->queue->mq_ops)
                        blk_mq_run_hw_queues(md->queue, true);
 -              else if (!nr_requests_pending ||
 -                       (nr_requests_pending >= md->queue->nr_congestion_on))
 +              else
                        blk_run_queue_async(md->queue);
        }
  
@@@ -1722,60 -1724,6 +1720,6 @@@ static void __split_and_process_bio(str
   * CRUD END
   *---------------------------------------------------------------*/
  
- static int dm_merge_bvec(struct request_queue *q,
-                        struct bvec_merge_data *bvm,
-                        struct bio_vec *biovec)
- {
-       struct mapped_device *md = q->queuedata;
-       struct dm_table *map = dm_get_live_table_fast(md);
-       struct dm_target *ti;
-       sector_t max_sectors;
-       int max_size = 0;
-       if (unlikely(!map))
-               goto out;
-       ti = dm_table_find_target(map, bvm->bi_sector);
-       if (!dm_target_is_valid(ti))
-               goto out;
-       /*
-        * Find maximum amount of I/O that won't need splitting
-        */
-       max_sectors = min(max_io_len(bvm->bi_sector, ti),
-                         (sector_t) BIO_MAX_SECTORS);
-       max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-       if (max_size < 0)
-               max_size = 0;
-       /*
-        * merge_bvec_fn() returns number of bytes
-        * it can accept at this offset
-        * max is precomputed maximal io size
-        */
-       if (max_size && ti->type->merge)
-               max_size = ti->type->merge(ti, bvm, biovec, max_size);
-       /*
-        * If the target doesn't support merge method and some of the devices
-        * provided their merge_bvec method (we know this by looking at
-        * queue_max_hw_sectors), then we can't allow bios with multiple vector
-        * entries.  So always set max_size to 0, and the code below allows
-        * just one page.
-        */
-       else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-               max_size = 0;
- out:
-       dm_put_live_table_fast(md);
-       /*
-        * Always allow an entire first page
-        */
-       if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
-               max_size = biovec->bv_len;
-       return max_size;
- }
  /*
   * The request function that just remaps the bio built up by
   * dm_merge_bvec.
@@@ -1789,6 -1737,8 +1733,8 @@@ static void dm_make_request(struct requ
  
        map = dm_get_live_table(md, &srcu_idx);
  
+       blk_queue_split(q, &bio, q->bio_split);
        generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
  
        /* if we're suspended, we have to queue this io for later */
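
The one-line addition of blk_queue_split() is what makes the merge_bvec machinery removed throughout this merge unnecessary: a bio-based stacking driver hands the incoming bio to the block core, which splits it against the queue limits, so the driver never sees a bio larger than it can handle and no longer has to publish a merge_bvec_fn. A sketch of the resulting make_request shape; example_make_request() is an invented name and the remapping step is elided:

#include <linux/blkdev.h>

static void example_make_request(struct request_queue *q, struct bio *bio)
{
        /* May replace 'bio' with a smaller front piece and requeue the rest. */
        blk_queue_split(q, &bio, q->bio_split);

        /* ... remap bio->bi_iter.bi_sector / bio->bi_bdev as needed ... */

        generic_make_request(bio);
}
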
@@@ -2270,6 -2220,8 +2216,6 @@@ static void dm_init_old_md_queue(struc
  
  static void cleanup_mapped_device(struct mapped_device *md)
  {
 -      cleanup_srcu_struct(&md->io_barrier);
 -
        if (md->wq)
                destroy_workqueue(md->wq);
        if (md->kworker_task)
        if (md->bs)
                bioset_free(md->bs);
  
 +      cleanup_srcu_struct(&md->io_barrier);
 +
        if (md->disk) {
                spin_lock(&_minor_lock);
                md->disk->private_data = NULL;
@@@ -2495,59 -2445,6 +2441,6 @@@ static void __set_size(struct mapped_de
        i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
  }
  
- /*
-  * Return 1 if the queue has a compulsory merge_bvec_fn function.
-  *
-  * If this function returns 0, then the device is either a non-dm
-  * device without a merge_bvec_fn, or it is a dm device that is
-  * able to split any bios it receives that are too big.
-  */
- int dm_queue_merge_is_compulsory(struct request_queue *q)
- {
-       struct mapped_device *dev_md;
-       if (!q->merge_bvec_fn)
-               return 0;
-       if (q->make_request_fn == dm_make_request) {
-               dev_md = q->queuedata;
-               if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
-                       return 0;
-       }
-       return 1;
- }
- static int dm_device_merge_is_compulsory(struct dm_target *ti,
-                                        struct dm_dev *dev, sector_t start,
-                                        sector_t len, void *data)
- {
-       struct block_device *bdev = dev->bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
-       return dm_queue_merge_is_compulsory(q);
- }
- /*
-  * Return 1 if it is acceptable to ignore merge_bvec_fn based
-  * on the properties of the underlying devices.
-  */
- static int dm_table_merge_is_optional(struct dm_table *table)
- {
-       unsigned i = 0;
-       struct dm_target *ti;
-       while (i < dm_table_get_num_targets(table)) {
-               ti = dm_table_get_target(table, i++);
-               if (ti->type->iterate_devices &&
-                   ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
-                       return 0;
-       }
-       return 1;
- }
  /*
   * Returns old map, which caller must destroy.
   */
@@@ -2557,7 -2454,6 +2450,6 @@@ static struct dm_table *__bind(struct m
        struct dm_table *old_map;
        struct request_queue *q = md->queue;
        sector_t size;
-       int merge_is_optional;
  
        size = dm_table_get_size(t);
  
  
        __bind_mempools(md, t);
  
-       merge_is_optional = dm_table_merge_is_optional(t);
        old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
        rcu_assign_pointer(md->map, t);
        md->immutable_target_type = dm_table_get_immutable_target_type(t);
  
        dm_table_set_restrictions(t, q, limits);
-       if (merge_is_optional)
-               set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
-       else
-               clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
        if (old_map)
                dm_sync_table(md);
  
@@@ -2874,7 -2764,6 +2760,6 @@@ int dm_setup_md_queue(struct mapped_dev
        case DM_TYPE_BIO_BASED:
                dm_init_old_md_queue(md);
                blk_queue_make_request(md->queue, dm_make_request);
-               blk_queue_merge_bvec(md->queue, dm_merge_bvec);
                break;
        }
  
diff --combined drivers/md/md.c
index e25f00f0138a7b4d82a5ae4f6fc7e1b6f0bb1b30,d28bf5cea2243e9b620895758758f01fbc560e23..40332625713b9758e2c0d1789996e356f4ad10a7
@@@ -257,13 -257,17 +257,17 @@@ static void md_make_request(struct requ
        unsigned int sectors;
        int cpu;
  
+       blk_queue_split(q, &bio, q->bio_split);
        if (mddev == NULL || mddev->pers == NULL
            || !mddev->ready) {
                bio_io_error(bio);
                return;
        }
        if (mddev->ro == 1 && unlikely(rw == WRITE)) {
-               bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
+               if (bio_sectors(bio) != 0)
+                       bio->bi_error = -EROFS;
+               bio_endio(bio);
                return;
        }
        smp_rmb(); /* Ensure implications of  'active' are visible */
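
md_make_request() gets the same blk_queue_split() treatment, and its read-only check now spells out the old bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS) shorthand: a write to a read-only array fails with -EROFS unless it carries no data (an empty flush), which still completes successfully. A small sketch of that rule in isolation, with invented names:

#include <linux/bio.h>

static bool fail_write_if_readonly(struct bio *bio, bool array_readonly)
{
        if (!array_readonly || bio_data_dir(bio) != WRITE)
                return false;           /* let the request proceed */

        if (bio_sectors(bio) != 0)
                bio->bi_error = -EROFS; /* empty flushes still succeed */
        bio_endio(bio);
        return true;                    /* bio has been completed */
}
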
@@@ -350,34 -354,11 +354,11 @@@ static int md_congested(void *data, in
        return mddev_congested(mddev, bits);
  }
  
- static int md_mergeable_bvec(struct request_queue *q,
-                            struct bvec_merge_data *bvm,
-                            struct bio_vec *biovec)
- {
-       struct mddev *mddev = q->queuedata;
-       int ret;
-       rcu_read_lock();
-       if (mddev->suspended) {
-               /* Must always allow one vec */
-               if (bvm->bi_size == 0)
-                       ret = biovec->bv_len;
-               else
-                       ret = 0;
-       } else {
-               struct md_personality *pers = mddev->pers;
-               if (pers && pers->mergeable_bvec)
-                       ret = pers->mergeable_bvec(mddev, bvm, biovec);
-               else
-                       ret = biovec->bv_len;
-       }
-       rcu_read_unlock();
-       return ret;
- }
  /*
   * Generic flush handling for md
   */
  
- static void md_end_flush(struct bio *bio, int err)
+ static void md_end_flush(struct bio *bio)
  {
        struct md_rdev *rdev = bio->bi_private;
        struct mddev *mddev = rdev->mddev;
@@@ -433,7 -414,7 +414,7 @@@ static void md_submit_flush_data(struc
  
        if (bio->bi_iter.bi_size == 0)
                /* an empty barrier - all done */
-               bio_endio(bio, 0);
+               bio_endio(bio);
        else {
                bio->bi_rw &= ~REQ_FLUSH;
                mddev->pers->make_request(mddev, bio);
@@@ -728,15 -709,13 +709,13 @@@ void md_rdev_clear(struct md_rdev *rdev
  }
  EXPORT_SYMBOL_GPL(md_rdev_clear);
  
- static void super_written(struct bio *bio, int error)
+ static void super_written(struct bio *bio)
  {
        struct md_rdev *rdev = bio->bi_private;
        struct mddev *mddev = rdev->mddev;
  
-       if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-               printk("md: super_written gets error=%d, uptodate=%d\n",
-                      error, test_bit(BIO_UPTODATE, &bio->bi_flags));
-               WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
+       if (bio->bi_error) {
+               printk("md: super_written gets error=%d\n", bio->bi_error);
                md_error(mddev, rdev);
        }
  
@@@ -791,7 -770,7 +770,7 @@@ int sync_page_io(struct md_rdev *rdev, 
        bio_add_page(bio, page, size, 0);
        submit_bio_wait(rw, bio);
  
-       ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       ret = !bio->bi_error;
        bio_put(bio);
        return ret;
  }
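
With BIO_UPTODATE gone, synchronous callers such as sync_page_io() judge success purely by bio->bi_error after submit_bio_wait() returns. A minimal sketch of that sequence, assuming the 4.3-era submit_bio_wait(rw, bio) signature shown above and using an invented function name:

#include <linux/bio.h>
#include <linux/blkdev.h>

static int read_one_page(struct block_device *bdev, sector_t sector,
                         struct page *page)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        int ret;

        if (!bio)
                return -ENOMEM;
        bio->bi_bdev = bdev;
        bio->bi_iter.bi_sector = sector;
        bio_add_page(bio, page, PAGE_SIZE, 0);

        submit_bio_wait(READ, bio);
        ret = bio->bi_error;            /* 0 means the read succeeded */
        bio_put(bio);
        return ret;
}
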
@@@ -5186,7 -5165,6 +5165,6 @@@ int md_run(struct mddev *mddev
        if (mddev->queue) {
                mddev->queue->backing_dev_info.congested_data = mddev;
                mddev->queue->backing_dev_info.congested_fn = md_congested;
-               blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec);
        }
        if (pers->sync_request) {
                if (mddev->kobj.sd &&
@@@ -5315,7 -5293,6 +5293,6 @@@ static void md_clean(struct mddev *mdde
        mddev->degraded = 0;
        mddev->safemode = 0;
        mddev->private = NULL;
-       mddev->merge_check_needed = 0;
        mddev->bitmap_info.offset = 0;
        mddev->bitmap_info.default_offset = 0;
        mddev->bitmap_info.default_space = 0;
@@@ -5382,8 -5359,6 +5359,8 @@@ static void __md_stop(struct mddev *mdd
  {
        struct md_personality *pers = mddev->pers;
        mddev_detach(mddev);
 +      /* Ensure ->event_work is done */
 +      flush_workqueue(md_misc_wq);
        spin_lock(&mddev->lock);
        mddev->ready = 0;
        mddev->pers = NULL;
@@@ -5514,7 -5489,6 +5491,6 @@@ static int do_md_stop(struct mddev *mdd
  
                __md_stop_writes(mddev);
                __md_stop(mddev);
-               mddev->queue->merge_bvec_fn = NULL;
                mddev->queue->backing_dev_info.congested_fn = NULL;
  
                /* tell userspace to handle 'inactive' */
@@@ -5759,7 -5733,7 +5735,7 @@@ static int get_bitmap_file(struct mdde
        char *ptr;
        int err;
  
 -      file = kmalloc(sizeof(*file), GFP_NOIO);
 +      file = kzalloc(sizeof(*file), GFP_NOIO);
        if (!file)
                return -ENOMEM;
  
@@@ -7439,7 -7413,7 +7415,7 @@@ int md_setup_cluster(struct mddev *mdde
        err = request_module("md-cluster");
        if (err) {
                pr_err("md-cluster module not found.\n");
 -              return err;
 +              return -ENOENT;
        }
  
        spin_lock(&pers_lock);
diff --combined drivers/md/raid1.c
index 967a4ed73929ff44a38d9475c5e362fc2914c758,0ff06fdb83a9b4d7bcc8036923833f0f53cad7a9..f39d69f884de5b5ef8033fc16fa653f2d4bd47bf
@@@ -255,9 -255,10 +255,10 @@@ static void call_bio_endio(struct r1bi
                done = 1;
  
        if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
        if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                /*
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
@@@ -312,9 -313,9 +313,9 @@@ static int find_bio_disk(struct r1bio *
        return mirror;
  }
  
- static void raid1_end_read_request(struct bio *bio, int error)
+ static void raid1_end_read_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r1bio *r1_bio = bio->bi_private;
        int mirror;
        struct r1conf *conf = r1_bio->mddev->private;
                spin_lock_irqsave(&conf->device_lock, flags);
                if (r1_bio->mddev->degraded == conf->raid_disks ||
                    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
 -                   !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
 +                   test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
                        uptodate = 1;
                spin_unlock_irqrestore(&conf->device_lock, flags);
        }
@@@ -397,9 -398,8 +398,8 @@@ static void r1_bio_write_done(struct r1
        }
  }
  
- static void raid1_end_write_request(struct bio *bio, int error)
+ static void raid1_end_write_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r1bio *r1_bio = bio->bi_private;
        int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
        struct r1conf *conf = r1_bio->mddev->private;
        /*
         * 'one mirror IO has finished' event handler:
         */
-       if (!uptodate) {
+       if (bio->bi_error) {
                set_bit(WriteErrorSeen,
                        &conf->mirrors[mirror].rdev->flags);
                if (!test_and_set_bit(WantReplacement,
@@@ -541,7 -541,7 +541,7 @@@ static int read_balance(struct r1conf *
  
        if ((conf->mddev->recovery_cp < this_sector + sectors) ||
            (mddev_is_clustered(conf->mddev) &&
 -          md_cluster_ops->area_resyncing(conf->mddev, this_sector,
 +          md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
                    this_sector + sectors)))
                choose_first = 1;
        else
                rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (r1_bio->bios[disk] == IO_BLOCKED
                    || rdev == NULL
-                   || test_bit(Unmerged, &rdev->flags)
                    || test_bit(Faulty, &rdev->flags))
                        continue;
                if (!test_bit(In_sync, &rdev->flags) &&
        return best_disk;
  }
  
- static int raid1_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       struct r1conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max = biovec->bv_len;
-       if (mddev->merge_check_needed) {
-               int disk;
-               rcu_read_lock();
-               for (disk = 0; disk < conf->raid_disks * 2; disk++) {
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = sector +
-                                               rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
- }
  static int raid1_congested(struct mddev *mddev, int bits)
  {
        struct r1conf *conf = mddev->private;
@@@ -793,7 -760,7 +760,7 @@@ static void flush_pending_writes(struc
                        if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                            !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                        else
                                generic_make_request(bio);
                        bio = next;
@@@ -1068,7 -1035,7 +1035,7 @@@ static void raid1_unplug(struct blk_plu
                if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                        /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                else
                        generic_make_request(bio);
                bio = next;
@@@ -1111,8 -1078,7 +1078,8 @@@ static void make_request(struct mddev *
            ((bio_end_sector(bio) > mddev->suspend_lo &&
            bio->bi_iter.bi_sector < mddev->suspend_hi) ||
            (mddev_is_clustered(mddev) &&
 -           md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
 +           md_cluster_ops->area_resyncing(mddev, WRITE,
 +                   bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
                /* As the suspend_* range is controlled by
                 * userspace, we want an interruptible
                 * wait.
                        if (bio_end_sector(bio) <= mddev->suspend_lo ||
                            bio->bi_iter.bi_sector >= mddev->suspend_hi ||
                            (mddev_is_clustered(mddev) &&
 -                           !md_cluster_ops->area_resyncing(mddev,
 +                           !md_cluster_ops->area_resyncing(mddev, WRITE,
                                     bio->bi_iter.bi_sector, bio_end_sector(bio))))
                                break;
                        schedule();
         * non-zero, then it is the number of not-completed requests.
         */
        bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
  
        if (rw == READ) {
                /*
@@@ -1269,8 -1235,7 +1236,7 @@@ read_again
                        break;
                }
                r1_bio->bios[i] = NULL;
-               if (!rdev || test_bit(Faulty, &rdev->flags)
-                   || test_bit(Unmerged, &rdev->flags)) {
+               if (!rdev || test_bit(Faulty, &rdev->flags)) {
                        if (i < conf->raid_disks)
                                set_bit(R1BIO_Degraded, &r1_bio->state);
                        continue;
@@@ -1476,7 -1441,6 +1442,7 @@@ static void error(struct mddev *mddev, 
  {
        char b[BDEVNAME_SIZE];
        struct r1conf *conf = mddev->private;
 +      unsigned long flags;
  
        /*
         * If it is not operational, then we have already marked it as dead
                return;
        }
        set_bit(Blocked, &rdev->flags);
 +      spin_lock_irqsave(&conf->device_lock, flags);
        if (test_and_clear_bit(In_sync, &rdev->flags)) {
 -              unsigned long flags;
 -              spin_lock_irqsave(&conf->device_lock, flags);
                mddev->degraded++;
                set_bit(Faulty, &rdev->flags);
 -              spin_unlock_irqrestore(&conf->device_lock, flags);
        } else
                set_bit(Faulty, &rdev->flags);
 +      spin_unlock_irqrestore(&conf->device_lock, flags);
        /*
         * if recovery is running, make sure it aborts.
         */
@@@ -1568,10 -1533,7 +1534,10 @@@ static int raid1_spare_active(struct md
         * Find all failed disks within the RAID1 configuration
         * and mark them readable.
         * Called under mddev lock, so rcu protection not needed.
 +       * device_lock used to avoid races with raid1_end_read_request
 +       * which expects 'In_sync' flags and ->degraded to be consistent.
         */
 +      spin_lock_irqsave(&conf->device_lock, flags);
        for (i = 0; i < conf->raid_disks; i++) {
                struct md_rdev *rdev = conf->mirrors[i].rdev;
                struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
                        sysfs_notify_dirent_safe(rdev->sysfs_state);
                }
        }
 -      spin_lock_irqsave(&conf->device_lock, flags);
        mddev->degraded -= count;
        spin_unlock_irqrestore(&conf->device_lock, flags);
  
@@@ -1617,7 -1580,6 +1583,6 @@@ static int raid1_add_disk(struct mddev 
        struct raid1_info *p;
        int first = 0;
        int last = conf->raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
  
        if (mddev->recovery_disabled == conf->recovery_disabled)
                return -EBUSY;
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
  
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
        for (mirror = first; mirror <= last; mirror++) {
                p = conf->mirrors+mirror;
                if (!p->rdev) {
                        break;
                }
        }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@@ -1737,7 -1681,7 +1684,7 @@@ abort
        return err;
  }
  
- static void end_sync_read(struct bio *bio, int error)
+ static void end_sync_read(struct bio *bio)
  {
        struct r1bio *r1_bio = bio->bi_private;
  
         * or re-read if the read failed.
         * We don't do much here, just schedule handling by raid1d
         */
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                set_bit(R1BIO_Uptodate, &r1_bio->state);
  
        if (atomic_dec_and_test(&r1_bio->remaining))
                reschedule_retry(r1_bio);
  }
  
- static void end_sync_write(struct bio *bio, int error)
+ static void end_sync_write(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r1bio *r1_bio = bio->bi_private;
        struct mddev *mddev = r1_bio->mddev;
        struct r1conf *conf = mddev->private;
@@@ -1944,7 -1888,7 +1891,7 @@@ static int fix_sync_read_error(struct r
                idx ++;
        }
        set_bit(R1BIO_Uptodate, &r1_bio->state);
-       set_bit(BIO_UPTODATE, &bio->bi_flags);
+       bio->bi_error = 0;
        return 1;
  }
  
@@@ -1968,15 -1912,14 +1915,14 @@@ static void process_checks(struct r1bi
        for (i = 0; i < conf->raid_disks * 2; i++) {
                int j;
                int size;
-               int uptodate;
+               int error;
                struct bio *b = r1_bio->bios[i];
                if (b->bi_end_io != end_sync_read)
                        continue;
-               /* fixup the bio for reuse, but preserve BIO_UPTODATE */
-               uptodate = test_bit(BIO_UPTODATE, &b->bi_flags);
+               /* fixup the bio for reuse, but preserve errno */
+               error = b->bi_error;
                bio_reset(b);
-               if (!uptodate)
-                       clear_bit(BIO_UPTODATE, &b->bi_flags);
+               b->bi_error = error;
                b->bi_vcnt = vcnt;
                b->bi_iter.bi_size = r1_bio->sectors << 9;
                b->bi_iter.bi_sector = r1_bio->sector +
        }
        for (primary = 0; primary < conf->raid_disks * 2; primary++)
                if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-                   test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+                   !r1_bio->bios[primary]->bi_error) {
                        r1_bio->bios[primary]->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
                        break;
                int j;
                struct bio *pbio = r1_bio->bios[primary];
                struct bio *sbio = r1_bio->bios[i];
-               int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags);
+               int error = sbio->bi_error;
  
                if (sbio->bi_end_io != end_sync_read)
                        continue;
-               /* Now we can 'fixup' the BIO_UPTODATE flag */
-               set_bit(BIO_UPTODATE, &sbio->bi_flags);
+               /* Now we can 'fixup' the error value */
+               sbio->bi_error = 0;
  
-               if (uptodate) {
+               if (!error) {
                        for (j = vcnt; j-- ; ) {
                                struct page *p, *s;
                                p = pbio->bi_io_vec[j].bv_page;
                if (j >= 0)
                        atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
                if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-                             && uptodate)) {
+                             && !error)) {
                        /* No need to write to this device. */
                        sbio->bi_end_io = NULL;
                        rdev_dec_pending(conf->mirrors[i].rdev, mddev);
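
One subtlety the process_checks() hunk above documents: bio_reset() wipes bi_error along with the rest of the bio, so code that recycles a bio has to stash the completion status first, exactly as the old code preserved BIO_UPTODATE across the reset. In isolation, with reuse_bio() as an invented name:

#include <linux/bio.h>

static void reuse_bio(struct bio *b, sector_t sector)
{
        int error = b->bi_error;        /* keep the status across the reset */

        bio_reset(b);                   /* clears flags, error, iter, ... */
        b->bi_error = error;
        b->bi_iter.bi_sector = sector;
}
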
@@@ -2272,11 -2215,11 +2218,11 @@@ static void handle_sync_write_finished(
                struct bio *bio = r1_bio->bios[m];
                if (bio->bi_end_io == NULL)
                        continue;
-               if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (!bio->bi_error &&
                    test_bit(R1BIO_MadeGood, &r1_bio->state)) {
                        rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
                }
-               if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (bio->bi_error &&
                    test_bit(R1BIO_WriteError, &r1_bio->state)) {
                        if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
                                md_error(conf->mddev, rdev);
@@@ -2715,7 -2658,7 +2661,7 @@@ static sector_t sync_request(struct mdd
                                                /* remove last page from this bio */
                                                bio->bi_vcnt--;
                                                bio->bi_iter.bi_size -= len;
-                                               __clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+                                               bio_clear_flag(bio, BIO_SEG_VALID);
                                        }
                                        goto bio_full;
                                }
@@@ -2810,8 -2753,6 +2756,6 @@@ static struct r1conf *setup_conf(struc
                        goto abort;
                disk->rdev = rdev;
                q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
  
                disk->head_position = 0;
                disk->seq_start = MaxSector;
@@@ -3176,7 -3117,6 +3120,6 @@@ static struct md_personality raid1_pers
        .quiesce        = raid1_quiesce,
        .takeover       = raid1_takeover,
        .congested      = raid1_congested,
-       .mergeable_bvec = raid1_mergeable_bvec,
  };
  
  static int __init raid_init(void)
diff --combined drivers/md/raid10.c
index 38c58e19cfce3d7bdea554b26474080a88e02cca,d92098f3e65bdf1bcd1f8997b018d8e064610de3..b0fce2ebf7ad2679f209adc321811ccd6e0de121
@@@ -101,7 -101,7 +101,7 @@@ static int _enough(struct r10conf *conf
  static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                                int *skipped);
  static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
- static void end_reshape_write(struct bio *bio, int error);
+ static void end_reshape_write(struct bio *bio);
  static void end_reshape(struct r10conf *conf);
  
  static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@@ -307,9 -307,9 +307,9 @@@ static void raid_end_bio_io(struct r10b
        } else
                done = 1;
        if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
        if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                /*
                 * Wake up any possible resync thread that waits for the device
                 * to go idle.
@@@ -358,9 -358,9 +358,9 @@@ static int find_bio_disk(struct r10con
        return r10_bio->devs[slot].devnum;
  }
  
- static void raid10_end_read_request(struct bio *bio, int error)
+ static void raid10_end_read_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
        struct r10bio *r10_bio = bio->bi_private;
        int slot, dev;
        struct md_rdev *rdev;
@@@ -438,9 -438,8 +438,8 @@@ static void one_write_done(struct r10bi
        }
  }
  
- static void raid10_end_write_request(struct bio *bio, int error)
+ static void raid10_end_write_request(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r10bio *r10_bio = bio->bi_private;
        int dev;
        int dec_rdev = 1;
        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
-       if (!uptodate) {
+       if (bio->bi_error) {
                if (repl)
                        /* Never record new bad blocks to replacement,
                         * just fail it.
@@@ -672,93 -671,6 +671,6 @@@ static sector_t raid10_find_virt(struc
        return (vchunk << geo->chunk_shift) + offset;
  }
  
- /**
-  *    raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
-  *    @mddev: the md device
-  *    @bvm: properties of new bio
-  *    @biovec: the request that could be merged to it.
-  *
-  *    Return amount of bytes we can accept at this offset
-  *    This requires checking for end-of-chunk if near_copies != raid_disks,
-  *    and for subordinate merge_bvec_fns if merge_check_needed.
-  */
- static int raid10_mergeable_bvec(struct mddev *mddev,
-                                struct bvec_merge_data *bvm,
-                                struct bio_vec *biovec)
- {
-       struct r10conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max;
-       unsigned int chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
-       struct geom *geo = &conf->geo;
-       chunk_sectors = (conf->geo.chunk_mask & conf->prev.chunk_mask) + 1;
-       if (conf->reshape_progress != MaxSector &&
-           ((sector >= conf->reshape_progress) !=
-            conf->mddev->reshape_backwards))
-               geo = &conf->prev;
-       if (geo->near_copies < geo->raid_disks) {
-               max = (chunk_sectors - ((sector & (chunk_sectors - 1))
-                                       + bio_sectors)) << 9;
-               if (max < 0)
-                       /* bio_add cannot handle a negative return */
-                       max = 0;
-               if (max <= biovec->bv_len && bio_sectors == 0)
-                       return biovec->bv_len;
-       } else
-               max = biovec->bv_len;
-       if (mddev->merge_check_needed) {
-               struct {
-                       struct r10bio r10_bio;
-                       struct r10dev devs[conf->copies];
-               } on_stack;
-               struct r10bio *r10_bio = &on_stack.r10_bio;
-               int s;
-               if (conf->reshape_progress != MaxSector) {
-                       /* Cannot give any guidance during reshape */
-                       if (max <= biovec->bv_len && bio_sectors == 0)
-                               return biovec->bv_len;
-                       return 0;
-               }
-               r10_bio->sector = sector;
-               raid10_find_phys(conf, r10_bio);
-               rcu_read_lock();
-               for (s = 0; s < conf->copies; s++) {
-                       int disk = r10_bio->devs[s].devnum;
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = r10_bio->devs[s].addr
-                                               + rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-                       rdev = rcu_dereference(conf->mirrors[disk].replacement);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = r10_bio->devs[s].addr
-                                               + rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
- }
  /*
   * This routine returns the disk from which the requested read should
   * be done. There is a per-array 'next expected sequential IO' sector
@@@ -821,12 -733,10 +733,10 @@@ retry
                disk = r10_bio->devs[slot].devnum;
                rdev = rcu_dereference(conf->mirrors[disk].replacement);
                if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags) ||
                    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
                        rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (rdev == NULL ||
-                   test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags))
+                   test_bit(Faulty, &rdev->flags))
                        continue;
                if (!test_bit(In_sync, &rdev->flags) &&
                    r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
@@@ -957,7 -867,7 +867,7 @@@ static void flush_pending_writes(struc
                        if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                            !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                        else
                                generic_make_request(bio);
                        bio = next;
@@@ -1133,7 -1043,7 +1043,7 @@@ static void raid10_unplug(struct blk_pl
                if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                    !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                        /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                else
                        generic_make_request(bio);
                bio = next;
@@@ -1217,7 -1127,7 +1127,7 @@@ static void __make_request(struct mdde
         * non-zero, then it is the number of not-completed requests.
         */
        bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
  
        if (rw == READ) {
                /*
@@@ -1326,11 -1236,9 +1236,9 @@@ retry_write
                        blocked_rdev = rrdev;
                        break;
                }
-               if (rdev && (test_bit(Faulty, &rdev->flags)
-                            || test_bit(Unmerged, &rdev->flags)))
+               if (rdev && (test_bit(Faulty, &rdev->flags)))
                        rdev = NULL;
-               if (rrdev && (test_bit(Faulty, &rrdev->flags)
-                             || test_bit(Unmerged, &rrdev->flags)))
+               if (rrdev && (test_bit(Faulty, &rrdev->flags)))
                        rrdev = NULL;
  
                r10_bio->devs[i].bio = NULL;
@@@ -1777,7 -1685,6 +1685,6 @@@ static int raid10_add_disk(struct mdde
        int mirror;
        int first = 0;
        int last = conf->geo.raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
  
        if (mddev->recovery_cp < MaxSector)
                /* only hot-add to in-sync arrays, as recovery is
        if (rdev->raid_disk >= 0)
                first = last = rdev->raid_disk;
  
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
        if (rdev->saved_raid_disk >= first &&
            conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                mirror = rdev->saved_raid_disk;
                rcu_assign_pointer(p->rdev, rdev);
                break;
        }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@@ -1916,7 -1805,7 +1805,7 @@@ abort
        return err;
  }
  
- static void end_sync_read(struct bio *bio, int error)
+ static void end_sync_read(struct bio *bio)
  {
        struct r10bio *r10_bio = bio->bi_private;
        struct r10conf *conf = r10_bio->mddev->private;
        } else
                d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
  
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                set_bit(R10BIO_Uptodate, &r10_bio->state);
        else
                /* The write handler will notice the lack of
@@@ -1977,9 -1866,8 +1866,8 @@@ static void end_sync_request(struct r10
        }
  }
  
- static void end_sync_write(struct bio *bio, int error)
+ static void end_sync_write(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r10bio *r10_bio = bio->bi_private;
        struct mddev *mddev = r10_bio->mddev;
        struct r10conf *conf = mddev->private;
        else
                rdev = conf->mirrors[d].rdev;
  
-       if (!uptodate) {
+       if (bio->bi_error) {
                if (repl)
                        md_error(mddev, rdev);
                else {
@@@ -2044,7 -1932,7 +1932,7 @@@ static void sync_request_write(struct m
  
        /* find the first device with a block */
        for (i=0; i<conf->copies; i++)
-               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
+               if (!r10_bio->devs[i].bio->bi_error)
                        break;
  
        if (i == conf->copies)
                        continue;
                if (i == first)
                        continue;
-               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+               if (!r10_bio->devs[i].bio->bi_error) {
                        /* We know that the bi_io_vec layout is the same for
                         * both 'first' and 'i', so we just compare them.
                         * All vec entries are PAGE_SIZE;
@@@ -2394,7 -2282,6 +2282,6 @@@ static void fix_read_error(struct r10co
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (rdev &&
-                           !test_bit(Unmerged, &rdev->flags) &&
                            test_bit(In_sync, &rdev->flags) &&
                            is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
                                        &first_bad, &bad_sectors) == 0) {
                        d = r10_bio->devs[sl].devnum;
                        rdev = rcu_dereference(conf->mirrors[d].rdev);
                        if (!rdev ||
-                           test_bit(Unmerged, &rdev->flags) ||
                            !test_bit(In_sync, &rdev->flags))
                                continue;
  
@@@ -2706,8 -2592,7 +2592,7 @@@ static void handle_write_completed(stru
                        rdev = conf->mirrors[dev].rdev;
                        if (r10_bio->devs[m].bio == NULL)
                                continue;
-                       if (test_bit(BIO_UPTODATE,
-                                    &r10_bio->devs[m].bio->bi_flags)) {
+                       if (!r10_bio->devs[m].bio->bi_error) {
                                rdev_clear_badblocks(
                                        rdev,
                                        r10_bio->devs[m].addr,
                        rdev = conf->mirrors[dev].replacement;
                        if (r10_bio->devs[m].repl_bio == NULL)
                                continue;
-                       if (test_bit(BIO_UPTODATE,
-                                    &r10_bio->devs[m].repl_bio->bi_flags)) {
+                       if (!r10_bio->devs[m].repl_bio->bi_error) {
                                rdev_clear_badblocks(
                                        rdev,
                                        r10_bio->devs[m].addr,
                                        r10_bio->devs[m].addr,
                                        r10_bio->sectors, 0);
                                rdev_dec_pending(rdev, conf->mddev);
-                       } else if (bio != NULL &&
-                                  !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+                       } else if (bio != NULL && bio->bi_error) {
                                if (!narrow_write_error(r10_bio, m)) {
                                        md_error(conf->mddev, rdev);
                                        set_bit(R10BIO_Degraded,
@@@ -3263,7 -3147,7 +3147,7 @@@ static sector_t sync_request(struct mdd
  
                        bio = r10_bio->devs[i].bio;
                        bio_reset(bio);
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = -EIO;
                        if (conf->mirrors[d].rdev == NULL ||
                            test_bit(Faulty, &conf->mirrors[d].rdev->flags))
                                continue;
                        /* Need to set up for writing to the replacement */
                        bio = r10_bio->devs[i].repl_bio;
                        bio_reset(bio);
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = -EIO;
  
                        sector = r10_bio->devs[i].addr;
                        atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                                /* remove last page from this bio */
                                bio2->bi_vcnt--;
                                bio2->bi_iter.bi_size -= len;
-                               __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+                               bio_clear_flag(bio2, BIO_SEG_VALID);
                        }
                        goto bio_full;
                }
  
                if (bio->bi_end_io == end_sync_read) {
                        md_sync_acct(bio->bi_bdev, nr_sectors);
-                       set_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = 0;
                        generic_make_request(bio);
                }
        }
@@@ -3556,7 -3440,6 +3440,7 @@@ static struct r10conf *setup_conf(struc
                        /* far_copies must be 1 */
                        conf->prev.stride = conf->dev_sectors;
        }
 +      conf->reshape_safe = conf->reshape_progress;
        spin_lock_init(&conf->device_lock);
        INIT_LIST_HEAD(&conf->retry_list);
  
@@@ -3643,8 -3526,6 +3527,6 @@@ static int run(struct mddev *mddev
                        disk->rdev = rdev;
                }
                q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
                diff = (rdev->new_data_offset - rdev->data_offset);
                if (!mddev->reshape_backwards)
                        diff = -diff;
                }
                conf->offset_diff = min_offset_diff;
  
 -              conf->reshape_safe = conf->reshape_progress;
                clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
@@@ -4103,7 -3985,6 +3985,7 @@@ static int raid10_start_reshape(struct 
                conf->reshape_progress = size;
        } else
                conf->reshape_progress = 0;
 +      conf->reshape_safe = conf->reshape_progress;
        spin_unlock_irq(&conf->device_lock);
  
        if (mddev->delta_disks && mddev->bitmap) {
@@@ -4171,7 -4052,6 +4053,7 @@@ abort
                rdev->new_data_offset = rdev->data_offset;
        smp_wmb();
        conf->reshape_progress = MaxSector;
 +      conf->reshape_safe = MaxSector;
        mddev->reshape_position = MaxSector;
        spin_unlock_irq(&conf->device_lock);
        return ret;
@@@ -4382,7 -4262,7 +4264,7 @@@ read_more
        read_bio->bi_end_io = end_sync_read;
        read_bio->bi_rw = READ;
        read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-       __set_bit(BIO_UPTODATE, &read_bio->bi_flags);
+       read_bio->bi_error = 0;
        read_bio->bi_vcnt = 0;
        read_bio->bi_iter.bi_size = 0;
        r10_bio->master_bio = read_bio;
                                /* Remove last page from this bio */
                                bio2->bi_vcnt--;
                                bio2->bi_iter.bi_size -= len;
-                               __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+                               bio_clear_flag(bio2, BIO_SEG_VALID);
                        }
                        goto bio_full;
                }
@@@ -4526,7 -4406,6 +4408,7 @@@ static void end_reshape(struct r10conf 
        md_finish_reshape(conf->mddev);
        smp_wmb();
        conf->reshape_progress = MaxSector;
 +      conf->reshape_safe = MaxSector;
        spin_unlock_irq(&conf->device_lock);
  
        /* read-ahead size must cover two whole stripes, which is
@@@ -4604,9 -4483,8 +4486,8 @@@ static int handle_reshape_read_error(st
        return 0;
  }
  
- static void end_reshape_write(struct bio *bio, int error)
+ static void end_reshape_write(struct bio *bio)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct r10bio *r10_bio = bio->bi_private;
        struct mddev *mddev = r10_bio->mddev;
        struct r10conf *conf = mddev->private;
                rdev = conf->mirrors[d].rdev;
        }
  
-       if (!uptodate) {
+       if (bio->bi_error) {
                /* FIXME should record badblock */
                md_error(mddev, rdev);
        }
@@@ -4700,7 -4578,6 +4581,6 @@@ static struct md_personality raid10_per
        .start_reshape  = raid10_start_reshape,
        .finish_reshape = raid10_finish_reshape,
        .congested      = raid10_congested,
-       .mergeable_bvec = raid10_mergeable_bvec,
  };
  
  static int __init raid_init(void)
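
The bulk of the md churn above (and of the btrfs/f2fs hunks further down) is one mechanical conversion: bio completion callbacks lose their int error argument and read the status from the bio itself, while submitters store the error in bio->bi_error before calling the one-argument bio_endio(). A minimal before/after sketch; my_ctx and my_complete() are invented placeholders, not anything from these drivers:

/* before (<= 4.2): error passed in, success mirrored in BIO_UPTODATE */
static void my_end_io(struct bio *bio, int error)
{
        struct my_ctx *ctx = bio->bi_private;

        if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && !error)
                error = -EIO;
        my_complete(ctx, error);
        bio_put(bio);
}

/* after (4.3): the status lives in the bio */
static void my_end_io(struct bio *bio)
{
        struct my_ctx *ctx = bio->bi_private;

        my_complete(ctx, bio->bi_error);
        bio_put(bio);
}
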
diff --combined drivers/md/raid5.c
index f757023fc4580680bfdd6e178f93acb62cb1f31e,6d20692952d247893092a8bc8f51cee8be1783bb..b29e89cb815b93e0813537ca33249a7d0379d8c8
@@@ -233,7 -233,7 +233,7 @@@ static void return_io(struct bio *retur
                bi->bi_iter.bi_size = 0;
                trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                         bi, 0);
-               bio_endio(bi, 0);
+               bio_endio(bi);
                bi = return_bi;
        }
  }
@@@ -887,9 -887,9 +887,9 @@@ static int use_new_offset(struct r5con
  }
  
  static void
- raid5_end_read_request(struct bio *bi, int error);
+ raid5_end_read_request(struct bio *bi);
  static void
- raid5_end_write_request(struct bio *bi, int error);
+ raid5_end_write_request(struct bio *bi);
  
  static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
  {
@@@ -2162,9 -2162,6 +2162,9 @@@ static int resize_stripes(struct r5con
        if (!sc)
                return -ENOMEM;
  
 +      /* Need to ensure auto-resizing doesn't interfere */
 +      mutex_lock(&conf->cache_size_mutex);
 +
        for (i = conf->max_nr_stripes; i; i--) {
                nsh = alloc_stripe(sc, GFP_KERNEL);
                if (!nsh)
                        kmem_cache_free(sc, nsh);
                }
                kmem_cache_destroy(sc);
 +              mutex_unlock(&conf->cache_size_mutex);
                return -ENOMEM;
        }
        /* Step 2 - Must use GFP_NOIO now.
        } else
                err = -ENOMEM;
  
 +      mutex_unlock(&conf->cache_size_mutex);
        /* Step 4, return new stripes to service */
        while(!list_empty(&newstripes)) {
                nsh = list_entry(newstripes.next, struct stripe_head, lru);
  static int drop_one_stripe(struct r5conf *conf)
  {
        struct stripe_head *sh;
 -      int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
 +      int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
  
        spin_lock_irq(conf->hash_locks + hash);
        sh = get_free_stripe(conf, hash);
@@@ -2282,12 -2277,11 +2282,11 @@@ static void shrink_stripes(struct r5con
        conf->slab_cache = NULL;
  }
  
- static void raid5_end_read_request(struct bio * bi, int error)
+ static void raid5_end_read_request(struct bio * bi)
  {
        struct stripe_head *sh = bi->bi_private;
        struct r5conf *conf = sh->raid_conf;
        int disks = sh->disks, i;
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
        char b[BDEVNAME_SIZE];
        struct md_rdev *rdev = NULL;
        sector_t s;
                if (bi == &sh->dev[i].req)
                        break;
  
-       pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+       pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               uptodate);
+               bi->bi_error);
        if (i == disks) {
                BUG();
                return;
                s = sh->sector + rdev->new_data_offset;
        else
                s = sh->sector + rdev->data_offset;
-       if (uptodate) {
+       if (!bi->bi_error) {
                set_bit(R5_UPTODATE, &sh->dev[i].flags);
                if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                        /* Note that this cannot happen on a
        release_stripe(sh);
  }
  
- static void raid5_end_write_request(struct bio *bi, int error)
+ static void raid5_end_write_request(struct bio *bi)
  {
        struct stripe_head *sh = bi->bi_private;
        struct r5conf *conf = sh->raid_conf;
        int disks = sh->disks, i;
        struct md_rdev *uninitialized_var(rdev);
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
        sector_t first_bad;
        int bad_sectors;
        int replacement = 0;
                        break;
                }
        }
-       pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+       pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
                (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               uptodate);
+               bi->bi_error);
        if (i == disks) {
                BUG();
                return;
        }
  
        if (replacement) {
-               if (!uptodate)
+               if (bi->bi_error)
                        md_error(conf->mddev, rdev);
                else if (is_badblock(rdev, sh->sector,
                                     STRIPE_SECTORS,
                                     &first_bad, &bad_sectors))
                        set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
        } else {
-               if (!uptodate) {
+               if (bi->bi_error) {
                        set_bit(STRIPE_DEGRADED, &sh->state);
                        set_bit(WriteErrorSeen, &rdev->flags);
                        set_bit(R5_WriteError, &sh->dev[i].flags);
        }
        rdev_dec_pending(rdev, conf->mddev);
  
-       if (sh->batch_head && !uptodate && !replacement)
+       if (sh->batch_head && bi->bi_error && !replacement)
                set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
  
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@@@ -3112,7 -3105,8 +3110,8 @@@ handle_failed_stripe(struct r5conf *con
                while (bi && bi->bi_iter.bi_sector <
                        sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                        if (!raid5_dec_bi_active_stripes(bi)) {
                                md_write_end(conf->mddev);
                                bi->bi_next = *return_bi;
                while (bi && bi->bi_iter.bi_sector <
                       sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                        if (!raid5_dec_bi_active_stripes(bi)) {
                                md_write_end(conf->mddev);
                                bi->bi_next = *return_bi;
                               sh->dev[i].sector + STRIPE_SECTORS) {
                                struct bio *nextbi =
                                        r5_next_bio(bi, sh->dev[i].sector);
-                               clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                               bi->bi_error = -EIO;
                                if (!raid5_dec_bi_active_stripes(bi)) {
                                        bi->bi_next = *return_bi;
                                        *return_bi = bi;
@@@ -4066,10 -4062,8 +4067,10 @@@ static void analyse_stripe(struct strip
                                 &first_bad, &bad_sectors))
                        set_bit(R5_ReadRepl, &dev->flags);
                else {
 -                      if (rdev)
 +                      if (rdev && !test_bit(Faulty, &rdev->flags))
                                set_bit(R5_NeedReplace, &dev->flags);
 +                      else
 +                              clear_bit(R5_NeedReplace, &dev->flags);
                        rdev = rcu_dereference(conf->disks[i].rdev);
                        clear_bit(R5_ReadRepl, &dev->flags);
                }
@@@ -4669,35 -4663,6 +4670,6 @@@ static int raid5_congested(struct mdde
        return 0;
  }
  
- /* We want read requests to align with chunks where possible,
-  * but write requests don't need to.
-  */
- static int raid5_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max;
-       unsigned int chunk_sectors = mddev->chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
-       /*
-        * always allow writes to be mergeable, read as well if array
-        * is degraded as we'll go through stripe cache anyway.
-        */
-       if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
-               return biovec->bv_len;
-       if (mddev->new_chunk_sectors < mddev->chunk_sectors)
-               chunk_sectors = mddev->new_chunk_sectors;
-       max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
-       if (max < 0) max = 0;
-       if (max <= biovec->bv_len && bio_sectors == 0)
-               return biovec->bv_len;
-       else
-               return max;
- }
  static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
  {
        sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
@@@ -4756,13 -4721,13 +4728,13 @@@ static struct bio *remove_bio_from_retr
   *  first).
   *  If the read failed..
   */
- static void raid5_align_endio(struct bio *bi, int error)
+ static void raid5_align_endio(struct bio *bi)
  {
        struct bio* raid_bi  = bi->bi_private;
        struct mddev *mddev;
        struct r5conf *conf;
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
        struct md_rdev *rdev;
+       int error = bi->bi_error;
  
        bio_put(bi);
  
  
        rdev_dec_pending(rdev, conf->mddev);
  
-       if (!error && uptodate) {
+       if (!error) {
                trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
                                         raid_bi, 0);
-               bio_endio(raid_bi, 0);
+               bio_endio(raid_bi);
                if (atomic_dec_and_test(&conf->active_aligned_reads))
                        wake_up(&conf->wait_for_quiescent);
                return;
        add_bio_to_retry(raid_bi, conf);
  }
  
- static int bio_fits_rdev(struct bio *bi)
- {
-       struct request_queue *q = bdev_get_queue(bi->bi_bdev);
-       if (bio_sectors(bi) > queue_max_sectors(q))
-               return 0;
-       blk_recount_segments(q, bi);
-       if (bi->bi_phys_segments > queue_max_segments(q))
-               return 0;
-       if (q->merge_bvec_fn)
-               /* it's too hard to apply the merge_bvec_fn at this stage,
-                * just just give up
-                */
-               return 0;
-       return 1;
- }
- static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
+ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
  {
        struct r5conf *conf = mddev->private;
        int dd_idx;
        sector_t end_sector;
  
        if (!in_chunk_boundary(mddev, raid_bio)) {
-               pr_debug("chunk_aligned_read : non aligned\n");
+               pr_debug("%s: non aligned\n", __func__);
                return 0;
        }
        /*
                rcu_read_unlock();
                raid_bio->bi_next = (void*)rdev;
                align_bi->bi_bdev =  rdev->bdev;
-               __clear_bit(BIO_SEG_VALID, &align_bi->bi_flags);
+               bio_clear_flag(align_bi, BIO_SEG_VALID);
  
-               if (!bio_fits_rdev(align_bi) ||
-                   is_badblock(rdev, align_bi->bi_iter.bi_sector,
+               if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
                                bio_sectors(align_bi),
                                &first_bad, &bad_sectors)) {
-                       /* too big in some way, or has a known bad block */
                        bio_put(align_bi);
                        rdev_dec_pending(rdev, mddev);
                        return 0;
        }
  }
  
+ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
+ {
+       struct bio *split;
+       do {
+               sector_t sector = raid_bio->bi_iter.bi_sector;
+               unsigned chunk_sects = mddev->chunk_sectors;
+               unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+               if (sectors < bio_sectors(raid_bio)) {
+                       split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set);
+                       bio_chain(split, raid_bio);
+               } else
+                       split = raid_bio;
+               if (!raid5_read_one_chunk(mddev, split)) {
+                       if (split != raid_bio)
+                               generic_make_request(raid_bio);
+                       return split;
+               }
+       } while (split != raid_bio);
+       return NULL;
+ }
  /* __get_priority_stripe - get the next stripe to process
   *
   * Full stripe writes are allowed to pass preread active stripes up until
@@@ -5140,7 -5109,7 +5116,7 @@@ static void make_discard_request(struc
        remaining = raid5_dec_bi_active_stripes(bi);
        if (remaining == 0) {
                md_write_end(mddev);
-               bio_endio(bi, 0);
+               bio_endio(bi);
        }
  }
  
@@@ -5169,9 -5138,11 +5145,11 @@@ static void make_request(struct mddev *
         * data on failed drives.
         */
        if (rw == READ && mddev->degraded == 0 &&
-            mddev->reshape_position == MaxSector &&
-            chunk_aligned_read(mddev,bi))
-               return;
+           mddev->reshape_position == MaxSector) {
+               bi = chunk_aligned_read(mddev, bi);
+               if (!bi)
+                       return;
+       }
  
        if (unlikely(bi->bi_rw & REQ_DISCARD)) {
                make_discard_request(mddev, bi);
                        release_stripe_plug(mddev, sh);
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                        break;
                }
        }
  
                trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                         bi, 0);
-               bio_endio(bi, 0);
+               bio_endio(bi);
        }
  }
  
@@@ -5714,7 -5685,7 +5692,7 @@@ static int  retry_aligned_read(struct r
        if (remaining == 0) {
                trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
                                         raid_bio, 0);
-               bio_endio(raid_bio, 0);
+               bio_endio(raid_bio);
        }
        if (atomic_dec_and_test(&conf->active_aligned_reads))
                wake_up(&conf->wait_for_quiescent);
@@@ -5864,14 -5835,12 +5842,14 @@@ static void raid5d(struct md_thread *th
        pr_debug("%d stripes handled\n", handled);
  
        spin_unlock_irq(&conf->device_lock);
 -      if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
 +      if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
 +          mutex_trylock(&conf->cache_size_mutex)) {
                grow_one_stripe(conf, __GFP_NOWARN);
                /* Set flag even if allocation failed.  This helps
                 * slow down allocation requests when mem is short
                 */
                set_bit(R5_DID_ALLOC, &conf->cache_state);
 +              mutex_unlock(&conf->cache_size_mutex);
        }
  
        async_tx_issue_pending_all();
@@@ -5903,22 -5872,18 +5881,22 @@@ raid5_set_cache_size(struct mddev *mdde
                return -EINVAL;
  
        conf->min_nr_stripes = size;
 +      mutex_lock(&conf->cache_size_mutex);
        while (size < conf->max_nr_stripes &&
               drop_one_stripe(conf))
                ;
 +      mutex_unlock(&conf->cache_size_mutex);
  
  
        err = md_allow_write(mddev);
        if (err)
                return err;
  
 +      mutex_lock(&conf->cache_size_mutex);
        while (size > conf->max_nr_stripes)
                if (!grow_one_stripe(conf, GFP_KERNEL))
                        break;
 +      mutex_unlock(&conf->cache_size_mutex);
  
        return 0;
  }
@@@ -6384,19 -6349,11 +6362,19 @@@ static unsigned long raid5_cache_scan(s
                                      struct shrink_control *sc)
  {
        struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
 -      int ret = 0;
 -      while (ret < sc->nr_to_scan) {
 -              if (drop_one_stripe(conf) == 0)
 -                      return SHRINK_STOP;
 -              ret++;
 +      unsigned long ret = SHRINK_STOP;
 +
 +      if (mutex_trylock(&conf->cache_size_mutex)) {
 +              ret= 0;
 +              while (ret < sc->nr_to_scan &&
 +                     conf->max_nr_stripes > conf->min_nr_stripes) {
 +                      if (drop_one_stripe(conf) == 0) {
 +                              ret = SHRINK_STOP;
 +                              break;
 +                      }
 +                      ret++;
 +              }
 +              mutex_unlock(&conf->cache_size_mutex);
        }
        return ret;
  }
@@@ -6465,7 -6422,6 +6443,7 @@@ static struct r5conf *setup_conf(struc
                goto abort;
        spin_lock_init(&conf->device_lock);
        seqcount_init(&conf->gen_lock);
 +      mutex_init(&conf->cache_size_mutex);
        init_waitqueue_head(&conf->wait_for_quiescent);
        for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
                init_waitqueue_head(&conf->wait_for_stripe[i]);
@@@ -7779,7 -7735,6 +7757,6 @@@ static struct md_personality raid6_pers
        .quiesce        = raid5_quiesce,
        .takeover       = raid6_takeover,
        .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
  };
  static struct md_personality raid5_personality =
  {
        .quiesce        = raid5_quiesce,
        .takeover       = raid5_takeover,
        .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
  };
  
  static struct md_personality raid4_personality =
        .quiesce        = raid5_quiesce,
        .takeover       = raid4_takeover,
        .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
  };
  
  static int __init raid5_init(void)
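
With raid5_mergeable_bvec() and bio_fits_rdev() gone, raid5 no longer shapes incoming bios up front; chunk_aligned_read() above instead splits an oversized read at the chunk boundary with bio_split() and bio_chain(). A rough sketch of that split-and-chain idiom on its own, assuming a power-of-two chunk size; submit_within_chunk() is an invented name:

static void submit_within_chunk(struct mddev *mddev, struct bio *bio)
{
        unsigned int chunk_sects = mddev->chunk_sectors;
        sector_t sector = bio->bi_iter.bi_sector;
        unsigned int sectors = chunk_sects - (sector & (chunk_sects - 1));

        if (sectors < bio_sectors(bio)) {
                /* front piece ends exactly on the chunk boundary */
                struct bio *split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);

                bio_chain(split, bio);          /* parent completes after child */
                generic_make_request(split);
        }
        /* remainder (or the whole bio if it already fit); resubmitting it
         * lets it be split again should it still cross a boundary */
        generic_make_request(bio);
}
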
diff --combined drivers/mtd/mtd_blkdevs.c
index 88304751eb8a9f1be47e3838d80fd4782cbf862c,1b96cf771d2b53bf50d692b6cd7a53bf5ebf2d8f..44dc965a2f7c0d10d2f55245568274a59db0a1ff
@@@ -97,13 -97,14 +97,13 @@@ static int do_blktrans_request(struct m
        if (req->cmd_flags & REQ_DISCARD)
                return tr->discard(dev, block, nsect);
  
 -      switch(rq_data_dir(req)) {
 -      case READ:
 +      if (rq_data_dir(req) == READ) {
                for (; nsect > 0; nsect--, block++, buf += tr->blksize)
                        if (tr->readsect(dev, block, buf))
                                return -EIO;
                rq_flush_dcache_pages(req);
                return 0;
 -      case WRITE:
 +      } else {
                if (!tr->writesect)
                        return -EIO;
  
                        if (tr->writesect(dev, block, buf))
                                return -EIO;
                return 0;
 -      default:
 -              printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
 -              return -EIO;
        }
  }
  
@@@ -419,7 -423,7 +419,7 @@@ int add_mtd_blktrans_dev(struct mtd_blk
  
        if (tr->discard) {
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
-               new->rq->limits.max_discard_sectors = UINT_MAX;
+               blk_queue_max_discard_sectors(new->rq, UINT_MAX);
        }
  
        gd->queue = new->rq;
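
The mtd_blkdevs hunk above, like the sd.c hunk later on, stops poking q->limits.max_discard_sectors directly and goes through blk_queue_max_discard_sectors() instead, so the block core keeps the driver's discard limit in its own bookkeeping. Roughly, for a hypothetical driver that supports discard:

static void my_enable_discard(struct request_queue *q)
{
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
        /* record the device's discard ceiling via the helper rather than
         * the limits struct; UINT_MAX means no driver-imposed cap here */
        blk_queue_max_discard_sectors(q, UINT_MAX);
}
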
diff --combined drivers/s390/block/dcssblk.c
index dff3fcb69a785102ab2905b3504606688d11b270,29ea2394c8966ff55380a8c5fd9414f4d32dc45e..2b744fbba68e04d699c31552a2ee5ca9c56ba86c
@@@ -548,10 -548,10 +548,10 @@@ dcssblk_add_store(struct device *dev, s
         */
        num_of_segments = 0;
        for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) {
 -              for (j = i; (buf[j] != ':') &&
 +              for (j = i; j < count &&
 +                      (buf[j] != ':') &&
                        (buf[j] != '\0') &&
 -                      (buf[j] != '\n') &&
 -                      j < count; j++) {
 +                      (buf[j] != '\n'); j++) {
                        local_buf[j-i] = toupper(buf[j]);
                }
                local_buf[j-i] = '\0';
@@@ -723,7 -723,7 +723,7 @@@ dcssblk_remove_store(struct device *dev
        /*
         * parse input
         */
 -      for (i = 0; ((*(buf+i)!='\0') && (*(buf+i)!='\n') && i < count); i++) {
 +      for (i = 0; (i < count && (*(buf+i)!='\0') && (*(buf+i)!='\n')); i++) {
                local_buf[i] = toupper(buf[i]);
        }
        local_buf[i] = '\0';
@@@ -826,6 -826,8 +826,8 @@@ dcssblk_make_request(struct request_que
        unsigned long source_addr;
        unsigned long bytes_done;
  
+       blk_queue_split(q, &bio, q->bio_split);
        bytes_done = 0;
        dev_info = bio->bi_bdev->bd_disk->private_data;
        if (dev_info == NULL)
                }
                bytes_done += bvec.bv_len;
        }
-       bio_endio(bio, 0);
+       bio_endio(bio);
        return;
  fail:
        bio_io_error(bio);
@@@ -904,10 -906,10 +906,10 @@@ dcssblk_check_params(void
  
        for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0');
             i++) {
 -              for (j = i; (dcssblk_segments[j] != ',')  &&
 +              for (j = i; (j < DCSSBLK_PARM_LEN) &&
 +                          (dcssblk_segments[j] != ',')  &&
                            (dcssblk_segments[j] != '\0') &&
 -                          (dcssblk_segments[j] != '(')  &&
 -                          (j < DCSSBLK_PARM_LEN); j++)
 +                          (dcssblk_segments[j] != '('); j++)
                {
                        buf[j-i] = dcssblk_segments[j];
                }
diff --combined drivers/scsi/sd.c
index a20da8c25b4f960224fb4d772aafea38c57e1656,160e44e7b24a215ddad5b98c925385a58535c74a..3f370228bf310a223eaee279c4839a5c9602410f
@@@ -647,7 -647,7 +647,7 @@@ static void sd_config_discard(struct sc
        switch (mode) {
  
        case SD_LBP_DISABLE:
-               q->limits.max_discard_sectors = 0;
+               blk_queue_max_discard_sectors(q, 0);
                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
                return;
  
                break;
        }
  
-       q->limits.max_discard_sectors = max_blocks * (logical_block_size >> 9);
+       blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
  }
  
@@@ -2770,9 -2770,9 +2770,9 @@@ static int sd_revalidate_disk(struct ge
        max_xfer = sdkp->max_xfer_blocks;
        max_xfer <<= ilog2(sdp->sector_size) - 9;
  
 -      max_xfer = min_not_zero(queue_max_hw_sectors(sdkp->disk->queue),
 -                              max_xfer);
 -      blk_queue_max_hw_sectors(sdkp->disk->queue, max_xfer);
 +      sdkp->disk->queue->limits.max_sectors =
 +              min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer);
 +
        set_capacity(disk, sdkp->capacity);
        sd_config_write_same(sdkp);
        kfree(buffer);
diff --combined drivers/staging/lustre/lustre/llite/lloop.c
index 06f5e51ecd9e1a8141c275232278dd6f081ef68a,1e33d540b2234a6ac7953feff2150a90f963a7b0..5f0d80cc97183e2104c1fcbcd71610750bc08626
@@@ -162,7 -162,7 +162,7 @@@ static int max_loop = MAX_LOOP_DEFAULT
  static struct lloop_device *loop_dev;
  static struct gendisk **disks;
  static struct mutex lloop_mutex;
 -static void *ll_iocontrol_magic = NULL;
 +static void *ll_iocontrol_magic;
  
  static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
  {
@@@ -340,6 -340,8 +340,8 @@@ static void loop_make_request(struct re
        int rw = bio_rw(old_bio);
        int inactive;
  
+       blk_queue_split(q, &old_bio, q->bio_split);
        if (!lo)
                goto err;
  
        loop_add_bio(lo, old_bio);
        return;
  err:
 -      cfs_bio_io_error(old_bio, old_bio->bi_iter.bi_size);
 +      bio_io_error(old_bio);
  }
  
  
@@@ -376,7 -378,7 +378,8 @@@ static inline void loop_handle_bio(stru
        while (bio) {
                struct bio *tmp = bio->bi_next;
                bio->bi_next = NULL;
-               bio_endio(bio, ret);
 -              cfs_bio_endio(bio, bio->bi_iter.bi_size, ret);
++              bio->bi_error = ret;
++              bio_endio(bio);
                bio = tmp;
        }
  }
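
dcssblk and lloop show the driver-side half of the arbitrarily-sized-bios change: a bio-based driver that cannot handle a large bio calls blk_queue_split() first and lets the core split the bio against the queue limits before doing any work. A bare-bones sketch; my_make_request() and my_handle_bio() are invented names:

static void my_make_request(struct request_queue *q, struct bio *bio)
{
        blk_queue_split(q, &bio, q->bio_split);

        if (my_handle_bio(q->queuedata, bio)) {
                bio_io_error(bio);      /* sets bi_error = -EIO and completes */
                return;
        }
        bio_endio(bio);                 /* success: bi_error stays 0 */
}
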
diff --combined fs/btrfs/disk-io.c
index f556c3732c2c16e22e0bcbd35f9ee1277179be5b,a8c0de888a9d564ebdc3f219aa98a8a44e67af71..5e307bd0471ab29fc166f7e7313835d2c80a6971
@@@ -703,7 -703,7 +703,7 @@@ static int btree_io_failed_hook(struct 
        return -EIO;    /* we fixed nothing */
  }
  
- static void end_workqueue_bio(struct bio *bio, int err)
+ static void end_workqueue_bio(struct bio *bio)
  {
        struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
        struct btrfs_fs_info *fs_info;
        btrfs_work_func_t func;
  
        fs_info = end_io_wq->info;
-       end_io_wq->error = err;
+       end_io_wq->error = bio->bi_error;
  
        if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@@ -808,7 -808,8 +808,8 @@@ static void run_one_async_done(struct b
  
        /* If an error occured we just want to clean up the bio and move on */
        if (async->error) {
-               bio_endio(async->bio, async->error);
+               async->bio->bi_error = async->error;
+               bio_endio(async->bio);
                return;
        }
  
@@@ -908,8 -909,10 +909,10 @@@ static int __btree_submit_bio_done(stru
         * submission context.  Just jump into btrfs_map_bio
         */
        ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
-       if (ret)
-               bio_endio(bio, ret);
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
        return ret;
  }
  
@@@ -960,10 -963,13 +963,13 @@@ static int btree_submit_bio_hook(struc
                                          __btree_submit_bio_done);
        }
  
-       if (ret) {
+       if (ret)
+               goto out_w_error;
+       return 0;
  out_w_error:
-               bio_endio(bio, ret);
-       }
+       bio->bi_error = ret;
+       bio_endio(bio);
        return ret;
  }
  
@@@ -1735,16 -1741,15 +1741,15 @@@ static void end_workqueue_fn(struct btr
  {
        struct bio *bio;
        struct btrfs_end_io_wq *end_io_wq;
-       int error;
  
        end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
        bio = end_io_wq->bio;
  
-       error = end_io_wq->error;
+       bio->bi_error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
        bio->bi_end_io = end_io_wq->end_io;
        kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-       bio_endio(bio, error);
+       bio_endio(bio);
  }
  
  static int cleaner_kthread(void *arg)
@@@ -2842,7 -2847,6 +2847,7 @@@ int open_ctree(struct super_block *sb
            !extent_buffer_uptodate(chunk_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
 +              chunk_root->node = NULL;
                goto fail_tree_roots;
        }
        btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
@@@ -2880,7 -2884,7 +2885,7 @@@ retry_root_backup
            !extent_buffer_uptodate(tree_root->node)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
 -
 +              tree_root->node = NULL;
                goto recovery_tree_root;
        }
  
@@@ -3324,10 -3328,8 +3329,8 @@@ static int write_dev_supers(struct btrf
   * endio for the write_dev_flush, this will wake anyone waiting
   * for the barrier when it is done
   */
- static void btrfs_end_empty_barrier(struct bio *bio, int err)
+ static void btrfs_end_empty_barrier(struct bio *bio)
  {
-       if (err)
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
        if (bio->bi_private)
                complete(bio->bi_private);
        bio_put(bio);
@@@ -3355,8 -3357,8 +3358,8 @@@ static int write_dev_flush(struct btrfs
  
                wait_for_completion(&device->flush_wait);
  
-               if (!bio_flagged(bio, BIO_UPTODATE)) {
-                       ret = -EIO;
+               if (bio->bi_error) {
+                       ret = bio->bi_error;
                        btrfs_dev_stat_inc_and_print(device,
                                BTRFS_DEV_STAT_FLUSH_ERRS);
                }
diff --combined fs/btrfs/inode.c
index e33dff356460687fcade4b56202f83f318e3ebb4,8635ef01a04a989d13d2aef27d7130c383a17874..f924d9a6270075d785ebbd41b617ffc219fa0493
@@@ -1845,8 -1845,10 +1845,10 @@@ static int __btrfs_submit_bio_done(stru
        int ret;
  
        ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
-       if (ret)
-               bio_endio(bio, ret);
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
        return ret;
  }
  
@@@ -1906,8 -1908,10 +1908,10 @@@ mapit
        ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
  
  out:
-       if (ret < 0)
-               bio_endio(bio, ret);
+       if (ret < 0) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
        return ret;
  }
  
@@@ -4209,7 -4213,7 +4213,7 @@@ int btrfs_truncate_inode_items(struct b
        u64 extent_num_bytes = 0;
        u64 extent_offset = 0;
        u64 item_end = 0;
 -      u64 last_size = (u64)-1;
 +      u64 last_size = new_size;
        u32 found_type = (u8)-1;
        int found_extent;
        int del_item;
@@@ -4493,7 -4497,8 +4497,7 @@@ out
                        btrfs_abort_transaction(trans, root, ret);
        }
  error:
 -      if (last_size != (u64)-1 &&
 -          root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
 +      if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                btrfs_ordered_update_i_size(inode, last_size, NULL);
  
        btrfs_free_path(path);
@@@ -7688,13 -7693,13 +7692,13 @@@ struct btrfs_retry_complete 
        int uptodate;
  };
  
- static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
+ static void btrfs_retry_endio_nocsum(struct bio *bio)
  {
        struct btrfs_retry_complete *done = bio->bi_private;
        struct bio_vec *bvec;
        int i;
  
-       if (err)
+       if (bio->bi_error)
                goto end;
  
        done->uptodate = 1;
@@@ -7743,7 -7748,7 +7747,7 @@@ try_again
        return 0;
  }
  
- static void btrfs_retry_endio(struct bio *bio, int err)
+ static void btrfs_retry_endio(struct bio *bio)
  {
        struct btrfs_retry_complete *done = bio->bi_private;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        int ret;
        int i;
  
-       if (err)
+       if (bio->bi_error)
                goto end;
  
        uptodate = 1;
@@@ -7835,12 -7840,13 +7839,13 @@@ static int btrfs_subio_endio_read(struc
        }
  }
  
- static void btrfs_endio_direct_read(struct bio *bio, int err)
+ static void btrfs_endio_direct_read(struct bio *bio)
  {
        struct btrfs_dio_private *dip = bio->bi_private;
        struct inode *inode = dip->inode;
        struct bio *dio_bio;
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       int err = bio->bi_error;
  
        if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                err = btrfs_subio_endio_read(inode, io_bio, err);
  
        kfree(dip);
  
-       /* If we had a csum failure make sure to clear the uptodate flag */
-       if (err)
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-       dio_end_io(dio_bio, err);
+       dio_end_io(dio_bio, bio->bi_error);
  
        if (io_bio->end_io)
                io_bio->end_io(io_bio, err);
        bio_put(bio);
  }
  
- static void btrfs_endio_direct_write(struct bio *bio, int err)
+ static void btrfs_endio_direct_write(struct bio *bio)
  {
        struct btrfs_dio_private *dip = bio->bi_private;
        struct inode *inode = dip->inode;
  again:
        ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                   &ordered_offset,
-                                                  ordered_bytes, !err);
+                                                  ordered_bytes,
+                                                  !bio->bi_error);
        if (!ret)
                goto out_test;
  
@@@ -7898,10 -7902,7 +7901,7 @@@ out_test
  
        kfree(dip);
  
-       /* If we had an error make sure to clear the uptodate flag */
-       if (err)
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-       dio_end_io(dio_bio, err);
+       dio_end_io(dio_bio, bio->bi_error);
        bio_put(bio);
  }
  
@@@ -7916,9 -7917,10 +7916,10 @@@ static int __btrfs_submit_bio_start_dir
        return 0;
  }
  
- static void btrfs_end_dio_bio(struct bio *bio, int err)
+ static void btrfs_end_dio_bio(struct bio *bio)
  {
        struct btrfs_dio_private *dip = bio->bi_private;
+       int err = bio->bi_error;
  
        if (err)
                btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
        if (dip->errors) {
                bio_io_error(dip->orig_bio);
        } else {
-               set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
-               bio_endio(dip->orig_bio, 0);
+               dip->dio_bio->bi_error = 0;
+               bio_endio(dip->orig_bio);
        }
  out:
        bio_put(bio);
  static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
                                       u64 first_sector, gfp_t gfp_flags)
  {
-       int nr_vecs = bio_get_nr_vecs(bdev);
-       return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+       return btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
  }
  
  static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
@@@ -8219,7 -8220,8 +8219,8 @@@ free_ordered
         * callbacks - they require an allocated dip and a clone of dio_bio.
         */
        if (io_bio && dip) {
-               bio_endio(io_bio, ret);
+               io_bio->bi_error = -EIO;
+               bio_endio(io_bio);
                /*
                 * The end io callbacks free our dip, do the final put on io_bio
                 * and all the cleanup and final put for dio_bio (through
                        unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                              file_offset + dio_bio->bi_iter.bi_size - 1);
                }
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+               dio_bio->bi_error = -EIO;
                /*
                 * Releases and cleans up our dio_bio, no need to bio_put()
                 * nor bio_endio()/bio_io_error() against dio_bio.
diff --combined fs/f2fs/data.c
index f71e19a9dd3c18fc6ee7b3bcb4f3da9c8b8c3d4c,b478accb24d98b0897866fa6416ed2b67a8468df..c414d49aa2de17553badaaa754bf3b967bf51cf1
  static struct kmem_cache *extent_tree_slab;
  static struct kmem_cache *extent_node_slab;
  
- static void f2fs_read_end_io(struct bio *bio, int err)
+ static void f2fs_read_end_io(struct bio *bio)
  {
        struct bio_vec *bvec;
        int i;
  
        if (f2fs_bio_encrypted(bio)) {
-               if (err) {
+               if (bio->bi_error) {
                        f2fs_release_crypto_ctx(bio->bi_private);
                } else {
                        f2fs_end_io_crypto_work(bio->bi_private, bio);
@@@ -46,7 -46,7 +46,7 @@@
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
  
-               if (!err) {
+               if (!bio->bi_error) {
                        SetPageUptodate(page);
                } else {
                        ClearPageUptodate(page);
@@@ -57,7 -57,7 +57,7 @@@
        bio_put(bio);
  }
  
- static void f2fs_write_end_io(struct bio *bio, int err)
+ static void f2fs_write_end_io(struct bio *bio)
  {
        struct f2fs_sb_info *sbi = bio->bi_private;
        struct bio_vec *bvec;
@@@ -68,7 -68,7 +68,7 @@@
  
                f2fs_restore_and_release_control_page(&page);
  
-               if (unlikely(err)) {
+               if (unlikely(bio->bi_error)) {
                        set_page_dirty(page);
                        set_bit(AS_EIO, &page->mapping->flags);
                        f2fs_stop_checkpoint(sbi);
@@@ -1552,7 -1552,7 +1552,7 @@@ submit_and_realloc
                        }
  
                        bio = bio_alloc(GFP_KERNEL,
-                               min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+                               min_t(int, nr_pages, BIO_MAX_PAGES));
                        if (!bio) {
                                if (ctx)
                                        f2fs_release_crypto_ctx(ctx);
@@@ -2072,6 -2072,8 +2072,6 @@@ static int f2fs_set_data_page_dirty(str
                return 1;
        }
  
 -      mark_inode_dirty(inode);
 -
        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                update_dirty_page(inode, page);
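
A last recurring simplification, visible in the btrfs and f2fs hunks: with bio_get_nr_vecs() removed, callers just cap the iovec count at BIO_MAX_PAGES and rely on bio_add_page() to say when a bio is full. A hypothetical allocation helper (names invented):

static struct bio *my_alloc_bio(struct block_device *bdev, sector_t sector,
                                unsigned int nr_pages)
{
        struct bio *bio;

        /* no per-device vec estimate any more; BIO_MAX_PAGES is the ceiling */
        bio = bio_alloc(GFP_NOIO, min_t(unsigned int, nr_pages, BIO_MAX_PAGES));
        if (!bio)
                return NULL;

        bio->bi_bdev = bdev;
        bio->bi_iter.bi_sector = sector;
        return bio;
}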