Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
diff --combined block/bio.c

index d6e5ba3399f0ae151ea040e2ec1fd1df1c3dba6a,b1f198f9a317b1f01d288c45e24cda56a1e1f778..515b5434fe2de84f0fe9db306fc16adf7711ae5e
--- 1/block/bio.c
--- 2/block/bio.c
+++ b/block/bio.c
@@@ -269,7 -269,6 +269,6 @@@ static void bio_free(struct bio *bio
   void bio_init(struct bio *bio)
   {
         memset(bio, 0, sizeof(*bio));
-       bio->bi_flags = 1 << BIO_UPTODATE;
         atomic_set(&bio->__bi_remaining, 1);
         atomic_set(&bio->__bi_cnt, 1);
   }
@@@ -292,14 -291,17 +291,17 @@@ void bio_reset(struct bio *bio
         __bio_free(bio);
   
         memset(bio, 0, BIO_RESET_BYTES);
-       bio->bi_flags = flags | (1 << BIO_UPTODATE);
+       bio->bi_flags = flags;
         atomic_set(&bio->__bi_remaining, 1);
   }
   EXPORT_SYMBOL(bio_reset);
   
- static void bio_chain_endio(struct bio *bio, int error)
+ static void bio_chain_endio(struct bio *bio)
   {
-       bio_endio(bio->bi_private, error);
+       struct bio *parent = bio->bi_private;
+ 
+       parent->bi_error = bio->bi_error;
+       bio_endio(parent);
         bio_put(bio);
   }
   
@@@ -309,7 -311,7 +311,7 @@@
    */
   static inline void bio_inc_remaining(struct bio *bio)
   {
-       bio->bi_flags |= (1 << BIO_CHAIN);
+       bio_set_flag(bio, BIO_CHAIN);
         smp_mb__before_atomic();
         atomic_inc(&bio->__bi_remaining);
   }
@@@ -493,7 -495,7 +495,7 @@@ struct bio *bio_alloc_bioset(gfp_t gfp_
                 if (unlikely(!bvl))
                         goto err_free;
   
-               bio->bi_flags |= 1 << BIO_OWNS_VEC;
+               bio_set_flag(bio, BIO_OWNS_VEC);
         } else if (nr_iovecs) {
                 bvl = bio->bi_inline_vecs;
         }
@@@ -578,7 -580,7 +580,7 @@@ void __bio_clone_fast(struct bio *bio, 
          * so we don't set nor calculate new physical/hw segment counts here
          */
         bio->bi_bdev = bio_src->bi_bdev;
-       bio->bi_flags |= 1 << BIO_CLONED;
+       bio_set_flag(bio, BIO_CLONED);
         bio->bi_rw = bio_src->bi_rw;
         bio->bi_iter = bio_src->bi_iter;
         bio->bi_io_vec = bio_src->bi_io_vec;
@@@ -692,31 -694,22 +694,22 @@@ integrity_clone
   EXPORT_SYMBOL(bio_clone_bioset);
   
   /**
-  *    bio_get_nr_vecs         - return approx number of vecs
-  *    @bdev:  I/O target
+  *    bio_add_pc_page -       attempt to add page to bio
+  *    @q: the target queue
+  *    @bio: destination bio
+  *    @page: page to add
+  *    @len: vec entry length
+  *    @offset: vec entry offset
    *
-  *    Return the approximate number of pages we can send to this target.
-  *    There's no guarantee that you will be able to fit this number of pages
-  *    into a bio, it does not account for dynamic restrictions that vary
-  *    on offset.
+  *    Attempt to add a page to the bio_vec maplist. This can fail for a
+  *    number of reasons, such as the bio being full or target block device
+  *    limitations. The target block device must allow bio's up to PAGE_SIZE,
+  *    so it is always possible to add a single page to an empty bio.
+  *
+  *    This should only be used by REQ_PC bios.
    */
- int bio_get_nr_vecs(struct block_device *bdev)
- {
-       struct request_queue *q = bdev_get_queue(bdev);
-       int nr_pages;
- 
-       nr_pages = min_t(unsigned,
-                    queue_max_segments(q),
-                    queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
- 
-       return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
- 
- }
- EXPORT_SYMBOL(bio_get_nr_vecs);
- 
- static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
-                         *page, unsigned int len, unsigned int offset,
-                         unsigned int max_sectors)
+ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
+                   *page, unsigned int len, unsigned int offset)
   {
         int retried_segments = 0;
         struct bio_vec *bvec;
@@@ -727,7 -720,7 +720,7 @@@
         if (unlikely(bio_flagged(bio, BIO_CLONED)))
                 return 0;
   
-       if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
+       if (((bio->bi_iter.bi_size + len) >> 9) > queue_max_hw_sectors(q))
                 return 0;
   
         /*
@@@ -740,28 -733,7 +733,7 @@@
   
                 if (page == prev->bv_page &&
                     offset == prev->bv_offset + prev->bv_len) {
-                       unsigned int prev_bv_len = prev->bv_len;
                         prev->bv_len += len;
- 
-                       if (q->merge_bvec_fn) {
-                               struct bvec_merge_data bvm = {
-                                       /* prev_bvec is already charged in
-                                          bi_size, discharge it in order to
-                                          simulate merging updated prev_bvec
-                                          as new bvec. */
-                                       .bi_bdev = bio->bi_bdev,
-                                       .bi_sector = bio->bi_iter.bi_sector,
-                                       .bi_size = bio->bi_iter.bi_size -
-                                               prev_bv_len,
-                                       .bi_rw = bio->bi_rw,
-                               };
- 
-                               if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) {
-                                       prev->bv_len -= len;
-                                       return 0;
-                               }
-                       }
- 
                         bio->bi_iter.bi_size += len;
                         goto done;
                 }
@@@ -770,8 -742,7 +742,7 @@@
                  * If the queue doesn't support SG gaps and adding this
                  * offset would create a gap, disallow it.
                  */
-               if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
-                   bvec_gap_to_prev(prev, offset))
+               if (bvec_gap_to_prev(q, prev, offset))
                         return 0;
         }
   
@@@ -804,30 -775,9 +775,9 @@@
                 blk_recount_segments(q, bio);
         }
   
-       /*
-        * if queue has other restrictions (eg varying max sector size
-        * depending on offset), it can specify a merge_bvec_fn in the
-        * queue to get further control
-        */
-       if (q->merge_bvec_fn) {
-               struct bvec_merge_data bvm = {
-                       .bi_bdev = bio->bi_bdev,
-                       .bi_sector = bio->bi_iter.bi_sector,
-                       .bi_size = bio->bi_iter.bi_size - len,
-                       .bi_rw = bio->bi_rw,
-               };
- 
-               /*
-                * merge_bvec_fn() returns number of bytes it can accept
-                * at this offset
-                */
-               if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
-                       goto failed;
-       }
- 
         /* If we may be able to merge these biovecs, force a recount */
         if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
-               bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+               bio_clear_flag(bio, BIO_SEG_VALID);
   
    done:
         return len;
@@@ -841,28 -791,6 +791,6 @@@
         blk_recount_segments(q, bio);
         return 0;
   }
- 
- /**
-  *    bio_add_pc_page -       attempt to add page to bio
-  *    @q: the target queue
-  *    @bio: destination bio
-  *    @page: page to add
-  *    @len: vec entry length
-  *    @offset: vec entry offset
-  *
-  *    Attempt to add a page to the bio_vec maplist. This can fail for a
-  *    number of reasons, such as the bio being full or target block device
-  *    limitations. The target block device must allow bio's up to PAGE_SIZE,
-  *    so it is always possible to add a single page to an empty bio.
-  *
-  *    This should only be used by REQ_PC bios.
-  */
- int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
-                   unsigned int len, unsigned int offset)
- {
-       return __bio_add_page(q, bio, page, len, offset,
-                             queue_max_hw_sectors(q));
- }
   EXPORT_SYMBOL(bio_add_pc_page);
   
   /**
@@@ -872,22 -800,47 +800,47 @@@
    *    @len: vec entry length
    *    @offset: vec entry offset
    *
-  *    Attempt to add a page to the bio_vec maplist. This can fail for a
-  *    number of reasons, such as the bio being full or target block device
-  *    limitations. The target block device must allow bio's up to PAGE_SIZE,
-  *    so it is always possible to add a single page to an empty bio.
+  *    Attempt to add a page to the bio_vec maplist. This will only fail
+  *    if either bio->bi_vcnt == bio->bi_max_vecs or it's a cloned bio.
    */
- int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
-                unsigned int offset)
+ int bio_add_page(struct bio *bio, struct page *page,
+                unsigned int len, unsigned int offset)
   {
-       struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-       unsigned int max_sectors;
+       struct bio_vec *bv;
+ 
+       /*
+        * cloned bio must not modify vec list
+        */
+       if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+               return 0;
   
-       max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
-       if ((max_sectors < (len >> 9)) && !bio->bi_iter.bi_size)
-               max_sectors = len >> 9;
+       /*
+        * For filesystems with a blocksize smaller than the pagesize
+        * we will often be called with the same page as last time and
+        * a consecutive offset.  Optimize this special case.
+        */
+       if (bio->bi_vcnt > 0) {
+               bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
   
-       return __bio_add_page(q, bio, page, len, offset, max_sectors);
+               if (page == bv->bv_page &&
+                   offset == bv->bv_offset + bv->bv_len) {
+                       bv->bv_len += len;
+                       goto done;
+               }
+       }
+ 
+       if (bio->bi_vcnt >= bio->bi_max_vecs)
+               return 0;
+ 
+       bv              = &bio->bi_io_vec[bio->bi_vcnt];
+       bv->bv_page     = page;
+       bv->bv_len      = len;
+       bv->bv_offset   = offset;
+ 
+       bio->bi_vcnt++;
+ done:
+       bio->bi_iter.bi_size += len;
+       return len;
   }
   EXPORT_SYMBOL(bio_add_page);
   
@@@ -896,11 -849,11 +849,11 @@@ struct submit_bio_ret 
         int error;
   };
   
- static void submit_bio_wait_endio(struct bio *bio, int error)
+ static void submit_bio_wait_endio(struct bio *bio)
   {
         struct submit_bio_ret *ret = bio->bi_private;
   
-       ret->error = error;
+       ret->error = bio->bi_error;
         complete(&ret->event);
   }
   
@@@ -1388,7 -1341,7 +1341,7 @@@ struct bio *bio_map_user_iov(struct req
         if (iter->type & WRITE)
                 bio->bi_rw |= REQ_WRITE;
   
-       bio->bi_flags |= (1 << BIO_USER_MAPPED);
+       bio_set_flag(bio, BIO_USER_MAPPED);
   
         /*
          * subtle -- if __bio_map_user() ended up bouncing a bio,
@@@ -1445,7 -1398,7 +1398,7 @@@ void bio_unmap_user(struct bio *bio
   }
   EXPORT_SYMBOL(bio_unmap_user);
   
- static void bio_map_kern_endio(struct bio *bio, int err)
+ static void bio_map_kern_endio(struct bio *bio)
   {
         bio_put(bio);
   }
@@@ -1501,13 -1454,13 +1454,13 @@@ struct bio *bio_map_kern(struct request
   }
   EXPORT_SYMBOL(bio_map_kern);
   
- static void bio_copy_kern_endio(struct bio *bio, int err)
+ static void bio_copy_kern_endio(struct bio *bio)
   {
         bio_free_pages(bio);
         bio_put(bio);
   }
   
- static void bio_copy_kern_endio_read(struct bio *bio, int err)
+ static void bio_copy_kern_endio_read(struct bio *bio)
   {
         char *p = bio->bi_private;
         struct bio_vec *bvec;
@@@ -1518,7 -1471,7 +1471,7 @@@
                 p += bvec->bv_len;
         }
   
-       bio_copy_kern_endio(bio, err);
+       bio_copy_kern_endio(bio);
   }
   
   /**
@@@ -1768,7 -1721,7 +1721,7 @@@ static inline bool bio_remaining_done(s
         BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);
   
         if (atomic_dec_and_test(&bio->__bi_remaining)) {
-               clear_bit(BIO_CHAIN, &bio->bi_flags);
+               bio_clear_flag(bio, BIO_CHAIN);
                 return true;
         }
   
@@@ -1778,25 -1731,15 +1731,15 @@@
   /**
    * bio_endio - end I/O on a bio
    * @bio:      bio
-  * @error:    error, if any
    *
    * Description:
-  *   bio_endio() will end I/O on the whole bio. bio_endio() is the
-  *   preferred way to end I/O on a bio, it takes care of clearing
-  *   BIO_UPTODATE on error. @error is 0 on success, and and one of the
-  *   established -Exxxx (-EIO, for instance) error values in case
-  *   something went wrong. No one should call bi_end_io() directly on a
-  *   bio unless they own it and thus know that it has an end_io
-  *   function.
+  *   bio_endio() will end I/O on the whole bio. bio_endio() is the preferred
+  *   way to end I/O on a bio. No one should call bi_end_io() directly on a
+  *   bio unless they own it and thus know that it has an end_io function.
    **/
- void bio_endio(struct bio *bio, int error)
+ void bio_endio(struct bio *bio)
   {
         while (bio) {
-               if (error)
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
-               else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-                       error = -EIO;
- 
                 if (unlikely(!bio_remaining_done(bio)))
                         break;
   
@@@ -1810,11 -1753,12 +1753,12 @@@
                  */
                 if (bio->bi_end_io == bio_chain_endio) {
                         struct bio *parent = bio->bi_private;
+                       parent->bi_error = bio->bi_error;
                         bio_put(bio);
                         bio = parent;
                 } else {
                         if (bio->bi_end_io)
-                               bio->bi_end_io(bio, error);
+                               bio->bi_end_io(bio);
                         bio = NULL;
                 }
         }
@@@ -1831,9 -1775,8 +1775,9 @@@ EXPORT_SYMBOL(bio_endio)
    * Allocates and returns a new bio which represents @sectors from the start of
    * @bio, and updates @bio to represent the remaining sectors.
    *
- - * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
- - * responsibility to ensure that @bio is not freed before the split.
+ + * Unless this is a discard request the newly allocated bio will point
+ + * to @bio's bi_io_vec; it is the caller's responsibility to ensure that
+ + * @bio is not freed before the split.
    */
   struct bio *bio_split(struct bio *bio, int sectors,
                       gfp_t gfp, struct bio_set *bs)
@@@ -1843,15 -1786,7 +1787,15 @@@
         BUG_ON(sectors <= 0);
         BUG_ON(sectors >= bio_sectors(bio));
   
- -      split = bio_clone_fast(bio, gfp, bs);
+ +      /*
+ +       * Discards need a mutable bio_vec to accommodate the payload
+ +       * required by the DSM TRIM and UNMAP commands.
+ +       */
+ +      if (bio->bi_rw & REQ_DISCARD)
+ +              split = bio_clone_bioset(bio, gfp, bs);
+ +      else
+ +              split = bio_clone_fast(bio, gfp, bs);
+ +
         if (!split)
                 return NULL;
   
@@@ -1882,7 -1817,7 +1826,7 @@@ void bio_trim(struct bio *bio, int offs
         if (offset == 0 && size == bio->bi_iter.bi_size)
                 return;
   
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
   
         bio_advance(bio, offset << 9);
   
@@@ -2018,7 -1953,6 +1962,7 @@@ int bio_associate_blkcg(struct bio *bio
         bio->bi_css = blkcg_css;
         return 0;
   }
+ +EXPORT_SYMBOL_GPL(bio_associate_blkcg);
   
   /**
    * bio_associate_current - associate a bio with %current
@@@ -2049,7 -1983,6 +1993,7 @@@ int bio_associate_current(struct bio *b
         bio->bi_css = task_get_css(current, blkio_cgrp_id);
         return 0;
   }
+ +EXPORT_SYMBOL_GPL(bio_associate_current);
   
   /**
    * bio_disassociate_task - undo bio_associate_current()
diff --combined block/blk-settings.c

index e0057d035200c4dd5e42d191f0395a7769489905,f96c72116931e7f6ae4bacac207048490c7dcacc..7d8f129a1516b408d8ebd827e65ffd6d688b2df8
--- 1/block/blk-settings.c
--- 2/block/blk-settings.c
+++ b/block/blk-settings.c
@@@ -53,28 -53,6 +53,6 @@@ void blk_queue_unprep_rq(struct request
   }
   EXPORT_SYMBOL(blk_queue_unprep_rq);
   
- /**
-  * blk_queue_merge_bvec - set a merge_bvec function for queue
-  * @q:                queue
-  * @mbfn:     merge_bvec_fn
-  *
-  * Usually queues have static limitations on the max sectors or segments that
-  * we can put in a request. Stacking drivers may have some settings that
-  * are dynamic, and thus we have to query the queue whether it is ok to
-  * add a new bio_vec to a bio at a given offset or not. If the block device
-  * has such limitations, it needs to register a merge_bvec_fn to control
-  * the size of bio's sent to it. Note that a block device *must* allow a
-  * single page to be added to an empty bio. The block device driver may want
-  * to use the bio_split() function to deal with these bio's. By default
-  * no merge_bvec_fn is defined for a queue, and only the fixed limits are
-  * honored.
-  */
- void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn)
- {
-       q->merge_bvec_fn = mbfn;
- }
- EXPORT_SYMBOL(blk_queue_merge_bvec);
- 
   void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
   {
         q->softirq_done_fn = fn;
@@@ -111,11 -89,13 +89,13 @@@ void blk_set_default_limits(struct queu
         lim->max_segments = BLK_MAX_SEGMENTS;
         lim->max_integrity_segments = 0;
         lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+       lim->virt_boundary_mask = 0;
         lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
         lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
         lim->chunk_sectors = 0;
         lim->max_write_same_sectors = 0;
         lim->max_discard_sectors = 0;
+       lim->max_hw_discard_sectors = 0;
         lim->discard_granularity = 0;
         lim->discard_alignment = 0;
         lim->discard_misaligned = 0;
@@@ -241,8 -221,8 +221,8 @@@ EXPORT_SYMBOL(blk_queue_bounce_limit)
    * Description:
    *    Enables a low level driver to set a hard upper limit,
    *    max_hw_sectors, on the size of requests.  max_hw_sectors is set by
- - *    the device driver based upon the combined capabilities of I/O
- - *    controller and storage device.
+ + *    the device driver based upon the capabilities of the I/O
+ + *    controller.
    *
    *    max_sectors is a soft limit imposed by the block layer for
    *    filesystem type requests.  This value can be overridden on a
@@@ -257,7 -237,9 +237,9 @@@ void blk_limits_max_hw_sectors(struct q
                        __func__, max_hw_sectors);
         }
   
-       limits->max_sectors = limits->max_hw_sectors = max_hw_sectors;
+       limits->max_hw_sectors = max_hw_sectors;
+       limits->max_sectors = min_t(unsigned int, max_hw_sectors,
+                                   BLK_DEF_MAX_SECTORS);
   }
   EXPORT_SYMBOL(blk_limits_max_hw_sectors);
   
@@@ -303,6 -285,7 +285,7 @@@ EXPORT_SYMBOL(blk_queue_chunk_sectors)
   void blk_queue_max_discard_sectors(struct request_queue *q,
                 unsigned int max_discard_sectors)
   {
+       q->limits.max_hw_discard_sectors = max_discard_sectors;
         q->limits.max_discard_sectors = max_discard_sectors;
   }
   EXPORT_SYMBOL(blk_queue_max_discard_sectors);
@@@ -550,6 -533,8 +533,8 @@@ int blk_stack_limits(struct queue_limit
   
         t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
                                             b->seg_boundary_mask);
+       t->virt_boundary_mask = min_not_zero(t->virt_boundary_mask,
+                                           b->virt_boundary_mask);
   
         t->max_segments = min_not_zero(t->max_segments, b->max_segments);
         t->max_integrity_segments = min_not_zero(t->max_integrity_segments,
@@@ -641,6 -626,8 +626,8 @@@
   
                 t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
                                                       b->max_discard_sectors);
+               t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
+                                                        b->max_hw_discard_sectors);
                 t->discard_granularity = max(t->discard_granularity,
                                              b->discard_granularity);
                 t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
@@@ -787,6 -774,17 +774,17 @@@ void blk_queue_segment_boundary(struct 
   }
   EXPORT_SYMBOL(blk_queue_segment_boundary);
   
+ /**
+  * blk_queue_virt_boundary - set boundary rules for bio merging
+  * @q:  the request queue for the device
+  * @mask:  the memory boundary mask
+  **/
+ void blk_queue_virt_boundary(struct request_queue *q, unsigned long mask)
+ {
+       q->limits.virt_boundary_mask = mask;
+ }
+ EXPORT_SYMBOL(blk_queue_virt_boundary);
+ 
   /**
    * blk_queue_dma_alignment - set dma length and memory alignment
    * @q:     the request queue for the device
diff --combined drivers/block/null_blk.c

index 3177b245d2bdf63e821a12a4c0f18cbab1b16229,016a59afcf24eb55bc9b8b94ac5d7c5792e6087c..17269a3b85f282fd33df0c1f750559ea6c749ad7
--- 1/drivers/block/null_blk.c
--- 2/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@@ -222,7 -222,7 +222,7 @@@ static void end_cmd(struct nullb_cmd *c
                 blk_end_request_all(cmd->rq, 0);
                 break;
         case NULL_Q_BIO:
-               bio_endio(cmd->bio, 0);
+               bio_endio(cmd->bio);
                 break;
         }
   
@@@ -240,19 -240,19 +240,19 @@@ static enum hrtimer_restart null_cmd_ti
         while ((entry = llist_del_all(&cq->list)) != NULL) {
                 entry = llist_reverse_order(entry);
                 do {
+ +                      struct request_queue *q = NULL;
+ +
                         cmd = container_of(entry, struct nullb_cmd, ll_list);
                         entry = entry->next;
+ +                      if (cmd->rq)
+ +                              q = cmd->rq->q;
                         end_cmd(cmd);
   
- -                      if (cmd->rq) {
- -                              struct request_queue *q = cmd->rq->q;
- -
- -                              if (!q->mq_ops && blk_queue_stopped(q)) {
- -                                      spin_lock(q->queue_lock);
- -                                      if (blk_queue_stopped(q))
- -                                              blk_start_queue(q);
- -                                      spin_unlock(q->queue_lock);
- -                              }
+ +                      if (q && !q->mq_ops && blk_queue_stopped(q)) {
+ +                              spin_lock(q->queue_lock);
+ +                              if (blk_queue_stopped(q))
+ +                                      blk_start_queue(q);
+ +                              spin_unlock(q->queue_lock);
                         }
                 } while (entry);
         }
diff --combined drivers/block/rbd.c

index bc67a93aa4f4749f10d1a219789b21661c01ee21,71dd061a7e11154e1c044e009c73de734cf389da..698f761037ce54a6c94be1aeaf0a6179e4c9735b
--- 1/drivers/block/rbd.c
--- 2/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@@ -523,7 -523,6 +523,7 @@@ void rbd_warn(struct rbd_device *rbd_de
   #  define rbd_assert(expr)    ((void) 0)
   #endif /* !RBD_DEBUG */
   
+ +static void rbd_osd_copyup_callback(struct rbd_obj_request *obj_request);
   static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request);
   static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
   static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
@@@ -1819,16 -1818,6 +1819,16 @@@ static void rbd_osd_stat_callback(struc
         obj_request_done_set(obj_request);
   }
   
+ +static void rbd_osd_call_callback(struct rbd_obj_request *obj_request)
+ +{
+ +      dout("%s: obj %p\n", __func__, obj_request);
+ +
+ +      if (obj_request_img_data_test(obj_request))
+ +              rbd_osd_copyup_callback(obj_request);
+ +      else
+ +              obj_request_done_set(obj_request);
+ +}
+ +
   static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
                                 struct ceph_msg *msg)
   {
@@@ -1877,8 -1866,6 +1877,8 @@@
                 rbd_osd_discard_callback(obj_request);
                 break;
         case CEPH_OSD_OP_CALL:
+ +              rbd_osd_call_callback(obj_request);
+ +              break;
         case CEPH_OSD_OP_NOTIFY_ACK:
         case CEPH_OSD_OP_WATCH:
                 rbd_osd_trivial_callback(obj_request);
@@@ -2543,15 -2530,13 +2543,15 @@@ out_unwind
   }
   
   static void
- -rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
+ +rbd_osd_copyup_callback(struct rbd_obj_request *obj_request)
   {
         struct rbd_img_request *img_request;
         struct rbd_device *rbd_dev;
         struct page **pages;
         u32 page_count;
   
+ +      dout("%s: obj %p\n", __func__, obj_request);
+ +
         rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
                 obj_request->type == OBJ_REQUEST_NODATA);
         rbd_assert(obj_request_img_data_test(obj_request));
@@@ -2578,7 -2563,9 +2578,7 @@@
         if (!obj_request->result)
                 obj_request->xferred = obj_request->length;
   
- -      /* Finish up with the normal image object callback */
- -
- -      rbd_img_obj_callback(obj_request);
+ +      obj_request_done_set(obj_request);
   }
   
   static void
@@@ -2663,6 -2650,7 +2663,6 @@@ rbd_img_obj_parent_read_full_callback(s
   
         /* All set, send it off. */
   
- -      orig_request->callback = rbd_img_obj_copyup_callback;
         osdc = &rbd_dev->rbd_client->client->osdc;
         img_result = rbd_obj_request_submit(osdc, orig_request);
         if (!img_result)
@@@ -3474,52 -3462,6 +3474,6 @@@ static int rbd_queue_rq(struct blk_mq_h
         return BLK_MQ_RQ_QUEUE_OK;
   }
   
- /*
-  * a queue callback. Makes sure that we don't create a bio that spans across
-  * multiple osd objects. One exception would be with a single page bios,
-  * which we handle later at bio_chain_clone_range()
-  */
- static int rbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
-                         struct bio_vec *bvec)
- {
-       struct rbd_device *rbd_dev = q->queuedata;
-       sector_t sector_offset;
-       sector_t sectors_per_obj;
-       sector_t obj_sector_offset;
-       int ret;
- 
-       /*
-        * Find how far into its rbd object the partition-relative
-        * bio start sector is to offset relative to the enclosing
-        * device.
-        */
-       sector_offset = get_start_sect(bmd->bi_bdev) + bmd->bi_sector;
-       sectors_per_obj = 1 << (rbd_dev->header.obj_order - SECTOR_SHIFT);
-       obj_sector_offset = sector_offset & (sectors_per_obj - 1);
- 
-       /*
-        * Compute the number of bytes from that offset to the end
-        * of the object.  Account for what's already used by the bio.
-        */
-       ret = (int) (sectors_per_obj - obj_sector_offset) << SECTOR_SHIFT;
-       if (ret > bmd->bi_size)
-               ret -= bmd->bi_size;
-       else
-               ret = 0;
- 
-       /*
-        * Don't send back more than was asked for.  And if the bio
-        * was empty, let the whole thing through because:  "Note
-        * that a block device *must* allow a single page to be
-        * added to an empty bio."
-        */
-       rbd_assert(bvec->bv_len <= PAGE_SIZE);
-       if (ret > (int) bvec->bv_len || !bmd->bi_size)
-               ret = (int) bvec->bv_len;
- 
-       return ret;
- }
- 
   static void rbd_free_disk(struct rbd_device *rbd_dev)
   {
         struct gendisk *disk = rbd_dev->disk;
@@@ -3815,10 -3757,9 +3769,9 @@@ static int rbd_init_disk(struct rbd_dev
         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
         q->limits.discard_granularity = segment_size;
         q->limits.discard_alignment = segment_size;
-       q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+       blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE);
         q->limits.discard_zeroes_data = 1;
   
-       blk_queue_merge_bvec(q, rbd_merge_bvec);
         disk->queue = q;
   
         q->queuedata = rbd_dev;
diff --combined drivers/block/xen-blkback/blkback.c

index 954c0029fb3babc49d1a1f490f9d420934701e30,662648e08596d04bfa5fd6b768e796a0e62133d0..6a685aec6994c2becc628df3fc9eb76f928f69a4
--- 1/drivers/block/xen-blkback/blkback.c
--- 2/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@@ -369,8 -369,8 +369,8 @@@ static void purge_persistent_gnt(struc
                 return;
         }
   
- -      if (work_pending(&blkif->persistent_purge_work)) {
- -              pr_alert_ratelimited("Scheduled work from previous purge is still pending, cannot purge list\n");
+ +      if (work_busy(&blkif->persistent_purge_work)) {
+ +              pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
                 return;
         }
   
@@@ -1078,9 -1078,9 +1078,9 @@@ static void __end_block_io_op(struct pe
   /*
    * bio callback.
    */
- static void end_block_io_op(struct bio *bio, int error)
+ static void end_block_io_op(struct bio *bio)
   {
-       __end_block_io_op(bio->bi_private, error);
+       __end_block_io_op(bio->bi_private, bio->bi_error);
         bio_put(bio);
   }
   
diff --combined drivers/block/xen-blkfront.c

index 7a8a73f1fc0462feab5bad706573ff6eb4536ef7,d542db7a6c7337e0c82375366e1658ef93e8e701..5f6b3be0a93cc0ba82c105f1c57a5b3ba4382aea
--- 1/drivers/block/xen-blkfront.c
--- 2/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@@ -82,7 -82,6 +82,6 @@@ struct blk_shadow 
   struct split_bio {
         struct bio *bio;
         atomic_t pending;
-       int err;
   };
   
   static DEFINE_MUTEX(blkfront_mutex);
@@@ -179,7 -178,6 +178,7 @@@ static DEFINE_SPINLOCK(minor_lock)
         ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
   
   static int blkfront_setup_indirect(struct blkfront_info *info);
+ +static int blkfront_gather_backend_features(struct blkfront_info *info);
   
   static int get_id_from_freelist(struct blkfront_info *info)
   {
@@@ -1129,10 -1127,8 +1128,10 @@@ static void blkif_completion(struct blk
                                  * Add the used indirect page back to the list of
                                  * available pages for indirect grefs.
                                  */
- -                              indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
- -                              list_add(&indirect_page->lru, &info->indirect_pages);
+ +                              if (!info->feature_persistent) {
+ +                                      indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
+ +                                      list_add(&indirect_page->lru, &info->indirect_pages);
+ +                              }
                                 s->indirect_grants[i]->gref = GRANT_INVALID_REF;
                                 list_add_tail(&s->indirect_grants[i]->node, &info->grants);
                         }
@@@ -1481,16 -1477,14 +1480,14 @@@ static int blkfront_probe(struct xenbus
         return 0;
   }
   
- static void split_bio_end(struct bio *bio, int error)
+ static void split_bio_end(struct bio *bio)
   {
         struct split_bio *split_bio = bio->bi_private;
   
-       if (error)
-               split_bio->err = error;
- 
         if (atomic_dec_and_test(&split_bio->pending)) {
                 split_bio->bio->bi_phys_segments = 0;
-               bio_endio(split_bio->bio, split_bio->err);
+               split_bio->bio->bi_error = bio->bi_error;
+               bio_endio(split_bio->bio);
                 kfree(split_bio);
         }
         bio_put(bio);
@@@ -1522,7 -1516,7 +1519,7 @@@ static int blkif_recover(struct blkfron
         info->shadow_free = info->ring.req_prod_pvt;
         info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
   
- -      rc = blkfront_setup_indirect(info);
+ +      rc = blkfront_gather_backend_features(info);
         if (rc) {
                 kfree(copy);
                 return rc;
@@@ -1723,13 -1717,20 +1720,13 @@@ static void blkfront_setup_discard(stru
   
   static int blkfront_setup_indirect(struct blkfront_info *info)
   {
- -      unsigned int indirect_segments, segs;
+ +      unsigned int segs;
         int err, i;
   
- -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- -                          "feature-max-indirect-segments", "%u", &indirect_segments,
- -                          NULL);
- -      if (err) {
- -              info->max_indirect_segments = 0;
+ +      if (info->max_indirect_segments == 0)
                 segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
- -      } else {
- -              info->max_indirect_segments = min(indirect_segments,
- -                                                xen_blkif_max_segments);
+ +      else
                 segs = info->max_indirect_segments;
- -      }
   
         err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
         if (err)
@@@ -1792,68 -1793,6 +1789,68 @@@ out_of_memory
         return -ENOMEM;
   }
   
+ +/*
+ + * Gather all backend feature-*
+ + */
+ +static int blkfront_gather_backend_features(struct blkfront_info *info)
+ +{
+ +      int err;
+ +      int barrier, flush, discard, persistent;
+ +      unsigned int indirect_segments;
+ +
+ +      info->feature_flush = 0;
+ +
+ +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ +                      "feature-barrier", "%d", &barrier,
+ +                      NULL);
+ +
+ +      /*
+ +       * If there's no "feature-barrier" defined, then it means
+ +       * we're dealing with a very old backend which writes
+ +       * synchronously; nothing to do.
+ +       *
+ +       * If there are barriers, then we use flush.
+ +       */
+ +      if (!err && barrier)
+ +              info->feature_flush = REQ_FLUSH | REQ_FUA;
+ +      /*
+ +       * And if there is "feature-flush-cache" use that above
+ +       * barriers.
+ +       */
+ +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ +                      "feature-flush-cache", "%d", &flush,
+ +                      NULL);
+ +
+ +      if (!err && flush)
+ +              info->feature_flush = REQ_FLUSH;
+ +
+ +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ +                      "feature-discard", "%d", &discard,
+ +                      NULL);
+ +
+ +      if (!err && discard)
+ +              blkfront_setup_discard(info);
+ +
+ +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ +                      "feature-persistent", "%u", &persistent,
+ +                      NULL);
+ +      if (err)
+ +              info->feature_persistent = 0;
+ +      else
+ +              info->feature_persistent = persistent;
+ +
+ +      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+ +                          "feature-max-indirect-segments", "%u", &indirect_segments,
+ +                          NULL);
+ +      if (err)
+ +              info->max_indirect_segments = 0;
+ +      else
+ +              info->max_indirect_segments = min(indirect_segments,
+ +                                                xen_blkif_max_segments);
+ +
+ +      return blkfront_setup_indirect(info);
+ +}
+ +
   /*
    * Invoked when the backend is finally 'ready' (and has told produced
    * the details about the physical device - #sectors, size, etc).
@@@ -1865,6 -1804,7 +1862,6 @@@ static void blkfront_connect(struct blk
         unsigned int physical_sector_size;
         unsigned int binfo;
         int err;
- -      int barrier, flush, discard, persistent;
   
         switch (info->connected) {
         case BLKIF_STATE_CONNECTED:
@@@ -1921,7 -1861,48 +1918,7 @@@
         if (err != 1)
                 physical_sector_size = sector_size;
   
- -      info->feature_flush = 0;
- -
- -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- -                          "feature-barrier", "%d", &barrier,
- -                          NULL);
- -
- -      /*
- -       * If there's no "feature-barrier" defined, then it means
- -       * we're dealing with a very old backend which writes
- -       * synchronously; nothing to do.
- -       *
- -       * If there are barriers, then we use flush.
- -       */
- -      if (!err && barrier)
- -              info->feature_flush = REQ_FLUSH | REQ_FUA;
- -      /*
- -       * And if there is "feature-flush-cache" use that above
- -       * barriers.
- -       */
- -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- -                          "feature-flush-cache", "%d", &flush,
- -                          NULL);
- -
- -      if (!err && flush)
- -              info->feature_flush = REQ_FLUSH;
- -
- -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- -                          "feature-discard", "%d", &discard,
- -                          NULL);
- -
- -      if (!err && discard)
- -              blkfront_setup_discard(info);
- -
- -      err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- -                          "feature-persistent", "%u", &persistent,
- -                          NULL);
- -      if (err)
- -              info->feature_persistent = 0;
- -      else
- -              info->feature_persistent = persistent;
- -
- -      err = blkfront_setup_indirect(info);
+ +      err = blkfront_gather_backend_features(info);
         if (err) {
                 xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
                                  info->xbdev->otherend);
diff --combined drivers/block/zram/zram_drv.c

index 763301c7828c72650f2abaa1c723425bdd3c73f4,aec781acee9d597f41c78cb9903adf7a609eb522..9c01f5bfa33fc9a0494a1868b0b6ca7dc79b50b9
--- 1/drivers/block/zram/zram_drv.c
--- 2/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@@ -496,9 -496,10 +496,9 @@@ static void zram_meta_free(struct zram_
         kfree(meta);
   }
   
- -static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
+ +static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
   {
         size_t num_pages;
- -      char pool_name[8];
         struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
   
         if (!meta)
@@@ -511,6 -512,7 +511,6 @@@
                 goto out_error;
         }
   
- -      snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
         meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
         if (!meta->mem_pool) {
                 pr_err("Error creating memory pool\n");
@@@ -848,7 -850,7 +848,7 @@@ static void __zram_make_request(struct 
   
         if (unlikely(bio->bi_rw & REQ_DISCARD)) {
                 zram_bio_discard(zram, index, offset, bio);
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 return;
         }
   
@@@ -881,8 -883,7 +881,7 @@@
                 update_position(&index, &offset, &bvec);
         }
   
-       set_bit(BIO_UPTODATE, &bio->bi_flags);
-       bio_endio(bio, 0);
+       bio_endio(bio);
         return;
   
   out:
@@@ -899,6 -900,8 +898,8 @@@ static void zram_make_request(struct re
         if (unlikely(!zram_meta_get(zram)))
                 goto error;
   
+       blk_queue_split(queue, &bio, queue->bio_split);
+ 
         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
                                         bio->bi_iter.bi_size)) {
                 atomic64_inc(&zram->stats.invalid_io);
@@@ -1029,7 -1032,7 +1030,7 @@@ static ssize_t disksize_store(struct de
                 return -EINVAL;
   
         disksize = PAGE_ALIGN(disksize);
- -      meta = zram_meta_alloc(zram->disk->first_minor, disksize);
+ +      meta = zram_meta_alloc(zram->disk->disk_name, disksize);
         if (!meta)
                 return -ENOMEM;
   
@@@ -1242,7 -1245,7 +1243,7 @@@ static int zram_add(void
         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
-       zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+       blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
         /*
          * zram_bio_discard() will clear all logical blocks if logical block
          * size is identical with physical block size(PAGE_SIZE). But if it is
diff --combined drivers/md/dm-cache-target.c

index 1fe93cfea7d309a659d79fe2b953b5f2dbe7b466,d2b5dfbb30cfb92e92940cfc9098f55527a11399..7245071778dba2f28bac62057f7b2bb444fc56e3
--- 1/drivers/md/dm-cache-target.c
--- 2/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@@ -919,14 -919,14 +919,14 @@@ static void defer_writethrough_bio(stru
         wake_worker(cache);
   }
   
- static void writethrough_endio(struct bio *bio, int err)
+ static void writethrough_endio(struct bio *bio)
   {
         struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);
   
         dm_unhook_bio(&pb->hook_info, bio);
   
-       if (err) {
-               bio_endio(bio, err);
+       if (bio->bi_error) {
+               bio_endio(bio);
                 return;
         }
   
@@@ -1231,7 -1231,7 +1231,7 @@@ static void migration_success_post_comm
                          * The block was promoted via an overwrite, so it's dirty.
                          */
                         set_dirty(cache, mg->new_oblock, mg->cblock);
-                       bio_endio(mg->new_ocell->holder, 0);
+                       bio_endio(mg->new_ocell->holder);
                         cell_defer(cache, mg->new_ocell, false);
                 }
                 free_io_migration(mg);
@@@ -1284,7 -1284,7 +1284,7 @@@ static void issue_copy(struct dm_cache_
         }
   }
   
- static void overwrite_endio(struct bio *bio, int err)
+ static void overwrite_endio(struct bio *bio)
   {
         struct dm_cache_migration *mg = bio->bi_private;
         struct cache *cache = mg->cache;
@@@ -1294,7 -1294,7 +1294,7 @@@
   
         dm_unhook_bio(&pb->hook_info, bio);
   
-       if (err)
+       if (bio->bi_error)
                 mg->err = true;
   
         mg->requeue_holder = false;
@@@ -1358,7 -1358,7 +1358,7 @@@ static void issue_discard(struct dm_cac
                 b = to_dblock(from_dblock(b) + 1);
         }
   
-       bio_endio(bio, 0);
+       bio_endio(bio);
         cell_defer(mg->cache, mg->new_ocell, false);
         free_migration(mg);
   }
@@@ -1631,7 -1631,7 +1631,7 @@@ static void process_discard_bio(struct 
   
         calc_discard_block_range(cache, bio, &b, &e);
         if (b == e) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 return;
         }
   
@@@ -1947,7 -1947,6 +1947,7 @@@ static int commit_if_needed(struct cach
   
   static void process_deferred_bios(struct cache *cache)
   {
+ +      bool prealloc_used = false;
         unsigned long flags;
         struct bio_list bios;
         struct bio *bio;
@@@ -1967,7 -1966,6 +1967,7 @@@
                  * this bio might require one, we pause until there are some
                  * prepared mappings to process.
                  */
+ +              prealloc_used = true;
                 if (prealloc_data_structs(cache, &structs)) {
                         spin_lock_irqsave(&cache->lock, flags);
                         bio_list_merge(&cache->deferred_bios, &bios);
@@@ -1985,13 -1983,11 +1985,13 @@@
                         process_bio(cache, &structs, bio);
         }
   
- -      prealloc_free_structs(cache, &structs);
+ +      if (prealloc_used)
+ +              prealloc_free_structs(cache, &structs);
   }
   
   static void process_deferred_cells(struct cache *cache)
   {
+ +      bool prealloc_used = false;
         unsigned long flags;
         struct dm_bio_prison_cell *cell, *tmp;
         struct list_head cells;
@@@ -2011,7 -2007,6 +2011,7 @@@
                  * this bio might require one, we pause until there are some
                  * prepared mappings to process.
                  */
+ +              prealloc_used = true;
                 if (prealloc_data_structs(cache, &structs)) {
                         spin_lock_irqsave(&cache->lock, flags);
                         list_splice(&cells, &cache->deferred_cells);
@@@ -2022,8 -2017,7 +2022,8 @@@
                 process_cell(cache, &structs, cell);
         }
   
- -      prealloc_free_structs(cache, &structs);
+ +      if (prealloc_used)
+ +              prealloc_free_structs(cache, &structs);
   }
   
   static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
@@@ -2068,7 -2062,7 +2068,7 @@@ static void process_deferred_writethrou
   
   static void writeback_some_dirty_blocks(struct cache *cache)
   {
- -      int r = 0;
+ +      bool prealloc_used = false;
         dm_oblock_t oblock;
         dm_cblock_t cblock;
         struct prealloc structs;
@@@ -2078,12 -2072,15 +2078,12 @@@
         memset(&structs, 0, sizeof(structs));
   
         while (spare_migration_bandwidth(cache)) {
- -              if (prealloc_data_structs(cache, &structs))
- -                      break;
- -
- -              r = policy_writeback_work(cache->policy, &oblock, &cblock, busy);
- -              if (r)
- -                      break;
+ +              if (policy_writeback_work(cache->policy, &oblock, &cblock, busy))
+ +                      break; /* no work to do */
   
- -              r = get_cell(cache, oblock, &structs, &old_ocell);
- -              if (r) {
+ +              prealloc_used = true;
+ +              if (prealloc_data_structs(cache, &structs) ||
+ +                  get_cell(cache, oblock, &structs, &old_ocell)) {
                         policy_set_dirty(cache->policy, oblock);
                         break;
                 }
@@@ -2091,8 -2088,7 +2091,8 @@@
                 writeback(cache, &structs, oblock, cblock, old_ocell);
         }
   
- -      prealloc_free_structs(cache, &structs);
+ +      if (prealloc_used)
+ +              prealloc_free_structs(cache, &structs);
   }
   
   /*----------------------------------------------------------------
@@@ -2217,8 -2213,10 +2217,10 @@@ static void requeue_deferred_bios(struc
         bio_list_merge(&bios, &cache->deferred_bios);
         bio_list_init(&cache->deferred_bios);
   
-       while ((bio = bio_list_pop(&bios)))
-               bio_endio(bio, DM_ENDIO_REQUEUE);
+       while ((bio = bio_list_pop(&bios))) {
+               bio->bi_error = DM_ENDIO_REQUEUE;
+               bio_endio(bio);
+       }
   }
   
   static int more_work(struct cache *cache)
@@@ -3123,7 -3121,7 +3125,7 @@@ static int cache_map(struct dm_target *
                          * This is a duplicate writethrough io that is no
                          * longer needed because the block has been demoted.
                          */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                         // FIXME: remap everything as a miss
                         cell_defer(cache, cell, false);
                         r = DM_MAPIO_SUBMITTED;
@@@ -3500,7 -3498,7 +3502,7 @@@ static void cache_resume(struct dm_targ
    * <#demotions> <#promotions> <#dirty>
    * <#features> <features>*
    * <#core args> <core args>
- - * <policy name> <#policy args> <policy args>* <cache metadata mode>
+ + * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check>
    */
   static void cache_status(struct dm_target *ti, status_type_t type,
                          unsigned status_flags, char *result, unsigned maxlen)
@@@ -3586,11 -3584,6 +3588,11 @@@
                 else
                         DMEMIT("rw ");
   
+ +              if (dm_cache_metadata_needs_check(cache->cmd))
+ +                      DMEMIT("needs_check ");
+ +              else
+ +                      DMEMIT("- ");
+ +
                 break;
   
         case STATUSTYPE_TABLE:
@@@ -3778,26 -3771,6 +3780,6 @@@ static int cache_iterate_devices(struc
         return r;
   }
   
- /*
-  * We assume I/O is going to the origin (which is the volume
-  * more likely to have restrictions e.g. by being striped).
-  * (Looking up the exact location of the data would be expensive
-  * and could always be out of date by the time the bio is submitted.)
-  */
- static int cache_bvec_merge(struct dm_target *ti,
-                           struct bvec_merge_data *bvm,
-                           struct bio_vec *biovec, int max_size)
- {
-       struct cache *cache = ti->private;
-       struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);
- 
-       if (!q->merge_bvec_fn)
-               return max_size;
- 
-       bvm->bi_bdev = cache->origin_dev->bdev;
-       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
- }
- 
   static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
   {
         /*
@@@ -3829,7 -3802,7 +3811,7 @@@ static void cache_io_hints(struct dm_ta
   
   static struct target_type cache_target = {
         .name = "cache",
- -      .version = {1, 7, 0},
+ +      .version = {1, 8, 0},
         .module = THIS_MODULE,
         .ctr = cache_ctr,
         .dtr = cache_dtr,
@@@ -3841,7 -3814,6 +3823,6 @@@
         .status = cache_status,
         .message = cache_message,
         .iterate_devices = cache_iterate_devices,
-       .merge = cache_bvec_merge,
         .io_hints = cache_io_hints,
   };
   
diff --combined drivers/md/dm-thin.c

index d2bbe8cc1e9786b66af798df9d8666d3fb96223c,f352e4990998314f4d6b6128a042168e561b9302..271a6624936313863a753aa6ddde802a41bc27be
--- 1/drivers/md/dm-thin.c
--- 2/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@@ -18,7 -18,6 +18,7 @@@
   #include <linux/init.h>
   #include <linux/module.h>
   #include <linux/slab.h>
+ +#include <linux/vmalloc.h>
   #include <linux/sort.h>
   #include <linux/rbtree.h>
   
@@@ -269,7 -268,7 +269,7 @@@ struct pool 
         process_mapping_fn process_prepared_mapping;
         process_mapping_fn process_prepared_discard;
   
- -      struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE];
+ +      struct dm_bio_prison_cell **cell_sort_array;
   };
   
   static enum pool_mode get_pool_mode(struct pool *pool);
@@@ -615,8 -614,10 +615,10 @@@ static void error_bio_list(struct bio_l
   {
         struct bio *bio;
   
-       while ((bio = bio_list_pop(bios)))
-               bio_endio(bio, error);
+       while ((bio = bio_list_pop(bios))) {
+               bio->bi_error = error;
+               bio_endio(bio);
+       }
   }
   
   static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error)
@@@ -666,21 -667,16 +668,21 @@@ static void requeue_io(struct thin_c *t
         requeue_deferred_cells(tc);
   }
   
- -static void error_retry_list(struct pool *pool)
+ +static void error_retry_list_with_code(struct pool *pool, int error)
   {
         struct thin_c *tc;
   
         rcu_read_lock();
         list_for_each_entry_rcu(tc, &pool->active_thins, list)
- -              error_thin_bio_list(tc, &tc->retry_on_resume_list, -EIO);
+ +              error_thin_bio_list(tc, &tc->retry_on_resume_list, error);
         rcu_read_unlock();
   }
   
+ +static void error_retry_list(struct pool *pool)
+ +{
+ +      return error_retry_list_with_code(pool, -EIO);
+ +}
+ +
   /*
    * This section of code contains the logic for processing a thin device's IO.
    * Much of the code depends on pool object resources (lists, workqueues, etc)
@@@ -870,14 -866,14 +872,14 @@@ static void copy_complete(int read_err
         complete_mapping_preparation(m);
   }
   
- static void overwrite_endio(struct bio *bio, int err)
+ static void overwrite_endio(struct bio *bio)
   {
         struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
         struct dm_thin_new_mapping *m = h->overwrite_mapping;
   
         bio->bi_end_io = m->saved_bi_end_io;
   
-       m->err = err;
+       m->err = bio->bi_error;
         complete_mapping_preparation(m);
   }
   
@@@ -1002,7 -998,7 +1004,7 @@@ static void process_prepared_mapping(st
          */
         if (bio) {
                 inc_remap_and_issue_cell(tc, m->cell, m->data_block);
-               bio_endio(bio, 0);
+               bio_endio(bio);
         } else {
                 inc_all_io_entry(tc->pool, m->cell->holder);
                 remap_and_issue(tc, m->cell->holder, m->data_block);
@@@ -1032,7 -1028,7 +1034,7 @@@ static void process_prepared_discard_fa
   
   static void process_prepared_discard_success(struct dm_thin_new_mapping *m)
   {
-       bio_endio(m->bio, 0);
+       bio_endio(m->bio);
         free_discard_mapping(m);
   }
   
@@@ -1046,7 -1042,7 +1048,7 @@@ static void process_prepared_discard_no
                 metadata_operation_failed(tc->pool, "dm_thin_remove_range", r);
                 bio_io_error(m->bio);
         } else
-               bio_endio(m->bio, 0);
+               bio_endio(m->bio);
   
         cell_defer_no_holder(tc, m->cell);
         mempool_free(m, tc->pool->mapping_pool);
@@@ -1117,7 -1113,8 +1119,8 @@@ static void process_prepared_discard_pa
          * Even if r is set, there could be sub discards in flight that we
          * need to wait for.
          */
-       bio_endio(m->bio, r);
+       m->bio->bi_error = r;
+       bio_endio(m->bio);
         cell_defer_no_holder(tc, m->cell);
         mempool_free(m, pool->mapping_pool);
   }
@@@ -1493,9 -1490,10 +1496,10 @@@ static void handle_unserviceable_bio(st
   {
         int error = should_error_unserviceable_bio(pool);
   
-       if (error)
-               bio_endio(bio, error);
-       else
+       if (error) {
+               bio->bi_error = error;
+               bio_endio(bio);
+       } else
                 retry_on_resume(bio);
   }
   
@@@ -1631,7 -1629,7 +1635,7 @@@ static void process_discard_cell_passdo
          * will prevent completion until the sub range discards have
          * completed.
          */
-       bio_endio(bio, 0);
+       bio_endio(bio);
   }
   
   static void process_discard_bio(struct thin_c *tc, struct bio *bio)
@@@ -1645,7 -1643,7 +1649,7 @@@
                 /*
                  * The discard covers less than a block.
                  */
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 return;
         }
   
@@@ -1790,7 -1788,7 +1794,7 @@@ static void provision_block(struct thin
         if (bio_data_dir(bio) == READ) {
                 zero_fill_bio(bio);
                 cell_defer_no_holder(tc, cell);
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 return;
         }
   
@@@ -1855,7 -1853,7 +1859,7 @@@ static void process_cell(struct thin_c 
   
                         } else {
                                 zero_fill_bio(bio);
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                         }
                 } else
                         provision_block(tc, bio, block, cell);
@@@ -1926,7 -1924,7 +1930,7 @@@ static void __process_bio_read_only(str
                 }
   
                 zero_fill_bio(bio);
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 break;
   
         default:
@@@ -1951,7 -1949,7 +1955,7 @@@ static void process_cell_read_only(stru
   
   static void process_bio_success(struct thin_c *tc, struct bio *bio)
   {
-       bio_endio(bio, 0);
+       bio_endio(bio);
   }
   
   static void process_bio_fail(struct thin_c *tc, struct bio *bio)
@@@ -2287,23 -2285,18 +2291,23 @@@ static void do_waker(struct work_struc
         queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD);
   }
   
+ +static void notify_of_pool_mode_change_to_oods(struct pool *pool);
+ +
   /*
    * We're holding onto IO to allow userland time to react.  After the
    * timeout either the pool will have been resized (and thus back in
- - * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO.
+ + * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space.
    */
   static void do_no_space_timeout(struct work_struct *ws)
   {
         struct pool *pool = container_of(to_delayed_work(ws), struct pool,
                                          no_space_timeout);
   
- -      if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space)
- -              set_pool_mode(pool, PM_READ_ONLY);
+ +      if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) {
+ +              pool->pf.error_if_no_space = true;
+ +              notify_of_pool_mode_change_to_oods(pool);
+ +              error_retry_list_with_code(pool, -ENOSPC);
+ +      }
   }
   
   /*----------------------------------------------------------------*/
@@@ -2381,14 -2374,6 +2385,14 @@@ static void notify_of_pool_mode_change(
                dm_device_name(pool->pool_md), new_mode);
   }
   
+ +static void notify_of_pool_mode_change_to_oods(struct pool *pool)
+ +{
+ +      if (!pool->pf.error_if_no_space)
+ +              notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)");
+ +      else
+ +              notify_of_pool_mode_change(pool, "out-of-data-space (error IO)");
+ +}
+ +
   static bool passdown_enabled(struct pool_c *pt)
   {
         return pt->adjusted_pf.discard_passdown;
@@@ -2473,7 -2458,7 +2477,7 @@@ static void set_pool_mode(struct pool *
                  * frequently seeing this mode.
                  */
                 if (old_mode != new_mode)
- -                      notify_of_pool_mode_change(pool, "out-of-data-space");
+ +                      notify_of_pool_mode_change_to_oods(pool);
                 pool->process_bio = process_bio_read_only;
                 pool->process_discard = process_discard_bio;
                 pool->process_cell = process_cell_read_only;
@@@ -2600,7 -2585,8 +2604,8 @@@ static int thin_bio_map(struct dm_targe
         thin_hook_bio(tc, bio);
   
         if (tc->requeue_mode) {
-               bio_endio(bio, DM_ENDIO_REQUEUE);
+               bio->bi_error = DM_ENDIO_REQUEUE;
+               bio_endio(bio);
                 return DM_MAPIO_SUBMITTED;
         }
   
@@@ -2796,7 -2782,6 +2801,7 @@@ static void __pool_destroy(struct pool 
   {
         __pool_table_remove(pool);
   
+ +      vfree(pool->cell_sort_array);
         if (dm_pool_metadata_close(pool->pmd) < 0)
                 DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
   
@@@ -2909,13 -2894,6 +2914,13 @@@ static struct pool *pool_create(struct 
                 goto bad_mapping_pool;
         }
   
+ +      pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE);
+ +      if (!pool->cell_sort_array) {
+ +              *error = "Error allocating cell sort array";
+ +              err_p = ERR_PTR(-ENOMEM);
+ +              goto bad_sort_array;
+ +      }
+ +
         pool->ref_count = 1;
         pool->last_commit_jiffies = jiffies;
         pool->pool_md = pool_md;
@@@ -2924,8 -2902,6 +2929,8 @@@
   
         return pool;
   
+ +bad_sort_array:
+ +      mempool_destroy(pool->mapping_pool);
   bad_mapping_pool:
         dm_deferred_set_destroy(pool->all_io_ds);
   bad_all_io_ds:
@@@ -3743,7 -3719,6 +3748,7 @@@ static void emit_flags(struct pool_feat
    * Status line is:
    *    <transaction id> <used metadata sectors>/<total metadata sectors>
    *    <used data sectors>/<total data sectors> <held metadata root>
+ + *    <pool mode> <discard config> <no space config> <needs_check>
    */
   static void pool_status(struct dm_target *ti, status_type_t type,
                         unsigned status_flags, char *result, unsigned maxlen)
@@@ -3845,11 -3820,6 +3850,11 @@@
                 else
                         DMEMIT("queue_if_no_space ");
   
+ +              if (dm_pool_metadata_needs_check(pool->pmd))
+ +                      DMEMIT("needs_check ");
+ +              else
+ +                      DMEMIT("- ");
+ +
                 break;
   
         case STATUSTYPE_TABLE:
@@@ -3875,20 -3845,6 +3880,6 @@@ static int pool_iterate_devices(struct 
         return fn(ti, pt->data_dev, 0, ti->len, data);
   }
   
- static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
-                     struct bio_vec *biovec, int max_size)
- {
-       struct pool_c *pt = ti->private;
-       struct request_queue *q = bdev_get_queue(pt->data_dev->bdev);
- 
-       if (!q->merge_bvec_fn)
-               return max_size;
- 
-       bvm->bi_bdev = pt->data_dev->bdev;
- 
-       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
- }
- 
   static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
   {
         struct pool_c *pt = ti->private;
@@@ -3953,7 -3909,7 +3944,7 @@@ static struct target_type pool_target 
         .name = "thin-pool",
         .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
                     DM_TARGET_IMMUTABLE,
- -      .version = {1, 15, 0},
+ +      .version = {1, 16, 0},
         .module = THIS_MODULE,
         .ctr = pool_ctr,
         .dtr = pool_dtr,
@@@ -3965,7 -3921,6 +3956,6 @@@
         .resume = pool_resume,
         .message = pool_message,
         .status = pool_status,
-       .merge = pool_merge,
         .iterate_devices = pool_iterate_devices,
         .io_hints = pool_io_hints,
   };
@@@ -4292,21 -4247,6 +4282,6 @@@ err
         DMEMIT("Error");
   }
   
- static int thin_merge(struct dm_target *ti, struct bvec_merge_data *bvm,
-                     struct bio_vec *biovec, int max_size)
- {
-       struct thin_c *tc = ti->private;
-       struct request_queue *q = bdev_get_queue(tc->pool_dev->bdev);
- 
-       if (!q->merge_bvec_fn)
-               return max_size;
- 
-       bvm->bi_bdev = tc->pool_dev->bdev;
-       bvm->bi_sector = dm_target_offset(ti, bvm->bi_sector);
- 
-       return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
- }
- 
   static int thin_iterate_devices(struct dm_target *ti,
                                 iterate_devices_callout_fn fn, void *data)
   {
@@@ -4340,7 -4280,7 +4315,7 @@@ static void thin_io_hints(struct dm_tar
   
   static struct target_type thin_target = {
         .name = "thin",
- -      .version = {1, 15, 0},
+ +      .version = {1, 16, 0},
         .module = THIS_MODULE,
         .ctr = thin_ctr,
         .dtr = thin_dtr,
@@@ -4350,7 -4290,6 +4325,6 @@@
         .presuspend = thin_presuspend,
         .postsuspend = thin_postsuspend,
         .status = thin_status,
-       .merge = thin_merge,
         .iterate_devices = thin_iterate_devices,
         .io_hints = thin_io_hints,
   };
diff --combined drivers/md/dm.c

index 0d7ab20c58dffc40d5c56c9427b7dd7f090c8bd3,8bb1ebb6ca7b5ea5836824d6bf281232143cec48..6ffc01bb85f2a8ee6a127cf3c651bebd3cdf6a32
--- 1/drivers/md/dm.c
--- 2/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@@ -124,9 -124,8 +124,8 @@@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo)
   #define DMF_FREEING 3
   #define DMF_DELETING 4
   #define DMF_NOFLUSH_SUSPENDING 5
- #define DMF_MERGE_IS_OPTIONAL 6
- #define DMF_DEFERRED_REMOVE 7
- #define DMF_SUSPENDED_INTERNALLY 8
+ #define DMF_DEFERRED_REMOVE 6
+ #define DMF_SUSPENDED_INTERNALLY 7
   
   /*
    * A dummy definition to make RCU happy.
@@@ -944,7 -943,8 +943,8 @@@ static void dec_pending(struct dm_io *i
                 } else {
                         /* done with normal IO or empty flush */
                         trace_block_bio_complete(md->queue, bio, io_error);
-                       bio_endio(bio, io_error);
+                       bio->bi_error = io_error;
+                       bio_endio(bio);
                 }
         }
   }
@@@ -957,17 -957,15 +957,15 @@@ static void disable_write_same(struct m
         limits->max_write_same_sectors = 0;
   }
   
- static void clone_endio(struct bio *bio, int error)
+ static void clone_endio(struct bio *bio)
   {
+       int error = bio->bi_error;
         int r = error;
         struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
         struct dm_io *io = tio->io;
         struct mapped_device *md = tio->io->md;
         dm_endio_fn endio = tio->ti->type->end_io;
   
-       if (!bio_flagged(bio, BIO_UPTODATE) && !error)
-               error = -EIO;
- 
         if (endio) {
                 r = endio(tio->ti, bio, error);
                 if (r < 0 || r == DM_ENDIO_REQUEUE)
@@@ -996,7 -994,7 +994,7 @@@
   /*
    * Partial completion handling for request-based dm
    */
- static void end_clone_bio(struct bio *clone, int error)
+ static void end_clone_bio(struct bio *clone)
   {
         struct dm_rq_clone_bio_info *info =
                 container_of(clone, struct dm_rq_clone_bio_info, clone);
@@@ -1013,13 -1011,13 +1011,13 @@@
                  * the remainder.
                  */
                 return;
-       else if (error) {
+       else if (bio->bi_error) {
                 /*
                  * Don't notice the error to the upper layer yet.
                  * The error handling decision is made by the target driver,
                  * when the request is completed.
                  */
-               tio->error = error;
+               tio->error = bio->bi_error;
                 return;
         }
   
@@@ -1067,10 -1065,13 +1065,10 @@@ static void rq_end_stats(struct mapped_
    */
   static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
   {
- -      int nr_requests_pending;
- -
         atomic_dec(&md->pending[rw]);
   
         /* nudge anyone waiting on suspend queue */
- -      nr_requests_pending = md_in_flight(md);
- -      if (!nr_requests_pending)
+ +      if (!md_in_flight(md))
                 wake_up(&md->wait);
   
         /*
@@@ -1082,7 -1083,8 +1080,7 @@@
         if (run_queue) {
                 if (md->queue->mq_ops)
                         blk_mq_run_hw_queues(md->queue, true);
- -              else if (!nr_requests_pending ||
- -                       (nr_requests_pending >= md->queue->nr_congestion_on))
+ +              else
                         blk_run_queue_async(md->queue);
         }
   
@@@ -1722,60 -1724,6 +1720,6 @@@ static void __split_and_process_bio(str
    * CRUD END
    *---------------------------------------------------------------*/
   
- static int dm_merge_bvec(struct request_queue *q,
-                        struct bvec_merge_data *bvm,
-                        struct bio_vec *biovec)
- {
-       struct mapped_device *md = q->queuedata;
-       struct dm_table *map = dm_get_live_table_fast(md);
-       struct dm_target *ti;
-       sector_t max_sectors;
-       int max_size = 0;
- 
-       if (unlikely(!map))
-               goto out;
- 
-       ti = dm_table_find_target(map, bvm->bi_sector);
-       if (!dm_target_is_valid(ti))
-               goto out;
- 
-       /*
-        * Find maximum amount of I/O that won't need splitting
-        */
-       max_sectors = min(max_io_len(bvm->bi_sector, ti),
-                         (sector_t) BIO_MAX_SECTORS);
-       max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-       if (max_size < 0)
-               max_size = 0;
- 
-       /*
-        * merge_bvec_fn() returns number of bytes
-        * it can accept at this offset
-        * max is precomputed maximal io size
-        */
-       if (max_size && ti->type->merge)
-               max_size = ti->type->merge(ti, bvm, biovec, max_size);
-       /*
-        * If the target doesn't support merge method and some of the devices
-        * provided their merge_bvec method (we know this by looking at
-        * queue_max_hw_sectors), then we can't allow bios with multiple vector
-        * entries.  So always set max_size to 0, and the code below allows
-        * just one page.
-        */
-       else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-               max_size = 0;
- 
- out:
-       dm_put_live_table_fast(md);
-       /*
-        * Always allow an entire first page
-        */
-       if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
-               max_size = biovec->bv_len;
- 
-       return max_size;
- }
- 
   /*
    * The request function that just remaps the bio built up by
    * dm_merge_bvec.
@@@ -1789,6 -1737,8 +1733,8 @@@ static void dm_make_request(struct requ
   
         map = dm_get_live_table(md, &srcu_idx);
   
+       blk_queue_split(q, &bio, q->bio_split);
+ 
         generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
   
         /* if we're suspended, we have to queue this io for later */
@@@ -2270,6 -2220,8 +2216,6 @@@ static void dm_init_old_md_queue(struc
   
   static void cleanup_mapped_device(struct mapped_device *md)
   {
- -      cleanup_srcu_struct(&md->io_barrier);
- -
         if (md->wq)
                 destroy_workqueue(md->wq);
         if (md->kworker_task)
@@@ -2281,8 -2233,6 +2227,8 @@@
         if (md->bs)
                 bioset_free(md->bs);
   
+ +      cleanup_srcu_struct(&md->io_barrier);
+ +
         if (md->disk) {
                 spin_lock(&_minor_lock);
                 md->disk->private_data = NULL;
@@@ -2495,59 -2445,6 +2441,6 @@@ static void __set_size(struct mapped_de
         i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
   }
   
- /*
-  * Return 1 if the queue has a compulsory merge_bvec_fn function.
-  *
-  * If this function returns 0, then the device is either a non-dm
-  * device without a merge_bvec_fn, or it is a dm device that is
-  * able to split any bios it receives that are too big.
-  */
- int dm_queue_merge_is_compulsory(struct request_queue *q)
- {
-       struct mapped_device *dev_md;
- 
-       if (!q->merge_bvec_fn)
-               return 0;
- 
-       if (q->make_request_fn == dm_make_request) {
-               dev_md = q->queuedata;
-               if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
-                       return 0;
-       }
- 
-       return 1;
- }
- 
- static int dm_device_merge_is_compulsory(struct dm_target *ti,
-                                        struct dm_dev *dev, sector_t start,
-                                        sector_t len, void *data)
- {
-       struct block_device *bdev = dev->bdev;
-       struct request_queue *q = bdev_get_queue(bdev);
- 
-       return dm_queue_merge_is_compulsory(q);
- }
- 
- /*
-  * Return 1 if it is acceptable to ignore merge_bvec_fn based
-  * on the properties of the underlying devices.
-  */
- static int dm_table_merge_is_optional(struct dm_table *table)
- {
-       unsigned i = 0;
-       struct dm_target *ti;
- 
-       while (i < dm_table_get_num_targets(table)) {
-               ti = dm_table_get_target(table, i++);
- 
-               if (ti->type->iterate_devices &&
-                   ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
-                       return 0;
-       }
- 
-       return 1;
- }
- 
   /*
    * Returns old map, which caller must destroy.
    */
@@@ -2557,7 -2454,6 +2450,6 @@@ static struct dm_table *__bind(struct m
         struct dm_table *old_map;
         struct request_queue *q = md->queue;
         sector_t size;
-       int merge_is_optional;
   
         size = dm_table_get_size(t);
   
@@@ -2583,17 -2479,11 +2475,11 @@@
   
         __bind_mempools(md, t);
   
-       merge_is_optional = dm_table_merge_is_optional(t);
- 
         old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
         rcu_assign_pointer(md->map, t);
         md->immutable_target_type = dm_table_get_immutable_target_type(t);
   
         dm_table_set_restrictions(t, q, limits);
-       if (merge_is_optional)
-               set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
-       else
-               clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
         if (old_map)
                 dm_sync_table(md);
   
@@@ -2874,7 -2764,6 +2760,6 @@@ int dm_setup_md_queue(struct mapped_dev
         case DM_TYPE_BIO_BASED:
                 dm_init_old_md_queue(md);
                 blk_queue_make_request(md->queue, dm_make_request);
-               blk_queue_merge_bvec(md->queue, dm_merge_bvec);
                 break;
         }
   
diff --combined drivers/md/md.c

index e25f00f0138a7b4d82a5ae4f6fc7e1b6f0bb1b30,d28bf5cea2243e9b620895758758f01fbc560e23..40332625713b9758e2c0d1789996e356f4ad10a7
--- 1/drivers/md/md.c
--- 2/drivers/md/md.c
+++ b/drivers/md/md.c
@@@ -257,13 -257,17 +257,17 @@@ static void md_make_request(struct requ
         unsigned int sectors;
         int cpu;
   
+       blk_queue_split(q, &bio, q->bio_split);
+ 
         if (mddev == NULL || mddev->pers == NULL
             || !mddev->ready) {
                 bio_io_error(bio);
                 return;
         }
         if (mddev->ro == 1 && unlikely(rw == WRITE)) {
-               bio_endio(bio, bio_sectors(bio) == 0 ? 0 : -EROFS);
+               if (bio_sectors(bio) != 0)
+                       bio->bi_error = -EROFS;
+               bio_endio(bio);
                 return;
         }
         smp_rmb(); /* Ensure implications of  'active' are visible */
@@@ -350,34 -354,11 +354,11 @@@ static int md_congested(void *data, in
         return mddev_congested(mddev, bits);
   }
   
- static int md_mergeable_bvec(struct request_queue *q,
-                            struct bvec_merge_data *bvm,
-                            struct bio_vec *biovec)
- {
-       struct mddev *mddev = q->queuedata;
-       int ret;
-       rcu_read_lock();
-       if (mddev->suspended) {
-               /* Must always allow one vec */
-               if (bvm->bi_size == 0)
-                       ret = biovec->bv_len;
-               else
-                       ret = 0;
-       } else {
-               struct md_personality *pers = mddev->pers;
-               if (pers && pers->mergeable_bvec)
-                       ret = pers->mergeable_bvec(mddev, bvm, biovec);
-               else
-                       ret = biovec->bv_len;
-       }
-       rcu_read_unlock();
-       return ret;
- }
   /*
    * Generic flush handling for md
    */
   
- static void md_end_flush(struct bio *bio, int err)
+ static void md_end_flush(struct bio *bio)
   {
         struct md_rdev *rdev = bio->bi_private;
         struct mddev *mddev = rdev->mddev;
@@@ -433,7 -414,7 +414,7 @@@ static void md_submit_flush_data(struc
   
         if (bio->bi_iter.bi_size == 0)
                 /* an empty barrier - all done */
-               bio_endio(bio, 0);
+               bio_endio(bio);
         else {
                 bio->bi_rw &= ~REQ_FLUSH;
                 mddev->pers->make_request(mddev, bio);
@@@ -728,15 -709,13 +709,13 @@@ void md_rdev_clear(struct md_rdev *rdev
   }
   EXPORT_SYMBOL_GPL(md_rdev_clear);
   
- static void super_written(struct bio *bio, int error)
+ static void super_written(struct bio *bio)
   {
         struct md_rdev *rdev = bio->bi_private;
         struct mddev *mddev = rdev->mddev;
   
-       if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-               printk("md: super_written gets error=%d, uptodate=%d\n",
-                      error, test_bit(BIO_UPTODATE, &bio->bi_flags));
-               WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
+       if (bio->bi_error) {
+               printk("md: super_written gets error=%d\n", bio->bi_error);
                 md_error(mddev, rdev);
         }
   
@@@ -791,7 -770,7 +770,7 @@@ int sync_page_io(struct md_rdev *rdev, 
         bio_add_page(bio, page, size, 0);
         submit_bio_wait(rw, bio);
   
-       ret = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       ret = !bio->bi_error;
         bio_put(bio);
         return ret;
   }
@@@ -5186,7 -5165,6 +5165,6 @@@ int md_run(struct mddev *mddev
         if (mddev->queue) {
                 mddev->queue->backing_dev_info.congested_data = mddev;
                 mddev->queue->backing_dev_info.congested_fn = md_congested;
-               blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec);
         }
         if (pers->sync_request) {
                 if (mddev->kobj.sd &&
@@@ -5315,7 -5293,6 +5293,6 @@@ static void md_clean(struct mddev *mdde
         mddev->degraded = 0;
         mddev->safemode = 0;
         mddev->private = NULL;
-       mddev->merge_check_needed = 0;
         mddev->bitmap_info.offset = 0;
         mddev->bitmap_info.default_offset = 0;
         mddev->bitmap_info.default_space = 0;
@@@ -5382,8 -5359,6 +5359,8 @@@ static void __md_stop(struct mddev *mdd
   {
         struct md_personality *pers = mddev->pers;
         mddev_detach(mddev);
+ +      /* Ensure ->event_work is done */
+ +      flush_workqueue(md_misc_wq);
         spin_lock(&mddev->lock);
         mddev->ready = 0;
         mddev->pers = NULL;
@@@ -5514,7 -5489,6 +5491,6 @@@ static int do_md_stop(struct mddev *mdd
   
                 __md_stop_writes(mddev);
                 __md_stop(mddev);
-               mddev->queue->merge_bvec_fn = NULL;
                 mddev->queue->backing_dev_info.congested_fn = NULL;
   
                 /* tell userspace to handle 'inactive' */
@@@ -5759,7 -5733,7 +5735,7 @@@ static int get_bitmap_file(struct mdde
         char *ptr;
         int err;
   
- -      file = kmalloc(sizeof(*file), GFP_NOIO);
+ +      file = kzalloc(sizeof(*file), GFP_NOIO);
         if (!file)
                 return -ENOMEM;
   
@@@ -7439,7 -7413,7 +7415,7 @@@ int md_setup_cluster(struct mddev *mdde
         err = request_module("md-cluster");
         if (err) {
                 pr_err("md-cluster module not found.\n");
- -              return err;
+ +              return -ENOENT;
         }
   
         spin_lock(&pers_lock);
diff --combined drivers/md/raid1.c

index 967a4ed73929ff44a38d9475c5e362fc2914c758,0ff06fdb83a9b4d7bcc8036923833f0f53cad7a9..f39d69f884de5b5ef8033fc16fa653f2d4bd47bf
--- 1/drivers/md/raid1.c
--- 2/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@@ -255,9 -255,10 +255,10 @@@ static void call_bio_endio(struct r1bi
                 done = 1;
   
         if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
+ 
         if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 /*
                  * Wake up any possible resync thread that waits for the device
                  * to go idle.
@@@ -312,9 -313,9 +313,9 @@@ static int find_bio_disk(struct r1bio *
         return mirror;
   }
   
- static void raid1_end_read_request(struct bio *bio, int error)
+ static void raid1_end_read_request(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
         struct r1bio *r1_bio = bio->bi_private;
         int mirror;
         struct r1conf *conf = r1_bio->mddev->private;
@@@ -336,7 -337,7 +337,7 @@@
                 spin_lock_irqsave(&conf->device_lock, flags);
                 if (r1_bio->mddev->degraded == conf->raid_disks ||
                     (r1_bio->mddev->degraded == conf->raid_disks-1 &&
- -                   !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
+ +                   test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
                         uptodate = 1;
                 spin_unlock_irqrestore(&conf->device_lock, flags);
         }
@@@ -397,9 -398,8 +398,8 @@@ static void r1_bio_write_done(struct r1
         }
   }
   
- static void raid1_end_write_request(struct bio *bio, int error)
+ static void raid1_end_write_request(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct r1bio *r1_bio = bio->bi_private;
         int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
         struct r1conf *conf = r1_bio->mddev->private;
@@@ -410,7 -410,7 +410,7 @@@
         /*
          * 'one mirror IO has finished' event handler:
          */
-       if (!uptodate) {
+       if (bio->bi_error) {
                 set_bit(WriteErrorSeen,
                         &conf->mirrors[mirror].rdev->flags);
                 if (!test_and_set_bit(WantReplacement,
@@@ -541,7 -541,7 +541,7 @@@ static int read_balance(struct r1conf *
   
         if ((conf->mddev->recovery_cp < this_sector + sectors) ||
             (mddev_is_clustered(conf->mddev) &&
- -          md_cluster_ops->area_resyncing(conf->mddev, this_sector,
+ +          md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
                     this_sector + sectors)))
                 choose_first = 1;
         else
@@@ -557,7 -557,6 +557,6 @@@
                 rdev = rcu_dereference(conf->mirrors[disk].rdev);
                 if (r1_bio->bios[disk] == IO_BLOCKED
                     || rdev == NULL
-                   || test_bit(Unmerged, &rdev->flags)
                     || test_bit(Faulty, &rdev->flags))
                         continue;
                 if (!test_bit(In_sync, &rdev->flags) &&
@@@ -708,38 -707,6 +707,6 @@@
         return best_disk;
   }
   
- static int raid1_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       struct r1conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max = biovec->bv_len;
- 
-       if (mddev->merge_check_needed) {
-               int disk;
-               rcu_read_lock();
-               for (disk = 0; disk < conf->raid_disks * 2; disk++) {
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = sector +
-                                               rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
- 
- }
- 
   static int raid1_congested(struct mddev *mddev, int bits)
   {
         struct r1conf *conf = mddev->private;
@@@ -793,7 -760,7 +760,7 @@@ static void flush_pending_writes(struc
                         if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                             !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                 /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                         else
                                 generic_make_request(bio);
                         bio = next;
@@@ -1068,7 -1035,7 +1035,7 @@@ static void raid1_unplug(struct blk_plu
                 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                     !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                         /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                 else
                         generic_make_request(bio);
                 bio = next;
@@@ -1111,8 -1078,7 +1078,8 @@@ static void make_request(struct mddev *
             ((bio_end_sector(bio) > mddev->suspend_lo &&
             bio->bi_iter.bi_sector < mddev->suspend_hi) ||
             (mddev_is_clustered(mddev) &&
- -           md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
+ +           md_cluster_ops->area_resyncing(mddev, WRITE,
+ +                   bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
                 /* As the suspend_* range is controlled by
                  * userspace, we want an interruptible
                  * wait.
@@@ -1125,7 -1091,7 +1092,7 @@@
                         if (bio_end_sector(bio) <= mddev->suspend_lo ||
                             bio->bi_iter.bi_sector >= mddev->suspend_hi ||
                             (mddev_is_clustered(mddev) &&
- -                           !md_cluster_ops->area_resyncing(mddev,
+ +                           !md_cluster_ops->area_resyncing(mddev, WRITE,
                                      bio->bi_iter.bi_sector, bio_end_sector(bio))))
                                 break;
                         schedule();
@@@ -1158,7 -1124,7 +1125,7 @@@
          * non-zero, then it is the number of not-completed requests.
          */
         bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
   
         if (rw == READ) {
                 /*
@@@ -1269,8 -1235,7 +1236,7 @@@ read_again
                         break;
                 }
                 r1_bio->bios[i] = NULL;
-               if (!rdev || test_bit(Faulty, &rdev->flags)
-                   || test_bit(Unmerged, &rdev->flags)) {
+               if (!rdev || test_bit(Faulty, &rdev->flags)) {
                         if (i < conf->raid_disks)
                                 set_bit(R1BIO_Degraded, &r1_bio->state);
                         continue;
@@@ -1476,7 -1441,6 +1442,7 @@@ static void error(struct mddev *mddev, 
   {
         char b[BDEVNAME_SIZE];
         struct r1conf *conf = mddev->private;
+ +      unsigned long flags;
   
         /*
          * If it is not operational, then we have already marked it as dead
@@@ -1496,13 -1460,14 +1462,13 @@@
                 return;
         }
         set_bit(Blocked, &rdev->flags);
+ +      spin_lock_irqsave(&conf->device_lock, flags);
         if (test_and_clear_bit(In_sync, &rdev->flags)) {
- -              unsigned long flags;
- -              spin_lock_irqsave(&conf->device_lock, flags);
                 mddev->degraded++;
                 set_bit(Faulty, &rdev->flags);
- -              spin_unlock_irqrestore(&conf->device_lock, flags);
         } else
                 set_bit(Faulty, &rdev->flags);
+ +      spin_unlock_irqrestore(&conf->device_lock, flags);
         /*
          * if recovery is running, make sure it aborts.
          */
@@@ -1568,10 -1533,7 +1534,10 @@@ static int raid1_spare_active(struct md
          * Find all failed disks within the RAID1 configuration
          * and mark them readable.
          * Called under mddev lock, so rcu protection not needed.
+ +       * device_lock used to avoid races with raid1_end_read_request
+ +       * which expects 'In_sync' flags and ->degraded to be consistent.
          */
+ +      spin_lock_irqsave(&conf->device_lock, flags);
         for (i = 0; i < conf->raid_disks; i++) {
                 struct md_rdev *rdev = conf->mirrors[i].rdev;
                 struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
@@@ -1602,6 -1564,7 +1568,6 @@@
                         sysfs_notify_dirent_safe(rdev->sysfs_state);
                 }
         }
- -      spin_lock_irqsave(&conf->device_lock, flags);
         mddev->degraded -= count;
         spin_unlock_irqrestore(&conf->device_lock, flags);
   
@@@ -1617,7 -1580,6 +1583,6 @@@ static int raid1_add_disk(struct mddev 
         struct raid1_info *p;
         int first = 0;
         int last = conf->raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
   
         if (mddev->recovery_disabled == conf->recovery_disabled)
                 return -EBUSY;
@@@ -1625,11 -1587,6 +1590,6 @@@
         if (rdev->raid_disk >= 0)
                 first = last = rdev->raid_disk;
   
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
- 
         for (mirror = first; mirror <= last; mirror++) {
                 p = conf->mirrors+mirror;
                 if (!p->rdev) {
@@@ -1661,19 -1618,6 +1621,6 @@@
                         break;
                 }
         }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
         md_integrity_add_rdev(rdev, mddev);
         if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@@ -1737,7 -1681,7 +1684,7 @@@ abort
         return err;
   }
   
- static void end_sync_read(struct bio *bio, int error)
+ static void end_sync_read(struct bio *bio)
   {
         struct r1bio *r1_bio = bio->bi_private;
   
@@@ -1748,16 -1692,16 +1695,16 @@@
          * or re-read if the read failed.
          * We don't do much here, just schedule handling by raid1d
          */
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                 set_bit(R1BIO_Uptodate, &r1_bio->state);
   
         if (atomic_dec_and_test(&r1_bio->remaining))
                 reschedule_retry(r1_bio);
   }
   
- static void end_sync_write(struct bio *bio, int error)
+ static void end_sync_write(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
         struct r1bio *r1_bio = bio->bi_private;
         struct mddev *mddev = r1_bio->mddev;
         struct r1conf *conf = mddev->private;
@@@ -1944,7 -1888,7 +1891,7 @@@ static int fix_sync_read_error(struct r
                 idx ++;
         }
         set_bit(R1BIO_Uptodate, &r1_bio->state);
-       set_bit(BIO_UPTODATE, &bio->bi_flags);
+       bio->bi_error = 0;
         return 1;
   }
   
@@@ -1968,15 -1912,14 +1915,14 @@@ static void process_checks(struct r1bi
         for (i = 0; i < conf->raid_disks * 2; i++) {
                 int j;
                 int size;
-               int uptodate;
+               int error;
                 struct bio *b = r1_bio->bios[i];
                 if (b->bi_end_io != end_sync_read)
                         continue;
-               /* fixup the bio for reuse, but preserve BIO_UPTODATE */
-               uptodate = test_bit(BIO_UPTODATE, &b->bi_flags);
+               /* fixup the bio for reuse, but preserve errno */
+               error = b->bi_error;
                 bio_reset(b);
-               if (!uptodate)
-                       clear_bit(BIO_UPTODATE, &b->bi_flags);
+               b->bi_error = error;
                 b->bi_vcnt = vcnt;
                 b->bi_iter.bi_size = r1_bio->sectors << 9;
                 b->bi_iter.bi_sector = r1_bio->sector +
@@@ -1999,7 -1942,7 +1945,7 @@@
         }
         for (primary = 0; primary < conf->raid_disks * 2; primary++)
                 if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-                   test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+                   !r1_bio->bios[primary]->bi_error) {
                         r1_bio->bios[primary]->bi_end_io = NULL;
                         rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
                         break;
@@@ -2009,14 -1952,14 +1955,14 @@@
                 int j;
                 struct bio *pbio = r1_bio->bios[primary];
                 struct bio *sbio = r1_bio->bios[i];
-               int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags);
+               int error = sbio->bi_error;
   
                 if (sbio->bi_end_io != end_sync_read)
                         continue;
-               /* Now we can 'fixup' the BIO_UPTODATE flag */
-               set_bit(BIO_UPTODATE, &sbio->bi_flags);
+               /* Now we can 'fixup' the error value */
+               sbio->bi_error = 0;
   
-               if (uptodate) {
+               if (!error) {
                         for (j = vcnt; j-- ; ) {
                                 struct page *p, *s;
                                 p = pbio->bi_io_vec[j].bv_page;
@@@ -2031,7 -1974,7 +1977,7 @@@
                 if (j >= 0)
                         atomic64_add(r1_bio->sectors, &mddev->resync_mismatches);
                 if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-                             && uptodate)) {
+                             && !error)) {
                         /* No need to write to this device. */
                         sbio->bi_end_io = NULL;
                         rdev_dec_pending(conf->mirrors[i].rdev, mddev);
@@@ -2272,11 -2215,11 +2218,11 @@@ static void handle_sync_write_finished(
                 struct bio *bio = r1_bio->bios[m];
                 if (bio->bi_end_io == NULL)
                         continue;
-               if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (!bio->bi_error &&
                     test_bit(R1BIO_MadeGood, &r1_bio->state)) {
                         rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
                 }
-               if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
+               if (bio->bi_error &&
                     test_bit(R1BIO_WriteError, &r1_bio->state)) {
                         if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
                                 md_error(conf->mddev, rdev);
@@@ -2715,7 -2658,7 +2661,7 @@@ static sector_t sync_request(struct mdd
                                                 /* remove last page from this bio */
                                                 bio->bi_vcnt--;
                                                 bio->bi_iter.bi_size -= len;
-                                               __clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+                                               bio_clear_flag(bio, BIO_SEG_VALID);
                                         }
                                         goto bio_full;
                                 }
@@@ -2810,8 -2753,6 +2756,6 @@@ static struct r1conf *setup_conf(struc
                         goto abort;
                 disk->rdev = rdev;
                 q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
   
                 disk->head_position = 0;
                 disk->seq_start = MaxSector;
@@@ -3176,7 -3117,6 +3120,6 @@@ static struct md_personality raid1_pers
         .quiesce        = raid1_quiesce,
         .takeover       = raid1_takeover,
         .congested      = raid1_congested,
-       .mergeable_bvec = raid1_mergeable_bvec,
   };
   
   static int __init raid_init(void)
diff --combined drivers/md/raid10.c

index 38c58e19cfce3d7bdea554b26474080a88e02cca,d92098f3e65bdf1bcd1f8997b018d8e064610de3..b0fce2ebf7ad2679f209adc321811ccd6e0de121
--- 1/drivers/md/raid10.c
--- 2/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@@ -101,7 -101,7 +101,7 @@@ static int _enough(struct r10conf *conf
   static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
                                 int *skipped);
   static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
- static void end_reshape_write(struct bio *bio, int error);
+ static void end_reshape_write(struct bio *bio);
   static void end_reshape(struct r10conf *conf);
   
   static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
@@@ -307,9 -307,9 +307,9 @@@ static void raid_end_bio_io(struct r10b
         } else
                 done = 1;
         if (!test_bit(R10BIO_Uptodate, &r10_bio->state))
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+               bio->bi_error = -EIO;
         if (done) {
-               bio_endio(bio, 0);
+               bio_endio(bio);
                 /*
                  * Wake up any possible resync thread that waits for the device
                  * to go idle.
@@@ -358,9 -358,9 +358,9 @@@ static int find_bio_disk(struct r10con
         return r10_bio->devs[slot].devnum;
   }
   
- static void raid10_end_read_request(struct bio *bio, int error)
+ static void raid10_end_read_request(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+       int uptodate = !bio->bi_error;
         struct r10bio *r10_bio = bio->bi_private;
         int slot, dev;
         struct md_rdev *rdev;
@@@ -438,9 -438,8 +438,8 @@@ static void one_write_done(struct r10bi
         }
   }
   
- static void raid10_end_write_request(struct bio *bio, int error)
+ static void raid10_end_write_request(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct r10bio *r10_bio = bio->bi_private;
         int dev;
         int dec_rdev = 1;
@@@ -460,7 -459,7 +459,7 @@@
         /*
          * this branch is our 'one mirror IO has finished' event handler:
          */
-       if (!uptodate) {
+       if (bio->bi_error) {
                 if (repl)
                         /* Never record new bad blocks to replacement,
                          * just fail it.
@@@ -672,93 -671,6 +671,6 @@@ static sector_t raid10_find_virt(struc
         return (vchunk << geo->chunk_shift) + offset;
   }
   
- /**
-  *    raid10_mergeable_bvec -- tell bio layer if a two requests can be merged
-  *    @mddev: the md device
-  *    @bvm: properties of new bio
-  *    @biovec: the request that could be merged to it.
-  *
-  *    Return amount of bytes we can accept at this offset
-  *    This requires checking for end-of-chunk if near_copies != raid_disks,
-  *    and for subordinate merge_bvec_fns if merge_check_needed.
-  */
- static int raid10_mergeable_bvec(struct mddev *mddev,
-                                struct bvec_merge_data *bvm,
-                                struct bio_vec *biovec)
- {
-       struct r10conf *conf = mddev->private;
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max;
-       unsigned int chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
-       struct geom *geo = &conf->geo;
- 
-       chunk_sectors = (conf->geo.chunk_mask & conf->prev.chunk_mask) + 1;
-       if (conf->reshape_progress != MaxSector &&
-           ((sector >= conf->reshape_progress) !=
-            conf->mddev->reshape_backwards))
-               geo = &conf->prev;
- 
-       if (geo->near_copies < geo->raid_disks) {
-               max = (chunk_sectors - ((sector & (chunk_sectors - 1))
-                                       + bio_sectors)) << 9;
-               if (max < 0)
-                       /* bio_add cannot handle a negative return */
-                       max = 0;
-               if (max <= biovec->bv_len && bio_sectors == 0)
-                       return biovec->bv_len;
-       } else
-               max = biovec->bv_len;
- 
-       if (mddev->merge_check_needed) {
-               struct {
-                       struct r10bio r10_bio;
-                       struct r10dev devs[conf->copies];
-               } on_stack;
-               struct r10bio *r10_bio = &on_stack.r10_bio;
-               int s;
-               if (conf->reshape_progress != MaxSector) {
-                       /* Cannot give any guidance during reshape */
-                       if (max <= biovec->bv_len && bio_sectors == 0)
-                               return biovec->bv_len;
-                       return 0;
-               }
-               r10_bio->sector = sector;
-               raid10_find_phys(conf, r10_bio);
-               rcu_read_lock();
-               for (s = 0; s < conf->copies; s++) {
-                       int disk = r10_bio->devs[s].devnum;
-                       struct md_rdev *rdev = rcu_dereference(
-                               conf->mirrors[disk].rdev);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = r10_bio->devs[s].addr
-                                               + rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-                       rdev = rcu_dereference(conf->mirrors[disk].replacement);
-                       if (rdev && !test_bit(Faulty, &rdev->flags)) {
-                               struct request_queue *q =
-                                       bdev_get_queue(rdev->bdev);
-                               if (q->merge_bvec_fn) {
-                                       bvm->bi_sector = r10_bio->devs[s].addr
-                                               + rdev->data_offset;
-                                       bvm->bi_bdev = rdev->bdev;
-                                       max = min(max, q->merge_bvec_fn(
-                                                         q, bvm, biovec));
-                               }
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return max;
- }
- 
   /*
    * This routine returns the disk from which the requested read should
    * be done. There is a per-array 'next expected sequential IO' sector
@@@ -821,12 -733,10 +733,10 @@@ retry
                 disk = r10_bio->devs[slot].devnum;
                 rdev = rcu_dereference(conf->mirrors[disk].replacement);
                 if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags) ||
                     r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
                         rdev = rcu_dereference(conf->mirrors[disk].rdev);
                 if (rdev == NULL ||
-                   test_bit(Faulty, &rdev->flags) ||
-                   test_bit(Unmerged, &rdev->flags))
+                   test_bit(Faulty, &rdev->flags))
                         continue;
                 if (!test_bit(In_sync, &rdev->flags) &&
                     r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
@@@ -957,7 -867,7 +867,7 @@@ static void flush_pending_writes(struc
                         if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                             !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                                 /* Just ignore it */
-                               bio_endio(bio, 0);
+                               bio_endio(bio);
                         else
                                 generic_make_request(bio);
                         bio = next;
@@@ -1133,7 -1043,7 +1043,7 @@@ static void raid10_unplug(struct blk_pl
                 if (unlikely((bio->bi_rw & REQ_DISCARD) &&
                     !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
                         /* Just ignore it */
-                       bio_endio(bio, 0);
+                       bio_endio(bio);
                 else
                         generic_make_request(bio);
                 bio = next;
@@@ -1217,7 -1127,7 +1127,7 @@@ static void __make_request(struct mdde
          * non-zero, then it is the number of not-completed requests.
          */
         bio->bi_phys_segments = 0;
-       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+       bio_clear_flag(bio, BIO_SEG_VALID);
   
         if (rw == READ) {
                 /*
@@@ -1326,11 -1236,9 +1236,9 @@@ retry_write
                         blocked_rdev = rrdev;
                         break;
                 }
-               if (rdev && (test_bit(Faulty, &rdev->flags)
-                            || test_bit(Unmerged, &rdev->flags)))
+               if (rdev && (test_bit(Faulty, &rdev->flags)))
                         rdev = NULL;
-               if (rrdev && (test_bit(Faulty, &rrdev->flags)
-                             || test_bit(Unmerged, &rrdev->flags)))
+               if (rrdev && (test_bit(Faulty, &rrdev->flags)))
                         rrdev = NULL;
   
                 r10_bio->devs[i].bio = NULL;
@@@ -1777,7 -1685,6 +1685,6 @@@ static int raid10_add_disk(struct mdde
         int mirror;
         int first = 0;
         int last = conf->geo.raid_disks - 1;
-       struct request_queue *q = bdev_get_queue(rdev->bdev);
   
         if (mddev->recovery_cp < MaxSector)
                 /* only hot-add to in-sync arrays, as recovery is
@@@ -1790,11 -1697,6 +1697,6 @@@
         if (rdev->raid_disk >= 0)
                 first = last = rdev->raid_disk;
   
-       if (q->merge_bvec_fn) {
-               set_bit(Unmerged, &rdev->flags);
-               mddev->merge_check_needed = 1;
-       }
- 
         if (rdev->saved_raid_disk >= first &&
             conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                 mirror = rdev->saved_raid_disk;
@@@ -1833,19 -1735,6 +1735,6 @@@
                 rcu_assign_pointer(p->rdev, rdev);
                 break;
         }
-       if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
-               /* Some requests might not have seen this new
-                * merge_bvec_fn.  We must wait for them to complete
-                * before merging the device fully.
-                * First we make sure any code which has tested
-                * our function has submitted the request, then
-                * we wait for all outstanding requests to complete.
-                */
-               synchronize_sched();
-               freeze_array(conf, 0);
-               unfreeze_array(conf);
-               clear_bit(Unmerged, &rdev->flags);
-       }
         md_integrity_add_rdev(rdev, mddev);
         if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
@@@ -1916,7 -1805,7 +1805,7 @@@ abort
         return err;
   }
   
- static void end_sync_read(struct bio *bio, int error)
+ static void end_sync_read(struct bio *bio)
   {
         struct r10bio *r10_bio = bio->bi_private;
         struct r10conf *conf = r10_bio->mddev->private;
@@@ -1928,7 -1817,7 +1817,7 @@@
         } else
                 d = find_bio_disk(conf, r10_bio, bio, NULL, NULL);
   
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (!bio->bi_error)
                 set_bit(R10BIO_Uptodate, &r10_bio->state);
         else
                 /* The write handler will notice the lack of
@@@ -1977,9 -1866,8 +1866,8 @@@ static void end_sync_request(struct r10
         }
   }
   
- static void end_sync_write(struct bio *bio, int error)
+ static void end_sync_write(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct r10bio *r10_bio = bio->bi_private;
         struct mddev *mddev = r10_bio->mddev;
         struct r10conf *conf = mddev->private;
@@@ -1996,7 -1884,7 +1884,7 @@@
         else
                 rdev = conf->mirrors[d].rdev;
   
-       if (!uptodate) {
+       if (bio->bi_error) {
                 if (repl)
                         md_error(mddev, rdev);
                 else {
@@@ -2044,7 -1932,7 +1932,7 @@@ static void sync_request_write(struct m
   
         /* find the first device with a block */
         for (i=0; i<conf->copies; i++)
-               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
+               if (!r10_bio->devs[i].bio->bi_error)
                         break;
   
         if (i == conf->copies)
@@@ -2064,7 -1952,7 +1952,7 @@@
                         continue;
                 if (i == first)
                         continue;
-               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+               if (!r10_bio->devs[i].bio->bi_error) {
                         /* We know that the bi_io_vec layout is the same for
                          * both 'first' and 'i', so we just compare them.
                          * All vec entries are PAGE_SIZE;
@@@ -2394,7 -2282,6 +2282,6 @@@ static void fix_read_error(struct r10co
                         d = r10_bio->devs[sl].devnum;
                         rdev = rcu_dereference(conf->mirrors[d].rdev);
                         if (rdev &&
-                           !test_bit(Unmerged, &rdev->flags) &&
                             test_bit(In_sync, &rdev->flags) &&
                             is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
                                         &first_bad, &bad_sectors) == 0) {
@@@ -2448,7 -2335,6 +2335,6 @@@
                         d = r10_bio->devs[sl].devnum;
                         rdev = rcu_dereference(conf->mirrors[d].rdev);
                         if (!rdev ||
-                           test_bit(Unmerged, &rdev->flags) ||
                             !test_bit(In_sync, &rdev->flags))
                                 continue;
   
@@@ -2706,8 -2592,7 +2592,7 @@@ static void handle_write_completed(stru
                         rdev = conf->mirrors[dev].rdev;
                         if (r10_bio->devs[m].bio == NULL)
                                 continue;
-                       if (test_bit(BIO_UPTODATE,
-                                    &r10_bio->devs[m].bio->bi_flags)) {
+                       if (!r10_bio->devs[m].bio->bi_error) {
                                 rdev_clear_badblocks(
                                         rdev,
                                         r10_bio->devs[m].addr,
@@@ -2722,8 -2607,8 +2607,8 @@@
                         rdev = conf->mirrors[dev].replacement;
                         if (r10_bio->devs[m].repl_bio == NULL)
                                 continue;
-                       if (test_bit(BIO_UPTODATE,
-                                    &r10_bio->devs[m].repl_bio->bi_flags)) {
+ 
+                       if (!r10_bio->devs[m].repl_bio->bi_error) {
                                 rdev_clear_badblocks(
                                         rdev,
                                         r10_bio->devs[m].addr,
@@@ -2748,8 -2633,7 +2633,7 @@@
                                         r10_bio->devs[m].addr,
                                         r10_bio->sectors, 0);
                                 rdev_dec_pending(rdev, conf->mddev);
-                       } else if (bio != NULL &&
-                                  !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+                       } else if (bio != NULL && bio->bi_error) {
                                 if (!narrow_write_error(r10_bio, m)) {
                                         md_error(conf->mddev, rdev);
                                         set_bit(R10BIO_Degraded,
@@@ -3263,7 -3147,7 +3147,7 @@@ static sector_t sync_request(struct mdd
   
                         bio = r10_bio->devs[i].bio;
                         bio_reset(bio);
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = -EIO;
                         if (conf->mirrors[d].rdev == NULL ||
                             test_bit(Faulty, &conf->mirrors[d].rdev->flags))
                                 continue;
@@@ -3300,7 -3184,7 +3184,7 @@@
                         /* Need to set up for writing to the replacement */
                         bio = r10_bio->devs[i].repl_bio;
                         bio_reset(bio);
-                       clear_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = -EIO;
   
                         sector = r10_bio->devs[i].addr;
                         atomic_inc(&conf->mirrors[d].rdev->nr_pending);
@@@ -3357,7 -3241,7 +3241,7 @@@
                                 /* remove last page from this bio */
                                 bio2->bi_vcnt--;
                                 bio2->bi_iter.bi_size -= len;
-                               __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+                               bio_clear_flag(bio2, BIO_SEG_VALID);
                         }
                         goto bio_full;
                 }
@@@ -3377,7 -3261,7 +3261,7 @@@
   
                 if (bio->bi_end_io == end_sync_read) {
                         md_sync_acct(bio->bi_bdev, nr_sectors);
-                       set_bit(BIO_UPTODATE, &bio->bi_flags);
+                       bio->bi_error = 0;
                         generic_make_request(bio);
                 }
         }
@@@ -3556,7 -3440,6 +3440,7 @@@ static struct r10conf *setup_conf(struc
                         /* far_copies must be 1 */
                         conf->prev.stride = conf->dev_sectors;
         }
+ +      conf->reshape_safe = conf->reshape_progress;
         spin_lock_init(&conf->device_lock);
         INIT_LIST_HEAD(&conf->retry_list);
   
@@@ -3643,8 -3526,6 +3527,6 @@@ static int run(struct mddev *mddev
                         disk->rdev = rdev;
                 }
                 q = bdev_get_queue(rdev->bdev);
-               if (q->merge_bvec_fn)
-                       mddev->merge_check_needed = 1;
                 diff = (rdev->new_data_offset - rdev->data_offset);
                 if (!mddev->reshape_backwards)
                         diff = -diff;
@@@ -3761,6 -3642,7 +3643,6 @@@
                 }
                 conf->offset_diff = min_offset_diff;
   
- -              conf->reshape_safe = conf->reshape_progress;
                 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                 set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
@@@ -4103,7 -3985,6 +3985,7 @@@ static int raid10_start_reshape(struct 
                 conf->reshape_progress = size;
         } else
                 conf->reshape_progress = 0;
+ +      conf->reshape_safe = conf->reshape_progress;
         spin_unlock_irq(&conf->device_lock);
   
         if (mddev->delta_disks && mddev->bitmap) {
@@@ -4171,7 -4052,6 +4053,7 @@@ abort
                 rdev->new_data_offset = rdev->data_offset;
         smp_wmb();
         conf->reshape_progress = MaxSector;
+ +      conf->reshape_safe = MaxSector;
         mddev->reshape_position = MaxSector;
         spin_unlock_irq(&conf->device_lock);
         return ret;
@@@ -4382,7 -4262,7 +4264,7 @@@ read_more
         read_bio->bi_end_io = end_sync_read;
         read_bio->bi_rw = READ;
         read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
-       __set_bit(BIO_UPTODATE, &read_bio->bi_flags);
+       read_bio->bi_error = 0;
         read_bio->bi_vcnt = 0;
         read_bio->bi_iter.bi_size = 0;
         r10_bio->master_bio = read_bio;
@@@ -4439,7 -4319,7 +4321,7 @@@
                                 /* Remove last page from this bio */
                                 bio2->bi_vcnt--;
                                 bio2->bi_iter.bi_size -= len;
-                               __clear_bit(BIO_SEG_VALID, &bio2->bi_flags);
+                               bio_clear_flag(bio2, BIO_SEG_VALID);
                         }
                         goto bio_full;
                 }
@@@ -4526,7 -4406,6 +4408,7 @@@ static void end_reshape(struct r10conf 
         md_finish_reshape(conf->mddev);
         smp_wmb();
         conf->reshape_progress = MaxSector;
+ +      conf->reshape_safe = MaxSector;
         spin_unlock_irq(&conf->device_lock);
   
         /* read-ahead size must cover two whole stripes, which is
@@@ -4604,9 -4483,8 +4486,8 @@@ static int handle_reshape_read_error(st
         return 0;
   }
   
- static void end_reshape_write(struct bio *bio, int error)
+ static void end_reshape_write(struct bio *bio)
   {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         struct r10bio *r10_bio = bio->bi_private;
         struct mddev *mddev = r10_bio->mddev;
         struct r10conf *conf = mddev->private;
@@@ -4623,7 -4501,7 +4504,7 @@@
                 rdev = conf->mirrors[d].rdev;
         }
   
-       if (!uptodate) {
+       if (bio->bi_error) {
                 /* FIXME should record badblock */
                 md_error(mddev, rdev);
         }
@@@ -4700,7 -4578,6 +4581,6 @@@ static struct md_personality raid10_per
         .start_reshape  = raid10_start_reshape,
         .finish_reshape = raid10_finish_reshape,
         .congested      = raid10_congested,
-       .mergeable_bvec = raid10_mergeable_bvec,
   };
   
   static int __init raid_init(void)
diff --combined drivers/md/raid5.c

index f757023fc4580680bfdd6e178f93acb62cb1f31e,6d20692952d247893092a8bc8f51cee8be1783bb..b29e89cb815b93e0813537ca33249a7d0379d8c8
--- 1/drivers/md/raid5.c
--- 2/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@@ -233,7 -233,7 +233,7 @@@ static void return_io(struct bio *retur
                 bi->bi_iter.bi_size = 0;
                 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                          bi, 0);
-               bio_endio(bi, 0);
+               bio_endio(bi);
                 bi = return_bi;
         }
   }
@@@ -887,9 -887,9 +887,9 @@@ static int use_new_offset(struct r5con
   }
   
   static void
- raid5_end_read_request(struct bio *bi, int error);
+ raid5_end_read_request(struct bio *bi);
   static void
- raid5_end_write_request(struct bio *bi, int error);
+ raid5_end_write_request(struct bio *bi);
   
   static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
   {
@@@ -2162,9 -2162,6 +2162,9 @@@ static int resize_stripes(struct r5con
         if (!sc)
                 return -ENOMEM;
   
+ +      /* Need to ensure auto-resizing doesn't interfere */
+ +      mutex_lock(&conf->cache_size_mutex);
+ +
         for (i = conf->max_nr_stripes; i; i--) {
                 nsh = alloc_stripe(sc, GFP_KERNEL);
                 if (!nsh)
@@@ -2181,7 -2178,6 +2181,7 @@@
                         kmem_cache_free(sc, nsh);
                 }
                 kmem_cache_destroy(sc);
+ +              mutex_unlock(&conf->cache_size_mutex);
                 return -ENOMEM;
         }
         /* Step 2 - Must use GFP_NOIO now.
@@@ -2228,7 -2224,6 +2228,7 @@@
         } else
                 err = -ENOMEM;
   
+ +      mutex_unlock(&conf->cache_size_mutex);
         /* Step 4, return new stripes to service */
         while(!list_empty(&newstripes)) {
                 nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@@ -2256,7 -2251,7 +2256,7 @@@
   static int drop_one_stripe(struct r5conf *conf)
   {
         struct stripe_head *sh;
- -      int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
+ +      int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
   
         spin_lock_irq(conf->hash_locks + hash);
         sh = get_free_stripe(conf, hash);
@@@ -2282,12 -2277,11 +2282,11 @@@ static void shrink_stripes(struct r5con
         conf->slab_cache = NULL;
   }
   
- static void raid5_end_read_request(struct bio * bi, int error)
+ static void raid5_end_read_request(struct bio * bi)
   {
         struct stripe_head *sh = bi->bi_private;
         struct r5conf *conf = sh->raid_conf;
         int disks = sh->disks, i;
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
         char b[BDEVNAME_SIZE];
         struct md_rdev *rdev = NULL;
         sector_t s;
@@@ -2296,9 -2290,9 +2295,9 @@@
                 if (bi == &sh->dev[i].req)
                         break;
   
-       pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+       pr_debug("end_read_request %llu/%d, count: %d, error %d.\n",
                 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               uptodate);
+               bi->bi_error);
         if (i == disks) {
                 BUG();
                 return;
@@@ -2317,7 -2311,7 +2316,7 @@@
                 s = sh->sector + rdev->new_data_offset;
         else
                 s = sh->sector + rdev->data_offset;
-       if (uptodate) {
+       if (!bi->bi_error) {
                 set_bit(R5_UPTODATE, &sh->dev[i].flags);
                 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                         /* Note that this cannot happen on a
@@@ -2405,13 -2399,12 +2404,12 @@@
         release_stripe(sh);
   }
   
- static void raid5_end_write_request(struct bio *bi, int error)
+ static void raid5_end_write_request(struct bio *bi)
   {
         struct stripe_head *sh = bi->bi_private;
         struct r5conf *conf = sh->raid_conf;
         int disks = sh->disks, i;
         struct md_rdev *uninitialized_var(rdev);
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
         sector_t first_bad;
         int bad_sectors;
         int replacement = 0;
@@@ -2434,23 -2427,23 +2432,23 @@@
                         break;
                 }
         }
-       pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+       pr_debug("end_write_request %llu/%d, count %d, error: %d.\n",
                 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
-               uptodate);
+               bi->bi_error);
         if (i == disks) {
                 BUG();
                 return;
         }
   
         if (replacement) {
-               if (!uptodate)
+               if (bi->bi_error)
                         md_error(conf->mddev, rdev);
                 else if (is_badblock(rdev, sh->sector,
                                      STRIPE_SECTORS,
                                      &first_bad, &bad_sectors))
                         set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
         } else {
-               if (!uptodate) {
+               if (bi->bi_error) {
                         set_bit(STRIPE_DEGRADED, &sh->state);
                         set_bit(WriteErrorSeen, &rdev->flags);
                         set_bit(R5_WriteError, &sh->dev[i].flags);
@@@ -2471,7 -2464,7 +2469,7 @@@
         }
         rdev_dec_pending(rdev, conf->mddev);
   
-       if (sh->batch_head && !uptodate && !replacement)
+       if (sh->batch_head && bi->bi_error && !replacement)
                 set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
   
         if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
@@@ -3112,7 -3105,8 +3110,8 @@@ handle_failed_stripe(struct r5conf *con
                 while (bi && bi->bi_iter.bi_sector <
                         sh->dev[i].sector + STRIPE_SECTORS) {
                         struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ 
+                       bi->bi_error = -EIO;
                         if (!raid5_dec_bi_active_stripes(bi)) {
                                 md_write_end(conf->mddev);
                                 bi->bi_next = *return_bi;
@@@ -3136,7 -3130,8 +3135,8 @@@
                 while (bi && bi->bi_iter.bi_sector <
                        sh->dev[i].sector + STRIPE_SECTORS) {
                         struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ 
+                       bi->bi_error = -EIO;
                         if (!raid5_dec_bi_active_stripes(bi)) {
                                 md_write_end(conf->mddev);
                                 bi->bi_next = *return_bi;
@@@ -3161,7 -3156,8 +3161,8 @@@
                                sh->dev[i].sector + STRIPE_SECTORS) {
                                 struct bio *nextbi =
                                         r5_next_bio(bi, sh->dev[i].sector);
-                               clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ 
+                               bi->bi_error = -EIO;
                                 if (!raid5_dec_bi_active_stripes(bi)) {
                                         bi->bi_next = *return_bi;
                                         *return_bi = bi;
@@@ -4066,10 -4062,8 +4067,10 @@@ static void analyse_stripe(struct strip
                                  &first_bad, &bad_sectors))
                         set_bit(R5_ReadRepl, &dev->flags);
                 else {
- -                      if (rdev)
+ +                      if (rdev && !test_bit(Faulty, &rdev->flags))
                                 set_bit(R5_NeedReplace, &dev->flags);
+ +                      else
+ +                              clear_bit(R5_NeedReplace, &dev->flags);
                         rdev = rcu_dereference(conf->disks[i].rdev);
                         clear_bit(R5_ReadRepl, &dev->flags);
                 }
@@@ -4669,35 -4663,6 +4670,6 @@@ static int raid5_congested(struct mdde
         return 0;
   }
   
- /* We want read requests to align with chunks where possible,
-  * but write requests don't need to.
-  */
- static int raid5_mergeable_bvec(struct mddev *mddev,
-                               struct bvec_merge_data *bvm,
-                               struct bio_vec *biovec)
- {
-       sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
-       int max;
-       unsigned int chunk_sectors = mddev->chunk_sectors;
-       unsigned int bio_sectors = bvm->bi_size >> 9;
- 
-       /*
-        * always allow writes to be mergeable, read as well if array
-        * is degraded as we'll go through stripe cache anyway.
-        */
-       if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
-               return biovec->bv_len;
- 
-       if (mddev->new_chunk_sectors < mddev->chunk_sectors)
-               chunk_sectors = mddev->new_chunk_sectors;
-       max =  (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9;
-       if (max < 0) max = 0;
-       if (max <= biovec->bv_len && bio_sectors == 0)
-               return biovec->bv_len;
-       else
-               return max;
- }
- 
   static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
   {
         sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
@@@ -4756,13 -4721,13 +4728,13 @@@ static struct bio *remove_bio_from_retr
    *  first).
    *  If the read failed..
    */
- static void raid5_align_endio(struct bio *bi, int error)
+ static void raid5_align_endio(struct bio *bi)
   {
         struct bio* raid_bi  = bi->bi_private;
         struct mddev *mddev;
         struct r5conf *conf;
-       int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
         struct md_rdev *rdev;
+       int error = bi->bi_error;
   
         bio_put(bi);
   
@@@ -4773,10 -4738,10 +4745,10 @@@
   
         rdev_dec_pending(rdev, conf->mddev);
   
-       if (!error && uptodate) {
+       if (!error) {
                 trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
                                          raid_bi, 0);
-               bio_endio(raid_bi, 0);
+               bio_endio(raid_bi);
                 if (atomic_dec_and_test(&conf->active_aligned_reads))
                         wake_up(&conf->wait_for_quiescent);
                 return;
@@@ -4787,26 -4752,7 +4759,7 @@@
         add_bio_to_retry(raid_bi, conf);
   }
   
- static int bio_fits_rdev(struct bio *bi)
- {
-       struct request_queue *q = bdev_get_queue(bi->bi_bdev);
- 
-       if (bio_sectors(bi) > queue_max_sectors(q))
-               return 0;
-       blk_recount_segments(q, bi);
-       if (bi->bi_phys_segments > queue_max_segments(q))
-               return 0;
- 
-       if (q->merge_bvec_fn)
-               /* it's too hard to apply the merge_bvec_fn at this stage,
-                * just just give up
-                */
-               return 0;
- 
-       return 1;
- }
- 
- static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
+ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
   {
         struct r5conf *conf = mddev->private;
         int dd_idx;
@@@ -4815,7 -4761,7 +4768,7 @@@
         sector_t end_sector;
   
         if (!in_chunk_boundary(mddev, raid_bio)) {
-               pr_debug("chunk_aligned_read : non aligned\n");
+               pr_debug("%s: non aligned\n", __func__);
                 return 0;
         }
         /*
@@@ -4857,13 -4803,11 +4810,11 @@@
                 rcu_read_unlock();
                 raid_bio->bi_next = (void*)rdev;
                 align_bi->bi_bdev =  rdev->bdev;
-               __clear_bit(BIO_SEG_VALID, &align_bi->bi_flags);
+               bio_clear_flag(align_bi, BIO_SEG_VALID);
   
-               if (!bio_fits_rdev(align_bi) ||
-                   is_badblock(rdev, align_bi->bi_iter.bi_sector,
+               if (is_badblock(rdev, align_bi->bi_iter.bi_sector,
                                 bio_sectors(align_bi),
                                 &first_bad, &bad_sectors)) {
-                       /* too big in some way, or has a known bad block */
                         bio_put(align_bi);
                         rdev_dec_pending(rdev, mddev);
                         return 0;
@@@ -4892,6 -4836,31 +4843,31 @@@
         }
   }
   
+ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
+ {
+       struct bio *split;
+ 
+       do {
+               sector_t sector = raid_bio->bi_iter.bi_sector;
+               unsigned chunk_sects = mddev->chunk_sectors;
+               unsigned sectors = chunk_sects - (sector & (chunk_sects-1));
+ 
+               if (sectors < bio_sectors(raid_bio)) {
+                       split = bio_split(raid_bio, sectors, GFP_NOIO, fs_bio_set);
+                       bio_chain(split, raid_bio);
+               } else
+                       split = raid_bio;
+ 
+               if (!raid5_read_one_chunk(mddev, split)) {
+                       if (split != raid_bio)
+                               generic_make_request(raid_bio);
+                       return split;
+               }
+       } while (split != raid_bio);
+ 
+       return NULL;
+ }
+ 
   /* __get_priority_stripe - get the next stripe to process
    *
    * Full stripe writes are allowed to pass preread active stripes up until
@@@ -5140,7 -5109,7 +5116,7 @@@ static void make_discard_request(struc
         remaining = raid5_dec_bi_active_stripes(bi);
         if (remaining == 0) {
                 md_write_end(mddev);
-               bio_endio(bi, 0);
+               bio_endio(bi);
         }
   }
   
@@@ -5169,9 -5138,11 +5145,11 @@@ static void make_request(struct mddev *
          * data on failed drives.
          */
         if (rw == READ && mddev->degraded == 0 &&
-            mddev->reshape_position == MaxSector &&
-            chunk_aligned_read(mddev,bi))
-               return;
+           mddev->reshape_position == MaxSector) {
+               bi = chunk_aligned_read(mddev, bi);
+               if (!bi)
+                       return;
+       }
   
         if (unlikely(bi->bi_rw & REQ_DISCARD)) {
                 make_discard_request(mddev, bi);
@@@ -5304,7 -5275,7 +5282,7 @@@
                         release_stripe_plug(mddev, sh);
                 } else {
                         /* cannot get stripe for read-ahead, just give-up */
-                       clear_bit(BIO_UPTODATE, &bi->bi_flags);
+                       bi->bi_error = -EIO;
                         break;
                 }
         }
@@@ -5318,7 -5289,7 +5296,7 @@@
   
                 trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                          bi, 0);
-               bio_endio(bi, 0);
+               bio_endio(bi);
         }
   }
   
@@@ -5714,7 -5685,7 +5692,7 @@@ static int  retry_aligned_read(struct r
         if (remaining == 0) {
                 trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
                                          raid_bio, 0);
-               bio_endio(raid_bio, 0);
+               bio_endio(raid_bio);
         }
         if (atomic_dec_and_test(&conf->active_aligned_reads))
                 wake_up(&conf->wait_for_quiescent);
@@@ -5864,14 -5835,12 +5842,14 @@@ static void raid5d(struct md_thread *th
         pr_debug("%d stripes handled\n", handled);
   
         spin_unlock_irq(&conf->device_lock);
- -      if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+ +      if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
+ +          mutex_trylock(&conf->cache_size_mutex)) {
                 grow_one_stripe(conf, __GFP_NOWARN);
                 /* Set flag even if allocation failed.  This helps
                  * slow down allocation requests when mem is short
                  */
                 set_bit(R5_DID_ALLOC, &conf->cache_state);
+ +              mutex_unlock(&conf->cache_size_mutex);
         }
   
         async_tx_issue_pending_all();
@@@ -5903,22 -5872,18 +5881,22 @@@ raid5_set_cache_size(struct mddev *mdde
                 return -EINVAL;
   
         conf->min_nr_stripes = size;
+ +      mutex_lock(&conf->cache_size_mutex);
         while (size < conf->max_nr_stripes &&
                drop_one_stripe(conf))
                 ;
+ +      mutex_unlock(&conf->cache_size_mutex);
   
   
         err = md_allow_write(mddev);
         if (err)
                 return err;
   
+ +      mutex_lock(&conf->cache_size_mutex);
         while (size > conf->max_nr_stripes)
                 if (!grow_one_stripe(conf, GFP_KERNEL))
                         break;
+ +      mutex_unlock(&conf->cache_size_mutex);
   
         return 0;
   }
@@@ -6384,19 -6349,11 +6362,19 @@@ static unsigned long raid5_cache_scan(s
                                       struct shrink_control *sc)
   {
         struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
- -      int ret = 0;
- -      while (ret < sc->nr_to_scan) {
- -              if (drop_one_stripe(conf) == 0)
- -                      return SHRINK_STOP;
- -              ret++;
+ +      unsigned long ret = SHRINK_STOP;
+ +
+ +      if (mutex_trylock(&conf->cache_size_mutex)) {
+ +              ret= 0;
+ +              while (ret < sc->nr_to_scan &&
+ +                     conf->max_nr_stripes > conf->min_nr_stripes) {
+ +                      if (drop_one_stripe(conf) == 0) {
+ +                              ret = SHRINK_STOP;
+ +                              break;
+ +                      }
+ +                      ret++;
+ +              }
+ +              mutex_unlock(&conf->cache_size_mutex);
         }
         return ret;
   }
@@@ -6465,7 -6422,6 +6443,7 @@@ static struct r5conf *setup_conf(struc
                 goto abort;
         spin_lock_init(&conf->device_lock);
         seqcount_init(&conf->gen_lock);
+ +      mutex_init(&conf->cache_size_mutex);
         init_waitqueue_head(&conf->wait_for_quiescent);
         for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
                 init_waitqueue_head(&conf->wait_for_stripe[i]);
@@@ -7779,7 -7735,6 +7757,6 @@@ static struct md_personality raid6_pers
         .quiesce        = raid5_quiesce,
         .takeover       = raid6_takeover,
         .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
   };
   static struct md_personality raid5_personality =
   {
@@@ -7803,7 -7758,6 +7780,6 @@@
         .quiesce        = raid5_quiesce,
         .takeover       = raid5_takeover,
         .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
   };
   
   static struct md_personality raid4_personality =
@@@ -7828,7 -7782,6 +7804,6 @@@
         .quiesce        = raid5_quiesce,
         .takeover       = raid4_takeover,
         .congested      = raid5_congested,
-       .mergeable_bvec = raid5_mergeable_bvec,
   };
   
   static int __init raid5_init(void)
diff --combined drivers/mtd/mtd_blkdevs.c

index 88304751eb8a9f1be47e3838d80fd4782cbf862c,1b96cf771d2b53bf50d692b6cd7a53bf5ebf2d8f..44dc965a2f7c0d10d2f55245568274a59db0a1ff
--- 1/drivers/mtd/mtd_blkdevs.c
--- 2/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@@ -97,13 -97,14 +97,13 @@@ static int do_blktrans_request(struct m
         if (req->cmd_flags & REQ_DISCARD)
                 return tr->discard(dev, block, nsect);
   
- -      switch(rq_data_dir(req)) {
- -      case READ:
+ +      if (rq_data_dir(req) == READ) {
                 for (; nsect > 0; nsect--, block++, buf += tr->blksize)
                         if (tr->readsect(dev, block, buf))
                                 return -EIO;
                 rq_flush_dcache_pages(req);
                 return 0;
- -      case WRITE:
+ +      } else {
                 if (!tr->writesect)
                         return -EIO;
   
@@@ -112,6 -113,9 +112,6 @@@
                         if (tr->writesect(dev, block, buf))
                                 return -EIO;
                 return 0;
- -      default:
- -              printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
- -              return -EIO;
         }
   }
   
@@@ -419,7 -423,7 +419,7 @@@ int add_mtd_blktrans_dev(struct mtd_blk
   
         if (tr->discard) {
                 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, new->rq);
-               new->rq->limits.max_discard_sectors = UINT_MAX;
+               blk_queue_max_discard_sectors(new->rq, UINT_MAX);
         }
   
         gd->queue = new->rq;
diff --combined drivers/s390/block/dcssblk.c

index dff3fcb69a785102ab2905b3504606688d11b270,29ea2394c8966ff55380a8c5fd9414f4d32dc45e..2b744fbba68e04d699c31552a2ee5ca9c56ba86c
--- 1/drivers/s390/block/dcssblk.c
--- 2/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@@ -548,10 -548,10 +548,10 @@@ dcssblk_add_store(struct device *dev, s
          */
         num_of_segments = 0;
         for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) {
- -              for (j = i; (buf[j] != ':') &&
+ +              for (j = i; j < count &&
+ +                      (buf[j] != ':') &&
                         (buf[j] != '\0') &&
- -                      (buf[j] != '\n') &&
- -                      j < count; j++) {
+ +                      (buf[j] != '\n'); j++) {
                         local_buf[j-i] = toupper(buf[j]);
                 }
                 local_buf[j-i] = '\0';
@@@ -723,7 -723,7 +723,7 @@@ dcssblk_remove_store(struct device *dev
         /*
          * parse input
          */
- -      for (i = 0; ((*(buf+i)!='\0') && (*(buf+i)!='\n') && i < count); i++) {
+ +      for (i = 0; (i < count && (*(buf+i)!='\0') && (*(buf+i)!='\n')); i++) {
                 local_buf[i] = toupper(buf[i]);
         }
         local_buf[i] = '\0';
@@@ -826,6 -826,8 +826,8 @@@ dcssblk_make_request(struct request_que
         unsigned long source_addr;
         unsigned long bytes_done;
   
+       blk_queue_split(q, &bio, q->bio_split);
+ 
         bytes_done = 0;
         dev_info = bio->bi_bdev->bd_disk->private_data;
         if (dev_info == NULL)
@@@ -871,7 -873,7 +873,7 @@@
                 }
                 bytes_done += bvec.bv_len;
         }
-       bio_endio(bio, 0);
+       bio_endio(bio);
         return;
   fail:
         bio_io_error(bio);
@@@ -904,10 -906,10 +906,10 @@@ dcssblk_check_params(void
   
         for (i = 0; (i < DCSSBLK_PARM_LEN) && (dcssblk_segments[i] != '\0');
              i++) {
- -              for (j = i; (dcssblk_segments[j] != ',')  &&
+ +              for (j = i; (j < DCSSBLK_PARM_LEN) &&
+ +                          (dcssblk_segments[j] != ',')  &&
                             (dcssblk_segments[j] != '\0') &&
- -                          (dcssblk_segments[j] != '(')  &&
- -                          (j < DCSSBLK_PARM_LEN); j++)
+ +                          (dcssblk_segments[j] != '('); j++)
                 {
                         buf[j-i] = dcssblk_segments[j];
                 }
diff --combined drivers/scsi/sd.c

index a20da8c25b4f960224fb4d772aafea38c57e1656,160e44e7b24a215ddad5b98c925385a58535c74a..3f370228bf310a223eaee279c4839a5c9602410f
--- 1/drivers/scsi/sd.c
--- 2/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@@ -647,7 -647,7 +647,7 @@@ static void sd_config_discard(struct sc
         switch (mode) {
   
         case SD_LBP_DISABLE:
-               q->limits.max_discard_sectors = 0;
+               blk_queue_max_discard_sectors(q, 0);
                 queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
                 return;
   
@@@ -675,7 -675,7 +675,7 @@@
                 break;
         }
   
-       q->limits.max_discard_sectors = max_blocks * (logical_block_size >> 9);
+       blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
   }
   
@@@ -2770,9 -2770,9 +2770,9 @@@ static int sd_revalidate_disk(struct ge
         max_xfer = sdkp->max_xfer_blocks;
         max_xfer <<= ilog2(sdp->sector_size) - 9;
   
- -      max_xfer = min_not_zero(queue_max_hw_sectors(sdkp->disk->queue),
- -                              max_xfer);
- -      blk_queue_max_hw_sectors(sdkp->disk->queue, max_xfer);
+ +      sdkp->disk->queue->limits.max_sectors =
+ +              min_not_zero(queue_max_hw_sectors(sdkp->disk->queue), max_xfer);
+ +
         set_capacity(disk, sdkp->capacity);
         sd_config_write_same(sdkp);
         kfree(buffer);
diff --combined drivers/staging/lustre/lustre/llite/lloop.c

index 06f5e51ecd9e1a8141c275232278dd6f081ef68a,1e33d540b2234a6ac7953feff2150a90f963a7b0..5f0d80cc97183e2104c1fcbcd71610750bc08626
--- 1/drivers/staging/lustre/lustre/llite/lloop.c
--- 2/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@@ -162,7 -162,7 +162,7 @@@ static int max_loop = MAX_LOOP_DEFAULT
   static struct lloop_device *loop_dev;
   static struct gendisk **disks;
   static struct mutex lloop_mutex;
- -static void *ll_iocontrol_magic = NULL;
+ +static void *ll_iocontrol_magic;
   
   static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
   {
@@@ -340,6 -340,8 +340,8 @@@ static void loop_make_request(struct re
         int rw = bio_rw(old_bio);
         int inactive;
   
+       blk_queue_split(q, &old_bio, q->bio_split);
+ 
         if (!lo)
                 goto err;
   
@@@ -365,7 -367,7 +367,7 @@@
         loop_add_bio(lo, old_bio);
         return;
   err:
- -      cfs_bio_io_error(old_bio, old_bio->bi_iter.bi_size);
+ +      bio_io_error(old_bio);
   }
   
   
@@@ -376,7 -378,7 +378,8 @@@ static inline void loop_handle_bio(stru
         while (bio) {
                 struct bio *tmp = bio->bi_next;
                 bio->bi_next = NULL;
-               bio_endio(bio, ret);
- -              cfs_bio_endio(bio, bio->bi_iter.bi_size, ret);
++              bio->bi_error = ret;
++              bio_endio(bio);
                 bio = tmp;
         }
   }
diff --combined fs/btrfs/disk-io.c

index f556c3732c2c16e22e0bcbd35f9ee1277179be5b,a8c0de888a9d564ebdc3f219aa98a8a44e67af71..5e307bd0471ab29fc166f7e7313835d2c80a6971
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -703,7 -703,7 +703,7 @@@ static int btree_io_failed_hook(struct 
         return -EIO;    /* we fixed nothing */
   }
   
- static void end_workqueue_bio(struct bio *bio, int err)
+ static void end_workqueue_bio(struct bio *bio)
   {
         struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
         struct btrfs_fs_info *fs_info;
@@@ -711,7 -711,7 +711,7 @@@
         btrfs_work_func_t func;
   
         fs_info = end_io_wq->info;
-       end_io_wq->error = err;
+       end_io_wq->error = bio->bi_error;
   
         if (bio->bi_rw & REQ_WRITE) {
                 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
@@@ -808,7 -808,8 +808,8 @@@ static void run_one_async_done(struct b
   
         /* If an error occured we just want to clean up the bio and move on */
         if (async->error) {
-               bio_endio(async->bio, async->error);
+               async->bio->bi_error = async->error;
+               bio_endio(async->bio);
                 return;
         }
   
@@@ -908,8 -909,10 +909,10 @@@ static int __btree_submit_bio_done(stru
          * submission context.  Just jump into btrfs_map_bio
          */
         ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1);
-       if (ret)
-               bio_endio(bio, ret);
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
         return ret;
   }
   
@@@ -960,10 -963,13 +963,13 @@@ static int btree_submit_bio_hook(struc
                                           __btree_submit_bio_done);
         }
   
-       if (ret) {
+       if (ret)
+               goto out_w_error;
+       return 0;
+ 
   out_w_error:
-               bio_endio(bio, ret);
-       }
+       bio->bi_error = ret;
+       bio_endio(bio);
         return ret;
   }
   
@@@ -1735,16 -1741,15 +1741,15 @@@ static void end_workqueue_fn(struct btr
   {
         struct bio *bio;
         struct btrfs_end_io_wq *end_io_wq;
-       int error;
   
         end_io_wq = container_of(work, struct btrfs_end_io_wq, work);
         bio = end_io_wq->bio;
   
-       error = end_io_wq->error;
+       bio->bi_error = end_io_wq->error;
         bio->bi_private = end_io_wq->private;
         bio->bi_end_io = end_io_wq->end_io;
         kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
-       bio_endio(bio, error);
+       bio_endio(bio);
   }
   
   static int cleaner_kthread(void *arg)
@@@ -2842,7 -2847,6 +2847,7 @@@ int open_ctree(struct super_block *sb
             !extent_buffer_uptodate(chunk_root->node)) {
                 printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                        sb->s_id);
+ +              chunk_root->node = NULL;
                 goto fail_tree_roots;
         }
         btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
@@@ -2880,7 -2884,7 +2885,7 @@@ retry_root_backup
             !extent_buffer_uptodate(tree_root->node)) {
                 printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                        sb->s_id);
- -
+ +              tree_root->node = NULL;
                 goto recovery_tree_root;
         }
   
@@@ -3324,10 -3328,8 +3329,8 @@@ static int write_dev_supers(struct btrf
    * endio for the write_dev_flush, this will wake anyone waiting
    * for the barrier when it is done
    */
- static void btrfs_end_empty_barrier(struct bio *bio, int err)
+ static void btrfs_end_empty_barrier(struct bio *bio)
   {
-       if (err)
-               clear_bit(BIO_UPTODATE, &bio->bi_flags);
         if (bio->bi_private)
                 complete(bio->bi_private);
         bio_put(bio);
@@@ -3355,8 -3357,8 +3358,8 @@@ static int write_dev_flush(struct btrfs
   
                 wait_for_completion(&device->flush_wait);
   
-               if (!bio_flagged(bio, BIO_UPTODATE)) {
-                       ret = -EIO;
+               if (bio->bi_error) {
+                       ret = bio->bi_error;
                         btrfs_dev_stat_inc_and_print(device,
                                 BTRFS_DEV_STAT_FLUSH_ERRS);
                 }
diff --combined fs/btrfs/inode.c

index e33dff356460687fcade4b56202f83f318e3ebb4,8635ef01a04a989d13d2aef27d7130c383a17874..f924d9a6270075d785ebbd41b617ffc219fa0493
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -1845,8 -1845,10 +1845,10 @@@ static int __btrfs_submit_bio_done(stru
         int ret;
   
         ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
-       if (ret)
-               bio_endio(bio, ret);
+       if (ret) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
         return ret;
   }
   
@@@ -1906,8 -1908,10 +1908,10 @@@ mapit
         ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
   
   out:
-       if (ret < 0)
-               bio_endio(bio, ret);
+       if (ret < 0) {
+               bio->bi_error = ret;
+               bio_endio(bio);
+       }
         return ret;
   }
   
@@@ -4209,7 -4213,7 +4213,7 @@@ int btrfs_truncate_inode_items(struct b
         u64 extent_num_bytes = 0;
         u64 extent_offset = 0;
         u64 item_end = 0;
- -      u64 last_size = (u64)-1;
+ +      u64 last_size = new_size;
         u32 found_type = (u8)-1;
         int found_extent;
         int del_item;
@@@ -4493,7 -4497,8 +4497,7 @@@ out
                         btrfs_abort_transaction(trans, root, ret);
         }
   error:
- -      if (last_size != (u64)-1 &&
- -          root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
+ +      if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                 btrfs_ordered_update_i_size(inode, last_size, NULL);
   
         btrfs_free_path(path);
@@@ -7688,13 -7693,13 +7692,13 @@@ struct btrfs_retry_complete 
         int uptodate;
   };
   
- static void btrfs_retry_endio_nocsum(struct bio *bio, int err)
+ static void btrfs_retry_endio_nocsum(struct bio *bio)
   {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct bio_vec *bvec;
         int i;
   
-       if (err)
+       if (bio->bi_error)
                 goto end;
   
         done->uptodate = 1;
@@@ -7743,7 -7748,7 +7747,7 @@@ try_again
         return 0;
   }
   
- static void btrfs_retry_endio(struct bio *bio, int err)
+ static void btrfs_retry_endio(struct bio *bio)
   {
         struct btrfs_retry_complete *done = bio->bi_private;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
@@@ -7752,7 -7757,7 +7756,7 @@@
         int ret;
         int i;
   
-       if (err)
+       if (bio->bi_error)
                 goto end;
   
         uptodate = 1;
@@@ -7835,12 -7840,13 +7839,13 @@@ static int btrfs_subio_endio_read(struc
         }
   }
   
- static void btrfs_endio_direct_read(struct bio *bio, int err)
+ static void btrfs_endio_direct_read(struct bio *bio)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
         struct bio *dio_bio;
         struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+       int err = bio->bi_error;
   
         if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                 err = btrfs_subio_endio_read(inode, io_bio, err);
@@@ -7851,17 -7857,14 +7856,14 @@@
   
         kfree(dip);
   
-       /* If we had a csum failure make sure to clear the uptodate flag */
-       if (err)
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-       dio_end_io(dio_bio, err);
+       dio_end_io(dio_bio, bio->bi_error);
   
         if (io_bio->end_io)
                 io_bio->end_io(io_bio, err);
         bio_put(bio);
   }
   
- static void btrfs_endio_direct_write(struct bio *bio, int err)
+ static void btrfs_endio_direct_write(struct bio *bio)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
         struct inode *inode = dip->inode;
@@@ -7875,7 -7878,8 +7877,8 @@@
   again:
         ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                    &ordered_offset,
-                                                  ordered_bytes, !err);
+                                                  ordered_bytes,
+                                                  !bio->bi_error);
         if (!ret)
                 goto out_test;
   
@@@ -7898,10 -7902,7 +7901,7 @@@ out_test
   
         kfree(dip);
   
-       /* If we had an error make sure to clear the uptodate flag */
-       if (err)
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
-       dio_end_io(dio_bio, err);
+       dio_end_io(dio_bio, bio->bi_error);
         bio_put(bio);
   }
   
@@@ -7916,9 -7917,10 +7916,10 @@@ static int __btrfs_submit_bio_start_dir
         return 0;
   }
   
- static void btrfs_end_dio_bio(struct bio *bio, int err)
+ static void btrfs_end_dio_bio(struct bio *bio)
   {
         struct btrfs_dio_private *dip = bio->bi_private;
+       int err = bio->bi_error;
   
         if (err)
                 btrfs_warn(BTRFS_I(dip->inode)->root->fs_info,
@@@ -7947,8 -7949,8 +7948,8 @@@
         if (dip->errors) {
                 bio_io_error(dip->orig_bio);
         } else {
-               set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
-               bio_endio(dip->orig_bio, 0);
+               dip->dio_bio->bi_error = 0;
+               bio_endio(dip->orig_bio);
         }
   out:
         bio_put(bio);
@@@ -7957,8 -7959,7 +7958,7 @@@
   static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
                                        u64 first_sector, gfp_t gfp_flags)
   {
-       int nr_vecs = bio_get_nr_vecs(bdev);
-       return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+       return btrfs_bio_alloc(bdev, first_sector, BIO_MAX_PAGES, gfp_flags);
   }
   
   static inline int btrfs_lookup_and_bind_dio_csum(struct btrfs_root *root,
@@@ -8219,7 -8220,8 +8219,8 @@@ free_ordered
          * callbacks - they require an allocated dip and a clone of dio_bio.
          */
         if (io_bio && dip) {
-               bio_endio(io_bio, ret);
+               io_bio->bi_error = -EIO;
+               bio_endio(io_bio);
                 /*
                  * The end io callbacks free our dip, do the final put on io_bio
                  * and all the cleanup and final put for dio_bio (through
@@@ -8246,7 -8248,7 +8247,7 @@@
                         unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                               file_offset + dio_bio->bi_iter.bi_size - 1);
                 }
-               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+               dio_bio->bi_error = -EIO;
                 /*
                  * Releases and cleans up our dio_bio, no need to bio_put()
                  * nor bio_endio()/bio_io_error() against dio_bio.
diff --combined fs/f2fs/data.c

index f71e19a9dd3c18fc6ee7b3bcb4f3da9c8b8c3d4c,b478accb24d98b0897866fa6416ed2b67a8468df..c414d49aa2de17553badaaa754bf3b967bf51cf1
--- 1/fs/f2fs/data.c
--- 2/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@@ -29,13 -29,13 +29,13 @@@
   static struct kmem_cache *extent_tree_slab;
   static struct kmem_cache *extent_node_slab;
   
- static void f2fs_read_end_io(struct bio *bio, int err)
+ static void f2fs_read_end_io(struct bio *bio)
   {
         struct bio_vec *bvec;
         int i;
   
         if (f2fs_bio_encrypted(bio)) {
-               if (err) {
+               if (bio->bi_error) {
                         f2fs_release_crypto_ctx(bio->bi_private);
                 } else {
                         f2fs_end_io_crypto_work(bio->bi_private, bio);
@@@ -46,7 -46,7 +46,7 @@@
         bio_for_each_segment_all(bvec, bio, i) {
                 struct page *page = bvec->bv_page;
   
-               if (!err) {
+               if (!bio->bi_error) {
                         SetPageUptodate(page);
                 } else {
                         ClearPageUptodate(page);
@@@ -57,7 -57,7 +57,7 @@@
         bio_put(bio);
   }
   
- static void f2fs_write_end_io(struct bio *bio, int err)
+ static void f2fs_write_end_io(struct bio *bio)
   {
         struct f2fs_sb_info *sbi = bio->bi_private;
         struct bio_vec *bvec;
@@@ -68,7 -68,7 +68,7 @@@
   
                 f2fs_restore_and_release_control_page(&page);
   
-               if (unlikely(err)) {
+               if (unlikely(bio->bi_error)) {
                         set_page_dirty(page);
                         set_bit(AS_EIO, &page->mapping->flags);
                         f2fs_stop_checkpoint(sbi);
@@@ -1552,7 -1552,7 +1552,7 @@@ submit_and_realloc
                         }
   
                         bio = bio_alloc(GFP_KERNEL,
-                               min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+                               min_t(int, nr_pages, BIO_MAX_PAGES));
                         if (!bio) {
                                 if (ctx)
                                         f2fs_release_crypto_ctx(ctx);
@@@ -2072,6 -2072,8 +2072,6 @@@ static int f2fs_set_data_page_dirty(str
                 return 1;
         }
   
- -      mark_inode_dirty(inode);
- -
         if (!PageDirty(page)) {
                 __set_page_dirty_nobuffers(page);
                 update_dirty_page(inode, page);
author	Linus Torvalds <[email protected]>
	Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
committer	Linus Torvalds <[email protected]>
	Wed, 2 Sep 2015 20:10:25 +0000 (13:10 -0700)
		1	2
block/bio.c	patch \|	diff1 \|	diff2 \|	blob \| history
block/blk-settings.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/null_blk.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/rbd.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/xen-blkback/blkback.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/xen-blkfront.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/block/zram/zram_drv.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm-cache-target.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm-thin.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/dm.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/md.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid1.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid10.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/md/raid5.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/mtd/mtd_blkdevs.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/s390/block/dcssblk.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/scsi/sd.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/staging/lustre/lustre/llite/lloop.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/f2fs/data.c	patch \|	diff1 \|	diff2 \|	blob \| history