#include <linux/blk-mq.h>
#include <linux/highmem.h>
#include <linux/mm.h>
+#include <linux/pagemap.h>
#include <linux/kernel_stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/bpf.h>
#include <linux/psi.h>
+ #include <linux/sched/sysctl.h>
+ #include <linux/blk-crypto.h>
#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
rq->start_time_ns = ktime_get_ns();
rq->part = NULL;
refcount_set(&rq->ref, 1);
+ blk_crypto_rq_set_defaults(rq);
}
EXPORT_SYMBOL(blk_rq_init);
REQ_OP_NAME(ZONE_OPEN),
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
+ REQ_OP_NAME(ZONE_APPEND),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
bio_advance(bio, nbytes);
+ if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+ /*
+ * Partial zone append completions cannot be supported as the
+ * BIO fragments may end up not being written sequentially.
+ */
+ if (bio->bi_iter.bi_size)
+ bio->bi_status = BLK_STS_IOERR;
+ else
+ bio->bi_iter.bi_sector = rq->__sector;
+ }
+
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
bio_endio(bio);
}
}
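+ /*
+  * Take a reference on the bio's request queue. On failure the bio is
+  * completed with -EAGAIN for REQ_NOWAIT submitters on a live queue, or
+  * with an I/O error otherwise.
+  */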
+ static inline int bio_queue_enter(struct bio *bio)
+ {
+ struct request_queue *q = bio->bi_disk->queue;
+ bool nowait = bio->bi_opf & REQ_NOWAIT;
+ int ret;
+
+ ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
+ if (unlikely(ret)) {
+ if (nowait && !blk_queue_dying(q))
+ bio_wouldblock_error(bio);
+ else
+ bio_io_error(bio);
+ }
+
+ return ret;
+ }
+
void blk_queue_exit(struct request_queue *q)
{
percpu_ref_put(&q->q_usage_counter);
if (ret)
goto fail_id;
- q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
+ q->backing_dev_info = bdi_alloc(node_id);
if (!q->backing_dev_info)
goto fail_split;
q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
- q->backing_dev_info->name = "block";
q->node = node_id;
timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
}
EXPORT_SYMBOL(blk_put_request);
+ static void blk_account_io_merge_bio(struct request *req)
+ {
+ if (!blk_do_io_stat(req))
+ return;
+
+ part_stat_lock();
+ part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+ part_stat_unlock();
+ }
+
bool bio_attempt_back_merge(struct request *req, struct bio *bio,
unsigned int nr_segs)
{
req->biotail = bio;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_free_ctx(bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
req->__sector = bio->bi_iter.bi_sector;
req->__data_len += bio->bi_iter.bi_size;
- blk_account_io_start(req, false);
+ bio_crypt_do_front_merge(req, bio);
+
+ blk_account_io_merge_bio(req);
return true;
}
req->__data_len += bio->bi_iter.bi_size;
req->nr_phys_segments = segments + 1;
- blk_account_io_start(req, false);
+ blk_account_io_merge_bio(req);
return true;
no_merge:
req_set_nomerge(q, req);
return ret;
}
+ /*
+ * Check write append to a zoned block device.
+ */
+ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
+ struct bio *bio)
+ {
+ sector_t pos = bio->bi_iter.bi_sector;
+ int nr_sectors = bio_sectors(bio);
+
+ /* Only applicable to zoned block devices */
+ if (!blk_queue_is_zoned(q))
+ return BLK_STS_NOTSUPP;
+
+ /* The bio sector must point to the start of a sequential zone */
+ if (pos & (blk_queue_zone_sectors(q) - 1) ||
+ !blk_queue_zone_is_seq(q, pos))
+ return BLK_STS_IOERR;
+
+ /*
+ * Not allowed to cross zone boundaries. Otherwise, the BIO will be
+ * split and could result in non-contiguous sectors being written in
+ * different zones.
+ */
+ if (nr_sectors > q->limits.chunk_sectors)
+ return BLK_STS_IOERR;
+
+ /* Make sure the BIO is small enough and will not get split */
+ if (nr_sectors > q->limits.max_zone_append_sectors)
+ return BLK_STS_IOERR;
+
+ bio->bi_opf |= REQ_NOMERGE;
+
+ return BLK_STS_OK;
+ }
+
static noinline_for_stack bool
generic_make_request_checks(struct bio *bio)
{
}
/*
- * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
- * with BLK_STS_AGAIN status in order to catch -EAGAIN and
- * to give a chance to the caller to repeat request gracefully.
+ * For a REQ_NOWAIT based request, return -EOPNOTSUPP
+ * if the queue is not a request based queue.
*/
- if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
- status = BLK_STS_AGAIN;
- goto end_io;
- }
+ if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
+ goto not_supported;
if (should_fail_bio(bio))
goto end_io;
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
+ case REQ_OP_ZONE_APPEND:
+ status = blk_check_zone_append(q, bio);
+ if (status != BLK_STS_OK)
+ goto end_io;
+ break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
}
/*
- * Various block parts want %current->io_context and lazy ioc
- * allocation ends up trading a lot of pain for a small amount of
- * memory. Just allocate it upfront. This may fail and block
- * layer knows how to live with it.
+ * Various block parts want %current->io_context, so allocate it up
+ * front rather than dealing with lots of pain to allocate it only
+ * where needed. This may fail and the block layer knows how to live
+ * with it.
*/
- create_io_context(GFP_ATOMIC, q->node);
+ if (unlikely(!current->io_context))
+ create_task_io_context(current, GFP_ATOMIC, q->node);
if (!blkcg_bio_issue_check(q, bio))
return false;
return false;
}
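+ /*
+  * Run inline encryption preparation on the bio and, if that succeeds, hand
+  * it off either to blk-mq or to the queue's legacy ->make_request_fn.
+  */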
+ static blk_qc_t do_make_request(struct bio *bio)
+ {
+ struct request_queue *q = bio->bi_disk->queue;
+ blk_qc_t ret = BLK_QC_T_NONE;
+
+ if (blk_crypto_bio_prep(&bio)) {
+ if (!q->make_request_fn)
+ return blk_mq_make_request(q, bio);
+ ret = q->make_request_fn(q, bio);
+ }
+ blk_queue_exit(q);
+ return ret;
+ }
+
/**
- * generic_make_request - hand a buffer to its device driver for I/O
+ * generic_make_request - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
*
- * generic_make_request() is used to make I/O requests of block
- * devices. It is passed a &struct bio, which describes the I/O that needs
- * to be done.
- *
- * generic_make_request() does not return any status. The
- * success/failure status of the request, along with notification of
- * completion, is delivered asynchronously through the bio->bi_end_io
- * function described (one day) else where.
- *
- * The caller of generic_make_request must make sure that bi_io_vec
- * are set to describe the memory buffer, and that bi_dev and bi_sector are
- * set to describe the device address, and the
- * bi_end_io and optionally bi_private are set to describe how
- * completion notification should be signaled.
- *
- * generic_make_request and the drivers it calls may use bi_next if this
- * bio happens to be merged with someone else, and may resubmit the bio to
- * a lower device by calling into generic_make_request recursively, which
- * means the bio should NOT be touched after the call to ->make_request_fn.
+ * This is a version of submit_bio() that shall only be used for I/O that is
+ * resubmitted to lower level drivers by stacking block drivers. All file
+ * systems and other upper level users of the block layer should use
+ * submit_bio() instead.
*/
blk_qc_t generic_make_request(struct bio *bio)
{
current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bio->bi_disk->queue;
- blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
- BLK_MQ_REQ_NOWAIT : 0;
- if (likely(blk_queue_enter(q, flags) == 0)) {
+ if (likely(bio_queue_enter(bio) == 0)) {
struct bio_list lower, same;
/* Create a fresh bio_list for all subordinate requests */
bio_list_on_stack[1] = bio_list_on_stack[0];
bio_list_init(&bio_list_on_stack[0]);
- ret = q->make_request_fn(q, bio);
-
- blk_queue_exit(q);
+ ret = do_make_request(bio);
/* sort new bios into those for a lower level
* and those for the same level
bio_list_merge(&bio_list_on_stack[0], &lower);
bio_list_merge(&bio_list_on_stack[0], &same);
bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
- } else {
- if (unlikely(!blk_queue_dying(q) &&
- (bio->bi_opf & REQ_NOWAIT)))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
}
bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
*
* This function behaves like generic_make_request(), but does not protect
* against recursion. Must only be used if the called driver is known
- * to not call generic_make_request (or direct_make_request) again from
- * its make_request function. (Calling direct_make_request again from
- * a workqueue is perfectly fine as that doesn't recurse).
+ * to be blk-mq based.
*/
blk_qc_t direct_make_request(struct bio *bio)
{
struct request_queue *q = bio->bi_disk->queue;
- bool nowait = bio->bi_opf & REQ_NOWAIT;
- blk_qc_t ret;
+ if (WARN_ON_ONCE(q->make_request_fn)) {
+ bio_io_error(bio);
+ return BLK_QC_T_NONE;
+ }
if (!generic_make_request_checks(bio))
return BLK_QC_T_NONE;
-
- if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
- if (nowait && !blk_queue_dying(q))
- bio_wouldblock_error(bio);
- else
- bio_io_error(bio);
+ if (unlikely(bio_queue_enter(bio)))
+ return BLK_QC_T_NONE;
+ if (!blk_crypto_bio_prep(&bio)) {
+ blk_queue_exit(q);
return BLK_QC_T_NONE;
}
-
- ret = q->make_request_fn(q, bio);
- blk_queue_exit(q);
- return ret;
+ return blk_mq_make_request(q, bio);
}
EXPORT_SYMBOL_GPL(direct_make_request);
* submit_bio - submit a bio to the block device layer for I/O
* @bio: The &struct bio which describes the I/O
*
- * submit_bio() is very similar in purpose to generic_make_request(), and
- * uses that function to do most of the work. Both are fairly rough
- * interfaces; @bio must be presetup and ready for I/O.
+ * submit_bio() is used to submit I/O requests to block devices. It is passed a
+ * fully set up &struct bio that describes the I/O that needs to be done. The
+ * bio will be sent to the device described by the bi_disk and bi_partno fields.
*
+ * The success/failure status of the request, along with notification of
+ * completion, is delivered asynchronously through the ->bi_end_io() callback
+ * in @bio. The bio must NOT be touched by the caller until ->bi_end_io() has
+ * been called.
*/
blk_qc_t submit_bio(struct bio *bio)
{
- bool workingset_read = false;
- unsigned long pflags;
- blk_qc_t ret;
-
if (blkcg_punt_bio_submit(bio))
return BLK_QC_T_NONE;
if (op_is_write(bio_op(bio))) {
count_vm_events(PGPGOUT, count);
} else {
- if (bio_flagged(bio, BIO_WORKINGSET))
- workingset_read = true;
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
}
/*
- * If we're reading data that is part of the userspace
- * workingset, count submission time as memory stall. When the
- * device is congested, or the submitting cgroup IO-throttled,
- * submission can be a significant part of overall IO time.
+ * If we're reading data that is part of the userspace workingset, count
+ * submission time as memory stall. When the device is congested, or
+ * the submitting cgroup IO-throttled, submission can be a significant
+ * part of overall IO time.
*/
- if (workingset_read)
- psi_memstall_enter(&pflags);
-
- ret = generic_make_request(bio);
+ if (unlikely(bio_op(bio) == REQ_OP_READ &&
+ bio_flagged(bio, BIO_WORKINGSET))) {
+ unsigned long pflags;
+ blk_qc_t ret;
- if (workingset_read)
+ psi_memstall_enter(&pflags);
+ ret = generic_make_request(bio);
psi_memstall_leave(&pflags);
- return ret;
+ return ret;
+ }
+
+ return generic_make_request(bio);
}
EXPORT_SYMBOL(submit_bio);
should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
return BLK_STS_IOERR;
+ if (blk_crypto_insert_cloned_request(rq))
+ return BLK_STS_IOERR;
+
if (blk_queue_io_stat(q))
- blk_account_io_start(rq, true);
+ blk_account_io_start(rq);
/*
* Since we have a scheduler attached on the top device,
}
EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
- void blk_account_io_completion(struct request *req, unsigned int bytes)
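+ /*
+  * Account io_ticks for @part and, for a partition, also for the whole
+  * disk. The per-partition stamp is advanced with a lockless cmpxchg() so
+  * that the counter is only updated when jiffies has moved on: by one tick
+  * at I/O start and by the full elapsed time at I/O completion (@end).
+  */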
+ static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+ {
+ unsigned long stamp;
+ again:
+ stamp = READ_ONCE(part->stamp);
+ if (unlikely(stamp != now)) {
+ if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
+ __part_stat_add(part, io_ticks, end ? now - stamp : 1);
+ }
+ if (part->partno) {
+ part = &part_to_disk(part)->part0;
+ goto again;
+ }
+ }
+
+ static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
if (req->part && blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
- part_dec_in_flight(req->q, part, rq_data_dir(req));
+ part_stat_unlock();
hd_struct_put(part);
- part_stat_unlock();
}
}
- void blk_account_io_start(struct request *rq, bool new_io)
+ void blk_account_io_start(struct request *rq)
{
- struct hd_struct *part;
- int rw = rq_data_dir(rq);
-
if (!blk_do_io_stat(rq))
return;
+ rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
+
part_stat_lock();
+ update_io_ticks(rq->part, jiffies, false);
+ part_stat_unlock();
+ }
- if (!new_io) {
- part = rq->part;
- part_stat_inc(part, merges[rw]);
- } else {
- part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
- if (!hd_struct_try_get(part)) {
- /*
- * The partition is already being removed,
- * the request will be accounted on the disk only
- *
- * We take a reference on disk->part0 although that
- * partition will never be deleted, so we can treat
- * it as any other partition.
- */
- part = &rq->rq_disk->part0;
- hd_struct_get(part);
- }
- part_inc_in_flight(rq->q, part, rw);
- rq->part = part;
- }
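+ /*
+  * Start-of-I/O accounting helper for drivers that account bios directly
+  * against the whole-disk part0: bump io_ticks, the I/O and sector counts
+  * and the in-flight counter, and return the start time in jiffies for a
+  * later disk_end_io_acct() call.
+  */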
+ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+ unsigned int op)
+ {
+ struct hd_struct *part = &disk->part0;
+ const int sgrp = op_stat_group(op);
+ unsigned long now = READ_ONCE(jiffies);
+
+ part_stat_lock();
+ update_io_ticks(part, now, false);
+ part_stat_inc(part, ios[sgrp]);
+ part_stat_add(part, sectors[sgrp], sectors);
+ part_stat_local_inc(part, in_flight[op_is_write(op)]);
+ part_stat_unlock();
- update_io_ticks(part, jiffies, false);
+ return now;
+ }
+ EXPORT_SYMBOL(disk_start_io_acct);
+
+ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+ unsigned long start_time)
+ {
+ struct hd_struct *part = &disk->part0;
+ const int sgrp = op_stat_group(op);
+ unsigned long now = READ_ONCE(jiffies);
+ unsigned long duration = now - start_time;
+ part_stat_lock();
+ update_io_ticks(part, now, true);
+ part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+ part_stat_local_dec(part, in_flight[op_is_write(op)]);
part_stat_unlock();
}
+ EXPORT_SYMBOL(disk_end_io_acct);
/*
* Steal bios from a request and add them to a bio list.
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
rq->ioprio = rq_src->ioprio;
- rq->extra_len = rq_src->extra_len;
+
+ if (rq->bio)
+ blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
return 0;
}
EXPORT_SYMBOL(blk_finish_plug);
+ void blk_io_schedule(void)
+ {
+ /* Prevent hang_check timer from firing at us during very long I/O */
+ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
+
+ if (timeout)
+ io_schedule_timeout(timeout);
+ else
+ io_schedule();
+ }
+ EXPORT_SYMBOL_GPL(blk_io_schedule);
+
int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
link->flags |= DL_FLAG_STATELESS;
goto reorder;
} else {
+ link->flags |= DL_FLAG_STATELESS;
goto out;
}
}
flags & DL_FLAG_PM_RUNTIME)
pm_runtime_resume(supplier);
+ list_add_tail_rcu(&link->s_node, &supplier->links.consumers);
+ list_add_tail_rcu(&link->c_node, &consumer->links.suppliers);
+
if (flags & DL_FLAG_SYNC_STATE_ONLY) {
dev_dbg(consumer,
"Linked as a sync state only consumer to %s\n",
dev_name(supplier));
goto out;
}
+
reorder:
/*
* Move the consumer and all of the devices depending on it to the end
*/
device_reorder_to_tail(consumer, NULL);
- list_add_tail_rcu(&link->s_node, &supplier->links.consumers);
- list_add_tail_rcu(&link->c_node, &consumer->links.suppliers);
-
dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier));
- out:
+out:
device_pm_unlock();
device_links_write_unlock();
list_add_tail(&sup->links.defer_sync, &deferred_sync);
}
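+/*
+ * Stop managing a device link: clear DL_FLAG_MANAGED, reset the link status
+ * and drop the reference held on the link.
+ */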
+static void device_link_drop_managed(struct device_link *link)
+{
+ link->flags &= ~DL_FLAG_MANAGED;
+ WRITE_ONCE(link->status, DL_STATE_NONE);
+ kref_put(&link->kref, __device_link_del);
+}
+
/**
* device_links_driver_bound - Update device links after probing its driver.
* @dev: Device to update the links for.
*/
void device_links_driver_bound(struct device *dev)
{
- struct device_link *link;
+ struct device_link *link, *ln;
LIST_HEAD(sync_list);
/*
else
__device_links_queue_sync_state(dev, &sync_list);
- list_for_each_entry(link, &dev->links.suppliers, c_node) {
+ list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) {
+ struct device *supplier;
+
if (!(link->flags & DL_FLAG_MANAGED))
continue;
- WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
- WRITE_ONCE(link->status, DL_STATE_ACTIVE);
+ supplier = link->supplier;
+ if (link->flags & DL_FLAG_SYNC_STATE_ONLY) {
+ /*
+ * When DL_FLAG_SYNC_STATE_ONLY is set, it means no
+ * other DL_MANAGED_LINK_FLAGS have been set. So, it's
+ * safe to drop the managed link completely.
+ */
+ device_link_drop_managed(link);
+ } else {
+ WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
+ WRITE_ONCE(link->status, DL_STATE_ACTIVE);
+ }
+ /*
+ * This needs to be done even for the deleted
+ * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last
+ * device link that was preventing the supplier from getting a
+ * sync_state() call.
+ */
if (defer_sync_state_count)
- __device_links_supplier_defer_sync(link->supplier);
+ __device_links_supplier_defer_sync(supplier);
else
- __device_links_queue_sync_state(link->supplier,
- &sync_list);
+ __device_links_queue_sync_state(supplier, &sync_list);
}
dev->links.status = DL_DEV_DRIVER_BOUND;
device_links_flush_sync_list(&sync_list, dev);
}
-static void device_link_drop_managed(struct device_link *link)
-{
- link->flags &= ~DL_FLAG_MANAGED;
- WRITE_ONCE(link->status, DL_STATE_NONE);
- kref_put(&link->kref, __device_link_del);
-}
-
/**
* __device_links_no_driver - Update links of a device without a driver.
* @dev: Device without a driver.
else if (dev->class && dev->class->dev_release)
dev->class->dev_release(dev);
else
- WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n",
+ WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
dev_name(dev));
kfree(p);
}
return fw_devlink_flags;
}
+static bool fw_devlink_is_permissive(void)
+{
+ return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY;
+}
+
/**
* device_add - add device to device hierarchy.
* @dev: device.
if (fw_devlink_flags && is_fwnode_dev &&
fwnode_has_op(dev->fwnode, add_links)) {
fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev);
- if (fw_ret == -ENODEV)
+ if (fw_ret == -ENODEV && !fw_devlink_is_permissive())
device_link_wait_for_mandatory_supplier(dev);
else if (fw_ret)
device_link_wait_for_optional_supplier(dev);
return ERR_PTR(retval);
}
- /**
- * device_create_vargs - creates a device and registers it with sysfs
- * @class: pointer to the struct class that this device should be registered to
- * @parent: pointer to the parent struct device of this new device, if any
- * @devt: the dev_t for the char device to be added
- * @drvdata: the data to be added to the device for callbacks
- * @fmt: string for the device's name
- * @args: va_list for the device's name
- *
- * This function can be used by char device classes. A struct device
- * will be created in sysfs, registered to the specified class.
- *
- * A "dev" file will be created, showing the dev_t for the device, if
- * the dev_t is not 0,0.
- * If a pointer to a parent struct device is passed in, the newly created
- * struct device will be a child of that device in sysfs.
- * The pointer to the struct device will be returned from the call.
- * Any further sysfs files that might be required can be created using this
- * pointer.
- *
- * Returns &struct device pointer on success, or ERR_PTR() on error.
- *
- * Note: the struct class passed to this function must have previously
- * been created with a call to class_create().
- */
- struct device *device_create_vargs(struct class *class, struct device *parent,
- dev_t devt, void *drvdata, const char *fmt,
- va_list args)
- {
- return device_create_groups_vargs(class, parent, devt, drvdata, NULL,
- fmt, args);
- }
- EXPORT_SYMBOL_GPL(device_create_vargs);
-
/**
* device_create - creates a device and registers it with sysfs
* @class: pointer to the struct class that this device should be registered to
struct device *dev;
va_start(vargs, fmt);
- dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs);
+ dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL,
+ fmt, vargs);
va_end(vargs);
return dev;
}
else
dev->fwnode = fwnode;
}
+EXPORT_SYMBOL_GPL(set_secondary_fwnode);
/**
* device_set_of_node_from_dev - reuse device-tree node of another device
static int loop_kthread_worker_fn(void *worker_ptr)
{
- current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
+ current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
return kthread_worker_fn(worker_ptr);
}
lo->tag_set.queue_depth = 128;
lo->tag_set.numa_node = NUMA_NO_NODE;
lo->tag_set.cmd_size = sizeof(struct loop_cmd);
- lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+ lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
lo->tag_set.driver_data = lo;
err = blk_mq_alloc_tag_set(&lo->tag_set);
return errno_to_blk_status(err);
}
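+ /*
+  * Zero-fill the bios of a read command when the device has no memory
+  * backing, so that the command does not complete with uninitialized data.
+  */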
+ static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
+ {
+ struct nullb_device *dev = cmd->nq->dev;
+ struct bio *bio;
+
+ if (dev->memory_backed)
+ return;
+
+ if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
+ zero_fill_bio(cmd->bio);
+ } else if (req_op(cmd->rq) == REQ_OP_READ) {
+ __rq_for_each_bio(bio, cmd->rq)
+ zero_fill_bio(bio);
+ }
+ }
+
static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
{
+ /*
+ * Since root privileges are required to configure the null_blk
+ * driver, it is fine that this driver does not initialize the
+ * data buffers of read commands. Zero-initialize these buffers
+ * anyway if KMSAN is enabled to prevent KMSAN from complaining
+ * about null_blk not initializing read data buffers.
+ */
+ if (IS_ENABLED(CONFIG_KMSAN))
+ nullb_zero_read_cmd_buffer(cmd);
+
/* Complete IO by inline, softirq or timer */
switch (cmd->nq->dev->irqmode) {
case NULL_IRQ_SOFTIRQ:
static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
{
pr_info("rq %p timed out\n", rq);
- blk_mq_complete_request(rq);
+ blk_mq_force_complete_rq(rq);
return BLK_EH_DONE;
}
{
if (nullb->dev->discard == false)
return;
+
+ if (nullb->dev->zoned) {
+ nullb->dev->discard = false;
+ pr_info("discard option is ignored in zoned mode\n");
+ return;
+ }
+
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
nullb->q->limits.discard_alignment = nullb->dev->blocksize;
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
pr_err("zone_size must be power-of-two\n");
return -EINVAL;
}
+ if (dev->zone_size > dev->size) {
+ pr_err("Zone size larger than device capacity\n");
+ return -EINVAL;
+ }
dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
dev->nr_zones = dev_size >>
int null_register_zoned_dev(struct nullb *nullb)
{
+ struct nullb_device *dev = nullb->dev;
struct request_queue *q = nullb->q;
- if (queue_is_mq(q))
- return blk_revalidate_disk_zones(nullb->disk);
+ if (queue_is_mq(q)) {
+ int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
+
+ if (ret)
+ return ret;
+ } else {
+ blk_queue_chunk_sectors(q, dev->zone_size_sects);
+ q->nr_zones = blkdev_nr_zones(nullb->disk);
+ }
- blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects);
- q->nr_zones = blkdev_nr_zones(nullb->disk);
+ blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
return 0;
}
}
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
- unsigned int nr_sectors)
+ unsigned int nr_sectors, bool append)
{
struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector);
case BLK_ZONE_COND_IMP_OPEN:
case BLK_ZONE_COND_EXP_OPEN:
case BLK_ZONE_COND_CLOSED:
- /* Writes must be at the write pointer position */
- if (sector != zone->wp)
+ /*
+ * Regular writes must be at the write pointer position.
+ * Zone append writes are automatically issued at the write
+ * pointer and the written position is returned through the
+ * request or BIO sector.
+ */
+ if (append) {
+ sector = zone->wp;
+ if (cmd->bio)
+ cmd->bio->bi_iter.bi_sector = sector;
+ else
+ cmd->rq->__sector = sector;
+ } else if (sector != zone->wp) {
return BLK_STS_IOERR;
+ }
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN;
{
switch (op) {
case REQ_OP_WRITE:
- return null_zone_write(cmd, sector, nr_sectors);
+ return null_zone_write(cmd, sector, nr_sectors, false);
+ case REQ_OP_ZONE_APPEND:
+ return null_zone_write(cmd, sector, nr_sectors, true);
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
case REQ_OP_ZONE_OPEN:
config.id = -1;
config.dev = &mtd->dev;
- config.name = mtd->name;
+ config.name = dev_name(&mtd->dev);
config.owner = THIS_MODULE;
config.reg_read = mtd_nvmem_reg_read;
config.size = mtd->size;
struct backing_dev_info *bdi;
int ret;
- bdi = bdi_alloc(GFP_KERNEL);
+ bdi = bdi_alloc(NUMA_NO_NODE);
if (!bdi)
return ERR_PTR(-ENOMEM);
- bdi->name = name;
/*
* We put '-0' suffix to the name to get the same name format as we
* used to get. Since this is called only once, we get a unique name.
scsi_io_completion_action(cmd, result);
}
- static blk_status_t scsi_init_sgtable(struct request *req,
- struct scsi_data_buffer *sdb)
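+ /*
+  * Check whether a passthrough, non-write request on a device with a DMA
+  * drain buffer needs that buffer appended to its scatterlist.
+  */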
+ static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
+ struct request *rq)
{
- int count;
-
- /*
- * If sg table allocation fails, requeue request later.
- */
- if (unlikely(sg_alloc_table_chained(&sdb->table,
- blk_rq_nr_phys_segments(req), sdb->table.sgl,
- SCSI_INLINE_SG_CNT)))
- return BLK_STS_RESOURCE;
-
- /*
- * Next, walk the list, and fill in the addresses and sizes of
- * each segment.
- */
- count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
- BUG_ON(count > sdb->table.nents);
- sdb->table.nents = count;
- sdb->length = blk_rq_payload_bytes(req);
- return BLK_STS_OK;
+ return sdev->dma_drain_len && blk_rq_is_passthrough(rq) &&
+ !op_is_write(req_op(rq)) &&
+ sdev->host->hostt->dma_need_drain(rq);
}
/*
*/
blk_status_t scsi_init_io(struct scsi_cmnd *cmd)
{
+ struct scsi_device *sdev = cmd->device;
struct request *rq = cmd->request;
+ unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
+ struct scatterlist *last_sg = NULL;
blk_status_t ret;
+ bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq);
+ int count;
- if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
+ if (WARN_ON_ONCE(!nr_segs))
return BLK_STS_IOERR;
- ret = scsi_init_sgtable(rq, &cmd->sdb);
- if (ret)
- return ret;
+ /*
+ * Make sure there is space for the drain. The driver must adjust
+ * max_hw_segments to be prepared for this.
+ */
+ if (need_drain)
+ nr_segs++;
+
+ /*
+ * If sg table allocation fails, requeue request later.
+ */
+ if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs,
+ cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT)))
+ return BLK_STS_RESOURCE;
+
+ /*
+ * Next, walk the list, and fill in the addresses and sizes of
+ * each segment.
+ */
+ count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
+
+ if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
+ unsigned int pad_len =
+ (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+
+ last_sg->length += pad_len;
+ cmd->extra_len += pad_len;
+ }
+
+ if (need_drain) {
+ sg_unmark_end(last_sg);
+ last_sg = sg_next(last_sg);
+ sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len);
+ sg_mark_end(last_sg);
+
+ cmd->extra_len += sdev->dma_drain_len;
+ count++;
+ }
+
+ BUG_ON(count > cmd->sdb.table.nents);
+ cmd->sdb.table.nents = count;
+ cmd->sdb.length = blk_rq_payload_bytes(rq);
if (blk_integrity_rq(rq)) {
struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
- int ivecs, count;
+ int ivecs;
if (WARN_ON_ONCE(!prot_sdb)) {
/*
struct request_queue *q = hctx->queue;
struct scsi_device *sdev = q->queuedata;
- if (scsi_dev_queue_ready(q, sdev))
- return true;
-
- if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
- blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
- return false;
+ return scsi_dev_queue_ready(q, sdev);
}
static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
case BLK_STS_OK:
break;
case BLK_STS_RESOURCE:
+ case BLK_STS_ZONE_RESOURCE:
if (atomic_read(&sdev->device_busy) ||
scsi_device_blocked(sdev))
ret = BLK_STS_DEV_RESOURCE;
switch (oldstate) {
case SDEV_RUNNING:
case SDEV_CREATED_BLOCK:
+ case SDEV_QUIESCE:
case SDEV_OFFLINE:
break;
default:
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
+ #include <linux/mutex.h>
#include <asm/unaligned.h>
#include "sd.h"
+ static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
+ {
+ if (zone->type == ZBC_ZONE_TYPE_CONV)
+ return 0;
+
+ switch (zone->cond) {
+ case BLK_ZONE_COND_IMP_OPEN:
+ case BLK_ZONE_COND_EXP_OPEN:
+ case BLK_ZONE_COND_CLOSED:
+ return zone->wp - zone->start;
+ case BLK_ZONE_COND_FULL:
+ return zone->len;
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_OFFLINE:
+ case BLK_ZONE_COND_READONLY:
+ default:
+ /*
+ * Offline and read-only zones do not have a valid
+ * write pointer. Use 0 as for an empty zone.
+ */
+ return 0;
+ }
+ }
+
static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
unsigned int idx, report_zones_cb cb, void *data)
{
struct scsi_device *sdp = sdkp->device;
struct blk_zone zone = { 0 };
+ int ret;
zone.type = buf[0] & 0x0f;
zone.cond = (buf[1] >> 4) & 0xf;
zone.cond == ZBC_ZONE_COND_FULL)
zone.wp = zone.start + zone.len;
- return cb(&zone, idx, data);
+ ret = cb(&zone, idx, data);
+ if (ret)
+ return ret;
+
+ if (sdkp->rev_wp_offset)
+ sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
+
+ return 0;
}
/**
while (bufsize >= SECTOR_SIZE) {
buf = __vmalloc(bufsize,
- GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY,
- PAGE_KERNEL);
+ GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
if (buf) {
*buflen = bufsize;
return buf;
return ret;
}
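+ /*
+  * Checks common to zone management and zone append command setup: the
+  * device must be zoned and unchanged, and the request must be aligned to a
+  * zone start sector.
+  */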
+ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
+ {
+ struct request *rq = cmd->request;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ sector_t sector = blk_rq_pos(rq);
+
+ if (!sd_is_zoned(sdkp))
+ /* Not a zoned device */
+ return BLK_STS_IOERR;
+
+ if (sdkp->device->changed)
+ return BLK_STS_IOERR;
+
+ if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
+ /* Unaligned request */
+ return BLK_STS_IOERR;
+
+ return BLK_STS_OK;
+ }
+
+ #define SD_ZBC_INVALID_WP_OFST (~0u)
+ #define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
+
+ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+ {
+ struct scsi_disk *sdkp = data;
+
+ lockdep_assert_held(&sdkp->zones_wp_offset_lock);
+
+ sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
+
+ return 0;
+ }
+
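+ /*
+  * Work function: issue a REPORT ZONES command for each zone whose cached
+  * write pointer offset is marked SD_ZBC_UPDATING_WP_OFST, refresh the cache
+  * from the reply and drop the device reference taken when the update was
+  * scheduled.
+  */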
+ static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
+ {
+ struct scsi_disk *sdkp;
+ unsigned int zno;
+ int ret;
+
+ sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
+
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+ for (zno = 0; zno < sdkp->nr_zones; zno++) {
+ if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
+ continue;
+
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+ ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
+ SD_BUF_SIZE,
+ zno * sdkp->zone_blocks, true);
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+ if (!ret)
+ sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
+ zno, sd_zbc_update_wp_offset_cb,
+ sdkp);
+ }
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+
+ scsi_device_put(sdkp->device);
+ }
+
+ /**
+ * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
+ * @cmd: the command to setup
+ * @lba: the LBA to patch
+ * @nr_blocks: the number of LBAs to be written
+ *
+ * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
+ * sd_zbc_prepare_zone_append() handles the necessary zone write locking and
+ * patching of the lba for an emulated ZONE_APPEND command.
+ *
+ * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
+ * schedule a REPORT ZONES command and return BLK_STS_DEV_RESOURCE.
+ */
+ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
+ unsigned int nr_blocks)
+ {
+ struct request *rq = cmd->request;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ unsigned int wp_offset, zno = blk_rq_zone_no(rq);
+ blk_status_t ret;
+
+ ret = sd_zbc_cmnd_checks(cmd);
+ if (ret != BLK_STS_OK)
+ return ret;
+
+ if (!blk_rq_zone_is_seq(rq))
+ return BLK_STS_IOERR;
+
+ /* Unlock of the write lock will happen in sd_zbc_complete() */
+ if (!blk_req_zone_write_trylock(rq))
+ return BLK_STS_ZONE_RESOURCE;
+
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+ wp_offset = sdkp->zones_wp_offset[zno];
+ switch (wp_offset) {
+ case SD_ZBC_INVALID_WP_OFST:
+ /*
+ * We are about to schedule work to update a zone write pointer
+ * offset, which will cause the zone append command to be
+ * requeued. So make sure that the scsi device does not go away
+ * while the work is being processed.
+ */
+ if (scsi_device_get(sdkp->device)) {
+ ret = BLK_STS_IOERR;
+ break;
+ }
+ sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
+ schedule_work(&sdkp->zone_wp_offset_work);
+ fallthrough;
+ case SD_ZBC_UPDATING_WP_OFST:
+ ret = BLK_STS_DEV_RESOURCE;
+ break;
+ default:
+ wp_offset = sectors_to_logical(sdkp->device, wp_offset);
+ if (wp_offset + nr_blocks > sdkp->zone_blocks) {
+ ret = BLK_STS_IOERR;
+ break;
+ }
+
+ *lba += wp_offset;
+ }
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+ if (ret)
+ blk_req_zone_write_unlock(rq);
+ return ret;
+ }
+
/**
* sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
* can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
unsigned char op, bool all)
{
struct request *rq = cmd->request;
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
sector_t sector = blk_rq_pos(rq);
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
sector_t block = sectors_to_logical(sdkp->device, sector);
+ blk_status_t ret;
- if (!sd_is_zoned(sdkp))
- /* Not a zoned device */
- return BLK_STS_IOERR;
-
- if (sdkp->device->changed)
- return BLK_STS_IOERR;
-
- if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
- /* Unaligned request */
- return BLK_STS_IOERR;
+ ret = sd_zbc_cmnd_checks(cmd);
+ if (ret != BLK_STS_OK)
+ return ret;
cmd->cmd_len = 16;
memset(cmd->cmnd, 0, cmd->cmd_len);
return BLK_STS_OK;
}
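+ /*
+  * Check if a command completion must update the cached zone write pointer
+  * offsets: zone append, zone finish and zone reset commands always do,
+  * regular writes only when they target a sequential zone.
+  */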
+ static bool sd_zbc_need_zone_wp_update(struct request *rq)
+ {
+ switch (req_op(rq)) {
+ case REQ_OP_ZONE_APPEND:
+ case REQ_OP_ZONE_FINISH:
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_RESET_ALL:
+ return true;
+ case REQ_OP_WRITE:
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_WRITE_SAME:
+ return blk_rq_zone_is_seq(rq);
+ default:
+ return false;
+ }
+ }
+
+ /**
+ * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
+ * @cmd: Completed command
+ * @good_bytes: Command reply bytes
+ *
+ * Called from sd_zbc_complete() to handle the update of the cached zone write
+ * pointer value in case an update is needed.
+ */
+ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
+ unsigned int good_bytes)
+ {
+ int result = cmd->result;
+ struct request *rq = cmd->request;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ unsigned int zno = blk_rq_zone_no(rq);
+ enum req_opf op = req_op(rq);
+
+ /*
+ * If we got an error for a command that needs updating the write
+ * pointer offset cache, we must mark the zone wp offset entry as
+ * invalid to force an update from disk the next time a zone append
+ * command is issued.
+ */
+ spin_lock_bh(&sdkp->zones_wp_offset_lock);
+
+ if (result && op != REQ_OP_ZONE_RESET_ALL) {
+ if (op == REQ_OP_ZONE_APPEND) {
+ /* Force complete completion (no retry) */
+ good_bytes = 0;
+ scsi_set_resid(cmd, blk_rq_bytes(rq));
+ }
+
+ /*
+ * Force an update of the zone write pointer offset on
+ * the next zone append access.
+ */
+ if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
+ sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
+ goto unlock_wp_offset;
+ }
+
+ switch (op) {
+ case REQ_OP_ZONE_APPEND:
+ rq->__sector += sdkp->zones_wp_offset[zno];
+ fallthrough;
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE:
+ if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
+ sdkp->zones_wp_offset[zno] +=
+ good_bytes >> SECTOR_SHIFT;
+ break;
+ case REQ_OP_ZONE_RESET:
+ sdkp->zones_wp_offset[zno] = 0;
+ break;
+ case REQ_OP_ZONE_FINISH:
+ sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
+ break;
+ case REQ_OP_ZONE_RESET_ALL:
+ memset(sdkp->zones_wp_offset, 0,
+ sdkp->nr_zones * sizeof(unsigned int));
+ break;
+ default:
+ break;
+ }
+
+ unlock_wp_offset:
+ spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+
+ return good_bytes;
+ }
+
/**
* sd_zbc_complete - ZBC command post processing.
* @cmd: Completed command
* @good_bytes: Command reply bytes
* @sshdr: command sense header
*
- * Called from sd_done(). Process report zones reply and handle reset zone
- * and write commands errors.
+ * Called from sd_done() to handle zone command errors and updates to the
+ * device queue zone write pointer offset cache.
*/
- void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
+ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr)
{
int result = cmd->result;
* so be quiet about the error.
*/
rq->rq_flags |= RQF_QUIET;
- }
+ } else if (sd_zbc_need_zone_wp_update(rq))
+ good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
+
+ if (req_op(rq) == REQ_OP_ZONE_APPEND)
+ blk_req_zone_write_unlock(rq);
+
+ return good_bytes;
}
/**
return 0;
}
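+ /*
+  * Callback for blk_revalidate_disk_zones(): swap in the write pointer
+  * offset array built while parsing the zone report.
+  */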
+ static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
+ {
+ struct scsi_disk *sdkp = scsi_disk(disk);
+
+ swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
+ }
+
+ static int sd_zbc_revalidate_zones(struct scsi_disk *sdkp,
+ u32 zone_blocks,
+ unsigned int nr_zones)
+ {
+ struct gendisk *disk = sdkp->disk;
+ int ret = 0;
+
+ /*
+ * Make sure zone revalidation is serialized to ensure exclusive
+ * updates of the scsi disk data.
+ */
+ mutex_lock(&sdkp->rev_mutex);
+
+ /*
+ * Revalidate the disk zones to update the device request queue zone
+ * bitmaps and the zone write pointer offset array. Do this only once
+ * the device capacity is set, on the second revalidate execution during
+ * disk scan, or if something changed when executing a normal revalidate.
+ */
+ if (sdkp->first_scan) {
+ sdkp->zone_blocks = zone_blocks;
+ sdkp->nr_zones = nr_zones;
+ goto unlock;
+ }
+
+ if (sdkp->zone_blocks == zone_blocks &&
+ sdkp->nr_zones == nr_zones &&
+ disk->queue->nr_zones == nr_zones)
+ goto unlock;
+
+ sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
+ if (!sdkp->rev_wp_offset) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
+
+ kvfree(sdkp->rev_wp_offset);
+ sdkp->rev_wp_offset = NULL;
+
+ unlock:
+ mutex_unlock(&sdkp->rev_mutex);
+
+ return ret;
+ }
+
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{
struct gendisk *disk = sdkp->disk;
+ struct request_queue *q = disk->queue;
unsigned int nr_zones;
u32 zone_blocks = 0;
+ u32 max_append;
int ret;
if (!sd_is_zoned(sdkp))
goto err;
/* The drive satisfies the kernel restrictions: set it up */
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
- blk_queue_required_elevator_features(sdkp->disk->queue,
- ELEVATOR_F_ZBD_SEQ_WRITE);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+ blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
/* READ16/WRITE16 is mandatory for ZBC disks */
sdkp->device->use_16_for_rw = 1;
sdkp->device->use_10_for_rw = 0;
+ ret = sd_zbc_revalidate_zones(sdkp, zone_blocks, nr_zones);
+ if (ret)
+ goto err;
+
/*
- * Revalidate the disk zone bitmaps once the block device capacity is
- * set on the second revalidate execution during disk scan and if
- * something changed when executing a normal revalidate.
+ * On the first scan 'chunk_sectors' isn't set up yet, so calling
+ * blk_queue_max_zone_append_sectors() will result in a WARN(). Defer
+ * this setting to the second scan.
*/
- if (sdkp->first_scan) {
- sdkp->zone_blocks = zone_blocks;
- sdkp->nr_zones = nr_zones;
+ if (sdkp->first_scan)
return 0;
- }
- if (sdkp->zone_blocks != zone_blocks ||
- sdkp->nr_zones != nr_zones ||
- disk->queue->nr_zones != nr_zones) {
- ret = blk_revalidate_disk_zones(disk);
- if (ret != 0)
- goto err;
- sdkp->zone_blocks = zone_blocks;
- sdkp->nr_zones = nr_zones;
- }
+ max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
+ q->limits.max_segments << (PAGE_SHIFT - 9));
+ max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
+
+ blk_queue_max_zone_append_sectors(q, max_append);
return 0;
sdkp->nr_zones,
sdkp->zone_blocks);
}
+
+ int sd_zbc_init_disk(struct scsi_disk *sdkp)
+ {
+ if (!sd_is_zoned(sdkp))
+ return 0;
+
+ sdkp->zones_wp_offset = NULL;
+ spin_lock_init(&sdkp->zones_wp_offset_lock);
+ sdkp->rev_wp_offset = NULL;
+ mutex_init(&sdkp->rev_mutex);
+ INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
+ sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
+ if (!sdkp->zone_wp_update_buf)
+ return -ENOMEM;
+
+ return 0;
+ }
+
+ void sd_zbc_release_disk(struct scsi_disk *sdkp)
+ {
+ kvfree(sdkp->zones_wp_offset);
+ sdkp->zones_wp_offset = NULL;
+ kfree(sdkp->zone_wp_update_buf);
+ sdkp->zone_wp_update_buf = NULL;
+ }
break;
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(bdev_get_queue(bdev), qc, true))
- io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
if (!(iocb->ki_flags & IOCB_HIPRI) ||
!blk_poll(bdev_get_queue(bdev), qc, true))
- io_schedule();
+ blk_io_schedule();
}
__set_current_state(TASK_RUNNING);
return block_read_full_page(page, blkdev_get_block);
}
-static int blkdev_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void blkdev_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
+ mpage_readahead(rac, blkdev_get_block);
}
static int blkdev_write_begin(struct file *file, struct address_space *mapping,
* i_mutex and doing so causes performance issues with concurrent
* O_SYNC writers to a block device.
*/
- error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
+ error = blkdev_issue_flush(bdev, GFP_KERNEL);
if (error == -EOPNOTSUPP)
error = 0;
blk_queue_exit(bdev->bd_queue);
return result;
}
- EXPORT_SYMBOL_GPL(bdev_read_page);
/**
* bdev_write_page() - Start writing a page to a block device
blk_queue_exit(bdev->bd_queue);
return result;
}
- EXPORT_SYMBOL_GPL(bdev_write_page);
/*
* pseudo-fs
static LIST_HEAD(all_bdevs);
- /*
- * If there is a bdev inode for this device, unhash it so that it gets evicted
- * as soon as last inode reference is dropped.
- */
- void bdev_unhash_inode(dev_t dev)
- {
- struct inode *inode;
-
- inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
- if (inode) {
- remove_inode_hash(inode);
- iput(inode);
- }
- }
-
struct block_device *bdget(dev_t dev)
{
struct block_device *bdev;
lockdep_assert_held(&bdev->bd_mutex);
rescan:
- ret = blk_drop_partitions(disk, bdev);
+ ret = blk_drop_partitions(bdev);
if (ret)
return ret;
if (bdev_read_only(I_BDEV(bd_inode)))
return -EPERM;
- /* uswsusp needs write permission to the swap */
- if (IS_SWAPFILE(bd_inode) && !hibernation_available())
+ if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode))
return -ETXTBSY;
if (!iov_iter_count(from))
static const struct address_space_operations def_blk_aops = {
.readpage = blkdev_readpage,
- .readpages = blkdev_readpages,
+ .readahead = blkdev_readahead,
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
+ fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
return res;
}
-static bool ext4_dummy_context(struct inode *inode)
+static const union fscrypt_context *
+ext4_get_dummy_context(struct super_block *sb)
{
- return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
+ return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
}
static bool ext4_has_stable_inodes(struct super_block *sb)
.key_prefix = "ext4:",
.get_context = ext4_get_context,
.set_context = ext4_set_context,
- .dummy_context = ext4_dummy_context,
+ .get_dummy_context = ext4_get_dummy_context,
.empty_dir = ext4_empty_dir,
.max_namelen = EXT4_NAME_LEN,
.has_stable_inodes = ext4_has_stable_inodes,
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
{Opt_nombcache, "nombcache"},
{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
- {Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_test_dummy_encryption, 0, MOPT_STRING},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
{Opt_err, 0, 0}
};
}
#endif
+static int ext4_set_test_dummy_encryption(struct super_block *sb,
+ const char *opt,
+ const substring_t *arg,
+ bool is_remount)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (is_remount && !sbi->s_dummy_enc_ctx.ctx) {
+ ext4_msg(sb, KERN_WARNING,
+ "Can't set test_dummy_encryption on remount");
+ return -1;
+ }
+ err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_ctx);
+ if (err) {
+ if (err == -EEXIST)
+ ext4_msg(sb, KERN_WARNING,
+ "Can't change test_dummy_encryption on remount");
+ else if (err == -EINVAL)
+ ext4_msg(sb, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", opt);
+ else
+ ext4_msg(sb, KERN_WARNING,
+ "Error processing option \"%s\" [%d]",
+ opt, err);
+ return -1;
+ }
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
+#else
+ ext4_msg(sb, KERN_WARNING,
+ "Test dummy encryption mount option ignored");
+#endif
+ return 1;
+}
+
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
substring_t *args, unsigned long *journal_devnum,
unsigned int *journal_ioprio, int is_remount)
*journal_ioprio =
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
} else if (token == Opt_test_dummy_encryption) {
-#ifdef CONFIG_FS_ENCRYPTION
- sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mode enabled");
-#else
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mount option ignored");
-#endif
+ return ext4_set_test_dummy_encryption(sb, opt, &args[0],
+ is_remount);
} else if (m->flags & MOPT_DATAJ) {
if (is_remount) {
if (!sbi->s_journal)
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
if (test_opt(sb, DATA_ERR_ABORT))
SEQ_OPTS_PUTS("data_err=abort");
- if (DUMMY_ENCRYPTION_ENABLED(sbi))
- SEQ_OPTS_PUTS("test_dummy_encryption");
+
+ fscrypt_show_test_dummy_encryption(seq, sep, sb);
ext4_show_quota_options(seq, sb);
return 0;
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(get_qf_name(sb, sbi, i));
#endif
+ fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
ext4_blkdev_remove(sbi);
brelse(bh);
out_fail:
needs_barrier = true;
if (needs_barrier) {
int err;
- err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
if (!ret)
ret = err;
}
static unsigned long get_nr_dirty_pages(void)
{
return global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS) +
get_nr_dirty_inodes();
}
WARN(bdi_cap_writeback_dirty(wb->bdi) &&
!test_bit(WB_registered, &wb->state),
- "bdi-%s not registered\n", wb->bdi->name);
+ "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
inode->dirtied_when = jiffies;
if (dirtytime)
static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
{
- struct cdrom_multisession ms_info;
- unsigned int vol_desc_start;
- struct block_device *bdev = sb->s_bdev;
- int i;
+ struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
+ unsigned int vol_desc_start = 0;
- vol_desc_start=0;
- ms_info.addr_format=CDROM_LBA;
if (session > 0) {
- struct cdrom_tocentry Te;
- Te.cdte_track=session;
- Te.cdte_format=CDROM_LBA;
- i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
- if (!i) {
+ struct cdrom_tocentry te;
+
+ if (!cdi)
+ return 0;
+
+ te.cdte_track = session;
+ te.cdte_format = CDROM_LBA;
+ if (cdrom_read_tocentry(cdi, &te) == 0) {
printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
- session, Te.cdte_addr.lba,
- Te.cdte_ctrl&CDROM_DATA_TRACK);
- if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
- return Te.cdte_addr.lba;
+ session, te.cdte_addr.lba,
+ te.cdte_ctrl & CDROM_DATA_TRACK);
+ if ((te.cdte_ctrl & CDROM_DATA_TRACK) == 4)
+ return te.cdte_addr.lba;
}
printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
}
- i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
- if (session > 0)
- printk(KERN_ERR "ISOFS: Invalid session number\n");
- #if 0
- printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
- if (i==0) {
- printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
- printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
- }
- #endif
- if (i==0)
+
+ if (cdi) {
+ struct cdrom_multisession ms_info;
+
+ ms_info.addr_format = CDROM_LBA;
+ if (cdrom_multisession(cdi, &ms_info) == 0) {
#if WE_OBEY_THE_WRITTEN_STANDARDS
- if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
+ /* necessary for a valid ms_info.addr */
+ if (ms_info.xa_flag)
#endif
- vol_desc_start=ms_info.addr.lba;
+ vol_desc_start = ms_info.addr.lba;
+ }
+ }
+
return vol_desc_start;
}
return mpage_readpage(page, isofs_get_block);
}
-static int isofs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void isofs_readahead(struct readahead_control *rac)
{
- return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
+ mpage_readahead(rac, isofs_get_block);
}
static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
static const struct address_space_operations isofs_aops = {
.readpage = isofs_readpage,
- .readpages = isofs_readpages,
+ .readahead = isofs_readahead,
.bmap = _isofs_bmap
};
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- blkdev_put(bdev, mode);
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+ blkdev_put(bdev, mode);
return -EBUSY;
}
int err;
va_list args;
- bdi = bdi_alloc(GFP_KERNEL);
+ bdi = bdi_alloc(NUMA_NO_NODE);
if (!bdi)
return -ENOMEM;
- bdi->name = sb->s_type->name;
-
va_start(args, fmt);
err = bdi_register_va(bdi, fmt, args);
va_end(args);
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/crc32.h>
+ #include <linux/task_io_accounting_ops.h>
#include "zonefs.h"
return iomap_readpage(page, &zonefs_iomap_ops);
}
-static int zonefs_readpages(struct file *unused, struct address_space *mapping,
- struct list_head *pages, unsigned int nr_pages)
+static void zonefs_readahead(struct readahead_control *rac)
{
- return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops);
+ iomap_readahead(rac, &zonefs_iomap_ops);
}
/*
static const struct address_space_operations zonefs_file_aops = {
.readpage = zonefs_readpage,
- .readpages = zonefs_readpages,
+ .readahead = zonefs_readahead,
.writepage = zonefs_writepage,
.writepages = zonefs_writepages,
.set_page_dirty = iomap_set_page_dirty,
if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
ret = file_write_and_wait_range(file, start, end);
if (!ret)
- ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
if (ret)
zonefs_io_error(inode, true);
.end_io = zonefs_file_write_dio_end_io,
};
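+ /*
+  * Handle a synchronous direct write to a sequential zone file using a zone
+  * append BIO issued at the zone start sector and waiting for its completion
+  * before advancing the file position.
+  */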
+ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+ {
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ unsigned int max;
+ struct bio *bio;
+ ssize_t size;
+ int nr_pages;
+ ssize_t ret;
+
+ nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
+ if (!nr_pages)
+ return 0;
+
+ max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+ iov_iter_truncate(from, max);
+
+ bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
+ if (!bio)
+ return -ENOMEM;
+
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = zi->i_zsector;
+ bio->bi_write_hint = iocb->ki_hint;
+ bio->bi_ioprio = iocb->ki_ioprio;
+ bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
+ if (iocb->ki_flags & IOCB_DSYNC)
+ bio->bi_opf |= REQ_FUA;
+
+ ret = bio_iov_iter_get_pages(bio, from);
+ if (unlikely(ret)) {
+ bio_io_error(bio);
+ return ret;
+ }
+ size = bio->bi_iter.bi_size;
+ task_io_account_write(ret);
+
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio_set_polled(bio, iocb);
+
+ ret = submit_bio_wait(bio);
+
+ bio_put(bio);
+
+ zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+ if (ret >= 0) {
+ iocb->ki_pos += size;
+ return size;
+ }
+
+ return ret;
+ }
+
/*
* Handle direct writes. For sequential zone files, this is the only possible
* write path. For these files, check that the user is issuing writes
struct inode *inode = file_inode(iocb->ki_filp);
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct super_block *sb = inode->i_sb;
+ bool sync = is_sync_kiocb(iocb);
+ bool append = false;
size_t count;
ssize_t ret;
* as this can cause write reordering (e.g. the first aio gets EAGAIN
* on the inode lock but the second goes through but is now unaligned).
*/
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) &&
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
(iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
}
/* Enforce sequential writes (append only) in sequential zones */
- mutex_lock(&zi->i_truncate_mutex);
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) {
+ if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
+ mutex_lock(&zi->i_truncate_mutex);
+ if (iocb->ki_pos != zi->i_wpoffset) {
+ mutex_unlock(&zi->i_truncate_mutex);
+ ret = -EINVAL;
+ goto inode_unlock;
+ }
mutex_unlock(&zi->i_truncate_mutex);
- ret = -EINVAL;
- goto inode_unlock;
+ append = sync;
}
- mutex_unlock(&zi->i_truncate_mutex);
- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
- &zonefs_write_dio_ops, is_sync_kiocb(iocb));
+ if (append)
+ ret = zonefs_file_dio_append(iocb, from);
+ else
+ ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+ &zonefs_write_dio_ops, sync);
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
(ret > 0 || ret == -EIOCBQUEUED)) {
if (ret > 0)
struct page;
struct address_space;
struct writeback_control;
+struct readahead_control;
/*
* Write life time hint values.
*/
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
+ void (*readahead)(struct readahead_control *);
int (*write_begin)(struct file *, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
+ errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
__u32 handle_bytes;
int handle_type;
/* file identifier */
- unsigned char f_handle[0];
+ unsigned char f_handle[];
};
static inline struct file *get_file(struct file *f)
/* Being remounted read-only */
int s_readonly_remount;
+ /* per-sb errseq_t for reporting writeback errors via syncfs */
+ errseq_t s_wb_err;
+
/* AIO completions deferred from interrupt context */
struct workqueue_struct *s_dio_done_wq;
struct hlist_head s_pins;
extern int vfs_rmdir(struct inode *, struct dentry *);
extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
-extern int vfs_whiteout(struct inode *, struct dentry *);
+
+static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+ return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag);
#ifdef CONFIG_BLOCK
extern int register_blkdev(unsigned int, const char *);
extern void unregister_blkdev(unsigned int, const char *);
- extern void bdev_unhash_inode(dev_t dev);
extern struct block_device *bdget(dev_t);
extern struct block_device *bdgrab(struct block_device *bdev);
extern void bd_set_size(struct block_device *, loff_t size);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *, bool);
- extern int invalidate_partition(struct gendisk *, int);
#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
return errseq_sample(&mapping->wb_err);
}
+/**
+ * file_sample_sb_err - sample the current errseq_t to test for later errors
+ * @file: file to be sampled
+ *
+ * Grab the most current superblock-level errseq_t value for the given
+ * struct file.
+ */
+static inline errseq_t file_sample_sb_err(struct file *file)
+{
+ return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
+}
+
static inline int filemap_nr_thps(struct address_space *mapping)
{
#ifdef CONFIG_READ_ONLY_THP_FOR_FS