Merge tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block
author     Linus Torvalds <[email protected]>
           Tue, 2 Jun 2020 22:29:19 +0000 (15:29 -0700)
committer  Linus Torvalds <[email protected]>
           Tue, 2 Jun 2020 22:29:19 +0000 (15:29 -0700)
Pull block updates from Jens Axboe:
 "Core block changes that have been queued up for this release:

   - Remove dead blk-throttle and blk-wbt code (Guoqing)

   - Include pid in blktrace note traces (Jan)

   - Don't spew I/O errors on wouldblock termination (me)

   - Zone append addition (Johannes, Keith, Damien)

   - IO accounting improvements (Konstantin, Christoph); see the usage sketch after this list

   - blk-mq hardware map update improvements (Ming)

   - Scheduler dispatch improvement (Salman)

   - Inline block encryption support (Satya)

   - Request map fixes and improvements (Weiping)

   - blk-iocost tweaks (Tejun)

   - Fix for timeout failing with error injection (Keith)

   - Queue re-run fixes (Douglas)

   - CPU hotplug improvements (Christoph)

   - Queue entry/exit improvements (Christoph)

   - Move DMA drain handling to the few drivers that use it (Christoph)

   - Partition handling cleanups (Christoph)"

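A note on the "IO accounting improvements" item above: the series adds the
disk_start_io_acct()/disk_end_io_acct() pair (see the block/blk-core.c hunks
below) that bio-based drivers can call around their I/O. The following is a
minimal sketch of such a caller, assuming the 5.8-era make_request signature;
my_make_request() and my_dev_do_io() are hypothetical names, and completion is
shown synchronously only for brevity:

  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/genhd.h>

  static void my_dev_do_io(struct bio *bio);   /* hypothetical driver I/O path */

  static blk_qc_t my_make_request(struct request_queue *q, struct bio *bio)
  {
          /* records ios/sectors, bumps in_flight and io_ticks on part0 */
          unsigned long start = disk_start_io_acct(bio->bi_disk,
                                                   bio_sectors(bio),
                                                   bio_op(bio));

          my_dev_do_io(bio);

          /* accounts the elapsed time and drops the in_flight counter */
          disk_end_io_acct(bio->bi_disk, bio_op(bio), start);

          bio_endio(bio);
          return BLK_QC_T_NONE;
  }
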
* tag 'for-5.8/block-2020-06-01' of git://git.kernel.dk/linux-block: (127 commits)
  block: mark bio_wouldblock_error() bio with BIO_QUIET
  blk-wbt: rename __wbt_update_limits to wbt_update_limits
  blk-wbt: remove wbt_update_limits
  blk-throttle: remove tg_drain_bios
  blk-throttle: remove blk_throtl_drain
  null_blk: force complete for timeout request
  blk-mq: drain I/O when all CPUs in a hctx are offline
  blk-mq: add blk_mq_all_tag_iter
  blk-mq: open code __blk_mq_alloc_request in blk_mq_alloc_request_hctx
  blk-mq: use BLK_MQ_NO_TAG in more places
  blk-mq: rename BLK_MQ_TAG_FAIL to BLK_MQ_NO_TAG
  blk-mq: move more request initialization to blk_mq_rq_ctx_init
  blk-mq: simplify the blk_mq_get_request calling convention
  blk-mq: remove the bio argument to ->prepare_request
  nvme: force complete cancelled requests
  blk-mq: blk-mq: provide forced completion method
  block: fix a warning when blkdev.h is included for !CONFIG_BLOCK builds
  block: blk-crypto-fallback: remove redundant initialization of variable err
  block: reduce part_stat_lock() scope
  block: use __this_cpu_add() instead of access by smp_processor_id()
  ...

15 files changed:
block/blk-core.c
drivers/base/core.c
drivers/block/loop.c
drivers/block/null_blk_main.c
drivers/block/null_blk_zoned.c
drivers/mtd/mtdcore.c
drivers/scsi/scsi_lib.c
drivers/scsi/sd_zbc.c
fs/block_dev.c
fs/ext4/super.c
fs/fs-writeback.c
fs/isofs/inode.c
fs/super.c
fs/zonefs/super.c
include/linux/fs.h

diff --combined block/blk-core.c
index 38d7b1f160673405b1ac9c57a459d1dd6f0ceb11,a01fb2b508f0efa042f86085092b51cb08741e7e..03252af8c82c82c5580f7f56cff092c75c441bf0
@@@ -20,7 -20,6 +20,7 @@@
  #include <linux/blk-mq.h>
  #include <linux/highmem.h>
  #include <linux/mm.h>
 +#include <linux/pagemap.h>
  #include <linux/kernel_stat.h>
  #include <linux/string.h>
  #include <linux/init.h>
@@@ -39,6 -38,8 +39,8 @@@
  #include <linux/debugfs.h>
  #include <linux/bpf.h>
  #include <linux/psi.h>
+ #include <linux/sched/sysctl.h>
+ #include <linux/blk-crypto.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/block.h>
@@@ -121,6 -122,7 +123,7 @@@ void blk_rq_init(struct request_queue *
        rq->start_time_ns = ktime_get_ns();
        rq->part = NULL;
        refcount_set(&rq->ref, 1);
+       blk_crypto_rq_set_defaults(rq);
  }
  EXPORT_SYMBOL(blk_rq_init);
  
@@@ -136,6 -138,7 +139,7 @@@ static const char *const blk_op_name[] 
        REQ_OP_NAME(ZONE_OPEN),
        REQ_OP_NAME(ZONE_CLOSE),
        REQ_OP_NAME(ZONE_FINISH),
+       REQ_OP_NAME(ZONE_APPEND),
        REQ_OP_NAME(WRITE_SAME),
        REQ_OP_NAME(WRITE_ZEROES),
        REQ_OP_NAME(SCSI_IN),
@@@ -241,6 -244,17 +245,17 @@@ static void req_bio_endio(struct reques
  
        bio_advance(bio, nbytes);
  
+       if (req_op(rq) == REQ_OP_ZONE_APPEND && error == BLK_STS_OK) {
+               /*
+                * Partial zone append completions cannot be supported as the
+                * BIO fragments may end up not being written sequentially.
+                */
+               if (bio->bi_iter.bi_size)
+                       bio->bi_status = BLK_STS_IOERR;
+               else
+                       bio->bi_iter.bi_sector = rq->__sector;
+       }
        /* don't actually finish bio if it's part of flush sequence */
        if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
                bio_endio(bio);
@@@ -441,6 -455,23 +456,23 @@@ int blk_queue_enter(struct request_queu
        }
  }
  
+ static inline int bio_queue_enter(struct bio *bio)
+ {
+       struct request_queue *q = bio->bi_disk->queue;
+       bool nowait = bio->bi_opf & REQ_NOWAIT;
+       int ret;
+       ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
+       if (unlikely(ret)) {
+               if (nowait && !blk_queue_dying(q))
+                       bio_wouldblock_error(bio);
+               else
+                       bio_io_error(bio);
+       }
+       return ret;
+ }
  void blk_queue_exit(struct request_queue *q)
  {
        percpu_ref_put(&q->q_usage_counter);
@@@ -485,7 -516,7 +517,7 @@@ struct request_queue *__blk_alloc_queue
        if (ret)
                goto fail_id;
  
-       q->backing_dev_info = bdi_alloc_node(GFP_KERNEL, node_id);
+       q->backing_dev_info = bdi_alloc(node_id);
        if (!q->backing_dev_info)
                goto fail_split;
  
  
        q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
        q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
-       q->backing_dev_info->name = "block";
        q->node = node_id;
  
        timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
@@@ -606,6 -636,16 +637,16 @@@ void blk_put_request(struct request *re
  }
  EXPORT_SYMBOL(blk_put_request);
  
+ static void blk_account_io_merge_bio(struct request *req)
+ {
+       if (!blk_do_io_stat(req))
+               return;
+       part_stat_lock();
+       part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+       part_stat_unlock();
+ }
  bool bio_attempt_back_merge(struct request *req, struct bio *bio,
                unsigned int nr_segs)
  {
        req->biotail = bio;
        req->__data_len += bio->bi_iter.bi_size;
  
-       blk_account_io_start(req, false);
+       bio_crypt_free_ctx(bio);
+       blk_account_io_merge_bio(req);
        return true;
  }
  
@@@ -648,7 -690,9 +691,9 @@@ bool bio_attempt_front_merge(struct req
        req->__sector = bio->bi_iter.bi_sector;
        req->__data_len += bio->bi_iter.bi_size;
  
-       blk_account_io_start(req, false);
+       bio_crypt_do_front_merge(req, bio);
+       blk_account_io_merge_bio(req);
        return true;
  }
  
@@@ -670,7 -714,7 +715,7 @@@ bool bio_attempt_discard_merge(struct r
        req->__data_len += bio->bi_iter.bi_size;
        req->nr_phys_segments = segments + 1;
  
-       blk_account_io_start(req, false);
+       blk_account_io_merge_bio(req);
        return true;
  no_merge:
        req_set_nomerge(q, req);
@@@ -872,6 -916,41 +917,41 @@@ out
        return ret;
  }
  
+ /*
+  * Check write append to a zoned block device.
+  */
+ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
+                                                struct bio *bio)
+ {
+       sector_t pos = bio->bi_iter.bi_sector;
+       int nr_sectors = bio_sectors(bio);
+       /* Only applicable to zoned block devices */
+       if (!blk_queue_is_zoned(q))
+               return BLK_STS_NOTSUPP;
+       /* The bio sector must point to the start of a sequential zone */
+       if (pos & (blk_queue_zone_sectors(q) - 1) ||
+           !blk_queue_zone_is_seq(q, pos))
+               return BLK_STS_IOERR;
+       /*
+        * Not allowed to cross zone boundaries. Otherwise, the BIO will be
+        * split and could result in non-contiguous sectors being written in
+        * different zones.
+        */
+       if (nr_sectors > q->limits.chunk_sectors)
+               return BLK_STS_IOERR;
+       /* Make sure the BIO is small enough and will not get split */
+       if (nr_sectors > q->limits.max_zone_append_sectors)
+               return BLK_STS_IOERR;
+       bio->bi_opf |= REQ_NOMERGE;
+       return BLK_STS_OK;
+ }
  static noinline_for_stack bool
  generic_make_request_checks(struct bio *bio)
  {
        }
  
        /*
 -       * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
 -       * with BLK_STS_AGAIN status in order to catch -EAGAIN and
 -       * to give a chance to the caller to repeat request gracefully.
 +       * For a REQ_NOWAIT based request, return -EOPNOTSUPP
 +       * if queue is not a request based queue.
         */
 -      if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
 -              status = BLK_STS_AGAIN;
 -              goto end_io;
 -      }
 +      if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
 +              goto not_supported;
  
        if (should_fail_bio(bio))
                goto end_io;
                if (!q->limits.max_write_same_sectors)
                        goto not_supported;
                break;
+       case REQ_OP_ZONE_APPEND:
+               status = blk_check_zone_append(q, bio);
+               if (status != BLK_STS_OK)
+                       goto end_io;
+               break;
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_OPEN:
        case REQ_OP_ZONE_CLOSE:
        }
  
        /*
-        * Various block parts want %current->io_context and lazy ioc
-        * allocation ends up trading a lot of pain for a small amount of
-        * memory.  Just allocate it upfront.  This may fail and block
-        * layer knows how to live with it.
+        * Various block parts want %current->io_context, so allocate it up
+        * front rather than dealing with lots of pain to allocate it only
+        * where needed. This may fail and the block layer knows how to live
+        * with it.
         */
-       create_io_context(GFP_ATOMIC, q->node);
+       if (unlikely(!current->io_context))
+               create_task_io_context(current, GFP_ATOMIC, q->node);
  
        if (!blkcg_bio_issue_check(q, bio))
                return false;
@@@ -988,29 -1076,28 +1074,28 @@@ end_io
        return false;
  }
  
+ static blk_qc_t do_make_request(struct bio *bio)
+ {
+       struct request_queue *q = bio->bi_disk->queue;
+       blk_qc_t ret = BLK_QC_T_NONE;
+       if (blk_crypto_bio_prep(&bio)) {
+               if (!q->make_request_fn)
+                       return blk_mq_make_request(q, bio);
+               ret = q->make_request_fn(q, bio);
+       }
+       blk_queue_exit(q);
+       return ret;
+ }
  /**
-  * generic_make_request - hand a buffer to its device driver for I/O
+  * generic_make_request - re-submit a bio to the block device layer for I/O
   * @bio:  The bio describing the location in memory and on the device.
   *
-  * generic_make_request() is used to make I/O requests of block
-  * devices. It is passed a &struct bio, which describes the I/O that needs
-  * to be done.
-  *
-  * generic_make_request() does not return any status.  The
-  * success/failure status of the request, along with notification of
-  * completion, is delivered asynchronously through the bio->bi_end_io
-  * function described (one day) else where.
-  *
-  * The caller of generic_make_request must make sure that bi_io_vec
-  * are set to describe the memory buffer, and that bi_dev and bi_sector are
-  * set to describe the device address, and the
-  * bi_end_io and optionally bi_private are set to describe how
-  * completion notification should be signaled.
-  *
-  * generic_make_request and the drivers it calls may use bi_next if this
-  * bio happens to be merged with someone else, and may resubmit the bio to
-  * a lower device by calling into generic_make_request recursively, which
-  * means the bio should NOT be touched after the call to ->make_request_fn.
+  * This is a version of submit_bio() that shall only be used for I/O that is
+  * resubmitted to lower level drivers by stacking block drivers.  All file
+  * systems and other upper level users of the block layer should use
+  * submit_bio() instead.
   */
  blk_qc_t generic_make_request(struct bio *bio)
  {
        current->bio_list = bio_list_on_stack;
        do {
                struct request_queue *q = bio->bi_disk->queue;
-               blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
-                       BLK_MQ_REQ_NOWAIT : 0;
  
-               if (likely(blk_queue_enter(q, flags) == 0)) {
+               if (likely(bio_queue_enter(bio) == 0)) {
                        struct bio_list lower, same;
  
                        /* Create a fresh bio_list for all subordinate requests */
                        bio_list_on_stack[1] = bio_list_on_stack[0];
                        bio_list_init(&bio_list_on_stack[0]);
-                       ret = q->make_request_fn(q, bio);
-                       blk_queue_exit(q);
+                       ret = do_make_request(bio);
  
                        /* sort new bios into those for a lower level
                         * and those for the same level
                        bio_list_merge(&bio_list_on_stack[0], &lower);
                        bio_list_merge(&bio_list_on_stack[0], &same);
                        bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
-               } else {
-                       if (unlikely(!blk_queue_dying(q) &&
-                                       (bio->bi_opf & REQ_NOWAIT)))
-                               bio_wouldblock_error(bio);
-                       else
-                               bio_io_error(bio);
                }
                bio = bio_list_pop(&bio_list_on_stack[0]);
        } while (bio);
@@@ -1110,30 -1187,25 +1185,25 @@@ EXPORT_SYMBOL(generic_make_request)
   *
   * This function behaves like generic_make_request(), but does not protect
   * against recursion.  Must only be used if the called driver is known
-  * to not call generic_make_request (or direct_make_request) again from
-  * its make_request function.  (Calling direct_make_request again from
-  * a workqueue is perfectly fine as that doesn't recurse).
+  * to be blk-mq based.
   */
  blk_qc_t direct_make_request(struct bio *bio)
  {
        struct request_queue *q = bio->bi_disk->queue;
-       bool nowait = bio->bi_opf & REQ_NOWAIT;
-       blk_qc_t ret;
  
+       if (WARN_ON_ONCE(q->make_request_fn)) {
+               bio_io_error(bio);
+               return BLK_QC_T_NONE;
+       }
        if (!generic_make_request_checks(bio))
                return BLK_QC_T_NONE;
-       if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) {
-               if (nowait && !blk_queue_dying(q))
-                       bio_wouldblock_error(bio);
-               else
-                       bio_io_error(bio);
+       if (unlikely(bio_queue_enter(bio)))
+               return BLK_QC_T_NONE;
+       if (!blk_crypto_bio_prep(&bio)) {
+               blk_queue_exit(q);
                return BLK_QC_T_NONE;
        }
-       ret = q->make_request_fn(q, bio);
-       blk_queue_exit(q);
-       return ret;
+       return blk_mq_make_request(q, bio);
  }
  EXPORT_SYMBOL_GPL(direct_make_request);
  
   * submit_bio - submit a bio to the block device layer for I/O
   * @bio: The &struct bio which describes the I/O
   *
-  * submit_bio() is very similar in purpose to generic_make_request(), and
-  * uses that function to do most of the work. Both are fairly rough
-  * interfaces; @bio must be presetup and ready for I/O.
+  * submit_bio() is used to submit I/O requests to block devices.  It is passed a
+  * fully set up &struct bio that describes the I/O that needs to be done.  The
+  * bio will be sent to the device described by the bi_disk and bi_partno fields.
   *
+  * The success/failure status of the request, along with notification of
+  * completion, is delivered asynchronously through the ->bi_end_io() callback
+  * in @bio.  The bio must NOT be touched by the caller until ->bi_end_io() has
+  * been called.
   */
  blk_qc_t submit_bio(struct bio *bio)
  {
-       bool workingset_read = false;
-       unsigned long pflags;
-       blk_qc_t ret;
        if (blkcg_punt_bio_submit(bio))
                return BLK_QC_T_NONE;
  
                if (op_is_write(bio_op(bio))) {
                        count_vm_events(PGPGOUT, count);
                } else {
-                       if (bio_flagged(bio, BIO_WORKINGSET))
-                               workingset_read = true;
                        task_io_account_read(bio->bi_iter.bi_size);
                        count_vm_events(PGPGIN, count);
                }
        }
  
        /*
-        * If we're reading data that is part of the userspace
-        * workingset, count submission time as memory stall. When the
-        * device is congested, or the submitting cgroup IO-throttled,
-        * submission can be a significant part of overall IO time.
+        * If we're reading data that is part of the userspace workingset, count
+        * submission time as memory stall.  When the device is congested, or
+        * the submitting cgroup IO-throttled, submission can be a significant
+        * part of overall IO time.
         */
-       if (workingset_read)
-               psi_memstall_enter(&pflags);
-       ret = generic_make_request(bio);
+       if (unlikely(bio_op(bio) == REQ_OP_READ &&
+           bio_flagged(bio, BIO_WORKINGSET))) {
+               unsigned long pflags;
+               blk_qc_t ret;
  
-       if (workingset_read)
+               psi_memstall_enter(&pflags);
+               ret = generic_make_request(bio);
                psi_memstall_leave(&pflags);
  
-       return ret;
+               return ret;
+       }
+       return generic_make_request(bio);
  }
  EXPORT_SYMBOL(submit_bio);
  
@@@ -1261,8 -1335,11 +1333,11 @@@ blk_status_t blk_insert_cloned_request(
            should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
                return BLK_STS_IOERR;
  
+       if (blk_crypto_insert_cloned_request(rq))
+               return BLK_STS_IOERR;
        if (blk_queue_io_stat(q))
-               blk_account_io_start(rq, true);
+               blk_account_io_start(rq);
  
        /*
         * Since we have a scheduler attached on the top device,
@@@ -1314,7 -1391,22 +1389,22 @@@ unsigned int blk_rq_err_bytes(const str
  }
  EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
  
- void blk_account_io_completion(struct request *req, unsigned int bytes)
+ static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
+ {
+       unsigned long stamp;
+ again:
+       stamp = READ_ONCE(part->stamp);
+       if (unlikely(stamp != now)) {
+               if (likely(cmpxchg(&part->stamp, stamp, now) == stamp))
+                       __part_stat_add(part, io_ticks, end ? now - stamp : 1);
+       }
+       if (part->partno) {
+               part = &part_to_disk(part)->part0;
+               goto again;
+       }
+ }
+ static void blk_account_io_completion(struct request *req, unsigned int bytes)
  {
        if (req->part && blk_do_io_stat(req)) {
                const int sgrp = op_stat_group(req_op(req));
@@@ -1345,48 -1437,57 +1435,57 @@@ void blk_account_io_done(struct reques
                update_io_ticks(part, jiffies, true);
                part_stat_inc(part, ios[sgrp]);
                part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
-               part_dec_in_flight(req->q, part, rq_data_dir(req));
+               part_stat_unlock();
  
                hd_struct_put(part);
-               part_stat_unlock();
        }
  }
  
- void blk_account_io_start(struct request *rq, bool new_io)
+ void blk_account_io_start(struct request *rq)
  {
-       struct hd_struct *part;
-       int rw = rq_data_dir(rq);
        if (!blk_do_io_stat(rq))
                return;
  
+       rq->part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
        part_stat_lock();
+       update_io_ticks(rq->part, jiffies, false);
+       part_stat_unlock();
+ }
  
-       if (!new_io) {
-               part = rq->part;
-               part_stat_inc(part, merges[rw]);
-       } else {
-               part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
-               if (!hd_struct_try_get(part)) {
-                       /*
-                        * The partition is already being removed,
-                        * the request will be accounted on the disk only
-                        *
-                        * We take a reference on disk->part0 although that
-                        * partition will never be deleted, so we can treat
-                        * it as any other partition.
-                        */
-                       part = &rq->rq_disk->part0;
-                       hd_struct_get(part);
-               }
-               part_inc_in_flight(rq->q, part, rw);
-               rq->part = part;
-       }
+ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
+               unsigned int op)
+ {
+       struct hd_struct *part = &disk->part0;
+       const int sgrp = op_stat_group(op);
+       unsigned long now = READ_ONCE(jiffies);
+       part_stat_lock();
+       update_io_ticks(part, now, false);
+       part_stat_inc(part, ios[sgrp]);
+       part_stat_add(part, sectors[sgrp], sectors);
+       part_stat_local_inc(part, in_flight[op_is_write(op)]);
+       part_stat_unlock();
  
-       update_io_ticks(part, jiffies, false);
+       return now;
+ }
+ EXPORT_SYMBOL(disk_start_io_acct);
+ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
+               unsigned long start_time)
+ {
+       struct hd_struct *part = &disk->part0;
+       const int sgrp = op_stat_group(op);
+       unsigned long now = READ_ONCE(jiffies);
+       unsigned long duration = now - start_time;
  
+       part_stat_lock();
+       update_io_ticks(part, now, true);
+       part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+       part_stat_local_dec(part, in_flight[op_is_write(op)]);
        part_stat_unlock();
  }
+ EXPORT_SYMBOL(disk_end_io_acct);
  
  /*
   * Steal bios from a request and add them to a bio list.
@@@ -1636,7 -1737,9 +1735,9 @@@ int blk_rq_prep_clone(struct request *r
        }
        rq->nr_phys_segments = rq_src->nr_phys_segments;
        rq->ioprio = rq_src->ioprio;
-       rq->extra_len = rq_src->extra_len;
+       if (rq->bio)
+               blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask);
  
        return 0;
  
@@@ -1778,6 -1881,18 +1879,18 @@@ void blk_finish_plug(struct blk_plug *p
  }
  EXPORT_SYMBOL(blk_finish_plug);
  
+ void blk_io_schedule(void)
+ {
+       /* Prevent hang_check timer from firing at us during very long I/O */
+       unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
+       if (timeout)
+               io_schedule_timeout(timeout);
+       else
+               io_schedule();
+ }
+ EXPORT_SYMBOL_GPL(blk_io_schedule);
  int __init blk_dev_init(void)
  {
        BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
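
The hunks above add REQ_OP_ZONE_APPEND handling: blk_check_zone_append() only
accepts a bio that starts at the beginning of a sequential zone and fits within
max_zone_append_sectors, and req_bio_endio() reports the sector that was
actually written back through bio->bi_iter.bi_sector. A minimal sketch of a
submitter under those rules; the my_* names and the single-page payload are
illustrative assumptions, not code from this series, and error handling is
omitted:

  #include <linux/kernel.h>
  #include <linux/bio.h>
  #include <linux/blkdev.h>

  /* On success, bi_iter.bi_sector now holds the sector the data landed at
   * (set by req_bio_endio() in the hunk above). */
  static void my_zone_append_end_io(struct bio *bio)
  {
          if (!bio->bi_status)
                  pr_info("appended at sector %llu\n",
                          (unsigned long long)bio->bi_iter.bi_sector);
          bio_put(bio);
  }

  /* Submit one page as a zone append; @zone_start must be the first sector
   * of a sequential-write-required zone. */
  static void my_submit_zone_append(struct block_device *bdev,
                                    struct page *page, sector_t zone_start)
  {
          struct bio *bio = bio_alloc(GFP_KERNEL, 1);

          bio_set_dev(bio, bdev);
          bio->bi_iter.bi_sector = zone_start;
          bio->bi_opf = REQ_OP_ZONE_APPEND;
          bio->bi_end_io = my_zone_append_end_io;
          bio_add_page(bio, page, PAGE_SIZE, 0);
          submit_bio(bio);
  }
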
diff --combined drivers/base/core.c
index edb6fd2032a1b3192461634a2bae11a9211a8b51,fb8ae248e5aa8ec69924d0bea737d45190dd0d6b..de808c5a187b37f9dc2e090e81eadcc3c3591111
@@@ -365,7 -365,6 +365,7 @@@ struct device_link *device_link_add(str
                                link->flags |= DL_FLAG_STATELESS;
                                goto reorder;
                        } else {
 +                              link->flags |= DL_FLAG_STATELESS;
                                goto out;
                        }
                }
            flags & DL_FLAG_PM_RUNTIME)
                pm_runtime_resume(supplier);
  
 +      list_add_tail_rcu(&link->s_node, &supplier->links.consumers);
 +      list_add_tail_rcu(&link->c_node, &consumer->links.suppliers);
 +
        if (flags & DL_FLAG_SYNC_STATE_ONLY) {
                dev_dbg(consumer,
                        "Linked as a sync state only consumer to %s\n",
                        dev_name(supplier));
                goto out;
        }
 +
  reorder:
        /*
         * Move the consumer and all of the devices depending on it to the end
         */
        device_reorder_to_tail(consumer, NULL);
  
 -      list_add_tail_rcu(&link->s_node, &supplier->links.consumers);
 -      list_add_tail_rcu(&link->c_node, &consumer->links.suppliers);
 -
        dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier));
  
 - out:
 +out:
        device_pm_unlock();
        device_links_write_unlock();
  
@@@ -831,13 -829,6 +831,13 @@@ static void __device_links_supplier_def
                list_add_tail(&sup->links.defer_sync, &deferred_sync);
  }
  
 +static void device_link_drop_managed(struct device_link *link)
 +{
 +      link->flags &= ~DL_FLAG_MANAGED;
 +      WRITE_ONCE(link->status, DL_STATE_NONE);
 +      kref_put(&link->kref, __device_link_del);
 +}
 +
  /**
   * device_links_driver_bound - Update device links after probing its driver.
   * @dev: Device to update the links for.
   */
  void device_links_driver_bound(struct device *dev)
  {
 -      struct device_link *link;
 +      struct device_link *link, *ln;
        LIST_HEAD(sync_list);
  
        /*
        else
                __device_links_queue_sync_state(dev, &sync_list);
  
 -      list_for_each_entry(link, &dev->links.suppliers, c_node) {
 +      list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) {
 +              struct device *supplier;
 +
                if (!(link->flags & DL_FLAG_MANAGED))
                        continue;
  
 -              WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
 -              WRITE_ONCE(link->status, DL_STATE_ACTIVE);
 +              supplier = link->supplier;
 +              if (link->flags & DL_FLAG_SYNC_STATE_ONLY) {
 +                      /*
 +                       * When DL_FLAG_SYNC_STATE_ONLY is set, it means no
 +                       * other DL_MANAGED_LINK_FLAGS have been set. So, it's
 +                       * safe to drop the managed link completely.
 +                       */
 +                      device_link_drop_managed(link);
 +              } else {
 +                      WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
 +                      WRITE_ONCE(link->status, DL_STATE_ACTIVE);
 +              }
  
 +              /*
 +               * This needs to be done even for the deleted
 +               * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last
 +               * device link that was preventing the supplier from getting a
 +               * sync_state() call.
 +               */
                if (defer_sync_state_count)
 -                      __device_links_supplier_defer_sync(link->supplier);
 +                      __device_links_supplier_defer_sync(supplier);
                else
 -                      __device_links_queue_sync_state(link->supplier,
 -                                                      &sync_list);
 +                      __device_links_queue_sync_state(supplier, &sync_list);
        }
  
        dev->links.status = DL_DEV_DRIVER_BOUND;
        device_links_flush_sync_list(&sync_list, dev);
  }
  
 -static void device_link_drop_managed(struct device_link *link)
 -{
 -      link->flags &= ~DL_FLAG_MANAGED;
 -      WRITE_ONCE(link->status, DL_STATE_NONE);
 -      kref_put(&link->kref, __device_link_del);
 -}
 -
  /**
   * __device_links_no_driver - Update links of a device without a driver.
   * @dev: Device without a driver.
@@@ -1393,7 -1374,7 +1393,7 @@@ static void device_release(struct kobje
        else if (dev->class && dev->class->dev_release)
                dev->class->dev_release(dev);
        else
 -              WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/kobject.txt.\n",
 +              WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n",
                        dev_name(dev));
        kfree(p);
  }
@@@ -2389,11 -2370,6 +2389,11 @@@ u32 fw_devlink_get_flags(void
        return fw_devlink_flags;
  }
  
 +static bool fw_devlink_is_permissive(void)
 +{
 +      return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY;
 +}
 +
  /**
   * device_add - add device to device hierarchy.
   * @dev: device.
@@@ -2548,7 -2524,7 +2548,7 @@@ int device_add(struct device *dev
        if (fw_devlink_flags && is_fwnode_dev &&
            fwnode_has_op(dev->fwnode, add_links)) {
                fw_ret = fwnode_call_int_op(dev->fwnode, add_links, dev);
 -              if (fw_ret == -ENODEV)
 +              if (fw_ret == -ENODEV && !fw_devlink_is_permissive())
                        device_link_wait_for_mandatory_supplier(dev);
                else if (fw_ret)
                        device_link_wait_for_optional_supplier(dev);
@@@ -3212,40 -3188,6 +3212,6 @@@ error
        return ERR_PTR(retval);
  }
  
- /**
-  * device_create_vargs - creates a device and registers it with sysfs
-  * @class: pointer to the struct class that this device should be registered to
-  * @parent: pointer to the parent struct device of this new device, if any
-  * @devt: the dev_t for the char device to be added
-  * @drvdata: the data to be added to the device for callbacks
-  * @fmt: string for the device's name
-  * @args: va_list for the device's name
-  *
-  * This function can be used by char device classes.  A struct device
-  * will be created in sysfs, registered to the specified class.
-  *
-  * A "dev" file will be created, showing the dev_t for the device, if
-  * the dev_t is not 0,0.
-  * If a pointer to a parent struct device is passed in, the newly created
-  * struct device will be a child of that device in sysfs.
-  * The pointer to the struct device will be returned from the call.
-  * Any further sysfs files that might be required can be created using this
-  * pointer.
-  *
-  * Returns &struct device pointer on success, or ERR_PTR() on error.
-  *
-  * Note: the struct class passed to this function must have previously
-  * been created with a call to class_create().
-  */
- struct device *device_create_vargs(struct class *class, struct device *parent,
-                                  dev_t devt, void *drvdata, const char *fmt,
-                                  va_list args)
- {
-       return device_create_groups_vargs(class, parent, devt, drvdata, NULL,
-                                         fmt, args);
- }
- EXPORT_SYMBOL_GPL(device_create_vargs);
  /**
   * device_create - creates a device and registers it with sysfs
   * @class: pointer to the struct class that this device should be registered to
@@@ -3277,7 -3219,8 +3243,8 @@@ struct device *device_create(struct cla
        struct device *dev;
  
        va_start(vargs, fmt);
-       dev = device_create_vargs(class, parent, devt, drvdata, fmt, vargs);
+       dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL,
+                                         fmt, vargs);
        va_end(vargs);
        return dev;
  }
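
With device_create_vargs() removed, device_create() (which, per the hunk above,
now calls device_create_groups_vargs() directly) remains the entry point for
char-device classes. A small usage sketch; my_class, my_devt, my_data and the
"mydev%d" name are placeholders, not from this commit:

  #include <linux/device.h>
  #include <linux/err.h>

  /* Sketch only: create a sysfs device node in @my_class for dev_t @my_devt. */
  static int my_create_node(struct class *my_class, dev_t my_devt, void *my_data)
  {
          struct device *dev;

          dev = device_create(my_class, NULL, my_devt, my_data, "mydev%d", 0);
          if (IS_ERR(dev))
                  return PTR_ERR(dev);
          return 0;
  }
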
@@@ -3915,7 -3858,6 +3882,7 @@@ void set_secondary_fwnode(struct devic
        else
                dev->fwnode = fwnode;
  }
 +EXPORT_SYMBOL_GPL(set_secondary_fwnode);
  
  /**
   * device_set_of_node_from_dev - reuse device-tree node of another device
diff --combined drivers/block/loop.c
index d89c25ba3b89a8d984e2a71e287251fd7ce2adcf,d7904b4d8d1263db7689d7f2010a5a570b72b966..13dbe2f168820b3d460f0114048b5d34e34ff471
@@@ -919,7 -919,7 +919,7 @@@ static void loop_unprepare_queue(struc
  
  static int loop_kthread_worker_fn(void *worker_ptr)
  {
 -      current->flags |= PF_LESS_THROTTLE | PF_MEMALLOC_NOIO;
 +      current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
        return kthread_worker_fn(worker_ptr);
  }
  
@@@ -2037,7 -2037,7 +2037,7 @@@ static int loop_add(struct loop_device 
        lo->tag_set.queue_depth = 128;
        lo->tag_set.numa_node = NUMA_NO_NODE;
        lo->tag_set.cmd_size = sizeof(struct loop_cmd);
-       lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+       lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
        lo->tag_set.driver_data = lo;
  
        err = blk_mq_alloc_tag_set(&lo->tag_set);
diff --combined drivers/block/null_blk_main.c
index ce9e33603a4d9540df90f2c7065332a0a4d112ae,5a1548a74d6284ef47655e98501a234e290e4547..87b31f9ca362ee17b6407811b223842cf28df88f
@@@ -1250,8 -1250,34 +1250,34 @@@ static inline blk_status_t null_handle_
        return errno_to_blk_status(err);
  }
  
+ static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
+ {
+       struct nullb_device *dev = cmd->nq->dev;
+       struct bio *bio;
+       if (dev->memory_backed)
+               return;
+       if (dev->queue_mode == NULL_Q_BIO && bio_op(cmd->bio) == REQ_OP_READ) {
+               zero_fill_bio(cmd->bio);
+       } else if (req_op(cmd->rq) == REQ_OP_READ) {
+               __rq_for_each_bio(bio, cmd->rq)
+                       zero_fill_bio(bio);
+       }
+ }
  static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
  {
+       /*
+        * Since root privileges are required to configure the null_blk
+        * driver, it is fine that this driver does not initialize the
+        * data buffers of read commands. Zero-initialize these buffers
+        * anyway if KMSAN is enabled, to prevent KMSAN from complaining
+        * about null_blk not initializing read data buffers.
+        */
+       if (IS_ENABLED(CONFIG_KMSAN))
+               nullb_zero_read_cmd_buffer(cmd);
        /* Complete IO by inline, softirq or timer */
        switch (cmd->nq->dev->irqmode) {
        case NULL_IRQ_SOFTIRQ:
@@@ -1397,7 -1423,7 +1423,7 @@@ static bool should_requeue_request(stru
  static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
  {
        pr_info("rq %p timed out\n", rq);
-       blk_mq_complete_request(rq);
+       blk_mq_force_complete_rq(rq);
        return BLK_EH_DONE;
  }
  
@@@ -1535,13 -1561,6 +1561,13 @@@ static void null_config_discard(struct 
  {
        if (nullb->dev->discard == false)
                return;
 +
 +      if (nullb->dev->zoned) {
 +              nullb->dev->discard = false;
 +              pr_info("discard option is ignored in zoned mode\n");
 +              return;
 +      }
 +
        nullb->q->limits.discard_granularity = nullb->dev->blocksize;
        nullb->q->limits.discard_alignment = nullb->dev->blocksize;
        blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
diff --combined drivers/block/null_blk_zoned.c
index ed5458f2d367de26264fb754fbfa6a1598eca269,9c19f747f394a0784e7fdad27e8bc6ad4351d61c..cc47606d8ffe06270d8ebe08a8a0ab91f407ca5c
@@@ -23,10 -23,6 +23,10 @@@ int null_init_zoned_dev(struct nullb_de
                pr_err("zone_size must be power-of-two\n");
                return -EINVAL;
        }
 +      if (dev->zone_size > dev->size) {
 +              pr_err("Zone size larger than device capacity\n");
 +              return -EINVAL;
 +      }
  
        dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT;
        dev->nr_zones = dev_size >>
  
  int null_register_zoned_dev(struct nullb *nullb)
  {
+       struct nullb_device *dev = nullb->dev;
        struct request_queue *q = nullb->q;
  
-       if (queue_is_mq(q))
-               return blk_revalidate_disk_zones(nullb->disk);
+       if (queue_is_mq(q)) {
+               int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
+               if (ret)
+                       return ret;
+       } else {
+               blk_queue_chunk_sectors(q, dev->zone_size_sects);
+               q->nr_zones = blkdev_nr_zones(nullb->disk);
+       }
  
-       blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects);
-       q->nr_zones = blkdev_nr_zones(nullb->disk);
+       blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
  
        return 0;
  }
@@@ -142,7 -145,7 +149,7 @@@ size_t null_zone_valid_read_len(struct 
  }
  
  static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
-                    unsigned int nr_sectors)
+                                   unsigned int nr_sectors, bool append)
  {
        struct nullb_device *dev = cmd->nq->dev;
        unsigned int zno = null_zone_no(dev, sector);
        case BLK_ZONE_COND_IMP_OPEN:
        case BLK_ZONE_COND_EXP_OPEN:
        case BLK_ZONE_COND_CLOSED:
-               /* Writes must be at the write pointer position */
-               if (sector != zone->wp)
+               /*
+                * Regular writes must be at the write pointer position.
+                * Zone append writes are automatically issued at the write
+                * pointer and the position returned using the request or BIO
+                * sector.
+                */
+               if (append) {
+                       sector = zone->wp;
+                       if (cmd->bio)
+                               cmd->bio->bi_iter.bi_sector = sector;
+                       else
+                               cmd->rq->__sector = sector;
+               } else if (sector != zone->wp) {
                        return BLK_STS_IOERR;
+               }
  
                if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
                        zone->cond = BLK_ZONE_COND_IMP_OPEN;
@@@ -246,7 -261,9 +265,9 @@@ blk_status_t null_process_zoned_cmd(str
  {
        switch (op) {
        case REQ_OP_WRITE:
-               return null_zone_write(cmd, sector, nr_sectors);
+               return null_zone_write(cmd, sector, nr_sectors, false);
+       case REQ_OP_ZONE_APPEND:
+               return null_zone_write(cmd, sector, nr_sectors, true);
        case REQ_OP_ZONE_RESET:
        case REQ_OP_ZONE_RESET_ALL:
        case REQ_OP_ZONE_OPEN:
diff --combined drivers/mtd/mtdcore.c
index 29d41003d6e0d2f57e625db58df115a5d18de487,fcb018ce17c3dd845336494f4924af594590da87..b47691e1b81cc54452b361a04d4e41b17e641919
@@@ -555,7 -555,7 +555,7 @@@ static int mtd_nvmem_add(struct mtd_inf
  
        config.id = -1;
        config.dev = &mtd->dev;
 -      config.name = mtd->name;
 +      config.name = dev_name(&mtd->dev);
        config.owner = THIS_MODULE;
        config.reg_read = mtd_nvmem_reg_read;
        config.size = mtd->size;
@@@ -2036,11 -2036,10 +2036,10 @@@ static struct backing_dev_info * __ini
        struct backing_dev_info *bdi;
        int ret;
  
-       bdi = bdi_alloc(GFP_KERNEL);
+       bdi = bdi_alloc(NUMA_NO_NODE);
        if (!bdi)
                return ERR_PTR(-ENOMEM);
  
-       bdi->name = name;
        /*
         * We put '-0' suffix to the name to get the same name format as we
         * used to get. Since this is called only once, we get a unique name. 
diff --combined drivers/scsi/scsi_lib.c
index 06c260f6cdae3f43fc8de4ced5d5b98a0d36bac9,82ad0244b3d0b94f3e32d6f4134edabbcfb8c1c7..df4905df5cd42aa3f4b37e2ff2f6dd4372ac5627
@@@ -978,28 -978,12 +978,12 @@@ void scsi_io_completion(struct scsi_cmn
                scsi_io_completion_action(cmd, result);
  }
  
- static blk_status_t scsi_init_sgtable(struct request *req,
-               struct scsi_data_buffer *sdb)
+ static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
+               struct request *rq)
  {
-       int count;
-       /*
-        * If sg table allocation fails, requeue request later.
-        */
-       if (unlikely(sg_alloc_table_chained(&sdb->table,
-                       blk_rq_nr_phys_segments(req), sdb->table.sgl,
-                       SCSI_INLINE_SG_CNT)))
-               return BLK_STS_RESOURCE;
-       /* 
-        * Next, walk the list, and fill in the addresses and sizes of
-        * each segment.
-        */
-       count = blk_rq_map_sg(req->q, req, sdb->table.sgl);
-       BUG_ON(count > sdb->table.nents);
-       sdb->table.nents = count;
-       sdb->length = blk_rq_payload_bytes(req);
-       return BLK_STS_OK;
+       return sdev->dma_drain_len && blk_rq_is_passthrough(rq) &&
+              !op_is_write(req_op(rq)) &&
+              sdev->host->hostt->dma_need_drain(rq);
  }
  
  /*
   */
  blk_status_t scsi_init_io(struct scsi_cmnd *cmd)
  {
+       struct scsi_device *sdev = cmd->device;
        struct request *rq = cmd->request;
+       unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
+       struct scatterlist *last_sg = NULL;
        blk_status_t ret;
+       bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq);
+       int count;
  
-       if (WARN_ON_ONCE(!blk_rq_nr_phys_segments(rq)))
+       if (WARN_ON_ONCE(!nr_segs))
                return BLK_STS_IOERR;
  
-       ret = scsi_init_sgtable(rq, &cmd->sdb);
-       if (ret)
-               return ret;
+       /*
+        * Make sure there is space for the drain.  The driver must adjust
+        * max_hw_segments to be prepared for this.
+        */
+       if (need_drain)
+               nr_segs++;
+       /*
+        * If sg table allocation fails, requeue request later.
+        */
+       if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs,
+                       cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT)))
+               return BLK_STS_RESOURCE;
+       /*
+        * Next, walk the list, and fill in the addresses and sizes of
+        * each segment.
+        */
+       count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
+       if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
+               unsigned int pad_len =
+                       (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+               last_sg->length += pad_len;
+               cmd->extra_len += pad_len;
+       }
+       if (need_drain) {
+               sg_unmark_end(last_sg);
+               last_sg = sg_next(last_sg);
+               sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len);
+               sg_mark_end(last_sg);
+               cmd->extra_len += sdev->dma_drain_len;
+               count++;
+       }
+       BUG_ON(count > cmd->sdb.table.nents);
+       cmd->sdb.table.nents = count;
+       cmd->sdb.length = blk_rq_payload_bytes(rq);
  
        if (blk_integrity_rq(rq)) {
                struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
-               int ivecs, count;
+               int ivecs;
  
                if (WARN_ON_ONCE(!prot_sdb)) {
                        /*
@@@ -1610,12 -1637,7 +1637,7 @@@ static bool scsi_mq_get_budget(struct b
        struct request_queue *q = hctx->queue;
        struct scsi_device *sdev = q->queuedata;
  
-       if (scsi_dev_queue_ready(q, sdev))
-               return true;
-       if (atomic_read(&sdev->device_busy) == 0 && !scsi_device_blocked(sdev))
-               blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
-       return false;
+       return scsi_dev_queue_ready(q, sdev);
  }
  
  static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
@@@ -1684,6 -1706,7 +1706,7 @@@ out_put_budget
        case BLK_STS_OK:
                break;
        case BLK_STS_RESOURCE:
+       case BLK_STS_ZONE_RESOURCE:
                if (atomic_read(&sdev->device_busy) ||
                    scsi_device_blocked(sdev))
                        ret = BLK_STS_DEV_RESOURCE;
@@@ -2284,7 -2307,6 +2307,7 @@@ scsi_device_set_state(struct scsi_devic
                switch (oldstate) {
                case SDEV_RUNNING:
                case SDEV_CREATED_BLOCK:
 +              case SDEV_QUIESCE:
                case SDEV_OFFLINE:
                        break;
                default:
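
The scsi_lib.c change above folds the old scsi_init_sgtable() into
scsi_init_io() and appends a drain entry to the scatterlist whenever
scsi_cmd_needs_dma_drain() says the host wants one. A sketch of the
host-driver side, using the field names visible in that hunk
(hostt->dma_need_drain, sdev->dma_drain_buf/dma_drain_len); the my_* names,
the 512-byte drain size and the passthrough-only condition are illustrative
assumptions:

  #include <linux/blkdev.h>
  #include <scsi/scsi_device.h>
  #include <scsi/scsi_host.h>

  static void *my_drain_buf;      /* DMA-capable buffer, allocated at probe time */

  /* Illustrative policy: only non-filesystem (passthrough) commands need a drain. */
  static bool my_dma_need_drain(struct request *rq)
  {
          return blk_rq_is_passthrough(rq);
  }

  static int my_slave_configure(struct scsi_device *sdev)
  {
          /* leave one scatterlist segment free for the drain entry that
           * scsi_init_io() may append */
          blk_queue_max_segments(sdev->request_queue,
                                 queue_max_segments(sdev->request_queue) - 1);
          sdev->dma_drain_buf = my_drain_buf;
          sdev->dma_drain_len = 512;
          return 0;
  }

  static struct scsi_host_template my_sht = {
          .name            = "my_hba",
          .dma_need_drain  = my_dma_need_drain,
          .slave_configure = my_slave_configure,
          /* ... remaining ops elided ... */
  };
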
diff --combined drivers/scsi/sd_zbc.c
index 8be27426aa66f0b340ab4c08e7ab75293b2cc94c,bb87fbba2a0904860b9c37cc0c8e5881352631ae..6f7eba66687e91ad17344059f8624ebf000f7804
@@@ -11,6 -11,7 +11,7 @@@
  #include <linux/blkdev.h>
  #include <linux/vmalloc.h>
  #include <linux/sched/mm.h>
+ #include <linux/mutex.h>
  
  #include <asm/unaligned.h>
  
  
  #include "sd.h"
  
+ static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
+ {
+       if (zone->type == ZBC_ZONE_TYPE_CONV)
+               return 0;
+       switch (zone->cond) {
+       case BLK_ZONE_COND_IMP_OPEN:
+       case BLK_ZONE_COND_EXP_OPEN:
+       case BLK_ZONE_COND_CLOSED:
+               return zone->wp - zone->start;
+       case BLK_ZONE_COND_FULL:
+               return zone->len;
+       case BLK_ZONE_COND_EMPTY:
+       case BLK_ZONE_COND_OFFLINE:
+       case BLK_ZONE_COND_READONLY:
+       default:
+               /*
+                * Offline and read-only zones do not have a valid
+                * write pointer. Use 0 as for an empty zone.
+                */
+               return 0;
+       }
+ }
  static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
                               unsigned int idx, report_zones_cb cb, void *data)
  {
        struct scsi_device *sdp = sdkp->device;
        struct blk_zone zone = { 0 };
+       int ret;
  
        zone.type = buf[0] & 0x0f;
        zone.cond = (buf[1] >> 4) & 0xf;
            zone.cond == ZBC_ZONE_COND_FULL)
                zone.wp = zone.start + zone.len;
  
-       return cb(&zone, idx, data);
+       ret = cb(&zone, idx, data);
+       if (ret)
+               return ret;
+       if (sdkp->rev_wp_offset)
+               sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
+       return 0;
  }
  
  /**
@@@ -136,7 -169,8 +169,7 @@@ static void *sd_zbc_alloc_report_buffer
  
        while (bufsize >= SECTOR_SIZE) {
                buf = __vmalloc(bufsize,
 -                              GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY,
 -                              PAGE_KERNEL);
 +                              GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
                if (buf) {
                        *buflen = bufsize;
                        return buf;
@@@ -208,6 -242,136 +241,136 @@@ out
        return ret;
  }
  
+ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
+ {
+       struct request *rq = cmd->request;
+       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+       sector_t sector = blk_rq_pos(rq);
+       if (!sd_is_zoned(sdkp))
+               /* Not a zoned device */
+               return BLK_STS_IOERR;
+       if (sdkp->device->changed)
+               return BLK_STS_IOERR;
+       if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
+               /* Unaligned request */
+               return BLK_STS_IOERR;
+       return BLK_STS_OK;
+ }
+ #define SD_ZBC_INVALID_WP_OFST        (~0u)
+ #define SD_ZBC_UPDATING_WP_OFST       (SD_ZBC_INVALID_WP_OFST - 1)
+ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
+                                   void *data)
+ {
+       struct scsi_disk *sdkp = data;
+       lockdep_assert_held(&sdkp->zones_wp_offset_lock);
+       sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
+       return 0;
+ }
+ static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
+ {
+       struct scsi_disk *sdkp;
+       unsigned int zno;
+       int ret;
+       sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
+       spin_lock_bh(&sdkp->zones_wp_offset_lock);
+       for (zno = 0; zno < sdkp->nr_zones; zno++) {
+               if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
+                       continue;
+               spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+               ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
+                                            SD_BUF_SIZE,
+                                            zno * sdkp->zone_blocks, true);
+               spin_lock_bh(&sdkp->zones_wp_offset_lock);
+               if (!ret)
+                       sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
+                                           zno, sd_zbc_update_wp_offset_cb,
+                                           sdkp);
+       }
+       spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+       scsi_device_put(sdkp->device);
+ }
+ /**
+  * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
+  * @cmd: the command to setup
+  * @lba: the LBA to patch
+  * @nr_blocks: the number of LBAs to be written
+  *
+  * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
+  * sd_zbc_prepare_zone_append() handles the necessary zone write locking and
+  * patching of the LBA for an emulated ZONE_APPEND command.
+  *
+  * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
+  * schedule a REPORT ZONES command and return BLK_STS_IOERR.
+  */
+ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
+                                       unsigned int nr_blocks)
+ {
+       struct request *rq = cmd->request;
+       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+       unsigned int wp_offset, zno = blk_rq_zone_no(rq);
+       blk_status_t ret;
+       ret = sd_zbc_cmnd_checks(cmd);
+       if (ret != BLK_STS_OK)
+               return ret;
+       if (!blk_rq_zone_is_seq(rq))
+               return BLK_STS_IOERR;
+       /* Unlock of the write lock will happen in sd_zbc_complete() */
+       if (!blk_req_zone_write_trylock(rq))
+               return BLK_STS_ZONE_RESOURCE;
+       spin_lock_bh(&sdkp->zones_wp_offset_lock);
+       wp_offset = sdkp->zones_wp_offset[zno];
+       switch (wp_offset) {
+       case SD_ZBC_INVALID_WP_OFST:
+               /*
+                * We are about to schedule work to update a zone write pointer
+                * offset, which will cause the zone append command to be
+                * requeued. So make sure that the scsi device does not go away
+                * while the work is being processed.
+                */
+               if (scsi_device_get(sdkp->device)) {
+                       ret = BLK_STS_IOERR;
+                       break;
+               }
+               sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
+               schedule_work(&sdkp->zone_wp_offset_work);
+               fallthrough;
+       case SD_ZBC_UPDATING_WP_OFST:
+               ret = BLK_STS_DEV_RESOURCE;
+               break;
+       default:
+               wp_offset = sectors_to_logical(sdkp->device, wp_offset);
+               if (wp_offset + nr_blocks > sdkp->zone_blocks) {
+                       ret = BLK_STS_IOERR;
+                       break;
+               }
+               *lba += wp_offset;
+       }
+       spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+       if (ret)
+               blk_req_zone_write_unlock(rq);
+       return ret;
+ }
  /**
   * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
   *                    can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
@@@ -222,20 -386,14 +385,14 @@@ blk_status_t sd_zbc_setup_zone_mgmt_cmn
                                         unsigned char op, bool all)
  {
        struct request *rq = cmd->request;
-       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
        sector_t sector = blk_rq_pos(rq);
+       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
        sector_t block = sectors_to_logical(sdkp->device, sector);
+       blk_status_t ret;
  
-       if (!sd_is_zoned(sdkp))
-               /* Not a zoned device */
-               return BLK_STS_IOERR;
-       if (sdkp->device->changed)
-               return BLK_STS_IOERR;
-       if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
-               /* Unaligned request */
-               return BLK_STS_IOERR;
+       ret = sd_zbc_cmnd_checks(cmd);
+       if (ret != BLK_STS_OK)
+               return ret;
  
        cmd->cmd_len = 16;
        memset(cmd->cmnd, 0, cmd->cmd_len);
        return BLK_STS_OK;
  }
  
+ static bool sd_zbc_need_zone_wp_update(struct request *rq)
+ {
+       switch (req_op(rq)) {
+       case REQ_OP_ZONE_APPEND:
+       case REQ_OP_ZONE_FINISH:
+       case REQ_OP_ZONE_RESET:
+       case REQ_OP_ZONE_RESET_ALL:
+               return true;
+       case REQ_OP_WRITE:
+       case REQ_OP_WRITE_ZEROES:
+       case REQ_OP_WRITE_SAME:
+               return blk_rq_zone_is_seq(rq);
+       default:
+               return false;
+       }
+ }
+ /**
+  * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
+  * @cmd: Completed command
+  * @good_bytes: Command reply bytes
+  *
+  * Called from sd_zbc_complete() to handle the update of the cached zone write
+  * pointer value in case an update is needed.
+  */
+ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
+                                         unsigned int good_bytes)
+ {
+       int result = cmd->result;
+       struct request *rq = cmd->request;
+       struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+       unsigned int zno = blk_rq_zone_no(rq);
+       enum req_opf op = req_op(rq);
+       /*
+        * If we got an error for a command that needs updating the write
+        * pointer offset cache, we must mark the zone wp offset entry as
+        * invalid to force an update from disk the next time a zone append
+        * command is issued.
+        */
+       spin_lock_bh(&sdkp->zones_wp_offset_lock);
+       if (result && op != REQ_OP_ZONE_RESET_ALL) {
+               if (op == REQ_OP_ZONE_APPEND) {
+                       /* Force complete completion (no retry) */
+                       good_bytes = 0;
+                       scsi_set_resid(cmd, blk_rq_bytes(rq));
+               }
+               /*
+                * Force an update of the zone write pointer offset on
+                * the next zone append access.
+                */
+               if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
+                       sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
+               goto unlock_wp_offset;
+       }
+       switch (op) {
+       case REQ_OP_ZONE_APPEND:
+               rq->__sector += sdkp->zones_wp_offset[zno];
+               fallthrough;
+       case REQ_OP_WRITE_ZEROES:
+       case REQ_OP_WRITE_SAME:
+       case REQ_OP_WRITE:
+               if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
+                       sdkp->zones_wp_offset[zno] +=
+                                               good_bytes >> SECTOR_SHIFT;
+               break;
+       case REQ_OP_ZONE_RESET:
+               sdkp->zones_wp_offset[zno] = 0;
+               break;
+       case REQ_OP_ZONE_FINISH:
+               sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
+               break;
+       case REQ_OP_ZONE_RESET_ALL:
+               memset(sdkp->zones_wp_offset, 0,
+                      sdkp->nr_zones * sizeof(unsigned int));
+               break;
+       default:
+               break;
+       }
+ unlock_wp_offset:
+       spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+       return good_bytes;
+ }
  /**
   * sd_zbc_complete - ZBC command post processing.
   * @cmd: Completed command
   * @good_bytes: Command reply bytes
   * @sshdr: command sense header
   *
-  * Called from sd_done(). Process report zones reply and handle reset zone
-  * and write commands errors.
+  * Called from sd_done() to handle zone command errors and updates to the
+  * device queue zone write pointer offset cache.
   */
- void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
+ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
                     struct scsi_sense_hdr *sshdr)
  {
        int result = cmd->result;
                 * so be quiet about the error.
                 */
                rq->rq_flags |= RQF_QUIET;
-       }
+       } else if (sd_zbc_need_zone_wp_update(rq))
+               good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
+       if (req_op(rq) == REQ_OP_ZONE_APPEND)
+               blk_req_zone_write_unlock(rq);
+       return good_bytes;
  }
  
  /**
@@@ -381,11 -634,67 +633,67 @@@ static int sd_zbc_check_capacity(struc
        return 0;
  }
  
+ static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
+ {
+       struct scsi_disk *sdkp = scsi_disk(disk);
+       swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
+ }
+ static int sd_zbc_revalidate_zones(struct scsi_disk *sdkp,
+                                  u32 zone_blocks,
+                                  unsigned int nr_zones)
+ {
+       struct gendisk *disk = sdkp->disk;
+       int ret = 0;
+       /*
+        * Make sure revalidate zones are serialized to ensure exclusive
+        * updates of the scsi disk data.
+        */
+       mutex_lock(&sdkp->rev_mutex);
+       /*
+        * Revalidate the disk zones to update the device request queue zone
+        * bitmaps and the zone write pointer offset array. Do this only once
+        * the device capacity is set on the second revalidate execution for
+        * disk scan or if something changed when executing a normal revalidate.
+        */
+       if (sdkp->first_scan) {
+               sdkp->zone_blocks = zone_blocks;
+               sdkp->nr_zones = nr_zones;
+               goto unlock;
+       }
+       if (sdkp->zone_blocks == zone_blocks &&
+           sdkp->nr_zones == nr_zones &&
+           disk->queue->nr_zones == nr_zones)
+               goto unlock;
+       sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
+       if (!sdkp->rev_wp_offset) {
+               ret = -ENOMEM;
+               goto unlock;
+       }
+       ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
+       kvfree(sdkp->rev_wp_offset);
+       sdkp->rev_wp_offset = NULL;
+ unlock:
+       mutex_unlock(&sdkp->rev_mutex);
+       return ret;
+ }
  int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
  {
        struct gendisk *disk = sdkp->disk;
+       struct request_queue *q = disk->queue;
        unsigned int nr_zones;
        u32 zone_blocks = 0;
+       u32 max_append;
        int ret;
  
        if (!sd_is_zoned(sdkp))
                goto err;
  
        /* The drive satisfies the kernel restrictions: set it up */
-       blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, sdkp->disk->queue);
-       blk_queue_required_elevator_features(sdkp->disk->queue,
-                                            ELEVATOR_F_ZBD_SEQ_WRITE);
+       blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
+       blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
        nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
  
        /* READ16/WRITE16 is mandatory for ZBC disks */
        sdkp->device->use_16_for_rw = 1;
        sdkp->device->use_10_for_rw = 0;
  
+       ret = sd_zbc_revalidate_zones(sdkp, zone_blocks, nr_zones);
+       if (ret)
+               goto err;
        /*
-        * Revalidate the disk zone bitmaps once the block device capacity is
-        * set on the second revalidate execution during disk scan and if
-        * something changed when executing a normal revalidate.
+        * On the first scan 'chunk_sectors' isn't set up yet, so calling
+        * blk_queue_max_zone_append_sectors() will result in a WARN(). Defer
+        * this setting to the second scan.
         */
-       if (sdkp->first_scan) {
-               sdkp->zone_blocks = zone_blocks;
-               sdkp->nr_zones = nr_zones;
+       if (sdkp->first_scan)
                return 0;
-       }
  
-       if (sdkp->zone_blocks != zone_blocks ||
-           sdkp->nr_zones != nr_zones ||
-           disk->queue->nr_zones != nr_zones) {
-               ret = blk_revalidate_disk_zones(disk);
-               if (ret != 0)
-                       goto err;
-               sdkp->zone_blocks = zone_blocks;
-               sdkp->nr_zones = nr_zones;
-       }
+       max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
+                          q->limits.max_segments << (PAGE_SHIFT - 9));
+       max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
+       blk_queue_max_zone_append_sectors(q, max_append);
  
        return 0;
  
@@@ -460,3 -765,28 +764,28 @@@ void sd_zbc_print_zones(struct scsi_dis
                          sdkp->nr_zones,
                          sdkp->zone_blocks);
  }
+ int sd_zbc_init_disk(struct scsi_disk *sdkp)
+ {
+       if (!sd_is_zoned(sdkp))
+               return 0;
+       sdkp->zones_wp_offset = NULL;
+       spin_lock_init(&sdkp->zones_wp_offset_lock);
+       sdkp->rev_wp_offset = NULL;
+       mutex_init(&sdkp->rev_mutex);
+       INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
+       sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
+       if (!sdkp->zone_wp_update_buf)
+               return -ENOMEM;
+       return 0;
+ }
+ void sd_zbc_release_disk(struct scsi_disk *sdkp)
+ {
+       kvfree(sdkp->zones_wp_offset);
+       sdkp->zones_wp_offset = NULL;
+       kfree(sdkp->zone_wp_update_buf);
+       sdkp->zone_wp_update_buf = NULL;
+ }
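
The sd_zbc.c changes above emulate REQ_OP_ZONE_APPEND on ZBC drives by caching one write pointer offset per zone: an append is remapped to a regular write at the cached offset, the offset is advanced on successful completion, and it is invalidated on error so the next append forces a report zones refresh. The sketch below shows that bookkeeping in isolation; it is not kernel code, all zwp_* names are hypothetical, and the locking only loosely mirrors zones_wp_offset_lock.

/*
 * Minimal sketch of a per-zone write pointer cache, assuming fixed-size
 * zones and offsets expressed in 512B sectors.
 */
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/types.h>

#define ZWP_INVALID_OFST	U32_MAX	/* unknown, refresh via report zones */

struct zwp_cache {
	spinlock_t	lock;
	u32		*wp_ofst;	/* per-zone offset, in 512B sectors */
	u32		zone_sectors;	/* zone size, in 512B sectors */
	unsigned int	nr_zones;
};

/* Absolute start sector for an append to zone 'zno'; caller holds the lock. */
static sector_t zwp_append_sector(struct zwp_cache *c, unsigned int zno)
{
	return (sector_t)zno * c->zone_sectors + c->wp_ofst[zno];
}

/* Successful write completion: advance the cached offset. */
static void zwp_account_write(struct zwp_cache *c, unsigned int zno, u32 sectors)
{
	spin_lock_bh(&c->lock);
	if (c->wp_ofst[zno] != ZWP_INVALID_OFST &&
	    c->wp_ofst[zno] < c->zone_sectors)
		c->wp_ofst[zno] += sectors;
	spin_unlock_bh(&c->lock);
}

/* Error completion: force a refresh before the next zone append. */
static void zwp_invalidate(struct zwp_cache *c, unsigned int zno)
{
	spin_lock_bh(&c->lock);
	c->wp_ofst[zno] = ZWP_INVALID_OFST;
	spin_unlock_bh(&c->lock);
}

/* Zone reset-all keeps the cache exact without issuing a report zones. */
static void zwp_reset_all(struct zwp_cache *c)
{
	spin_lock_bh(&c->lock);
	memset(c->wp_ofst, 0, c->nr_zones * sizeof(*c->wp_ofst));
	spin_unlock_bh(&c->lock);
}
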
diff --combined fs/block_dev.c
index 1e3030dd43ba8f45e7306f8455182b1fc1202618,d1e08bba925a49c0f96320a4b0fda7b8d604d072..632538d6f1dc6623ace5ffb1ae860b1b45700d77
@@@ -255,7 -255,7 +255,7 @@@ __blkdev_direct_IO_simple(struct kiocb 
                        break;
                if (!(iocb->ki_flags & IOCB_HIPRI) ||
                    !blk_poll(bdev_get_queue(bdev), qc, true))
-                       io_schedule();
+                       blk_io_schedule();
        }
        __set_current_state(TASK_RUNNING);
  
@@@ -449,7 -449,7 +449,7 @@@ __blkdev_direct_IO(struct kiocb *iocb, 
  
                if (!(iocb->ki_flags & IOCB_HIPRI) ||
                    !blk_poll(bdev_get_queue(bdev), qc, true))
-                       io_schedule();
+                       blk_io_schedule();
        }
        __set_current_state(TASK_RUNNING);
  
@@@ -614,9 -614,10 +614,9 @@@ static int blkdev_readpage(struct file 
        return block_read_full_page(page, blkdev_get_block);
  }
  
 -static int blkdev_readpages(struct file *file, struct address_space *mapping,
 -                      struct list_head *pages, unsigned nr_pages)
 +static void blkdev_readahead(struct readahead_control *rac)
  {
 -      return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block);
 +      mpage_readahead(rac, blkdev_get_block);
  }
  
  static int blkdev_write_begin(struct file *file, struct address_space *mapping,
@@@ -671,7 -672,7 +671,7 @@@ int blkdev_fsync(struct file *filp, lof
         * i_mutex and doing so causes performance issues with concurrent
         * O_SYNC writers to a block device.
         */
-       error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
+       error = blkdev_issue_flush(bdev, GFP_KERNEL);
        if (error == -EOPNOTSUPP)
                error = 0;
  
@@@ -712,7 -713,6 +712,6 @@@ int bdev_read_page(struct block_device 
        blk_queue_exit(bdev->bd_queue);
        return result;
  }
- EXPORT_SYMBOL_GPL(bdev_read_page);
  
  /**
   * bdev_write_page() - Start writing a page to a block device
@@@ -757,7 -757,6 +756,6 @@@ int bdev_write_page(struct block_devic
        blk_queue_exit(bdev->bd_queue);
        return result;
  }
- EXPORT_SYMBOL_GPL(bdev_write_page);
  
  /*
   * pseudo-fs
@@@ -881,21 -880,6 +879,6 @@@ static int bdev_set(struct inode *inode
  
  static LIST_HEAD(all_bdevs);
  
- /*
-  * If there is a bdev inode for this device, unhash it so that it gets evicted
-  * as soon as last inode reference is dropped.
-  */
- void bdev_unhash_inode(dev_t dev)
- {
-       struct inode *inode;
-       inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev);
-       if (inode) {
-               remove_inode_hash(inode);
-               iput(inode);
-       }
- }
  struct block_device *bdget(dev_t dev)
  {
        struct block_device *bdev;
@@@ -1515,7 -1499,7 +1498,7 @@@ int bdev_disk_changed(struct block_devi
        lockdep_assert_held(&bdev->bd_mutex);
  
  rescan:
-       ret = blk_drop_partitions(disk, bdev);
+       ret = blk_drop_partitions(bdev);
        if (ret)
                return ret;
  
@@@ -2022,7 -2006,8 +2005,7 @@@ ssize_t blkdev_write_iter(struct kiocb 
        if (bdev_read_only(I_BDEV(bd_inode)))
                return -EPERM;
  
 -      /* uswsusp needs write permission to the swap */
 -      if (IS_SWAPFILE(bd_inode) && !hibernation_available())
 +      if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode))
                return -ETXTBSY;
  
        if (!iov_iter_count(from))
@@@ -2083,7 -2068,7 +2066,7 @@@ static int blkdev_writepages(struct add
  
  static const struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
 -      .readpages      = blkdev_readpages,
 +      .readahead      = blkdev_readahead,
        .writepage      = blkdev_writepage,
        .write_begin    = blkdev_write_begin,
        .write_end      = blkdev_write_end,
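
block_dev.c is one of several conversions in this merge from the ->readpages() address_space operation to the new ->readahead() hook (isofs and zonefs below follow the same pattern). For an mpage-based filesystem the conversion is mechanical, roughly as sketched here; "foofs" and its trivial get_block are hypothetical stand-ins, not code from this series.

/* "foofs" is invented; only the shape of the conversion matters. */
#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/mpage.h>
#include <linux/pagemap.h>

/* Trivial 1:1 block mapping so the example is self-contained. */
static int foofs_get_block(struct inode *inode, sector_t iblock,
			   struct buffer_head *bh_result, int create)
{
	map_bh(bh_result, inode->i_sb, iblock);
	return 0;
}

/*
 * Old style, removed for the files in this merge:
 *
 *	static int foofs_readpages(struct file *file,
 *				   struct address_space *mapping,
 *				   struct list_head *pages, unsigned nr_pages)
 *	{
 *		return mpage_readpages(mapping, pages, nr_pages,
 *				       foofs_get_block);
 *	}
 */

/* New style: the readahead_control carries the mapping and page range. */
static void foofs_readahead(struct readahead_control *rac)
{
	mpage_readahead(rac, foofs_get_block);
}

static const struct address_space_operations foofs_aops = {
	.readahead	= foofs_readahead,
	/* .readpage, .writepage, ... as before */
};
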
diff --combined fs/ext4/super.c
index 4a3d21972011bb907a35483ba669b283622faf46,629a56b5c859ffb3c64d7ff169a258c3e5ad6018..9824cd8203e8f9058afe49cff12a4459d89e11b0
@@@ -1106,7 -1106,6 +1106,7 @@@ static void ext4_put_super(struct super
                crypto_free_shash(sbi->s_chksum_driver);
        kfree(sbi->s_blockgroup_lock);
        fs_put_dax(sbi->s_daxdev);
 +      fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
  #ifdef CONFIG_UNICODE
        utf8_unload(sbi->s_encoding);
  #endif
@@@ -1390,10 -1389,9 +1390,10 @@@ retry
        return res;
  }
  
 -static bool ext4_dummy_context(struct inode *inode)
 +static const union fscrypt_context *
 +ext4_get_dummy_context(struct super_block *sb)
  {
 -      return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
 +      return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
  }
  
  static bool ext4_has_stable_inodes(struct super_block *sb)
@@@ -1412,7 -1410,7 +1412,7 @@@ static const struct fscrypt_operations 
        .key_prefix             = "ext4:",
        .get_context            = ext4_get_context,
        .set_context            = ext4_set_context,
 -      .dummy_context          = ext4_dummy_context,
 +      .get_dummy_context      = ext4_get_dummy_context,
        .empty_dir              = ext4_empty_dir,
        .max_namelen            = EXT4_NAME_LEN,
        .has_stable_inodes      = ext4_has_stable_inodes,
@@@ -1607,7 -1605,6 +1607,7 @@@ static const match_table_t tokens = 
        {Opt_init_itable, "init_itable"},
        {Opt_noinit_itable, "noinit_itable"},
        {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
 +      {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
        {Opt_test_dummy_encryption, "test_dummy_encryption"},
        {Opt_nombcache, "nombcache"},
        {Opt_nombcache, "no_mbcache"},  /* for backward compatibility */
@@@ -1819,7 -1816,7 +1819,7 @@@ static const struct mount_opts 
        {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
        {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
        {Opt_max_dir_size_kb, 0, MOPT_GTE0},
 -      {Opt_test_dummy_encryption, 0, MOPT_GTE0},
 +      {Opt_test_dummy_encryption, 0, MOPT_STRING},
        {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
        {Opt_err, 0, 0}
  };
@@@ -1854,48 -1851,6 +1854,48 @@@ static int ext4_sb_read_encoding(const 
  }
  #endif
  
 +static int ext4_set_test_dummy_encryption(struct super_block *sb,
 +                                        const char *opt,
 +                                        const substring_t *arg,
 +                                        bool is_remount)
 +{
 +#ifdef CONFIG_FS_ENCRYPTION
 +      struct ext4_sb_info *sbi = EXT4_SB(sb);
 +      int err;
 +
 +      /*
 +       * This mount option is just for testing, and it's not worthwhile to
 +       * implement the extra complexity (e.g. RCU protection) that would be
 +       * needed to allow it to be set or changed during remount.  We do allow
 +       * it to be specified during remount, but only if there is no change.
 +       */
 +      if (is_remount && !sbi->s_dummy_enc_ctx.ctx) {
 +              ext4_msg(sb, KERN_WARNING,
 +                       "Can't set test_dummy_encryption on remount");
 +              return -1;
 +      }
 +      err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_ctx);
 +      if (err) {
 +              if (err == -EEXIST)
 +                      ext4_msg(sb, KERN_WARNING,
 +                               "Can't change test_dummy_encryption on remount");
 +              else if (err == -EINVAL)
 +                      ext4_msg(sb, KERN_WARNING,
 +                               "Value of option \"%s\" is unrecognized", opt);
 +              else
 +                      ext4_msg(sb, KERN_WARNING,
 +                               "Error processing option \"%s\" [%d]",
 +                               opt, err);
 +              return -1;
 +      }
 +      ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
 +#else
 +      ext4_msg(sb, KERN_WARNING,
 +               "Test dummy encryption mount option ignored");
 +#endif
 +      return 1;
 +}
 +
  static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                            substring_t *args, unsigned long *journal_devnum,
                            unsigned int *journal_ioprio, int is_remount)
                *journal_ioprio =
                        IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
        } else if (token == Opt_test_dummy_encryption) {
 -#ifdef CONFIG_FS_ENCRYPTION
 -              sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
 -              ext4_msg(sb, KERN_WARNING,
 -                       "Test dummy encryption mode enabled");
 -#else
 -              ext4_msg(sb, KERN_WARNING,
 -                       "Test dummy encryption mount option ignored");
 -#endif
 +              return ext4_set_test_dummy_encryption(sb, opt, &args[0],
 +                                                    is_remount);
        } else if (m->flags & MOPT_DATAJ) {
                if (is_remount) {
                        if (!sbi->s_journal)
@@@ -2350,8 -2311,8 +2350,8 @@@ static int _ext4_show_options(struct se
                SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
        if (test_opt(sb, DATA_ERR_ABORT))
                SEQ_OPTS_PUTS("data_err=abort");
 -      if (DUMMY_ENCRYPTION_ENABLED(sbi))
 -              SEQ_OPTS_PUTS("test_dummy_encryption");
 +
 +      fscrypt_show_test_dummy_encryption(seq, sep, sb);
  
        ext4_show_quota_options(seq, sb);
        return 0;
@@@ -4819,7 -4780,6 +4819,7 @@@ failed_mount
        for (i = 0; i < EXT4_MAXQUOTAS; i++)
                kfree(get_qf_name(sb, sbi, i));
  #endif
 +      fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
        ext4_blkdev_remove(sbi);
        brelse(bh);
  out_fail:
@@@ -5296,7 -5256,7 +5296,7 @@@ static int ext4_sync_fs(struct super_bl
                needs_barrier = true;
        if (needs_barrier) {
                int err;
-               err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+               err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
                if (!ret)
                        ret = err;
        }
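
The ext4 hunks above let test_dummy_encryption take an optional argument by listing the "test_dummy_encryption=%s" pattern ahead of the bare token and switching the option to MOPT_STRING. A minimal sketch of that parsing pattern follows; "demofs" and demo_parse_dummy_encryption() are invented names, and clearing args[0] beforehand is what allows the bare form to be told apart from the "=..." form.

#include <linux/errno.h>
#include <linux/parser.h>
#include <linux/printk.h>
#include <linux/slab.h>

enum { Opt_dummy_enc, Opt_err };

/* The "=%s" pattern must come before the bare token to capture an argument. */
static const match_table_t demofs_tokens = {
	{Opt_dummy_enc, "test_dummy_encryption=%s"},
	{Opt_dummy_enc, "test_dummy_encryption"},
	{Opt_err, NULL}
};

static int demo_parse_dummy_encryption(char *opt)
{
	substring_t args[MAX_OPT_ARGS];
	char *mode;

	/* The bare form leaves args[0] untouched, so clear it first. */
	args[0].from = args[0].to = NULL;
	if (match_token(opt, demofs_tokens, args) != Opt_dummy_enc)
		return -EINVAL;

	if (!args[0].from) {
		pr_info("demofs: test_dummy_encryption (default mode)\n");
		return 0;
	}

	mode = match_strdup(&args[0]);	/* e.g. "v1" or "v2" */
	if (!mode)
		return -ENOMEM;
	pr_info("demofs: test_dummy_encryption mode %s\n", mode);
	kfree(mode);
	return 0;
}
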
diff --combined fs/fs-writeback.c
index c5bdf46e3b4bc743002e5261a6182d78193e7c6f,d85323607b49f3e95c9a3d673c7494612fc30cb6..a750381d554a788b9e359c6cd64bfdd30d2801fd
@@@ -1070,6 -1070,7 +1070,6 @@@ static void bdi_split_work_to_wbs(struc
  static unsigned long get_nr_dirty_pages(void)
  {
        return global_node_page_state(NR_FILE_DIRTY) +
 -              global_node_page_state(NR_UNSTABLE_NFS) +
                get_nr_dirty_inodes();
  }
  
@@@ -2319,7 -2320,7 +2319,7 @@@ void __mark_inode_dirty(struct inode *i
  
                        WARN(bdi_cap_writeback_dirty(wb->bdi) &&
                             !test_bit(WB_registered, &wb->state),
-                            "bdi-%s not registered\n", wb->bdi->name);
+                            "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
  
                        inode->dirtied_when = jiffies;
                        if (dirtytime)
diff --combined fs/isofs/inode.c
index 95b1f377ad090868c5b633937f3776bd62285cdd,276107cdaaf13878995cf802c9e1515b34afaef5..d634561f871a569847c3c6d4ed6159142c6e889d
@@@ -544,43 -544,41 +544,41 @@@ static int isofs_show_options(struct se
  
  static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
  {
-       struct cdrom_multisession ms_info;
-       unsigned int vol_desc_start;
-       struct block_device *bdev = sb->s_bdev;
-       int i;
+       struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
+       unsigned int vol_desc_start = 0;
  
-       vol_desc_start=0;
-       ms_info.addr_format=CDROM_LBA;
        if (session > 0) {
-               struct cdrom_tocentry Te;
-               Te.cdte_track=session;
-               Te.cdte_format=CDROM_LBA;
-               i = ioctl_by_bdev(bdev, CDROMREADTOCENTRY, (unsigned long) &Te);
-               if (!i) {
+               struct cdrom_tocentry te;
+               if (!cdi)
+                       return 0;
+               te.cdte_track = session;
+               te.cdte_format = CDROM_LBA;
+               if (cdrom_read_tocentry(cdi, &te) == 0) {
                        printk(KERN_DEBUG "ISOFS: Session %d start %d type %d\n",
-                               session, Te.cdte_addr.lba,
-                               Te.cdte_ctrl&CDROM_DATA_TRACK);
-                       if ((Te.cdte_ctrl&CDROM_DATA_TRACK) == 4)
-                               return Te.cdte_addr.lba;
+                               session, te.cdte_addr.lba,
+                               te.cdte_ctrl & CDROM_DATA_TRACK);
+                       if ((te.cdte_ctrl & CDROM_DATA_TRACK) == 4)
+                               return te.cdte_addr.lba;
                }
  
                printk(KERN_ERR "ISOFS: Invalid session number or type of track\n");
        }
-       i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long) &ms_info);
-       if (session > 0)
-               printk(KERN_ERR "ISOFS: Invalid session number\n");
- #if 0
-       printk(KERN_DEBUG "isofs.inode: CDROMMULTISESSION: rc=%d\n",i);
-       if (i==0) {
-               printk(KERN_DEBUG "isofs.inode: XA disk: %s\n",ms_info.xa_flag?"yes":"no");
-               printk(KERN_DEBUG "isofs.inode: vol_desc_start = %d\n", ms_info.addr.lba);
-       }
- #endif
-       if (i==0)
+       if (cdi) {
+               struct cdrom_multisession ms_info;
+               ms_info.addr_format = CDROM_LBA;
+               if (cdrom_multisession(cdi, &ms_info) == 0) {
  #if WE_OBEY_THE_WRITTEN_STANDARDS
-               if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
+                       /* necessary for a valid ms_info.addr */
+                       if (ms_info.xa_flag)
  #endif
-                       vol_desc_start=ms_info.addr.lba;
+                               vol_desc_start = ms_info.addr.lba;
+               }
+       }
        return vol_desc_start;
  }
  
@@@ -1185,9 -1183,10 +1183,9 @@@ static int isofs_readpage(struct file *
        return mpage_readpage(page, isofs_get_block);
  }
  
 -static int isofs_readpages(struct file *file, struct address_space *mapping,
 -                      struct list_head *pages, unsigned nr_pages)
 +static void isofs_readahead(struct readahead_control *rac)
  {
 -      return mpage_readpages(mapping, pages, nr_pages, isofs_get_block);
 +      mpage_readahead(rac, isofs_get_block);
  }
  
  static sector_t _isofs_bmap(struct address_space *mapping, sector_t block)
  
  static const struct address_space_operations isofs_aops = {
        .readpage = isofs_readpage,
 -      .readpages = isofs_readpages,
 +      .readahead = isofs_readahead,
        .bmap = _isofs_bmap
  };
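
The isofs conversion above replaces ioctl_by_bdev() with the in-kernel cdrom_read_tocentry() and cdrom_multisession() helpers, reached through disk_to_cdi(). A filesystem that only needs the last-session start could use them roughly as below; demofs_last_session_lba() is a hypothetical caller, not part of this series.

#include <linux/blkdev.h>
#include <linux/cdrom.h>
#include <linux/genhd.h>

/* Returns the LBA of the last session, or 0 if it cannot be determined. */
static sector_t demofs_last_session_lba(struct block_device *bdev)
{
	struct cdrom_device_info *cdi = disk_to_cdi(bdev->bd_disk);
	struct cdrom_multisession ms;

	if (!cdi)		/* not a CD-ROM class device */
		return 0;

	ms.addr_format = CDROM_LBA;
	if (cdrom_multisession(cdi, &ms) == 0 && ms.xa_flag)
		return ms.addr.lba;

	return 0;
}
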
  
diff --combined fs/super.c
index a288cd60d2aed3f58f442a2a768c8ac785c015ec,4991f441988e079ae58a4bea2fa53e164d4b9182..bf3b7685b52a9d8e3d962880cecad989e636e736
@@@ -1302,8 -1302,8 +1302,8 @@@ int get_tree_bdev(struct fs_context *fc
        mutex_lock(&bdev->bd_fsfreeze_mutex);
        if (bdev->bd_fsfreeze_count > 0) {
                mutex_unlock(&bdev->bd_fsfreeze_mutex);
 -              blkdev_put(bdev, mode);
                warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
 +              blkdev_put(bdev, mode);
                return -EBUSY;
        }
  
@@@ -1598,12 -1598,10 +1598,10 @@@ int super_setup_bdi_name(struct super_b
        int err;
        va_list args;
  
-       bdi = bdi_alloc(GFP_KERNEL);
+       bdi = bdi_alloc(NUMA_NO_NODE);
        if (!bdi)
                return -ENOMEM;
  
-       bdi->name = sb->s_type->name;
        va_start(args, fmt);
        err = bdi_register_va(bdi, fmt, args);
        va_end(args);
diff --combined fs/zonefs/super.c
index dba874a61fc5c3859b0b29f6f92f3643d0bdccf6,25afcf55aa41e8d468da688caee9524f82b8bc5f..d79b821ed1c780e2f2bee74b6a2cab73dddae2fb
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/mman.h>
  #include <linux/sched/mm.h>
  #include <linux/crc32.h>
+ #include <linux/task_io_accounting_ops.h>
  
  #include "zonefs.h"
  
@@@ -78,9 -79,10 +79,9 @@@ static int zonefs_readpage(struct file 
        return iomap_readpage(page, &zonefs_iomap_ops);
  }
  
 -static int zonefs_readpages(struct file *unused, struct address_space *mapping,
 -                          struct list_head *pages, unsigned int nr_pages)
 +static void zonefs_readahead(struct readahead_control *rac)
  {
 -      return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops);
 +      iomap_readahead(rac, &zonefs_iomap_ops);
  }
  
  /*
@@@ -127,7 -129,7 +128,7 @@@ static int zonefs_writepages(struct add
  
  static const struct address_space_operations zonefs_file_aops = {
        .readpage               = zonefs_readpage,
 -      .readpages              = zonefs_readpages,
 +      .readahead              = zonefs_readahead,
        .writepage              = zonefs_writepage,
        .writepages             = zonefs_writepages,
        .set_page_dirty         = iomap_set_page_dirty,
@@@ -477,7 -479,7 +478,7 @@@ static int zonefs_file_fsync(struct fil
        if (ZONEFS_I(inode)->i_ztype == ZONEFS_ZTYPE_CNV)
                ret = file_write_and_wait_range(file, start, end);
        if (!ret)
-               ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+               ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL);
  
        if (ret)
                zonefs_io_error(inode, true);
@@@ -595,6 -597,61 +596,61 @@@ static const struct iomap_dio_ops zonef
        .end_io                 = zonefs_file_write_dio_end_io,
  };
  
+ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
+ {
+       struct inode *inode = file_inode(iocb->ki_filp);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       struct block_device *bdev = inode->i_sb->s_bdev;
+       unsigned int max;
+       struct bio *bio;
+       ssize_t size;
+       int nr_pages;
+       ssize_t ret;
+       nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
+       if (!nr_pages)
+               return 0;
+       max = queue_max_zone_append_sectors(bdev_get_queue(bdev));
+       max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
+       iov_iter_truncate(from, max);
+       bio = bio_alloc_bioset(GFP_NOFS, nr_pages, &fs_bio_set);
+       if (!bio)
+               return -ENOMEM;
+       bio_set_dev(bio, bdev);
+       bio->bi_iter.bi_sector = zi->i_zsector;
+       bio->bi_write_hint = iocb->ki_hint;
+       bio->bi_ioprio = iocb->ki_ioprio;
+       bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE;
+       if (iocb->ki_flags & IOCB_DSYNC)
+               bio->bi_opf |= REQ_FUA;
+       ret = bio_iov_iter_get_pages(bio, from);
+       if (unlikely(ret)) {
+               bio_io_error(bio);
+               return ret;
+       }
+       size = bio->bi_iter.bi_size;
+       task_io_account_write(ret);
+       if (iocb->ki_flags & IOCB_HIPRI)
+               bio_set_polled(bio, iocb);
+       ret = submit_bio_wait(bio);
+       bio_put(bio);
+       zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+       if (ret >= 0) {
+               iocb->ki_pos += size;
+               return size;
+       }
+       return ret;
+ }
  /*
   * Handle direct writes. For sequential zone files, this is the only possible
   * write path. For these files, check that the user is issuing writes
@@@ -610,6 -667,8 +666,8 @@@ static ssize_t zonefs_file_dio_write(st
        struct inode *inode = file_inode(iocb->ki_filp);
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        struct super_block *sb = inode->i_sb;
+       bool sync = is_sync_kiocb(iocb);
+       bool append = false;
        size_t count;
        ssize_t ret;
  
         * as this can cause write reordering (e.g. the first aio gets EAGAIN
         * on the inode lock but the second goes through but is now unaligned).
         */
-       if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !is_sync_kiocb(iocb) &&
+       if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && !sync &&
            (iocb->ki_flags & IOCB_NOWAIT))
                return -EOPNOTSUPP;
  
        }
  
        /* Enforce sequential writes (append only) in sequential zones */
-       mutex_lock(&zi->i_truncate_mutex);
-       if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && iocb->ki_pos != zi->i_wpoffset) {
+       if (zi->i_ztype == ZONEFS_ZTYPE_SEQ) {
+               mutex_lock(&zi->i_truncate_mutex);
+               if (iocb->ki_pos != zi->i_wpoffset) {
+                       mutex_unlock(&zi->i_truncate_mutex);
+                       ret = -EINVAL;
+                       goto inode_unlock;
+               }
                mutex_unlock(&zi->i_truncate_mutex);
-               ret = -EINVAL;
-               goto inode_unlock;
+               append = sync;
        }
-       mutex_unlock(&zi->i_truncate_mutex);
  
-       ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
-                          &zonefs_write_dio_ops, is_sync_kiocb(iocb));
+       if (append)
+               ret = zonefs_file_dio_append(iocb, from);
+       else
+               ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+                                  &zonefs_write_dio_ops, sync);
        if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
            (ret > 0 || ret == -EIOCBQUEUED)) {
                if (ret > 0)
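
zonefs_file_dio_append() above is the first filesystem user of REQ_OP_ZONE_APPEND: the bio targets the zone start sector and the device chooses the actual write location. A stripped-down sketch of the same submission pattern follows; demo_zone_append_page() is hypothetical, it assumes a single page fits within the device's zone-append limit, and it relies on the block layer reporting the written position back in bi_iter.bi_sector on completion, as the zone append support in this merge is meant to provide.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/errno.h>

/* Append one page to the zone starting at 'zone_start'. */
static int demo_zone_append_page(struct block_device *bdev, sector_t zone_start,
				 struct page *page, unsigned int len,
				 sector_t *written_sector)
{
	struct bio *bio;
	int ret;

	bio = bio_alloc(GFP_NOFS, 1);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = zone_start;	/* target zone, not a write LBA */
	bio->bi_opf = REQ_OP_ZONE_APPEND | REQ_SYNC;
	if (!bio_add_page(bio, page, len, 0)) {
		bio_put(bio);
		return -EIO;
	}

	ret = submit_bio_wait(bio);
	if (!ret)
		*written_sector = bio->bi_iter.bi_sector; /* where it landed */
	bio_put(bio);
	return ret;
}
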
diff --combined include/linux/fs.h
index ef6acd2062eb272cbdf963aa136e401f82a9021c,1a95e5158811334332f6c25499c40d727ebb507e..4fdd148dd7637eeee6ff1fd3a47276ebb9b49778
@@@ -292,7 -292,6 +292,7 @@@ enum positive_aop_returns 
  struct page;
  struct address_space;
  struct writeback_control;
 +struct readahead_control;
  
  /*
   * Write life time hint values.
@@@ -376,7 -375,6 +376,7 @@@ struct address_space_operations 
         */
        int (*readpages)(struct file *filp, struct address_space *mapping,
                        struct list_head *pages, unsigned nr_pages);
 +      void (*readahead)(struct readahead_control *);
  
        int (*write_begin)(struct file *, struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned flags,
@@@ -978,7 -976,6 +978,7 @@@ struct file 
  #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
        errseq_t                f_wb_err;
 +      errseq_t                f_sb_err; /* for syncfs */
  } __randomize_layout
    __attribute__((aligned(4)));        /* lest something weird decides that 2 is OK */
  
@@@ -986,7 -983,7 +986,7 @@@ struct file_handle 
        __u32 handle_bytes;
        int handle_type;
        /* file identifier */
 -      unsigned char f_handle[0];
 +      unsigned char f_handle[];
  };
  
  static inline struct file *get_file(struct file *f)
@@@ -1523,9 -1520,6 +1523,9 @@@ struct super_block 
        /* Being remounted read-only */
        int s_readonly_remount;
  
 +      /* per-sb errseq_t for reporting writeback errors via syncfs */
 +      errseq_t s_wb_err;
 +
        /* AIO completions deferred from interrupt context */
        struct workqueue_struct *s_dio_done_wq;
        struct hlist_head s_pins;
@@@ -1727,11 -1721,7 +1727,11 @@@ extern int vfs_link(struct dentry *, st
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
  extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
 -extern int vfs_whiteout(struct inode *, struct dentry *);
 +
 +static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
 +{
 +      return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
 +}
  
  extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
                                  int open_flag);
@@@ -2591,7 -2581,6 +2591,6 @@@ extern struct kmem_cache *names_cachep
  #ifdef CONFIG_BLOCK
  extern int register_blkdev(unsigned int, const char *);
  extern void unregister_blkdev(unsigned int, const char *);
- extern void bdev_unhash_inode(dev_t dev);
  extern struct block_device *bdget(dev_t);
  extern struct block_device *bdgrab(struct block_device *bdev);
  extern void bd_set_size(struct block_device *, loff_t size);
@@@ -2733,7 -2722,6 +2732,6 @@@ extern bool is_bad_inode(struct inode *
  extern int revalidate_disk(struct gendisk *);
  extern int check_disk_change(struct block_device *);
  extern int __invalidate_device(struct block_device *, bool);
- extern int invalidate_partition(struct gendisk *, int);
  #endif
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                        pgoff_t start, pgoff_t end);
@@@ -2837,18 -2825,6 +2835,18 @@@ static inline errseq_t filemap_sample_w
        return errseq_sample(&mapping->wb_err);
  }
  
 +/**
 + * file_sample_sb_err - sample the current errseq_t to test for later errors
 +  * @file: file to be sampled
 + *
 + * Grab the most current superblock-level errseq_t value for the given
 + * struct file.
 + */
 +static inline errseq_t file_sample_sb_err(struct file *file)
 +{
 +      return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
 +}
 +
  static inline int filemap_nr_thps(struct address_space *mapping)
  {
  #ifdef CONFIG_READ_ONLY_THP_FOR_FS
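
The include/linux/fs.h changes add a per-superblock errseq_t (s_wb_err), a per-file cursor (f_sb_err) and the file_sample_sb_err() helper so that syncfs() can report writeback errors without handing the same error to the same file twice. The sample-then-check pattern looks roughly like the sketch below; demo_syncfs() is illustrative and stands in for the actual syncfs() plumbing.

#include <linux/errseq.h>
#include <linux/fs.h>

/* Hypothetical stand-in for the syncfs() error-reporting step. */
static int demo_syncfs(struct file *file)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	/* ... sync the superblock's dirty data and wait for it here ... */

	/*
	 * Report an error only if the superblock's error sequence advanced
	 * past what this file sampled via file_sample_sb_err(), then move
	 * the file's cursor so the same error is not reported again.
	 */
	return errseq_check_and_advance(&sb->s_wb_err, &file->f_sb_err);
}
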