Git Repo - linux.git/commitdiff
nvme-multipath: avoid hang on inaccessible namespaces
author: Hannes Reinecke <[email protected]>
Sat, 14 Sep 2024 12:01:23 +0000 (14:01 +0200)
committer: Keith Busch <[email protected]>
Wed, 25 Sep 2024 06:30:28 +0000 (23:30 -0700)
During repetitive namespace remapping operations on the target, the
namespace might have changed between the time the initial scan was
performed and the time the partition scan was invoked by
device_add_disk() in nvme_mpath_set_live(). We then end up with a
stuck scanning process:

[<0>] folio_wait_bit_common+0x12a/0x310
[<0>] filemap_read_folio+0x97/0xd0
[<0>] do_read_cache_folio+0x108/0x390
[<0>] read_part_sector+0x31/0xa0
[<0>] read_lba+0xc5/0x160
[<0>] efi_partition+0xd9/0x8f0
[<0>] bdev_disk_changed+0x23d/0x6d0
[<0>] blkdev_get_whole+0x78/0xc0
[<0>] bdev_open+0x2c6/0x3b0
[<0>] bdev_file_open_by_dev+0xcb/0x120
[<0>] disk_scan_partitions+0x5d/0x100
[<0>] device_add_disk+0x402/0x420
[<0>] nvme_mpath_set_live+0x4f/0x1f0 [nvme_core]
[<0>] nvme_mpath_add_disk+0x107/0x120 [nvme_core]
[<0>] nvme_alloc_ns+0xac6/0xe60 [nvme_core]
[<0>] nvme_scan_ns+0x2dd/0x3e0 [nvme_core]
[<0>] nvme_scan_work+0x1a3/0x490 [nvme_core]

This happens when we have several paths, some of which are inaccessible,
and the active paths are removed first. Then nvme_find_path() will requeue
I/O in the ns_head (as paths are present), but the requeue list is never
triggered as all remaining paths are inactive.

This patch checks for NVME_NSHEAD_DISK_LIVE in nvme_available_path(),
and requeues I/O after NVME_NSHEAD_DISK_LIVE has been cleared once
the last path has been removed, so that pending I/O is properly
terminated.

Signed-off-by: Hannes Reinecke <[email protected]>
Reviewed-by: Sagi Grimberg <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
drivers/nvme/host/multipath.c

index 6d97058cde7a11fbe4d9d863b948f24ae83f7a85..48e7a8906d01211e44cd90bfc33e56fbb7c7b972 100644 (file)
@@ -421,6 +421,9 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 {
        struct nvme_ns *ns;
 
+       if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+               return NULL;
+
        list_for_each_entry_rcu(ns, &head->list, siblings) {
                if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
                        continue;
@@ -969,11 +972,16 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
        if (!head->disk)
                return;
-       kblockd_schedule_work(&head->requeue_work);
-       if (test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+       if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
                nvme_cdev_del(&head->cdev, &head->cdev_device);
                del_gendisk(head->disk);
        }
+       /*
+        * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+        * to allow multipath to fail all I/O.
+        */
+       synchronize_srcu(&head->srcu);
+       kblockd_schedule_work(&head->requeue_work);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
This page took 0.062844 seconds and 4 git commands to generate.