Git Repo - linux.git/commitdiff
Merge tag 'nvme-6.13-2024-12-05' of git://git.infradead.org/nvme into block-6.13
authorJens Axboe <[email protected]>
Thu, 5 Dec 2024 17:14:36 +0000 (10:14 -0700)
committerJens Axboe <[email protected]>
Thu, 5 Dec 2024 17:14:36 +0000 (10:14 -0700)
Pull NVMe fixes from Keith:

"nvme fixes for Linux 6.13

 - Target fix using incorrect zero buffer (Nilay)
 - Device specific deallocate quirk fixes (Christoph, Keith)
 - Fabrics fix for handling max command target bugs (Maurizio)
 - Cocci fix usage for kzalloc (Yu-Chen)
 - DMA size fix for host memory buffer feature (Christoph)
 - Fabrics queue cleanup fixes (Chunguang)"

* tag 'nvme-6.13-2024-12-05' of git://git.infradead.org/nvme:
  nvme-tcp: simplify nvme_tcp_teardown_io_queues()
  nvme-tcp: no need to quiesce admin_q in nvme_tcp_teardown_io_queues()
  nvme-rdma: unquiesce admin_q before destroy it
  nvme-tcp: fix the memleak while create new ctrl failed
  nvme-pci: don't use dma_alloc_noncontiguous with 0 merge boundary
  nvmet: replace kmalloc + memset with kzalloc for data allocation
  nvme-fabrics: handle zero MAXCMD without closing the connection
  nvme-pci: remove two deallocate zeroes quirks
  nvme: don't apply NVME_QUIRK_DEALLOCATE_ZEROES when DSM is not supported
  nvmet: use kzalloc instead of ZERO_PAGE in nvme_execute_identify_ns_nvm()

1  2 
drivers/nvme/host/core.c

diff --combined drivers/nvme/host/core.c
index 52b01a15aad18b1adbe48ea75a00f91a3d0129f8,1e904a794a4f34cd73cdcdc701a82a603063bf29..d169a30eb935e034bfbb78d1a31751dbedf0e397
@@@ -93,17 -93,6 +93,17 @@@ module_param(apst_secondary_latency_tol
  MODULE_PARM_DESC(apst_secondary_latency_tol_us,
        "secondary APST latency tolerance in us");
  
 +/*
 + * Older kernels didn't enable protection information if it was at an offset.
 + * Newer kernels do, so it breaks reads on the upgrade if such formats were
 + * used in prior kernels since the metadata written did not contain a valid
 + * checksum.
 + */
 +static bool disable_pi_offsets = false;
 +module_param(disable_pi_offsets, bool, 0444);
 +MODULE_PARM_DESC(disable_pi_offsets,
 +      "disable protection information if it has an offset");
 +
  /*
   * nvme_wq - hosts nvme related works that are not reset or delete
   * nvme_reset_wq - hosts nvme reset works
@@@ -702,7 -691,7 +702,7 @@@ void nvme_put_ns(struct nvme_ns *ns
  {
        kref_put(&ns->kref, nvme_free_ns);
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_put_ns, "NVME_TARGET_PASSTHRU");
  
  static inline void nvme_clear_nvme_request(struct request *req)
  {
@@@ -1123,7 -1112,7 +1123,7 @@@ int nvme_execute_rq(struct request *rq
                return nvme_req(rq)->status;
        return blk_status_to_errno(status);
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, "NVME_TARGET_PASSTHRU");
  
  /*
   * Returns 0 on success.  If the result is negative, it's a Linux error code;
@@@ -1203,7 -1192,7 +1203,7 @@@ u32 nvme_command_effects(struct nvme_ct
  
        return effects;
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_command_effects, "NVME_TARGET_PASSTHRU");
  
  u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
  {
        }
        return effects;
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, "NVME_TARGET_PASSTHRU");
  
  void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
                       struct nvme_command *cmd, int status)
                break;
        }
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, "NVME_TARGET_PASSTHRU");
  
  /*
   * Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
@@@ -1406,30 -1395,17 +1406,30 @@@ static void nvme_update_keep_alive(stru
        nvme_start_keep_alive(ctrl);
  }
  
 -/*
 - * In NVMe 1.0 the CNS field was just a binary controller or namespace
 - * flag, thus sending any new CNS opcodes has a big chance of not working.
 - * Qemu unfortunately had that bug after reporting a 1.1 version compliance
 - * (but not for any later version).
 - */
 -static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
 +static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)
  {
 -      if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
 -              return ctrl->vs < NVME_VS(1, 2, 0);
 -      return ctrl->vs < NVME_VS(1, 1, 0);
 +      /*
 +       * The CNS field occupies a full byte starting with NVMe 1.2
 +       */
 +      if (ctrl->vs >= NVME_VS(1, 2, 0))
 +              return true;
 +
 +      /*
 +       * NVMe 1.1 expanded the CNS value to two bits, which means values
 +       * larger than that could get truncated and treated as an incorrect
 +       * value.
 +       *
 +       * Qemu implemented 1.0 behavior for controllers claiming 1.1
 +       * compliance, so they need to be quirked here.
 +       */
 +      if (ctrl->vs >= NVME_VS(1, 1, 0) &&
 +          !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS))
 +              return cns <= 3;
 +
 +      /*
 +       * NVMe 1.0 used a single bit for the CNS value.
 +       */
 +      return cns <= 1;
  }
  
  static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
@@@ -1944,12 -1920,8 +1944,12 @@@ static void nvme_configure_metadata(str
  
        if (head->pi_size && head->ms >= head->pi_size)
                head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
 -      if (!(id->dps & NVME_NS_DPS_PI_FIRST))
 -              info->pi_offset = head->ms - head->pi_size;
 +      if (!(id->dps & NVME_NS_DPS_PI_FIRST)) {
 +              if (disable_pi_offsets)
 +                      head->pi_type = 0;
 +              else
 +                      info->pi_offset = head->ms - head->pi_size;
 +      }
  
        if (ctrl->ops->flags & NVME_F_FABRICS) {
                /*
@@@ -2071,7 -2043,8 +2071,8 @@@ static bool nvme_update_disk_info(struc
        lim->physical_block_size = min(phys_bs, atomic_bs);
        lim->io_min = phys_bs;
        lim->io_opt = io_opt;
-       if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
+       if ((ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) &&
+           (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM))
                lim->max_write_zeroes_sectors = UINT_MAX;
        else
                lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
@@@ -3142,7 -3115,7 +3143,7 @@@ static int nvme_init_non_mdts_limits(st
                ctrl->max_zeroes_sectors = 0;
  
        if (ctrl->subsys->subtype != NVME_NQN_NVME ||
 -          nvme_ctrl_limited_cns(ctrl) ||
 +          !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||
            test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
                return 0;
  
@@@ -3260,8 -3233,9 +3261,9 @@@ static int nvme_check_ctrl_fabric_info(
        }
  
        if (!ctrl->maxcmd) {
-               dev_err(ctrl->device, "Maximum outstanding commands is 0\n");
-               return -EINVAL;
+               dev_warn(ctrl->device,
+                       "Firmware bug: maximum outstanding commands is 0\n");
+               ctrl->maxcmd = ctrl->sqsize + 1;
        }
  
        return 0;
@@@ -3806,8 -3780,7 +3808,8 @@@ struct nvme_ns *nvme_find_get_ns(struc
        int srcu_idx;
  
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu)) {
                if (ns->head->ns_id == nsid) {
                        if (!nvme_get_ns(ns))
                                continue;
        srcu_read_unlock(&ctrl->srcu, srcu_idx);
        return ret;
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, "NVME_TARGET_PASSTHRU");
  
  /*
   * Add the namespace to the controller list while keeping the list ordered.
@@@ -4241,7 -4214,7 +4243,7 @@@ static void nvme_scan_work(struct work_
        }
  
        mutex_lock(&ctrl->scan_lock);
 -      if (nvme_ctrl_limited_cns(ctrl)) {
 +      if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) {
                nvme_scan_ns_sequential(ctrl);
        } else {
                /*
@@@ -4897,8 -4870,7 +4899,8 @@@ void nvme_mark_namespaces_dead(struct n
        int srcu_idx;
  
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu))
                blk_mark_disk_dead(ns->disk);
        srcu_read_unlock(&ctrl->srcu, srcu_idx);
  }
@@@ -4910,8 -4882,7 +4912,8 @@@ void nvme_unfreeze(struct nvme_ctrl *ct
        int srcu_idx;
  
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu))
                blk_mq_unfreeze_queue_non_owner(ns->queue);
        srcu_read_unlock(&ctrl->srcu, srcu_idx);
        clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
@@@ -4924,8 -4895,7 +4926,8 @@@ int nvme_wait_freeze_timeout(struct nvm
        int srcu_idx;
  
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu)) {
                timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
                if (timeout <= 0)
                        break;
@@@ -4941,8 -4911,7 +4943,8 @@@ void nvme_wait_freeze(struct nvme_ctrl 
        int srcu_idx;
  
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu))
                blk_mq_freeze_queue_wait(ns->queue);
        srcu_read_unlock(&ctrl->srcu, srcu_idx);
  }
@@@ -4955,8 -4924,7 +4957,8 @@@ void nvme_start_freeze(struct nvme_ctr
  
        set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu))
                /*
                 * Typical non_owner use case is from pci driver, in which
                 * start_freeze is called from timeout work function, but
@@@ -5009,8 -4977,7 +5011,8 @@@ void nvme_sync_io_queues(struct nvme_ct
        int srcu_idx;
  
        srcu_idx = srcu_read_lock(&ctrl->srcu);
 -      list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 +      list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
 +                               srcu_read_lock_held(&ctrl->srcu))
                blk_sync_queue(ns->queue);
        srcu_read_unlock(&ctrl->srcu, srcu_idx);
  }
@@@ -5030,7 -4997,7 +5032,7 @@@ struct nvme_ctrl *nvme_ctrl_from_file(s
                return NULL;
        return file->private_data;
  }
 -EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, NVME_TARGET_PASSTHRU);
 +EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, "NVME_TARGET_PASSTHRU");
  
  /*
   * Check we didn't inadvertently grow the command structure sizes:
This page took 0.105524 seconds and 4 git commands to generate.