linux.git: drivers/nvme/target/io-cmd-bdev.c
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe I/O command implementation.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/memremap.h>
#include <linux/module.h>
#include "nvmet.h"

void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
{
        const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
        /* Number of logical blocks per physical block. */
        const u32 lpp = ql->physical_block_size / ql->logical_block_size;
        /* Logical blocks per physical block, 0's based. */
        const __le16 lpp0b = to0based(lpp);

        /*
         * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
         * NAWUPF, and NACWU are defined for this namespace and should be
         * used by the host for this namespace instead of the AWUN, AWUPF,
         * and ACWU fields in the Identify Controller data structure. If
         * any of these fields are zero that means that the corresponding
         * field from the identify controller data structure should be used.
         */
        id->nsfeat |= 1 << 1;
        id->nawun = lpp0b;
        id->nawupf = lpp0b;
        id->nacwu = lpp0b;

        /*
         * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
         * NOWS are defined for this namespace and should be used by
         * the host for I/O optimization.
         */
        id->nsfeat |= 1 << 4;
        /* NPWG = Namespace Preferred Write Granularity. 0's based */
        id->npwg = lpp0b;
        /* NPWA = Namespace Preferred Write Alignment. 0's based */
        id->npwa = id->npwg;
        /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
        id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
        /* NPDA = Namespace Preferred Deallocate Alignment */
        id->npda = id->npdg;
        /* NOWS = Namespace Optimal Write Size */
        id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
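
/*
 * Illustrative example (not part of the original source), assuming
 * to0based() converts a count to NVMe's 0's based convention by
 * subtracting one: for a hypothetical drive with 512-byte logical and
 * 4KiB physical blocks, discard_granularity = 4KiB and io_opt = 128KiB,
 * lpp = 4096 / 512 = 8 and lpp0b = 7, so NAWUN/NAWUPF/NACWU/NPWG/NPWA
 * and NPDG/NPDA are all reported as 7 (eight blocks, 0's based) and
 * NOWS as 131072 / 512 - 1 = 255.
 */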

void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
{
        if (ns->bdev) {
                blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ);
                ns->bdev = NULL;
        }
}

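/*
 * Map the backing device's T10-PI integrity profile, if any, onto the NVMe
 * protection information type: Type 1 and Type 3 CRC are supported; any
 * other profile leaves metadata_size at 0 so no PI is advertised.
 */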
static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
{
        struct blk_integrity *bi = bdev_get_integrity(ns->bdev);

        if (bi) {
                ns->metadata_size = bi->tuple_size;
                if (bi->profile == &t10_pi_type1_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE1;
                else if (bi->profile == &t10_pi_type3_crc)
                        ns->pi_type = NVME_NS_DPS_PI_TYPE3;
                else
                        /* Unsupported metadata type */
                        ns->metadata_size = 0;
        }
}

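/*
 * Open the configured device path as a block device and cache its size and
 * logical block size.  Namespaces with the buffered_io attribute set return
 * -ENOTBLK so the caller can fall back to file-backed I/O.  Integrity
 * (T10-PI) and zoned (ZNS) support are probed here as well.
 */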
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
        int ret;

        /*
         * When the buffered_io namespace attribute is enabled, the user wants
         * this block device to be used as a file, so that it can take
         * advantage of the page cache.
         */
        if (ns->buffered_io)
                return -ENOTBLK;

        ns->bdev = blkdev_get_by_path(ns->device_path,
                        FMODE_READ | FMODE_WRITE, NULL);
        if (IS_ERR(ns->bdev)) {
                ret = PTR_ERR(ns->bdev);
                if (ret != -ENOTBLK) {
                        pr_err("failed to open block device %s: (%ld)\n",
                                        ns->device_path, PTR_ERR(ns->bdev));
                }
                ns->bdev = NULL;
                return ret;
        }
        ns->size = bdev_nr_bytes(ns->bdev);
        ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

        ns->pi_type = 0;
        ns->metadata_size = 0;
        if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
                nvmet_bdev_ns_enable_integrity(ns);

        if (bdev_is_zoned(ns->bdev)) {
                if (!nvmet_bdev_zns_enable(ns)) {
                        nvmet_bdev_ns_disable(ns);
                        return -EINVAL;
                }
                ns->csi = NVME_CSI_ZNS;
        }

        return 0;
}

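/*
 * Refresh the cached namespace size, e.g. after the backing block device
 * has been resized.
 */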
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
{
        ns->size = bdev_nr_bytes(ns->bdev);
}

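/*
 * Translate a block layer completion status into an NVMe status code and
 * record the command-specific error location and LBA used for error
 * reporting.
 */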
u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
        u16 status = NVME_SC_SUCCESS;

        if (likely(blk_sts == BLK_STS_OK))
                return status;
        /*
         * Right now there exists an M:1 mapping between block layer error
         * codes and NVMe status codes (see nvme_error_status()). For
         * consistency, when we reverse map we use the most appropriate NVMe
         * status code from the group of NVMe status codes used in
         * nvme_error_status().
         */
        switch (blk_sts) {
        case BLK_STS_NOSPC:
                status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, length);
                break;
        case BLK_STS_TARGET:
                status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_rw_command, slba);
                break;
        case BLK_STS_NOTSUPP:
                req->error_loc = offsetof(struct nvme_common_command, opcode);
                switch (req->cmd->common.opcode) {
                case nvme_cmd_dsm:
                case nvme_cmd_write_zeroes:
                        status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
                        break;
                default:
                        status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
                }
                break;
        case BLK_STS_MEDIUM:
                status = NVME_SC_ACCESS_DENIED;
                req->error_loc = offsetof(struct nvme_rw_command, nsid);
                break;
        case BLK_STS_IOERR:
        default:
                status = NVME_SC_INTERNAL | NVME_SC_DNR;
                req->error_loc = offsetof(struct nvme_common_command, opcode);
        }

        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->error_slba = le64_to_cpu(req->cmd->rw.slba);
                break;
        case nvme_cmd_write_zeroes:
                req->error_slba =
                        le64_to_cpu(req->cmd->write_zeroes.slba);
                break;
        default:
                req->error_slba = 0;
        }
        return status;
}

static void nvmet_bio_done(struct bio *bio)
{
        struct nvmet_req *req = bio->bi_private;

        nvmet_req_complete(req, blk_to_nvme_status(req, bio->bi_status));
        nvmet_req_bio_put(req, bio);
}

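/*
 * With CONFIG_BLK_DEV_INTEGRITY, attach the request's metadata scatterlist
 * to the bio as a bio integrity payload so the block layer carries the
 * protection information alongside the data; without it, the stub below
 * rejects requests that carry metadata with -EINVAL.
 */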
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        struct blk_integrity *bi;
        struct bio_integrity_payload *bip;
        int rc;
        size_t resid, len;

        bi = bdev_get_integrity(req->ns->bdev);
        if (unlikely(!bi)) {
                pr_err("Unable to locate bio_integrity\n");
                return -ENODEV;
        }

        bip = bio_integrity_alloc(bio, GFP_NOIO,
                                        bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
        }

        bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
        /* virtual start sector must be in integrity interval units */
        bip_set_seed(bip, bio->bi_iter.bi_sector >>
                     (bi->interval_exp - SECTOR_SHIFT));

        resid = bip->bip_iter.bi_size;
        while (resid > 0 && sg_miter_next(miter)) {
                len = min_t(size_t, miter->length, resid);
                rc = bio_integrity_add_page(bio, miter->page, len,
                                            offset_in_page(miter->addr));
                if (unlikely(rc != len)) {
                        pr_err("bio_integrity_add_page() failed; %d\n", rc);
                        sg_miter_stop(miter);
                        return -ENOMEM;
                }

                resid -= len;
                if (len < miter->length)
                        miter->consumed -= miter->length - len;
        }
        sg_miter_stop(miter);

        return 0;
}
#else
static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
                                struct sg_mapping_iter *miter)
{
        return -EINVAL;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

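/*
 * Build and submit bios for a read or write request.  The request's data
 * scatterlist is added page by page; when a bio fills up it is chained to a
 * freshly allocated one and submitted, so a large transfer may span several
 * chained bios.  Metadata, if present, is attached to each bio before it is
 * submitted.
 */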
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
        unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
        sector_t sector;
        blk_opf_t opf;
        int i, rc;
        struct sg_mapping_iter prot_miter;
        unsigned int iter_flags;
        unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;

        if (!nvmet_check_transfer_len(req, total_len))
                return;

        if (!req->sg_cnt) {
                nvmet_req_complete(req, 0);
                return;
        }

        if (req->cmd->rw.opcode == nvme_cmd_write) {
                opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
                if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
                        opf |= REQ_FUA;
                iter_flags = SG_MITER_TO_SG;
        } else {
                opf = REQ_OP_READ;
                iter_flags = SG_MITER_FROM_SG;
        }

        if (is_pci_p2pdma_page(sg_page(req->sg)))
                opf |= REQ_NOMERGE;

        sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);

        if (nvmet_use_inline_bvec(req)) {
                bio = &req->b.inline_bio;
                bio_init(bio, req->ns->bdev, req->inline_bvec,
                         ARRAY_SIZE(req->inline_bvec), opf);
        } else {
                bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf,
                                GFP_KERNEL);
        }
        bio->bi_iter.bi_sector = sector;
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        blk_start_plug(&plug);
        if (req->metadata_len)
                sg_miter_start(&prot_miter, req->metadata_sg,
                               req->metadata_sg_cnt, iter_flags);

        for_each_sg(req->sg, sg, req->sg_cnt, i) {
                while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
                                != sg->length) {
                        struct bio *prev = bio;

                        if (req->metadata_len) {
                                rc = nvmet_bdev_alloc_bip(req, bio,
                                                          &prot_miter);
                                if (unlikely(rc)) {
                                        bio_io_error(bio);
                                        return;
                                }
                        }

                        bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt),
                                        opf, GFP_KERNEL);
                        bio->bi_iter.bi_sector = sector;

                        bio_chain(bio, prev);
                        submit_bio(prev);
                }

                sector += sg->length >> 9;
                sg_cnt--;
        }

        if (req->metadata_len) {
                rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
                if (unlikely(rc)) {
                        bio_io_error(bio);
                        return;
                }
        }

        submit_bio(bio);
        blk_finish_plug(&plug);
}

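/*
 * Flush is implemented as an empty bio with REQ_PREFLUSH set, which asks
 * the backing device to flush its volatile write cache.
 */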
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
        struct bio *bio = &req->b.inline_bio;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        bio_init(bio, req->ns->bdev, req->inline_bvec,
                 ARRAY_SIZE(req->inline_bvec), REQ_OP_WRITE | REQ_PREFLUSH);
        bio->bi_private = req;
        bio->bi_end_io = nvmet_bio_done;

        submit_bio(bio);
}

u16 nvmet_bdev_flush(struct nvmet_req *req)
{
        if (blkdev_issue_flush(req->ns->bdev))
                return NVME_SC_INTERNAL | NVME_SC_DNR;
        return 0;
}

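/*
 * Issue a discard for a single DSM range.  -EOPNOTSUPP from the block layer
 * is treated as success so that devices without discard support still
 * complete the Dataset Management command cleanly.
 */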
static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
                struct nvme_dsm_range *range, struct bio **bio)
{
        struct nvmet_ns *ns = req->ns;
        int ret;

        ret = __blkdev_issue_discard(ns->bdev,
                        nvmet_lba_to_sect(ns, range->slba),
                        le32_to_cpu(range->nlb) << (ns->blksize_shift - 9),
                        GFP_KERNEL, bio);
        if (ret && ret != -EOPNOTSUPP) {
                req->error_slba = le64_to_cpu(range->slba);
                return errno_to_nvme_status(req, ret);
        }
        return NVME_SC_SUCCESS;
}

static void nvmet_bdev_execute_discard(struct nvmet_req *req)
{
        struct nvme_dsm_range range;
        struct bio *bio = NULL;
        int i;
        u16 status;

        for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
                status = nvmet_copy_from_sgl(req, i * sizeof(range), &range,
                                sizeof(range));
                if (status)
                        break;

                status = nvmet_bdev_discard_range(req, &range, &bio);
                if (status)
                        break;
        }

        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                if (status)
                        bio_io_error(bio);
                else
                        submit_bio(bio);
        } else {
                nvmet_req_complete(req, status);
        }
}

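/*
 * Only the Deallocate (AD) attribute of Dataset Management is implemented;
 * the integral read/write hints (IDR/IDW) and any other attributes are
 * completed successfully without doing anything.
 */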
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
        if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
                return;

        switch (le32_to_cpu(req->cmd->dsm.attributes)) {
        case NVME_DSMGMT_AD:
                nvmet_bdev_execute_discard(req);
                return;
        case NVME_DSMGMT_IDR:
        case NVME_DSMGMT_IDW:
        default:
                /* Not supported yet */
                nvmet_req_complete(req, 0);
                return;
        }
}

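/*
 * Write Zeroes: the length field is a 0's based count of logical blocks, so
 * one is added before converting to 512-byte sectors via the namespace block
 * size shift.  Illustrative example (not from the original source): for a
 * hypothetical 4KiB-block namespace, length = 7 means 8 blocks = 64 sectors.
 */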
static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
{
        struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
        struct bio *bio = NULL;
        sector_t sector;
        sector_t nr_sector;
        int ret;

        if (!nvmet_check_transfer_len(req, 0))
                return;

        sector = nvmet_lba_to_sect(req->ns, write_zeroes->slba);
        nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
                (req->ns->blksize_shift - 9));

        ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
                        GFP_KERNEL, &bio, 0);
        if (bio) {
                bio->bi_private = req;
                bio->bi_end_io = nvmet_bio_done;
                submit_bio(bio);
        } else {
                nvmet_req_complete(req, errno_to_nvme_status(req, ret));
        }
}

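/*
 * Parse an I/O command against a block-device-backed namespace and install
 * the matching execute handler.  For reads and writes, metadata_len is set
 * when both the controller and the namespace support protection information.
 */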
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
{
        switch (req->cmd->common.opcode) {
        case nvme_cmd_read:
        case nvme_cmd_write:
                req->execute = nvmet_bdev_execute_rw;
                if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
                        req->metadata_len = nvmet_rw_metadata_len(req);
                return 0;
        case nvme_cmd_flush:
                req->execute = nvmet_bdev_execute_flush;
                return 0;
        case nvme_cmd_dsm:
                req->execute = nvmet_bdev_execute_dsm;
                return 0;
        case nvme_cmd_write_zeroes:
                req->execute = nvmet_bdev_execute_write_zeroes;
                return 0;
        default:
                return nvmet_report_invalid_opcode(req);
        }
}