// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 NXP Semiconductors
 * Copyright (C) 2017 Bin Meng <[email protected]>
 */

#include <blk.h>
#include <bootdev.h>
#include <cpu_func.h>
#include <dm.h>
#include <errno.h>
#include <log.h>
#include <malloc.h>
#include <memalign.h>
#include <time.h>
#include <dm/device-internal.h>
#include <linux/compat.h>
#include "nvme.h"

#define NVME_Q_DEPTH		2
#define NVME_AQ_DEPTH		2
#define NVME_SQ_SIZE(depth)	(depth * sizeof(struct nvme_command))
#define NVME_CQ_SIZE(depth)	(depth * sizeof(struct nvme_completion))
#define NVME_CQ_ALLOCATION	ALIGN(NVME_CQ_SIZE(NVME_Q_DEPTH), \
				      ARCH_DMA_MINALIGN)
#define ADMIN_TIMEOUT		60
#define IO_TIMEOUT		30
#define MAX_PRP_POOL		512

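/*
 * Poll the controller status register until (CSTS & mask) == val, giving up
 * after the CAP.TO timeout (reported in 500 ms units) and returning -ETIME.
 */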
static int nvme_wait_csts(struct nvme_dev *dev, u32 mask, u32 val)
{
	int timeout;
	ulong start;

	/* Timeout field in the CAP register is in 500 millisecond units */
	timeout = NVME_CAP_TIMEOUT(dev->cap) * 500;

	start = get_timer(0);
	while (get_timer(start) < timeout) {
		if ((readl(&dev->bar->csts) & mask) == val)
			return 0;
	}

	return -ETIME;
}

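/*
 * Build the PRP entries for a transfer. PRP1 is set up by the caller and
 * covers the first (possibly unaligned) page; when more than one additional
 * page is needed, a chained PRP list is written into dev->prp_pool and its
 * address is returned through *prp2, otherwise *prp2 is the address of the
 * second page (or 0 when everything fits in one page).
 */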
static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
			   int total_len, u64 dma_addr)
{
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	u64 *prp_pool;
	int length = total_len;
	int i, nprps;
	u32 prps_per_page = page_size >> 3;
	u32 num_pages;

	length -= (page_size - offset);

	if (length <= 0) {
		*prp2 = 0;
		return 0;
	}

	if (length)
		dma_addr += (page_size - offset);

	if (length <= page_size) {
		*prp2 = dma_addr;
		return 0;
	}

	nprps = DIV_ROUND_UP(length, page_size);
	num_pages = DIV_ROUND_UP(nprps - 1, prps_per_page - 1);

	if (nprps > dev->prp_entry_num) {
		free(dev->prp_pool);
		/*
		 * Always increase in increments of pages. It doesn't waste
		 * much memory and reduces the number of allocations.
		 */
		dev->prp_pool = memalign(page_size, num_pages * page_size);
		if (!dev->prp_pool) {
			printf("Error: malloc prp_pool fail\n");
			return -ENOMEM;
		}
		dev->prp_entry_num = num_pages * (prps_per_page - 1) + 1;
	}

	prp_pool = dev->prp_pool;
	i = 0;
	while (nprps) {
		if ((i == (prps_per_page - 1)) && nprps > 1) {
			*(prp_pool + i) = cpu_to_le64((ulong)prp_pool +
					page_size);
			i = 0;
			prp_pool += page_size;
		}
		*(prp_pool + i++) = cpu_to_le64(dma_addr);
		dma_addr += page_size;
		nprps--;
	}
	*prp2 = (ulong)dev->prp_pool;

	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
			   num_pages * page_size);

	return 0;
}

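/* Hand out 16-bit command identifiers, wrapping back to 0 at USHRT_MAX */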
static __le16 nvme_get_cmd_id(void)
{
	static unsigned short cmdid;

	return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0);
}

static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index)
{
	/*
	 * Single CQ entries are always smaller than a cache line, so we
	 * can't invalidate them individually. However CQ entries are
	 * read only by the CPU, so it's safe to always invalidate all of them,
	 * as the cache line should never become dirty.
	 */
	ulong start = (ulong)&nvmeq->cqes[0];
	ulong stop = start + NVME_CQ_ALLOCATION;

	invalidate_dcache_range(start, stop);

	return readw(&(nvmeq->cqes[index].status));
}

/**
 * nvme_submit_cmd() - copy a command into a queue and ring the doorbell
 *
 * @nvmeq:	The queue to use
 * @cmd:	The command to send
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
	struct nvme_ops *ops;
	u16 tail = nvmeq->sq_tail;

	memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd));
	flush_dcache_range((ulong)&nvmeq->sq_cmds[tail],
			   (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd));

	ops = (struct nvme_ops *)nvmeq->dev->udev->driver->ops;
	if (ops && ops->submit_cmd) {
		ops->submit_cmd(nvmeq, cmd);
		return;
	}

	if (++tail == nvmeq->q_depth)
		tail = 0;
	writel(tail, nvmeq->q_db);
	nvmeq->sq_tail = tail;
}

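/*
 * Submit a command and poll its completion queue entry. Completion is
 * detected via the phase bit; a non-zero status field is printed and turned
 * into -EIO, and the CQ head doorbell is advanced in either case.
 */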
static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq,
				struct nvme_command *cmd,
				u32 *result, unsigned timeout)
{
	struct nvme_ops *ops;
	u16 head = nvmeq->cq_head;
	u16 phase = nvmeq->cq_phase;
	u16 status;
	ulong start_time;
	ulong timeout_us = timeout * 100000;

	cmd->common.command_id = nvme_get_cmd_id();
	nvme_submit_cmd(nvmeq, cmd);

	start_time = timer_get_us();

	for (;;) {
		status = nvme_read_completion_status(nvmeq, head);
		if ((status & 0x01) == phase)
			break;
		if (timeout_us > 0 && (timer_get_us() - start_time)
		    >= timeout_us)
			return -ETIMEDOUT;
	}

	ops = (struct nvme_ops *)nvmeq->dev->udev->driver->ops;
	if (ops && ops->complete_cmd)
		ops->complete_cmd(nvmeq, cmd);

	status >>= 1;
	if (status) {
		printf("ERROR: status = %x, phase = %d, head = %d\n",
		       status, phase, head);
		status = 0;
		if (++head == nvmeq->q_depth) {
			head = 0;
			phase = !phase;
		}
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
		nvmeq->cq_head = head;
		nvmeq->cq_phase = phase;

		return -EIO;
	}

	if (result)
		*result = readl(&(nvmeq->cqes[head].result));

	if (++head == nvmeq->q_depth) {
		head = 0;
		phase = !phase;
	}
	writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
	nvmeq->cq_head = head;
	nvmeq->cq_phase = phase;

	return status;
}

static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
				 u32 *result)
{
	return nvme_submit_sync_cmd(dev->queues[NVME_ADMIN_Q], cmd,
				    result, ADMIN_TIMEOUT);
}

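/*
 * Allocate a submission/completion queue pair in host memory and register it
 * in dev->queues[]; the CQ buffer is padded to NVME_CQ_ALLOCATION so whole
 * cache lines can be safely invalidated when reading completions.
 */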
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev,
					   int qid, int depth)
{
	struct nvme_ops *ops;
	struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq));
	if (!nvmeq)
		return NULL;
	memset(nvmeq, 0, sizeof(*nvmeq));

	nvmeq->cqes = (void *)memalign(4096, NVME_CQ_ALLOCATION);
	if (!nvmeq->cqes)
		goto free_nvmeq;
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth));

	nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth));
	if (!nvmeq->sq_cmds)
		goto free_queue;
	memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth));

	nvmeq->dev = dev;

	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->q_depth = depth;
	nvmeq->qid = qid;
	dev->queue_count++;
	dev->queues[qid] = nvmeq;

	ops = (struct nvme_ops *)dev->udev->driver->ops;
	if (ops && ops->setup_queue)
		ops->setup_queue(nvmeq);

	return nvmeq;

free_queue:
	free((void *)nvmeq->cqes);
free_nvmeq:
	free(nvmeq);

	return NULL;
}

static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

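/* Controller enable/disable/shutdown: update CC and wait for CSTS to follow */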
static int nvme_enable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_csts(dev, NVME_CSTS_RDY, NVME_CSTS_RDY);
}

static int nvme_disable_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config &= ~NVME_CC_ENABLE;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_csts(dev, NVME_CSTS_RDY, 0);
}

static int nvme_shutdown_ctrl(struct nvme_dev *dev)
{
	dev->ctrl_config &= ~NVME_CC_SHN_MASK;
	dev->ctrl_config |= NVME_CC_SHN_NORMAL;
	writel(dev->ctrl_config, &dev->bar->cc);

	return nvme_wait_csts(dev, NVME_CSTS_SHST_MASK, NVME_CSTS_SHST_CMPLT);
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	free((void *)nvmeq->cqes);
	free(nvmeq->sq_cmds);
	free(nvmeq);
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->queue_count - 1; i >= lowest; i--) {
		struct nvme_queue *nvmeq = dev->queues[i];
		dev->queue_count--;
		dev->queues[i] = NULL;
		nvme_free_queue(nvmeq);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth));
	flush_dcache_range((ulong)nvmeq->cqes,
			   (ulong)nvmeq->cqes + NVME_CQ_ALLOCATION);
	dev->online_queues++;
}

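/*
 * Set up the admin queue: pick a host page size inside the controller's
 * MPSMIN/MPSMAX range, program AQA/ASQ/ACQ and the controller configuration,
 * then re-enable the controller.
 */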
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = dev->cap;
	struct nvme_queue *nvmeq;
	/* most architectures use 4KB as the page size */
	unsigned page_shift = 12;
	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;

	if (page_shift < dev_page_min) {
		debug("Device minimum page size (%u) too large for host (%u)\n",
		      1 << dev_page_min, 1 << page_shift);
		return -ENODEV;
	}

	if (page_shift > dev_page_max) {
		debug("Device maximum page size (%u) smaller than host (%u)\n",
		      1 << dev_page_max, 1 << page_shift);
		page_shift = dev_page_max;
	}

	result = nvme_disable_ctrl(dev);
	if (result < 0)
		return result;

	nvmeq = dev->queues[NVME_ADMIN_Q];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	dev->page_size = 1 << page_shift;

	dev->ctrl_config = NVME_CC_CSS_NVM;
	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;

	writel(aqa, &dev->bar->aqa);
	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);

	result = nvme_enable_ctrl(dev);
	if (result)
		goto free_nvmeq;

	nvmeq->cq_vector = 0;

	nvme_init_queue(dev->queues[NVME_ADMIN_Q], 0);

	return result;

free_nvmeq:
	nvme_free_queues(dev, 0);

	return result;
}

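/*
 * nvme_alloc_cq()/nvme_alloc_sq() issue the admin Create I/O Completion/
 * Submission Queue commands for a queue pair already allocated in host
 * memory by nvme_alloc_queue().
 */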
static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;

	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
			 struct nvme_queue *nvmeq)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;

	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_admin_cmd(dev, &c, NULL);
}

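/*
 * Issue an Identify admin command; the caller provides a DMA buffer large
 * enough for the requested data structure (controller or namespace).
 */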
int nvme_identify(struct nvme_dev *dev, unsigned nsid,
		  unsigned cns, dma_addr_t dma_addr)
{
	struct nvme_command c;
	u32 page_size = dev->page_size;
	int offset = dma_addr & (page_size - 1);
	int length = sizeof(struct nvme_id_ctrl);
	int ret;

	memset(&c, 0, sizeof(c));
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(nsid);
	c.identify.prp1 = cpu_to_le64(dma_addr);

	length -= (page_size - offset);
	if (length <= 0) {
		c.identify.prp2 = 0;
	} else {
		dma_addr += (page_size - offset);
		c.identify.prp2 = cpu_to_le64(dma_addr);
	}

	c.identify.cns = cpu_to_le32(cns);

	invalidate_dcache_range(dma_addr,
				dma_addr + sizeof(struct nvme_id_ctrl));

	ret = nvme_submit_admin_cmd(dev, &c, NULL);
	if (!ret)
		invalidate_dcache_range(dma_addr,
					dma_addr + sizeof(struct nvme_id_ctrl));

	return ret;
}

int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;
	int ret;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_get_features;
	c.features.nsid = cpu_to_le32(nsid);
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);

	ret = nvme_submit_admin_cmd(dev, &c, result);

	/*
	 * TODO: Add some cache invalidation when a DMA buffer is involved
	 * in the request, here and before the command gets submitted. The
	 * buffer size varies by feature, also some features use a different
	 * field in the command packet to hold the buffer address.
	 * Section 5.21.1 (Set Features command) in the NVMe specification
	 * details the buffer requirements for each feature.
	 *
	 * At the moment there is no user of this function.
	 */

	return ret;
}

int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
		      dma_addr_t dma_addr, u32 *result)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.features.opcode = nvme_admin_set_features;
	c.features.prp1 = cpu_to_le64(dma_addr);
	c.features.fid = cpu_to_le32(fid);
	c.features.dword11 = cpu_to_le32(dword11);

	/*
	 * TODO: Add a cache clean (aka flush) operation when a DMA buffer is
	 * involved in the request. The buffer size varies by feature, also
	 * some features use a different field in the command packet to hold
	 * the buffer address. Section 5.21.1 (Set Features command) in the
	 * NVMe specification details the buffer requirements for each
	 * feature.
	 * At the moment the only user of this function is not using
	 * any DMA buffer at all.
	 */

	return nvme_submit_admin_cmd(dev, &c, result);
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;

	nvmeq->cq_vector = qid - 1;
	result = nvme_alloc_cq(dev, qid, nvmeq);
	if (result < 0)
		goto release_cq;

	result = nvme_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		goto release_sq;

	nvme_init_queue(nvmeq, qid);

	return result;

release_sq:
	nvme_delete_sq(dev, qid);
release_cq:
	nvme_delete_cq(dev, qid);

	return result;
}

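/*
 * Request the number of I/O queues via the Number of Queues feature; both
 * halves of the feature value carry the zero-based count, and the controller
 * reports back how many submission/completion queues it actually granted.
 */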
static int nvme_set_queue_count(struct nvme_dev *dev, int count)
{
	int status;
	u32 result;
	u32 q_count = (count - 1) | ((count - 1) << 16);

	status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES,
				   q_count, 0, &result);

	if (status < 0)
		return status;
	if (status > 1)
		return 0;

	return min(result & 0xffff, result >> 16) + 1;
}

static int nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned int i;
	int ret;

	for (i = dev->queue_count; i <= dev->max_qid; i++)
		if (!nvme_alloc_queue(dev, i, dev->q_depth))
			return log_msg_ret("all", -ENOMEM);

	for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
		ret = nvme_create_queue(dev->queues[i], i);
		if (ret)
			return log_msg_ret("cre", ret);
	}

	return 0;
}

static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	int nr_io_queues;
	int result;

	nr_io_queues = 1;
	result = nvme_set_queue_count(dev, nr_io_queues);
	if (result <= 0) {
		log_debug("Cannot set queue count (err=%dE)\n", result);
		return result;
	}

	dev->max_qid = nr_io_queues;

	/* Free previously allocated queues */
	nvme_free_queues(dev, nr_io_queues + 1);
	result = nvme_create_io_queues(dev);
	if (result)
		return result;

	return 0;
}

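/*
 * Read the controller Identify data and cache the fields the driver needs:
 * namespace count, serial/model/firmware strings and the maximum transfer
 * size.
 */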
static int nvme_get_info_from_identify(struct nvme_dev *dev)
{
	struct nvme_id_ctrl *ctrl;
	int ret;
	int shift = NVME_CAP_MPSMIN(dev->cap) + 12;

	ctrl = memalign(dev->page_size, sizeof(struct nvme_id_ctrl));
	if (!ctrl)
		return -ENOMEM;

	ret = nvme_identify(dev, 0, 1, (dma_addr_t)(long)ctrl);
	if (ret) {
		free(ctrl);
		return -EIO;
	}

	dev->nn = le32_to_cpu(ctrl->nn);
	dev->vwc = ctrl->vwc;
	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
	if (ctrl->mdts) {
		dev->max_transfer_shift = (ctrl->mdts + shift);
	} else {
		/*
		 * Maximum Data Transfer Size (MDTS) field indicates the maximum
		 * data transfer size between the host and the controller. The
		 * host should not submit a command that exceeds this transfer
		 * size. The value is in units of the minimum memory page size
		 * and is reported as a power of two (2^n).
		 *
		 * The spec also says: a value of 0h indicates no restrictions
		 * on transfer size. But in nvme_blk_read/write() below we have
		 * the following algorithm for maximum number of logic blocks
		 * per transfer:
		 *
		 * u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
		 *
		 * In order for lbas not to overflow, the maximum number is 15
		 * which means dev->max_transfer_shift = 15 + 9 (ns->lba_shift).
		 * Let's use 20 which provides 1MB size.
		 */
		dev->max_transfer_shift = 20;
	}

	free(ctrl);
	return 0;
}

int nvme_get_namespace_id(struct udevice *udev, u32 *ns_id, u8 *eui64)
{
	struct nvme_ns *ns = dev_get_priv(udev);

	if (ns_id)
		*ns_id = ns->ns_id;
	if (eui64)
		memcpy(eui64, ns->eui64, sizeof(ns->eui64));

	return 0;
}

int nvme_scan_namespace(void)
{
	struct uclass *uc;
	struct udevice *dev;
	int ret;

	ret = uclass_get(UCLASS_NVME, &uc);
	if (ret)
		return ret;

	uclass_foreach_dev(dev, uc) {
		ret = device_probe(dev);
		if (ret) {
			log_err("Failed to probe '%s': err=%dE\n", dev->name,
				ret);
			/* Bail if we ran out of memory, else keep trying */
			if (ret != -EBUSY)
				return ret;
		}
	}

	return 0;
}

static int nvme_blk_probe(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev->parent);
	struct blk_desc *desc = dev_get_uclass_plat(udev);
	struct nvme_ns *ns = dev_get_priv(udev);
	u8 flbas;
	struct nvme_id_ns *id;

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id)
		return -ENOMEM;

	ns->dev = ndev;
	/* extract the namespace id from the block device name */
	ns->ns_id = trailing_strtol(udev->name);
	if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)(long)id)) {
		free(id);
		return -EIO;
	}

	memcpy(&ns->eui64, &id->eui64, sizeof(id->eui64));
	flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK;
	ns->flbas = flbas;
	ns->lba_shift = id->lbaf[flbas].ds;
	list_add(&ns->list, &ndev->namespaces);

	desc->lba = le64_to_cpu(id->nsze);
	desc->log2blksz = ns->lba_shift;
	desc->blksz = 1 << ns->lba_shift;
	desc->bdev = udev;
	memcpy(desc->vendor, ndev->vendor, sizeof(ndev->vendor));
	memcpy(desc->product, ndev->serial, sizeof(ndev->serial));
	memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev));

	free(id);
	return 0;
}

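/*
 * Common read/write path: each request is split into chunks of at most
 * 1 << max_transfer_shift bytes, with the PRP entries rebuilt per chunk.
 */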
static ulong nvme_blk_rw(struct udevice *udev, lbaint_t blknr,
			 lbaint_t blkcnt, void *buffer, bool read)
{
	struct nvme_ns *ns = dev_get_priv(udev);
	struct nvme_dev *dev = ns->dev;
	struct nvme_command c;
	struct blk_desc *desc = dev_get_uclass_plat(udev);
	int status;
	u64 prp2;
	u64 total_len = blkcnt << desc->log2blksz;
	u64 temp_len = total_len;
	uintptr_t temp_buffer = (uintptr_t)buffer;

	u64 slba = blknr;
	u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift);
	u64 total_lbas = blkcnt;

	flush_dcache_range((unsigned long)buffer,
			   (unsigned long)buffer + total_len);

	c.rw.opcode = read ? nvme_cmd_read : nvme_cmd_write;
	c.rw.flags = 0;
	c.rw.nsid = cpu_to_le32(ns->ns_id);
	c.rw.control = 0;
	c.rw.dsmgmt = 0;
	c.rw.reftag = 0;
	c.rw.apptag = 0;
	c.rw.appmask = 0;
	c.rw.metadata = 0;

	while (total_lbas) {
		if (total_lbas < lbas) {
			lbas = (u16)total_lbas;
			total_lbas = 0;
		} else {
			total_lbas -= lbas;
		}

		if (nvme_setup_prps(dev, &prp2,
				    lbas << ns->lba_shift, temp_buffer))
			return -EIO;
		c.rw.slba = cpu_to_le64(slba);
		slba += lbas;
		c.rw.length = cpu_to_le16(lbas - 1);
		c.rw.prp1 = cpu_to_le64(temp_buffer);
		c.rw.prp2 = cpu_to_le64(prp2);
		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
					      &c, NULL, IO_TIMEOUT);
		if (status)
			break;
		temp_len -= (u32)lbas << ns->lba_shift;
		temp_buffer += lbas << ns->lba_shift;
	}

	if (read)
		invalidate_dcache_range((unsigned long)buffer,
					(unsigned long)buffer + total_len);

	return (total_len - temp_len) >> desc->log2blksz;
}

static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr,
			   lbaint_t blkcnt, void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, buffer, true);
}

static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr,
			    lbaint_t blkcnt, const void *buffer)
{
	return nvme_blk_rw(udev, blknr, blkcnt, (void *)buffer, false);
}

static const struct blk_ops nvme_blk_ops = {
	.read	= nvme_blk_read,
	.write	= nvme_blk_write,
};

U_BOOT_DRIVER(nvme_blk) = {
	.name	= "nvme-blk",
	.id	= UCLASS_BLK,
	.probe	= nvme_blk_probe,
	.ops	= &nvme_blk_ops,
	.priv_auto	= sizeof(struct nvme_ns),
};

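/*
 * Bring up the controller: configure the admin queue, allocate the PRP pool,
 * create the I/O queues and then bind a block device for each active
 * namespace reported by Identify.
 */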
int nvme_init(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev);
	struct nvme_id_ns *id;
	int ret;

	ndev->udev = udev;
	INIT_LIST_HEAD(&ndev->namespaces);
	if (readl(&ndev->bar->csts) == -1) {
		ret = -EBUSY;
		printf("Error: %s: Controller not ready!\n", udev->name);
		goto free_nvme;
	}

	ndev->queues = malloc(NVME_Q_NUM * sizeof(struct nvme_queue *));
	if (!ndev->queues) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	memset(ndev->queues, 0, NVME_Q_NUM * sizeof(struct nvme_queue *));

	ndev->cap = nvme_readq(&ndev->bar->cap);
	ndev->q_depth = min_t(int, NVME_CAP_MQES(ndev->cap) + 1, NVME_Q_DEPTH);
	ndev->db_stride = 1 << NVME_CAP_STRIDE(ndev->cap);
	ndev->dbs = ((void __iomem *)ndev->bar) + 4096;

	ret = nvme_configure_admin_queue(ndev);
	if (ret) {
		log_debug("Unable to configure admin queue (err=%dE)\n", ret);
		goto free_queue;
	}

	/* Allocate after the page size is known */
	ndev->prp_pool = memalign(ndev->page_size, MAX_PRP_POOL);
	if (!ndev->prp_pool) {
		ret = -ENOMEM;
		printf("Error: %s: Out of memory!\n", udev->name);
		goto free_nvme;
	}
	ndev->prp_entry_num = MAX_PRP_POOL >> 3;

	ret = nvme_setup_io_queues(ndev);
	if (ret) {
		log_debug("Unable to setup I/O queues(err=%dE)\n", ret);
		goto free_queue;
	}

	nvme_get_info_from_identify(ndev);

	/* Create a blk device for each namespace */

	id = memalign(ndev->page_size, sizeof(struct nvme_id_ns));
	if (!id) {
		ret = -ENOMEM;
		goto free_queue;
	}

	for (int i = 1; i <= ndev->nn; i++) {
		struct udevice *ns_udev;
		char name[20];

		memset(id, 0, sizeof(*id));
		if (nvme_identify(ndev, i, 0, (dma_addr_t)(long)id)) {
			ret = -EIO;
			goto free_id;
		}

		/* skip inactive namespace */
		if (!id->nsze)
			continue;

		/*
		 * Encode the namespace id to the device name so that
		 * we can extract it when doing the probe.
		 */
		sprintf(name, "blk#%d", i);

		/* The real blksz and size will be set by nvme_blk_probe() */
		ret = blk_create_devicef(udev, "nvme-blk", name, UCLASS_NVME,
					 -1, DEFAULT_BLKSZ, 0, &ns_udev);
		if (ret)
			goto free_id;

		ret = bootdev_setup_for_sibling_blk(ns_udev, "nvme_bootdev");
		if (ret)
			return log_msg_ret("bootdev", ret);

		ret = blk_probe_or_unbind(ns_udev);
		if (ret)
			goto free_id;
	}

	free(id);
	return 0;

free_id:
	free(id);
free_queue:
	free((void *)ndev->queues);
free_nvme:
	return ret;
}

int nvme_shutdown(struct udevice *udev)
{
	struct nvme_dev *ndev = dev_get_priv(udev);
	int ret;

	ret = nvme_shutdown_ctrl(ndev);
	if (ret < 0) {
		printf("Error: %s: Shutdown timed out!\n", udev->name);
		return ret;
	}

	return nvme_disable_ctrl(ndev);
}