/*
 * QEMU NVM Express Controller
 *
 * Copyright (c) 2012, Intel Corporation
 *
 * Written by Keith Busch <[email protected]>
 *
 * This code is licensed under the GNU GPL v2 or later.
 */

/**
 * Reference Specs: http://www.nvmexpress.org, 1.1, 1.0e
 *
 *  http://www.nvmexpress.org/resources/
 */

/**
 * Usage: add the following options:
 *      -drive file=<file>,if=none,id=<drive_id>
 *      -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>
 */
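
/*
 * Example invocation (a minimal sketch; the machine binary, image path,
 * drive id and serial string below are placeholders, not values required
 * by the device):
 *
 *      qemu-system-x86_64 ... \
 *          -drive file=nvme.img,if=none,id=nvme0 \
 *          -device nvme,drive=nvme0,serial=deadbeef
 */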

#include <hw/block/block.h>
#include <hw/hw.h>
#include <hw/pci/msix.h>
#include <hw/pci/pci.h>
#include "sysemu/sysemu.h"
#include "qapi/visitor.h"
#include "sysemu/block-backend.h"

#include "nvme.h"

static void nvme_process_sq(void *opaque);

static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid)
{
    return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1;
}

static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid)
{
    return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1;
}

static void nvme_inc_cq_tail(NvmeCQueue *cq)
{
    cq->tail++;
    if (cq->tail >= cq->size) {
        cq->tail = 0;
        cq->phase = !cq->phase;
    }
}

static void nvme_inc_sq_head(NvmeSQueue *sq)
{
    sq->head = (sq->head + 1) % sq->size;
}

static uint8_t nvme_cq_full(NvmeCQueue *cq)
{
    return (cq->tail + 1) % cq->size == cq->head;
}

static uint8_t nvme_sq_empty(NvmeSQueue *sq)
{
    return sq->head == sq->tail;
}

static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
{
    if (cq->irq_enabled) {
        if (msix_enabled(&(n->parent_obj))) {
            msix_notify(&(n->parent_obj), cq->vector);
        } else {
            pci_irq_pulse(&n->parent_obj);
        }
    }
}

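/*
 * Build a scatter-gather list from a PRP1/PRP2 pair.  PRP1 may point into
 * the middle of a page; once the transfer spills past a second page, PRP2
 * is treated as a pointer to a page-aligned list of PRP entries, and the
 * last entry of a full list page chains to the next list page.
 */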
static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
    uint32_t len, NvmeCtrl *n)
{
    hwaddr trans_len = n->page_size - (prp1 % n->page_size);
    trans_len = MIN(len, trans_len);
    int num_prps = (len >> n->page_bits) + 1;

    if (!prp1) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
    qemu_sglist_add(qsg, prp1, trans_len);
    len -= trans_len;
    if (len) {
        if (!prp2) {
            goto unmap;
        }
        if (len > n->page_size) {
            uint64_t prp_list[n->max_prp_ents];
            uint32_t nents, prp_trans;
            int i = 0;

            nents = (len + n->page_size - 1) >> n->page_bits;
            prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
            pci_dma_read(&n->parent_obj, prp2, (void *)prp_list, prp_trans);
            while (len != 0) {
                uint64_t prp_ent = le64_to_cpu(prp_list[i]);

                if (i == n->max_prp_ents - 1 && len > n->page_size) {
                    if (!prp_ent || prp_ent & (n->page_size - 1)) {
                        goto unmap;
                    }

                    i = 0;
                    nents = (len + n->page_size - 1) >> n->page_bits;
                    prp_trans = MIN(n->max_prp_ents, nents) * sizeof(uint64_t);
                    pci_dma_read(&n->parent_obj, prp_ent, (void *)prp_list,
                        prp_trans);
                    prp_ent = le64_to_cpu(prp_list[i]);
                }

                if (!prp_ent || prp_ent & (n->page_size - 1)) {
                    goto unmap;
                }

                trans_len = MIN(len, n->page_size);
                qemu_sglist_add(qsg, prp_ent, trans_len);
                len -= trans_len;
                i++;
            }
        } else {
            if (prp2 & (n->page_size - 1)) {
                goto unmap;
            }
            qemu_sglist_add(qsg, prp2, len);
        }
    }
    return NVME_SUCCESS;

 unmap:
    qemu_sglist_destroy(qsg);
    return NVME_INVALID_FIELD | NVME_DNR;
}

static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
    uint64_t prp1, uint64_t prp2)
{
    QEMUSGList qsg;

    if (nvme_map_prp(&qsg, prp1, prp2, len, n)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    if (dma_buf_read(ptr, len, &qsg)) {
        qemu_sglist_destroy(&qsg);
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    return NVME_SUCCESS;
}

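/*
 * Completion queue timer callback: post completion entries to guest memory
 * with the current phase tag until the queue fills up, recycle each request
 * onto its submission queue's free list, and notify the guest if interrupts
 * are enabled for this queue.
 */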
static void nvme_post_cqes(void *opaque)
{
    NvmeCQueue *cq = opaque;
    NvmeCtrl *n = cq->ctrl;
    NvmeRequest *req, *next;

    QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
        NvmeSQueue *sq;
        hwaddr addr;

        if (nvme_cq_full(cq)) {
            break;
        }

        QTAILQ_REMOVE(&cq->req_list, req, entry);
        sq = req->sq;
        req->cqe.status = cpu_to_le16((req->status << 1) | cq->phase);
        req->cqe.sq_id = cpu_to_le16(sq->sqid);
        req->cqe.sq_head = cpu_to_le16(sq->head);
        addr = cq->dma_addr + cq->tail * n->cqe_size;
        nvme_inc_cq_tail(cq);
        pci_dma_write(&n->parent_obj, addr, (void *)&req->cqe,
            sizeof(req->cqe));
        QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
    }
    nvme_isr_notify(n, cq);
}

static void nvme_enqueue_req_completion(NvmeCQueue *cq, NvmeRequest *req)
{
    assert(cq->cqid == req->sq->cqid);
    QTAILQ_REMOVE(&req->sq->out_req_list, req, entry);
    QTAILQ_INSERT_TAIL(&cq->req_list, req, entry);
    timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}

static void nvme_rw_cb(void *opaque, int ret)
{
    NvmeRequest *req = opaque;
    NvmeSQueue *sq = req->sq;
    NvmeCtrl *n = sq->ctrl;
    NvmeCQueue *cq = n->cq[sq->cqid];

    block_acct_done(blk_get_stats(n->conf.blk), &req->acct);
    if (!ret) {
        req->status = NVME_SUCCESS;
    } else {
        req->status = NVME_INTERNAL_DEV_ERROR;
    }

    qemu_sglist_destroy(&req->qsg);
    nvme_enqueue_req_completion(cq, req);
}

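/*
 * Read/Write command handler: check the LBA range against the namespace
 * capacity, map the command's PRPs into req->qsg and start an asynchronous
 * DMA transfer through the block layer; nvme_rw_cb() posts the completion.
 */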
static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
    NvmeRequest *req)
{
    NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
    uint32_t nlb  = le32_to_cpu(rw->nlb) + 1;
    uint64_t slba = le64_to_cpu(rw->slba);
    uint64_t prp1 = le64_to_cpu(rw->prp1);
    uint64_t prp2 = le64_to_cpu(rw->prp2);

    uint8_t lba_index  = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
    uint8_t data_shift = ns->id_ns.lbaf[lba_index].ds;
    uint64_t data_size = (uint64_t)nlb << data_shift;
    uint64_t aio_slba  = slba << (data_shift - BDRV_SECTOR_BITS);
    int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;

    if ((slba + nlb) > ns->id_ns.nsze) {
        return NVME_LBA_RANGE | NVME_DNR;
    }
    if (nvme_map_prp(&req->qsg, prp1, prp2, data_size, n)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    assert((nlb << data_shift) == req->qsg.size);

    dma_acct_start(n->conf.blk, &req->acct, &req->qsg,
                   is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
    req->aiocb = is_write ?
        dma_blk_write(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req) :
        dma_blk_read(n->conf.blk, &req->qsg, aio_slba, nvme_rw_cb, req);

    return NVME_NO_COMPLETE;
}

static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    NvmeNamespace *ns;
    uint32_t nsid = le32_to_cpu(cmd->nsid);

    if (nsid == 0 || nsid > n->num_namespaces) {
        return NVME_INVALID_NSID | NVME_DNR;
    }

    ns = &n->namespaces[nsid - 1];
    switch (cmd->opcode) {
    case NVME_CMD_FLUSH:
        return NVME_SUCCESS;
    case NVME_CMD_WRITE:
    case NVME_CMD_READ:
        return nvme_rw(n, ns, cmd, req);
    default:
        return NVME_INVALID_OPCODE | NVME_DNR;
    }
}

static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
{
    n->sq[sq->sqid] = NULL;
    timer_del(sq->timer);
    timer_free(sq->timer);
    g_free(sq->io_req);
    if (sq->sqid) {
        g_free(sq);
    }
}

static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
{
    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
    NvmeRequest *req, *next;
    NvmeSQueue *sq;
    NvmeCQueue *cq;
    uint16_t qid = le16_to_cpu(c->qid);

    if (!qid || nvme_check_sqid(n, qid)) {
        return NVME_INVALID_QID | NVME_DNR;
    }

    sq = n->sq[qid];
    while (!QTAILQ_EMPTY(&sq->out_req_list)) {
        req = QTAILQ_FIRST(&sq->out_req_list);
        assert(req->aiocb);
        blk_aio_cancel(req->aiocb);
    }
    if (!nvme_check_cqid(n, sq->cqid)) {
        cq = n->cq[sq->cqid];
        QTAILQ_REMOVE(&cq->sq_list, sq, entry);

        nvme_post_cqes(cq);
        QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
            if (req->sq == sq) {
                QTAILQ_REMOVE(&cq->req_list, req, entry);
                QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
            }
        }
    }

    nvme_free_sq(sq, n);
    return NVME_SUCCESS;
}

static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
    uint16_t sqid, uint16_t cqid, uint16_t size)
{
    int i;
    NvmeCQueue *cq;

    sq->ctrl = n;
    sq->dma_addr = dma_addr;
    sq->sqid = sqid;
    sq->size = size;
    sq->cqid = cqid;
    sq->head = sq->tail = 0;
    sq->io_req = g_new(NvmeRequest, sq->size);

    QTAILQ_INIT(&sq->req_list);
    QTAILQ_INIT(&sq->out_req_list);
    for (i = 0; i < sq->size; i++) {
        sq->io_req[i].sq = sq;
        QTAILQ_INSERT_TAIL(&(sq->req_list), &sq->io_req[i], entry);
    }
    sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);

    assert(n->cq[cqid]);
    cq = n->cq[cqid];
    QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
    n->sq[sqid] = sq;
}

static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
{
    NvmeSQueue *sq;
    NvmeCreateSq *c = (NvmeCreateSq *)cmd;

    uint16_t cqid = le16_to_cpu(c->cqid);
    uint16_t sqid = le16_to_cpu(c->sqid);
    uint16_t qsize = le16_to_cpu(c->qsize);
    uint16_t qflags = le16_to_cpu(c->sq_flags);
    uint64_t prp1 = le64_to_cpu(c->prp1);

    if (!cqid || nvme_check_cqid(n, cqid)) {
        return NVME_INVALID_CQID | NVME_DNR;
    }
    if (!sqid || (sqid && !nvme_check_sqid(n, sqid))) {
        return NVME_INVALID_QID | NVME_DNR;
    }
    if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
    }
    if (!prp1 || prp1 & (n->page_size - 1)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    if (!(NVME_SQ_FLAGS_PC(qflags))) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    sq = g_malloc0(sizeof(*sq));
    nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1);
    return NVME_SUCCESS;
}

static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
{
    n->cq[cq->cqid] = NULL;
    timer_del(cq->timer);
    timer_free(cq->timer);
    msix_vector_unuse(&n->parent_obj, cq->vector);
    if (cq->cqid) {
        g_free(cq);
    }
}

static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd)
{
    NvmeDeleteQ *c = (NvmeDeleteQ *)cmd;
    NvmeCQueue *cq;
    uint16_t qid = le16_to_cpu(c->qid);

    if (!qid || nvme_check_cqid(n, qid)) {
        return NVME_INVALID_CQID | NVME_DNR;
    }

    cq = n->cq[qid];
    if (!QTAILQ_EMPTY(&cq->sq_list)) {
        return NVME_INVALID_QUEUE_DEL;
    }
    nvme_free_cq(cq, n);
    return NVME_SUCCESS;
}

static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr,
    uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled)
{
    cq->ctrl = n;
    cq->cqid = cqid;
    cq->size = size;
    cq->dma_addr = dma_addr;
    cq->phase = 1;
    cq->irq_enabled = irq_enabled;
    cq->vector = vector;
    cq->head = cq->tail = 0;
    QTAILQ_INIT(&cq->req_list);
    QTAILQ_INIT(&cq->sq_list);
    msix_vector_use(&n->parent_obj, cq->vector);
    n->cq[cqid] = cq;
    cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
}

static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
{
    NvmeCQueue *cq;
    NvmeCreateCq *c = (NvmeCreateCq *)cmd;
    uint16_t cqid = le16_to_cpu(c->cqid);
    uint16_t vector = le16_to_cpu(c->irq_vector);
    uint16_t qsize = le16_to_cpu(c->qsize);
    uint16_t qflags = le16_to_cpu(c->cq_flags);
    uint64_t prp1 = le64_to_cpu(c->prp1);

    if (!cqid || (cqid && !nvme_check_cqid(n, cqid))) {
        return NVME_INVALID_CQID | NVME_DNR;
    }
    if (!qsize || qsize > NVME_CAP_MQES(n->bar.cap)) {
        return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR;
    }
    if (!prp1) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    if (vector > n->num_queues) {
        return NVME_INVALID_IRQ_VECTOR | NVME_DNR;
    }
    if (!(NVME_CQ_FLAGS_PC(qflags))) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }

    cq = g_malloc0(sizeof(*cq));
    nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
        NVME_CQ_FLAGS_IEN(qflags));
    return NVME_SUCCESS;
}

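/*
 * Identify command: a non-zero CNS returns the Identify Controller data
 * structure, a CNS of zero returns the Identify Namespace structure for
 * the requested NSID.
 */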
static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd)
{
    NvmeNamespace *ns;
    NvmeIdentify *c = (NvmeIdentify *)cmd;
    uint32_t cns  = le32_to_cpu(c->cns);
    uint32_t nsid = le32_to_cpu(c->nsid);
    uint64_t prp1 = le64_to_cpu(c->prp1);
    uint64_t prp2 = le64_to_cpu(c->prp2);

    if (cns) {
        return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl),
            prp1, prp2);
    }
    if (nsid == 0 || nsid > n->num_namespaces) {
        return NVME_INVALID_NSID | NVME_DNR;
    }

    ns = &n->namespaces[nsid - 1];
    return nvme_dma_read_prp(n, (uint8_t *)&ns->id_ns, sizeof(ns->id_ns),
        prp1, prp2);
}

static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    uint32_t dw10 = le32_to_cpu(cmd->cdw10);

    switch (dw10) {
    case NVME_NUMBER_OF_QUEUES:
        req->cqe.result =
            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
        break;
    default:
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    return NVME_SUCCESS;
}

static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    uint32_t dw10 = le32_to_cpu(cmd->cdw10);

    switch (dw10) {
    case NVME_NUMBER_OF_QUEUES:
        req->cqe.result =
            cpu_to_le32((n->num_queues - 1) | ((n->num_queues - 1) << 16));
        break;
    default:
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    return NVME_SUCCESS;
}

static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    switch (cmd->opcode) {
    case NVME_ADM_CMD_DELETE_SQ:
        return nvme_del_sq(n, cmd);
    case NVME_ADM_CMD_CREATE_SQ:
        return nvme_create_sq(n, cmd);
    case NVME_ADM_CMD_DELETE_CQ:
        return nvme_del_cq(n, cmd);
    case NVME_ADM_CMD_CREATE_CQ:
        return nvme_create_cq(n, cmd);
    case NVME_ADM_CMD_IDENTIFY:
        return nvme_identify(n, cmd);
    case NVME_ADM_CMD_SET_FEATURES:
        return nvme_set_feature(n, cmd, req);
    case NVME_ADM_CMD_GET_FEATURES:
        return nvme_get_feature(n, cmd, req);
    default:
        return NVME_INVALID_OPCODE | NVME_DNR;
    }
}

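/*
 * Submission queue timer callback: fetch submission entries from guest
 * memory while the queue is non-empty and free request slots remain, then
 * dispatch each command to the admin or I/O handler.  Any command that
 * does not return NVME_NO_COMPLETE is completed immediately.
 */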
static void nvme_process_sq(void *opaque)
{
    NvmeSQueue *sq = opaque;
    NvmeCtrl *n = sq->ctrl;
    NvmeCQueue *cq = n->cq[sq->cqid];

    uint16_t status;
    hwaddr addr;
    NvmeCmd cmd;
    NvmeRequest *req;

    while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
        addr = sq->dma_addr + sq->head * n->sqe_size;
        pci_dma_read(&n->parent_obj, addr, (void *)&cmd, sizeof(cmd));
        nvme_inc_sq_head(sq);

        req = QTAILQ_FIRST(&sq->req_list);
        QTAILQ_REMOVE(&sq->req_list, req, entry);
        QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
        memset(&req->cqe, 0, sizeof(req->cqe));
        req->cqe.cid = cmd.cid;

        status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
            nvme_admin_cmd(n, &cmd, req);
        if (status != NVME_NO_COMPLETE) {
            req->status = status;
            nvme_enqueue_req_completion(cq, req);
        }
    }
}

static void nvme_clear_ctrl(NvmeCtrl *n)
{
    int i;

    for (i = 0; i < n->num_queues; i++) {
        if (n->sq[i] != NULL) {
            nvme_free_sq(n->sq[i], n);
        }
    }
    for (i = 0; i < n->num_queues; i++) {
        if (n->cq[i] != NULL) {
            nvme_free_cq(n->cq[i], n);
        }
    }

    blk_flush(n->conf.blk);
    n->bar.cc = 0;
}

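/*
 * Enable the controller: sanity-check the admin queue addresses and the
 * CC memory page size and queue entry size fields against the advertised
 * capabilities, then create the admin completion and submission queues.
 * A non-zero return makes the caller report NVME_CSTS_FAILED.
 */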
static int nvme_start_ctrl(NvmeCtrl *n)
{
    uint32_t page_bits = NVME_CC_MPS(n->bar.cc) + 12;
    uint32_t page_size = 1 << page_bits;

    if (n->cq[0] || n->sq[0] || !n->bar.asq || !n->bar.acq ||
            n->bar.asq & (page_size - 1) || n->bar.acq & (page_size - 1) ||
            NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap) ||
            NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap) ||
            NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes) ||
            NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes) ||
            NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes) ||
            NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes) ||
            !NVME_AQA_ASQS(n->bar.aqa) || !NVME_AQA_ACQS(n->bar.aqa)) {
        return -1;
    }

    n->page_bits = page_bits;
    n->page_size = page_size;
    n->max_prp_ents = n->page_size / sizeof(uint64_t);
    n->cqe_size = 1 << NVME_CC_IOCQES(n->bar.cc);
    n->sqe_size = 1 << NVME_CC_IOSQES(n->bar.cc);
    nvme_init_cq(&n->admin_cq, n, n->bar.acq, 0, 0,
        NVME_AQA_ACQS(n->bar.aqa) + 1, 1);
    nvme_init_sq(&n->admin_sq, n, n->bar.asq, 0, 0,
        NVME_AQA_ASQS(n->bar.aqa) + 1);

    return 0;
}

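/*
 * Writes to the controller register file.  Offsets follow the NVMe
 * register map: 0x0c INTMS, 0x10 INTMC, 0x14 CC, 0x24 AQA, 0x28/0x2c ASQ
 * low/high and 0x30/0x34 ACQ low/high; writes elsewhere are ignored.
 */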
static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data,
    unsigned size)
{
    switch (offset) {
    case 0xc:
        n->bar.intms |= data & 0xffffffff;
        n->bar.intmc = n->bar.intms;
        break;
    case 0x10:
        n->bar.intms &= ~(data & 0xffffffff);
        n->bar.intmc = n->bar.intms;
        break;
    case 0x14:
        if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) {
            n->bar.cc = data;
            if (nvme_start_ctrl(n)) {
                n->bar.csts = NVME_CSTS_FAILED;
            } else {
                n->bar.csts = NVME_CSTS_READY;
            }
        } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) {
            nvme_clear_ctrl(n);
            n->bar.csts &= ~NVME_CSTS_READY;
        }
        if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) {
                nvme_clear_ctrl(n);
                n->bar.cc = data;
                n->bar.csts |= NVME_CSTS_SHST_COMPLETE;
        } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) {
                n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE;
                n->bar.cc = data;
        }
        break;
    case 0x24:
        n->bar.aqa = data & 0xffffffff;
        break;
    case 0x28:
        n->bar.asq = data;
        break;
    case 0x2c:
        n->bar.asq |= data << 32;
        break;
    case 0x30:
        n->bar.acq = data;
        break;
    case 0x34:
        n->bar.acq |= data << 32;
        break;
    default:
        break;
    }
}

static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size)
{
    NvmeCtrl *n = (NvmeCtrl *)opaque;
    uint8_t *ptr = (uint8_t *)&n->bar;
    uint64_t val = 0;

    if (addr < sizeof(n->bar)) {
        memcpy(&val, ptr + addr, size);
    }
    return val;
}

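/*
 * Doorbell writes.  With a 4-byte doorbell stride, submission queue tail
 * doorbells live at even dword offsets from 0x1000 and completion queue
 * head doorbells at odd ones, so the queue id is recovered by dividing the
 * offset by 8.  Moving a CQ head may restart its submission queues; moving
 * an SQ tail schedules command processing.
 */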
static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val)
{
    uint32_t qid;

    if (addr & ((1 << 2) - 1)) {
        return;
    }

    if (((addr - 0x1000) >> 2) & 1) {
        uint16_t new_head = val & 0xffff;
        int start_sqs;
        NvmeCQueue *cq;

        qid = (addr - (0x1000 + (1 << 2))) >> 3;
        if (nvme_check_cqid(n, qid)) {
            return;
        }

        cq = n->cq[qid];
        if (new_head >= cq->size) {
            return;
        }

        start_sqs = nvme_cq_full(cq) ? 1 : 0;
        cq->head = new_head;
        if (start_sqs) {
            NvmeSQueue *sq;
            QTAILQ_FOREACH(sq, &cq->sq_list, entry) {
                timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
            }
            timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
        }

        if (cq->tail != cq->head) {
            nvme_isr_notify(n, cq);
        }
    } else {
        uint16_t new_tail = val & 0xffff;
        NvmeSQueue *sq;

        qid = (addr - 0x1000) >> 3;
        if (nvme_check_sqid(n, qid)) {
            return;
        }

        sq = n->sq[qid];
        if (new_tail >= sq->size) {
            return;
        }

        sq->tail = new_tail;
        timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
    }
}

static void nvme_mmio_write(void *opaque, hwaddr addr, uint64_t data,
    unsigned size)
{
    NvmeCtrl *n = (NvmeCtrl *)opaque;
    if (addr < sizeof(n->bar)) {
        nvme_write_bar(n, addr, data, size);
    } else if (addr >= 0x1000) {
        nvme_process_db(n, addr, data);
    }
}

static const MemoryRegionOps nvme_mmio_ops = {
    .read = nvme_mmio_read,
    .write = nvme_mmio_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 2,
        .max_access_size = 8,
    },
};

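/*
 * PCI init hook: require a backing drive and a serial property, set up the
 * PCIe configuration space, register BAR and MSI-X table, and populate the
 * Identify Controller / Identify Namespace structures and the CAP register
 * with the values this model advertises.
 */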
static int nvme_init(PCIDevice *pci_dev)
{
    NvmeCtrl *n = NVME(pci_dev);
    NvmeIdCtrl *id = &n->id_ctrl;

    int i;
    int64_t bs_size;
    uint8_t *pci_conf;

    if (!n->conf.blk) {
        return -1;
    }

    bs_size = blk_getlength(n->conf.blk);
    if (bs_size < 0) {
        return -1;
    }

    blkconf_serial(&n->conf, &n->serial);
    if (!n->serial) {
        return -1;
    }
    blkconf_blocksizes(&n->conf);

    pci_conf = pci_dev->config;
    pci_conf[PCI_INTERRUPT_PIN] = 1;
    pci_config_set_prog_interface(pci_dev->config, 0x2);
    pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS);
    pcie_endpoint_cap_init(&n->parent_obj, 0x80);

    n->num_namespaces = 1;
    n->num_queues = 64;
    n->reg_size = 1 << qemu_fls(0x1004 + 2 * (n->num_queues + 1) * 4);
    n->ns_size = bs_size / (uint64_t)n->num_namespaces;

    n->namespaces = g_new0(NvmeNamespace, n->num_namespaces);
    n->sq = g_new0(NvmeSQueue *, n->num_queues);
    n->cq = g_new0(NvmeCQueue *, n->num_queues);

    memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n,
                          "nvme", n->reg_size);
    pci_register_bar(&n->parent_obj, 0,
        PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64,
        &n->iomem);
    msix_init_exclusive_bar(&n->parent_obj, n->num_queues, 4);

    id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
    id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
    strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' ');
    strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' ');
    strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' ');
    id->rab = 6;
    id->ieee[0] = 0x00;
    id->ieee[1] = 0x02;
    id->ieee[2] = 0xb3;
    id->oacs = cpu_to_le16(0);
    id->frmw = 7 << 1;
    id->lpa = 1 << 0;
    id->sqes = (0x6 << 4) | 0x6;
    id->cqes = (0x4 << 4) | 0x4;
    id->nn = cpu_to_le32(n->num_namespaces);
    id->psd[0].mp = cpu_to_le16(0x9c4);
    id->psd[0].enlat = cpu_to_le32(0x10);
    id->psd[0].exlat = cpu_to_le32(0x4);

    n->bar.cap = 0;
    NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
    NVME_CAP_SET_CQR(n->bar.cap, 1);
    NVME_CAP_SET_AMS(n->bar.cap, 1);
    NVME_CAP_SET_TO(n->bar.cap, 0xf);
    NVME_CAP_SET_CSS(n->bar.cap, 1);
    NVME_CAP_SET_MPSMAX(n->bar.cap, 4);

    n->bar.vs = 0x00010100;
    n->bar.intmc = n->bar.intms = 0;

    for (i = 0; i < n->num_namespaces; i++) {
        NvmeNamespace *ns = &n->namespaces[i];
        NvmeIdNs *id_ns = &ns->id_ns;
        id_ns->nsfeat = 0;
        id_ns->nlbaf = 0;
        id_ns->flbas = 0;
        id_ns->mc = 0;
        id_ns->dpc = 0;
        id_ns->dps = 0;
        id_ns->lbaf[0].ds = BDRV_SECTOR_BITS;
        id_ns->ncap  = id_ns->nuse = id_ns->nsze =
            cpu_to_le64(n->ns_size >>
                id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
    }
    return 0;
}

static void nvme_exit(PCIDevice *pci_dev)
{
    NvmeCtrl *n = NVME(pci_dev);

    nvme_clear_ctrl(n);
    g_free(n->namespaces);
    g_free(n->cq);
    g_free(n->sq);
    msix_uninit_exclusive_bar(pci_dev);
}

static Property nvme_props[] = {
    DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf),
    DEFINE_PROP_STRING("serial", NvmeCtrl, serial),
    DEFINE_PROP_END_OF_LIST(),
};

static const VMStateDescription nvme_vmstate = {
    .name = "nvme",
    .unmigratable = 1,
};

static void nvme_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);

    pc->init = nvme_init;
    pc->exit = nvme_exit;
    pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
    pc->vendor_id = PCI_VENDOR_ID_INTEL;
    pc->device_id = 0x5845;
    pc->revision = 1;
    pc->is_express = 1;

    set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
    dc->desc = "Non-Volatile Memory Express";
    dc->props = nvme_props;
    dc->vmsd = &nvme_vmstate;
}

static void nvme_get_bootindex(Object *obj, Visitor *v, void *opaque,
                                  const char *name, Error **errp)
{
    NvmeCtrl *s = NVME(obj);

    visit_type_int32(v, &s->conf.bootindex, name, errp);
}

static void nvme_set_bootindex(Object *obj, Visitor *v, void *opaque,
                                  const char *name, Error **errp)
{
    NvmeCtrl *s = NVME(obj);
    int32_t boot_index;
    Error *local_err = NULL;

    visit_type_int32(v, &boot_index, name, &local_err);
    if (local_err) {
        goto out;
    }
    /* check whether bootindex is present in fw_boot_order list */
    check_boot_index(boot_index, &local_err);
    if (local_err) {
        goto out;
    }
    /* change bootindex to a new one */
    s->conf.bootindex = boot_index;

out:
    if (local_err) {
        error_propagate(errp, local_err);
    }
}

static void nvme_instance_init(Object *obj)
{
    object_property_add(obj, "bootindex", "int32",
                        nvme_get_bootindex,
                        nvme_set_bootindex, NULL, NULL, NULL);
    object_property_set_int(obj, -1, "bootindex", NULL);
}

static const TypeInfo nvme_info = {
    .name          = "nvme",
    .parent        = TYPE_PCI_DEVICE,
    .instance_size = sizeof(NvmeCtrl),
    .class_init    = nvme_class_init,
    .instance_init = nvme_instance_init,
};

static void nvme_register_types(void)
{
    type_register_static(&nvme_info);
}

type_init(nvme_register_types)