Git Repo - qemu.git/blob - hw/rdma/rdma_backend.c
Merge remote-tracking branch 'remotes/marcel/tags/rdma-pull-request' into staging
[qemu.git] / hw / rdma / rdma_backend.c
1 /*
2  * QEMU paravirtual RDMA - Generic RDMA backend
3  *
4  * Copyright (C) 2018 Oracle
5  * Copyright (C) 2018 Red Hat Inc
6  *
7  * Authors:
8  *     Yuval Shaia <[email protected]>
9  *     Marcel Apfelbaum <[email protected]>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2 or later.
12  * See the COPYING file in the top-level directory.
13  *
14  */
15
16 #include "qemu/osdep.h"
17 #include "qemu/error-report.h"
18 #include "qapi/error.h"
19
20 #include <infiniband/verbs.h>
21
22 #include "trace.h"
23 #include "rdma_utils.h"
24 #include "rdma_rm.h"
25 #include "rdma_backend.h"
26
/*
 * Vendor errors: values handed to the completion handler as its vendor_err
 * argument when a request fails inside the backend.
 */
enum {
    VENDOR_ERR_FAIL_BACKEND  = 0x201,
    VENDOR_ERR_TOO_MANY_SGES = 0x202,
    VENDOR_ERR_NOMEM         = 0x203,
    VENDOR_ERR_QP0           = 0x204,
    VENDOR_ERR_NO_SGE        = 0x205,
    VENDOR_ERR_MAD_SEND      = 0x206,
    VENDOR_ERR_INVLKEY       = 0x207,
    VENDOR_ERR_MR_SMALL      = 0x208,
};

/* Size, in bytes, of the buffer that holds the completion thread's name */
#define THR_NAME_LEN 16
38
/*
 * Per-request bookkeeping stored in the CQE context table while a work
 * request is outstanding on the host device.
 */
struct BackendCtx {
    uint64_t req_id;  /* zeroed at allocation; not written by the post paths */
    void *up_ctx;     /* caller's context, passed back to the completion handler */
    bool is_tx_req;   /* true for a send request, false for a receive request */
};

typedef struct BackendCtx BackendCtx;
44
/*
 * Completion callback used by every post/poll path in this file.  It is
 * replaced through rdma_backend_register_comp_handler().
 */
static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx);

/*
 * Placeholder completion handler: it only reports that no real handler is
 * registered and ignores all of its arguments.
 */
static void dummy_comp_handler(int status, unsigned int vendor_err, void *ctx)
{
    pr_err("No completion handler is registered\n");
}
51
52 static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
53 {
54     int i, ne;
55     BackendCtx *bctx;
56     struct ibv_wc wc[2];
57
58     pr_dbg("Entering poll_cq loop on cq %p\n", ibcq);
59     do {
60         ne = ibv_poll_cq(ibcq, ARRAY_SIZE(wc), wc);
61
62         pr_dbg("Got %d completion(s) from cq %p\n", ne, ibcq);
63
64         for (i = 0; i < ne; i++) {
65             pr_dbg("wr_id=0x%" PRIx64 "\n", wc[i].wr_id);
66             pr_dbg("status=%d\n", wc[i].status);
67
68             bctx = rdma_rm_get_cqe_ctx(rdma_dev_res, wc[i].wr_id);
69             if (unlikely(!bctx)) {
70                 pr_dbg("Error: Failed to find ctx for req %" PRId64 "\n",
71                        wc[i].wr_id);
72                 continue;
73             }
74             pr_dbg("Processing %s CQE\n", bctx->is_tx_req ? "send" : "recv");
75
76             comp_handler(wc[i].status, wc[i].vendor_err, bctx->up_ctx);
77
78             rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);
79             g_free(bctx);
80         }
81     } while (ne > 0);
82
83     if (ne < 0) {
84         pr_dbg("Got error %d from ibv_poll_cq\n", ne);
85     }
86 }
87
88 static void *comp_handler_thread(void *arg)
89 {
90     RdmaBackendDev *backend_dev = (RdmaBackendDev *)arg;
91     int rc;
92     struct ibv_cq *ev_cq;
93     void *ev_ctx;
94
95     pr_dbg("Starting\n");
96
97     while (backend_dev->comp_thread.run) {
98         pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
99         rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
100         pr_dbg("ibv_get_cq_event=%d\n", rc);
101         if (unlikely(rc)) {
102             pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
103             continue;
104         }
105
106         rc = ibv_req_notify_cq(ev_cq, 0);
107         if (unlikely(rc)) {
108             pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
109         }
110
111         poll_cq(backend_dev->rdma_dev_res, ev_cq);
112
113         ibv_ack_cq_events(ev_cq, 1);
114     }
115
116     pr_dbg("Going down\n");
117
118     /* TODO: Post cqe for all remaining buffs that were posted */
119
120     return NULL;
121 }
122
123 void rdma_backend_register_comp_handler(void (*handler)(int status,
124                                         unsigned int vendor_err, void *ctx))
125 {
126     comp_handler = handler;
127 }
128
129 void rdma_backend_unregister_comp_handler(void)
130 {
131     rdma_backend_register_comp_handler(dummy_comp_handler);
132 }
133
134 int rdma_backend_query_port(RdmaBackendDev *backend_dev,
135                             struct ibv_port_attr *port_attr)
136 {
137     int rc;
138
139     rc = ibv_query_port(backend_dev->context, backend_dev->port_num, port_attr);
140     if (rc) {
141         pr_dbg("Error %d from ibv_query_port\n", rc);
142         return -EIO;
143     }
144
145     return 0;
146 }
147
148 void rdma_backend_poll_cq(RdmaDeviceResources *rdma_dev_res, RdmaBackendCQ *cq)
149 {
150     poll_cq(rdma_dev_res, cq->ibcq);
151 }
152
153 static GHashTable *ah_hash;
154
155 static struct ibv_ah *create_ah(RdmaBackendDev *backend_dev, struct ibv_pd *pd,
156                                 uint8_t sgid_idx, union ibv_gid *dgid)
157 {
158     GBytes *ah_key = g_bytes_new(dgid, sizeof(*dgid));
159     struct ibv_ah *ah = g_hash_table_lookup(ah_hash, ah_key);
160
161     if (ah) {
162         trace_create_ah_cache_hit(be64_to_cpu(dgid->global.subnet_prefix),
163                                   be64_to_cpu(dgid->global.interface_id));
164         g_bytes_unref(ah_key);
165     } else {
166         struct ibv_ah_attr ah_attr = {
167             .is_global     = 1,
168             .port_num      = backend_dev->port_num,
169             .grh.hop_limit = 1,
170         };
171
172         ah_attr.grh.dgid = *dgid;
173         ah_attr.grh.sgid_index = sgid_idx;
174
175         ah = ibv_create_ah(pd, &ah_attr);
176         if (ah) {
177             g_hash_table_insert(ah_hash, ah_key, ah);
178         } else {
179             g_bytes_unref(ah_key);
180             pr_dbg("Fail to create AH for gid <0x%" PRIx64 ", 0x%" PRIx64 ">\n",
181                     be64_to_cpu(dgid->global.subnet_prefix),
182                     be64_to_cpu(dgid->global.interface_id));
183         }
184
185         trace_create_ah_cache_miss(be64_to_cpu(dgid->global.subnet_prefix),
186                                    be64_to_cpu(dgid->global.interface_id));
187     }
188
189     return ah;
190 }
191
192 static void destroy_ah_hash_key(gpointer data)
193 {
194     g_bytes_unref(data);
195 }
196
197 static void destroy_ah_hast_data(gpointer data)
198 {
199     struct ibv_ah *ah = data;
200
201     ibv_destroy_ah(ah);
202 }
203
204 static void ah_cache_init(void)
205 {
206     ah_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
207                                     destroy_ah_hash_key, destroy_ah_hast_data);
208 }
209
210 static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
211                                 struct ibv_sge *dsge, struct ibv_sge *ssge,
212                                 uint8_t num_sge)
213 {
214     RdmaRmMR *mr;
215     int ssge_idx;
216
217     pr_dbg("num_sge=%d\n", num_sge);
218
219     for (ssge_idx = 0; ssge_idx < num_sge; ssge_idx++) {
220         mr = rdma_rm_get_mr(rdma_dev_res, ssge[ssge_idx].lkey);
221         if (unlikely(!mr)) {
222             pr_dbg("Invalid lkey 0x%x\n", ssge[ssge_idx].lkey);
223             return VENDOR_ERR_INVLKEY | ssge[ssge_idx].lkey;
224         }
225
226         dsge->addr = (uintptr_t)mr->user_mr.host_virt + ssge[ssge_idx].addr -
227                      mr->user_mr.guest_start;
228         dsge->length = ssge[ssge_idx].length;
229         dsge->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
230
231         pr_dbg("ssge->addr=0x%" PRIx64 "\n", ssge[ssge_idx].addr);
232         pr_dbg("dsge->addr=0x%" PRIx64 "\n", dsge->addr);
233         pr_dbg("dsge->length=%d\n", dsge->length);
234         pr_dbg("dsge->lkey=0x%x\n", dsge->lkey);
235
236         dsge++;
237     }
238
239     return 0;
240 }
241
242 void rdma_backend_post_send(RdmaBackendDev *backend_dev,
243                             RdmaBackendQP *qp, uint8_t qp_type,
244                             struct ibv_sge *sge, uint32_t num_sge,
245                             union ibv_gid *dgid, uint32_t dqpn,
246                             uint32_t dqkey, void *ctx)
247 {
248     BackendCtx *bctx;
249     struct ibv_sge new_sge[MAX_SGE];
250     uint32_t bctx_id;
251     int rc;
252     struct ibv_send_wr wr = {0}, *bad_wr;
253
254     if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
255         if (qp_type == IBV_QPT_SMI) {
256             pr_dbg("QP0 unsupported\n");
257             comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
258         } else if (qp_type == IBV_QPT_GSI) {
259             pr_dbg("QP1\n");
260             comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
261         }
262         pr_dbg("qp->ibqp is NULL for qp_type %d!!!\n", qp_type);
263         return;
264     }
265
266     pr_dbg("num_sge=%d\n", num_sge);
267     if (!num_sge) {
268         pr_dbg("num_sge=0\n");
269         comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
270         return;
271     }
272
273     bctx = g_malloc0(sizeof(*bctx));
274     bctx->up_ctx = ctx;
275     bctx->is_tx_req = 1;
276
277     rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
278     if (unlikely(rc)) {
279         pr_dbg("Failed to allocate cqe_ctx\n");
280         comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
281         goto out_free_bctx;
282     }
283
284     rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge);
285     if (rc) {
286         pr_dbg("Error: Failed to build host SGE array\n");
287         comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
288         goto out_dealloc_cqe_ctx;
289     }
290
291     if (qp_type == IBV_QPT_UD) {
292         wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd,
293                                 backend_dev->backend_gid_idx, dgid);
294         wr.wr.ud.remote_qpn = dqpn;
295         wr.wr.ud.remote_qkey = dqkey;
296     }
297
298     wr.num_sge = num_sge;
299     wr.opcode = IBV_WR_SEND;
300     wr.send_flags = IBV_SEND_SIGNALED;
301     wr.sg_list = new_sge;
302     wr.wr_id = bctx_id;
303
304     rc = ibv_post_send(qp->ibqp, &wr, &bad_wr);
305     pr_dbg("ibv_post_send=%d\n", rc);
306     if (rc) {
307         pr_dbg("Fail (%d, %d) to post send WQE to qpn %d\n", rc, errno,
308                 qp->ibqp->qp_num);
309         comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
310         goto out_dealloc_cqe_ctx;
311     }
312
313     return;
314
315 out_dealloc_cqe_ctx:
316     rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, bctx_id);
317
318 out_free_bctx:
319     g_free(bctx);
320 }
321
322 void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
323                             RdmaDeviceResources *rdma_dev_res,
324                             RdmaBackendQP *qp, uint8_t qp_type,
325                             struct ibv_sge *sge, uint32_t num_sge, void *ctx)
326 {
327     BackendCtx *bctx;
328     struct ibv_sge new_sge[MAX_SGE];
329     uint32_t bctx_id;
330     int rc;
331     struct ibv_recv_wr wr = {0}, *bad_wr;
332
333     if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
334         if (qp_type == IBV_QPT_SMI) {
335             pr_dbg("QP0 unsupported\n");
336             comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
337         }
338         if (qp_type == IBV_QPT_GSI) {
339             pr_dbg("QP1\n");
340             comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
341         }
342         return;
343     }
344
345     pr_dbg("num_sge=%d\n", num_sge);
346     if (!num_sge) {
347         pr_dbg("num_sge=0\n");
348         comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
349         return;
350     }
351
352     bctx = g_malloc0(sizeof(*bctx));
353     bctx->up_ctx = ctx;
354     bctx->is_tx_req = 0;
355
356     rc = rdma_rm_alloc_cqe_ctx(rdma_dev_res, &bctx_id, bctx);
357     if (unlikely(rc)) {
358         pr_dbg("Failed to allocate cqe_ctx\n");
359         comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
360         goto out_free_bctx;
361     }
362
363     rc = build_host_sge_array(rdma_dev_res, new_sge, sge, num_sge);
364     if (rc) {
365         pr_dbg("Error: Failed to build host SGE array\n");
366         comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
367         goto out_dealloc_cqe_ctx;
368     }
369
370     wr.num_sge = num_sge;
371     wr.sg_list = new_sge;
372     wr.wr_id = bctx_id;
373     rc = ibv_post_recv(qp->ibqp, &wr, &bad_wr);
374     pr_dbg("ibv_post_recv=%d\n", rc);
375     if (rc) {
376         pr_dbg("Fail (%d, %d) to post recv WQE to qpn %d\n", rc, errno,
377                 qp->ibqp->qp_num);
378         comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
379         goto out_dealloc_cqe_ctx;
380     }
381
382     return;
383
384 out_dealloc_cqe_ctx:
385     rdma_rm_dealloc_cqe_ctx(rdma_dev_res, bctx_id);
386
387 out_free_bctx:
388     g_free(bctx);
389 }
390
391 int rdma_backend_create_pd(RdmaBackendDev *backend_dev, RdmaBackendPD *pd)
392 {
393     pd->ibpd = ibv_alloc_pd(backend_dev->context);
394
395     return pd->ibpd ? 0 : -EIO;
396 }
397
398 void rdma_backend_destroy_pd(RdmaBackendPD *pd)
399 {
400     if (pd->ibpd) {
401         ibv_dealloc_pd(pd->ibpd);
402     }
403 }
404
405 int rdma_backend_create_mr(RdmaBackendMR *mr, RdmaBackendPD *pd, void *addr,
406                            size_t length, int access)
407 {
408     pr_dbg("addr=0x%p\n", addr);
409     pr_dbg("len=%zu\n", length);
410     mr->ibmr = ibv_reg_mr(pd->ibpd, addr, length, access);
411     if (mr->ibmr) {
412         pr_dbg("lkey=0x%x\n", mr->ibmr->lkey);
413         pr_dbg("rkey=0x%x\n", mr->ibmr->rkey);
414         mr->ibpd = pd->ibpd;
415     }
416
417     return mr->ibmr ? 0 : -EIO;
418 }
419
420 void rdma_backend_destroy_mr(RdmaBackendMR *mr)
421 {
422     if (mr->ibmr) {
423         ibv_dereg_mr(mr->ibmr);
424     }
425 }
426
427 int rdma_backend_create_cq(RdmaBackendDev *backend_dev, RdmaBackendCQ *cq,
428                            int cqe)
429 {
430     int rc;
431
432     pr_dbg("cqe=%d\n", cqe);
433
434     pr_dbg("dev->channel=%p\n", backend_dev->channel);
435     cq->ibcq = ibv_create_cq(backend_dev->context, cqe + 1, NULL,
436                              backend_dev->channel, 0);
437
438     if (cq->ibcq) {
439         rc = ibv_req_notify_cq(cq->ibcq, 0);
440         if (rc) {
441             pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
442         }
443         cq->backend_dev = backend_dev;
444     }
445
446     return cq->ibcq ? 0 : -EIO;
447 }
448
449 void rdma_backend_destroy_cq(RdmaBackendCQ *cq)
450 {
451     if (cq->ibcq) {
452         ibv_destroy_cq(cq->ibcq);
453     }
454 }
455
456 int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
457                            RdmaBackendPD *pd, RdmaBackendCQ *scq,
458                            RdmaBackendCQ *rcq, uint32_t max_send_wr,
459                            uint32_t max_recv_wr, uint32_t max_send_sge,
460                            uint32_t max_recv_sge)
461 {
462     struct ibv_qp_init_attr attr = {0};
463
464     qp->ibqp = 0;
465     pr_dbg("qp_type=%d\n", qp_type);
466
467     switch (qp_type) {
468     case IBV_QPT_GSI:
469         pr_dbg("QP1 unsupported\n");
470         return 0;
471
472     case IBV_QPT_RC:
473         /* fall through */
474     case IBV_QPT_UD:
475         /* do nothing */
476         break;
477
478     default:
479         pr_dbg("Unsupported QP type %d\n", qp_type);
480         return -EIO;
481     }
482
483     attr.qp_type = qp_type;
484     attr.send_cq = scq->ibcq;
485     attr.recv_cq = rcq->ibcq;
486     attr.cap.max_send_wr = max_send_wr;
487     attr.cap.max_recv_wr = max_recv_wr;
488     attr.cap.max_send_sge = max_send_sge;
489     attr.cap.max_recv_sge = max_recv_sge;
490
491     pr_dbg("max_send_wr=%d\n", max_send_wr);
492     pr_dbg("max_recv_wr=%d\n", max_recv_wr);
493     pr_dbg("max_send_sge=%d\n", max_send_sge);
494     pr_dbg("max_recv_sge=%d\n", max_recv_sge);
495
496     qp->ibqp = ibv_create_qp(pd->ibpd, &attr);
497     if (likely(!qp->ibqp)) {
498         pr_dbg("Error from ibv_create_qp\n");
499         return -EIO;
500     }
501
502     qp->ibpd = pd->ibpd;
503
504     /* TODO: Query QP to get max_inline_data and save it to be used in send */
505
506     pr_dbg("qpn=0x%x\n", qp->ibqp->qp_num);
507
508     return 0;
509 }
510
511 int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
512                                uint8_t qp_type, uint32_t qkey)
513 {
514     struct ibv_qp_attr attr = {0};
515     int rc, attr_mask;
516
517     pr_dbg("qpn=0x%x\n", qp->ibqp->qp_num);
518     pr_dbg("sport_num=%d\n", backend_dev->port_num);
519
520     attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT;
521     attr.qp_state        = IBV_QPS_INIT;
522     attr.pkey_index      = 0;
523     attr.port_num        = backend_dev->port_num;
524
525     switch (qp_type) {
526     case IBV_QPT_RC:
527         attr_mask |= IBV_QP_ACCESS_FLAGS;
528         break;
529
530     case IBV_QPT_UD:
531         attr.qkey = qkey;
532         attr_mask |= IBV_QP_QKEY;
533         break;
534
535     default:
536         pr_dbg("Unsupported QP type %d\n", qp_type);
537         return -EIO;
538     }
539
540     rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask);
541     if (rc) {
542         pr_dbg("Error %d from ibv_modify_qp\n", rc);
543         return -EIO;
544     }
545
546     return 0;
547 }
548
549 int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
550                               uint8_t qp_type, union ibv_gid *dgid,
551                               uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
552                               bool use_qkey)
553 {
554     struct ibv_qp_attr attr = {0};
555     union ibv_gid ibv_gid = {
556         .global.interface_id = dgid->global.interface_id,
557         .global.subnet_prefix = dgid->global.subnet_prefix
558     };
559     int rc, attr_mask;
560
561     attr.qp_state = IBV_QPS_RTR;
562     attr_mask = IBV_QP_STATE;
563
564     switch (qp_type) {
565     case IBV_QPT_RC:
566         pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n",
567                be64_to_cpu(ibv_gid.global.subnet_prefix),
568                be64_to_cpu(ibv_gid.global.interface_id));
569         pr_dbg("dqpn=0x%x\n", dqpn);
570         pr_dbg("sgid_idx=%d\n", backend_dev->backend_gid_idx);
571         pr_dbg("sport_num=%d\n", backend_dev->port_num);
572         pr_dbg("rq_psn=0x%x\n", rq_psn);
573
574         attr.path_mtu               = IBV_MTU_1024;
575         attr.dest_qp_num            = dqpn;
576         attr.max_dest_rd_atomic     = 1;
577         attr.min_rnr_timer          = 12;
578         attr.ah_attr.port_num       = backend_dev->port_num;
579         attr.ah_attr.is_global      = 1;
580         attr.ah_attr.grh.hop_limit  = 1;
581         attr.ah_attr.grh.dgid       = ibv_gid;
582         attr.ah_attr.grh.sgid_index = backend_dev->backend_gid_idx;
583         attr.rq_psn                 = rq_psn;
584
585         attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
586                      IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC |
587                      IBV_QP_MIN_RNR_TIMER;
588         break;
589
590     case IBV_QPT_UD:
591         if (use_qkey) {
592             pr_dbg("qkey=0x%x\n", qkey);
593             attr.qkey = qkey;
594             attr_mask |= IBV_QP_QKEY;
595         }
596         break;
597     }
598
599     rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask);
600     if (rc) {
601         pr_dbg("Error %d from ibv_modify_qp\n", rc);
602         return -EIO;
603     }
604
605     return 0;
606 }
607
608 int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
609                               uint32_t sq_psn, uint32_t qkey, bool use_qkey)
610 {
611     struct ibv_qp_attr attr = {0};
612     int rc, attr_mask;
613
614     pr_dbg("qpn=0x%x\n", qp->ibqp->qp_num);
615     pr_dbg("sq_psn=0x%x\n", sq_psn);
616
617     attr.qp_state = IBV_QPS_RTS;
618     attr.sq_psn = sq_psn;
619     attr_mask = IBV_QP_STATE | IBV_QP_SQ_PSN;
620
621     switch (qp_type) {
622     case IBV_QPT_RC:
623         attr.timeout       = 14;
624         attr.retry_cnt     = 7;
625         attr.rnr_retry     = 7;
626         attr.max_rd_atomic = 1;
627
628         attr_mask |= IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT | IBV_QP_RNR_RETRY |
629                      IBV_QP_MAX_QP_RD_ATOMIC;
630         break;
631
632     case IBV_QPT_UD:
633         if (use_qkey) {
634             pr_dbg("qkey=0x%x\n", qkey);
635             attr.qkey = qkey;
636             attr_mask |= IBV_QP_QKEY;
637         }
638         break;
639     }
640
641     rc = ibv_modify_qp(qp->ibqp, &attr, attr_mask);
642     if (rc) {
643         pr_dbg("Error %d from ibv_modify_qp\n", rc);
644         return -EIO;
645     }
646
647     return 0;
648 }
649
650 int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
651                           int attr_mask, struct ibv_qp_init_attr *init_attr)
652 {
653     if (!qp->ibqp) {
654         pr_dbg("QP1\n");
655         attr->qp_state = IBV_QPS_RTS;
656         return 0;
657     }
658
659     return ibv_query_qp(qp->ibqp, attr, attr_mask, init_attr);
660 }
661
662 void rdma_backend_destroy_qp(RdmaBackendQP *qp)
663 {
664     if (qp->ibqp) {
665         ibv_destroy_qp(qp->ibqp);
666     }
667 }
668
/*
 * Clamp one requested device attribute to what the host device supports.
 *
 * req is a pointer to the requested attributes, dev the attribute structure
 * reported by the host device, member the field to check and fmt the printf
 * conversion for that field.  If the requested value is larger than the
 * host's, a warning is reported and the request is lowered to the host
 * value.
 */
#define CHK_ATTR(req, dev, member, fmt) do { \
    pr_dbg("%s="fmt","fmt"\n", #member, (dev).member, (req)->member); \
    if ((req)->member > (dev).member) { \
        warn_report("%s = "fmt" is higher than host device capability "fmt, \
                    #member, (req)->member, (dev).member); \
        (req)->member = (dev).member; \
    } \
    pr_dbg("%s="fmt"\n", #member, (req)->member); \
} while (0)
677
678 static int init_device_caps(RdmaBackendDev *backend_dev,
679                             struct ibv_device_attr *dev_attr)
680 {
681     if (ibv_query_device(backend_dev->context, &backend_dev->dev_attr)) {
682         return -EIO;
683     }
684
685     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_mr_size, "%" PRId64);
686     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp, "%d");
687     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_sge, "%d");
688     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp_wr, "%d");
689     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_cq, "%d");
690     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_cqe, "%d");
691     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_mr, "%d");
692     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_pd, "%d");
693     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp_rd_atom, "%d");
694     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_qp_init_rd_atom, "%d");
695     CHK_ATTR(dev_attr, backend_dev->dev_attr, max_ah, "%d");
696
697     return 0;
698 }
699
700 int rdma_backend_init(RdmaBackendDev *backend_dev,
701                       RdmaDeviceResources *rdma_dev_res,
702                       const char *backend_device_name, uint8_t port_num,
703                       uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
704                       Error **errp)
705 {
706     int i;
707     int ret = 0;
708     int num_ibv_devices;
709     char thread_name[THR_NAME_LEN] = {0};
710     struct ibv_device **dev_list;
711     struct ibv_port_attr port_attr;
712
713     backend_dev->backend_gid_idx = backend_gid_idx;
714     backend_dev->port_num = port_num;
715     backend_dev->rdma_dev_res = rdma_dev_res;
716
717     rdma_backend_register_comp_handler(dummy_comp_handler);
718
719     dev_list = ibv_get_device_list(&num_ibv_devices);
720     if (!dev_list) {
721         error_setg(errp, "Failed to get IB devices list");
722         return -EIO;
723     }
724
725     if (num_ibv_devices == 0) {
726         error_setg(errp, "No IB devices were found");
727         ret = -ENXIO;
728         goto out_free_dev_list;
729     }
730
731     if (backend_device_name) {
732         for (i = 0; dev_list[i]; ++i) {
733             if (!strcmp(ibv_get_device_name(dev_list[i]),
734                         backend_device_name)) {
735                 break;
736             }
737         }
738
739         backend_dev->ib_dev = dev_list[i];
740         if (!backend_dev->ib_dev) {
741             error_setg(errp, "Failed to find IB device %s",
742                        backend_device_name);
743             ret = -EIO;
744             goto out_free_dev_list;
745         }
746     } else {
747         backend_dev->ib_dev = *dev_list;
748     }
749
750     pr_dbg("Using backend device %s, port %d, gid_idx %d\n",
751            ibv_get_device_name(backend_dev->ib_dev),
752            backend_dev->port_num, backend_dev->backend_gid_idx);
753
754     backend_dev->context = ibv_open_device(backend_dev->ib_dev);
755     if (!backend_dev->context) {
756         error_setg(errp, "Failed to open IB device");
757         ret = -EIO;
758         goto out;
759     }
760
761     backend_dev->channel = ibv_create_comp_channel(backend_dev->context);
762     if (!backend_dev->channel) {
763         error_setg(errp, "Failed to create IB communication channel");
764         ret = -EIO;
765         goto out_close_device;
766     }
767     pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel);
768
769     ret = ibv_query_port(backend_dev->context, backend_dev->port_num,
770                          &port_attr);
771     if (ret) {
772         error_setg(errp, "Error %d from ibv_query_port", ret);
773         ret = -EIO;
774         goto out_destroy_comm_channel;
775     }
776
777     if (backend_dev->backend_gid_idx > port_attr.gid_tbl_len) {
778         error_setg(errp, "Invalid backend_gid_idx, should be less than %d",
779                    port_attr.gid_tbl_len);
780         goto out_destroy_comm_channel;
781     }
782
783     ret = init_device_caps(backend_dev, dev_attr);
784     if (ret) {
785         error_setg(errp, "Failed to initialize device capabilities");
786         ret = -EIO;
787         goto out_destroy_comm_channel;
788     }
789
790     ret = ibv_query_gid(backend_dev->context, backend_dev->port_num,
791                          backend_dev->backend_gid_idx, &backend_dev->gid);
792     if (ret) {
793         error_setg(errp, "Failed to query gid %d",
794                    backend_dev->backend_gid_idx);
795         ret = -EIO;
796         goto out_destroy_comm_channel;
797     }
798     pr_dbg("subnet_prefix=0x%" PRIx64 "\n",
799            be64_to_cpu(backend_dev->gid.global.subnet_prefix));
800     pr_dbg("interface_id=0x%" PRIx64 "\n",
801            be64_to_cpu(backend_dev->gid.global.interface_id));
802
803     snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
804              ibv_get_device_name(backend_dev->ib_dev));
805     backend_dev->comp_thread.run = true;
806     qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
807                        comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
808
809     ah_cache_init();
810
811     goto out_free_dev_list;
812
813 out_destroy_comm_channel:
814     ibv_destroy_comp_channel(backend_dev->channel);
815
816 out_close_device:
817     ibv_close_device(backend_dev->context);
818
819 out_free_dev_list:
820     ibv_free_device_list(dev_list);
821
822 out:
823     return ret;
824 }
825
826 void rdma_backend_fini(RdmaBackendDev *backend_dev)
827 {
828     g_hash_table_destroy(ah_hash);
829     ibv_destroy_comp_channel(backend_dev->channel);
830     ibv_close_device(backend_dev->context);
831 }
This page took 0.069826 seconds and 4 git commands to generate.