]> Git Repo - qemu.git/blob - hw/rdma/rdma_rm.c
bf4a5c71b456fdac4d0672a6f5f6bac4fc7aef10
[qemu.git] / hw / rdma / rdma_rm.c
1 /*
2  * QEMU paravirtual RDMA - Resource Manager Implementation
3  *
4  * Copyright (C) 2018 Oracle
5  * Copyright (C) 2018 Red Hat Inc
6  *
7  * Authors:
8  *     Yuval Shaia <[email protected]>
9  *     Marcel Apfelbaum <[email protected]>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2 or later.
12  * See the COPYING file in the top-level directory.
13  *
14  */
15
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
18 #include "cpu.h"
19
20 #include "rdma_utils.h"
21 #include "rdma_backend.h"
22 #include "rdma_rm.h"
23
/*
 * Page directory and page tables: number of __u64-sized entries that fit in
 * one target page.  The expansions are parenthesised so the macros can be
 * used as ordinary expressions (array sizes, loop bounds, comparisons).
 */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
27
28 static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
29                                 uint32_t tbl_sz, uint32_t res_sz)
30 {
31     tbl->tbl = g_malloc(tbl_sz * res_sz);
32
33     strncpy(tbl->name, name, MAX_RM_TBL_NAME);
34     tbl->name[MAX_RM_TBL_NAME - 1] = 0;
35
36     tbl->bitmap = bitmap_new(tbl_sz);
37     tbl->tbl_sz = tbl_sz;
38     tbl->res_sz = res_sz;
39     qemu_mutex_init(&tbl->lock);
40 }
41
42 static inline void res_tbl_free(RdmaRmResTbl *tbl)
43 {
44     qemu_mutex_destroy(&tbl->lock);
45     g_free(tbl->tbl);
46     bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0);
47 }
48
49 static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
50 {
51     pr_dbg("%s, handle=%d\n", tbl->name, handle);
52
53     if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
54         return tbl->tbl + handle * tbl->res_sz;
55     } else {
56         pr_dbg("Invalid handle %d\n", handle);
57         return NULL;
58     }
59 }
60
61 static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
62 {
63     qemu_mutex_lock(&tbl->lock);
64
65     *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
66     if (*handle > tbl->tbl_sz) {
67         pr_dbg("Failed to alloc, bitmap is full\n");
68         qemu_mutex_unlock(&tbl->lock);
69         return NULL;
70     }
71
72     set_bit(*handle, tbl->bitmap);
73
74     qemu_mutex_unlock(&tbl->lock);
75
76     memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);
77
78     pr_dbg("%s, handle=%d\n", tbl->name, *handle);
79
80     return tbl->tbl + *handle * tbl->res_sz;
81 }
82
83 static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
84 {
85     pr_dbg("%s, handle=%d\n", tbl->name, handle);
86
87     qemu_mutex_lock(&tbl->lock);
88
89     if (handle < tbl->tbl_sz) {
90         clear_bit(handle, tbl->bitmap);
91     }
92
93     qemu_mutex_unlock(&tbl->lock);
94 }
95
96 int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
97                      uint32_t *pd_handle, uint32_t ctx_handle)
98 {
99     RdmaRmPD *pd;
100     int ret = -ENOMEM;
101
102     pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
103     if (!pd) {
104         goto out;
105     }
106
107     ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
108     if (ret) {
109         ret = -EIO;
110         goto out_tbl_dealloc;
111     }
112
113     pd->ctx_handle = ctx_handle;
114
115     return 0;
116
117 out_tbl_dealloc:
118     res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
119
120 out:
121     return ret;
122 }
123
124 RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
125 {
126     return res_tbl_get(&dev_res->pd_tbl, pd_handle);
127 }
128
129 void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
130 {
131     RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
132
133     if (pd) {
134         rdma_backend_destroy_pd(&pd->backend_pd);
135         res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
136     }
137 }
138
139 int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
140                      uint64_t guest_start, size_t guest_length, void *host_virt,
141                      int access_flags, uint32_t *mr_handle, uint32_t *lkey,
142                      uint32_t *rkey)
143 {
144     RdmaRmMR *mr;
145     int ret = 0;
146     RdmaRmPD *pd;
147
148     pd = rdma_rm_get_pd(dev_res, pd_handle);
149     if (!pd) {
150         pr_dbg("Invalid PD\n");
151         return -EINVAL;
152     }
153
154     mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
155     if (!mr) {
156         pr_dbg("Failed to allocate obj in table\n");
157         return -ENOMEM;
158     }
159     pr_dbg("mr_handle=%d\n", *mr_handle);
160
161     pr_dbg("host_virt=0x%p\n", host_virt);
162     pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
163     pr_dbg("length=%zu\n", guest_length);
164
165     if (host_virt) {
166         mr->virt = host_virt;
167         mr->start = guest_start;
168         mr->length = guest_length;
169
170         ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
171                                      mr->length, access_flags);
172         if (ret) {
173             pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
174             ret = -EIO;
175             goto out_dealloc_mr;
176         }
177     }
178
179     /* We keep mr_handle in lkey so send and recv get get mr ptr */
180     *lkey = *mr_handle;
181     *rkey = -1;
182
183     mr->pd_handle = pd_handle;
184
185     return 0;
186
187 out_dealloc_mr:
188     res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
189
190     return ret;
191 }
192
193 RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
194 {
195     return res_tbl_get(&dev_res->mr_tbl, mr_handle);
196 }
197
198 void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
199 {
200     RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);
201
202     if (mr) {
203         rdma_backend_destroy_mr(&mr->backend_mr);
204         pr_dbg("start=0x%" PRIx64 "\n", mr->start);
205         if (mr->start) {
206             munmap(mr->virt, mr->length);
207         }
208         res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
209     }
210 }
211
212 int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
213                      uint32_t *uc_handle)
214 {
215     RdmaRmUC *uc;
216
217     /* TODO: Need to make sure pfn is between bar start address and
218      * bsd+RDMA_BAR2_UAR_SIZE
219     if (pfn > RDMA_BAR2_UAR_SIZE) {
220         pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
221         return -ENOMEM;
222     }
223     */
224
225     uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
226     if (!uc) {
227         return -ENOMEM;
228     }
229
230     return 0;
231 }
232
233 RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
234 {
235     return res_tbl_get(&dev_res->uc_tbl, uc_handle);
236 }
237
238 void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
239 {
240     RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);
241
242     if (uc) {
243         res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
244     }
245 }
246
247 RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
248 {
249     return res_tbl_get(&dev_res->cq_tbl, cq_handle);
250 }
251
252 int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
253                      uint32_t cqe, uint32_t *cq_handle, void *opaque)
254 {
255     int rc;
256     RdmaRmCQ *cq;
257
258     cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
259     if (!cq) {
260         return -ENOMEM;
261     }
262
263     cq->opaque = opaque;
264     cq->notify = false;
265
266     rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
267     if (rc) {
268         rc = -EIO;
269         goto out_dealloc_cq;
270     }
271
272     return 0;
273
274 out_dealloc_cq:
275     rdma_rm_dealloc_cq(dev_res, *cq_handle);
276
277     return rc;
278 }
279
280 void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
281                            bool notify)
282 {
283     RdmaRmCQ *cq;
284
285     pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);
286
287     cq = rdma_rm_get_cq(dev_res, cq_handle);
288     if (!cq) {
289         return;
290     }
291
292     cq->notify = notify;
293     pr_dbg("notify=%d\n", cq->notify);
294 }
295
296 void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
297 {
298     RdmaRmCQ *cq;
299
300     cq = rdma_rm_get_cq(dev_res, cq_handle);
301     if (!cq) {
302         return;
303     }
304
305     rdma_backend_destroy_cq(&cq->backend_cq);
306
307     res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
308 }
309
310 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
311 {
312     GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
313
314     RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
315
316     g_bytes_unref(key);
317
318     return qp;
319 }
320
321 int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
322                      uint8_t qp_type, uint32_t max_send_wr,
323                      uint32_t max_send_sge, uint32_t send_cq_handle,
324                      uint32_t max_recv_wr, uint32_t max_recv_sge,
325                      uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
326 {
327     int rc;
328     RdmaRmQP *qp;
329     RdmaRmCQ *scq, *rcq;
330     RdmaRmPD *pd;
331     uint32_t rm_qpn;
332
333     pr_dbg("qp_type=%d\n", qp_type);
334
335     pd = rdma_rm_get_pd(dev_res, pd_handle);
336     if (!pd) {
337         pr_err("Invalid pd handle (%d)\n", pd_handle);
338         return -EINVAL;
339     }
340
341     scq = rdma_rm_get_cq(dev_res, send_cq_handle);
342     rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);
343
344     if (!scq || !rcq) {
345         pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
346                send_cq_handle, recv_cq_handle);
347         return -EINVAL;
348     }
349
350     qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
351     if (!qp) {
352         return -ENOMEM;
353     }
354     pr_dbg("rm_qpn=%d\n", rm_qpn);
355
356     qp->qpn = rm_qpn;
357     qp->qp_state = IBV_QPS_RESET;
358     qp->qp_type = qp_type;
359     qp->send_cq_handle = send_cq_handle;
360     qp->recv_cq_handle = recv_cq_handle;
361     qp->opaque = opaque;
362
363     rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
364                                 &scq->backend_cq, &rcq->backend_cq, max_send_wr,
365                                 max_recv_wr, max_send_sge, max_recv_sge);
366     if (rc) {
367         rc = -EIO;
368         goto out_dealloc_qp;
369     }
370
371     *qpn = rdma_backend_qpn(&qp->backend_qp);
372     pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
373     g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);
374
375     return 0;
376
377 out_dealloc_qp:
378     res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
379
380     return rc;
381 }
382
383 int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
384                       uint32_t qp_handle, uint32_t attr_mask,
385                       union ibv_gid *dgid, uint32_t dqpn,
386                       enum ibv_qp_state qp_state, uint32_t qkey,
387                       uint32_t rq_psn, uint32_t sq_psn)
388 {
389     RdmaRmQP *qp;
390     int ret;
391
392     pr_dbg("qpn=%d\n", qp_handle);
393
394     qp = rdma_rm_get_qp(dev_res, qp_handle);
395     if (!qp) {
396         return -EINVAL;
397     }
398
399     pr_dbg("qp_type=%d\n", qp->qp_type);
400     pr_dbg("attr_mask=0x%x\n", attr_mask);
401
402     if (qp->qp_type == IBV_QPT_SMI) {
403         pr_dbg("QP0 unsupported\n");
404         return -EPERM;
405     } else if (qp->qp_type == IBV_QPT_GSI) {
406         pr_dbg("QP1\n");
407         return 0;
408     }
409
410     if (attr_mask & IBV_QP_STATE) {
411         qp->qp_state = qp_state;
412         pr_dbg("qp_state=%d\n", qp->qp_state);
413
414         if (qp->qp_state == IBV_QPS_INIT) {
415             ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
416                                              qp->qp_type, qkey);
417             if (ret) {
418                 return -EIO;
419             }
420         }
421
422         if (qp->qp_state == IBV_QPS_RTR) {
423             ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
424                                             qp->qp_type, dgid, dqpn, rq_psn,
425                                             qkey, attr_mask & IBV_QP_QKEY);
426             if (ret) {
427                 return -EIO;
428             }
429         }
430
431         if (qp->qp_state == IBV_QPS_RTS) {
432             ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
433                                             sq_psn, qkey,
434                                             attr_mask & IBV_QP_QKEY);
435             if (ret) {
436                 return -EIO;
437             }
438         }
439     }
440
441     return 0;
442 }
443
444 int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
445                      uint32_t qp_handle, struct ibv_qp_attr *attr,
446                      int attr_mask, struct ibv_qp_init_attr *init_attr)
447 {
448     RdmaRmQP *qp;
449
450     pr_dbg("qpn=%d\n", qp_handle);
451
452     qp = rdma_rm_get_qp(dev_res, qp_handle);
453     if (!qp) {
454         return -EINVAL;
455     }
456
457     pr_dbg("qp_type=%d\n", qp->qp_type);
458
459     return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
460 }
461
462 void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
463 {
464     RdmaRmQP *qp;
465     GBytes *key;
466
467     key = g_bytes_new(&qp_handle, sizeof(qp_handle));
468     qp = g_hash_table_lookup(dev_res->qp_hash, key);
469     g_hash_table_remove(dev_res->qp_hash, key);
470     g_bytes_unref(key);
471
472     if (!qp) {
473         return;
474     }
475
476     rdma_backend_destroy_qp(&qp->backend_qp);
477
478     res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
479 }
480
481 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
482 {
483     void **cqe_ctx;
484
485     cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
486     if (!cqe_ctx) {
487         return NULL;
488     }
489
490     pr_dbg("ctx=%p\n", *cqe_ctx);
491
492     return *cqe_ctx;
493 }
494
495 int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
496                           void *ctx)
497 {
498     void **cqe_ctx;
499
500     cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
501     if (!cqe_ctx) {
502         return -ENOMEM;
503     }
504
505     pr_dbg("ctx=%p\n", ctx);
506     *cqe_ctx = ctx;
507
508     return 0;
509 }
510
511 void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
512 {
513     res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
514 }
515
516 static void destroy_qp_hash_key(gpointer data)
517 {
518     g_bytes_unref(data);
519 }
520
521 int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
522                  Error **errp)
523 {
524     dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
525                                              destroy_qp_hash_key, NULL);
526     if (!dev_res->qp_hash) {
527         return -ENOMEM;
528     }
529
530     res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
531     res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
532     res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
533     res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
534     res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
535                        dev_attr->max_qp_wr, sizeof(void *));
536     res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
537
538     return 0;
539 }
540
541 void rdma_rm_fini(RdmaDeviceResources *dev_res)
542 {
543     res_tbl_free(&dev_res->uc_tbl);
544     res_tbl_free(&dev_res->cqe_ctx_tbl);
545     res_tbl_free(&dev_res->qp_tbl);
546     res_tbl_free(&dev_res->cq_tbl);
547     res_tbl_free(&dev_res->mr_tbl);
548     res_tbl_free(&dev_res->pd_tbl);
549     g_hash_table_destroy(dev_res->qp_hash);
550 }
This page took 0.047332 seconds and 2 git commands to generate.