2 * QEMU paravirtual RDMA - Resource Manager Implementation
4 * Copyright (C) 2018 Oracle
5 * Copyright (C) 2018 Red Hat Inc
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
20 #include "rdma_utils.h"
21 #include "rdma_backend.h"
24 /* Page directory and page tables */
/*
 * Both macros evaluate TARGET_PAGE_SIZE / sizeof(__u64), i.e. the number of
 * 64-bit entries that fit in one target page.
 * NOTE(review): the expansion is wrapped in braces, so it cannot be used as
 * a plain arithmetic expression -- confirm this is intended; no use of
 * either macro is visible in this part of the file.
 */
25 #define PG_DIR_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
26 #define PG_TBL_SZ { TARGET_PAGE_SIZE / sizeof(__u64) }
/*
 * Initialise a resource table.
 *
 * Allocates backing storage for tbl_sz entries of res_sz bytes each, copies
 * name into tbl->name (truncated to MAX_RM_TBL_NAME - 1 characters and always
 * NUL-terminated), creates a tbl_sz-bit allocation bitmap and initialises the
 * table lock.
 */
28 static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
29 uint32_t tbl_sz, uint32_t res_sz)
/*
 * g_malloc_n() checks the tbl_sz * res_sz product for overflow; a plain
 * 32-bit multiplication could wrap and yield an undersized table.
 */
31 tbl->tbl = g_malloc_n(tbl_sz, res_sz);
/* strncpy() does not terminate on truncation, hence the explicit NUL below */
33 strncpy(tbl->name, name, MAX_RM_TBL_NAME);
34 tbl->name[MAX_RM_TBL_NAME - 1] = 0;
/* One bit per table entry; a set bit marks the entry as allocated */
36 tbl->bitmap = bitmap_new(tbl_sz);
39 qemu_mutex_init(&tbl->lock);
/*
 * Tear down a resource table: destroy its lock and release the allocation
 * bitmap obtained from bitmap_new().
 */
42 static inline void res_tbl_free(RdmaRmResTbl *tbl)
44 qemu_mutex_destroy(&tbl->lock);
/*
 * Free the bitmap directly. Shrinking it to zero bits via
 * bitmap_zero_extend() would realloc to size 0 and then try to clear a
 * negative-length bit range on the resulting pointer.
 */
46 g_free(tbl->bitmap);
/*
 * Look up the table entry that belongs to handle.
 *
 * The entry address (tbl->tbl + handle * tbl->res_sz) is returned only when
 * handle is below tbl->tbl_sz and its bit is set in the allocation bitmap;
 * any other handle is logged as invalid.
 */
49 static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
51 pr_dbg("%s, handle=%d\n", tbl->name, handle);
/* Range check first so test_bit() is never asked about a bit past the bitmap */
53 if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
54 return tbl->tbl + handle * tbl->res_sz;
56 pr_dbg("Invalid handle %d\n", handle);
/*
 * Allocate a free entry from the table.
 *
 * Under tbl->lock, picks the first clear bit of the allocation bitmap, stores
 * its index in *handle and marks it used. The entry is then zeroed and its
 * address (tbl->tbl + *handle * tbl->res_sz) is returned. When the bitmap has
 * no clear bit the failure is logged and the lock is released without
 * touching the bitmap or the table.
 */
61 static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
63 qemu_mutex_lock(&tbl->lock);
65 *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
/*
 * find_first_zero_bit() returns the bitmap size when every bit is set, so
 * *handle == tbl_sz already means "full"; valid handles are 0..tbl_sz-1.
 */
66 if (*handle >= tbl->tbl_sz) {
67 pr_dbg("Failed to alloc, bitmap is full\n");
68 qemu_mutex_unlock(&tbl->lock);
72 set_bit(*handle, tbl->bitmap);
74 qemu_mutex_unlock(&tbl->lock);
/* The slot is now owned by the caller; hand it out zero-initialised */
76 memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);
78 pr_dbg("%s, handle=%d\n", tbl->name, *handle);
80 return tbl->tbl + *handle * tbl->res_sz;
/*
 * Return the entry identified by handle to the table by clearing its bit in
 * the allocation bitmap under tbl->lock. Handles at or beyond tbl->tbl_sz
 * are ignored. The entry's memory itself is not modified here.
 */
83 static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
85 pr_dbg("%s, handle=%d\n", tbl->name, handle);
87 qemu_mutex_lock(&tbl->lock);
/* Only touch the bitmap for in-range handles */
89 if (handle < tbl->tbl_sz) {
90 clear_bit(handle, tbl->bitmap);
93 qemu_mutex_unlock(&tbl->lock);
/*
 * Allocate a protection domain (PD).
 *
 * Reserves an entry in dev_res->pd_tbl (its index is written to *pd_handle),
 * creates the matching backend PD on backend_dev and records ctx_handle in
 * the entry. The out_tbl_dealloc path gives the table entry back.
 */
96 int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
97 uint32_t *pd_handle, uint32_t ctx_handle)
102 pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
107 ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
110 goto out_tbl_dealloc;
113 pd->ctx_handle = ctx_handle;
/* Error unwinding: release the table entry reserved above */
118 res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
/* Look up the PD entry for pd_handle in dev_res->pd_tbl via res_tbl_get(). */
124 RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
126 return res_tbl_get(&dev_res->pd_tbl, pd_handle);
/*
 * Release a PD: look it up by pd_handle, destroy its backend PD and return
 * the entry to dev_res->pd_tbl.
 */
129 void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
131 RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);
134 rdma_backend_destroy_pd(&pd->backend_pd);
135 res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
/*
 * Allocate a memory region (MR) that belongs to the PD given by pd_handle.
 *
 * Validates the PD, reserves an entry in dev_res->mr_tbl (index written to
 * *mr_handle), records the host mapping (host_virt), the guest start address
 * and the length, and registers the region with the backend using
 * access_flags. On the error path the table entry is released again.
 */
139 int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
140 uint64_t guest_start, size_t guest_length, void *host_virt,
141 int access_flags, uint32_t *mr_handle, uint32_t *lkey,
/* The MR can only be created against an existing PD */
148 pd = rdma_rm_get_pd(dev_res, pd_handle);
150 pr_dbg("Invalid PD\n");
154 mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
156 pr_dbg("Failed to allocate obj in table\n");
159 pr_dbg("mr_handle=%d\n", *mr_handle);
161 pr_dbg("host_virt=0x%p\n", host_virt);
162 pr_dbg("guest_start=0x%" PRIx64 "\n", guest_start);
163 pr_dbg("length=%zu\n", guest_length);
/* Remember the mapping; virt and length are used for munmap() on dealloc */
166 mr->virt = host_virt;
167 mr->start = guest_start;
168 mr->length = guest_length;
170 ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, mr->virt,
171 mr->length, access_flags);
173 pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
179 /* We keep mr_handle in lkey so send and recv can get the mr ptr */
183 mr->pd_handle = pd_handle;
/* Error unwinding: release the table entry reserved above */
188 res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
/* Look up the MR entry for mr_handle in dev_res->mr_tbl via res_tbl_get(). */
193 RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
195 return res_tbl_get(&dev_res->mr_tbl, mr_handle);
/*
 * Release an MR: look it up by mr_handle, destroy the backend MR, unmap the
 * host mapping recorded in mr->virt / mr->length and return the entry to
 * dev_res->mr_tbl.
 */
198 void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
200 RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);
203 rdma_backend_destroy_mr(&mr->backend_mr);
204 pr_dbg("start=0x%" PRIx64 "\n", mr->start);
/* The mapping is owned by the MR from allocation time on, so drop it here */
206 munmap(mr->virt, mr->length);
208 res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
/*
 * Allocate a UC entry from dev_res->uc_tbl; the index of the new entry is
 * written through uc_handle.
 * NOTE(review): the pfn range check below appears to sit inside the TODO
 * comment, i.e. pfn is presumably not validated at the moment -- confirm.
 */
212 int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
217 /* TODO: Need to make sure pfn is between bar start address and
218 * bsd+RDMA_BAR2_UAR_SIZE
219 if (pfn > RDMA_BAR2_UAR_SIZE) {
220 pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
225 uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
/* Look up the UC entry for uc_handle in dev_res->uc_tbl via res_tbl_get(). */
233 RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
235 return res_tbl_get(&dev_res->uc_tbl, uc_handle);
/*
 * Release a UC: look it up by uc_handle and return the entry to
 * dev_res->uc_tbl.
 */
238 void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
240 RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);
243 res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
/* Look up the CQ entry for cq_handle in dev_res->cq_tbl via res_tbl_get(). */
247 RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
249 return res_tbl_get(&dev_res->cq_tbl, cq_handle);
/*
 * Allocate a completion queue (CQ).
 *
 * Reserves an entry in dev_res->cq_tbl (index written to *cq_handle) and
 * creates a backend CQ with cqe entries on backend_dev. The cleanup path
 * tears the CQ down through rdma_rm_dealloc_cq().
 */
252 int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
253 uint32_t cqe, uint32_t *cq_handle, void *opaque)
258 cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
266 rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
/* Cleanup reuses the regular dealloc routine for this handle */
275 rdma_rm_dealloc_cq(dev_res, *cq_handle);
/*
 * Handle a notification request for the CQ identified by cq_handle: the CQ
 * is looked up in the CQ table and its resulting cq->notify value is logged.
 */
280 void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
285 pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);
287 cq = rdma_rm_get_cq(dev_res, cq_handle);
293 pr_dbg("notify=%d\n", cq->notify);
/*
 * Release a CQ: look it up by cq_handle, destroy the backend CQ and return
 * the entry to dev_res->cq_tbl.
 */
296 void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
300 cq = rdma_rm_get_cq(dev_res, cq_handle);
305 rdma_backend_destroy_cq(&cq->backend_cq);
307 res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
/*
 * Look up a QP by number in dev_res->qp_hash. The hash is keyed by GBytes
 * objects, so a temporary key wrapping qpn is built for the lookup.
 */
310 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
312 GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
314 RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
/*
 * Allocate a queue pair (QP).
 *
 * Validates the PD and both CQ handles, reserves an entry in
 * dev_res->qp_tbl, initialises it (state IBV_QPS_RESET, type, CQ handles)
 * and creates the backend QP with the requested work-request and SGE limits.
 * The backend QP number is written to *qpn and is also the key under which
 * the QP is stored in dev_res->qp_hash. On the error path the table entry
 * is released again.
 */
321 int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
322 uint8_t qp_type, uint32_t max_send_wr,
323 uint32_t max_send_sge, uint32_t send_cq_handle,
324 uint32_t max_recv_wr, uint32_t max_recv_sge,
325 uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
333 pr_dbg("qp_type=%d\n", qp_type);
335 pd = rdma_rm_get_pd(dev_res, pd_handle);
337 pr_err("Invalid pd handle (%d)\n", pd_handle);
/* Send and receive CQs are resolved up front; both must exist */
341 scq = rdma_rm_get_cq(dev_res, send_cq_handle);
342 rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);
345 pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
346 send_cq_handle, recv_cq_handle);
/* rm_qpn is the index inside qp_tbl, distinct from the backend QP number */
350 qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
354 pr_dbg("rm_qpn=%d\n", rm_qpn);
357 qp->qp_state = IBV_QPS_RESET;
358 qp->qp_type = qp_type;
359 qp->send_cq_handle = send_cq_handle;
360 qp->recv_cq_handle = recv_cq_handle;
363 rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
364 &scq->backend_cq, &rcq->backend_cq, max_send_wr,
365 max_recv_wr, max_send_sge, max_recv_sge);
/* Callers see the backend QP number; the hash maps it back to the entry */
371 *qpn = rdma_backend_qpn(&qp->backend_qp);
372 pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
373 g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);
/* Error unwinding: release the table entry reserved above */
378 res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
/*
 * Modify a QP.
 *
 * The QP is looked up through rdma_rm_get_qp() using qp_handle. SMI QPs
 * (QP0) are reported as unsupported and GSI QPs take a separate branch.
 * When attr_mask contains IBV_QP_STATE the new qp_state is stored and the
 * matching backend transition is performed for INIT, RTR and RTS.
 */
383 int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
384 uint32_t qp_handle, uint32_t attr_mask,
385 union ibv_gid *dgid, uint32_t dqpn,
386 enum ibv_qp_state qp_state, uint32_t qkey,
387 uint32_t rq_psn, uint32_t sq_psn)
392 pr_dbg("qpn=%d\n", qp_handle);
394 qp = rdma_rm_get_qp(dev_res, qp_handle);
399 pr_dbg("qp_type=%d\n", qp->qp_type);
400 pr_dbg("attr_mask=0x%x\n", attr_mask);
402 if (qp->qp_type == IBV_QPT_SMI) {
403 pr_dbg("QP0 unsupported\n");
405 } else if (qp->qp_type == IBV_QPT_GSI) {
/* Only a state change is forwarded to the backend */
410 if (attr_mask & IBV_QP_STATE) {
411 qp->qp_state = qp_state;
412 pr_dbg("qp_state=%d\n", qp->qp_state);
414 if (qp->qp_state == IBV_QPS_INIT) {
415 ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
422 if (qp->qp_state == IBV_QPS_RTR) {
/* The last argument tells the backend whether qkey is valid */
423 ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
424 qp->qp_type, dgid, dqpn, rq_psn,
425 qkey, attr_mask & IBV_QP_QKEY);
431 if (qp->qp_state == IBV_QPS_RTS) {
432 ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
434 attr_mask & IBV_QP_QKEY);
/*
 * Query a QP: look it up through rdma_rm_get_qp() using qp_handle and
 * forward the request (attr, attr_mask, init_attr) to the backend. The
 * backend's return value is passed straight back to the caller.
 */
444 int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
445 uint32_t qp_handle, struct ibv_qp_attr *attr,
446 int attr_mask, struct ibv_qp_init_attr *init_attr)
450 pr_dbg("qpn=%d\n", qp_handle);
452 qp = rdma_rm_get_qp(dev_res, qp_handle);
457 pr_dbg("qp_type=%d\n", qp->qp_type);
459 return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
/*
 * Release a QP.
 *
 * qp_handle is used as the key into dev_res->qp_hash: the QP is looked up,
 * its hash entry is removed, the backend QP is destroyed and the entry is
 * returned to dev_res->qp_tbl using the table index stored in qp->qpn.
 */
462 void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
467 key = g_bytes_new(&qp_handle, sizeof(qp_handle));
/* Fetch the QP before removing the hash entry so it can still be torn down */
468 qp = g_hash_table_lookup(dev_res->qp_hash, key);
469 g_hash_table_remove(dev_res->qp_hash, key);
476 rdma_backend_destroy_qp(&qp->backend_qp);
478 res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
/*
 * Look up the CQE context slot for cqe_ctx_id in dev_res->cqe_ctx_tbl. Each
 * slot holds a pointer; the pointer stored in the slot is what gets logged.
 * NOTE(review): presumably the stored pointer is also the return value --
 * confirm against the full function.
 */
481 void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
485 cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
490 pr_dbg("ctx=%p\n", *cqe_ctx);
/*
 * Reserve a CQE context slot in dev_res->cqe_ctx_tbl; the index of the new
 * slot is written to *cqe_ctx_id.
 */
495 int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
500 cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
505 pr_dbg("ctx=%p\n", ctx);
/* Return the CQE context slot cqe_ctx_id to dev_res->cqe_ctx_tbl. */
511 void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
513 res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
/*
 * Key destructor for dev_res->qp_hash: rdma_rm_init() passes it to
 * g_hash_table_new_full() as the callback invoked on keys that leave the
 * hash table.
 */
516 static void destroy_qp_hash_key(gpointer data)
/*
 * Initialise the resource manager state in dev_res.
 *
 * Creates the QP hash (GBytes keys, released through destroy_qp_hash_key;
 * values are not freed by the hash) and one resource table per object type,
 * sized from the limits in dev_attr.
 */
521 int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
524 dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
525 destroy_qp_hash_key, NULL);
526 if (!dev_res->qp_hash) {
530 res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
531 res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
532 res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
533 res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
/* One pointer-sized context slot per possible work request on every QP */
534 res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
535 dev_attr->max_qp_wr, sizeof(void *));
536 res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
/*
 * Tear down the resource manager state in dev_res: free every resource
 * table, in the reverse of the order used by rdma_rm_init(), and then
 * destroy the QP hash.
 */
541 void rdma_rm_fini(RdmaDeviceResources *dev_res)
543 res_tbl_free(&dev_res->uc_tbl);
544 res_tbl_free(&dev_res->cqe_ctx_tbl);
545 res_tbl_free(&dev_res->qp_tbl);
546 res_tbl_free(&dev_res->mr_tbl);
547 res_tbl_free(&dev_res->cq_tbl);
548 res_tbl_free(&dev_res->pd_tbl);
550 g_hash_table_destroy(dev_res->qp_hash);