drivers/infiniband/hw/erdma/erdma_verbs.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /* Authors: Cheng Xu <[email protected]> */
4 /*          Kai Shen <[email protected]> */
5 /* Copyright (c) 2020-2022, Alibaba Group. */
6
7 /* Authors: Bernard Metzler <[email protected]> */
8 /* Copyright (c) 2008-2019, IBM Corporation */
9
10 /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */
11
12 #include <linux/vmalloc.h>
13 #include <net/addrconf.h>
14 #include <rdma/erdma-abi.h>
15 #include <rdma/ib_umem.h>
16 #include <rdma/uverbs_ioctl.h>
17
18 #include "erdma.h"
19 #include "erdma_cm.h"
20 #include "erdma_verbs.h"
21
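/*
 * Build a CREATE_QP command and post it to the command queue. Kernel QPs
 * describe each queue buffer with a single inline MTT entry, while user
 * QPs pass the MTT layout recorded in init_user_qp(). On success the
 * cookie returned by the device is saved in qp->attrs.
 */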
22 static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp)
23 {
24         struct erdma_cmdq_create_qp_req req;
25         struct erdma_pd *pd = to_epd(qp->ibqp.pd);
26         struct erdma_uqp *user_qp;
27         u64 resp0, resp1;
28         int err;
29
30         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
31                                 CMDQ_OPCODE_CREATE_QP);
32
33         req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK,
34                               ilog2(qp->attrs.sq_size)) |
35                    FIELD_PREP(ERDMA_CMD_CREATE_QP_QPN_MASK, QP_ID(qp));
36         req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK,
37                               ilog2(qp->attrs.rq_size)) |
38                    FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
39
40         if (rdma_is_kernel_res(&qp->ibqp.res)) {
41                 u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
42
43                 req.sq_cqn_mtt_cfg =
44                         FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
45                                    pgsz_range) |
46                         FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
47                 req.rq_cqn_mtt_cfg =
48                         FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
49                                    pgsz_range) |
50                         FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
51
52                 req.sq_mtt_cfg =
53                         FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
54                         FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
55                         FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
56                                    ERDMA_MR_INLINE_MTT);
57                 req.rq_mtt_cfg = req.sq_mtt_cfg;
58
59                 req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
60                 req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
61                 req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
62                                           (qp->attrs.sq_size << SQEBB_SHIFT);
63                 req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
64                                           (qp->attrs.rq_size << RQE_SHIFT);
65         } else {
66                 user_qp = &qp->user_qp;
67                 req.sq_cqn_mtt_cfg = FIELD_PREP(
68                         ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
69                         ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT);
70                 req.sq_cqn_mtt_cfg |=
71                         FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
72
73                 req.rq_cqn_mtt_cfg = FIELD_PREP(
74                         ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
75                         ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT);
76                 req.rq_cqn_mtt_cfg |=
77                         FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
78
79                 req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
80                 req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
81                                              user_qp->sq_mtt.mtt_nents) |
82                                   FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
83                                              user_qp->sq_mtt.mtt_type);
84
85                 req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
86                 req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
87                                              user_qp->rq_mtt.mtt_nents) |
88                                   FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
89                                              user_qp->rq_mtt.mtt_type);
90
91                 req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
92                 req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];
93
94                 req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
95                 req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
96         }
97
98         err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
99                                   &resp1);
100         if (!err)
101                 qp->attrs.cookie =
102                         FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
103
104         return err;
105 }
106
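/*
 * Post a REG_MR command describing the MR's key, PD, access rights and
 * page layout. DMA MRs carry no page table; inline MTTs are copied into
 * the request, while indirect MTTs pass only the DMA address of the MTT
 * buffer.
 */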
107 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
108 {
109         struct erdma_cmdq_reg_mr_req req;
110         struct erdma_pd *pd = to_epd(mr->ibmr.pd);
111         u64 *phy_addr;
112         int i;
113
114         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
115
116         req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
117                    FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
118                    FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
119         req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
120                    FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
121                    FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
122         req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
123                               ilog2(mr->mem.page_size)) |
124                    FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
125                    FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
126
127         if (mr->type == ERDMA_MR_TYPE_DMA)
128                 goto post_cmd;
129
130         if (mr->type == ERDMA_MR_TYPE_NORMAL) {
131                 req.start_va = mr->mem.va;
132                 req.size = mr->mem.len;
133         }
134
135         if (mr->type == ERDMA_MR_TYPE_FRMR ||
136             mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
137                 phy_addr = req.phy_addr;
138                 *phy_addr = mr->mem.mtt_entry[0];
139         } else {
140                 phy_addr = req.phy_addr;
141                 for (i = 0; i < mr->mem.mtt_nents; i++)
142                         *phy_addr++ = mr->mem.mtt_entry[i];
143         }
144
145 post_cmd:
146         return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
147 }
148
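/*
 * Post a CREATE_CQ command. Kernel CQs use one physically contiguous
 * buffer (inline MTT, with the doorbell record placed right after the
 * CQE array); user CQs hand over the MTT gathered from the umem plus a
 * separately mapped doorbell record.
 */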
149 static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq)
150 {
151         struct erdma_cmdq_create_cq_req req;
152         u32 page_size;
153         struct erdma_mem *mtt;
154
155         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
156                                 CMDQ_OPCODE_CREATE_CQ);
157
158         req.cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_CQN_MASK, cq->cqn) |
159                    FIELD_PREP(ERDMA_CMD_CREATE_CQ_DEPTH_MASK, ilog2(cq->depth));
160         req.cfg1 = FIELD_PREP(ERDMA_CMD_CREATE_CQ_EQN_MASK, cq->assoc_eqn);
161
162         if (rdma_is_kernel_res(&cq->ibcq.res)) {
163                 page_size = SZ_32M;
164                 req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
165                                        ilog2(page_size) - ERDMA_HW_PAGE_SHIFT);
166                 req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr);
167                 req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
168
169                 req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
170                             FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
171                                        ERDMA_MR_INLINE_MTT);
172
173                 req.first_page_offset = 0;
174                 req.cq_db_info_addr =
175                         cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
176         } else {
177                 mtt = &cq->user_cq.qbuf_mtt;
178                 req.cfg0 |=
179                         FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
180                                    ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT);
181                 if (mtt->mtt_nents == 1) {
182                         req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
183                         req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
184                 } else {
185                         req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
186                         req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
187                 }
188                 req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
189                                        mtt->mtt_nents);
190                 req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
191                                        mtt->mtt_type);
192
193                 req.first_page_offset = mtt->page_offset;
194                 req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
195         }
196
197         return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
198 }
199
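/*
 * Bitmap-based index allocator. The search starts at next_alloc_idx so
 * indexes are handed out roughly round-robin, wrapping to a full rescan
 * before giving up with -ENOSPC.
 */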
200 static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
201 {
202         int idx;
203         unsigned long flags;
204
205         spin_lock_irqsave(&res_cb->lock, flags);
206         idx = find_next_zero_bit(res_cb->bitmap, res_cb->max_cap,
207                                  res_cb->next_alloc_idx);
208         if (idx == res_cb->max_cap) {
209                 idx = find_first_zero_bit(res_cb->bitmap, res_cb->max_cap);
210                 if (idx == res_cb->max_cap) {
211                         res_cb->next_alloc_idx = 1;
212                         spin_unlock_irqrestore(&res_cb->lock, flags);
213                         return -ENOSPC;
214                 }
215         }
216
217         set_bit(idx, res_cb->bitmap);
218         res_cb->next_alloc_idx = idx + 1;
219         spin_unlock_irqrestore(&res_cb->lock, flags);
220
221         return idx;
222 }
223
224 static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx)
225 {
226         unsigned long flags;
227         u32 used;
228
229         spin_lock_irqsave(&res_cb->lock, flags);
230         used = __test_and_clear_bit(idx, res_cb->bitmap);
231         spin_unlock_irqrestore(&res_cb->lock, flags);
232         WARN_ON(!used);
233 }
234
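/*
 * Wrap rdma_user_mmap_entry_insert(): allocate an entry for a doorbell
 * region, register it with the uverbs mmap machinery and return the
 * offset userspace must pass to mmap().
 */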
235 static struct rdma_user_mmap_entry *
236 erdma_user_mmap_entry_insert(struct erdma_ucontext *uctx, void *address,
237                              u32 size, u8 mmap_flag, u64 *mmap_offset)
238 {
239         struct erdma_user_mmap_entry *entry =
240                 kzalloc(sizeof(*entry), GFP_KERNEL);
241         int ret;
242
243         if (!entry)
244                 return NULL;
245
246         entry->address = (u64)address;
247         entry->mmap_flag = mmap_flag;
248
249         size = PAGE_ALIGN(size);
250
251         ret = rdma_user_mmap_entry_insert(&uctx->ibucontext, &entry->rdma_entry,
252                                           size);
253         if (ret) {
254                 kfree(entry);
255                 return NULL;
256         }
257
258         *mmap_offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
259
260         return &entry->rdma_entry;
261 }
262
263 int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
264                        struct ib_udata *unused)
265 {
266         struct erdma_dev *dev = to_edev(ibdev);
267
268         memset(attr, 0, sizeof(*attr));
269
270         attr->max_mr_size = dev->attrs.max_mr_size;
271         attr->vendor_id = PCI_VENDOR_ID_ALIBABA;
272         attr->vendor_part_id = dev->pdev->device;
273         attr->hw_ver = dev->pdev->revision;
274         attr->max_qp = dev->attrs.max_qp - 1;
275         attr->max_qp_wr = min(dev->attrs.max_send_wr, dev->attrs.max_recv_wr);
276         attr->max_qp_rd_atom = dev->attrs.max_ord;
277         attr->max_qp_init_rd_atom = dev->attrs.max_ird;
278         attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird;
279         attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
280         attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
281         ibdev->local_dma_lkey = dev->attrs.local_dma_key;
282         attr->max_send_sge = dev->attrs.max_send_sge;
283         attr->max_recv_sge = dev->attrs.max_recv_sge;
284         attr->max_sge_rd = dev->attrs.max_sge_rd;
285         attr->max_cq = dev->attrs.max_cq - 1;
286         attr->max_cqe = dev->attrs.max_cqe;
287         attr->max_mr = dev->attrs.max_mr;
288         attr->max_pd = dev->attrs.max_pd;
289         attr->max_mw = dev->attrs.max_mw;
290         attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
291         attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
292
293         if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
294                 attr->atomic_cap = IB_ATOMIC_GLOB;
295
296         attr->fw_ver = dev->attrs.fw_version;
297
298         if (dev->netdev)
299                 addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
300                                     dev->netdev->dev_addr);
301
302         return 0;
303 }
304
305 int erdma_query_gid(struct ib_device *ibdev, u32 port, int idx,
306                     union ib_gid *gid)
307 {
308         struct erdma_dev *dev = to_edev(ibdev);
309
310         memset(gid, 0, sizeof(*gid));
311         ether_addr_copy(gid->raw, dev->attrs.peer_addr);
312
313         return 0;
314 }
315
316 int erdma_query_port(struct ib_device *ibdev, u32 port,
317                      struct ib_port_attr *attr)
318 {
319         struct erdma_dev *dev = to_edev(ibdev);
320         struct net_device *ndev = dev->netdev;
321
322         memset(attr, 0, sizeof(*attr));
323
324         attr->gid_tbl_len = 1;
325         attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
326         attr->max_msg_sz = -1;
327
328         if (!ndev)
329                 goto out;
330
331         ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
332         attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
333         attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
334         if (netif_running(ndev) && netif_carrier_ok(ndev))
335                 dev->state = IB_PORT_ACTIVE;
336         else
337                 dev->state = IB_PORT_DOWN;
338         attr->state = dev->state;
339
340 out:
341         if (dev->state == IB_PORT_ACTIVE)
342                 attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
343         else
344                 attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
345
346         return 0;
347 }
348
349 int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
350                              struct ib_port_immutable *port_immutable)
351 {
352         port_immutable->gid_tbl_len = 1;
353         port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
354
355         return 0;
356 }
357
358 int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
359 {
360         struct erdma_pd *pd = to_epd(ibpd);
361         struct erdma_dev *dev = to_edev(ibpd->device);
362         int pdn;
363
364         pdn = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_PD]);
365         if (pdn < 0)
366                 return pdn;
367
368         pd->pdn = pdn;
369
370         return 0;
371 }
372
373 int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
374 {
375         struct erdma_pd *pd = to_epd(ibpd);
376         struct erdma_dev *dev = to_edev(ibpd->device);
377
378         erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_PD], pd->pdn);
379
380         return 0;
381 }
382
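/*
 * Delayed work that issues a REFLUSH command for a kernel QP, reporting
 * the current SQ/RQ producer indexes so the device can flush the
 * outstanding work requests.
 */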
383 static void erdma_flush_worker(struct work_struct *work)
384 {
385         struct delayed_work *dwork = to_delayed_work(work);
386         struct erdma_qp *qp =
387                 container_of(dwork, struct erdma_qp, reflush_dwork);
388         struct erdma_cmdq_reflush_req req;
389
390         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
391                                 CMDQ_OPCODE_REFLUSH);
392         req.qpn = QP_ID(qp);
393         req.sq_pi = qp->kern_qp.sq_pi;
394         req.rq_pi = qp->kern_qp.rq_pi;
395         erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
396 }
397
398 static int erdma_qp_validate_cap(struct erdma_dev *dev,
399                                  struct ib_qp_init_attr *attrs)
400 {
401         if ((attrs->cap.max_send_wr > dev->attrs.max_send_wr) ||
402             (attrs->cap.max_recv_wr > dev->attrs.max_recv_wr) ||
403             (attrs->cap.max_send_sge > dev->attrs.max_send_sge) ||
404             (attrs->cap.max_recv_sge > dev->attrs.max_recv_sge) ||
405             (attrs->cap.max_inline_data > ERDMA_MAX_INLINE) ||
406             !attrs->cap.max_send_wr || !attrs->cap.max_recv_wr) {
407                 return -EINVAL;
408         }
409
410         return 0;
411 }
412
413 static int erdma_qp_validate_attr(struct erdma_dev *dev,
414                                   struct ib_qp_init_attr *attrs)
415 {
416         if (attrs->qp_type != IB_QPT_RC)
417                 return -EOPNOTSUPP;
418
419         if (attrs->srq)
420                 return -EOPNOTSUPP;
421
422         if (!attrs->send_cq || !attrs->recv_cq)
423                 return -EOPNOTSUPP;
424
425         return 0;
426 }
427
428 static void free_kernel_qp(struct erdma_qp *qp)
429 {
430         struct erdma_dev *dev = qp->dev;
431
432         vfree(qp->kern_qp.swr_tbl);
433         vfree(qp->kern_qp.rwr_tbl);
434
435         if (qp->kern_qp.sq_buf)
436                 dma_free_coherent(
437                         &dev->pdev->dev,
438                         WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
439                         qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
440
441         if (qp->kern_qp.rq_buf)
442                 dma_free_coherent(
443                         &dev->pdev->dev,
444                         WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
445                         qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
446 }
447
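/*
 * Allocate the in-kernel QP resources: WR id tables (vmalloc) and the
 * SQ/RQ buffers (dma_alloc_coherent), with the doorbell records placed
 * in the extra space after each queue. The hardware doorbells live in
 * the function BAR.
 */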
448 static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
449                           struct ib_qp_init_attr *attrs)
450 {
451         struct erdma_kqp *kqp = &qp->kern_qp;
452         int size;
453
454         if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR)
455                 kqp->sig_all = 1;
456
457         kqp->sq_pi = 0;
458         kqp->sq_ci = 0;
459         kqp->rq_pi = 0;
460         kqp->rq_ci = 0;
461         kqp->hw_sq_db =
462                 dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
463         kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
464
465         kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
466         kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
467         if (!kqp->swr_tbl || !kqp->rwr_tbl)
468                 goto err_out;
469
470         size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
471         kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
472                                          &kqp->sq_buf_dma_addr, GFP_KERNEL);
473         if (!kqp->sq_buf)
474                 goto err_out;
475
476         size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
477         kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
478                                          &kqp->rq_buf_dma_addr, GFP_KERNEL);
479         if (!kqp->rq_buf)
480                 goto err_out;
481
482         kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
483         kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
484
485         return 0;
486
487 err_out:
488         free_kernel_qp(qp);
489         return -ENOMEM;
490 }
491
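/*
 * Pin a userspace buffer and build its MTT. Small mappings fit in the
 * inline mtt_entry[] array; larger ones (or callers that force it) get
 * an indirect MTT buffer that is DMA-mapped and referenced through
 * mtt_entry[0].
 */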
492 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
493                            u64 start, u64 len, int access, u64 virt,
494                            unsigned long req_page_size, u8 force_indirect_mtt)
495 {
496         struct ib_block_iter biter;
497         uint64_t *phy_addr = NULL;
498         int ret = 0;
499
500         mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
501         if (IS_ERR(mem->umem)) {
502                 ret = PTR_ERR(mem->umem);
503                 mem->umem = NULL;
504                 return ret;
505         }
506
507         mem->va = virt;
508         mem->len = len;
509         mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt);
510         mem->page_offset = start & (mem->page_size - 1);
511         mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
512         mem->page_cnt = mem->mtt_nents;
513
514         if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
515             force_indirect_mtt) {
516                 mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
517                 mem->mtt_buf =
518                         alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
519                 if (!mem->mtt_buf) {
520                         ret = -ENOMEM;
521                         goto error_ret;
522                 }
523                 phy_addr = mem->mtt_buf;
524         } else {
525                 mem->mtt_type = ERDMA_MR_INLINE_MTT;
526                 phy_addr = mem->mtt_entry;
527         }
528
529         rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
530                 *phy_addr = rdma_block_iter_dma_address(&biter);
531                 phy_addr++;
532         }
533
534         if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
535                 mem->mtt_entry[0] =
536                         dma_map_single(&dev->pdev->dev, mem->mtt_buf,
537                                        MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
538                 if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
539                         free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
540                         mem->mtt_buf = NULL;
541                         ret = -ENOMEM;
542                         goto error_ret;
543                 }
544         }
545
546         return 0;
547
548 error_ret:
549         if (mem->umem) {
550                 ib_umem_release(mem->umem);
551                 mem->umem = NULL;
552         }
553
554         return ret;
555 }
556
557 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
558 {
559         if (mem->mtt_buf) {
560                 dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
561                                  MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
562                 free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
563         }
564
565         if (mem->umem) {
566                 ib_umem_release(mem->umem);
567                 mem->umem = NULL;
568         }
569 }
570
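/*
 * Doorbell records of one ucontext share pinned pages: look up an
 * already-mapped page covering dbrecords_va, otherwise pin it via
 * ib_umem_get() and add it to the per-context list. The returned DMA
 * address points at the record's offset within that page.
 */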
571 static int erdma_map_user_dbrecords(struct erdma_ucontext *ctx,
572                                     u64 dbrecords_va,
573                                     struct erdma_user_dbrecords_page **dbr_page,
574                                     dma_addr_t *dma_addr)
575 {
576         struct erdma_user_dbrecords_page *page = NULL;
577         int rv = 0;
578
579         mutex_lock(&ctx->dbrecords_page_mutex);
580
581         list_for_each_entry(page, &ctx->dbrecords_page_list, list)
582                 if (page->va == (dbrecords_va & PAGE_MASK))
583                         goto found;
584
585         page = kmalloc(sizeof(*page), GFP_KERNEL);
586         if (!page) {
587                 rv = -ENOMEM;
588                 goto out;
589         }
590
591         page->va = (dbrecords_va & PAGE_MASK);
592         page->refcnt = 0;
593
594         page->umem = ib_umem_get(ctx->ibucontext.device,
595                                  dbrecords_va & PAGE_MASK, PAGE_SIZE, 0);
596         if (IS_ERR(page->umem)) {
597                 rv = PTR_ERR(page->umem);
598                 kfree(page);
599                 goto out;
600         }
601
602         list_add(&page->list, &ctx->dbrecords_page_list);
603
604 found:
605         *dma_addr = sg_dma_address(page->umem->sgt_append.sgt.sgl) +
606                     (dbrecords_va & ~PAGE_MASK);
607         *dbr_page = page;
608         page->refcnt++;
609
610 out:
611         mutex_unlock(&ctx->dbrecords_page_mutex);
612         return rv;
613 }
614
615 static void
616 erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
617                            struct erdma_user_dbrecords_page **dbr_page)
618 {
619         if (!ctx || !(*dbr_page))
620                 return;
621
622         mutex_lock(&ctx->dbrecords_page_mutex);
623         if (--(*dbr_page)->refcnt == 0) {
624                 list_del(&(*dbr_page)->list);
625                 ib_umem_release((*dbr_page)->umem);
626                 kfree(*dbr_page);
627         }
628
629         *dbr_page = NULL;
630         mutex_unlock(&ctx->dbrecords_page_mutex);
631 }
632
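/*
 * Set up a user QP from the queue buffer provided by userspace: the SQ
 * occupies the start of the buffer, the RQ follows at a hardware-page
 * aligned offset, and the doorbell records are mapped from a separate
 * user address.
 */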
633 static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
634                         u64 va, u32 len, u64 db_info_va)
635 {
636         dma_addr_t db_info_dma_addr;
637         u32 rq_offset;
638         int ret;
639
640         if (len < (ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) +
641                    qp->attrs.rq_size * RQE_SIZE))
642                 return -EINVAL;
643
644         ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
645                               qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
646                               (SZ_1M - SZ_4K), 1);
647         if (ret)
648                 return ret;
649
650         rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
651         qp->user_qp.rq_offset = rq_offset;
652
653         ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
654                               qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
655                               (SZ_1M - SZ_4K), 1);
656         if (ret)
657                 goto put_sq_mtt;
658
659         ret = erdma_map_user_dbrecords(uctx, db_info_va,
660                                        &qp->user_qp.user_dbr_page,
661                                        &db_info_dma_addr);
662         if (ret)
663                 goto put_rq_mtt;
664
665         qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
666         qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
667
668         return 0;
669
670 put_rq_mtt:
671         put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
672
673 put_sq_mtt:
674         put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
675
676         return ret;
677 }
678
679 static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
680 {
681         put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
682         put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
683         erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
684 }
685
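/*
 * .create_qp verb: validate the requested caps, pick a QPN from the
 * xarray, size the SQ/RQ as powers of two, set up either the user or
 * the kernel queue buffers, and finally issue CREATE_QP to the device.
 */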
686 int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
687                     struct ib_udata *udata)
688 {
689         struct erdma_qp *qp = to_eqp(ibqp);
690         struct erdma_dev *dev = to_edev(ibqp->device);
691         struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
692                 udata, struct erdma_ucontext, ibucontext);
693         struct erdma_ureq_create_qp ureq;
694         struct erdma_uresp_create_qp uresp;
695         int ret;
696
697         ret = erdma_qp_validate_cap(dev, attrs);
698         if (ret)
699                 goto err_out;
700
701         ret = erdma_qp_validate_attr(dev, attrs);
702         if (ret)
703                 goto err_out;
704
705         qp->scq = to_ecq(attrs->send_cq);
706         qp->rcq = to_ecq(attrs->recv_cq);
707         qp->dev = dev;
708         qp->attrs.cc = dev->attrs.cc;
709
710         init_rwsem(&qp->state_lock);
711         kref_init(&qp->ref);
712         init_completion(&qp->safe_free);
713
714         ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
715                               XA_LIMIT(1, dev->attrs.max_qp - 1),
716                               &dev->next_alloc_qpn, GFP_KERNEL);
717         if (ret < 0) {
718                 ret = -ENOMEM;
719                 goto err_out;
720         }
721
722         qp->attrs.sq_size = roundup_pow_of_two(attrs->cap.max_send_wr *
723                                                ERDMA_MAX_WQEBB_PER_SQE);
724         qp->attrs.rq_size = roundup_pow_of_two(attrs->cap.max_recv_wr);
725
726         if (uctx) {
727                 ret = ib_copy_from_udata(&ureq, udata,
728                                          min(sizeof(ureq), udata->inlen));
729                 if (ret)
730                         goto err_out_xa;
731
732                 ret = init_user_qp(qp, uctx, ureq.qbuf_va, ureq.qbuf_len,
733                                    ureq.db_record_va);
734                 if (ret)
735                         goto err_out_xa;
736
737                 memset(&uresp, 0, sizeof(uresp));
738
739                 uresp.num_sqe = qp->attrs.sq_size;
740                 uresp.num_rqe = qp->attrs.rq_size;
741                 uresp.qp_id = QP_ID(qp);
742                 uresp.rq_offset = qp->user_qp.rq_offset;
743
744                 ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
745                 if (ret)
746                         goto err_out_cmd;
747         } else {
748                 init_kernel_qp(dev, qp, attrs);
749         }
750
751         qp->attrs.max_send_sge = attrs->cap.max_send_sge;
752         qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
753         qp->attrs.state = ERDMA_QP_STATE_IDLE;
754         INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
755
756         ret = create_qp_cmd(dev, qp);
757         if (ret)
758                 goto err_out_cmd;
759
760         spin_lock_init(&qp->lock);
761
762         return 0;
763
764 err_out_cmd:
765         if (uctx)
766                 free_user_qp(qp, uctx);
767         else
768                 free_kernel_qp(qp);
769 err_out_xa:
770         xa_erase(&dev->qp_xa, QP_ID(qp));
771 err_out:
772         return ret;
773 }
774
775 static int erdma_create_stag(struct erdma_dev *dev, u32 *stag)
776 {
777         int stag_idx;
778
779         stag_idx = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX]);
780         if (stag_idx < 0)
781                 return stag_idx;
782
783         /* For now, we always let key field be zero. */
784         *stag = (stag_idx << 8);
785
786         return 0;
787 }
788
789 struct ib_mr *erdma_get_dma_mr(struct ib_pd *ibpd, int acc)
790 {
791         struct erdma_dev *dev = to_edev(ibpd->device);
792         struct erdma_mr *mr;
793         u32 stag;
794         int ret;
795
796         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
797         if (!mr)
798                 return ERR_PTR(-ENOMEM);
799
800         ret = erdma_create_stag(dev, &stag);
801         if (ret)
802                 goto out_free;
803
804         mr->type = ERDMA_MR_TYPE_DMA;
805
806         mr->ibmr.lkey = stag;
807         mr->ibmr.rkey = stag;
808         mr->ibmr.pd = ibpd;
809         mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(acc);
810         ret = regmr_cmd(dev, mr);
811         if (ret)
812                 goto out_remove_stag;
813
814         return &mr->ibmr;
815
816 out_remove_stag:
817         erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
818                        mr->ibmr.lkey >> 8);
819
820 out_free:
821         kfree(mr);
822
823         return ERR_PTR(ret);
824 }
825
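/*
 * .alloc_mr verb for fast-register MRs: reserve an STag, size the
 * indirect MTT buffer for max_num_sg pages and DMA-map it, then register
 * the (not yet valid) MR with the device.
 */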
826 struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
827                                 u32 max_num_sg)
828 {
829         struct erdma_mr *mr;
830         struct erdma_dev *dev = to_edev(ibpd->device);
831         int ret;
832         u32 stag;
833
834         if (mr_type != IB_MR_TYPE_MEM_REG)
835                 return ERR_PTR(-EOPNOTSUPP);
836
837         if (max_num_sg > ERDMA_MR_MAX_MTT_CNT)
838                 return ERR_PTR(-EINVAL);
839
840         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
841         if (!mr)
842                 return ERR_PTR(-ENOMEM);
843
844         ret = erdma_create_stag(dev, &stag);
845         if (ret)
846                 goto out_free;
847
848         mr->type = ERDMA_MR_TYPE_FRMR;
849
850         mr->ibmr.lkey = stag;
851         mr->ibmr.rkey = stag;
852         mr->ibmr.pd = ibpd;
853         /* Placeholder rights; updated when the FRMR is actually posted. */
854         mr->access = ERDMA_MR_ACC_LR | ERDMA_MR_ACC_LW | ERDMA_MR_ACC_RR |
855                      ERDMA_MR_ACC_RW;
856
857         mr->mem.page_size = PAGE_SIZE; /* update it later. */
858         mr->mem.page_cnt = max_num_sg;
859         mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
860         mr->mem.mtt_buf =
861                 alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
862         if (!mr->mem.mtt_buf) {
863                 ret = -ENOMEM;
864                 goto out_remove_stag;
865         }
866
867         mr->mem.mtt_entry[0] =
868                 dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
869                                MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
870         if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
871                 ret = -ENOMEM;
872                 goto out_free_mtt;
873         }
874
875         ret = regmr_cmd(dev, mr);
876         if (ret)
877                 goto out_dma_unmap;
878
879         return &mr->ibmr;
880
881 out_dma_unmap:
882         dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
883                          MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
884 out_free_mtt:
885         free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
886
887 out_remove_stag:
888         erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
889                        mr->ibmr.lkey >> 8);
890
891 out_free:
892         kfree(mr);
893
894         return ERR_PTR(ret);
895 }
896
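/*
 * ib_sg_to_pages() callback used by erdma_map_mr_sg(): store each page
 * address into the FRMR's indirect MTT buffer.
 */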
897 static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
898 {
899         struct erdma_mr *mr = to_emr(ibmr);
900
901         if (mr->mem.mtt_nents >= mr->mem.page_cnt)
902                 return -1;
903
904         *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
905         mr->mem.mtt_nents++;
906
907         return 0;
908 }
909
910 int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
911                     unsigned int *sg_offset)
912 {
913         struct erdma_mr *mr = to_emr(ibmr);
914         int num;
915
916         mr->mem.mtt_nents = 0;
917
918         num = ib_sg_to_pages(&mr->ibmr, sg, sg_nents, sg_offset,
919                              erdma_set_page);
920
921         return num;
922 }
923
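/*
 * .reg_user_mr verb: pin the user range, build its MTT, allocate an
 * STag and register the MR with the device.
 */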
924 struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
925                                 u64 virt, int access, struct ib_udata *udata)
926 {
927         struct erdma_mr *mr = NULL;
928         struct erdma_dev *dev = to_edev(ibpd->device);
929         u32 stag;
930         int ret;
931
932         if (!len || len > dev->attrs.max_mr_size)
933                 return ERR_PTR(-EINVAL);
934
935         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
936         if (!mr)
937                 return ERR_PTR(-ENOMEM);
938
939         ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
940                               SZ_2G - SZ_4K, 0);
941         if (ret)
942                 goto err_out_free;
943
944         ret = erdma_create_stag(dev, &stag);
945         if (ret)
946                 goto err_out_put_mtt;
947
948         mr->ibmr.lkey = mr->ibmr.rkey = stag;
949         mr->ibmr.pd = ibpd;
950         mr->mem.va = virt;
951         mr->mem.len = len;
952         mr->access = ERDMA_MR_ACC_LR | to_erdma_access_flags(access);
953         mr->valid = 1;
954         mr->type = ERDMA_MR_TYPE_NORMAL;
955
956         ret = regmr_cmd(dev, mr);
957         if (ret)
958                 goto err_out_mr;
959
960         return &mr->ibmr;
961
962 err_out_mr:
963         erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
964                        mr->ibmr.lkey >> 8);
965
966 err_out_put_mtt:
967         put_mtt_entries(dev, &mr->mem);
968
969 err_out_free:
970         kfree(mr);
971
972         return ERR_PTR(ret);
973 }
974
975 int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
976 {
977         struct erdma_mr *mr;
978         struct erdma_dev *dev = to_edev(ibmr->device);
979         struct erdma_cmdq_dereg_mr_req req;
980         int ret;
981
982         mr = to_emr(ibmr);
983
984         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
985                                 CMDQ_OPCODE_DEREG_MR);
986
987         req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
988                   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
989
990         ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
991         if (ret)
992                 return ret;
993
994         erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], ibmr->lkey >> 8);
995
996         put_mtt_entries(dev, &mr->mem);
997
998         kfree(mr);
999         return 0;
1000 }
1001
1002 int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1003 {
1004         struct erdma_cq *cq = to_ecq(ibcq);
1005         struct erdma_dev *dev = to_edev(ibcq->device);
1006         struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1007                 udata, struct erdma_ucontext, ibucontext);
1008         int err;
1009         struct erdma_cmdq_destroy_cq_req req;
1010
1011         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1012                                 CMDQ_OPCODE_DESTROY_CQ);
1013         req.cqn = cq->cqn;
1014
1015         err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1016         if (err)
1017                 return err;
1018
1019         if (rdma_is_kernel_res(&cq->ibcq.res)) {
1020                 dma_free_coherent(&dev->pdev->dev,
1021                                   WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1022                                   cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1023         } else {
1024                 erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1025                 put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1026         }
1027
1028         xa_erase(&dev->cq_xa, cq->cqn);
1029
1030         return 0;
1031 }
1032
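/*
 * .destroy_qp verb: move the QP to ERROR, cancel the flush work, issue
 * DESTROY_QP, wait for the reference count to drop, then release the
 * queue buffers (kernel QP) or unpin the user memory and doorbell
 * records (user QP).
 */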
1033 int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1034 {
1035         struct erdma_qp *qp = to_eqp(ibqp);
1036         struct erdma_dev *dev = to_edev(ibqp->device);
1037         struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1038                 udata, struct erdma_ucontext, ibucontext);
1039         struct erdma_qp_attrs qp_attrs;
1040         int err;
1041         struct erdma_cmdq_destroy_qp_req req;
1042
1043         down_write(&qp->state_lock);
1044         qp_attrs.state = ERDMA_QP_STATE_ERROR;
1045         erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
1046         up_write(&qp->state_lock);
1047
1048         cancel_delayed_work_sync(&qp->reflush_dwork);
1049
1050         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
1051                                 CMDQ_OPCODE_DESTROY_QP);
1052         req.qpn = QP_ID(qp);
1053
1054         err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1055         if (err)
1056                 return err;
1057
1058         erdma_qp_put(qp);
1059         wait_for_completion(&qp->safe_free);
1060
1061         if (rdma_is_kernel_res(&qp->ibqp.res)) {
1062                 vfree(qp->kern_qp.swr_tbl);
1063                 vfree(qp->kern_qp.rwr_tbl);
1064                 dma_free_coherent(
1065                         &dev->pdev->dev,
1066                         WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
1067                         qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
1068                 dma_free_coherent(
1069                         &dev->pdev->dev,
1070                         WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
1071                         qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
1072         } else {
1073                 put_mtt_entries(dev, &qp->user_qp.sq_mtt);
1074                 put_mtt_entries(dev, &qp->user_qp.rq_mtt);
1075                 erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
1076         }
1077
1078         if (qp->cep)
1079                 erdma_cep_put(qp->cep);
1080         xa_erase(&dev->qp_xa, QP_ID(qp));
1081
1082         return 0;
1083 }
1084
1085 void erdma_qp_get_ref(struct ib_qp *ibqp)
1086 {
1087         erdma_qp_get(to_eqp(ibqp));
1088 }
1089
1090 void erdma_qp_put_ref(struct ib_qp *ibqp)
1091 {
1092         erdma_qp_put(to_eqp(ibqp));
1093 }
1094
1095 int erdma_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma)
1096 {
1097         struct rdma_user_mmap_entry *rdma_entry;
1098         struct erdma_user_mmap_entry *entry;
1099         pgprot_t prot;
1100         int err;
1101
1102         rdma_entry = rdma_user_mmap_entry_get(ctx, vma);
1103         if (!rdma_entry)
1104                 return -EINVAL;
1105
1106         entry = to_emmap(rdma_entry);
1107
1108         switch (entry->mmap_flag) {
1109         case ERDMA_MMAP_IO_NC:
1110                 /* map doorbell. */
1111                 prot = pgprot_device(vma->vm_page_prot);
1112                 break;
1113         default:
1114                 err = -EINVAL;
1115                 goto put_entry;
1116         }
1117
1118         err = rdma_user_mmap_io(ctx, vma, PFN_DOWN(entry->address), PAGE_SIZE,
1119                                 prot, rdma_entry);
1120
1121 put_entry:
1122         rdma_user_mmap_entry_put(rdma_entry);
1123         return err;
1124 }
1125
1126 void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
1127 {
1128         struct erdma_user_mmap_entry *entry = to_emmap(rdma_entry);
1129
1130         kfree(entry);
1131 }
1132
1133 #define ERDMA_SDB_PAGE 0
1134 #define ERDMA_SDB_ENTRY 1
1135 #define ERDMA_SDB_SHARED 2
1136
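/*
 * Pick the send doorbell resources for a new ucontext: prefer a private
 * doorbell page, then a direct-WQE entry within a shared page, and fall
 * back to the shared doorbell page when direct WQE is disabled or
 * exhausted.
 */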
1137 static void alloc_db_resources(struct erdma_dev *dev,
1138                                struct erdma_ucontext *ctx)
1139 {
1140         u32 bitmap_idx;
1141         struct erdma_devattr *attrs = &dev->attrs;
1142
1143         if (attrs->disable_dwqe)
1144                 goto alloc_normal_db;
1145
1146         /* Try to alloc independent SDB page. */
1147         spin_lock(&dev->db_bitmap_lock);
1148         bitmap_idx = find_first_zero_bit(dev->sdb_page, attrs->dwqe_pages);
1149         if (bitmap_idx != attrs->dwqe_pages) {
1150                 set_bit(bitmap_idx, dev->sdb_page);
1151                 spin_unlock(&dev->db_bitmap_lock);
1152
1153                 ctx->sdb_type = ERDMA_SDB_PAGE;
1154                 ctx->sdb_idx = bitmap_idx;
1155                 ctx->sdb_page_idx = bitmap_idx;
1156                 ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1157                            (bitmap_idx << PAGE_SHIFT);
1158                 ctx->sdb_page_off = 0;
1159
1160                 return;
1161         }
1162
1163         bitmap_idx = find_first_zero_bit(dev->sdb_entry, attrs->dwqe_entries);
1164         if (bitmap_idx != attrs->dwqe_entries) {
1165                 set_bit(bitmap_idx, dev->sdb_entry);
1166                 spin_unlock(&dev->db_bitmap_lock);
1167
1168                 ctx->sdb_type = ERDMA_SDB_ENTRY;
1169                 ctx->sdb_idx = bitmap_idx;
1170                 ctx->sdb_page_idx = attrs->dwqe_pages +
1171                                     bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1172                 ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE;
1173
1174                 ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET +
1175                            (ctx->sdb_page_idx << PAGE_SHIFT);
1176
1177                 return;
1178         }
1179
1180         spin_unlock(&dev->db_bitmap_lock);
1181
1182 alloc_normal_db:
1183         ctx->sdb_type = ERDMA_SDB_SHARED;
1184         ctx->sdb_idx = 0;
1185         ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX;
1186         ctx->sdb_page_off = 0;
1187
1188         ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT);
1189 }
1190
1191 static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx)
1192 {
1193         rdma_user_mmap_entry_remove(uctx->sq_db_mmap_entry);
1194         rdma_user_mmap_entry_remove(uctx->rq_db_mmap_entry);
1195         rdma_user_mmap_entry_remove(uctx->cq_db_mmap_entry);
1196 }
1197
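/*
 * .alloc_ucontext verb: enforce the context limit, hand out doorbell
 * resources, and expose the SQ/RQ/CQ doorbell regions to userspace as
 * mmapable entries described in the alloc_ctx response.
 */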
1198 int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata)
1199 {
1200         struct erdma_ucontext *ctx = to_ectx(ibctx);
1201         struct erdma_dev *dev = to_edev(ibctx->device);
1202         int ret;
1203         struct erdma_uresp_alloc_ctx uresp = {};
1204
1205         if (atomic_inc_return(&dev->num_ctx) > ERDMA_MAX_CONTEXT) {
1206                 ret = -ENOMEM;
1207                 goto err_out;
1208         }
1209
1210         INIT_LIST_HEAD(&ctx->dbrecords_page_list);
1211         mutex_init(&ctx->dbrecords_page_mutex);
1212
1213         alloc_db_resources(dev, ctx);
1214
1215         ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET;
1216         ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET;
1217
1218         if (udata->outlen < sizeof(uresp)) {
1219                 ret = -EINVAL;
1220                 goto err_out;
1221         }
1222
1223         ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert(
1224                 ctx, (void *)ctx->sdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb);
1225         if (!ctx->sq_db_mmap_entry) {
1226                 ret = -ENOMEM;
1227                 goto err_out;
1228         }
1229
1230         ctx->rq_db_mmap_entry = erdma_user_mmap_entry_insert(
1231                 ctx, (void *)ctx->rdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.rdb);
1232         if (!ctx->rq_db_mmap_entry) {
1233                 ret = -EINVAL;
1234                 goto err_out;
1235         }
1236
1237         ctx->cq_db_mmap_entry = erdma_user_mmap_entry_insert(
1238                 ctx, (void *)ctx->cdb, PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.cdb);
1239         if (!ctx->cq_db_mmap_entry) {
1240                 ret = -EINVAL;
1241                 goto err_out;
1242         }
1243
1244         uresp.dev_id = dev->pdev->device;
1245         uresp.sdb_type = ctx->sdb_type;
1246         uresp.sdb_offset = ctx->sdb_page_off;
1247
1248         ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1249         if (ret)
1250                 goto err_out;
1251
1252         return 0;
1253
1254 err_out:
1255         erdma_uctx_user_mmap_entries_remove(ctx);
1256         atomic_dec(&dev->num_ctx);
1257         return ret;
1258 }
1259
1260 void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
1261 {
1262         struct erdma_ucontext *ctx = to_ectx(ibctx);
1263         struct erdma_dev *dev = to_edev(ibctx->device);
1264
1265         spin_lock(&dev->db_bitmap_lock);
1266         if (ctx->sdb_type == ERDMA_SDB_PAGE)
1267                 clear_bit(ctx->sdb_idx, dev->sdb_page);
1268         else if (ctx->sdb_type == ERDMA_SDB_ENTRY)
1269                 clear_bit(ctx->sdb_idx, dev->sdb_entry);
1270
1271         erdma_uctx_user_mmap_entries_remove(ctx);
1272
1273         spin_unlock(&dev->db_bitmap_lock);
1274
1275         atomic_dec(&dev->num_ctx);
1276 }
1277
1278 static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
1279         [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
1280         [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
1281         [IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
1282         [IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
1283         [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
1284         [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
1285         [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
1286 };
1287
1288 int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
1289                     struct ib_udata *udata)
1290 {
1291         struct erdma_qp_attrs new_attrs;
1292         enum erdma_qp_attr_mask erdma_attr_mask = 0;
1293         struct erdma_qp *qp = to_eqp(ibqp);
1294         int ret = 0;
1295
1296         if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
1297                 return -EOPNOTSUPP;
1298
1299         memset(&new_attrs, 0, sizeof(new_attrs));
1300
1301         if (attr_mask & IB_QP_STATE) {
1302                 new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
1303
1304                 erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
1305         }
1306
1307         down_write(&qp->state_lock);
1308
1309         ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
1310
1311         up_write(&qp->state_lock);
1312
1313         return ret;
1314 }
1315
1316 int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
1317                    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
1318 {
1319         struct erdma_qp *qp;
1320         struct erdma_dev *dev;
1321
1322         if (ibqp && qp_attr && qp_init_attr) {
1323                 qp = to_eqp(ibqp);
1324                 dev = to_edev(ibqp->device);
1325         } else {
1326                 return -EINVAL;
1327         }
1328
1329         qp_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1330         qp_init_attr->cap.max_inline_data = ERDMA_MAX_INLINE;
1331
1332         qp_attr->cap.max_send_wr = qp->attrs.sq_size;
1333         qp_attr->cap.max_recv_wr = qp->attrs.rq_size;
1334         qp_attr->cap.max_send_sge = qp->attrs.max_send_sge;
1335         qp_attr->cap.max_recv_sge = qp->attrs.max_recv_sge;
1336
1337         qp_attr->path_mtu = ib_mtu_int_to_enum(dev->netdev->mtu);
1338         qp_attr->max_rd_atomic = qp->attrs.irq_size;
1339         qp_attr->max_dest_rd_atomic = qp->attrs.orq_size;
1340
1341         qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
1342                                    IB_ACCESS_REMOTE_WRITE |
1343                                    IB_ACCESS_REMOTE_READ;
1344
1345         qp_init_attr->cap = qp_attr->cap;
1346
1347         return 0;
1348 }
1349
1350 static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
1351                               struct erdma_ureq_create_cq *ureq)
1352 {
1353         int ret;
1354         struct erdma_dev *dev = to_edev(cq->ibcq.device);
1355
1356         ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
1357                               ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
1358                               1);
1359         if (ret)
1360                 return ret;
1361
1362         ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
1363                                        &cq->user_cq.user_dbr_page,
1364                                        &cq->user_cq.db_info_dma_addr);
1365         if (ret)
1366                 put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1367
1368         return ret;
1369 }
1370
1371 static int erdma_init_kernel_cq(struct erdma_cq *cq)
1372 {
1373         struct erdma_dev *dev = to_edev(cq->ibcq.device);
1374
1375         cq->kern_cq.qbuf =
1376                 dma_alloc_coherent(&dev->pdev->dev,
1377                                    WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
1378                                    &cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
1379         if (!cq->kern_cq.qbuf)
1380                 return -ENOMEM;
1381
1382         cq->kern_cq.db_record =
1383                 (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
1384         spin_lock_init(&cq->kern_cq.lock);
1385         /* use default cqdb addr */
1386         cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
1387
1388         return 0;
1389 }
1390
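/*
 * .create_cq verb: round the depth up to a power of two, allocate a CQN
 * from the xarray, prepare either user (pinned qbuf plus doorbell
 * record) or kernel (coherent qbuf) resources, and post CREATE_CQ.
 */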
1391 int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1392                     struct ib_udata *udata)
1393 {
1394         struct erdma_cq *cq = to_ecq(ibcq);
1395         struct erdma_dev *dev = to_edev(ibcq->device);
1396         unsigned int depth = attr->cqe;
1397         int ret;
1398         struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
1399                 udata, struct erdma_ucontext, ibucontext);
1400
1401         if (depth > dev->attrs.max_cqe)
1402                 return -EINVAL;
1403
1404         depth = roundup_pow_of_two(depth);
1405         cq->ibcq.cqe = depth;
1406         cq->depth = depth;
1407         cq->assoc_eqn = attr->comp_vector + 1;
1408
1409         ret = xa_alloc_cyclic(&dev->cq_xa, &cq->cqn, cq,
1410                               XA_LIMIT(1, dev->attrs.max_cq - 1),
1411                               &dev->next_alloc_cqn, GFP_KERNEL);
1412         if (ret < 0)
1413                 return ret;
1414
1415         if (!rdma_is_kernel_res(&ibcq->res)) {
1416                 struct erdma_ureq_create_cq ureq;
1417                 struct erdma_uresp_create_cq uresp;
1418
1419                 ret = ib_copy_from_udata(&ureq, udata,
1420                                          min(udata->inlen, sizeof(ureq)));
1421                 if (ret)
1422                         goto err_out_xa;
1423
1424                 ret = erdma_init_user_cq(ctx, cq, &ureq);
1425                 if (ret)
1426                         goto err_out_xa;
1427
1428                 uresp.cq_id = cq->cqn;
1429                 uresp.num_cqe = depth;
1430
1431                 ret = ib_copy_to_udata(udata, &uresp,
1432                                        min(sizeof(uresp), udata->outlen));
1433                 if (ret)
1434                         goto err_free_res;
1435         } else {
1436                 ret = erdma_init_kernel_cq(cq);
1437                 if (ret)
1438                         goto err_out_xa;
1439         }
1440
1441         ret = create_cq_cmd(dev, cq);
1442         if (ret)
1443                 goto err_free_res;
1444
1445         return 0;
1446
1447 err_free_res:
1448         if (!rdma_is_kernel_res(&ibcq->res)) {
1449                 erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
1450                 put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
1451         } else {
1452                 dma_free_coherent(&dev->pdev->dev,
1453                                   WARPPED_BUFSIZE(depth << CQE_SHIFT),
1454                                   cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
1455         }
1456
1457 err_out_xa:
1458         xa_erase(&dev->cq_xa, cq->cqn);
1459
1460         return ret;
1461 }
1462
1463 void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
1464 {
1465         struct erdma_cmdq_config_mtu_req req;
1466
1467         erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
1468                                 CMDQ_OPCODE_CONF_MTU);
1469         req.mtu = mtu;
1470
1471         erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
1472 }
1473
1474 void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
1475 {
1476         struct ib_event event;
1477
1478         event.device = &dev->ibdev;
1479         event.element.port_num = 1;
1480         event.event = reason;
1481
1482         ib_dispatch_event(&event);
1483 }