// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Copyright (c) 2020-2022, Alibaba Group. */

#include "erdma.h"
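/*
 * Doorbell writes in this file follow a two-step pattern: the 64-bit doorbell
 * value is first stored to a DMA-visible doorbell record (dbrec) in host
 * memory and then written to the device's MMIO doorbell register, presumably
 * so the device can also pick up the latest doorbell state from host memory.
 */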
static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CQDB_CI_MASK, cmdq->cq.ci) |
		      FIELD_PREP(ERDMA_CQDB_ARM_MASK, 1) |
		      FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
		      FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);

	*cmdq->cq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);

	atomic64_inc(&cmdq->cq.armed_num);
}
static void kick_cmdq_db(struct erdma_cmdq *cmdq)
{
	struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);

	*cmdq->sq.dbrec = db_data;
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}
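/*
 * Each in-flight command owns one erdma_comp_wait context. Contexts are
 * tracked in a bitmap under cmdq->lock, and the context index doubles as the
 * cookie carried in the SQE header so the completion path can find the
 * waiter again.
 */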
static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq)
{
	int comp_idx;

	spin_lock(&cmdq->lock);
	comp_idx = find_first_zero_bit(cmdq->comp_wait_bitmap,
				       cmdq->max_outstandings);
	if (comp_idx == cmdq->max_outstandings) {
		spin_unlock(&cmdq->lock);
		return ERR_PTR(-ENOMEM);
	}

	__set_bit(comp_idx, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	return &cmdq->wait_pool[comp_idx];
}
static void put_comp_wait(struct erdma_cmdq *cmdq,
			  struct erdma_comp_wait *comp_wait)
{
	int used;

	cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT;
	spin_lock(&cmdq->lock);
	used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap);
	spin_unlock(&cmdq->lock);

	WARN_ON(!used);
}
static int erdma_cmdq_wait_res_init(struct erdma_dev *dev,
				    struct erdma_cmdq *cmdq)
{
	int i;

	cmdq->wait_pool =
		devm_kcalloc(&dev->pdev->dev, cmdq->max_outstandings,
			     sizeof(struct erdma_comp_wait), GFP_KERNEL);
	if (!cmdq->wait_pool)
		return -ENOMEM;

	spin_lock_init(&cmdq->lock);
	cmdq->comp_wait_bitmap = devm_bitmap_zalloc(
		&dev->pdev->dev, cmdq->max_outstandings, GFP_KERNEL);
	if (!cmdq->comp_wait_bitmap)
		return -ENOMEM;

	for (i = 0; i < cmdq->max_outstandings; i++) {
		init_completion(&cmdq->wait_pool[i].wait_event);
		cmdq->wait_pool[i].ctx_id = i;
	}

	return 0;
}
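/*
 * The command SQ is sized so that every outstanding command can occupy one
 * full command SQE: depth = max_outstandings * wqebb_cnt, where wqebb_cnt is
 * the number of WQE basic blocks needed for ERDMA_CMDQ_SQE_SIZE bytes.
 */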
static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_sq *sq = &cmdq->sq;

	sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
	sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;

	sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
				      &sq->qbuf_dma_addr, GFP_KERNEL);
	if (!sq->qbuf)
		return -ENOMEM;

	sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
	if (!sq->dbrec)
		goto err_out;

	spin_lock_init(&sq->lock);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_H_REG,
			  upper_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
			  lower_32_bits(sq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
			  sq->qbuf, sq->qbuf_dma_addr);

	return -ENOMEM;
}
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_cmdq_cq *cq = &cmdq->cq;

	cq->depth = cmdq->sq.depth;
	cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
				      &cq->qbuf_dma_addr, GFP_KERNEL);
	if (!cq->qbuf)
		return -ENOMEM;

	spin_lock_init(&cq->lock);

	cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
	if (!cq->dbrec)
		goto err_out;

	atomic64_set(&cq->armed_num, 0);

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_H_REG,
			  upper_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
			  lower_32_bits(cq->qbuf_dma_addr));
	erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
			  cq->qbuf_dma_addr);

	return -ENOMEM;
}
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	struct erdma_eq *eq = &cmdq->eq;

	eq->depth = cmdq->max_outstandings;
	eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT,
				      &eq->qbuf_dma_addr, GFP_KERNEL);
	if (!eq->qbuf)
		return -ENOMEM;

	spin_lock_init(&eq->lock);
	atomic64_set(&eq->event_num, 0);

	eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;
	eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma);
	if (!eq->dbrec)
		goto err_out;

	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
			  upper_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
			  lower_32_bits(eq->qbuf_dma_addr));
	erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
	erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);

	return 0;

err_out:
	dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf,
			  eq->qbuf_dma_addr);

	return -ENOMEM;
}
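/*
 * Bring-up order is SQ, then CQ, then EQ; each step programs the queue base
 * address, depth and doorbell-record address into device registers. The cmdq
 * starts in polling mode (use_event == false) and is switched to event mode
 * later by erdma_finish_cmdq_init().
 */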
int erdma_cmdq_init(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;
	int err;

	cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
	cmdq->use_event = false;

	sema_init(&cmdq->credits, cmdq->max_outstandings);

	err = erdma_cmdq_wait_res_init(dev, cmdq);
	if (err)
		return err;

	err = erdma_cmdq_sq_init(dev);
	if (err)
		return err;

	err = erdma_cmdq_cq_init(dev);
	if (err)
		goto err_destroy_sq;

	err = erdma_cmdq_eq_init(dev);
	if (err)
		goto err_destroy_cq;

	set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	return 0;

err_destroy_cq:
	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);

err_destroy_sq:
	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	return err;
}
void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
	/* After device init succeeds, switch the cmdq to event mode. */
	dev->cmdq.use_event = true;
	arm_cmdq_cq(&dev->cmdq);
}
void erdma_cmdq_destroy(struct erdma_dev *dev)
{
	struct erdma_cmdq *cmdq = &dev->cmdq;

	clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);

	dma_free_coherent(&dev->pdev->dev, cmdq->eq.depth << EQE_SHIFT,
			  cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
	dma_pool_free(dev->db_pool, cmdq->eq.dbrec, cmdq->eq.dbrec_dma);

	dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
			  cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
	dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);

	dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
			  cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
	dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
}
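/*
 * CQE ownership is tracked with a phase (owner) bit: the expected owner value
 * flips each time the consumer index wraps the queue, which is what the
 * "owner ^ !!(ci & depth)" check below tests.
 */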
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
{
	__be32 *cqe = get_queue_entry(cmdq->cq.qbuf, cmdq->cq.ci,
				      cmdq->cq.depth, CQE_SHIFT);
	u32 owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
			      be32_to_cpu(READ_ONCE(*cqe)));

	return owner ^ !!(cmdq->cq.ci & cmdq->cq.depth) ? cqe : NULL;
}
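/*
 * The first u64 of the request doubles as the SQE header: on top of the
 * module/opcode fields set by erdma_cmdq_build_reqhdr(), the push path adds
 * the new WQEBB producer index, the WQEBB count and the context cookie
 * (ctx_id) that the completion path uses to locate the matching waiter.
 */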
static void push_cmdq_sqe(struct erdma_cmdq *cmdq, u64 *req, size_t req_len,
			  struct erdma_comp_wait *comp_wait)
{
	u64 hdr = *req;
	__le64 *wqe;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_ISSUED;
	reinit_completion(&comp_wait->wait_event);
	comp_wait->sq_pi = cmdq->sq.pi;

	wqe = get_queue_entry(cmdq->sq.qbuf, cmdq->sq.pi, cmdq->sq.depth,
			      SQEBB_SHIFT);
	memcpy(wqe, req, req_len);

	cmdq->sq.pi += cmdq->sq.wqebb_cnt;
	hdr |= FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi) |
	       FIELD_PREP(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK,
			  comp_wait->ctx_id) |
	       FIELD_PREP(ERDMA_CMD_HDR_WQEBB_CNT_MASK, cmdq->sq.wqebb_cnt - 1);
	*wqe = cpu_to_le64(hdr);

	kick_cmdq_db(cmdq);
}
static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
{
	struct erdma_comp_wait *comp_wait;
	u32 hdr0, sqe_idx;
	__be32 *cqe;
	u16 ctx_id;
	u64 *sqe;

	cqe = get_next_valid_cmdq_cqe(cmdq);
	if (!cqe)
		return -EAGAIN;

	cmdq->cq.ci++;

	dma_rmb();
	hdr0 = be32_to_cpu(*cqe);
	sqe_idx = be32_to_cpu(*(cqe + 1));

	sqe = get_queue_entry(cmdq->sq.qbuf, sqe_idx, cmdq->sq.depth,
			      SQEBB_SHIFT);
	ctx_id = FIELD_GET(ERDMA_CMD_HDR_CONTEXT_COOKIE_MASK, *sqe);
	comp_wait = &cmdq->wait_pool[ctx_id];
	if (comp_wait->cmd_status != ERDMA_CMD_STATUS_ISSUED)
		return -EIO;

	comp_wait->cmd_status = ERDMA_CMD_STATUS_FINISHED;
	comp_wait->comp_status = FIELD_GET(ERDMA_CQE_HDR_SYNDROME_MASK, hdr0);
	cmdq->sq.ci += cmdq->sq.wqebb_cnt;
	/* Copy the 16 bytes of completion data after the CQE header out. */
	be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);

	if (cmdq->use_event)
		complete(&comp_wait->wait_event);

	return 0;
}
static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
{
	unsigned long flags;
	u16 comp_num;

	spin_lock_irqsave(&cmdq->cq.lock, flags);

	/* There can be at most max_outstandings completions to reap at
	 * one time.
	 */
	for (comp_num = 0; comp_num < cmdq->max_outstandings; comp_num++)
		if (erdma_poll_single_cmd_completion(cmdq))
			break;

	if (comp_num && cmdq->use_event)
		arm_cmdq_cq(cmdq);

	spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}
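/*
 * Event-mode completion path, expected to be called from the command EQ's
 * interrupt path: consume pending EQEs, reap the corresponding CQEs and
 * notify/re-arm the event queue.
 */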
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
	int got_event = 0;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
	    !cmdq->use_event)
		return;

	while (get_next_valid_eqe(&cmdq->eq)) {
		cmdq->eq.ci++;
		got_event++;
	}

	if (got_event) {
		cmdq->cq.cmdsn++;
		erdma_polling_cmd_completions(cmdq);
	}

	notify_eq(&cmdq->eq);
}
static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long comp_timeout = jiffies + msecs_to_jiffies(timeout);

	while (1) {
		erdma_polling_cmd_completions(cmdq);
		if (comp_ctx->cmd_status != ERDMA_CMD_STATUS_ISSUED)
			break;

		if (time_is_before_jiffies(comp_timeout))
			return -ETIME;

		/* Back off briefly before polling again. */
		msleep(20);
	}

	return 0;
}
static int erdma_wait_cmd_completion(struct erdma_comp_wait *comp_ctx,
				     struct erdma_cmdq *cmdq, u32 timeout)
{
	unsigned long flags = 0;

	wait_for_completion_timeout(&comp_ctx->wait_event,
				    msecs_to_jiffies(timeout));

	if (unlikely(comp_ctx->cmd_status != ERDMA_CMD_STATUS_FINISHED)) {
		spin_lock_irqsave(&cmdq->cq.lock, flags);
		comp_ctx->cmd_status = ERDMA_CMD_STATUS_TIMEOUT;
		spin_unlock_irqrestore(&cmdq->cq.lock, flags);
		return -ETIME;
	}

	return 0;
}
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
{
	*hdr = FIELD_PREP(ERDMA_CMD_HDR_SUB_MOD_MASK, mod) |
	       FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op);
}
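/*
 * erdma_post_cmd_wait() is the synchronous entry point for issuing a command:
 * take a credit, grab a wait context, push the SQE, then either sleep on the
 * completion (event mode) or poll for it (polling mode).
 *
 * A minimal caller sketch (the request type, fields and opcode names below
 * are illustrative only, not defined in this file):
 *
 *	struct erdma_cmdq_example_req req = {};
 *	u64 resp0, resp1;
 *	int ret;
 *
 *	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_EXAMPLE,
 *				CMDQ_OPCODE_EXAMPLE);
 *	req.example_field = 0;
 *	ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req),
 *				  &resp0, &resp1);
 */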
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
			u64 *resp0, u64 *resp1)
{
	struct erdma_comp_wait *comp_wait;
	int ret;

	if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
		return -ENODEV;

	down(&cmdq->credits);

	comp_wait = get_comp_wait(cmdq);
	if (IS_ERR(comp_wait)) {
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		set_bit(ERDMA_CMDQ_STATE_CTX_ERR_BIT, &cmdq->state);
		up(&cmdq->credits);
		return PTR_ERR(comp_wait);
	}

	spin_lock(&cmdq->sq.lock);
	push_cmdq_sqe(cmdq, req, req_size, comp_wait);
	spin_unlock(&cmdq->sq.lock);

	if (cmdq->use_event)
		ret = erdma_wait_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);
	else
		ret = erdma_poll_cmd_completion(comp_wait, cmdq,
						ERDMA_CMDQ_TIMEOUT_MS);

	if (ret) {
		set_bit(ERDMA_CMDQ_STATE_TIMEOUT_BIT, &cmdq->state);
		clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
		goto out;
	}

	if (comp_wait->comp_status)
		ret = -EIO;

	if (resp0 && resp1) {
		*resp0 = *((u64 *)&comp_wait->comp_data[0]);
		*resp1 = *((u64 *)&comp_wait->comp_data[2]);
	}
	put_comp_wait(cmdq, comp_wait);

out:
	up(&cmdq->credits);

	return ret;
}