1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Link Layer Control (LLC)
7 * Copyright IBM Corp. 2016
14 #include <rdma/ib_verbs.h>
/* Size of the raw payload view: it dimensions data[] in union smc_llc_msg. */
21 #define SMC_LLC_DATA_LEN 40
/*
 * Members of the LLC header that every message struct below embeds as hd.
 * NOTE(review): the struct opener and further members (this file also
 * accesses hd.length and hd.flags) are on lines missing from this listing.
 */
24 struct smc_wr_rx_hdr common;
/* the bit-field layout is selected by the bit-field endianness of the build */
26 #if defined(__BIG_ENDIAN_BITFIELD)
29 #elif defined(__LITTLE_ENDIAN_BITFIELD)
30 u8 add_link_rej_rsn:4,
/* flag bits set on outgoing CONFIRM LINK and tested on incoming CONFIRM LINK */
36 #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
/*
 * CONFIRM LINK message. smc_llc_send_confirm_link() fills in the sender MAC
 * and GID, the QP number, the link number, the link uid and max_links.
 * NOTE(review): sender_qp_num, link_num and max_links are written by that
 * function but their declarations are on lines missing from this listing.
 */
38 struct smc_llc_msg_confirm_link { /* type 0x01 */
39 struct smc_llc_hdr hd;
40 u8 sender_mac[ETH_ALEN];
41 u8 sender_gid[SMC_GID_SIZE];
44 u8 link_uid[SMC_LGR_ID_SIZE];
/* flag and reason code that smc_llc_prep_add_link() puts into every response */
49 #define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
50 #define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
/* value sent as max_links by smc_llc_send_confirm_link() */
52 #define SMC_LLC_ADD_LNK_MAX_LINKS 2
/*
 * ADD LINK message; smc_llc_prep_add_link() fills in the sender MAC and GID.
 * NOTE(review): further members are on lines missing from this listing.
 */
54 struct smc_llc_msg_add_link { /* type 0x02 */
55 struct smc_llc_hdr hd;
56 u8 sender_mac[ETH_ALEN];
58 u8 sender_gid[SMC_GID_SIZE];
61 u8 flags2; /* QP mtu */
/* both flags are set unconditionally by smc_llc_prep_delete_link() */
66 #define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
67 #define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
/*
 * DELETE LINK message; smc_llc_prep_delete_link() stores the link id in
 * link_num. NOTE(review): the members after hd are on lines missing from
 * this listing.
 */
69 struct smc_llc_msg_del_link { /* type 0x04 */
70 struct smc_llc_hdr hd;
74 } __packed; /* format defined in RFC7609 */
/*
 * TEST LINK message; smc_llc_send_test_link() copies caller data into its
 * user_data member. NOTE(review): that member is declared on a line missing
 * from this listing.
 */
76 struct smc_llc_msg_test_link { /* type 0x07 */
77 struct smc_llc_hdr hd;
/*
 * One remote token entry of a CONFIRM RKEY message. The sender in this file
 * fills rmb_key and rmb_vaddr of entry 0. NOTE(review): those two members and
 * whatever groups num_rkeys with link_id are on lines missing from this
 * listing - presumably a union over the first byte, to be confirmed.
 */
82 struct smc_rmb_rtoken {
84 u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
85 /* is actually the num of rtokens, first */
86 /* rtoken is always for the current link */
87 u8 link_id; /* link id of the rtoken */
91 } __packed; /* format defined in RFC7609 */
/* number of rtoken entries carried by one CONFIRM RKEY (CONT) message */
93 #define SMC_LLC_RKEYS_PER_MSG 3
/*
 * CONFIRM RKEY message. This file only fills and evaluates rtoken[0]; the
 * receive path notes that rtokens for other links are ignored.
 */
95 struct smc_llc_msg_confirm_rkey { /* type 0x06 */
96 struct smc_llc_hdr hd;
97 struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
/*
 * CONFIRM RKEY continuation message. This file never sends it as a request;
 * the receive handler only echoes it back with the response flag set.
 * NOTE(review): a member between hd and rtoken is on a line missing from this
 * listing.
 */
101 struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */
102 struct smc_llc_hdr hd;
104 struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
/* upper bound applied to num_rkeys when a DELETE RKEY request is processed */
107 #define SMC_LLC_DEL_RKEY_MAX 8
/* set in hd.flags of a response to report that an rkey operation failed */
108 #define SMC_LLC_FLAG_RKEY_NEG 0x20
/*
 * DELETE RKEY message. The receive handler reads num_rkeys and rkey[] and
 * writes err_mask. NOTE(review): those members are on lines missing from this
 * listing.
 */
110 struct smc_llc_msg_delete_rkey { /* type 0x09 */
111 struct smc_llc_hdr hd;
/*
 * Members of union smc_llc_msg: one typed view per LLC message plus a raw
 * view made of the header and SMC_LLC_DATA_LEN payload bytes. The receive
 * handler validates and dispatches through raw.hdr. NOTE(review): the union
 * opener and the wrapper of the raw view are on lines missing from this
 * listing.
 */
120 struct smc_llc_msg_confirm_link confirm_link;
121 struct smc_llc_msg_add_link add_link;
122 struct smc_llc_msg_del_link delete_link;
124 struct smc_llc_msg_confirm_rkey confirm_rkey;
125 struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
126 struct smc_llc_msg_delete_rkey delete_rkey;
128 struct smc_llc_msg_test_link test_link;
130 struct smc_llc_hdr hdr;
131 u8 data[SMC_LLC_DATA_LEN];
/* set in hd.flags to mark a message as a response rather than a request */
135 #define SMC_LLC_FLAG_RESP 0x80
137 /********************************** send *************************************/
/*
 * Private per-send context of the LLC layer. Its size is checked at build
 * time against SMC_WR_TX_PEND_PRIV_SIZE in smc_llc_add_pending_send(). No
 * member is visible in this listing.
 */
139 struct smc_llc_tx_pend {
/*
 * Passed to smc_wr_tx_get_free_slot() by smc_llc_add_pending_send() as the
 * completion callback for LLC sends. The visible body holds no statement,
 * only the note about future error handling.
 */
142 /* handler for send/transmission completion of an LLC msg */
143 static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
144 struct smc_link *link,
145 enum ib_wc_status wc_status)
147 /* future work: handle wc_status error for recovery and failover */
/*
 * Reserves a send slot through smc_wr_tx_get_free_slot() with
 * smc_llc_tx_handler as completion callback, and carries three build-time
 * size checks tying union smc_llc_msg and struct smc_llc_tx_pend to the
 * smc_wr buffer size constants.
 * NOTE(review): the kernel-doc delimiters, the rc declaration and check, the
 * openers of the three assertions and the final return are on lines missing
 * from this listing. The assertion texts say struct smc_llc_msg although the
 * type is a union.
 */
151 * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
152 * @link: Pointer to SMC link used for sending LLC control message.
153 * @wr_buf: Out variable returning pointer to work request payload buffer.
154 * @pend: Out variable returning pointer to private pending WR tracking.
155 * It's the context the transmit complete handler will get.
157 * Reserves and pre-fills an entry for a pending work request send/tx.
158 * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
159 * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
161 * Return: 0 on success, otherwise an error value.
163 static int smc_llc_add_pending_send(struct smc_link *link,
164 struct smc_wr_buf **wr_buf,
165 struct smc_wr_tx_pend_priv **pend)
169 rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, pend);
173 sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
174 "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
176 sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
177 "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
179 sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
180 "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
/*
 * Builds a CONFIRM LINK message in a freshly reserved send buffer and posts
 * it with smc_wr_tx_send().
 * @link:    link to send on; supplies the QP number and the link id
 * @mac:     sender MAC, ETH_ALEN bytes are copied into the message
 * @reqresp: SMC_LLC_RESP additionally sets SMC_LLC_FLAG_RESP
 * The body also copies SMC_GID_SIZE bytes from gid.
 * NOTE(review): the gid parameter, the rc declaration, the rc check after
 * the slot reservation and the final return are on lines missing from this
 * listing.
 */
184 /* high-level API to send LLC confirm link */
185 int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
187 enum smc_llc_reqresp reqresp)
189 struct smc_link_group *lgr = container_of(link, struct smc_link_group,
190 lnk[SMC_SINGLE_LINK]);
191 struct smc_llc_msg_confirm_link *confllc;
192 struct smc_wr_tx_pend_priv *pend;
193 struct smc_wr_buf *wr_buf;
196 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
/* the reserved buffer is used directly as the message and cleared first */
199 confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
200 memset(confllc, 0, sizeof(*confllc));
201 confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
202 confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
203 confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
204 if (reqresp == SMC_LLC_RESP)
205 confllc->hd.flags |= SMC_LLC_FLAG_RESP;
206 memcpy(confllc->sender_mac, mac, ETH_ALEN);
207 memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
208 hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
209 confllc->link_num = link->link_id;
/* the id of the owning link group is sent as link uid */
210 memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
211 confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
212 /* send llc message */
213 rc = smc_wr_tx_send(link, pend);
/*
 * Builds a CONFIRM RKEY request announcing one RMB and posts it on @link.
 * Only rtoken[0] is filled: the rkey of rmb_desc->mr_rx[SMC_SINGLE_LINK]
 * converted with htonl() and the DMA address of the first scatterlist entry
 * converted with cpu_to_be64().
 * NOTE(review): the rc declaration, the rc check after the slot reservation
 * and the final return are on lines missing from this listing.
 */
217 /* send LLC confirm rkey request */
218 static int smc_llc_send_confirm_rkey(struct smc_link *link,
219 struct smc_buf_desc *rmb_desc)
221 struct smc_llc_msg_confirm_rkey *rkeyllc;
222 struct smc_wr_tx_pend_priv *pend;
223 struct smc_wr_buf *wr_buf;
226 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
229 rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
230 memset(rkeyllc, 0, sizeof(*rkeyllc));
231 rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
232 rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
233 rkeyllc->rtoken[0].rmb_key =
234 htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
235 rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
236 (u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
237 /* send llc message */
238 rc = smc_wr_tx_send(link, pend);
/*
 * Fills @addllc as an ADD LINK message: clears it, sets type and length and
 * copies the sender MAC and GID. For SMC_LLC_RESP the response flag, the
 * reject flag and reject reason NO_ALT_PATH are set as well, so every
 * response built here is a rejection.
 * NOTE(review): the gid parameter is on a line missing from this listing.
 * @link is not referenced in the visible lines.
 */
242 /* prepare an add link message */
243 static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
244 struct smc_link *link, u8 mac[],
246 enum smc_llc_reqresp reqresp)
248 memset(addllc, 0, sizeof(*addllc));
249 addllc->hd.common.type = SMC_LLC_ADD_LINK;
250 addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
251 if (reqresp == SMC_LLC_RESP) {
252 addllc->hd.flags |= SMC_LLC_FLAG_RESP;
253 /* always reject more links for now */
254 addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
255 addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
257 memcpy(addllc->sender_mac, mac, ETH_ALEN);
258 memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
/*
 * Reserves a send buffer, lets smc_llc_prep_add_link() build the ADD LINK
 * message in it and posts it with smc_wr_tx_send().
 * NOTE(review): the gid parameter, the rc declaration, the rc check after
 * the slot reservation and the final return are on lines missing from this
 * listing.
 */
261 /* send ADD LINK request or response */
262 int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
264 enum smc_llc_reqresp reqresp)
266 struct smc_llc_msg_add_link *addllc;
267 struct smc_wr_tx_pend_priv *pend;
268 struct smc_wr_buf *wr_buf;
271 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
274 addllc = (struct smc_llc_msg_add_link *)wr_buf;
275 smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
276 /* send llc message */
277 rc = smc_wr_tx_send(link, pend);
281 /* prepare a delete link message */
282 static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
283 struct smc_link *link,
284 enum smc_llc_reqresp reqresp)
286 memset(delllc, 0, sizeof(*delllc));
287 delllc->hd.common.type = SMC_LLC_DELETE_LINK;
288 delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
289 if (reqresp == SMC_LLC_RESP)
290 delllc->hd.flags |= SMC_LLC_FLAG_RESP;
291 /* DEL_LINK_ALL because only 1 link supported */
292 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
293 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
294 delllc->link_num = link->link_id;
/*
 * Reserves a send buffer, lets smc_llc_prep_delete_link() build the DELETE
 * LINK message in it and posts it with smc_wr_tx_send().
 * NOTE(review): the rc declaration, the rc check after the slot reservation
 * and the final return are on lines missing from this listing.
 */
297 /* send DELETE LINK request or response */
298 int smc_llc_send_delete_link(struct smc_link *link,
299 enum smc_llc_reqresp reqresp)
301 struct smc_llc_msg_del_link *delllc;
302 struct smc_wr_tx_pend_priv *pend;
303 struct smc_wr_buf *wr_buf;
306 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
309 delllc = (struct smc_llc_msg_del_link *)wr_buf;
310 smc_llc_prep_delete_link(delllc, link, reqresp);
311 /* send llc message */
312 rc = smc_wr_tx_send(link, pend);
/*
 * Builds a TEST LINK request carrying @user_data and posts it on @link.
 * sizeof(testllc->user_data) bytes are copied from the caller buffer.
 * NOTE(review): the rc declaration, the rc check after the slot reservation
 * and the final return are on lines missing from this listing.
 */
316 /* send LLC test link request */
317 static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
319 struct smc_llc_msg_test_link *testllc;
320 struct smc_wr_tx_pend_priv *pend;
321 struct smc_wr_buf *wr_buf;
324 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
327 testllc = (struct smc_llc_msg_test_link *)wr_buf;
328 memset(testllc, 0, sizeof(*testllc));
329 testllc->hd.common.type = SMC_LLC_TEST_LINK;
330 testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
331 memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
332 /* send llc message */
333 rc = smc_wr_tx_send(link, pend);
/*
 * Deferred-send request: a work item, the link to send on and a private copy
 * of the message. NOTE(review): the llclen member used by
 * smc_llc_send_message() and the worker is on a line missing from this
 * listing.
 */
337 struct smc_llc_send_work {
338 struct work_struct work;
339 struct smc_link *link;
341 union smc_llc_msg llcbuf;
/*
 * Work function for items queued by smc_llc_send_message(): checks the link
 * state, reserves a send buffer, copies llclen bytes of the stored message
 * into it and posts it. The result of smc_wr_tx_send() is not evaluated.
 * NOTE(review): the action taken for an inactive link, the rc check and the
 * tail of the function are on lines missing from this listing; confirm that
 * the tail releases llcwrk, which smc_llc_send_message() allocates.
 */
344 /* worker that sends a prepared message */
345 static void smc_llc_send_message_work(struct work_struct *work)
347 struct smc_llc_send_work *llcwrk = container_of(work,
348 struct smc_llc_send_work, work);
349 struct smc_wr_tx_pend_priv *pend;
350 struct smc_wr_buf *wr_buf;
353 if (llcwrk->link->state == SMC_LNK_INACTIVE)
355 rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
358 memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
359 smc_wr_tx_send(llcwrk->link, pend);
/*
 * Allocates a work item with GFP_ATOMIC, copies @llclen bytes of @llcbuf
 * into it and queues it on link->llc_wq, so the actual send happens in
 * smc_llc_send_message_work().
 * NOTE(review): the allocation failure check, the assignment of the link
 * member and the return statement are on lines missing from this listing.
 * No visible line bounds @llclen by sizeof(wrk->llcbuf); the callers in this
 * file pass the sizeof of one union smc_llc_msg member.
 */
364 /* copy llcbuf and schedule an llc send on link */
365 static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
367 struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
371 INIT_WORK(&wrk->work, smc_llc_send_message_work);
373 wrk->llclen = llclen;
374 memcpy(&wrk->llcbuf, llcbuf, llclen);
375 queue_work(link->llc_wq, &wrk->work);
379 /********************************* receive ***********************************/
/*
 * Handles a received CONFIRM LINK message. A result code conf_rc is derived
 * from the NO_RMBE_EYEC flag test. For a response, a server whose link is
 * ACTIVATING stores it in llc_confirm_resp_rc and completes
 * llc_confirm_resp. In the other branch a client whose link is ACTIVATING
 * stores it in llc_confirm_rc, adopts the peer link_num as link_id and
 * completes llc_confirm.
 * NOTE(review): the conf_rc declaration and assignments and the else line
 * separating the two branches are on lines missing from this listing.
 */
381 static void smc_llc_rx_confirm_link(struct smc_link *link,
382 struct smc_llc_msg_confirm_link *llc)
384 struct smc_link_group *lgr;
387 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
389 /* RMBE eyecatchers are not supported */
390 if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
395 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
396 if (lgr->role == SMC_SERV &&
397 link->state == SMC_LNK_ACTIVATING) {
398 link->llc_confirm_resp_rc = conf_rc;
399 complete(&link->llc_confirm_resp);
402 if (lgr->role == SMC_CLNT &&
403 link->state == SMC_LNK_ACTIVATING) {
404 link->llc_confirm_rc = conf_rc;
405 link->link_id = llc->link_num;
406 complete(&link->llc_confirm);
/*
 * Handles a received ADD LINK message. While the link is ACTIVATING a
 * response completes llc_add_resp and the other case completes llc_add.
 * Otherwise the received buffer is rebuilt in place with
 * smc_llc_prep_add_link(), using the MAC and GID of the local port, and
 * queued for sending.
 * NOTE(review): the else lines, any early return and the last argument of
 * both smc_llc_prep_add_link() calls are on lines missing from this listing,
 * so how the server and non-server cases differ cannot be told from here.
 */
411 static void smc_llc_rx_add_link(struct smc_link *link,
412 struct smc_llc_msg_add_link *llc)
414 struct smc_link_group *lgr = container_of(link, struct smc_link_group,
415 lnk[SMC_SINGLE_LINK]);
417 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
418 if (link->state == SMC_LNK_ACTIVATING)
419 complete(&link->llc_add_resp);
421 if (link->state == SMC_LNK_ACTIVATING) {
422 complete(&link->llc_add);
426 if (lgr->role == SMC_SERV) {
427 smc_llc_prep_add_link(llc, link,
428 link->smcibdev->mac[link->ibport - 1],
429 &link->smcibdev->gid[link->ibport - 1],
433 smc_llc_prep_add_link(llc, link,
434 link->smcibdev->mac[link->ibport - 1],
435 &link->smcibdev->gid[link->ibport - 1],
438 smc_llc_send_message(link, llc, sizeof(*llc));
/*
 * Handles a received DELETE LINK message. A response terminates the link
 * group when the local role is server. In the other branch a server rebuilds
 * the buffer as a DELETE LINK request and queues it; the remaining case
 * rebuilds it as a response, queues it and terminates the link group.
 * NOTE(review): the else lines separating these branches are on lines
 * missing from this listing.
 */
442 static void smc_llc_rx_delete_link(struct smc_link *link,
443 struct smc_llc_msg_del_link *llc)
445 struct smc_link_group *lgr = container_of(link, struct smc_link_group,
446 lnk[SMC_SINGLE_LINK]);
448 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
449 if (lgr->role == SMC_SERV)
450 smc_lgr_terminate(lgr);
452 if (lgr->role == SMC_SERV) {
454 smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ);
455 smc_llc_send_message(link, llc, sizeof(*llc));
457 smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP);
458 smc_llc_send_message(link, llc, sizeof(*llc));
459 smc_lgr_terminate(lgr);
/*
 * Handles a received TEST LINK message. A response on an ACTIVE link
 * completes llc_testlink_resp, which smc_llc_testlink_work() waits on. The
 * other branch sets the response flag in the received buffer and queues it
 * back to the peer. NOTE(review): the else line between the two branches is
 * missing from this listing.
 */
464 static void smc_llc_rx_test_link(struct smc_link *link,
465 struct smc_llc_msg_test_link *llc)
467 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
468 if (link->state == SMC_LNK_ACTIVE)
469 complete(&link->llc_testlink_resp);
471 llc->hd.flags |= SMC_LLC_FLAG_RESP;
472 smc_llc_send_message(link, llc, sizeof(*llc));
/*
 * Handles a received CONFIRM RKEY message. For a response the RKEY_NEG bit
 * of the flags is stored in llc_confirm_rkey_rc and llc_confirm_rkey is
 * completed. In the other branch rtoken[0] is registered with
 * smc_rtoken_add() and the buffer is sent back with the response flag set,
 * plus RKEY_NEG in the failure case.
 * NOTE(review): the rc declaration, the else line and the condition guarding
 * the RKEY_NEG assignment are on lines missing from this listing.
 */
476 static void smc_llc_rx_confirm_rkey(struct smc_link *link,
477 struct smc_llc_msg_confirm_rkey *llc)
479 struct smc_link_group *lgr;
482 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
484 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
485 link->llc_confirm_rkey_rc = llc->hd.flags &
486 SMC_LLC_FLAG_RKEY_NEG;
487 complete(&link->llc_confirm_rkey);
489 rc = smc_rtoken_add(lgr,
490 llc->rtoken[0].rmb_vaddr,
491 llc->rtoken[0].rmb_key);
493 /* ignore rtokens for other links, we have only one link */
495 llc->hd.flags |= SMC_LLC_FLAG_RESP;
497 llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
498 smc_llc_send_message(link, llc, sizeof(*llc));
/*
 * Handles a received CONFIRM RKEY CONT message. Nothing visible is done for
 * a response; in the other branch the buffer is sent back with the response
 * flag set and the rtokens it carries are not evaluated.
 * NOTE(review): the else line between the two branches is missing from this
 * listing.
 */
502 static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
503 struct smc_llc_msg_confirm_rkey_cont *llc)
505 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
506 /* unused as long as we don't send this type of msg */
508 /* ignore rtokens for other links, we have only one link */
509 llc->hd.flags |= SMC_LLC_FLAG_RESP;
510 smc_llc_send_message(link, llc, sizeof(*llc));
/*
 * Handles a received DELETE RKEY message. Nothing visible is done for a
 * response. In the other branch up to SMC_LLC_DEL_RKEY_MAX of the listed
 * rkeys are passed to smc_rtoken_delete(); a failure for entry i sets bit
 * (SMC_LLC_DEL_RKEY_MAX - 1 - i) of err_mask, so entry 0 maps to the highest
 * bit. The buffer is then sent back with the response flag set, carrying
 * RKEY_NEG and err_mask when reported.
 * NOTE(review): the declarations of err_mask, i and max, the else line and
 * the condition guarding the RKEY_NEG assignment are on lines missing from
 * this listing.
 */
514 static void smc_llc_rx_delete_rkey(struct smc_link *link,
515 struct smc_llc_msg_delete_rkey *llc)
517 struct smc_link_group *lgr;
521 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
523 if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
524 /* unused as long as we don't send this type of msg */
/* clamp the peer supplied count to the capacity of the message */
526 max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
527 for (i = 0; i < max; i++) {
528 if (smc_rtoken_delete(lgr, llc->rkey[i]))
529 err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
533 llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
534 llc->err_mask = err_mask;
537 llc->hd.flags |= SMC_LLC_FLAG_RESP;
538 smc_llc_send_message(link, llc, sizeof(*llc));
/*
 * Receive entry point registered for every LLC message type in
 * smc_llc_rx_handlers[]. The link is taken from the QP context of @wc. A
 * message is dropped when fewer than sizeof(union smc_llc_msg) bytes were
 * received, when its length field differs from that size, or when the link
 * is INACTIVE. Otherwise it is dispatched on the type in the raw header to
 * the matching smc_llc_rx_*() function.
 * NOTE(review): the break statements and any default label are on lines
 * missing from this listing.
 */
542 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
544 struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
545 union smc_llc_msg *llc = buf;
547 if (wc->byte_len < sizeof(*llc))
548 return; /* short message */
549 if (llc->raw.hdr.length != sizeof(*llc))
550 return; /* invalid message */
551 if (link->state == SMC_LNK_INACTIVE)
552 return; /* link not active, drop msg */
554 switch (llc->raw.hdr.common.type) {
555 case SMC_LLC_TEST_LINK:
556 smc_llc_rx_test_link(link, &llc->test_link);
558 case SMC_LLC_CONFIRM_LINK:
559 smc_llc_rx_confirm_link(link, &llc->confirm_link);
561 case SMC_LLC_ADD_LINK:
562 smc_llc_rx_add_link(link, &llc->add_link);
564 case SMC_LLC_DELETE_LINK:
565 smc_llc_rx_delete_link(link, &llc->delete_link);
567 case SMC_LLC_CONFIRM_RKEY:
568 smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
570 case SMC_LLC_CONFIRM_RKEY_CONT:
571 smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
573 case SMC_LLC_DELETE_RKEY:
574 smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
579 /***************************** worker, utils *********************************/
/*
 * Delayed work that probes an ACTIVE link with TEST LINK. It stops without
 * rescheduling when the link is not ACTIVE. The expiry time is the last
 * receive timestamp plus llc_testlink_time; while that lies in the future
 * only the remaining interval is computed. Otherwise a TEST LINK request
 * with zeroed user data is sent and the response is awaited; the link group
 * is terminated on the failure path. The work requeues itself on llc_wq.
 * NOTE(review): the rc declaration, the wait timeout argument, the rc check,
 * the jump and label lines and the delay argument of queue_delayed_work()
 * are on lines missing from this listing. The return value of
 * smc_llc_send_test_link() is not evaluated in the visible lines, so a
 * failed send is only noticed through the wait timing out.
 */
581 static void smc_llc_testlink_work(struct work_struct *work)
583 struct smc_link *link = container_of(to_delayed_work(work),
584 struct smc_link, llc_testlink_wrk);
585 unsigned long next_interval;
586 struct smc_link_group *lgr;
587 unsigned long expire_time;
588 u8 user_data[16] = { 0 };
591 lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
592 if (link->state != SMC_LNK_ACTIVE)
593 return; /* don't reschedule worker */
594 expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
595 if (time_is_after_jiffies(expire_time)) {
596 next_interval = expire_time - jiffies;
599 reinit_completion(&link->llc_testlink_resp);
600 smc_llc_send_test_link(link, user_data);
601 /* receive TEST LINK response over RoCE fabric */
602 rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
605 smc_lgr_terminate(lgr);
608 next_interval = link->llc_testlink_time;
610 queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
/*
 * Sets up the LLC state of @link: allocates the ordered llc_wq workqueue
 * with WQ_MEM_RECLAIM, initialises the completions used by the LLC flows and
 * binds smc_llc_testlink_work() to the delayed test link work.
 * NOTE(review): the format arguments of the workqueue name, the allocation
 * failure check and the return statement are on lines missing from this
 * listing. The name format ends in an unmatched closing parenthesis, which
 * looks like a typo.
 */
614 int smc_llc_link_init(struct smc_link *link)
616 struct smc_link_group *lgr = container_of(link, struct smc_link_group,
617 lnk[SMC_SINGLE_LINK]);
618 link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
623 init_completion(&link->llc_confirm);
624 init_completion(&link->llc_confirm_resp);
625 init_completion(&link->llc_add);
626 init_completion(&link->llc_add_resp);
627 init_completion(&link->llc_confirm_rkey);
628 init_completion(&link->llc_testlink_resp);
629 INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
/*
 * Marks @link ACTIVE and schedules the periodic test link work.
 * @testlink_time is multiplied by HZ, so it is a number of seconds.
 * NOTE(review): a line between the state assignment and the interval setup
 * is missing from this listing - presumably a guard on @testlink_time, to be
 * confirmed.
 */
633 void smc_llc_link_active(struct smc_link *link, int testlink_time)
635 link->state = SMC_LNK_ACTIVE;
637 link->llc_testlink_time = testlink_time * HZ;
638 queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
639 link->llc_testlink_time);
643 /* called in tasklet context */
644 void smc_llc_link_inactive(struct smc_link *link)
646 link->state = SMC_LNK_INACTIVE;
647 cancel_delayed_work(&link->llc_testlink_wrk);
650 /* called in worker context */
651 void smc_llc_link_clear(struct smc_link *link)
653 flush_workqueue(link->llc_wq);
654 destroy_workqueue(link->llc_wq);
/*
 * Announces @rmb_desc to the peer with a CONFIRM RKEY request and waits for
 * the response, which smc_llc_rx_confirm_rkey() signals through
 * llc_confirm_rkey. A wait result <= 0 or a non-zero llc_confirm_rkey_rc
 * selects the failure path.
 * NOTE(review): the rc declaration, the wait timeout argument and both
 * return statements are on lines missing from this listing. The return value
 * of smc_llc_send_confirm_rkey() is not evaluated in the visible lines, so a
 * failed send is only noticed through the wait timing out.
 */
657 /* register a new rtoken at the remote peer */
658 int smc_llc_do_confirm_rkey(struct smc_link *link,
659 struct smc_buf_desc *rmb_desc)
663 reinit_completion(&link->llc_confirm_rkey);
664 smc_llc_send_confirm_rkey(link, rmb_desc);
665 /* receive CONFIRM RKEY response from server over RoCE fabric */
666 rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
668 if (rc <= 0 || link->llc_confirm_rkey_rc)
673 /***************************** init, exit, misc ******************************/
/*
 * Table mapping each LLC message type to smc_llc_rx_handler(), which does
 * the per-type dispatch itself. smc_llc_init() walks the table until it
 * meets an entry whose handler is NULL. NOTE(review): the braces around each
 * entry and that terminating entry are on lines missing from this listing.
 */
675 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
677 .handler = smc_llc_rx_handler,
678 .type = SMC_LLC_CONFIRM_LINK
681 .handler = smc_llc_rx_handler,
682 .type = SMC_LLC_TEST_LINK
685 .handler = smc_llc_rx_handler,
686 .type = SMC_LLC_ADD_LINK
689 .handler = smc_llc_rx_handler,
690 .type = SMC_LLC_DELETE_LINK
693 .handler = smc_llc_rx_handler,
694 .type = SMC_LLC_CONFIRM_RKEY
697 .handler = smc_llc_rx_handler,
698 .type = SMC_LLC_CONFIRM_RKEY_CONT
701 .handler = smc_llc_rx_handler,
702 .type = SMC_LLC_DELETE_RKEY
709 int __init smc_llc_init(void)
711 struct smc_wr_rx_handler *handler;
714 for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
715 INIT_HLIST_NODE(&handler->list);
716 rc = smc_wr_rx_register_handler(handler);