1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Link Layer Control (LLC)
7 * Copyright IBM Corp. 2016
14 #include <rdma/ib_verbs.h>
22 #define SMC_LLC_DATA_LEN 40
25 struct smc_wr_rx_hdr common;
27 #if defined(__BIG_ENDIAN_BITFIELD)
30 #elif defined(__LITTLE_ENDIAN_BITFIELD)
31 u8 add_link_rej_rsn:4,
37 #define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03
/* CONFIRM LINK message body: the common LLC header followed by the
 * sender's MAC address, GID and the link UID.
 */
39 struct smc_llc_msg_confirm_link { /* type 0x01 */
40 struct smc_llc_hdr hd;
41 u8 sender_mac[ETH_ALEN];
42 u8 sender_gid[SMC_GID_SIZE];
45 u8 link_uid[SMC_LGR_ID_SIZE];
50 #define SMC_LLC_FLAG_ADD_LNK_REJ 0x40
51 #define SMC_LLC_REJ_RSN_NO_ALT_PATH 1
53 #define SMC_LLC_ADD_LNK_MAX_LINKS 2
/* ADD LINK message body: the common LLC header followed by the sender's
 * MAC address and GID; part of the layout depends on the bitfield
 * endianness of the build.
 */
55 struct smc_llc_msg_add_link { /* type 0x02 */
56 struct smc_llc_hdr hd;
57 u8 sender_mac[ETH_ALEN];
59 u8 sender_gid[SMC_GID_SIZE];
62 #if defined(__BIG_ENDIAN_BITFIELD)
65 #elif defined(__LITTLE_ENDIAN_BITFIELD)
73 struct smc_llc_msg_add_link_cont_rt {
79 #define SMC_LLC_RKEYS_PER_CONT_MSG 2
/* ADD LINK CONTINUATION message body: the common LLC header and an array
 * of SMC_LLC_RKEYS_PER_CONT_MSG rtoken entries; the struct is packed.
 */
81 struct smc_llc_msg_add_link_cont { /* type 0x03 */
82 struct smc_llc_hdr hd;
86 struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG];
88 } __packed; /* format defined in RFC7609 */
90 #define SMC_LLC_FLAG_DEL_LINK_ALL 0x40
91 #define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20
/* DELETE LINK message body, starting with the common LLC header; the
 * struct is packed.
 */
93 struct smc_llc_msg_del_link { /* type 0x04 */
94 struct smc_llc_hdr hd;
98 } __packed; /* format defined in RFC7609 */
/* TEST LINK message body, starting with the common LLC header. */
100 struct smc_llc_msg_test_link { /* type 0x07 */
101 struct smc_llc_hdr hd;
/* One rtoken entry as carried in rkey messages. The first byte is a
 * union-like field: a key count in the first entry, a link id otherwise
 * (see the member comments). The struct is packed.
 */
106 struct smc_rmb_rtoken {
108 u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */
109 /* is actually the num of rtokens, first */
110 /* rtoken is always for the current link */
111 u8 link_id; /* link id of the rtoken */
115 } __packed; /* format defined in RFC7609 */
117 #define SMC_LLC_RKEYS_PER_MSG 3
/* CONFIRM RKEY message body: the common LLC header followed by
 * SMC_LLC_RKEYS_PER_MSG rtoken entries.
 */
119 struct smc_llc_msg_confirm_rkey { /* type 0x06 */
120 struct smc_llc_hdr hd;
121 struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
125 #define SMC_LLC_DEL_RKEY_MAX 8
126 #define SMC_LLC_FLAG_RKEY_RETRY 0x10
127 #define SMC_LLC_FLAG_RKEY_NEG 0x20
/* DELETE RKEY message body, starting with the common LLC header. */
129 struct smc_llc_msg_delete_rkey { /* type 0x09 */
130 struct smc_llc_hdr hd;
139 struct smc_llc_msg_confirm_link confirm_link;
140 struct smc_llc_msg_add_link add_link;
141 struct smc_llc_msg_add_link_cont add_link_cont;
142 struct smc_llc_msg_del_link delete_link;
144 struct smc_llc_msg_confirm_rkey confirm_rkey;
145 struct smc_llc_msg_delete_rkey delete_rkey;
147 struct smc_llc_msg_test_link test_link;
149 struct smc_llc_hdr hdr;
150 u8 data[SMC_LLC_DATA_LEN];
154 #define SMC_LLC_FLAG_RESP 0x80
/* Queue entry for a received (or locally generated) LLC message: list
 * linkage, the link the message belongs to, and a copy of the message.
 */
156 struct smc_llc_qentry {
157 struct list_head list;
158 struct smc_link *link;
159 union smc_llc_msg msg;
162 static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc);
/* Picks up the queue entry currently attached to @flow (flow->qentry) and
 * yields a struct smc_llc_qentry pointer to the caller.
 * NOTE(review): the name suggests flow->qentry is cleared afterwards and
 * ownership moves to the caller - confirm.
 */
164 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
166 struct smc_llc_qentry *qentry = flow->qentry;
/* Operates on the queue entry currently attached to @flow, read from
 * flow->qentry.
 * NOTE(review): the name suggests the entry is detached and freed - confirm.
 */
172 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
174 struct smc_llc_qentry *qentry;
177 qentry = flow->qentry;
/* Attach @qentry to @flow by storing it in flow->qentry. */
183 static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
184 struct smc_llc_qentry *qentry)
186 flow->qentry = qentry;
/* Decides, under lgr->llc_flow_lock, what happens to the incoming @qentry.
 * While a flow is already active, an ADD_LINK or DELETE_LINK message is
 * parked in lgr->delayed_event if that slot is still empty; other requests
 * are forgotten (see the inline comments). Otherwise flow->type is derived
 * from the message type (ADD_LINK, DELETE_LINK, CONFIRM_RKEY/DELETE_RKEY
 * map to the RKEY flow, SMC_LLC_FLOW_NONE is the remaining value), a
 * delayed_event that equals @qentry is cleared, and the entry is attached
 * to @flow after the lock is dropped.
 */
189 /* try to start a new llc flow, initiated by an incoming llc msg */
190 static bool smc_llc_flow_start(struct smc_llc_flow *flow,
191 struct smc_llc_qentry *qentry)
193 struct smc_link_group *lgr = qentry->link->lgr;
195 spin_lock_bh(&lgr->llc_flow_lock);
197 /* a flow is already active */
198 if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
199 qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
200 !lgr->delayed_event) {
201 lgr->delayed_event = qentry;
203 /* forget this llc request */
206 spin_unlock_bh(&lgr->llc_flow_lock);
209 switch (qentry->msg.raw.hdr.common.type) {
210 case SMC_LLC_ADD_LINK:
211 flow->type = SMC_LLC_FLOW_ADD_LINK;
213 case SMC_LLC_DELETE_LINK:
214 flow->type = SMC_LLC_FLOW_DEL_LINK;
216 case SMC_LLC_CONFIRM_RKEY:
217 case SMC_LLC_DELETE_RKEY:
218 flow->type = SMC_LLC_FLOW_RKEY;
221 flow->type = SMC_LLC_FLOW_NONE;
223 if (qentry == lgr->delayed_event)
224 lgr->delayed_event = NULL;
225 spin_unlock_bh(&lgr->llc_flow_lock);
226 smc_llc_flow_qentry_set(flow, qentry);
/* Claims the local flow slot of @lgr for a flow of @type.
 * The slot is taken under llc_flow_lock when no local flow is active and
 * the remote flow is either idle or of the one type allowed to run
 * concurrently (a remote RKEY flow, and only when @type itself is RKEY).
 * If the slot cannot be taken, the caller sleeps interruptibly, with a
 * timeout, on lgr->llc_waiter until the same condition becomes true.
 * list_empty(&lgr->list) is checked before each attempt.
 */
230 /* start a new local llc flow, wait till current flow finished */
231 int smc_llc_flow_initiate(struct smc_link_group *lgr,
232 enum smc_llc_flowtype type)
234 enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
237 /* all flows except confirm_rkey and delete_rkey are exclusive,
238 * confirm/delete rkey flows can run concurrently (local and remote)
240 if (type == SMC_LLC_FLOW_RKEY)
241 allowed_remote = SMC_LLC_FLOW_RKEY;
243 if (list_empty(&lgr->list))
245 spin_lock_bh(&lgr->llc_flow_lock);
246 if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
247 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
248 lgr->llc_flow_rmt.type == allowed_remote)) {
249 lgr->llc_flow_lcl.type = type;
250 spin_unlock_bh(&lgr->llc_flow_lock);
253 spin_unlock_bh(&lgr->llc_flow_lock);
254 rc = wait_event_interruptible_timeout(lgr->llc_waiter,
255 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
256 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
257 lgr->llc_flow_rmt.type == allowed_remote)),
/* Resets @flow to the idle state: the whole structure is zeroed and its
 * type set to SMC_LLC_FLOW_NONE while holding lgr->llc_flow_lock.
 * Afterwards llc_event_work is scheduled when the link group is still on
 * its list, a delayed event is pending and @flow is the local flow;
 * sleepers on lgr->llc_waiter are woken.
 * NOTE(review): confirm whether the wake-up is unconditional or only done
 * when no event work was scheduled.
 */
264 /* finish the current llc flow */
265 void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
267 spin_lock_bh(&lgr->llc_flow_lock);
268 memset(flow, 0, sizeof(*flow));
269 flow->type = SMC_LLC_FLOW_NONE;
270 spin_unlock_bh(&lgr->llc_flow_lock);
271 if (!list_empty(&lgr->list) && lgr->delayed_event &&
272 flow == &lgr->llc_flow_lcl)
273 schedule_work(&lgr->llc_event_work);
275 wake_up_interruptible(&lgr->llc_waiter);
/* Sleeps interruptibly, with @time_out, on lgr->llc_waiter and then
 * evaluates the queue entry of the local flow.
 * The wait also ends when @lnk (if given) is no longer usable or the link
 * group has left its list; in those cases the flow's queue entry is
 * deleted. When @exp_msg is non-zero and the received message type
 * differs, the entry is not handed out: a DELETE_LINK that arrives while
 * ADD_LINK was expected is passed to smc_llc_flow_start() so that it gets
 * delayed, and the flow's entry is deleted.
 */
278 /* lnk is optional and used for early wakeup when link goes down, useful in
279 * cases where we wait for a response on the link after we sent a request
281 struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
282 struct smc_link *lnk,
283 int time_out, u8 exp_msg)
285 struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
287 wait_event_interruptible_timeout(lgr->llc_waiter,
289 (lnk && !smc_link_usable(lnk)) ||
290 list_empty(&lgr->list)),
293 (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
294 smc_llc_flow_qentry_del(flow);
297 if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
298 if (exp_msg == SMC_LLC_ADD_LINK &&
299 flow->qentry->msg.raw.hdr.common.type ==
300 SMC_LLC_DELETE_LINK) {
301 /* flow_start will delay the unexpected msg */
302 smc_llc_flow_start(&lgr->llc_flow_lcl,
303 smc_llc_flow_qentry_clr(flow));
306 smc_llc_flow_qentry_del(flow);
312 /********************************** send *************************************/
/* Private per-send context of the LLC layer; its size is checked at build
 * time against SMC_WR_TX_PEND_PRIV_SIZE in smc_llc_add_pending_send().
 */
314 struct smc_llc_tx_pend {
/* Completion callback passed to smc_wr_tx_get_free_slot() for every LLC
 * send. It receives the pending-send context, the link and the work
 * completion status; evaluating @wc_status is noted below as future work.
 */
317 /* handler for send/transmission completion of an LLC msg */
318 static void smc_llc_tx_handler(struct smc_wr_tx_pend_priv *pend,
319 struct smc_link *link,
320 enum ib_wc_status wc_status)
322 /* future work: handle wc_status error for recovery and failover */
326 * smc_llc_add_pending_send() - add LLC control message to pending WQE transmits
327 * @link: Pointer to SMC link used for sending LLC control message.
328 * @wr_buf: Out variable returning pointer to work request payload buffer.
329 * @pend: Out variable returning pointer to private pending WR tracking.
330 * It's the context the transmit complete handler will get.
332 * Reserves and pre-fills an entry for a pending work request send/tx.
333 * Used by mid-level smc_llc_send_msg() to prepare for later actual send/tx.
334 * Can sleep due to smc_get_ctrl_buf (if not in softirq context).
336 * Return: 0 on success, otherwise an error value.
338 static int smc_llc_add_pending_send(struct smc_link *link,
339 struct smc_wr_buf **wr_buf,
340 struct smc_wr_tx_pend_priv **pend)
/* Reserve a tx slot with smc_llc_tx_handler as its completion handler.
 * The size checks that follow tie union smc_llc_msg and
 * struct smc_llc_tx_pend to the SMC_WR_* buffer size constants.
 */
344 rc = smc_wr_tx_get_free_slot(link, smc_llc_tx_handler, wr_buf, NULL,
349 sizeof(union smc_llc_msg) > SMC_WR_BUF_SIZE,
350 "must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_llc_msg)");
352 sizeof(union smc_llc_msg) != SMC_WR_TX_SIZE,
353 "must adapt SMC_WR_TX_SIZE to sizeof(struct smc_llc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
355 sizeof(struct smc_llc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
356 "must increase SMC_WR_TX_PEND_PRIV_SIZE to at least sizeof(struct smc_llc_tx_pend)");
/* Builds a CONFIRM LINK message in a reserved tx buffer and sends it on
 * @link. The buffer is zeroed, type and length are set, the
 * NO_RMBE_EYEC flag is always set and the RESP flag is added when
 * @reqresp is SMC_LLC_RESP. The payload carries the MAC of the link's IB
 * port, the link's GID, QP number (24 bit, network order), link id, link
 * uid and SMC_LLC_ADD_LNK_MAX_LINKS as max_links.
 */
360 /* high-level API to send LLC confirm link */
361 int smc_llc_send_confirm_link(struct smc_link *link,
362 enum smc_llc_reqresp reqresp)
364 struct smc_llc_msg_confirm_link *confllc;
365 struct smc_wr_tx_pend_priv *pend;
366 struct smc_wr_buf *wr_buf;
369 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
372 confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
373 memset(confllc, 0, sizeof(*confllc));
374 confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
375 confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link);
376 confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
377 if (reqresp == SMC_LLC_RESP)
378 confllc->hd.flags |= SMC_LLC_FLAG_RESP;
379 memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
381 memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
382 hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
383 confllc->link_num = link->link_id;
384 memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
385 confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
386 /* send llc message */
387 rc = smc_wr_tx_send(link, pend);
/* Builds a CONFIRM RKEY request for @rmb_desc and sends it on @send_link.
 * rtoken[0] always describes @send_link (rkey in network order, DMA
 * address as big-endian 64 bit); further rtoken slots are filled, indexed
 * by rtok_ix, for every other link of the link group that is in state
 * SMC_LNK_ACTIVE. rtoken[0].num_rkeys is set to rtok_ix - 1.
 */
391 /* send LLC confirm rkey request */
392 static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
393 struct smc_buf_desc *rmb_desc)
395 struct smc_llc_msg_confirm_rkey *rkeyllc;
396 struct smc_wr_tx_pend_priv *pend;
397 struct smc_wr_buf *wr_buf;
398 struct smc_link *link;
401 rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
404 rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
405 memset(rkeyllc, 0, sizeof(*rkeyllc));
406 rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
407 rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
410 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
411 link = &send_link->lgr->lnk[i];
412 if (link->state == SMC_LNK_ACTIVE && link != send_link) {
413 rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
414 rkeyllc->rtoken[rtok_ix].rmb_key =
415 htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
416 rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
418 rmb_desc->sgt[link->link_idx].sgl));
422 /* rkey of send_link is in rtoken[0] */
423 rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
424 rkeyllc->rtoken[0].rmb_key =
425 htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
426 rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
427 (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
428 /* send llc message */
429 rc = smc_wr_tx_send(send_link, pend);
/* Builds a DELETE RKEY request that names exactly one rkey (num_rkeys is
 * 1): the rkey of @rmb_desc's memory region for @link, converted to
 * network byte order. The message is then sent on @link.
 */
433 /* send LLC delete rkey request */
434 static int smc_llc_send_delete_rkey(struct smc_link *link,
435 struct smc_buf_desc *rmb_desc)
437 struct smc_llc_msg_delete_rkey *rkeyllc;
438 struct smc_wr_tx_pend_priv *pend;
439 struct smc_wr_buf *wr_buf;
442 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
445 rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
446 memset(rkeyllc, 0, sizeof(*rkeyllc));
447 rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
448 rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
449 rkeyllc->num_rkeys = 1;
450 rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
451 /* send llc message */
452 rc = smc_wr_tx_send(link, pend);
/* Builds an ADD LINK request or response and sends it on @link.
 * @mac and @gid are copied in as sender MAC/GID (ETH_ALEN and
 * SMC_GID_SIZE bytes); link number, QP number and initial PSN are taken
 * from @link_new. For a request qp_mtu is link_new->path_mtu, otherwise
 * it is limited through min() with link_new->path_mtu as one operand.
 * The RESP flag is set when @reqresp is SMC_LLC_RESP.
 */
456 /* send ADD LINK request or response */
457 int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
458 struct smc_link *link_new,
459 enum smc_llc_reqresp reqresp)
461 struct smc_llc_msg_add_link *addllc;
462 struct smc_wr_tx_pend_priv *pend;
463 struct smc_wr_buf *wr_buf;
466 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
469 addllc = (struct smc_llc_msg_add_link *)wr_buf;
471 memset(addllc, 0, sizeof(*addllc));
472 addllc->hd.common.type = SMC_LLC_ADD_LINK;
473 addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
474 if (reqresp == SMC_LLC_RESP)
475 addllc->hd.flags |= SMC_LLC_FLAG_RESP;
476 memcpy(addllc->sender_mac, mac, ETH_ALEN);
477 memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
479 addllc->link_num = link_new->link_id;
480 hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
481 hton24(addllc->initial_psn, link_new->psn_initial);
482 if (reqresp == SMC_LLC_REQ)
483 addllc->qp_mtu = link_new->path_mtu;
485 addllc->qp_mtu = min(link_new->path_mtu,
488 /* send llc message */
489 rc = smc_wr_tx_send(link, pend);
/* Builds a DELETE LINK request or response and sends it on @link.
 * The RESP flag follows @reqresp; the ORDERLY and DEL_LINK_ALL flags and
 * link_num (set from @link_del_id) are filled in as applicable, and the
 * reason code is stored in network byte order.
 * NOTE(review): presumably ORDERLY follows @orderly and DEL_LINK_ALL is
 * used when no single link id is given - confirm.
 */
493 /* send DELETE LINK request or response */
494 int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
495 enum smc_llc_reqresp reqresp, bool orderly,
498 struct smc_llc_msg_del_link *delllc;
499 struct smc_wr_tx_pend_priv *pend;
500 struct smc_wr_buf *wr_buf;
503 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
506 delllc = (struct smc_llc_msg_del_link *)wr_buf;
508 memset(delllc, 0, sizeof(*delllc));
509 delllc->hd.common.type = SMC_LLC_DELETE_LINK;
510 delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
511 if (reqresp == SMC_LLC_RESP)
512 delllc->hd.flags |= SMC_LLC_FLAG_RESP;
514 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
516 delllc->link_num = link_del_id;
518 delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
519 delllc->reason = htonl(reason);
520 /* send llc message */
521 rc = smc_wr_tx_send(link, pend);
/* Builds a TEST LINK request in a reserved tx buffer, copies
 * sizeof(testllc->user_data) bytes from @user_data into it and sends it
 * on @link.
 */
525 /* send LLC test link request */
526 static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
528 struct smc_llc_msg_test_link *testllc;
529 struct smc_wr_tx_pend_priv *pend;
530 struct smc_wr_buf *wr_buf;
533 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
536 testllc = (struct smc_llc_msg_test_link *)wr_buf;
537 memset(testllc, 0, sizeof(*testllc));
538 testllc->hd.common.type = SMC_LLC_TEST_LINK;
539 testllc->hd.length = sizeof(struct smc_llc_msg_test_link);
540 memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
541 /* send llc message */
542 rc = smc_wr_tx_send(link, pend);
/* Sends a pre-built LLC message: after checking smc_link_usable(@link), a
 * tx buffer is reserved, sizeof(union smc_llc_msg) bytes are copied from
 * @llcbuf into it, and the result of smc_wr_tx_send() is returned.
 * @llcbuf must therefore reference at least that many readable bytes.
 */
546 /* schedule an llc send on link, may wait for buffers */
547 static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
549 struct smc_wr_tx_pend_priv *pend;
550 struct smc_wr_buf *wr_buf;
553 if (!smc_link_usable(link))
555 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
558 memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
559 return smc_wr_tx_send(link, pend);
/* Sends a pre-built LLC message and waits for its send completion: after
 * checking smc_link_usable(@link), sizeof(union smc_llc_msg) bytes are
 * copied from @llcbuf into a reserved tx buffer and handed to
 * smc_wr_tx_send_wait() with SMC_LLC_WAIT_TIME as the timeout.
 */
562 /* schedule an llc send on link, may wait for buffers,
563 * and wait for send completion notification.
564 * @return 0 on success
566 static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
568 struct smc_wr_tx_pend_priv *pend;
569 struct smc_wr_buf *wr_buf;
572 if (!smc_link_usable(link))
574 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
577 memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
578 return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
581 /********************************* receive ***********************************/
/* Looks for a slot in lgr->lnk[] whose state is SMC_LNK_UNUSED.
 * The first check singles out link groups that are already SYMMETRIC, or
 * that are not SINGLE while an asymmetric type is requested.
 * For the asymmetric types the array is searched from the highest index
 * downwards, otherwise from index 0 upwards.
 * NOTE(review): presumably the first check rejects the request and the
 * function returns the slot index or a negative error - confirm.
 */
583 static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
584 enum smc_lgr_type lgr_new_t)
588 if (lgr->type == SMC_LGR_SYMMETRIC ||
589 (lgr->type != SMC_LGR_SINGLE &&
590 (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
591 lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
594 if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
595 lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
596 for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
597 if (lgr->lnk[i].state == SMC_LNK_UNUSED)
600 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
601 if (lgr->lnk[i].state == SMC_LNK_UNUSED)
/* Walks the per-size rmb lists of @lgr, starting with list index *buf_lst
 * and staying below SMC_RMBE_SIZES, and looks at the first entry of
 * lgr->rmbs[*buf_lst] (NULL for an empty list).
 */
607 /* return first buffer from any of the next buf lists */
608 static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
611 struct smc_buf_desc *buf_pos;
613 while (*buf_lst < SMC_RMBE_SIZES) {
614 buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
615 struct smc_buf_desc, list);
/* Advances the rmb iteration: without a current position, or when
 * @buf_pos is the last entry of lgr->rmbs[*buf_lst], the search continues
 * through _smc_llc_get_next_rmb(); otherwise the next entry of the same
 * list is taken.
 */
623 /* return next rmb from buffer lists */
624 static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
626 struct smc_buf_desc *buf_pos)
628 struct smc_buf_desc *buf_next;
630 if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
632 return _smc_llc_get_next_rmb(lgr, buf_lst);
634 buf_next = list_next_entry(buf_pos, list);
/* Starts an rmb iteration by asking smc_llc_get_next_rmb() for the entry
 * that follows "no position" (NULL).
 */
638 static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
642 return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
/* Builds one ADD LINK CONTINUATION message on @link describing rmbs for
 * @link_new. num_rkeys starts out as *num_rkeys_todo; at most
 * SMC_LLC_RKEYS_PER_CONT_MSG entries are filled per message, each with the
 * rkey on the primary link, the rkey on the new link and the DMA address
 * on the new link. *buf_pos is advanced to the next rmb whose ->used is
 * set. The RESP flag is set when the link group role is SMC_CLNT.
 * Returns the result of smc_wr_tx_send().
 */
645 /* send one add_link_continue msg */
646 static int smc_llc_add_link_cont(struct smc_link *link,
647 struct smc_link *link_new, u8 *num_rkeys_todo,
648 int *buf_lst, struct smc_buf_desc **buf_pos)
650 struct smc_llc_msg_add_link_cont *addc_llc;
651 struct smc_link_group *lgr = link->lgr;
652 int prim_lnk_idx, lnk_idx, i, rc;
653 struct smc_wr_tx_pend_priv *pend;
654 struct smc_wr_buf *wr_buf;
655 struct smc_buf_desc *rmb;
658 rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
661 addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
662 memset(addc_llc, 0, sizeof(*addc_llc));
664 prim_lnk_idx = link->link_idx;
665 lnk_idx = link_new->link_idx;
666 addc_llc->link_num = link_new->link_id;
667 addc_llc->num_rkeys = *num_rkeys_todo;
669 for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
671 addc_llc->num_rkeys = addc_llc->num_rkeys -
678 addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
679 addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
680 addc_llc->rt[i].rmb_vaddr_new =
681 cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
684 *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
685 while (*buf_pos && !(*buf_pos)->used)
686 *buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
688 addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
689 addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
690 if (lgr->role == SMC_CLNT)
691 addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
692 return smc_wr_tx_send(link, pend);
/* Client side of the rkey exchange for a new link, run with
 * lgr->rmbs_lock held. The number of rkeys to send starts as
 * lgr->conns_num. Each round waits up to SMC_LLC_WAIT_TIME for an
 * ADD_LINK_CONT message, stores at most SMC_LLC_RKEYS_PER_CONT_MSG of the
 * received rtokens through smc_rtoken_set(), deletes the queue entry and
 * answers with an own continuation message. The loop runs while rkeys
 * remain to be sent or received.
 */
695 static int smc_llc_cli_rkey_exchange(struct smc_link *link,
696 struct smc_link *link_new)
698 struct smc_llc_msg_add_link_cont *addc_llc;
699 struct smc_link_group *lgr = link->lgr;
700 u8 max, num_rkeys_send, num_rkeys_recv;
701 struct smc_llc_qentry *qentry;
702 struct smc_buf_desc *buf_pos;
707 mutex_lock(&lgr->rmbs_lock);
708 num_rkeys_send = lgr->conns_num;
709 buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
711 qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME,
712 SMC_LLC_ADD_LINK_CONT);
717 addc_llc = &qentry->msg.add_link_cont;
718 num_rkeys_recv = addc_llc->num_rkeys;
719 max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
720 for (i = 0; i < max; i++) {
721 smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
722 addc_llc->rt[i].rmb_key,
723 addc_llc->rt[i].rmb_vaddr_new,
724 addc_llc->rt[i].rmb_key_new);
727 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
728 rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
732 } while (num_rkeys_send || num_rkeys_recv);
734 mutex_unlock(&lgr->rmbs_lock);
/* Turns the received ADD LINK request in @qentry into a reject response
 * in place: the RESP and ADD_LNK_REJ flags are set and the reject reason
 * becomes SMC_LLC_REJ_RSN_NO_ALT_PATH. The modified message is sent back
 * on the link it arrived on; the send result is returned.
 */
738 /* prepare and send an add link reject response */
739 static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
741 qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
742 qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
743 qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
744 return smc_llc_send_message(qentry->link, &qentry->msg);
/* Client side confirmation of a new link.
 * Waits up to SMC_LLC_WAIT_FIRST_TIME for the next message of the local
 * flow without restricting its type. A message other than CONFIRM_LINK is
 * echoed back on @link with the RESP flag set and then dropped. For
 * CONFIRM_LINK the peer uid is saved, the new link's QP is moved to RTS,
 * its QP attributes are remembered, the link group buffers are registered
 * on it and a CONFIRM LINK response is sent over @link_new. On success the
 * new link is activated and the link group type is updated (via the
 * asym variant for the asymmetric types). The failure paths send a
 * DELETE LINK request with reason SMC_LLC_DEL_LOST_PATH on @link.
 */
747 static int smc_llc_cli_conf_link(struct smc_link *link,
748 struct smc_init_info *ini,
749 struct smc_link *link_new,
750 enum smc_lgr_type lgr_new_t)
752 struct smc_link_group *lgr = link->lgr;
753 struct smc_llc_qentry *qentry = NULL;
756 /* receive CONFIRM LINK request over RoCE fabric */
757 qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_FIRST_TIME, 0);
759 rc = smc_llc_send_delete_link(link, link_new->link_id,
761 SMC_LLC_DEL_LOST_PATH);
764 if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
765 /* received DELETE_LINK instead */
766 qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
767 smc_llc_send_message(link, &qentry->msg);
768 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
771 smc_llc_save_peer_uid(qentry);
772 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
774 rc = smc_ib_modify_qp_rts(link_new);
776 smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
777 false, SMC_LLC_DEL_LOST_PATH);
780 smc_wr_remember_qp_attr(link_new);
782 rc = smcr_buf_reg_lgr(link_new);
784 smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
785 false, SMC_LLC_DEL_LOST_PATH);
789 /* send CONFIRM LINK response over RoCE fabric */
790 rc = smc_llc_send_confirm_link(link_new, SMC_LLC_RESP);
792 smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
793 false, SMC_LLC_DEL_LOST_PATH);
796 smc_llc_link_active(link_new);
797 if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
798 lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
799 smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
801 smcr_lgr_set_type(lgr, lgr_new_t);
/* Copies the peer's connection data from an ADD LINK message into @link:
 * QP number and initial PSN (converted from 24-bit network order), GID,
 * MAC address and QP MTU.
 */
805 static void smc_llc_save_add_link_info(struct smc_link *link,
806 struct smc_llc_msg_add_link *add_llc)
808 link->peer_qpn = ntoh24(add_llc->sender_qp_num);
809 memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE);
810 memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN);
811 link->peer_psn = ntoh24(add_llc->initial_psn);
812 link->peer_mtu = add_llc->qp_mtu;
/* Handles an ADD LINK request (in @qentry) received on @link.
 * An alternate RoCE device is looked up for the link group's vlan. When
 * the request's sender GID and MAC equal the peer GID/MAC of the existing
 * link, the new type may become ASYMMETRIC_PEER; for ASYMMETRIC_LOCAL the
 * device and port of the existing link are reused. A free link slot is
 * then allocated and initialised, the peer data from the request is
 * saved, the link id assigned by the server is adopted, the link is made
 * ready, buffers are mapped, an ADD LINK response is sent on @link, and
 * the rkey exchange and link confirmation follow. The cleanup paths clear
 * the new link and send a reject response for the request.
 */
815 /* as an SMC client, process an add link request */
816 int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
818 struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
819 enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
820 struct smc_link_group *lgr = smc_get_lgr(link);
821 struct smc_link *lnk_new = NULL;
822 struct smc_init_info ini;
825 ini.vlan_id = lgr->vlan_id;
826 smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
827 if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
828 !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
831 lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
834 lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
835 ini.ib_dev = link->smcibdev;
836 ini.ib_port = link->ibport;
838 lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
841 lnk_new = &lgr->lnk[lnk_idx];
842 rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
845 smc_llc_save_add_link_info(lnk_new, llc);
846 lnk_new->link_id = llc->link_num; /* SMC server assigns link id */
847 smc_llc_link_set_uid(lnk_new);
849 rc = smc_ib_ready_link(lnk_new);
853 rc = smcr_buf_map_lgr(lnk_new);
857 rc = smc_llc_send_add_link(link,
858 lnk_new->smcibdev->mac[ini.ib_port - 1],
859 lnk_new->gid, lnk_new, SMC_LLC_RESP);
862 rc = smc_llc_cli_rkey_exchange(link, lnk_new);
867 rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t);
871 smcr_link_clear(lnk_new, false);
873 smc_llc_cli_add_link_reject(qentry);
/* Takes the pending queue entry off the local flow and runs the client
 * ADD LINK processing for it while holding lgr->llc_conf_mutex.
 */
879 static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
881 struct smc_llc_qentry *qentry;
883 qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
885 mutex_lock(&lgr->llc_conf_mutex);
886 smc_llc_cli_add_link(qentry->link, qentry);
887 mutex_unlock(&lgr->llc_conf_mutex);
/* Iterates over all SMC_LINKS_PER_LGR_MAX link slots of @lgr and
 * maintains link_count, testing each slot with smc_link_usable().
 * NOTE(review): presumably unusable slots are skipped and the count of
 * usable links is returned - confirm.
 */
890 static int smc_llc_active_link_count(struct smc_link_group *lgr)
892 int i, link_count = 0;
894 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
895 if (!smc_link_usable(&lgr->lnk[i]))
/* Searches all pairs (i, j) of usable links for two links with the same
 * local gid; per the inline comment the asymmetric link is then one of
 * the two. The peer_gid of lnk[i] and of lnk[j] is subsequently compared
 * against the other usable links (index k) to settle asym_idx.
 * Returns the link at asym_idx, or NULL while asym_idx is negative
 * (it starts as -ENOENT).
 */
902 /* find the asymmetric link when 3 links are established */
903 static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr)
905 int asym_idx = -ENOENT;
909 /* determine asymmetric link */
911 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
912 for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
913 if (!smc_link_usable(&lgr->lnk[i]) ||
914 !smc_link_usable(&lgr->lnk[j]))
916 if (!memcmp(lgr->lnk[i].gid, lgr->lnk[j].gid,
918 found = true; /* asym_lnk is i or j */
926 goto out; /* no asymmetric link */
927 for (k = 0; k < SMC_LINKS_PER_LGR_MAX; k++) {
928 if (!smc_link_usable(&lgr->lnk[k]))
931 !memcmp(lgr->lnk[i].peer_gid, lgr->lnk[k].peer_gid,
937 !memcmp(lgr->lnk[j].peer_gid, lgr->lnk[k].peer_gid,
944 return (asym_idx < 0) ? NULL : &lgr->lnk[asym_idx];
/* Removes the asymmetric link of @lgr, if there is one.
 * The link found by smc_llc_find_asym_link() is taken down
 * (smc_link_downing), its connections are switched to another link
 * (lnk_new) and pending sends are awaited. The local flow type is changed
 * to DEL_LINK, an orderly DELETE LINK request with reason
 * SMC_LLC_DEL_NO_ASYM_NEEDED is sent on lnk_new and the DELETE_LINK
 * answer is awaited for up to SMC_LLC_WAIT_TIME; the failure paths call
 * smcr_link_down_cond(lnk_new). Finally the asymmetric link is cleared.
 */
947 static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
949 struct smc_link *lnk_new = NULL, *lnk_asym;
950 struct smc_llc_qentry *qentry;
953 lnk_asym = smc_llc_find_asym_link(lgr);
955 return; /* no asymmetric link */
956 if (!smc_link_downing(&lnk_asym->state))
958 lnk_new = smc_switch_conns(lgr, lnk_asym, false);
959 smc_wr_tx_wait_no_pending_sends(lnk_asym);
962 /* change flow type from ADD_LINK into DEL_LINK */
963 lgr->llc_flow_lcl.type = SMC_LLC_FLOW_DEL_LINK;
964 rc = smc_llc_send_delete_link(lnk_new, lnk_asym->link_id, SMC_LLC_REQ,
965 true, SMC_LLC_DEL_NO_ASYM_NEEDED);
967 smcr_link_down_cond(lnk_new);
970 qentry = smc_llc_wait(lgr, lnk_new, SMC_LLC_WAIT_TIME,
971 SMC_LLC_DELETE_LINK);
973 smcr_link_down_cond(lnk_new);
976 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
978 smcr_link_clear(lnk_asym, true);
/* Server side of the rkey exchange for a new link, run with
 * lgr->rmbs_lock held. The number of rkeys to send starts as
 * lgr->conns_num. Each round sends one continuation message, waits up to
 * SMC_LLC_WAIT_TIME on @link for the peer's ADD_LINK_CONT, stores at most
 * SMC_LLC_RKEYS_PER_CONT_MSG of the received rtokens through
 * smc_rtoken_set() and deletes the queue entry. The loop runs while rkeys
 * remain to be sent or received.
 */
981 static int smc_llc_srv_rkey_exchange(struct smc_link *link,
982 struct smc_link *link_new)
984 struct smc_llc_msg_add_link_cont *addc_llc;
985 struct smc_link_group *lgr = link->lgr;
986 u8 max, num_rkeys_send, num_rkeys_recv;
987 struct smc_llc_qentry *qentry = NULL;
988 struct smc_buf_desc *buf_pos;
993 mutex_lock(&lgr->rmbs_lock);
994 num_rkeys_send = lgr->conns_num;
995 buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
997 smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
999 qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME,
1000 SMC_LLC_ADD_LINK_CONT);
1005 addc_llc = &qentry->msg.add_link_cont;
1006 num_rkeys_recv = addc_llc->num_rkeys;
1007 max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
1008 for (i = 0; i < max; i++) {
1009 smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
1010 addc_llc->rt[i].rmb_key,
1011 addc_llc->rt[i].rmb_vaddr_new,
1012 addc_llc->rt[i].rmb_key_new);
1015 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1016 } while (num_rkeys_send || num_rkeys_recv);
1018 mutex_unlock(&lgr->rmbs_lock);
/* Server side confirmation of a new link: sends a CONFIRM LINK request on
 * @link_new and waits up to SMC_LLC_WAIT_FIRST_TIME for the CONFIRM_LINK
 * response. The failure path sends a DELETE LINK request with reason
 * SMC_LLC_DEL_LOST_PATH on @link. On success the peer uid is saved, the
 * new link is activated, the link group type is updated (via the asym
 * variant for the asymmetric types) and the queue entry is deleted.
 */
1022 static int smc_llc_srv_conf_link(struct smc_link *link,
1023 struct smc_link *link_new,
1024 enum smc_lgr_type lgr_new_t)
1026 struct smc_link_group *lgr = link->lgr;
1027 struct smc_llc_qentry *qentry = NULL;
1030 /* send CONFIRM LINK request over the RoCE fabric */
1031 rc = smc_llc_send_confirm_link(link_new, SMC_LLC_REQ);
1034 /* receive CONFIRM LINK response over the RoCE fabric */
1035 qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME,
1036 SMC_LLC_CONFIRM_LINK);
1038 /* send DELETE LINK */
1039 smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
1040 false, SMC_LLC_DEL_LOST_PATH);
1043 smc_llc_save_peer_uid(qentry);
1044 smc_llc_link_active(link_new);
1045 if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
1046 lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
1047 smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
1049 smcr_lgr_set_type(lgr, lgr_new_t);
1050 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
/* Server side setup of an additional link, driven from @link.
 * An alternate RoCE device is looked up for the link group's vlan; for
 * ASYMMETRIC_LOCAL the device and port of @link are reused. A free link
 * slot is allocated and initialised, an ADD LINK request is sent on @link
 * and the ADD_LINK response is awaited for up to SMC_LLC_WAIT_TIME. A
 * response carrying the ADD_LNK_REJ flag ends the attempt. If the link
 * group is SINGLE and the response names the same peer GID and MAC as
 * @link, the new type becomes ASYMMETRIC_PEER. The peer data is saved,
 * the link is made ready, buffers are mapped and registered, and the rkey
 * exchange and link confirmation follow. The cleanup path clears the new
 * link.
 */
1054 int smc_llc_srv_add_link(struct smc_link *link)
1056 enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
1057 struct smc_link_group *lgr = link->lgr;
1058 struct smc_llc_msg_add_link *add_llc;
1059 struct smc_llc_qentry *qentry = NULL;
1060 struct smc_link *link_new;
1061 struct smc_init_info ini;
1062 int lnk_idx, rc = 0;
1064 /* ignore client add link recommendation, start new flow */
1065 ini.vlan_id = lgr->vlan_id;
1066 smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
1068 lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
1069 ini.ib_dev = link->smcibdev;
1070 ini.ib_port = link->ibport;
1072 lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
1076 rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini);
1079 link_new = &lgr->lnk[lnk_idx];
1080 rc = smc_llc_send_add_link(link,
1081 link_new->smcibdev->mac[ini.ib_port - 1],
1082 link_new->gid, link_new, SMC_LLC_REQ);
1085 /* receive ADD LINK response over the RoCE fabric */
1086 qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
1091 add_llc = &qentry->msg.add_link;
1092 if (add_llc->hd.flags & SMC_LLC_FLAG_ADD_LNK_REJ) {
1093 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1097 if (lgr->type == SMC_LGR_SINGLE &&
1098 (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
1099 !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) {
1100 lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
1102 smc_llc_save_add_link_info(link_new, add_llc);
1103 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1105 rc = smc_ib_ready_link(link_new);
1108 rc = smcr_buf_map_lgr(link_new);
1111 rc = smcr_buf_reg_lgr(link_new);
1114 rc = smc_llc_srv_rkey_exchange(link, link_new);
1117 rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
1122 smcr_link_clear(link_new, false);
/* Server side handling of a queued ADD LINK event: remembers the link the
 * queue entry arrived on, deletes the entry and runs
 * smc_llc_srv_add_link() under lgr->llc_conf_mutex. When that succeeds
 * and the link group is now SYMMETRIC, an asymmetric link is removed.
 */
1126 static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
1128 struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
1131 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1133 mutex_lock(&lgr->llc_conf_mutex);
1134 rc = smc_llc_srv_add_link(link);
1135 if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
1136 /* delete any asymmetric link */
1137 smc_llc_delete_asym_link(lgr);
1139 mutex_unlock(&lgr->llc_conf_mutex);
/* Builds a zero-initialised ADD LINK message on the stack, fills in only
 * length and type, and enqueues it for @link via smc_llc_enqueue().
 * NOTE(review): the cast hands a struct smc_llc_msg_add_link to a consumer
 * expecting union smc_llc_msg - confirm the consumer never reads beyond
 * sizeof(add_llc).
 */
1142 /* enqueue a local add_link req to trigger a new add_link flow, only as SERV */
1143 void smc_llc_srv_add_link_local(struct smc_link *link)
1145 struct smc_llc_msg_add_link add_llc = {0};
1147 add_llc.hd.length = sizeof(add_llc);
1148 add_llc.hd.common.type = SMC_LLC_ADD_LINK;
1149 /* no dev and port needed, we as server ignore client data anyway */
1150 smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
/* Work function; the link group is recovered from @work with
 * container_of(). If the link group is no longer on its list (it is
 * terminating) the pending queue entry is just deleted. Otherwise the
 * client or server ADD LINK processing runs depending on lgr->role, and
 * the local flow is stopped.
 */
1153 /* worker to process an add link message */
1154 static void smc_llc_add_link_work(struct work_struct *work)
1156 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
1159 if (list_empty(&lgr->list)) {
1160 /* link group is terminating */
1161 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1165 if (lgr->role == SMC_CLNT)
1166 smc_llc_process_cli_add_link(lgr);
1168 smc_llc_process_srv_add_link(lgr);
1170 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
/* Builds a zero-initialised DELETE LINK message on the stack for link id
 * @del_link_id, with reason SMC_LLC_DEL_LOST_PATH (network byte order)
 * and the ORDERLY flag set, and enqueues it for @link via
 * smc_llc_enqueue().
 */
1173 /* enqueue a local del_link msg to trigger a new del_link flow,
1174 * called only for role SMC_SERV
1176 void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
1178 struct smc_llc_msg_del_link del_llc = {0};
1180 del_llc.hd.length = sizeof(del_llc);
1181 del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
1182 del_llc.link_num = del_link_id;
1183 del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
1184 del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
1185 smc_llc_enqueue(link, (union smc_llc_msg *)&del_llc);
/* Client side handling of a queued DELETE LINK request.
 * A request with the DEL_LINK_ALL flag schedules termination of the whole
 * link group. Otherwise, under lgr->llc_conf_mutex, the link whose
 * link_id equals the requested link_num is looked up; the request is
 * turned into a response (RESP flag) and sent back, with reason
 * SMC_LLC_DEL_NOLNK when no such link exists and reason 0 otherwise.
 * The link to delete is taken down (connections switched away, pending
 * sends awaited) and cleared. Unless the deleted link was the asymmetric
 * one, the link group type is then set from the number of remaining
 * active links: SINGLE for one, NONE plus scheduled termination for zero.
 */
1188 static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
1190 struct smc_link *lnk_del = NULL, *lnk_asym, *lnk;
1191 struct smc_llc_msg_del_link *del_llc;
1192 struct smc_llc_qentry *qentry;
1196 qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
1198 del_llc = &qentry->msg.delete_link;
1200 if (del_llc->hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
1201 smc_lgr_terminate_sched(lgr);
1204 mutex_lock(&lgr->llc_conf_mutex);
1205 /* delete single link */
1206 for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
1207 if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
1209 lnk_del = &lgr->lnk[lnk_idx];
1212 del_llc->hd.flags |= SMC_LLC_FLAG_RESP;
1214 /* link was not found */
1215 del_llc->reason = htonl(SMC_LLC_DEL_NOLNK);
1216 smc_llc_send_message(lnk, &qentry->msg);
1219 lnk_asym = smc_llc_find_asym_link(lgr);
1221 del_llc->reason = 0;
1222 smc_llc_send_message(lnk, &qentry->msg); /* response */
1224 if (smc_link_downing(&lnk_del->state)) {
1225 smc_switch_conns(lgr, lnk_del, false);
1226 smc_wr_tx_wait_no_pending_sends(lnk_del);
1228 smcr_link_clear(lnk_del, true);
1230 active_links = smc_llc_active_link_count(lgr);
1231 if (lnk_del == lnk_asym) {
1232 /* expected deletion of asym link, don't change lgr state */
1233 } else if (active_links == 1) {
1234 smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1235 } else if (!active_links) {
1236 smcr_lgr_set_type(lgr, SMC_LGR_NONE);
1237 smc_lgr_terminate_sched(lgr);
1240 mutex_unlock(&lgr->llc_conf_mutex);
/* Builds a DELETE LINK message with the DEL_LINK_ALL flag and reason @rsn
 * (network byte order) and tries the usable links of @lgr one after the
 * other with smc_llc_send_message_wait(); a zero return from that call is
 * tested for each link.
 * NOTE(review): presumably the ORDERLY flag follows @ord and the loop
 * stops after the first successful send - confirm.
 * NOTE(review): the on-stack struct smc_llc_msg_del_link is copied with
 * sizeof(union smc_llc_msg) by the send helper - confirm both sizes match.
 */
1245 /* try to send a DELETE LINK ALL request on any active link,
1246 * waiting for send completion
1248 void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
1250 struct smc_llc_msg_del_link delllc = {0};
1253 delllc.hd.common.type = SMC_LLC_DELETE_LINK;
1254 delllc.hd.length = sizeof(delllc);
1256 delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
1257 delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
1258 delllc.reason = htonl(rsn);
1260 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1261 if (!smc_link_usable(&lgr->lnk[i]))
1263 if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
/* Server-side handling of a DELETE LINK request that is queued on the local
 * LLC flow of @lgr. lgr->llc_conf_mutex is taken before the request is
 * fetched and released at the end.
 *
 * With SMC_LLC_FLAG_DEL_LINK_ALL set, a DELETE LINK ALL carrying the reason
 * of the request is sent and termination of the link group is scheduled.
 * Otherwise the link whose link_id equals link_num of the request is
 * selected. While the link group is still on its list, the DELETE LINK
 * message is sent on lnk and, if sending succeeded, a DELETE_LINK message is
 * awaited for up to SMC_LLC_WAIT_TIME. The selected link is then cleared and
 * the link group type is updated from the number of active links. If the
 * type ends up as SMC_LGR_SINGLE and the link group is still listed,
 * smc_llc_srv_add_link_local() is called.
 */
1268 static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
1270 struct smc_llc_msg_del_link *del_llc;
1271 struct smc_link *lnk, *lnk_del;
1272 struct smc_llc_qentry *qentry;
1276 mutex_lock(&lgr->llc_conf_mutex);
1277 qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
1279 del_llc = &qentry->msg.delete_link;
1281 if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
1282 /* delete entire lgr */
/* the reason is stored in network byte order in the message, hence ntohl() */
1283 smc_llc_send_link_delete_all(lgr, true, ntohl(
1284 qentry->msg.delete_link.reason));
1285 smc_lgr_terminate_sched(lgr);
1288 /* delete single link */
1290 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1291 if (lgr->lnk[i].link_id == del_llc->link_num) {
1292 lnk_del = &lgr->lnk[i];
1297 goto out; /* asymmetric link already deleted */
1299 if (smc_link_downing(&lnk_del->state)) {
1300 smc_switch_conns(lgr, lnk_del, false);
1301 smc_wr_tx_wait_no_pending_sends(lnk_del);
/* an empty lgr->list marks a terminating link group (see
 * smc_llc_delete_link_work()); the peer exchange is skipped in that case
 */
1303 if (!list_empty(&lgr->list)) {
1304 /* qentry is either a request from peer (send it back to
1305 * initiate the DELETE_LINK processing), or a locally
1306 * enqueued DELETE_LINK request (forward it)
1308 if (!smc_llc_send_message(lnk, &qentry->msg)) {
1309 struct smc_llc_qentry *qentry2;
1311 qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
1312 SMC_LLC_DELETE_LINK);
1314 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1317 smcr_link_clear(lnk_del, true);
1319 active_links = smc_llc_active_link_count(lgr);
1320 if (active_links == 1) {
1321 smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1322 } else if (!active_links) {
1323 smcr_lgr_set_type(lgr, SMC_LGR_NONE);
1324 smc_lgr_terminate_sched(lgr);
1327 if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
1328 /* trigger setup of asymm alt link */
1329 smc_llc_srv_add_link_local(lnk);
1332 mutex_unlock(&lgr->llc_conf_mutex);
/* Worker registered for lgr->llc_del_link_work in smc_llc_lgr_init().
 *
 * If the link group is no longer on its list, the queued request of the
 * local flow is dropped. Otherwise the request is processed by the client
 * handler when lgr->role is SMC_CLNT, or by the server handler (NOTE(review):
 * presumably for every other role - confirm). The local flow is stopped with
 * smc_llc_flow_stop() at the end.
 */
1336 static void smc_llc_delete_link_work(struct work_struct *work)
1338 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
1341 if (list_empty(&lgr->list)) {
1342 /* link group is terminating */
1343 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1347 if (lgr->role == SMC_CLNT)
1348 smc_llc_process_cli_delete_link(lgr);
1350 smc_llc_process_srv_delete_link(lgr);
1352 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
1355 /* process a confirm_rkey request from peer, remote flow */
/* The request is read from lgr->llc_flow_rmt.qentry. rtoken[0] is registered
 * for the link the request belongs to (qentry->link) via smc_rtoken_add();
 * the further rtoken entries are stored for their link_id under the same
 * rtoken index via smc_rtoken_set2(). The request buffer is then flagged as
 * response, sent back on the same link and removed from the remote flow.
 */
1356 static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
1358 struct smc_llc_msg_confirm_rkey *llc;
1359 struct smc_llc_qentry *qentry;
1360 struct smc_link *link;
1365 qentry = lgr->llc_flow_rmt.qentry;
1366 llc = &qentry->msg.confirm_rkey;
1367 link = qentry->link;
/* the num_rkeys byte of the first rtoken carries the entry count */
1369 num_entries = llc->rtoken[0].num_rkeys;
1370 /* first rkey entry is for receiving link */
1371 rk_idx = smc_rtoken_add(link,
1372 llc->rtoken[0].rmb_vaddr,
1373 llc->rtoken[0].rmb_key);
/* the index is capped at SMC_LLC_RKEYS_PER_MSG - 1, the last element of the
 * rtoken[] array, regardless of the count announced by the peer
 */
1377 for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
1378 smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
1379 llc->rtoken[i].rmb_vaddr,
1380 llc->rtoken[i].rmb_key);
1381 /* max links is 3 so there is no need to support conf_rkey_cont msgs */
/* NOTE(review): the negative-response and retry flags are presumably set
 * only on an error path (e.g. smc_rtoken_add() failing) - confirm
 */
1384 llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
1385 llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
1387 llc->hd.flags |= SMC_LLC_FLAG_RESP;
1388 smc_llc_send_message(link, &qentry->msg);
1389 smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
1392 /* process a delete_rkey request from peer, remote flow */
/* The request is read from lgr->llc_flow_rmt.qentry. Up to
 * SMC_LLC_DEL_RKEY_MAX rkeys of the message are passed to
 * smc_rtoken_delete() for the link of the request. Failures are collected in
 * a bit mask that is returned to the peer in llc->err_mask. The request
 * buffer is flagged as response, sent back and removed from the remote flow.
 */
1393 static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
1395 struct smc_llc_msg_delete_rkey *llc;
1396 struct smc_llc_qentry *qentry;
1397 struct smc_link *link;
1401 qentry = lgr->llc_flow_rmt.qentry;
1402 llc = &qentry->msg.delete_rkey;
1403 link = qentry->link;
/* never walk more rkey entries than SMC_LLC_DEL_RKEY_MAX, whatever count
 * the peer announced
 */
1405 max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
1406 for (i = 0; i < max; i++) {
/* rkey i is reported in bit (SMC_LLC_DEL_RKEY_MAX - 1 - i), i.e. the first
 * rkey maps to the highest of the SMC_LLC_DEL_RKEY_MAX mask bits
 */
1407 if (smc_rtoken_delete(link, llc->rkey[i]))
1408 err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
/* NOTE(review): presumably only done when err_mask is non-zero - confirm */
1411 llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
1412 llc->err_mask = err_mask;
1414 llc->hd.flags |= SMC_LLC_FLAG_RESP;
1415 smc_llc_send_message(link, &qentry->msg);
1416 smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
/* React to an LLC message of unexpected type @type on link group @lgr:
 * emit a rate-limited warning with the link group id and the type, record
 * SMC_LLC_DEL_PROT_VIOL as termination reason and schedule termination of
 * the link group.
 */
1419 static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
1421 pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: "
1422 "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type);
1423 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
1424 smc_lgr_terminate_sched(lgr);
1427 /* flush the llc event queue */
/* Every entry of lgr->llc_event_q is unlinked while llc_event_q_lock is held
 * with bottom halves disabled. The _safe iterator is needed because entries
 * are removed during the walk.
 * NOTE(review): the unlinked entries are presumably freed as well - confirm.
 */
1428 static void smc_llc_event_flush(struct smc_link_group *lgr)
1430 struct smc_llc_qentry *qentry, *q;
1432 spin_lock_bh(&lgr->llc_event_q_lock);
1433 list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
1434 list_del_init(&qentry->list);
1437 spin_unlock_bh(&lgr->llc_event_q_lock);
/* Dispatch one queued LLC request (@qentry) according to its message type.
 *
 * The usability of the link is checked first. Then:
 * - TEST_LINK is answered directly by sending the message back with
 *   SMC_LLC_FLAG_RESP set.
 * - ADD_LINK is ignored for a terminating link group (empty lgr->list).
 *   Otherwise it is attached to a local flow that waits for it (followed by
 *   a wake-up of lgr->llc_waiter), or it starts a new local flow and
 *   lgr->llc_add_link_work is scheduled.
 * - CONFIRM_LINK and ADD_LINK_CONT are attached to the local flow if one is
 *   active, followed by a wake-up of lgr->llc_waiter.
 * - DELETE_LINK is attached to an active local flow, or starts a new local
 *   flow and schedules lgr->llc_del_link_work; the conditions differ between
 *   client and server role.
 * - CONFIRM_RKEY and DELETE_RKEY start a remote flow and are processed right
 *   here, after which the remote flow is stopped again.
 * - CONFIRM_RKEY_CONT is not used.
 * Message types not listed end in smc_llc_protocol_violation()
 * (NOTE(review): presumably the default case - confirm).
 *
 * NOTE(review): ownership of @qentry (who frees it on which path) is not
 * documented here - confirm before changing any of the branches.
 */
1440 static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
1442 union smc_llc_msg *llc = &qentry->msg;
1443 struct smc_link *link = qentry->link;
1444 struct smc_link_group *lgr = link->lgr;
1446 if (!smc_link_usable(link))
1449 switch (llc->raw.hdr.common.type) {
1450 case SMC_LLC_TEST_LINK:
1451 llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
1452 smc_llc_send_message(link, llc);
1454 case SMC_LLC_ADD_LINK:
1455 if (list_empty(&lgr->list))
1456 goto out; /* lgr is terminating */
1457 if (lgr->role == SMC_CLNT) {
1458 if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
1459 /* a flow is waiting for this message */
1460 smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
1462 wake_up_interruptible(&lgr->llc_waiter);
1463 } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
1465 schedule_work(&lgr->llc_add_link_work);
1467 } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
1468 /* as smc server, handle client suggestion */
1469 schedule_work(&lgr->llc_add_link_work);
1472 case SMC_LLC_CONFIRM_LINK:
1473 case SMC_LLC_ADD_LINK_CONT:
1474 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1475 /* a flow is waiting for this message */
1476 smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
1477 wake_up_interruptible(&lgr->llc_waiter);
1481 case SMC_LLC_DELETE_LINK:
1482 if (lgr->role == SMC_CLNT) {
1483 /* server requests to delete this link, send response */
1484 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1485 /* DEL LINK REQ during ADD LINK SEQ */
1486 smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
1488 wake_up_interruptible(&lgr->llc_waiter);
1489 } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
1491 schedule_work(&lgr->llc_del_link_work);
1494 if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
1495 !lgr->llc_flow_lcl.qentry) {
1496 /* DEL LINK REQ during ADD LINK SEQ */
1497 smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
1499 wake_up_interruptible(&lgr->llc_waiter);
1500 } else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
1502 schedule_work(&lgr->llc_del_link_work);
1506 case SMC_LLC_CONFIRM_RKEY:
1507 /* new request from remote, assign to remote flow */
1508 if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
1509 /* process here, does not wait for more llc msgs */
1510 smc_llc_rmt_conf_rkey(lgr);
1511 smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
1514 case SMC_LLC_CONFIRM_RKEY_CONT:
1515 /* not used because max links is 3, and 3 rkeys fit into
1516 * one CONFIRM_RKEY message
1519 case SMC_LLC_DELETE_RKEY:
1520 /* new request from remote, assign to remote flow */
1521 if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
1522 /* process here, does not wait for more llc msgs */
1523 smc_llc_rmt_delete_rkey(lgr);
1524 smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
1528 smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
1535 /* worker to process llc messages on the event queue */
/* Registered for lgr->llc_event_work in smc_llc_lgr_init().
 *
 * A delayed event (lgr->delayed_event) is looked at first, but only while no
 * local flow is active. It is passed to smc_llc_event_handler() if its link
 * is still usable; otherwise lgr->delayed_event is reset to NULL.
 * Afterwards the first entry of lgr->llc_event_q is unlinked under
 * llc_event_q_lock and handled with the lock already released.
 * NOTE(review): presumably this repeats until the queue is empty - confirm.
 */
1536 static void smc_llc_event_work(struct work_struct *work)
1538 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
1540 struct smc_llc_qentry *qentry;
1542 if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
1543 if (smc_link_usable(lgr->delayed_event->link)) {
1544 smc_llc_event_handler(lgr->delayed_event);
1546 qentry = lgr->delayed_event;
1547 lgr->delayed_event = NULL;
1553 spin_lock_bh(&lgr->llc_event_q_lock);
1554 if (!list_empty(&lgr->llc_event_q)) {
1555 qentry = list_first_entry(&lgr->llc_event_q,
1556 struct smc_llc_qentry, list);
1557 list_del_init(&qentry->list);
/* drop the queue lock before the entry is handled */
1558 spin_unlock_bh(&lgr->llc_event_q_lock);
1559 smc_llc_event_handler(qentry);
1562 spin_unlock_bh(&lgr->llc_event_q_lock);
1565 /* process llc responses in tasklet context */
/* @link:   link the response belongs to
 * @qentry: queue entry holding the response message
 *
 * A TEST_LINK response completes link->llc_testlink_resp while the link is
 * SMC_LNK_ACTIVE. Responses of type ADD_LINK, DELETE_LINK, CONFIRM_LINK,
 * ADD_LINK_CONT, CONFIRM_RKEY and DELETE_RKEY are attached to the local flow
 * of the link group and lgr->llc_waiter is woken. CONFIRM_RKEY_CONT is not
 * used. Types not listed end in smc_llc_protocol_violation()
 * (NOTE(review): presumably the default case - confirm).
 */
1566 static void smc_llc_rx_response(struct smc_link *link,
1567 struct smc_llc_qentry *qentry)
1569 u8 llc_type = qentry->msg.raw.hdr.common.type;
1572 case SMC_LLC_TEST_LINK:
1573 if (link->state == SMC_LNK_ACTIVE)
1574 complete(&link->llc_testlink_resp);
1576 case SMC_LLC_ADD_LINK:
1577 case SMC_LLC_DELETE_LINK:
1578 case SMC_LLC_CONFIRM_LINK:
1579 case SMC_LLC_ADD_LINK_CONT:
1580 case SMC_LLC_CONFIRM_RKEY:
1581 case SMC_LLC_DELETE_RKEY:
1582 /* assign responses to the local flow, we requested them */
1583 smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
1584 wake_up_interruptible(&link->lgr->llc_waiter);
1586 case SMC_LLC_CONFIRM_RKEY_CONT:
1587 /* not used because max links is 3 */
1590 smc_llc_protocol_violation(link->lgr, llc_type);
/* Queue a copy of LLC message @llc for @link.
 *
 * A queue entry is allocated with GFP_ATOMIC and sizeof(union smc_llc_msg)
 * bytes are copied from @llc into it. A message carrying SMC_LLC_FLAG_RESP
 * is handed to smc_llc_rx_response() right away. Every other message is
 * appended to lgr->llc_event_q under llc_event_q_lock (interrupts saved and
 * disabled) and lgr->llc_event_work is scheduled to process it.
 */
1596 static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
1598 struct smc_link_group *lgr = link->lgr;
1599 struct smc_llc_qentry *qentry;
1600 unsigned long flags;
1602 qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
1605 qentry->link = link;
1606 INIT_LIST_HEAD(&qentry->list);
1607 memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
1609 /* process responses immediately */
1610 if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
1611 smc_llc_rx_response(link, qentry);
1615 /* add requests to event queue */
1616 spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
1617 list_add_tail(&qentry->list, &lgr->llc_event_q);
1618 spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
1619 schedule_work(&link->lgr->llc_event_work);
1622 /* copy received msg and add it to the event queue */
/* @wc:  work completion; wc->qp->qp_context is used as the struct smc_link
 * @buf: received data, interpreted as union smc_llc_msg
 *
 * A message is dropped silently if fewer bytes than sizeof(union
 * smc_llc_msg) were received, or if the length field of its header differs
 * from sizeof(union smc_llc_msg). Valid messages go to smc_llc_enqueue().
 */
1623 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
1625 struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
1626 union smc_llc_msg *llc = buf;
1628 if (wc->byte_len < sizeof(*llc))
1629 return; /* short message */
1630 if (llc->raw.hdr.length != sizeof(*llc))
1631 return; /* invalid message */
1633 smc_llc_enqueue(link, llc);
1636 /***************************** worker, utils *********************************/
/* Delayed work of a link (link->llc_testlink_wrk) that probes the peer with
 * TEST LINK messages.
 *
 * The work returns without re-arming itself once the link is no longer
 * SMC_LNK_ACTIVE. While link->wr_rx_tstamp + link->llc_testlink_time still
 * lies in the future, the remaining time becomes the next interval.
 * Otherwise a TEST LINK message with zeroed user data is sent and the
 * completion link->llc_testlink_resp is awaited; the next interval is then
 * link->llc_testlink_time. The work re-arms itself through
 * schedule_delayed_work().
 */
1638 static void smc_llc_testlink_work(struct work_struct *work)
1640 struct smc_link *link = container_of(to_delayed_work(work),
1641 struct smc_link, llc_testlink_wrk);
1642 unsigned long next_interval;
1643 unsigned long expire_time;
1644 u8 user_data[16] = { 0 };
1647 if (link->state != SMC_LNK_ACTIVE)
1648 return; /* don't reschedule worker */
1649 expire_time = link->wr_rx_tstamp + link->llc_testlink_time;
1650 if (time_is_after_jiffies(expire_time)) {
1651 next_interval = expire_time - jiffies;
/* re-arm the completion before the request goes out, so a response cannot
 * be missed
 */
1654 reinit_completion(&link->llc_testlink_resp);
1655 smc_llc_send_test_link(link, user_data);
1656 /* receive TEST LINK response over RoCE fabric */
1657 rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
1659 if (link->state != SMC_LNK_ACTIVE)
1660 return; /* link state changed */
/* NOTE(review): presumably reached only when the wait above did not
 * complete (rc <= 0) - confirm
 */
1662 smcr_link_down_cond_sched(link);
1665 next_interval = link->llc_testlink_time;
1667 schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
/* Initialize the LLC related members of link group @lgr: the event, add-link
 * and delete-link work items, the event queue with its lock, the flow lock,
 * the wait queue llc_waiter and llc_conf_mutex.
 *
 * @lgr: link group to initialize
 * @smc: socket whose clcsock determines the network namespace that the test
 *       link time is read from
 */
1670 void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
1672 struct net *net = sock_net(smc->clcsock->sk);
1674 INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
1675 INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
1676 INIT_WORK(&lgr->llc_del_link_work, smc_llc_delete_link_work);
1677 INIT_LIST_HEAD(&lgr->llc_event_q);
1678 spin_lock_init(&lgr->llc_event_q_lock);
1679 spin_lock_init(&lgr->llc_flow_lock);
1680 init_waitqueue_head(&lgr->llc_waiter);
1681 mutex_init(&lgr->llc_conf_mutex);
/* the test link time follows the TCP keepalive time of the namespace.
 * NOTE(review): smc_llc_link_active() multiplies this value by HZ; confirm
 * the unit in which sysctl_tcp_keepalive_time is stored
 */
1682 lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
1685 /* called after lgr was removed from lgr_list */
/* Tear down the LLC state of @lgr: flush the event queue, wake every waiter
 * on lgr->llc_waiter, cancel the event, add-link and delete-link work items
 * synchronously, and free a still pending delayed event.
 */
1686 void smc_llc_lgr_clear(struct smc_link_group *lgr)
1688 smc_llc_event_flush(lgr);
1689 wake_up_interruptible_all(&lgr->llc_waiter);
1690 cancel_work_sync(&lgr->llc_event_work);
1691 cancel_work_sync(&lgr->llc_add_link_work);
1692 cancel_work_sync(&lgr->llc_del_link_work);
1693 if (lgr->delayed_event) {
1694 kfree(lgr->delayed_event);
/* reset the pointer so that the freed entry cannot be picked up again */
1695 lgr->delayed_event = NULL;
/* Initialize the LLC members of @link: the completion used for TEST LINK
 * responses and the delayed work that runs smc_llc_testlink_work().
 * NOTE(review): return value presumably always 0 - confirm.
 */
1699 int smc_llc_link_init(struct smc_link *link)
1701 init_completion(&link->llc_testlink_resp);
1702 INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
/* Mark @link as active: log the new link (link group id, link uid, peer link
 * uid, IB device name and port), set its state to SMC_LNK_ACTIVE and, if the
 * link group has a non-zero test link time, start the periodic test link
 * work of the link.
 */
1706 void smc_llc_link_active(struct smc_link *link)
1708 pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, "
1709 "peerid %*phN, ibdev %s, ibport %d\n",
1710 SMC_LGR_ID_SIZE, &link->lgr->id,
1711 SMC_LGR_ID_SIZE, &link->link_uid,
1712 SMC_LGR_ID_SIZE, &link->peer_link_uid,
1713 link->smcibdev->ibdev->name, link->ibport);
1714 link->state = SMC_LNK_ACTIVE;
/* a test link time of 0 leaves the test link work unscheduled */
1715 if (link->lgr->llc_testlink_time) {
/* the per-link value is the link group value scaled by HZ and is used as
 * the delay passed to schedule_delayed_work().
 * NOTE(review): assumes lgr->llc_testlink_time is in seconds - confirm
 * against smc_llc_lgr_init()
 */
1716 link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
1717 schedule_delayed_work(&link->llc_testlink_wrk,
1718 link->llc_testlink_time);
1722 /* called in worker context */
/* Stop the LLC activity of @link: complete link->llc_testlink_resp, cancel
 * the test link work synchronously and wake the waiters handled by
 * smc_wr_wakeup_reg_wait() and smc_wr_wakeup_tx_wait().
 *
 * @link: link to clear
 * @log:  NOTE(review): presumably gates the "link removed" message - confirm
 */
1723 void smc_llc_link_clear(struct smc_link *link, bool log)
1726 pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN"
1727 ", peerid %*phN, ibdev %s, ibport %d\n",
1728 SMC_LGR_ID_SIZE, &link->lgr->id,
1729 SMC_LGR_ID_SIZE, &link->link_uid,
1730 SMC_LGR_ID_SIZE, &link->peer_link_uid,
1731 link->smcibdev->ibdev->name, link->ibport);
/* NOTE(review): the completion is presumably signalled first so that a test
 * link worker blocked waiting for a response returns before the synchronous
 * cancel below - confirm
 */
1732 complete(&link->llc_testlink_resp);
1733 cancel_delayed_work_sync(&link->llc_testlink_wrk);
1734 smc_wr_wakeup_reg_wait(link);
1735 smc_wr_wakeup_tx_wait(link);
1738 /* register a new rtoken at the remote peer (for all links) */
/* @send_link: link used to send the CONFIRM RKEY request
 * @rmb_desc:  buffer descriptor passed on to smc_llc_send_confirm_rkey()
 *
 * After the request was sent, a response of type SMC_LLC_CONFIRM_RKEY is
 * awaited for up to SMC_LLC_WAIT_TIME. A missing response, or a response
 * with SMC_LLC_FLAG_RKEY_NEG set, is tested for (NOTE(review): presumably
 * reported as an error return - confirm). The response entry is removed from
 * the local flow afterwards.
 */
1739 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
1740 struct smc_buf_desc *rmb_desc)
1742 struct smc_link_group *lgr = send_link->lgr;
1743 struct smc_llc_qentry *qentry = NULL;
1746 rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
1749 /* receive CONFIRM RKEY response from server over RoCE fabric */
1750 qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
1751 SMC_LLC_CONFIRM_RKEY);
1752 if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
1756 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
1760 /* unregister an rtoken at the remote peer */
/* @lgr:      link group; the sending link is picked by smc_llc_usable_link()
 * @rmb_desc: buffer descriptor passed on to smc_llc_send_delete_rkey()
 *
 * After the request was sent, a response of type SMC_LLC_DELETE_RKEY is
 * awaited for up to SMC_LLC_WAIT_TIME. A missing response, or a response
 * with SMC_LLC_FLAG_RKEY_NEG set, is tested for (NOTE(review): presumably
 * reported as an error return - confirm). The response entry is removed from
 * the local flow afterwards.
 */
1761 int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
1762 struct smc_buf_desc *rmb_desc)
1764 struct smc_llc_qentry *qentry = NULL;
1765 struct smc_link *send_link;
1768 send_link = smc_llc_usable_link(lgr);
1772 /* protected by llc_flow control */
1773 rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
1776 /* receive DELETE RKEY response from server over RoCE fabric */
1777 qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
1778 SMC_LLC_DELETE_RKEY);
1779 if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
1783 smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
/* Derive the user id of @link: the first 32 bits of lgr->id, read as a u32,
 * plus the link_id of the link, converted with htonl() and copied into
 * link->link_uid (SMC_LGR_ID_SIZE bytes).
 */
1787 void smc_llc_link_set_uid(struct smc_link *link)
1791 link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id);
1792 memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
1795 /* save peers link user id, used for debug purposes */
/* The id is copied from the link_uid field of the CONFIRM LINK message in
 * @qentry into peer_link_uid of the link the entry belongs to.
 */
1796 void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry)
1798 memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid,
1802 /* evaluate confirm link request or response */
/* @qentry: queue entry holding the CONFIRM LINK message
 * @type:   SMC_LLC_REQ when the message is a request
 *
 * For a request, the link_id of the link is taken from link_num of the
 * message and the link uid is recomputed. The header flags are then checked
 * against SMC_LLC_FLAG_NO_RMBE_EYEC (NOTE(review): a message with none of
 * these bits set is presumably rejected with an error return - confirm).
 */
1803 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
1804 enum smc_llc_reqresp type)
1806 if (type == SMC_LLC_REQ) { /* SMC server assigns link_id */
1807 qentry->link->link_id = qentry->msg.confirm_link.link_num;
1808 smc_llc_link_set_uid(qentry->link);
/* SMC_LLC_FLAG_NO_RMBE_EYEC is 0x03, so this condition is true only when
 * neither of the two bits is set
 */
1810 if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
1815 /***************************** init, exit, misc ******************************/
/* Receive handler table: every LLC message type listed here is routed to
 * smc_llc_rx_handler(). smc_llc_init() registers the entries one by one and
 * stops at the first entry whose handler is NULL.
 * NOTE(review): the table therefore needs such a terminating entry - confirm
 * it is present.
 */
1817 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
1819 .handler = smc_llc_rx_handler,
1820 .type = SMC_LLC_CONFIRM_LINK
1823 .handler = smc_llc_rx_handler,
1824 .type = SMC_LLC_TEST_LINK
1827 .handler = smc_llc_rx_handler,
1828 .type = SMC_LLC_ADD_LINK
1831 .handler = smc_llc_rx_handler,
1832 .type = SMC_LLC_ADD_LINK_CONT
1835 .handler = smc_llc_rx_handler,
1836 .type = SMC_LLC_DELETE_LINK
1839 .handler = smc_llc_rx_handler,
1840 .type = SMC_LLC_CONFIRM_RKEY
1843 .handler = smc_llc_rx_handler,
1844 .type = SMC_LLC_CONFIRM_RKEY_CONT
1847 .handler = smc_llc_rx_handler,
1848 .type = SMC_LLC_DELETE_RKEY
1855 int __init smc_llc_init(void)
1857 struct smc_wr_rx_handler *handler;
1860 for (handler = smc_llc_rx_handlers; handler->handler; handler++) {
1861 INIT_HLIST_NODE(&handler->list);
1862 rc = smc_wr_rx_register_handler(handler);