// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Basic Transport Functions exploiting Infiniband API
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <[email protected]>
 */

#include <linux/socket.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/reboot.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/smc.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_wr.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_close.h"
#include "smc_ism.h"
#include "smc_netlink.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"

#define SMC_LGR_NUM_INCR		256
#define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
#define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
	.num = 0,
};

static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc);
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);

static void smc_link_down_work(struct work_struct *work);

/* return head of link group list and its lock for a given link group */
static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
						  spinlock_t **lgr_lock)
{
	if (lgr->is_smcd) {
		*lgr_lock = &lgr->smcd->lgr_lock;
		return &lgr->smcd->lgr_list;
	}

	*lgr_lock = &smc_lgr_list.lock;
	return &smc_lgr_list.list;
}

static void smc_ibdev_cnt_inc(struct smc_link *lnk)
{
	atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_ibdev_cnt_dec(struct smc_link *lnk)
{
	atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
}

static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
{
	/* client link group creation always follows the server link group
	 * creation. For client use a somewhat higher removal delay time,
	 * otherwise there is a risk of out-of-sync link groups.
	 */
	if (!lgr->freeing) {
		mod_delayed_work(system_wq, &lgr->free_work,
				 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
						SMC_LGR_FREE_DELAY_CLNT :
						SMC_LGR_FREE_DELAY_SERV);
	}
}

/* Register connection's alert token in our lookup structure.
 * To use rbtrees we have to implement our own insert core.
 * Requires @conns_lock
 * @smc		connection to register
 */
static void smc_lgr_add_alert_token(struct smc_connection *conn)
{
	struct rb_node **link, *parent = NULL;
	u32 token = conn->alert_token_local;

	link = &conn->lgr->conns_all.rb_node;
	while (*link) {
		struct smc_connection *cur = rb_entry(*link,
					struct smc_connection, alert_node);

		parent = *link;
		if (cur->alert_token_local > token)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}
	/* Put the new node there */
	rb_link_node(&conn->alert_node, parent, link);
	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
}
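
/* For reference, the lookup side of this tree lives in smc_lgr_find_conn()
 * (smc_core.h) and mirrors the insert above; a minimal sketch, assuming the
 * same field names:
 *
 *	node = lgr->conns_all.rb_node;
 *	while (node) {
 *		cur = rb_entry(node, struct smc_connection, alert_node);
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;	// token found
 *	}
 */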

/* assign an SMC-R link to the connection */
static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
{
	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
				       SMC_LNK_ACTIVE;
	int i, j;

	/* do link balancing */
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &conn->lgr->lnk[i];

		if (lnk->state != expected || lnk->link_is_asym)
			continue;
		if (conn->lgr->role == SMC_CLNT) {
			conn->lnk = lnk; /* temporary, SMC server assigns link*/
			break;
		}
		if (conn->lgr->conns_num % 2) {
			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
				struct smc_link *lnk2;

				lnk2 = &conn->lgr->lnk[j];
				if (lnk2->state == expected &&
				    !lnk2->link_is_asym) {
					conn->lnk = lnk2;
					break;
				}
			}
		}
		if (!conn->lnk)
			conn->lnk = lnk;
		break;
	}
	if (!conn->lnk)
		return SMC_CLC_DECL_NOACTLINK;
	atomic_inc(&conn->lnk->conn_cnt);
	return 0;
}
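
/* Note on the balancing above: the server alternates between the first
 * usable link and the next one based on the parity of conns_num, so that
 * connections spread roughly evenly across a symmetric link pair; the
 * client's choice is only provisional, since the server-assigned link
 * arrives later in the handshake.
 */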

/* Register connection in link group by assigning an alert token
 * registered in a search tree.
 * Requires @conns_lock
 * Note that '0' is a reserved value and not assigned.
 */
static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	static atomic_t nexttoken = ATOMIC_INIT(0);
	int rc;

	if (!conn->lgr->is_smcd) {
		rc = smcr_lgr_conn_assign_link(conn, first);
		if (rc)
			return rc;
	}
	/* find a new alert_token_local value not yet used by some connection
	 * in this link group
	 */
	sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
	while (!conn->alert_token_local) {
		conn->alert_token_local = atomic_inc_return(&nexttoken);
		if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
			conn->alert_token_local = 0;
	}
	smc_lgr_add_alert_token(conn);
	conn->lgr->conns_num++;
	return 0;
}

/* Unregister connection and reset the alert token of the given connection
 */
static void __smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_link_group *lgr = conn->lgr;

	rb_erase(&conn->alert_node, &lgr->conns_all);
	if (conn->lnk)
		atomic_dec(&conn->lnk->conn_cnt);
	lgr->conns_num--;
	conn->alert_token_local = 0;
	sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}

/* Unregister connection from lgr
 */
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	write_lock_bh(&lgr->conns_lock);
	if (conn->alert_token_local) {
		__smc_lgr_unregister_conn(conn);
	}
	write_unlock_bh(&lgr->conns_lock);
	conn->lgr = NULL;
}

int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	char hostname[SMC_MAX_HOSTNAME_LEN + 1];
	char smc_seid[SMC_MAX_EID_LEN + 1];
	struct nlattr *attrs;
	u8 *seid = NULL;
	u8 *host = NULL;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_SYS_INFO);
	if (!nlh)
		goto errmsg;
	if (cb_ctx->pos[0])
		goto errout;
	attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
	if (!attrs)
		goto errout;
	if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
		goto errattr;
	smc_clc_get_hostname(&host);
	if (host) {
		memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
		hostname[SMC_MAX_HOSTNAME_LEN] = 0;
		if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
			goto errattr;
	}
	if (smc_ism_is_v2_capable()) {
		smc_ism_get_system_eid(&seid);
		memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
		smc_seid[SMC_MAX_EID_LEN] = 0;
		if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
			goto errattr;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	cb_ctx->pos[0] = 1;
	return skb->len;

errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return skb->len;
}
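
/* The message built above nests as follows (informal sketch, not a formal
 * spec):
 *
 *	SMC_NETLINK_GET_SYS_INFO
 *	`-- SMC_GEN_SYS_INFO
 *	    |-- SMC_NLA_SYS_VER / _REL / _IS_ISM_V2 / _IS_SMCR_V2
 *	    |-- SMC_NLA_SYS_LOCAL_HOST	(if a hostname is set)
 *	    `-- SMC_NLA_SYS_SEID	(if ISM v2 capable)
 *
 * On any nla_put failure the nest and the message are cancelled, but
 * skb->len is still returned so the dump infrastructure can continue.
 */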

/* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
				     struct sk_buff *skb,
				     struct netlink_callback *cb,
				     struct nlattr *v2_attrs)
{
	char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
	char smc_eid[SMC_MAX_EID_LEN + 1];

	if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
		goto errv2attr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
		goto errv2attr;
	if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
		goto errv2attr;
	memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
	smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
		goto errv2attr;
	memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
	smc_eid[SMC_MAX_EID_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
		goto errv2attr;

	nla_nest_end(skb, v2_attrs);
	return 0;

errv2attr:
	nla_nest_cancel(skb, v2_attrs);
	return -EMSGSIZE;
}

static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
				   struct sk_buff *skb,
				   struct netlink_callback *cb)
{
	struct nlattr *v2_attrs;

	v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
	if (!v2_attrs)
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
		goto errv2attr;

	nla_nest_end(skb, v2_attrs);
	return 0;

errv2attr:
	nla_nest_cancel(skb, v2_attrs);
errattr:
	return -EMSGSIZE;
}

static int smc_nl_fill_lgr(struct smc_link_group *lgr,
			   struct sk_buff *skb,
			   struct netlink_callback *cb)
{
	char smc_target[SMC_MAX_PNETID_LEN + 1];
	struct nlattr *attrs, *v2_attrs;

	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
	if (!attrs)
		goto errout;

	if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
			      lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
		goto errattr;
	memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
	smc_target[SMC_MAX_PNETID_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
		goto errattr;
	if (lgr->smc_version > SMC_V1) {
		v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
		if (!v2_attrs)
			goto errattr;
		if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
			goto errattr;
		if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
			goto errattr;
	}

	nla_nest_end(skb, attrs);
	return 0;
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	return -EMSGSIZE;
}

static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
				struct smc_link *link,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	char smc_ibname[IB_DEVICE_NAME_MAX];
	u8 smc_gid_target[41];
	struct nlattr *attrs;
	u32 link_uid = 0;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LINK_SMCR);
	if (!nlh)
		goto errmsg;

	attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
	if (!attrs)
		goto errout;

	if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
			atomic_read(&link->conn_cnt)))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
		goto errattr;
	snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
	if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
		goto errattr;
	memcpy(&link_uid, link->link_uid, sizeof(link_uid));
	if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
		goto errattr;
	memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
	if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
		goto errattr;
	memset(smc_gid_target, 0, sizeof(smc_gid_target));
	smc_gid_be16_convert(smc_gid_target, link->gid);
	if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
		goto errattr;
	memset(smc_gid_target, 0, sizeof(smc_gid_target));
	smc_gid_be16_convert(smc_gid_target, link->peer_gid);
	if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
		goto errattr;

	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;
errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}

static int smc_nl_handle_lgr(struct smc_link_group *lgr,
			     struct sk_buff *skb,
			     struct netlink_callback *cb,
			     bool list_links)
{
	void *nlh;
	int i;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LGR_SMCR);
	if (!nlh)
		goto errmsg;
	if (smc_nl_fill_lgr(lgr, skb, cb))
		goto errout;

	genlmsg_end(skb, nlh);
	if (!list_links)
		goto out;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_usable(&lgr->lnk[i]))
			continue;
		if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
			goto errout;
	}
out:
	return 0;

errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}

static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
				 struct sk_buff *skb,
				 struct netlink_callback *cb,
				 bool list_links)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smc_link_group *lgr;
	int snum = cb_ctx->pos[0];
	int num = 0;

	spin_lock_bh(&smc_lgr->lock);
	list_for_each_entry(lgr, &smc_lgr->list, list) {
		if (num < snum)
			goto next;
		if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
			goto errout;
next:
		num++;
	}
errout:
	spin_unlock_bh(&smc_lgr->lock);
	cb_ctx->pos[0] = num;
}
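
/* Dump resumption works via cb_ctx->pos[]: when the skb fills up, the dump
 * callback returns and is re-invoked later; entries with num < snum were
 * already sent in an earlier pass and are skipped, so each link group is
 * reported once even across multiple netlink reads.
 */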

static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	char smc_pnet[SMC_MAX_PNETID_LEN + 1];
	struct nlattr *attrs;
	void *nlh;

	nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &smc_gen_nl_family, NLM_F_MULTI,
			  SMC_NETLINK_GET_LGR_SMCD);
	if (!nlh)
		goto errmsg;

	attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
	if (!attrs)
		goto errout;

	if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
			      SMC_NLA_LGR_D_PAD))
		goto errattr;
	if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
			      SMC_NLA_LGR_D_PAD))
		goto errattr;
	if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
		goto errattr;
	if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
		goto errattr;
	memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
	smc_pnet[SMC_MAX_PNETID_LEN] = 0;
	if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
		goto errattr;
	if (lgr->smc_version > SMC_V1) {
		struct nlattr *v2_attrs;

		v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
		if (!v2_attrs)
			goto errattr;
		if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
			goto errattr;
	}
	nla_nest_end(skb, attrs);
	genlmsg_end(skb, nlh);
	return 0;

errattr:
	nla_nest_cancel(skb, attrs);
errout:
	genlmsg_cancel(skb, nlh);
errmsg:
	return -EMSGSIZE;
}

static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
				  struct sk_buff *skb,
				  struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smc_link_group *lgr;
	int snum = cb_ctx->pos[1];
	int rc = 0, num = 0;

	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry(lgr, &dev->lgr_list, list) {
		if (!lgr->is_smcd)
			continue;
		if (num < snum)
			goto next;
		rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
		if (rc)
			goto errout;
next:
		num++;
	}
errout:
	spin_unlock_bh(&dev->lgr_lock);
	cb_ctx->pos[1] = num;
	return rc;
}

static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
				struct sk_buff *skb,
				struct netlink_callback *cb)
{
	struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
	struct smcd_dev *smcd_dev;
	int snum = cb_ctx->pos[0];
	int rc = 0, num = 0;

	mutex_lock(&dev_list->mutex);
	list_for_each_entry(smcd_dev, &dev_list->list, list) {
		if (list_empty(&smcd_dev->lgr_list))
			continue;
		if (num < snum)
			goto next;
		rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
		if (rc)
			goto errout;
next:
		num++;
	}
errout:
	mutex_unlock(&dev_list->mutex);
	cb_ctx->pos[0] = num;
	return rc;
}

int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool list_links = false;

	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
	return skb->len;
}

int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
{
	bool list_links = true;

	smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
	return skb->len;
}

int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
{
	smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
	return skb->len;
}

void smc_lgr_cleanup_early(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;
	spinlock_t *lgr_lock;

	if (!lgr)
		return;

	smc_conn_free(conn);
	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	/* do not use this link group for new connections */
	if (!list_empty(&lgr->list))
		list_del_init(&lgr->list);
	spin_unlock_bh(lgr_lock);
	__smc_lgr_terminate(lgr, true);
}

static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (smc_link_sendable(lnk))
			lnk->state = SMC_LNK_INACTIVE;
	}
	wake_up_all(&lgr->llc_msg_waiter);
	wake_up_all(&lgr->llc_flow_waiter);
}

static void smc_lgr_free(struct smc_link_group *lgr);

static void smc_lgr_free_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(to_delayed_work(work),
						  struct smc_link_group,
						  free_work);
	spinlock_t *lgr_lock;
	bool conns;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;
	}
	read_lock_bh(&lgr->conns_lock);
	conns = RB_EMPTY_ROOT(&lgr->conns_all);
	read_unlock_bh(&lgr->conns_lock);
	if (!conns) { /* number of lgr connections is no longer zero */
		spin_unlock_bh(lgr_lock);
		return;
	}
	list_del_init(&lgr->list); /* remove from smc_lgr_list */
	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
	spin_unlock_bh(lgr_lock);
	cancel_delayed_work(&lgr->free_work);

	if (!lgr->is_smcd && !lgr->terminating)
		smc_llc_send_link_delete_all(lgr, true,
					     SMC_LLC_DEL_PROG_INIT_TERM);
	if (lgr->is_smcd && !lgr->terminating)
		smc_ism_signal_shutdown(lgr);
	if (!lgr->is_smcd)
		smcr_lgr_link_deactivate_all(lgr);
	smc_lgr_free(lgr);
}

static void smc_lgr_terminate_work(struct work_struct *work)
{
	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
						  terminate_work);

	__smc_lgr_terminate(lgr, true);
}

/* return next unique link id for the lgr */
static u8 smcr_next_link_id(struct smc_link_group *lgr)
{
	u8 link_id;
	int i;

	while (1) {
again:
		link_id = ++lgr->next_link_id;
		if (!link_id)	/* skip zero as link_id */
			link_id = ++lgr->next_link_id;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (smc_link_usable(&lgr->lnk[i]) &&
			    lgr->lnk[i].link_id == link_id)
				goto again;
		}
		break;
	}
	return link_id;
}

static void smcr_copy_dev_info_to_link(struct smc_link *link)
{
	struct smc_ib_device *smcibdev = link->smcibdev;

	snprintf(link->ibname, sizeof(link->ibname), "%s",
		 smcibdev->ibdev->name);
	link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
}

int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
		   u8 link_idx, struct smc_init_info *ini)
{
	struct smc_ib_device *smcibdev;
	u8 rndvec[3];
	int rc;

	if (lgr->smc_version == SMC_V2) {
		lnk->smcibdev = ini->smcrv2.ib_dev_v2;
		lnk->ibport = ini->smcrv2.ib_port_v2;
	} else {
		lnk->smcibdev = ini->ib_dev;
		lnk->ibport = ini->ib_port;
	}
	get_device(&lnk->smcibdev->ibdev->dev);
	atomic_inc(&lnk->smcibdev->lnk_cnt);
	lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
	lnk->link_id = smcr_next_link_id(lgr);
	lnk->lgr = lgr;
	lnk->link_idx = link_idx;
	smc_ibdev_cnt_inc(lnk);
	smcr_copy_dev_info_to_link(lnk);
	atomic_set(&lnk->conn_cnt, 0);
	smc_llc_link_set_uid(lnk);
	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
	if (!lnk->smcibdev->initialized) {
		rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
		if (rc)
			goto out;
	}
	get_random_bytes(rndvec, sizeof(rndvec));
	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
		(rndvec[2] << 16);
	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
				  ini->vlan_id, lnk->gid, &lnk->sgid_index,
				  lgr->smc_version == SMC_V2 ?
						  &ini->smcrv2 : NULL);
	if (rc)
		goto out;
	rc = smc_llc_link_init(lnk);
	if (rc)
		goto out;
	rc = smc_wr_alloc_link_mem(lnk);
	if (rc)
		goto clear_llc_lnk;
	rc = smc_ib_create_protection_domain(lnk);
	if (rc)
		goto free_link_mem;
	rc = smc_ib_create_queue_pair(lnk);
	if (rc)
		goto dealloc_pd;
	rc = smc_wr_create_link(lnk);
	if (rc)
		goto destroy_qp;
	lnk->state = SMC_LNK_ACTIVATING;
	return 0;

destroy_qp:
	smc_ib_destroy_queue_pair(lnk);
dealloc_pd:
	smc_ib_dealloc_protection_domain(lnk);
free_link_mem:
	smc_wr_free_link_mem(lnk);
clear_llc_lnk:
	smc_llc_link_clear(lnk, false);
out:
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
	return rc;
}
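
/* The error labels above unwind in exact reverse order of setup
 * (WR link -> QP -> PD -> link memory -> LLC link), so a failure at any
 * step releases only what was already acquired; the final memset returns
 * the smc_link to SMC_LNK_UNUSED for reuse.
 */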

/* create a new SMC link group */
static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_link_group *lgr;
	struct list_head *lgr_list;
	struct smc_link *lnk;
	spinlock_t *lgr_lock;
	u8 link_idx;
	int rc = 0;
	int i;

	if (ini->is_smcd && ini->vlan_id) {
		if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
				     ini->vlan_id)) {
			rc = SMC_CLC_DECL_ISMVLANERR;
			goto out;
		}
	}

	lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
	if (!lgr) {
		rc = SMC_CLC_DECL_MEM;
		goto ism_put_vlan;
	}
	lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
				     SMC_LGR_ID_SIZE, &lgr->id);
	if (!lgr->tx_wq) {
		rc = -ENOMEM;
		goto free_lgr;
	}
	lgr->is_smcd = ini->is_smcd;
	lgr->sync_err = 0;
	lgr->terminating = 0;
	lgr->freeing = 0;
	lgr->vlan_id = ini->vlan_id;
	mutex_init(&lgr->sndbufs_lock);
	mutex_init(&lgr->rmbs_lock);
	rwlock_init(&lgr->conns_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		INIT_LIST_HEAD(&lgr->sndbufs[i]);
		INIT_LIST_HEAD(&lgr->rmbs[i]);
	}
	lgr->next_link_id = 0;
	smc_lgr_list.num += SMC_LGR_NUM_INCR;
	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
	INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
	lgr->conns_all = RB_ROOT;
	if (ini->is_smcd) {
		/* SMC-D specific settings */
		get_device(&ini->ism_dev[ini->ism_selected]->dev);
		lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
		lgr->smcd = ini->ism_dev[ini->ism_selected];
		lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
		lgr_lock = &lgr->smcd->lgr_lock;
		lgr->smc_version = ini->smcd_version;
		lgr->peer_shutdown = 0;
		atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
	} else {
		/* SMC-R specific settings */
		struct smc_ib_device *ibdev;
		int ibport;

		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
		lgr->smc_version = ini->smcr_version;
		memcpy(lgr->peer_systemid, ini->peer_systemid,
		       SMC_SYSTEMID_LEN);
		if (lgr->smc_version == SMC_V2) {
			ibdev = ini->smcrv2.ib_dev_v2;
			ibport = ini->smcrv2.ib_port_v2;
			lgr->saddr = ini->smcrv2.saddr;
			lgr->uses_gateway = ini->smcrv2.uses_gateway;
			memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
			       ETH_ALEN);
		} else {
			ibdev = ini->ib_dev;
			ibport = ini->ib_port;
		}
		memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
		       SMC_MAX_PNETID_LEN);
		if (smc_wr_alloc_lgr_mem(lgr))
			goto free_wq;
		smc_llc_lgr_init(lgr, smc);

		link_idx = SMC_SINGLE_LINK;
		lnk = &lgr->lnk[link_idx];
		rc = smcr_link_init(lgr, lnk, link_idx, ini);
		if (rc) {
			smc_wr_free_lgr_mem(lgr);
			goto free_wq;
		}
		lgr->net = smc_ib_net(lnk->smcibdev);
		lgr_list = &smc_lgr_list.list;
		lgr_lock = &smc_lgr_list.lock;
		atomic_inc(&lgr_cnt);
	}
	smc->conn.lgr = lgr;
	spin_lock_bh(lgr_lock);
	list_add_tail(&lgr->list, lgr_list);
	spin_unlock_bh(lgr_lock);
	return 0;

free_wq:
	destroy_workqueue(lgr->tx_wq);
free_lgr:
	kfree(lgr);
ism_put_vlan:
	if (ini->is_smcd && ini->vlan_id)
		smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
out:
	if (rc < 0) {
		if (rc == -ENOMEM)
			rc = SMC_CLC_DECL_MEM;
		else
			rc = SMC_CLC_DECL_INTERR;
	}
	return rc;
}
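
/* Note the mixed error-code convention in smc_lgr_create(): internal errnos
 * (negative values such as -ENOMEM) are translated into positive
 * SMC_CLC_DECL_* reason codes at the out: label, because callers propagate
 * the result into a CLC decline message rather than to user space.
 */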

static int smc_write_space(struct smc_connection *conn)
{
	int buffer_len = conn->peer_rmbe_size;
	union smc_host_cursor prod;
	union smc_host_cursor cons;
	int space;

	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
	/* determine rx_buf space */
	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
	return space;
}
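
/* Worked example for the space computation above (illustrative numbers):
 * with peer_rmbe_size = 64K, prod = 48K and cons = 16K, the in-flight data
 * is smc_curs_diff(64K, &cons, &prod) = 32K, so 32K of peer RMBE space
 * remains writable; the cursor diff wraps modulo the buffer length.
 */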

static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
			     struct smc_wr_buf *wr_buf)
{
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor cons, fin;
	int rc = 0;
	int diff;

	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
	/* set prod cursor to old state, enforce tx_rdma_writes() */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
		/* cons cursor advanced more than fin, and prod was set
		 * fin above, so now prod is smaller than cons. Fix that.
		 */
		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_sent, diff);
		smc_curs_add(conn->sndbuf_desc->len,
			     &conn->tx_curs_fin, diff);

		smp_mb__before_atomic();
		atomic_add(diff, &conn->sndbuf_space);
		smp_mb__after_atomic();

		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl.prod, diff);
		smc_curs_add(conn->peer_rmbe_size,
			     &conn->local_tx_ctrl_fin, diff);
	}
	/* recalculate, value is used by tx_rdma_writes() */
	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));

	if (smc->sk.sk_state != SMC_INIT &&
	    smc->sk.sk_state != SMC_CLOSED) {
		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
		if (!rc) {
			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
			smc->sk.sk_data_ready(&smc->sk);
		}
	} else {
		smc_wr_tx_put_slot(conn->lnk,
				   (struct smc_wr_tx_pend_priv *)pend);
	}
	return rc;
}
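
/* Worked example for the cursor fix-up above (illustrative numbers): if
 * fin = 10K but the peer already consumed up to cons = 14K, resetting prod
 * to fin would claim 4K of data as unsent even though the peer acked it.
 * diff = 4K is therefore added back to tx_curs_sent/tx_curs_fin and to the
 * prod/fin control cursors, and 4K of sndbuf_space is released.
 */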

void smc_switch_link_and_count(struct smc_connection *conn,
			       struct smc_link *to_lnk)
{
	atomic_dec(&conn->lnk->conn_cnt);
	conn->lnk = to_lnk;
	atomic_inc(&conn->lnk->conn_cnt);
}

struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
				  struct smc_link *from_lnk, bool is_dev_err)
{
	struct smc_link *to_lnk = NULL;
	struct smc_cdc_tx_pend *pend;
	struct smc_connection *conn;
	struct smc_wr_buf *wr_buf;
	struct smc_sock *smc;
	struct rb_node *node;
	int i, rc = 0;

	/* link is inactive, wake up tx waiters */
	smc_wr_wakeup_tx_wait(from_lnk);

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
			continue;
		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
		    from_lnk->ibport == lgr->lnk[i].ibport) {
			continue;
		}
		to_lnk = &lgr->lnk[i];
		break;
	}
	if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
		smc_lgr_terminate_sched(lgr);
		return NULL;
	}
again:
	read_lock_bh(&lgr->conns_lock);
	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
		conn = rb_entry(node, struct smc_connection, alert_node);
		if (conn->lnk != from_lnk)
			continue;
		smc = container_of(conn, struct smc_sock, conn);
		/* conn->lnk not yet set in SMC_INIT state */
		if (smc->sk.sk_state == SMC_INIT)
			continue;
		if (smc->sk.sk_state == SMC_CLOSED ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
		    smc->sk.sk_state == SMC_PROCESSABORT) {
			spin_lock_bh(&conn->send_lock);
			smc_switch_link_and_count(conn, to_lnk);
			spin_unlock_bh(&conn->send_lock);
			continue;
		}
		sock_hold(&smc->sk);
		read_unlock_bh(&lgr->conns_lock);
		/* pre-fetch buffer outside of send_lock, might sleep */
		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
		if (rc)
			goto err_out;
		/* avoid race with smcr_tx_sndbuf_nonempty() */
		spin_lock_bh(&conn->send_lock);
		smc_switch_link_and_count(conn, to_lnk);
		rc = smc_switch_cursor(smc, pend, wr_buf);
		spin_unlock_bh(&conn->send_lock);
		sock_put(&smc->sk);
		if (rc)
			goto err_out;
		goto again;
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_wr_tx_link_put(to_lnk);
	return to_lnk;

err_out:
	smcr_link_down_cond_sched(to_lnk);
	smc_wr_tx_link_put(to_lnk);
	return NULL;
}
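
/* smc_switch_conns() restarts its rbtree scan from the top (goto again)
 * after every connection it migrates with the conns_lock dropped, because
 * smc_cdc_get_free_slot() may sleep and the tree may change meanwhile.
 * The scan terminates since each migrated connection no longer matches
 * conn->lnk == from_lnk on the next pass.
 */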

static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
			   struct smc_link_group *lgr)
{
	int rc;

	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
		/* unregister rmb with peer */
		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
		if (!rc) {
			/* protect against smc_llc_cli_rkey_exchange() */
			mutex_lock(&lgr->llc_conf_mutex);
			smc_llc_do_delete_rkey(lgr, rmb_desc);
			rmb_desc->is_conf_rkey = false;
			mutex_unlock(&lgr->llc_conf_mutex);
			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
		}
	}

	if (rmb_desc->is_reg_err) {
		/* buf registration failed, reuse not possible */
		mutex_lock(&lgr->rmbs_lock);
		list_del(&rmb_desc->list);
		mutex_unlock(&lgr->rmbs_lock);

		smc_buf_free(lgr, true, rmb_desc);
	} else {
		rmb_desc->used = 0;
		memset(rmb_desc->cpu_addr, 0, rmb_desc->len);
	}
}

static void smc_buf_unuse(struct smc_connection *conn,
			  struct smc_link_group *lgr)
{
	if (conn->sndbuf_desc) {
		conn->sndbuf_desc->used = 0;
		memset(conn->sndbuf_desc->cpu_addr, 0, conn->sndbuf_desc->len);
	}
	if (conn->rmb_desc && lgr->is_smcd) {
		conn->rmb_desc->used = 0;
		memset(conn->rmb_desc->cpu_addr, 0, conn->rmb_desc->len +
		       sizeof(struct smcd_cdc_msg));
	} else if (conn->rmb_desc) {
		smcr_buf_unuse(conn->rmb_desc, lgr);
	}
}

/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
	struct smc_link_group *lgr = conn->lgr;

	if (!lgr)
		return;
	if (lgr->is_smcd) {
		if (!list_empty(&lgr->list))
			smc_ism_unset_conn(conn);
		tasklet_kill(&conn->rx_tsklet);
	} else {
		smc_cdc_wait_pend_tx_wr(conn);
		if (current_work() != &conn->abort_work)
			cancel_work_sync(&conn->abort_work);
	}
	if (!list_empty(&lgr->list)) {
		smc_lgr_unregister_conn(conn);
		smc_buf_unuse(conn, lgr); /* allow buffer reuse */
	}

	if (!lgr->conns_num)
		smc_lgr_schedule_free_work(lgr);
}

/* unregister a link from a buf_desc */
static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
				struct smc_link *lnk)
{
	if (is_rmb)
		buf_desc->is_reg_mr[lnk->link_idx] = false;
	if (!buf_desc->is_map_ib[lnk->link_idx])
		return;
	if (is_rmb) {
		if (buf_desc->mr_rx[lnk->link_idx]) {
			smc_ib_put_memory_region(
					buf_desc->mr_rx[lnk->link_idx]);
			buf_desc->mr_rx[lnk->link_idx] = NULL;
		}
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
	} else {
		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
	}
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	buf_desc->is_map_ib[lnk->link_idx] = false;
}

/* unmap all buffers of lgr for a deleted link */
static void smcr_buf_unmap_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		mutex_lock(&lgr->rmbs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
			smcr_buf_unmap_link(buf_desc, true, lnk);
		mutex_unlock(&lgr->rmbs_lock);
		mutex_lock(&lgr->sndbufs_lock);
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
					 list)
			smcr_buf_unmap_link(buf_desc, false, lnk);
		mutex_unlock(&lgr->sndbufs_lock);
	}
}

static void smcr_rtoken_clear_link(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		lgr->rtokens[i][lnk->link_idx].rkey = 0;
		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_clear(struct smc_link *lnk, bool log)
{
	struct smc_ib_device *smcibdev;

	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
		return;
	lnk->peer_qpn = 0;
	smc_llc_link_clear(lnk, log);
	smcr_buf_unmap_lgr(lnk);
	smcr_rtoken_clear_link(lnk);
	smc_ib_modify_qp_error(lnk);
	smc_wr_free_link(lnk);
	smc_ib_destroy_queue_pair(lnk);
	smc_ib_dealloc_protection_domain(lnk);
	smc_wr_free_link_mem(lnk);
	smc_ibdev_cnt_dec(lnk);
	put_device(&lnk->smcibdev->ibdev->dev);
	smcibdev = lnk->smcibdev;
	memset(lnk, 0, sizeof(struct smc_link));
	lnk->state = SMC_LNK_UNUSED;
	if (!atomic_dec_return(&smcibdev->lnk_cnt))
		wake_up(&smcibdev->lnks_deleted);
}
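
/* smcr_link_clear() is the full teardown counterpart of smcr_link_init():
 * the QP is first moved to the error state so that outstanding work
 * requests complete, then WR resources, QP, PD and link memory are
 * released in the reverse order of their creation, and finally the
 * per-device link count is dropped so smc_smcr_terminate_all() can finish
 * waiting.
 */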

static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
			  struct smc_buf_desc *buf_desc)
{
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);

	if (buf_desc->pages)
		__free_pages(buf_desc->pages, buf_desc->order);
	kfree(buf_desc);
}

static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
			  struct smc_buf_desc *buf_desc)
{
	if (is_dmb) {
		/* restore original buf len */
		buf_desc->len += sizeof(struct smcd_cdc_msg);
		smc_ism_unregister_dmb(lgr->smcd, buf_desc);
	} else {
		kfree(buf_desc->cpu_addr);
	}
	kfree(buf_desc);
}

static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
			 struct smc_buf_desc *buf_desc)
{
	if (lgr->is_smcd)
		smcd_buf_free(lgr, is_rmb, buf_desc);
	else
		smcr_buf_free(lgr, is_rmb, buf_desc);
}

static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf_desc;
	struct list_head *buf_list;
	int i;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		if (is_rmb)
			buf_list = &lgr->rmbs[i];
		else
			buf_list = &lgr->sndbufs[i];
		list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
					 list) {
			list_del(&buf_desc->list);
			smc_buf_free(lgr, is_rmb, buf_desc);
		}
	}
}

static void smc_lgr_free_bufs(struct smc_link_group *lgr)
{
	/* free send buffers */
	__smc_lgr_free_bufs(lgr, false);
	/* free rmbs */
	__smc_lgr_free_bufs(lgr, true);
}

/* remove a link group */
static void smc_lgr_free(struct smc_link_group *lgr)
{
	int i;

	if (!lgr->is_smcd) {
		mutex_lock(&lgr->llc_conf_mutex);
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
				smcr_link_clear(&lgr->lnk[i], false);
		}
		mutex_unlock(&lgr->llc_conf_mutex);
		smc_llc_lgr_clear(lgr);
	}

	smc_lgr_free_bufs(lgr);
	destroy_workqueue(lgr->tx_wq);
	if (lgr->is_smcd) {
		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
		put_device(&lgr->smcd->dev);
		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
			wake_up(&lgr->smcd->lgrs_deleted);
	} else {
		smc_wr_free_lgr_mem(lgr);
		if (!atomic_dec_return(&lgr_cnt))
			wake_up(&lgrs_deleted);
	}
	kfree(lgr);
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_wait_pend_tx_wr(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work_sync(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid || lgr->peer_gid == peer_gid) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 *
 * We must wait here until the QPs are destroyed before we destroy the CQs,
 * or we won't receive any CQEs and cdc_pend_tx_wr cannot reach 0, thus
 * smc_sock cannot be released.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	LIST_HEAD(lgr_linkdown_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					list_move_tail(&lgr->list, &lgr_linkdown_list);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) {
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			if (lgr->lnk[i].smcibdev == smcibdev) {
				mutex_lock(&lgr->llc_conf_mutex);
				smcr_link_down_cond(&lgr->lnk[i]);
				mutex_unlock(&lgr->llc_conf_mutex);
			}
		}
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;

	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	lock_sock(&smc->sk);
	smc_conn_kill(conn, true);
	release_sock(&smc->sk);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		struct smc_link *link;

		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
		    !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
			continue;

		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_add_link_local(link);
	}
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			mutex_unlock(&lgr->llc_conf_mutex);
			wait_event_timeout(lgr->llc_flow_waiter,
					   (list_empty(&lgr->list) ||
					    lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
					   SMC_LLC_WAIT_TIME);
			mutex_lock(&lgr->llc_conf_mutex);
		}
		if (!list_empty(&lgr->list)) {
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
			smcr_link_clear(lnk, true);
		}
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state)) {
		trace_smcr_link_down(lnk, __builtin_return_address(0));
		smcr_link_down(lnk);
	}
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state)) {
		trace_smcr_link_down(lnk, __builtin_return_address(0));
		schedule_work(&lnk->link_down_wrk);
	}
}

void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		return;
	wake_up_all(&lgr->llc_msg_waiter);
	mutex_lock(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	mutex_unlock(&lgr->llc_conf_mutex);
}
1698
587acad4
KG
1699static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
1700 struct netdev_nested_priv *priv)
1701{
1702 unsigned short *vlan_id = (unsigned short *)priv->data;
1703
1704 if (is_vlan_dev(lower_dev)) {
1705 *vlan_id = vlan_dev_vlan_id(lower_dev);
1706 return 1;
1707 }
1708
1709 return 0;
1710}
1711
1712/* Determine vlan of internal TCP socket. */
bc36d2fc 1713int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
0cfdd8f9
UB
1714{
1715 struct dst_entry *dst = sk_dst_get(clcsock->sk);
587acad4 1716 struct netdev_nested_priv priv;
cb9d43f6 1717 struct net_device *ndev;
587acad4 1718 int rc = 0;
0cfdd8f9 1719
bc36d2fc 1720 ini->vlan_id = 0;
0cfdd8f9
UB
1721 if (!dst) {
1722 rc = -ENOTCONN;
1723 goto out;
1724 }
1725 if (!dst->dev) {
1726 rc = -ENODEV;
1727 goto out_rel;
1728 }
1729
cb9d43f6
UB
1730 ndev = dst->dev;
1731 if (is_vlan_dev(ndev)) {
bc36d2fc 1732 ini->vlan_id = vlan_dev_vlan_id(ndev);
cb9d43f6
UB
1733 goto out_rel;
1734 }
1735
587acad4 1736 priv.data = (void *)&ini->vlan_id;
cb9d43f6 1737 rtnl_lock();
587acad4 1738 netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
cb9d43f6 1739 rtnl_unlock();
0cfdd8f9
UB
1740
1741out_rel:
1742 dst_release(dst);
1743out:
1744 return rc;
1745}
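/* Illustrative sketch (editorial addition, not mainline code): if the
 * clcsock route resolves directly to a VLAN device such as eth0.100, the
 * is_vlan_dev() branch above sets ini->vlan_id = 100. For a stacked setup,
 * e.g. a hypothetical bond_dev on top of eth0.100, the nested walk finds
 * the VLAN among the lower devices:
 *
 *	unsigned short vlan_id = 0;
 *	struct netdev_nested_priv priv = { .data = (void *)&vlan_id };
 *
 *	rtnl_lock();
 *	netdev_walk_all_lower_dev(bond_dev, smc_vlan_by_tcpsk_walk, &priv);
 *	rtnl_unlock();
 *
 * after which vlan_id is 100; it stays 0 when no VLAN device sits below.
 */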
1746
e49300a6 1747static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
1748 u8 peer_systemid[],
1749 u8 peer_gid[],
1750 u8 peer_mac_v1[],
0237a3a6 1751 enum smc_lgr_role role, u32 clcqpn,
1752 struct net *net)
0cfdd8f9 1753{
0237a3a6 1754 struct smc_link *lnk;
b9247544 1755 int i;
1756
e49300a6 1757 if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
b9247544 1758 lgr->role != role)
1759 return false;
1760
1761 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
0237a3a6 1762 lnk = &lgr->lnk[i];
1763
1764 if (!smc_link_active(lnk))
b9247544 1765 continue;
0237a3a6 1766 /* use verbs API to check netns, instead of lgr->net */
1767 if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
1768 return false;
1769 if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
1770 !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
e49300a6 1771 (smcr_version == SMC_V2 ||
0237a3a6 1772 !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
b9247544 1773 return true;
1774 }
1775 return false;
c6ba7c9b 1776}
0cfdd8f9 1777
c6ba7c9b 1778static bool smcd_lgr_match(struct smc_link_group *lgr,
1779 struct smcd_dev *smcismdev, u64 peer_gid)
1780{
1781 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
0cfdd8f9 1782}
1783
1784/* create a new SMC connection (and a new link group if necessary) */
bc36d2fc 1785int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
0cfdd8f9 1786{
1787 struct smc_connection *conn = &smc->conn;
0237a3a6 1788 struct net *net = sock_net(&smc->sk);
a2351c5d 1789 struct list_head *lgr_list;
0cfdd8f9 1790 struct smc_link_group *lgr;
0cfdd8f9 1791 enum smc_lgr_role role;
a0a62ee1 1792 spinlock_t *lgr_lock;
0cfdd8f9 1793 int rc = 0;
1794
5c21c4cc 1795 lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
3fc64937 1796 &smc_lgr_list.list;
5c21c4cc 1797 lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
3fc64937 1798 &smc_lgr_list.lock;
5ac54d87 1799 ini->first_contact_local = 1;
0cfdd8f9 1800 role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
5ac54d87 1801 if (role == SMC_CLNT && ini->first_contact_peer)
0cfdd8f9 1802 /* create new link group as well */
1803 goto create;
1804
1805 /* determine if an existing link group can be reused */
a0a62ee1 1806 spin_lock_bh(lgr_lock);
a2351c5d 1807 list_for_each_entry(lgr, lgr_list, list) {
0cfdd8f9 1808 write_lock_bh(&lgr->conns_lock);
bc36d2fc 1809 if ((ini->is_smcd ?
5c21c4cc 1810 smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
1811 ini->ism_peer_gid[ini->ism_selected]) :
e49300a6 1812 smcr_lgr_match(lgr, ini->smcr_version,
1813 ini->peer_systemid,
1814 ini->peer_gid, ini->peer_mac, role,
0237a3a6 1815 ini->ib_clcqpn, net)) &&
0cfdd8f9 1816 !lgr->sync_err &&
0530bd6e 1817 (ini->smcd_version == SMC_V2 ||
1818 lgr->vlan_id == ini->vlan_id) &&
a9e44502 1819 (role == SMC_CLNT || ini->is_smcd ||
c6ba7c9b 1820 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
0cfdd8f9 1821 /* link group found */
5ac54d87 1822 ini->first_contact_local = 0;
0cfdd8f9 1823 conn->lgr = lgr;
56bc3b20 1824 rc = smc_lgr_register_conn(conn, false);
0cfdd8f9 1825 write_unlock_bh(&lgr->conns_lock);
b9247544 1826 if (!rc && delayed_work_pending(&lgr->free_work))
1827 cancel_delayed_work(&lgr->free_work);
0cfdd8f9 1828 break;
1829 }
1830 write_unlock_bh(&lgr->conns_lock);
1831 }
a0a62ee1 1832 spin_unlock_bh(lgr_lock);
b9247544 1833 if (rc)
1834 return rc;
0cfdd8f9 1835
5ac54d87 1836 if (role == SMC_CLNT && !ini->first_contact_peer &&
1837 ini->first_contact_local) {
0cfdd8f9 1838 /* Server reuses a link group, but Client wants to start
1839 * a new one
1840 * send out_of_sync decline, reason synchr. error
1841 */
7a62725a 1842 return SMC_CLC_DECL_SYNCERR;
0cfdd8f9 1843 }
1844
1845create:
5ac54d87 1846 if (ini->first_contact_local) {
bc36d2fc 1847 rc = smc_lgr_create(smc, ini);
0cfdd8f9 1848 if (rc)
1849 goto out;
44808792 1850 lgr = conn->lgr;
1851 write_lock_bh(&lgr->conns_lock);
56bc3b20 1852 rc = smc_lgr_register_conn(conn, true);
44808792 1853 write_unlock_bh(&lgr->conns_lock);
b9247544 1854 if (rc)
1855 goto out;
0cfdd8f9 1856 }
5f08318f 1857 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
cbba07a7 1858 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
de8474eb 1859 conn->urg_state = SMC_URG_READ;
349d4312 1860 init_waitqueue_head(&conn->cdc_pend_tx_wq);
b286a065 1861 INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
bc36d2fc 1862 if (ini->is_smcd) {
be244f28 1863 conn->rx_off = sizeof(struct smcd_cdc_msg);
1864 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
2d2bfeb8 1865 } else {
1866 conn->rx_off = 0;
be244f28 1867 }
5f08318f 1868#ifndef KERNEL_HAS_ATOMIC64
1869 spin_lock_init(&conn->acurs_lock);
1870#endif
0cfdd8f9 1871
1872out:
7a62725a 1873 return rc;
0cfdd8f9 1874}
cd6851f3 1875
67161779 1876#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1877#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */
1878
1879/* convert the RMB size into the compressed notation (minimum 16K, see
1880 * SMCD/R_DMBE_SIZES).
2f6becaf 1881 * In contrast to plain ilog2, this rounds towards the next power of 2,
1882 * so the socket application gets at least its desired sndbuf / rcvbuf size.
1883 */
67161779 1884static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
2f6becaf 1885{
67161779 1886 const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE;
2f6becaf 1887 u8 compressed;
1888
1889 if (size <= SMC_BUF_MIN_SIZE)
1890 return 0;
1891
67161779 1892 size = (size - 1) >> 14; /* convert to 16K multiple */
1893 compressed = min_t(u8, ilog2(size) + 1,
1894 is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);
1895
1896 if (!is_smcd && is_rmb)
1897 /* RMBs are backed by & limited to max size of scatterlists */
1898 compressed = min_t(u8, compressed, ilog2(max_scat >> 14));
1899
2f6becaf 1900 return compressed;
1901}
1902
1903/* convert the RMB size from compressed notation into integer */
1904int smc_uncompress_bufsize(u8 compressed)
1905{
1906 u32 size;
1907
1908 size = 0x00000001 << (((int)compressed) + 14);
1909 return (int)size;
1910}
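/* Worked example (editorial addition, assuming SMC_BUF_MIN_SIZE is 16KB),
 * for the SMC-R sndbuf case:
 *
 *	smc_compress_bufsize(16384, false, false) -> 0 (16KB slot)
 *	smc_compress_bufsize(16385, false, false) -> 1 (rounded up to 32KB)
 *	smc_compress_bufsize(70000, false, false) -> 3 (rounded up to 128KB)
 *	smc_uncompress_bufsize(3) -> 131072
 *
 * i.e. a request is rounded up to the next power of 2 and capped at the
 * SMCR_RMBE_SIZES resp. SMCD_DMBE_SIZES maximum.
 */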
1911
3e034725 1912/* try to reuse a sndbuf or rmb description slot for a certain
1913 * buffer size; if not available, return NULL
cd6851f3 1914 */
8437bda0 1915static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
faca5360 1916 struct mutex *lock,
8437bda0 1917 struct list_head *buf_list)
cd6851f3 1918{
3e034725 1919 struct smc_buf_desc *buf_slot;
cd6851f3 1920
faca5360 1921 mutex_lock(lock);
3e034725 1922 list_for_each_entry(buf_slot, buf_list, list) {
1923 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
faca5360 1924 mutex_unlock(lock);
3e034725 1925 return buf_slot;
cd6851f3 1926 }
1927 }
faca5360 1928 mutex_unlock(lock);
cd6851f3 1929 return NULL;
1930}
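/* Editorial note (an assumption about the concurrency model, not stated in
 * the source): the cmpxchg() claim is atomic, so even when a buffer's used
 * flag is cleared on release without this list mutex held, at most one
 * caller can move a slot from 0 to 1 and hand it out; a concurrent loser
 * simply keeps scanning the list.
 */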
1931
952310cc 1932/* one of the conditions for announcing a receiver's current window size is
1933 * that it "results in a minimum increase in the window size of 10% of the
1934 * receive buffer space" [RFC7609]
1935 */
1936static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1937{
1938 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1939}
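/* Worked example (editorial addition): for a 64KB RMB, rmbe_size / 10 is
 * 6553 bytes, but the result is capped at SOCK_MIN_SNDBUF / 2 (roughly 2KB
 * on common builds; the exact value depends on sizeof(struct sk_buff)).
 * Small RMBs thus get the literal 10% threshold of RFC 7609, while large
 * RMBs announce window updates more often than every 10% of freed space.
 */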
1940
b9247544 1941/* map an rmb buf to a link */
1942static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1943 struct smc_link *lnk)
1944{
1945 int rc;
1946
1947 if (buf_desc->is_map_ib[lnk->link_idx])
1948 return 0;
1949
1950 rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1951 if (rc)
1952 return rc;
1953 sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1954 buf_desc->cpu_addr, buf_desc->len);
1955
1956 /* map sg table to DMA address */
1957 rc = smc_ib_buf_map_sg(lnk, buf_desc,
1958 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1959 /* SMC protocol depends on mapping to one DMA address only */
1960 if (rc != 1) {
1961 rc = -EAGAIN;
1962 goto free_table;
1963 }
1964
1965 /* create a new memory region for the RMB */
1966 if (is_rmb) {
1967 rc = smc_ib_get_memory_region(lnk->roce_pd,
1968 IB_ACCESS_REMOTE_WRITE |
1969 IB_ACCESS_LOCAL_WRITE,
1970 buf_desc, lnk->link_idx);
1971 if (rc)
1972 goto buf_unmap;
1973 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1974 }
1975 buf_desc->is_map_ib[lnk->link_idx] = true;
1976 return 0;
1977
1978buf_unmap:
1979 smc_ib_buf_unmap_sg(lnk, buf_desc,
1980 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1981free_table:
1982 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1983 return rc;
1984}
1985
d5500667 1986/* register a new rmb on IB device,
1987 * must be called under lgr->llc_conf_mutex lock
1988 */
7562a13d 1989int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1990{
1991 if (list_empty(&link->lgr->list))
1992 return -ENOLINK;
1993 if (!rmb_desc->is_reg_mr[link->link_idx]) {
1994 /* register memory region for new rmb */
1995 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1996 rmb_desc->is_reg_err = true;
1997 return -EFAULT;
1998 }
1999 rmb_desc->is_reg_mr[link->link_idx] = true;
2000 }
2001 return 0;
2002}
2003
fb33d277 2004static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
2005 struct list_head *lst, bool is_rmb)
2006{
2007 struct smc_buf_desc *buf_desc, *bf;
2008 int rc = 0;
2009
2010 mutex_lock(lock);
2011 list_for_each_entry_safe(buf_desc, bf, lst, list) {
2012 if (!buf_desc->used)
2013 continue;
2014 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
2015 if (rc)
2016 goto out;
2017 }
2018out:
2019 mutex_unlock(lock);
2020 return rc;
2021}
2022
2023/* map all used buffers of lgr for a new link */
2024int smcr_buf_map_lgr(struct smc_link *lnk)
2025{
2026 struct smc_link_group *lgr = lnk->lgr;
2027 int i, rc = 0;
2028
2029 for (i = 0; i < SMC_RMBE_SIZES; i++) {
2030 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
2031 &lgr->rmbs[i], true);
2032 if (rc)
2033 return rc;
2034 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
2035 &lgr->sndbufs[i], false);
2036 if (rc)
2037 return rc;
2038 }
2039 return 0;
2040}
2041
d5500667 2042/* register all used buffers of lgr for a new link,
2043 * must be called under lgr->llc_conf_mutex lock
2044 */
fb33d277 2045int smcr_buf_reg_lgr(struct smc_link *lnk)
2046{
2047 struct smc_link_group *lgr = lnk->lgr;
2048 struct smc_buf_desc *buf_desc, *bf;
2049 int i, rc = 0;
2050
2051 mutex_lock(&lgr->rmbs_lock);
2052 for (i = 0; i < SMC_RMBE_SIZES; i++) {
2053 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
2054 if (!buf_desc->used)
2055 continue;
2056 rc = smcr_link_reg_rmb(lnk, buf_desc);
2057 if (rc)
2058 goto out;
2059 }
2060 }
2061out:
2062 mutex_unlock(&lgr->rmbs_lock);
2063 return rc;
2064}
2065
c6ba7c9b 2066static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
2067 bool is_rmb, int bufsize)
b33982c3 2068{
2069 struct smc_buf_desc *buf_desc;
b33982c3 2070
2071 /* try to alloc a new buffer */
2072 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2073 if (!buf_desc)
2074 return ERR_PTR(-ENOMEM);
2075
2ef4f27a 2076 buf_desc->order = get_order(bufsize);
2077 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
2078 __GFP_NOMEMALLOC | __GFP_COMP |
2079 __GFP_NORETRY | __GFP_ZERO,
2080 buf_desc->order);
2081 if (!buf_desc->pages) {
b33982c3 2082 kfree(buf_desc);
2083 return ERR_PTR(-EAGAIN);
2084 }
2ef4f27a 2085 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
b9247544 2086 buf_desc->len = bufsize;
2087 return buf_desc;
2088}
b33982c3 2089
b9247544 2090/* map buf_desc on all usable links,
2091 * unused buffers stay mapped as long as the link is up
2092 */
2093static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
2094 struct smc_buf_desc *buf_desc, bool is_rmb)
2095{
2096 int i, rc = 0;
b33982c3 2097
d5500667 2098 /* protect against parallel link reconfiguration */
2099 mutex_lock(&lgr->llc_conf_mutex);
b9247544 2100 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
2101 struct smc_link *lnk = &lgr->lnk[i];
b33982c3 2102
d854fcbf 2103 if (!smc_link_usable(lnk))
b9247544 2104 continue;
2105 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
b9247544 2106 rc = -ENOMEM;
2107 goto out;
b33982c3 2108 }
2109 }
b9247544 2110out:
d5500667 2111 mutex_unlock(&lgr->llc_conf_mutex);
b9247544 2112 return rc;
b33982c3 2113}
2114
c6ba7c9b 2115static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
2116 bool is_dmb, int bufsize)
2117{
2118 struct smc_buf_desc *buf_desc;
2119 int rc;
2120
c6ba7c9b 2121 /* try to alloc a new DMB */
2122 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
2123 if (!buf_desc)
2124 return ERR_PTR(-ENOMEM);
2125 if (is_dmb) {
2126 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
2127 if (rc) {
2128 kfree(buf_desc);
96d6fded 2129 if (rc == -ENOMEM)
2130 return ERR_PTR(-EAGAIN);
2131 if (rc == -ENOSPC)
2132 return ERR_PTR(-ENOSPC);
2133 return ERR_PTR(-EIO);
c6ba7c9b 2134 }
be244f28 2135 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
2136 /* CDC header stored in buf. So, pretend it was smaller */
2137 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
c6ba7c9b 2138 } else {
2139 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
2140 __GFP_NOWARN | __GFP_NORETRY |
2141 __GFP_NOMEMALLOC);
2142 if (!buf_desc->cpu_addr) {
2143 kfree(buf_desc);
2144 return ERR_PTR(-EAGAIN);
2145 }
2146 buf_desc->len = bufsize;
2147 }
2148 return buf_desc;
2149}
2150
2151static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
cd6851f3 2152{
8437bda0 2153 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
cd6851f3 2154 struct smc_connection *conn = &smc->conn;
2155 struct smc_link_group *lgr = conn->lgr;
3e034725 2156 struct list_head *buf_list;
c45abf31 2157 int bufsize, bufsize_short;
e0e4b8fa 2158 bool is_dgraded = false;
faca5360 2159 struct mutex *lock; /* lock buffer list */
3e034725 2160 int sk_buf_size;
cd6851f3 2161
3e034725 2162 if (is_rmb)
2163 /* use socket recv buffer size (w/o overhead) as start value */
2164 sk_buf_size = smc->sk.sk_rcvbuf / 2;
2165 else
2166 /* use socket send buffer size (w/o overhead) as start value */
2167 sk_buf_size = smc->sk.sk_sndbuf / 2;
2168
67161779 2169 for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
c45abf31 2170 bufsize_short >= 0; bufsize_short--) {
3e034725 2171 if (is_rmb) {
2172 lock = &lgr->rmbs_lock;
2173 buf_list = &lgr->rmbs[bufsize_short];
2174 } else {
2175 lock = &lgr->sndbufs_lock;
2176 buf_list = &lgr->sndbufs[bufsize_short];
9d8fb617 2177 }
c45abf31 2178 bufsize = smc_uncompress_bufsize(bufsize_short);
a3fe3d01 2179
3e034725 2180 /* check for reusable slot in the link group */
8437bda0 2181 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
3e034725 2182 if (buf_desc) {
194730a9 2183 SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
2184 SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
cd6851f3 2185 break; /* found reusable slot */
2186 }
a3fe3d01 2187
c6ba7c9b 2188 if (is_smcd)
2189 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
2190 else
2191 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
2192
b33982c3 2193 if (PTR_ERR(buf_desc) == -ENOMEM)
2194 break;
e0e4b8fa 2195 if (IS_ERR(buf_desc)) {
2196 if (!is_dgraded) {
2197 is_dgraded = true;
194730a9 2198 SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
e0e4b8fa 2199 }
a3fe3d01 2200 continue;
e0e4b8fa 2201 }
897e1c24 2202
194730a9 2203 SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
2204 SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
3e034725 2205 buf_desc->used = 1;
faca5360 2206 mutex_lock(lock);
3e034725 2207 list_add(&buf_desc->list, buf_list);
faca5360 2208 mutex_unlock(lock);
3e034725 2209 break; /* found */
cd6851f3 2210 }
3e034725 2211
b33982c3 2212 if (IS_ERR(buf_desc))
72b7f6c4 2213 return PTR_ERR(buf_desc);
3e034725 2214
b9247544 2215 if (!is_smcd) {
2216 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
6d74c3a8 2217 smcr_buf_unuse(buf_desc, lgr);
b9247544 2218 return -ENOMEM;
2219 }
2220 }
2221
3e034725 2222 if (is_rmb) {
2223 conn->rmb_desc = buf_desc;
c45abf31 2224 conn->rmbe_size_short = bufsize_short;
2225 smc->sk.sk_rcvbuf = bufsize * 2;
5f08318f 2226 atomic_set(&conn->bytes_to_rcv, 0);
be244f28 2227 conn->rmbe_update_limit =
2228 smc_rmb_wnd_update_limit(buf_desc->len);
c6ba7c9b 2229 if (is_smcd)
2230 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
cd6851f3 2231 } else {
3e034725 2232 conn->sndbuf_desc = buf_desc;
3e034725 2233 smc->sk.sk_sndbuf = bufsize * 2;
2234 atomic_set(&conn->sndbuf_space, bufsize);
cd6851f3 2235 }
3e034725 2236 return 0;
2237}
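/* Editorial sketch of the allocation loop above (illustrative values): with
 * sk_rcvbuf == 262144 the start value is 131072 (sk_rcvbuf / 2), so a 128KB
 * RMB is tried first. A transient failure (-EAGAIN) lets the loop retry
 * with 64KB, 32KB, ... down to 16KB; the first such fallback is counted
 * once via SMC_STAT_RMB_DOWNGRADED. Only -ENOMEM, or running out of sizes,
 * aborts the search.
 */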
2238
10428dd8 2239void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
2240{
741a49a4 2241 if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
c6ba7c9b 2242 return;
387707fd 2243 smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
10428dd8 2244}
2245
2246void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
2247{
741a49a4 2248 if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk))
c6ba7c9b 2249 return;
387707fd 2250 smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
10428dd8 2251}
2252
2253void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
2254{
b9247544 2255 int i;
10428dd8 2256
c6ba7c9b 2257 if (!conn->lgr || conn->lgr->is_smcd)
2258 return;
b9247544 2259 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
741a49a4 2260 if (!smc_link_active(&conn->lgr->lnk[i]))
b9247544 2261 continue;
2262 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
2263 DMA_FROM_DEVICE);
2264 }
10428dd8 2265}
2266
2267void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
2268{
b9247544 2269 int i;
10428dd8 2270
c6ba7c9b 2271 if (!conn->lgr || conn->lgr->is_smcd)
2272 return;
b9247544 2273 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
741a49a4 2274 if (!smc_link_active(&conn->lgr->lnk[i]))
b9247544 2275 continue;
2276 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
2277 DMA_FROM_DEVICE);
2278 }
10428dd8 2279}
2280
3e034725 2281/* create the send and receive buffer for an SMC socket;
2282 * receive buffers are called RMBs;
2283 * (even though the SMC protocol allows more than one RMB-element per RMB,
2284 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
2285 * extra RMB for every connection in a link group)
2286 */
c6ba7c9b 2287int smc_buf_create(struct smc_sock *smc, bool is_smcd)
3e034725 2288{
2289 int rc;
2290
2291 /* create send buffer */
c6ba7c9b 2292 rc = __smc_buf_create(smc, is_smcd, false);
3e034725 2293 if (rc)
2294 return rc;
2295 /* create rmb */
c6ba7c9b 2296 rc = __smc_buf_create(smc, is_smcd, true);
fd7f3a74 2297 if (rc) {
2298 mutex_lock(&smc->conn.lgr->sndbufs_lock);
2299 list_del(&smc->conn.sndbuf_desc->list);
2300 mutex_unlock(&smc->conn.lgr->sndbufs_lock);
6511aad3 2301 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1d8df41d 2302 smc->conn.sndbuf_desc = NULL;
fd7f3a74 2303 }
3e034725 2304 return rc;
cd6851f3 2305}
bd4ad577 2306
2307static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
2308{
2309 int i;
2310
2311 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
2312 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
2313 return i;
2314 }
2315 return -ENOSPC;
2316}
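/* Editorial note: for_each_clear_bit() plus test_and_set_bit() forms a
 * small lock-free index allocator over rtokens_used_mask. If two CPUs race
 * for the same clear bit, the atomic test_and_set_bit() lets exactly one
 * win; the loser advances to the next clear bit, and -ENOSPC is returned
 * only once all SMC_RMBS_PER_LGR_MAX slots are in use.
 */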
2317
ba21abd2 2318static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
2319 u32 rkey)
2320{
2321 int i;
2322
2323 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
2324 if (test_bit(i, lgr->rtokens_used_mask) &&
2325 lgr->rtokens[i][lnk_idx].rkey == rkey)
2326 return i;
2327 }
2328 return -ENOENT;
2329}
2330
2331/* set rtoken for a new link to an existing rmb */
2332void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
2333 __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
2334{
2335 int rtok_idx;
2336
2337 rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
2338 if (rtok_idx == -ENOENT)
2339 return;
2340 lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
2341 lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
2342}
2343
2344/* set rtoken for a new link whose link_id is given */
2345void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
2346 __be64 nw_vaddr, __be32 nw_rkey)
2347{
2348 u64 dma_addr = be64_to_cpu(nw_vaddr);
2349 u32 rkey = ntohl(nw_rkey);
2350 bool found = false;
2351 int link_idx;
2352
2353 for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
2354 if (lgr->lnk[link_idx].link_id == link_id) {
2355 found = true;
2356 break;
2357 }
2358 }
2359 if (!found)
2360 return;
2361 lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
2362 lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
2363}
2364
4ed75de5 2365/* add a new rtoken from peer */
387707fd 2366int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
bd4ad577 2367{
387707fd 2368 struct smc_link_group *lgr = smc_get_lgr(lnk);
4ed75de5 2369 u64 dma_addr = be64_to_cpu(nw_vaddr);
2370 u32 rkey = ntohl(nw_rkey);
bd4ad577 2371 int i;
2372
2373 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
387707fd 2374 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
2375 lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
bd4ad577 2376 test_bit(i, lgr->rtokens_used_mask)) {
4ed75de5 2377 /* already in list */
2378 return i;
2379 }
2380 }
2381 i = smc_rmb_reserve_rtoken_idx(lgr);
2382 if (i < 0)
2383 return i;
387707fd 2384 lgr->rtokens[i][lnk->link_idx].rkey = rkey;
2385 lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
4ed75de5 2386 return i;
2387}
2388
e07d31dc 2389/* delete an rtoken from all links */
387707fd 2390int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
4ed75de5 2391{
387707fd 2392 struct smc_link_group *lgr = smc_get_lgr(lnk);
4ed75de5 2393 u32 rkey = ntohl(nw_rkey);
e07d31dc 2394 int i, j;
4ed75de5 2395
2396 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
387707fd 2397 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
4ed75de5 2398 test_bit(i, lgr->rtokens_used_mask)) {
e07d31dc 2399 for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
2400 lgr->rtokens[i][j].rkey = 0;
2401 lgr->rtokens[i][j].dma_addr = 0;
2402 }
4ed75de5 2403 clear_bit(i, lgr->rtokens_used_mask);
bd4ad577 2404 return 0;
2405 }
2406 }
4ed75de5 2407 return -ENOENT;
2408}
2409
2410/* save rkey and dma_addr received from peer during clc handshake */
2411int smc_rmb_rtoken_handling(struct smc_connection *conn,
e07d31dc 2412 struct smc_link *lnk,
4ed75de5 2413 struct smc_clc_msg_accept_confirm *clc)
2414{
3d9725a6 2415 conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
2416 clc->r0.rmb_rkey);
bd4ad577 2417 if (conn->rtoken_idx < 0)
2418 return conn->rtoken_idx;
bd4ad577 2419 return 0;
2420}
9fda3510 2421
c3d9494e 2422static void smc_core_going_away(void)
2423{
2424 struct smc_ib_device *smcibdev;
2425 struct smcd_dev *smcd;
2426
92f3cb0e 2427 mutex_lock(&smc_ib_devices.mutex);
c3d9494e 2428 list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
2429 int i;
2430
2431 for (i = 0; i < SMC_MAX_PORTS; i++)
2432 set_bit(i, smcibdev->ports_going_away);
2433 }
92f3cb0e 2434 mutex_unlock(&smc_ib_devices.mutex);
c3d9494e 2435
82087c03 2436 mutex_lock(&smcd_dev_list.mutex);
c3d9494e 2437 list_for_each_entry(smcd, &smcd_dev_list.list, list) {
2438 smcd->going_away = 1;
2439 }
82087c03 2440 mutex_unlock(&smcd_dev_list.mutex);
c3d9494e 2441}
2442
5421ec28 2443/* Clean up all SMC link groups */
2444static void smc_lgrs_shutdown(void)
9fda3510 2445{
a2351c5d 2446 struct smcd_dev *smcd;
9fda3510 2447
c3d9494e 2448 smc_core_going_away();
2449
0b29ec64 2450 smc_smcr_terminate_all(NULL);
a2351c5d 2451
82087c03 2452 mutex_lock(&smcd_dev_list.mutex);
a2351c5d 2453 list_for_each_entry(smcd, &smcd_dev_list.list, list)
5421ec28 2454 smc_smcd_terminate_all(smcd);
82087c03 2455 mutex_unlock(&smcd_dev_list.mutex);
9fda3510 2456}
5421ec28 2457
a33a803c 2458static int smc_core_reboot_event(struct notifier_block *this,
2459 unsigned long event, void *ptr)
2460{
2461 smc_lgrs_shutdown();
28a3b840 2462 smc_ib_unregister_client();
a33a803c 2463 return 0;
2464}
2465
2466static struct notifier_block smc_reboot_notifier = {
2467 .notifier_call = smc_core_reboot_event,
2468};
2469
6dabd405 2470int __init smc_core_init(void)
2471{
a33a803c 2472 return register_reboot_notifier(&smc_reboot_notifier);
6dabd405 2473}
2474
5421ec28 2475/* Called (from smc_exit) when module is removed */
2476void smc_core_exit(void)
2477{
a33a803c 2478 unregister_reboot_notifier(&smc_reboot_notifier);
5421ec28 2479 smc_lgrs_shutdown();
2480}