]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
0cfdd8f9 UB |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE | |
4 | * | |
5 | * Basic Transport Functions exploiting Infiniband API | |
6 | * | |
7 | * Copyright IBM Corp. 2016 | |
8 | * | |
9 | * Author(s): Ursula Braun <[email protected]> | |
10 | */ | |
11 | ||
12 | #include <linux/socket.h> | |
13 | #include <linux/if_vlan.h> | |
14 | #include <linux/random.h> | |
15 | #include <linux/workqueue.h> | |
16 | #include <net/tcp.h> | |
17 | #include <net/sock.h> | |
18 | #include <rdma/ib_verbs.h> | |
ddb457c6 | 19 | #include <rdma/ib_cache.h> |
0cfdd8f9 UB |
20 | |
21 | #include "smc.h" | |
22 | #include "smc_clc.h" | |
23 | #include "smc_core.h" | |
24 | #include "smc_ib.h" | |
f38ba179 | 25 | #include "smc_wr.h" |
9bf9abea | 26 | #include "smc_llc.h" |
5f08318f | 27 | #include "smc_cdc.h" |
b38d7324 | 28 | #include "smc_close.h" |
c6ba7c9b | 29 | #include "smc_ism.h" |
0cfdd8f9 | 30 | |
5bc11ddb UB |
31 | #define SMC_LGR_NUM_INCR 256 |
32 | #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) | |
7f58a1ad | 33 | #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) |
0d18a0cb | 34 | #define SMC_LGR_FREE_DELAY_FAST (8 * HZ) |
0cfdd8f9 | 35 | |
9fda3510 HW |
36 | static struct smc_lgr_list smc_lgr_list = { /* established link groups */ |
37 | .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), | |
38 | .list = LIST_HEAD_INIT(smc_lgr_list.list), | |
39 | .num = 0, | |
40 | }; | |
9bf9abea | 41 | |
6511aad3 HW |
42 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
43 | struct smc_buf_desc *buf_desc); | |
a6920d1d | 44 | |
a0a62ee1 UB |
45 | /* return head of link group list and its lock for a given link group */ |
46 | static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, | |
47 | spinlock_t **lgr_lock) | |
48 | { | |
49 | if (lgr->is_smcd) { | |
50 | *lgr_lock = &lgr->smcd->lgr_lock; | |
51 | return &lgr->smcd->lgr_list; | |
52 | } | |
53 | ||
54 | *lgr_lock = &smc_lgr_list.lock; | |
55 | return &smc_lgr_list.list; | |
56 | } | |
57 | ||
97cdbc42 KG |
58 | static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) |
59 | { | |
60 | /* client link group creation always follows the server link group | |
61 | * creation. For client use a somewhat higher removal delay time, | |
62 | * otherwise there is a risk of out-of-sync link groups. | |
63 | */ | |
8e316b9e UB |
64 | if (!lgr->freeing && !lgr->freefast) { |
65 | mod_delayed_work(system_wq, &lgr->free_work, | |
66 | (!lgr->is_smcd && lgr->role == SMC_CLNT) ? | |
67 | SMC_LGR_FREE_DELAY_CLNT : | |
68 | SMC_LGR_FREE_DELAY_SERV); | |
69 | } | |
97cdbc42 KG |
70 | } |
71 | ||
0d18a0cb KG |
72 | void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) |
73 | { | |
8e316b9e UB |
74 | if (!lgr->freeing && !lgr->freefast) { |
75 | lgr->freefast = 1; | |
76 | mod_delayed_work(system_wq, &lgr->free_work, | |
77 | SMC_LGR_FREE_DELAY_FAST); | |
78 | } | |
97cdbc42 KG |
79 | } |
80 | ||
0cfdd8f9 UB |
81 | /* Register connection's alert token in our lookup structure. |
82 | * To use rbtrees we have to implement our own insert core. | |
83 | * Requires @conns_lock | |
84 | * @smc connection to register | |
85 | * Returns 0 on success, != otherwise. | |
86 | */ | |
87 | static void smc_lgr_add_alert_token(struct smc_connection *conn) | |
88 | { | |
89 | struct rb_node **link, *parent = NULL; | |
90 | u32 token = conn->alert_token_local; | |
91 | ||
92 | link = &conn->lgr->conns_all.rb_node; | |
93 | while (*link) { | |
94 | struct smc_connection *cur = rb_entry(*link, | |
95 | struct smc_connection, alert_node); | |
96 | ||
97 | parent = *link; | |
98 | if (cur->alert_token_local > token) | |
99 | link = &parent->rb_left; | |
100 | else | |
101 | link = &parent->rb_right; | |
102 | } | |
103 | /* Put the new node there */ | |
104 | rb_link_node(&conn->alert_node, parent, link); | |
105 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); | |
106 | } | |
107 | ||
108 | /* Register connection in link group by assigning an alert token | |
109 | * registered in a search tree. | |
110 | * Requires @conns_lock | |
111 | * Note that '0' is a reserved value and not assigned. | |
112 | */ | |
113 | static void smc_lgr_register_conn(struct smc_connection *conn) | |
114 | { | |
115 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | |
116 | static atomic_t nexttoken = ATOMIC_INIT(0); | |
117 | ||
118 | /* find a new alert_token_local value not yet used by some connection | |
119 | * in this link group | |
120 | */ | |
121 | sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */ | |
122 | while (!conn->alert_token_local) { | |
123 | conn->alert_token_local = atomic_inc_return(&nexttoken); | |
124 | if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr)) | |
125 | conn->alert_token_local = 0; | |
126 | } | |
127 | smc_lgr_add_alert_token(conn); | |
128 | conn->lgr->conns_num++; | |
129 | } | |
130 | ||
131 | /* Unregister connection and reset the alert token of the given connection< | |
132 | */ | |
133 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) | |
134 | { | |
135 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | |
136 | struct smc_link_group *lgr = conn->lgr; | |
137 | ||
138 | rb_erase(&conn->alert_node, &lgr->conns_all); | |
139 | lgr->conns_num--; | |
140 | conn->alert_token_local = 0; | |
0cfdd8f9 UB |
141 | sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ |
142 | } | |
143 | ||
fb692ec4 | 144 | /* Unregister connection from lgr |
0cfdd8f9 UB |
145 | */ |
146 | static void smc_lgr_unregister_conn(struct smc_connection *conn) | |
147 | { | |
148 | struct smc_link_group *lgr = conn->lgr; | |
0cfdd8f9 | 149 | |
77f838ac KG |
150 | if (!lgr) |
151 | return; | |
0cfdd8f9 UB |
152 | write_lock_bh(&lgr->conns_lock); |
153 | if (conn->alert_token_local) { | |
0cfdd8f9 UB |
154 | __smc_lgr_unregister_conn(conn); |
155 | } | |
156 | write_unlock_bh(&lgr->conns_lock); | |
2a0674ff | 157 | conn->lgr = NULL; |
0cfdd8f9 UB |
158 | } |
159 | ||
0d18a0cb KG |
160 | /* Send delete link, either as client to request the initiation |
161 | * of the DELETE LINK sequence from server; or as server to | |
162 | * initiate the delete processing. See smc_llc_rx_delete_link(). | |
163 | */ | |
164 | static int smc_link_send_delete(struct smc_link *lnk) | |
165 | { | |
166 | if (lnk->state == SMC_LNK_ACTIVE && | |
167 | !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) { | |
168 | smc_llc_link_deleting(lnk); | |
169 | return 0; | |
170 | } | |
171 | return -ENOTCONN; | |
172 | } | |
173 | ||
3f3f0e36 UB |
174 | static void smc_lgr_free(struct smc_link_group *lgr); |
175 | ||
0cfdd8f9 UB |
176 | static void smc_lgr_free_work(struct work_struct *work) |
177 | { | |
178 | struct smc_link_group *lgr = container_of(to_delayed_work(work), | |
179 | struct smc_link_group, | |
180 | free_work); | |
a0a62ee1 | 181 | spinlock_t *lgr_lock; |
8e316b9e | 182 | struct smc_link *lnk; |
0cfdd8f9 UB |
183 | bool conns; |
184 | ||
a0a62ee1 UB |
185 | smc_lgr_list_head(lgr, &lgr_lock); |
186 | spin_lock_bh(lgr_lock); | |
8e316b9e UB |
187 | if (lgr->freeing) { |
188 | spin_unlock_bh(lgr_lock); | |
189 | return; | |
190 | } | |
0cfdd8f9 UB |
191 | read_lock_bh(&lgr->conns_lock); |
192 | conns = RB_EMPTY_ROOT(&lgr->conns_all); | |
193 | read_unlock_bh(&lgr->conns_lock); | |
194 | if (!conns) { /* number of lgr connections is no longer zero */ | |
a0a62ee1 | 195 | spin_unlock_bh(lgr_lock); |
0cfdd8f9 UB |
196 | return; |
197 | } | |
8caa6544 | 198 | list_del_init(&lgr->list); /* remove from smc_lgr_list */ |
0d18a0cb | 199 | |
8e316b9e | 200 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
0d18a0cb KG |
201 | if (!lgr->is_smcd && !lgr->terminating) { |
202 | /* try to send del link msg, on error free lgr immediately */ | |
90d8b29c UB |
203 | if (lnk->state == SMC_LNK_ACTIVE && |
204 | !smc_link_send_delete(lnk)) { | |
0d18a0cb KG |
205 | /* reschedule in case we never receive a response */ |
206 | smc_lgr_schedule_free_work(lgr); | |
8e316b9e | 207 | spin_unlock_bh(lgr_lock); |
0d18a0cb KG |
208 | return; |
209 | } | |
210 | } | |
8e316b9e UB |
211 | lgr->freeing = 1; /* this instance does the freeing, no new schedule */ |
212 | spin_unlock_bh(lgr_lock); | |
213 | cancel_delayed_work(&lgr->free_work); | |
0d18a0cb | 214 | |
8e316b9e UB |
215 | if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) |
216 | smc_llc_link_inactive(lnk); | |
217 | if (lgr->is_smcd) | |
218 | smc_ism_signal_shutdown(lgr); | |
219 | smc_lgr_free(lgr); | |
0cfdd8f9 UB |
220 | } |
221 | ||
f528ba24 UB |
222 | static void smc_lgr_terminate_work(struct work_struct *work) |
223 | { | |
224 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, | |
225 | terminate_work); | |
226 | ||
227 | smc_lgr_terminate(lgr); | |
228 | } | |
229 | ||
0cfdd8f9 | 230 | /* create a new SMC link group */ |
bc36d2fc | 231 | static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) |
0cfdd8f9 UB |
232 | { |
233 | struct smc_link_group *lgr; | |
a2351c5d | 234 | struct list_head *lgr_list; |
0cfdd8f9 | 235 | struct smc_link *lnk; |
a0a62ee1 | 236 | spinlock_t *lgr_lock; |
0cfdd8f9 UB |
237 | u8 rndvec[3]; |
238 | int rc = 0; | |
cd6851f3 | 239 | int i; |
0cfdd8f9 | 240 | |
bc36d2fc | 241 | if (ini->is_smcd && ini->vlan_id) { |
7a62725a KG |
242 | if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) { |
243 | rc = SMC_CLC_DECL_ISMVLANERR; | |
c6ba7c9b | 244 | goto out; |
7a62725a | 245 | } |
c6ba7c9b HW |
246 | } |
247 | ||
0cfdd8f9 UB |
248 | lgr = kzalloc(sizeof(*lgr), GFP_KERNEL); |
249 | if (!lgr) { | |
7a62725a | 250 | rc = SMC_CLC_DECL_MEM; |
29ee2701 | 251 | goto ism_put_vlan; |
0cfdd8f9 | 252 | } |
bc36d2fc | 253 | lgr->is_smcd = ini->is_smcd; |
517c300e | 254 | lgr->sync_err = 0; |
8e316b9e UB |
255 | lgr->terminating = 0; |
256 | lgr->freefast = 0; | |
257 | lgr->freeing = 0; | |
bc36d2fc | 258 | lgr->vlan_id = ini->vlan_id; |
cd6851f3 UB |
259 | rwlock_init(&lgr->sndbufs_lock); |
260 | rwlock_init(&lgr->rmbs_lock); | |
c6ba7c9b | 261 | rwlock_init(&lgr->conns_lock); |
cd6851f3 UB |
262 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
263 | INIT_LIST_HEAD(&lgr->sndbufs[i]); | |
264 | INIT_LIST_HEAD(&lgr->rmbs[i]); | |
265 | } | |
9fda3510 HW |
266 | smc_lgr_list.num += SMC_LGR_NUM_INCR; |
267 | memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); | |
0cfdd8f9 | 268 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); |
f528ba24 | 269 | INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work); |
0cfdd8f9 | 270 | lgr->conns_all = RB_ROOT; |
bc36d2fc | 271 | if (ini->is_smcd) { |
c6ba7c9b | 272 | /* SMC-D specific settings */ |
b3cb53c0 | 273 | get_device(&ini->ism_dev->dev); |
bc36d2fc KG |
274 | lgr->peer_gid = ini->ism_gid; |
275 | lgr->smcd = ini->ism_dev; | |
a2351c5d | 276 | lgr_list = &ini->ism_dev->lgr_list; |
a0a62ee1 | 277 | lgr_lock = &lgr->smcd->lgr_lock; |
50c6b20e | 278 | lgr->peer_shutdown = 0; |
c6ba7c9b HW |
279 | } else { |
280 | /* SMC-R specific settings */ | |
b3cb53c0 | 281 | get_device(&ini->ib_dev->ibdev->dev); |
c6ba7c9b | 282 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
bc36d2fc KG |
283 | memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer, |
284 | SMC_SYSTEMID_LEN); | |
c6ba7c9b HW |
285 | |
286 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; | |
287 | /* initialize link */ | |
288 | lnk->state = SMC_LNK_ACTIVATING; | |
289 | lnk->link_id = SMC_SINGLE_LINK; | |
bc36d2fc KG |
290 | lnk->smcibdev = ini->ib_dev; |
291 | lnk->ibport = ini->ib_port; | |
a2351c5d | 292 | lgr_list = &smc_lgr_list.list; |
a0a62ee1 | 293 | lgr_lock = &smc_lgr_list.lock; |
bc36d2fc KG |
294 | lnk->path_mtu = |
295 | ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; | |
296 | if (!ini->ib_dev->initialized) | |
297 | smc_ib_setup_per_ibdev(ini->ib_dev); | |
c6ba7c9b HW |
298 | get_random_bytes(rndvec, sizeof(rndvec)); |
299 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + | |
300 | (rndvec[2] << 16); | |
7005ada6 | 301 | rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport, |
bc36d2fc KG |
302 | ini->vlan_id, lnk->gid, |
303 | &lnk->sgid_index); | |
7005ada6 UB |
304 | if (rc) |
305 | goto free_lgr; | |
c6ba7c9b HW |
306 | rc = smc_llc_link_init(lnk); |
307 | if (rc) | |
308 | goto free_lgr; | |
309 | rc = smc_wr_alloc_link_mem(lnk); | |
310 | if (rc) | |
311 | goto clear_llc_lnk; | |
312 | rc = smc_ib_create_protection_domain(lnk); | |
313 | if (rc) | |
314 | goto free_link_mem; | |
315 | rc = smc_ib_create_queue_pair(lnk); | |
316 | if (rc) | |
317 | goto dealloc_pd; | |
318 | rc = smc_wr_create_link(lnk); | |
319 | if (rc) | |
320 | goto destroy_qp; | |
321 | } | |
0cfdd8f9 | 322 | smc->conn.lgr = lgr; |
a0a62ee1 | 323 | spin_lock_bh(lgr_lock); |
a2351c5d | 324 | list_add(&lgr->list, lgr_list); |
a0a62ee1 | 325 | spin_unlock_bh(lgr_lock); |
f38ba179 UB |
326 | return 0; |
327 | ||
bd4ad577 UB |
328 | destroy_qp: |
329 | smc_ib_destroy_queue_pair(lnk); | |
330 | dealloc_pd: | |
331 | smc_ib_dealloc_protection_domain(lnk); | |
332 | free_link_mem: | |
333 | smc_wr_free_link_mem(lnk); | |
2a4c57a9 KG |
334 | clear_llc_lnk: |
335 | smc_llc_link_clear(lnk); | |
f38ba179 UB |
336 | free_lgr: |
337 | kfree(lgr); | |
29ee2701 UB |
338 | ism_put_vlan: |
339 | if (ini->is_smcd && ini->vlan_id) | |
340 | smc_ism_put_vlan(ini->ism_dev, ini->vlan_id); | |
0cfdd8f9 | 341 | out: |
7a62725a KG |
342 | if (rc < 0) { |
343 | if (rc == -ENOMEM) | |
344 | rc = SMC_CLC_DECL_MEM; | |
345 | else | |
346 | rc = SMC_CLC_DECL_INTERR; | |
347 | } | |
0cfdd8f9 UB |
348 | return rc; |
349 | } | |
350 | ||
fb692ec4 KG |
351 | static void smc_buf_unuse(struct smc_connection *conn, |
352 | struct smc_link_group *lgr) | |
cd6851f3 | 353 | { |
69cb7dc0 | 354 | if (conn->sndbuf_desc) |
cd6851f3 | 355 | conn->sndbuf_desc->used = 0; |
cd6851f3 | 356 | if (conn->rmb_desc) { |
a6920d1d | 357 | if (!conn->rmb_desc->regerr) { |
2a0674ff | 358 | if (!lgr->is_smcd && !list_empty(&lgr->list)) { |
c7674c00 KG |
359 | /* unregister rmb with peer */ |
360 | smc_llc_do_delete_rkey( | |
361 | &lgr->lnk[SMC_SINGLE_LINK], | |
362 | conn->rmb_desc); | |
363 | } | |
a5e04318 | 364 | conn->rmb_desc->used = 0; |
a6920d1d KG |
365 | } else { |
366 | /* buf registration failed, reuse not possible */ | |
a6920d1d KG |
367 | write_lock_bh(&lgr->rmbs_lock); |
368 | list_del(&conn->rmb_desc->list); | |
369 | write_unlock_bh(&lgr->rmbs_lock); | |
370 | ||
6511aad3 | 371 | smc_buf_free(lgr, true, conn->rmb_desc); |
a6920d1d | 372 | } |
cd6851f3 UB |
373 | } |
374 | } | |
375 | ||
0cfdd8f9 UB |
376 | /* remove a finished connection from its link group */ |
377 | void smc_conn_free(struct smc_connection *conn) | |
378 | { | |
fb692ec4 KG |
379 | struct smc_link_group *lgr = conn->lgr; |
380 | ||
381 | if (!lgr) | |
0cfdd8f9 | 382 | return; |
fb692ec4 | 383 | if (lgr->is_smcd) { |
c6ba7c9b | 384 | smc_ism_unset_conn(conn); |
be244f28 HW |
385 | tasklet_kill(&conn->rx_tsklet); |
386 | } else { | |
c6ba7c9b | 387 | smc_cdc_tx_dismiss_slots(conn); |
be244f28 | 388 | } |
2a0674ff UB |
389 | if (!list_empty(&lgr->list)) { |
390 | smc_lgr_unregister_conn(conn); | |
391 | smc_buf_unuse(conn, lgr); /* allow buffer reuse */ | |
392 | } | |
fb692ec4 KG |
393 | |
394 | if (!lgr->conns_num) | |
395 | smc_lgr_schedule_free_work(lgr); | |
0cfdd8f9 UB |
396 | } |
397 | ||
398 | static void smc_link_clear(struct smc_link *lnk) | |
399 | { | |
400 | lnk->peer_qpn = 0; | |
2a4c57a9 | 401 | smc_llc_link_clear(lnk); |
bd4ad577 | 402 | smc_ib_modify_qp_reset(lnk); |
f38ba179 | 403 | smc_wr_free_link(lnk); |
bd4ad577 UB |
404 | smc_ib_destroy_queue_pair(lnk); |
405 | smc_ib_dealloc_protection_domain(lnk); | |
f38ba179 | 406 | smc_wr_free_link_mem(lnk); |
0cfdd8f9 UB |
407 | } |
408 | ||
c6ba7c9b HW |
409 | static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, |
410 | struct smc_buf_desc *buf_desc) | |
cd6851f3 | 411 | { |
6511aad3 HW |
412 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; |
413 | ||
3e034725 UB |
414 | if (is_rmb) { |
415 | if (buf_desc->mr_rx[SMC_SINGLE_LINK]) | |
416 | smc_ib_put_memory_region( | |
417 | buf_desc->mr_rx[SMC_SINGLE_LINK]); | |
418 | smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, | |
419 | DMA_FROM_DEVICE); | |
420 | } else { | |
421 | smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, | |
422 | DMA_TO_DEVICE); | |
cd6851f3 | 423 | } |
3e034725 | 424 | sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); |
2ef4f27a SR |
425 | if (buf_desc->pages) |
426 | __free_pages(buf_desc->pages, buf_desc->order); | |
3e034725 | 427 | kfree(buf_desc); |
cd6851f3 UB |
428 | } |
429 | ||
c6ba7c9b HW |
430 | static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, |
431 | struct smc_buf_desc *buf_desc) | |
432 | { | |
be244f28 HW |
433 | if (is_dmb) { |
434 | /* restore original buf len */ | |
435 | buf_desc->len += sizeof(struct smcd_cdc_msg); | |
c6ba7c9b | 436 | smc_ism_unregister_dmb(lgr->smcd, buf_desc); |
be244f28 | 437 | } else { |
c6ba7c9b | 438 | kfree(buf_desc->cpu_addr); |
be244f28 | 439 | } |
c6ba7c9b HW |
440 | kfree(buf_desc); |
441 | } | |
442 | ||
443 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, | |
444 | struct smc_buf_desc *buf_desc) | |
445 | { | |
446 | if (lgr->is_smcd) | |
447 | smcd_buf_free(lgr, is_rmb, buf_desc); | |
448 | else | |
449 | smcr_buf_free(lgr, is_rmb, buf_desc); | |
450 | } | |
451 | ||
3e034725 | 452 | static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) |
cd6851f3 | 453 | { |
3e034725 UB |
454 | struct smc_buf_desc *buf_desc, *bf_desc; |
455 | struct list_head *buf_list; | |
cd6851f3 UB |
456 | int i; |
457 | ||
458 | for (i = 0; i < SMC_RMBE_SIZES; i++) { | |
3e034725 UB |
459 | if (is_rmb) |
460 | buf_list = &lgr->rmbs[i]; | |
461 | else | |
462 | buf_list = &lgr->sndbufs[i]; | |
463 | list_for_each_entry_safe(buf_desc, bf_desc, buf_list, | |
cd6851f3 | 464 | list) { |
3e034725 | 465 | list_del(&buf_desc->list); |
6511aad3 | 466 | smc_buf_free(lgr, is_rmb, buf_desc); |
cd6851f3 UB |
467 | } |
468 | } | |
469 | } | |
470 | ||
3e034725 UB |
471 | static void smc_lgr_free_bufs(struct smc_link_group *lgr) |
472 | { | |
473 | /* free send buffers */ | |
474 | __smc_lgr_free_bufs(lgr, false); | |
475 | /* free rmbs */ | |
476 | __smc_lgr_free_bufs(lgr, true); | |
477 | } | |
478 | ||
0cfdd8f9 | 479 | /* remove a link group */ |
3f3f0e36 | 480 | static void smc_lgr_free(struct smc_link_group *lgr) |
0cfdd8f9 | 481 | { |
3e034725 | 482 | smc_lgr_free_bufs(lgr); |
b3cb53c0 | 483 | if (lgr->is_smcd) { |
c6ba7c9b | 484 | smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); |
b3cb53c0 UB |
485 | put_device(&lgr->smcd->dev); |
486 | } else { | |
c6ba7c9b | 487 | smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); |
b3cb53c0 UB |
488 | put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev); |
489 | } | |
0cfdd8f9 UB |
490 | kfree(lgr); |
491 | } | |
492 | ||
9651b934 KG |
493 | void smc_lgr_forget(struct smc_link_group *lgr) |
494 | { | |
a0a62ee1 UB |
495 | struct list_head *lgr_list; |
496 | spinlock_t *lgr_lock; | |
497 | ||
498 | lgr_list = smc_lgr_list_head(lgr, &lgr_lock); | |
499 | spin_lock_bh(lgr_lock); | |
9651b934 | 500 | /* do not use this link group for new connections */ |
a0a62ee1 UB |
501 | if (!list_empty(lgr_list)) |
502 | list_del_init(lgr_list); | |
503 | spin_unlock_bh(lgr_lock); | |
9651b934 KG |
504 | } |
505 | ||
2a0674ff UB |
506 | static void smc_sk_wake_ups(struct smc_sock *smc) |
507 | { | |
508 | smc->sk.sk_write_space(&smc->sk); | |
509 | smc->sk.sk_data_ready(&smc->sk); | |
510 | smc->sk.sk_state_change(&smc->sk); | |
511 | } | |
512 | ||
513 | /* kill a connection */ | |
514 | static void smc_conn_kill(struct smc_connection *conn) | |
515 | { | |
516 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); | |
517 | ||
50c6b20e UB |
518 | if (conn->lgr->is_smcd && conn->lgr->peer_shutdown) |
519 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; | |
520 | else | |
521 | smc_close_abort(conn); | |
2a0674ff | 522 | conn->killed = 1; |
50c6b20e | 523 | smc->sk.sk_err = ECONNABORTED; |
2a0674ff | 524 | smc_sk_wake_ups(smc); |
50c6b20e UB |
525 | if (conn->lgr->is_smcd) |
526 | tasklet_kill(&conn->rx_tsklet); | |
2a0674ff | 527 | smc_lgr_unregister_conn(conn); |
81cf4f47 | 528 | smc_close_active_abort(smc); |
2a0674ff UB |
529 | } |
530 | ||
8caa6544 | 531 | /* terminate link group */ |
b9f227c3 | 532 | static void __smc_lgr_terminate(struct smc_link_group *lgr) |
0cfdd8f9 UB |
533 | { |
534 | struct smc_connection *conn; | |
b38d7324 | 535 | struct smc_sock *smc; |
0cfdd8f9 UB |
536 | struct rb_node *node; |
537 | ||
517c300e KG |
538 | if (lgr->terminating) |
539 | return; /* lgr already terminating */ | |
540 | lgr->terminating = 1; | |
c6ba7c9b HW |
541 | if (!lgr->is_smcd) |
542 | smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); | |
0cfdd8f9 | 543 | |
69318b52 UB |
544 | /* kill remaining link group connections */ |
545 | read_lock_bh(&lgr->conns_lock); | |
0cfdd8f9 UB |
546 | node = rb_first(&lgr->conns_all); |
547 | while (node) { | |
69318b52 | 548 | read_unlock_bh(&lgr->conns_lock); |
0cfdd8f9 | 549 | conn = rb_entry(node, struct smc_connection, alert_node); |
b38d7324 | 550 | smc = container_of(conn, struct smc_sock, conn); |
81cf4f47 | 551 | sock_hold(&smc->sk); /* sock_put below */ |
69318b52 | 552 | lock_sock(&smc->sk); |
2a0674ff | 553 | smc_conn_kill(conn); |
69318b52 | 554 | release_sock(&smc->sk); |
81cf4f47 | 555 | sock_put(&smc->sk); /* sock_hold above */ |
69318b52 | 556 | read_lock_bh(&lgr->conns_lock); |
0cfdd8f9 UB |
557 | node = rb_first(&lgr->conns_all); |
558 | } | |
69318b52 | 559 | read_unlock_bh(&lgr->conns_lock); |
c6ba7c9b HW |
560 | if (!lgr->is_smcd) |
561 | wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); | |
8e316b9e | 562 | smc_lgr_schedule_free_work_fast(lgr); |
0cfdd8f9 UB |
563 | } |
564 | ||
8caa6544 | 565 | /* unlink and terminate link group */ |
b9f227c3 HW |
566 | void smc_lgr_terminate(struct smc_link_group *lgr) |
567 | { | |
a0a62ee1 UB |
568 | spinlock_t *lgr_lock; |
569 | ||
570 | smc_lgr_list_head(lgr, &lgr_lock); | |
571 | spin_lock_bh(lgr_lock); | |
8caa6544 UB |
572 | if (lgr->terminating) { |
573 | spin_unlock_bh(lgr_lock); | |
574 | return; /* lgr already terminating */ | |
575 | } | |
576 | list_del_init(&lgr->list); | |
a0a62ee1 | 577 | spin_unlock_bh(lgr_lock); |
8caa6544 | 578 | __smc_lgr_terminate(lgr); |
b9f227c3 HW |
579 | } |
580 | ||
9fda3510 HW |
581 | /* Called when IB port is terminated */ |
582 | void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) | |
583 | { | |
584 | struct smc_link_group *lgr, *l; | |
8caa6544 | 585 | LIST_HEAD(lgr_free_list); |
9fda3510 | 586 | |
b9f227c3 | 587 | spin_lock_bh(&smc_lgr_list.lock); |
9fda3510 | 588 | list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { |
c6ba7c9b HW |
589 | if (!lgr->is_smcd && |
590 | lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && | |
9fda3510 | 591 | lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) |
8caa6544 | 592 | list_move(&lgr->list, &lgr_free_list); |
9fda3510 | 593 | } |
b9f227c3 | 594 | spin_unlock_bh(&smc_lgr_list.lock); |
8caa6544 UB |
595 | |
596 | list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { | |
597 | list_del_init(&lgr->list); | |
598 | __smc_lgr_terminate(lgr); | |
599 | } | |
9fda3510 HW |
600 | } |
601 | ||
c6ba7c9b | 602 | /* Called when SMC-D device is terminated or peer is lost */ |
0512f69e | 603 | void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) |
c6ba7c9b HW |
604 | { |
605 | struct smc_link_group *lgr, *l; | |
606 | LIST_HEAD(lgr_free_list); | |
607 | ||
608 | /* run common cleanup function and build free list */ | |
a0a62ee1 | 609 | spin_lock_bh(&dev->lgr_lock); |
a2351c5d UB |
610 | list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { |
611 | if ((!peer_gid || lgr->peer_gid == peer_gid) && | |
0512f69e | 612 | (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { |
50c6b20e UB |
613 | if (peer_gid) /* peer triggered termination */ |
614 | lgr->peer_shutdown = 1; | |
c6ba7c9b HW |
615 | list_move(&lgr->list, &lgr_free_list); |
616 | } | |
617 | } | |
a0a62ee1 | 618 | spin_unlock_bh(&dev->lgr_lock); |
c6ba7c9b HW |
619 | |
620 | /* cancel the regular free workers and actually free lgrs */ | |
621 | list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { | |
622 | list_del_init(&lgr->list); | |
50c6b20e | 623 | schedule_work(&lgr->terminate_work); |
c6ba7c9b HW |
624 | } |
625 | } | |
626 | ||
0cfdd8f9 UB |
627 | /* Determine vlan of internal TCP socket. |
628 | * @vlan_id: address to store the determined vlan id into | |
629 | */ | |
bc36d2fc | 630 | int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) |
0cfdd8f9 UB |
631 | { |
632 | struct dst_entry *dst = sk_dst_get(clcsock->sk); | |
cb9d43f6 UB |
633 | struct net_device *ndev; |
634 | int i, nest_lvl, rc = 0; | |
0cfdd8f9 | 635 | |
bc36d2fc | 636 | ini->vlan_id = 0; |
0cfdd8f9 UB |
637 | if (!dst) { |
638 | rc = -ENOTCONN; | |
639 | goto out; | |
640 | } | |
641 | if (!dst->dev) { | |
642 | rc = -ENODEV; | |
643 | goto out_rel; | |
644 | } | |
645 | ||
cb9d43f6 UB |
646 | ndev = dst->dev; |
647 | if (is_vlan_dev(ndev)) { | |
bc36d2fc | 648 | ini->vlan_id = vlan_dev_vlan_id(ndev); |
cb9d43f6 UB |
649 | goto out_rel; |
650 | } | |
651 | ||
652 | rtnl_lock(); | |
f3b0a18b | 653 | nest_lvl = ndev->lower_level; |
cb9d43f6 UB |
654 | for (i = 0; i < nest_lvl; i++) { |
655 | struct list_head *lower = &ndev->adj_list.lower; | |
656 | ||
657 | if (list_empty(lower)) | |
658 | break; | |
659 | lower = lower->next; | |
660 | ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); | |
661 | if (is_vlan_dev(ndev)) { | |
bc36d2fc | 662 | ini->vlan_id = vlan_dev_vlan_id(ndev); |
cb9d43f6 UB |
663 | break; |
664 | } | |
665 | } | |
666 | rtnl_unlock(); | |
0cfdd8f9 UB |
667 | |
668 | out_rel: | |
669 | dst_release(dst); | |
670 | out: | |
671 | return rc; | |
672 | } | |
673 | ||
c6ba7c9b HW |
674 | static bool smcr_lgr_match(struct smc_link_group *lgr, |
675 | struct smc_clc_msg_local *lcl, | |
ee05ff7a | 676 | enum smc_lgr_role role, u32 clcqpn) |
0cfdd8f9 | 677 | { |
c6ba7c9b HW |
678 | return !memcmp(lgr->peer_systemid, lcl->id_for_peer, |
679 | SMC_SYSTEMID_LEN) && | |
680 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid, | |
681 | SMC_GID_SIZE) && | |
682 | !memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac, | |
683 | sizeof(lcl->mac)) && | |
ee05ff7a KG |
684 | lgr->role == role && |
685 | (lgr->role == SMC_SERV || | |
686 | lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn); | |
c6ba7c9b | 687 | } |
0cfdd8f9 | 688 | |
c6ba7c9b HW |
689 | static bool smcd_lgr_match(struct smc_link_group *lgr, |
690 | struct smcd_dev *smcismdev, u64 peer_gid) | |
691 | { | |
692 | return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev; | |
0cfdd8f9 UB |
693 | } |
694 | ||
695 | /* create a new SMC connection (and a new link group if necessary) */ | |
bc36d2fc | 696 | int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) |
0cfdd8f9 UB |
697 | { |
698 | struct smc_connection *conn = &smc->conn; | |
a2351c5d | 699 | struct list_head *lgr_list; |
0cfdd8f9 | 700 | struct smc_link_group *lgr; |
0cfdd8f9 | 701 | enum smc_lgr_role role; |
a0a62ee1 | 702 | spinlock_t *lgr_lock; |
0cfdd8f9 UB |
703 | int rc = 0; |
704 | ||
a2351c5d | 705 | lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list; |
a0a62ee1 | 706 | lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock; |
7a62725a | 707 | ini->cln_first_contact = SMC_FIRST_CONTACT; |
0cfdd8f9 | 708 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
bc36d2fc | 709 | if (role == SMC_CLNT && ini->srv_first_contact) |
0cfdd8f9 UB |
710 | /* create new link group as well */ |
711 | goto create; | |
712 | ||
713 | /* determine if an existing link group can be reused */ | |
a0a62ee1 | 714 | spin_lock_bh(lgr_lock); |
a2351c5d | 715 | list_for_each_entry(lgr, lgr_list, list) { |
0cfdd8f9 | 716 | write_lock_bh(&lgr->conns_lock); |
bc36d2fc KG |
717 | if ((ini->is_smcd ? |
718 | smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) : | |
719 | smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && | |
0cfdd8f9 | 720 | !lgr->sync_err && |
bc36d2fc | 721 | lgr->vlan_id == ini->vlan_id && |
c6ba7c9b HW |
722 | (role == SMC_CLNT || |
723 | lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { | |
0cfdd8f9 | 724 | /* link group found */ |
7a62725a | 725 | ini->cln_first_contact = SMC_REUSE_CONTACT; |
0cfdd8f9 UB |
726 | conn->lgr = lgr; |
727 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ | |
77f838ac KG |
728 | if (delayed_work_pending(&lgr->free_work)) |
729 | cancel_delayed_work(&lgr->free_work); | |
0cfdd8f9 UB |
730 | write_unlock_bh(&lgr->conns_lock); |
731 | break; | |
732 | } | |
733 | write_unlock_bh(&lgr->conns_lock); | |
734 | } | |
a0a62ee1 | 735 | spin_unlock_bh(lgr_lock); |
0cfdd8f9 | 736 | |
bc36d2fc | 737 | if (role == SMC_CLNT && !ini->srv_first_contact && |
7a62725a | 738 | ini->cln_first_contact == SMC_FIRST_CONTACT) { |
0cfdd8f9 UB |
739 | /* Server reuses a link group, but Client wants to start |
740 | * a new one | |
741 | * send out_of_sync decline, reason synchr. error | |
742 | */ | |
7a62725a | 743 | return SMC_CLC_DECL_SYNCERR; |
0cfdd8f9 UB |
744 | } |
745 | ||
746 | create: | |
7a62725a | 747 | if (ini->cln_first_contact == SMC_FIRST_CONTACT) { |
bc36d2fc | 748 | rc = smc_lgr_create(smc, ini); |
0cfdd8f9 UB |
749 | if (rc) |
750 | goto out; | |
44808792 HZ |
751 | lgr = conn->lgr; |
752 | write_lock_bh(&lgr->conns_lock); | |
0cfdd8f9 | 753 | smc_lgr_register_conn(conn); /* add smc conn to lgr */ |
44808792 | 754 | write_unlock_bh(&lgr->conns_lock); |
0cfdd8f9 | 755 | } |
5f08318f | 756 | conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; |
cbba07a7 | 757 | conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; |
de8474eb | 758 | conn->urg_state = SMC_URG_READ; |
bc36d2fc | 759 | if (ini->is_smcd) { |
be244f28 HW |
760 | conn->rx_off = sizeof(struct smcd_cdc_msg); |
761 | smcd_cdc_rx_init(conn); /* init tasklet for this conn */ | |
762 | } | |
5f08318f UB |
763 | #ifndef KERNEL_HAS_ATOMIC64 |
764 | spin_lock_init(&conn->acurs_lock); | |
765 | #endif | |
0cfdd8f9 UB |
766 | |
767 | out: | |
7a62725a | 768 | return rc; |
0cfdd8f9 | 769 | } |
cd6851f3 | 770 | |
2f6becaf HW |
771 | /* convert the RMB size into the compressed notation - minimum 16K. |
772 | * In contrast to plain ilog2, this rounds towards the next power of 2, | |
773 | * so the socket application gets at least its desired sndbuf / rcvbuf size. | |
774 | */ | |
775 | static u8 smc_compress_bufsize(int size) | |
776 | { | |
777 | u8 compressed; | |
778 | ||
779 | if (size <= SMC_BUF_MIN_SIZE) | |
780 | return 0; | |
781 | ||
782 | size = (size - 1) >> 14; | |
783 | compressed = ilog2(size) + 1; | |
784 | if (compressed >= SMC_RMBE_SIZES) | |
785 | compressed = SMC_RMBE_SIZES - 1; | |
786 | return compressed; | |
787 | } | |
788 | ||
789 | /* convert the RMB size from compressed notation into integer */ | |
790 | int smc_uncompress_bufsize(u8 compressed) | |
791 | { | |
792 | u32 size; | |
793 | ||
794 | size = 0x00000001 << (((int)compressed) + 14); | |
795 | return (int)size; | |
796 | } | |
797 | ||
3e034725 UB |
798 | /* try to reuse a sndbuf or rmb description slot for a certain |
799 | * buffer size; if not available, return NULL | |
cd6851f3 | 800 | */ |
8437bda0 HW |
801 | static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, |
802 | rwlock_t *lock, | |
803 | struct list_head *buf_list) | |
cd6851f3 | 804 | { |
3e034725 | 805 | struct smc_buf_desc *buf_slot; |
cd6851f3 | 806 | |
3e034725 UB |
807 | read_lock_bh(lock); |
808 | list_for_each_entry(buf_slot, buf_list, list) { | |
809 | if (cmpxchg(&buf_slot->used, 0, 1) == 0) { | |
810 | read_unlock_bh(lock); | |
811 | return buf_slot; | |
cd6851f3 UB |
812 | } |
813 | } | |
3e034725 | 814 | read_unlock_bh(lock); |
cd6851f3 UB |
815 | return NULL; |
816 | } | |
817 | ||
952310cc UB |
818 | /* one of the conditions for announcing a receiver's current window size is |
819 | * that it "results in a minimum increase in the window size of 10% of the | |
820 | * receive buffer space" [RFC7609] | |
821 | */ | |
822 | static inline int smc_rmb_wnd_update_limit(int rmbe_size) | |
823 | { | |
824 | return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); | |
825 | } | |
826 | ||
c6ba7c9b HW |
827 | static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, |
828 | bool is_rmb, int bufsize) | |
b33982c3 UB |
829 | { |
830 | struct smc_buf_desc *buf_desc; | |
831 | struct smc_link *lnk; | |
832 | int rc; | |
833 | ||
834 | /* try to alloc a new buffer */ | |
835 | buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); | |
836 | if (!buf_desc) | |
837 | return ERR_PTR(-ENOMEM); | |
838 | ||
2ef4f27a SR |
839 | buf_desc->order = get_order(bufsize); |
840 | buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | | |
841 | __GFP_NOMEMALLOC | __GFP_COMP | | |
842 | __GFP_NORETRY | __GFP_ZERO, | |
843 | buf_desc->order); | |
844 | if (!buf_desc->pages) { | |
b33982c3 UB |
845 | kfree(buf_desc); |
846 | return ERR_PTR(-EAGAIN); | |
847 | } | |
2ef4f27a | 848 | buf_desc->cpu_addr = (void *)page_address(buf_desc->pages); |
b33982c3 UB |
849 | |
850 | /* build the sg table from the pages */ | |
851 | lnk = &lgr->lnk[SMC_SINGLE_LINK]; | |
852 | rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, | |
853 | GFP_KERNEL); | |
854 | if (rc) { | |
6511aad3 | 855 | smc_buf_free(lgr, is_rmb, buf_desc); |
b33982c3 UB |
856 | return ERR_PTR(rc); |
857 | } | |
858 | sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, | |
859 | buf_desc->cpu_addr, bufsize); | |
860 | ||
861 | /* map sg table to DMA address */ | |
862 | rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, | |
863 | is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); | |
864 | /* SMC protocol depends on mapping to one DMA address only */ | |
865 | if (rc != 1) { | |
6511aad3 | 866 | smc_buf_free(lgr, is_rmb, buf_desc); |
b33982c3 UB |
867 | return ERR_PTR(-EAGAIN); |
868 | } | |
869 | ||
870 | /* create a new memory region for the RMB */ | |
871 | if (is_rmb) { | |
872 | rc = smc_ib_get_memory_region(lnk->roce_pd, | |
873 | IB_ACCESS_REMOTE_WRITE | | |
874 | IB_ACCESS_LOCAL_WRITE, | |
875 | buf_desc); | |
876 | if (rc) { | |
6511aad3 | 877 | smc_buf_free(lgr, is_rmb, buf_desc); |
b33982c3 UB |
878 | return ERR_PTR(rc); |
879 | } | |
880 | } | |
881 | ||
69cb7dc0 | 882 | buf_desc->len = bufsize; |
b33982c3 UB |
883 | return buf_desc; |
884 | } | |
885 | ||
c6ba7c9b HW |
886 | #define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ |
887 | ||
888 | static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, | |
889 | bool is_dmb, int bufsize) | |
890 | { | |
891 | struct smc_buf_desc *buf_desc; | |
892 | int rc; | |
893 | ||
894 | if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) | |
895 | return ERR_PTR(-EAGAIN); | |
896 | ||
897 | /* try to alloc a new DMB */ | |
898 | buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); | |
899 | if (!buf_desc) | |
900 | return ERR_PTR(-ENOMEM); | |
901 | if (is_dmb) { | |
902 | rc = smc_ism_register_dmb(lgr, bufsize, buf_desc); | |
903 | if (rc) { | |
904 | kfree(buf_desc); | |
905 | return ERR_PTR(-EAGAIN); | |
906 | } | |
be244f28 HW |
907 | buf_desc->pages = virt_to_page(buf_desc->cpu_addr); |
908 | /* CDC header stored in buf. So, pretend it was smaller */ | |
909 | buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); | |
c6ba7c9b HW |
910 | } else { |
911 | buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL | | |
912 | __GFP_NOWARN | __GFP_NORETRY | | |
913 | __GFP_NOMEMALLOC); | |
914 | if (!buf_desc->cpu_addr) { | |
915 | kfree(buf_desc); | |
916 | return ERR_PTR(-EAGAIN); | |
917 | } | |
918 | buf_desc->len = bufsize; | |
919 | } | |
920 | return buf_desc; | |
921 | } | |
922 | ||
923 | static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) | |
cd6851f3 | 924 | { |
8437bda0 | 925 | struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); |
cd6851f3 UB |
926 | struct smc_connection *conn = &smc->conn; |
927 | struct smc_link_group *lgr = conn->lgr; | |
3e034725 | 928 | struct list_head *buf_list; |
c45abf31 | 929 | int bufsize, bufsize_short; |
3e034725 UB |
930 | int sk_buf_size; |
931 | rwlock_t *lock; | |
cd6851f3 | 932 | |
3e034725 UB |
933 | if (is_rmb) |
934 | /* use socket recv buffer size (w/o overhead) as start value */ | |
935 | sk_buf_size = smc->sk.sk_rcvbuf / 2; | |
936 | else | |
937 | /* use socket send buffer size (w/o overhead) as start value */ | |
938 | sk_buf_size = smc->sk.sk_sndbuf / 2; | |
939 | ||
4e1061f4 | 940 | for (bufsize_short = smc_compress_bufsize(sk_buf_size); |
c45abf31 | 941 | bufsize_short >= 0; bufsize_short--) { |
9d8fb617 | 942 | |
3e034725 UB |
943 | if (is_rmb) { |
944 | lock = &lgr->rmbs_lock; | |
945 | buf_list = &lgr->rmbs[bufsize_short]; | |
946 | } else { | |
947 | lock = &lgr->sndbufs_lock; | |
948 | buf_list = &lgr->sndbufs[bufsize_short]; | |
9d8fb617 | 949 | } |
c45abf31 | 950 | bufsize = smc_uncompress_bufsize(bufsize_short); |
a3fe3d01 UB |
951 | if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) |
952 | continue; | |
953 | ||
3e034725 | 954 | /* check for reusable slot in the link group */ |
8437bda0 | 955 | buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); |
3e034725 UB |
956 | if (buf_desc) { |
957 | memset(buf_desc->cpu_addr, 0, bufsize); | |
cd6851f3 UB |
958 | break; /* found reusable slot */ |
959 | } | |
a3fe3d01 | 960 | |
c6ba7c9b HW |
961 | if (is_smcd) |
962 | buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize); | |
963 | else | |
964 | buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); | |
965 | ||
b33982c3 UB |
966 | if (PTR_ERR(buf_desc) == -ENOMEM) |
967 | break; | |
968 | if (IS_ERR(buf_desc)) | |
a3fe3d01 | 969 | continue; |
897e1c24 | 970 | |
3e034725 UB |
971 | buf_desc->used = 1; |
972 | write_lock_bh(lock); | |
973 | list_add(&buf_desc->list, buf_list); | |
974 | write_unlock_bh(lock); | |
975 | break; /* found */ | |
cd6851f3 | 976 | } |
3e034725 | 977 | |
b33982c3 | 978 | if (IS_ERR(buf_desc)) |
3e034725 UB |
979 | return -ENOMEM; |
980 | ||
981 | if (is_rmb) { | |
982 | conn->rmb_desc = buf_desc; | |
c45abf31 UB |
983 | conn->rmbe_size_short = bufsize_short; |
984 | smc->sk.sk_rcvbuf = bufsize * 2; | |
5f08318f | 985 | atomic_set(&conn->bytes_to_rcv, 0); |
be244f28 HW |
986 | conn->rmbe_update_limit = |
987 | smc_rmb_wnd_update_limit(buf_desc->len); | |
c6ba7c9b HW |
988 | if (is_smcd) |
989 | smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ | |
cd6851f3 | 990 | } else { |
3e034725 | 991 | conn->sndbuf_desc = buf_desc; |
3e034725 UB |
992 | smc->sk.sk_sndbuf = bufsize * 2; |
993 | atomic_set(&conn->sndbuf_space, bufsize); | |
cd6851f3 | 994 | } |
3e034725 UB |
995 | return 0; |
996 | } | |
997 | ||
10428dd8 UB |
998 | void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) |
999 | { | |
1000 | struct smc_link_group *lgr = conn->lgr; | |
1001 | ||
c6ba7c9b HW |
1002 | if (!conn->lgr || conn->lgr->is_smcd) |
1003 | return; | |
10428dd8 UB |
1004 | smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
1005 | conn->sndbuf_desc, DMA_TO_DEVICE); | |
1006 | } | |
1007 | ||
1008 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) | |
1009 | { | |
1010 | struct smc_link_group *lgr = conn->lgr; | |
1011 | ||
c6ba7c9b HW |
1012 | if (!conn->lgr || conn->lgr->is_smcd) |
1013 | return; | |
10428dd8 UB |
1014 | smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
1015 | conn->sndbuf_desc, DMA_TO_DEVICE); | |
1016 | } | |
1017 | ||
1018 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) | |
1019 | { | |
1020 | struct smc_link_group *lgr = conn->lgr; | |
1021 | ||
c6ba7c9b HW |
1022 | if (!conn->lgr || conn->lgr->is_smcd) |
1023 | return; | |
10428dd8 UB |
1024 | smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
1025 | conn->rmb_desc, DMA_FROM_DEVICE); | |
1026 | } | |
1027 | ||
1028 | void smc_rmb_sync_sg_for_device(struct smc_connection *conn) | |
1029 | { | |
1030 | struct smc_link_group *lgr = conn->lgr; | |
1031 | ||
c6ba7c9b HW |
1032 | if (!conn->lgr || conn->lgr->is_smcd) |
1033 | return; | |
10428dd8 UB |
1034 | smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, |
1035 | conn->rmb_desc, DMA_FROM_DEVICE); | |
1036 | } | |
1037 | ||
3e034725 UB |
1038 | /* create the send and receive buffer for an SMC socket; |
1039 | * receive buffers are called RMBs; | |
1040 | * (even though the SMC protocol allows more than one RMB-element per RMB, | |
1041 | * the Linux implementation uses just one RMB-element per RMB, i.e. uses an | |
1042 | * extra RMB for every connection in a link group | |
1043 | */ | |
c6ba7c9b | 1044 | int smc_buf_create(struct smc_sock *smc, bool is_smcd) |
3e034725 UB |
1045 | { |
1046 | int rc; | |
1047 | ||
1048 | /* create send buffer */ | |
c6ba7c9b | 1049 | rc = __smc_buf_create(smc, is_smcd, false); |
3e034725 UB |
1050 | if (rc) |
1051 | return rc; | |
1052 | /* create rmb */ | |
c6ba7c9b | 1053 | rc = __smc_buf_create(smc, is_smcd, true); |
3e034725 | 1054 | if (rc) |
6511aad3 | 1055 | smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); |
3e034725 | 1056 | return rc; |
cd6851f3 | 1057 | } |
bd4ad577 UB |
1058 | |
1059 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) | |
1060 | { | |
1061 | int i; | |
1062 | ||
1063 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { | |
1064 | if (!test_and_set_bit(i, lgr->rtokens_used_mask)) | |
1065 | return i; | |
1066 | } | |
1067 | return -ENOSPC; | |
1068 | } | |
1069 | ||
4ed75de5 KG |
1070 | /* add a new rtoken from peer */ |
1071 | int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) | |
bd4ad577 | 1072 | { |
4ed75de5 KG |
1073 | u64 dma_addr = be64_to_cpu(nw_vaddr); |
1074 | u32 rkey = ntohl(nw_rkey); | |
bd4ad577 UB |
1075 | int i; |
1076 | ||
1077 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | |
1078 | if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && | |
263eec9b | 1079 | (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && |
bd4ad577 | 1080 | test_bit(i, lgr->rtokens_used_mask)) { |
4ed75de5 KG |
1081 | /* already in list */ |
1082 | return i; | |
1083 | } | |
1084 | } | |
1085 | i = smc_rmb_reserve_rtoken_idx(lgr); | |
1086 | if (i < 0) | |
1087 | return i; | |
1088 | lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; | |
1089 | lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; | |
1090 | return i; | |
1091 | } | |
1092 | ||
1093 | /* delete an rtoken */ | |
1094 | int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) | |
1095 | { | |
1096 | u32 rkey = ntohl(nw_rkey); | |
1097 | int i; | |
1098 | ||
1099 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { | |
1100 | if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && | |
1101 | test_bit(i, lgr->rtokens_used_mask)) { | |
1102 | lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; | |
1103 | lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; | |
1104 | ||
1105 | clear_bit(i, lgr->rtokens_used_mask); | |
bd4ad577 UB |
1106 | return 0; |
1107 | } | |
1108 | } | |
4ed75de5 KG |
1109 | return -ENOENT; |
1110 | } | |
1111 | ||
1112 | /* save rkey and dma_addr received from peer during clc handshake */ | |
1113 | int smc_rmb_rtoken_handling(struct smc_connection *conn, | |
1114 | struct smc_clc_msg_accept_confirm *clc) | |
1115 | { | |
1116 | conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, | |
1117 | clc->rmb_rkey); | |
bd4ad577 UB |
1118 | if (conn->rtoken_idx < 0) |
1119 | return conn->rtoken_idx; | |
bd4ad577 UB |
1120 | return 0; |
1121 | } | |
9fda3510 | 1122 | |
c3d9494e UB |
1123 | static void smc_core_going_away(void) |
1124 | { | |
1125 | struct smc_ib_device *smcibdev; | |
1126 | struct smcd_dev *smcd; | |
1127 | ||
1128 | spin_lock(&smc_ib_devices.lock); | |
1129 | list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { | |
1130 | int i; | |
1131 | ||
1132 | for (i = 0; i < SMC_MAX_PORTS; i++) | |
1133 | set_bit(i, smcibdev->ports_going_away); | |
1134 | } | |
1135 | spin_unlock(&smc_ib_devices.lock); | |
1136 | ||
1137 | spin_lock(&smcd_dev_list.lock); | |
1138 | list_for_each_entry(smcd, &smcd_dev_list.list, list) { | |
1139 | smcd->going_away = 1; | |
1140 | } | |
1141 | spin_unlock(&smcd_dev_list.lock); | |
1142 | } | |
1143 | ||
9fda3510 HW |
1144 | /* Called (from smc_exit) when module is removed */ |
1145 | void smc_core_exit(void) | |
1146 | { | |
1147 | struct smc_link_group *lgr, *lg; | |
1148 | LIST_HEAD(lgr_freeing_list); | |
a2351c5d | 1149 | struct smcd_dev *smcd; |
9fda3510 | 1150 | |
c3d9494e UB |
1151 | smc_core_going_away(); |
1152 | ||
9fda3510 | 1153 | spin_lock_bh(&smc_lgr_list.lock); |
a2351c5d | 1154 | list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); |
9fda3510 | 1155 | spin_unlock_bh(&smc_lgr_list.lock); |
a2351c5d UB |
1156 | |
1157 | spin_lock(&smcd_dev_list.lock); | |
1158 | list_for_each_entry(smcd, &smcd_dev_list.list, list) | |
1159 | list_splice_init(&smcd->lgr_list, &lgr_freeing_list); | |
1160 | spin_unlock(&smcd_dev_list.lock); | |
1161 | ||
9fda3510 HW |
1162 | list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { |
1163 | list_del_init(&lgr->list); | |
0d18a0cb KG |
1164 | if (!lgr->is_smcd) { |
1165 | struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; | |
1166 | ||
1167 | if (lnk->state == SMC_LNK_ACTIVE) | |
1168 | smc_llc_send_delete_link(lnk, SMC_LLC_REQ, | |
1169 | false); | |
1170 | smc_llc_link_inactive(lnk); | |
1171 | } | |
9fda3510 | 1172 | cancel_delayed_work_sync(&lgr->free_work); |
0512f69e HW |
1173 | if (lgr->is_smcd) |
1174 | smc_ism_signal_shutdown(lgr); | |
9fda3510 HW |
1175 | smc_lgr_free(lgr); /* free link group */ |
1176 | } | |
1177 | } |