[linux.git] / net / smc / smc_core.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
4  *
5  *  Basic Transport Functions exploiting Infiniband API
6  *
7  *  Copyright IBM Corp. 2016
8  *
9  *  Author(s):  Ursula Braun <[email protected]>
10  */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <net/tcp.h>
19 #include <net/sock.h>
20 #include <rdma/ib_verbs.h>
21 #include <rdma/ib_cache.h>
22
23 #include "smc.h"
24 #include "smc_clc.h"
25 #include "smc_core.h"
26 #include "smc_ib.h"
27 #include "smc_wr.h"
28 #include "smc_llc.h"
29 #include "smc_cdc.h"
30 #include "smc_close.h"
31 #include "smc_ism.h"
32
33 #define SMC_LGR_NUM_INCR                256
34 #define SMC_LGR_FREE_DELAY_SERV         (600 * HZ)
35 #define SMC_LGR_FREE_DELAY_CLNT         (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
36 #define SMC_LGR_FREE_DELAY_FAST         (8 * HZ)
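/* All delays above are in jiffies: the server keeps an idle link group
 * around for 600 * HZ (ten minutes), the client for ten seconds longer
 * so both sides age out a link group in the same order, and the fast
 * delay of 8 * HZ is used when a link group should go away soon (see
 * smc_lgr_schedule_free_work_fast()).
 */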
37
38 static struct smc_lgr_list smc_lgr_list = {     /* established link groups */
39         .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
40         .list = LIST_HEAD_INIT(smc_lgr_list.list),
41         .num = 0,
42 };
43
44 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
45 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
46
47 struct smc_ib_up_work {
48         struct work_struct      work;
49         struct smc_link_group   *lgr;
50         struct smc_ib_device    *smcibdev;
51         u8                      ibport;
52 };
53
54 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
55                          struct smc_buf_desc *buf_desc);
56 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
57
58 static void smc_link_up_work(struct work_struct *work);
59 static void smc_link_down_work(struct work_struct *work);
60
61 /* return head of link group list and its lock for a given link group */
62 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
63                                                   spinlock_t **lgr_lock)
64 {
65         if (lgr->is_smcd) {
66                 *lgr_lock = &lgr->smcd->lgr_lock;
67                 return &lgr->smcd->lgr_list;
68         }
69
70         *lgr_lock = &smc_lgr_list.lock;
71         return &smc_lgr_list.list;
72 }
73
74 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
75 {
76         /* client link group creation always follows the server link group
77          * creation. For client use a somewhat higher removal delay time,
78          * otherwise there is a risk of out-of-sync link groups.
79          */
80         if (!lgr->freeing && !lgr->freefast) {
81                 mod_delayed_work(system_wq, &lgr->free_work,
82                                  (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
83                                                 SMC_LGR_FREE_DELAY_CLNT :
84                                                 SMC_LGR_FREE_DELAY_SERV);
85         }
86 }
87
88 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
89 {
90         if (!lgr->freeing && !lgr->freefast) {
91                 lgr->freefast = 1;
92                 mod_delayed_work(system_wq, &lgr->free_work,
93                                  SMC_LGR_FREE_DELAY_FAST);
94         }
95 }
96
97 /* Register connection's alert token in our lookup structure.
98  * To use rbtrees we have to implement our own insert core.
99  * Requires @conns_lock
100  * @conn        connection to register
101  * The insert cannot fail; there is no return value.
102  */
103 static void smc_lgr_add_alert_token(struct smc_connection *conn)
104 {
105         struct rb_node **link, *parent = NULL;
106         u32 token = conn->alert_token_local;
107
108         link = &conn->lgr->conns_all.rb_node;
109         while (*link) {
110                 struct smc_connection *cur = rb_entry(*link,
111                                         struct smc_connection, alert_node);
112
113                 parent = *link;
114                 if (cur->alert_token_local > token)
115                         link = &parent->rb_left;
116                 else
117                         link = &parent->rb_right;
118         }
119         /* Put the new node there */
120         rb_link_node(&conn->alert_node, parent, link);
121         rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
122 }
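/* For reference, a minimal sketch of the matching lookup on this rbtree;
 * the real helper, smc_lgr_find_conn(), comes from a header, but its
 * ordering has to mirror the insert above:
 *
 *	struct rb_node *node = lgr->conns_all.rb_node;
 *
 *	while (node) {
 *		struct smc_connection *cur = rb_entry(node,
 *				struct smc_connection, alert_node);
 *
 *		if (cur->alert_token_local > token)
 *			node = node->rb_left;
 *		else if (cur->alert_token_local < token)
 *			node = node->rb_right;
 *		else
 *			return cur;
 *	}
 *	return NULL;
 */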
123
124 /* assign an SMC-R link to the connection */
125 static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
126 {
127         enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
128                                        SMC_LNK_ACTIVE;
129         int i, j;
130
131         /* do link balancing */
132         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
133                 struct smc_link *lnk = &conn->lgr->lnk[i];
134
135                 if (lnk->state != expected || lnk->link_is_asym)
136                         continue;
137                 if (conn->lgr->role == SMC_CLNT) {
138                         conn->lnk = lnk; /* temporary, SMC server assigns link */
139                         break;
140                 }
141                 if (conn->lgr->conns_num % 2) {
142                         for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
143                                 struct smc_link *lnk2;
144
145                                 lnk2 = &conn->lgr->lnk[j];
146                                 if (lnk2->state == expected &&
147                                     !lnk2->link_is_asym) {
148                                         conn->lnk = lnk2;
149                                         break;
150                                 }
151                         }
152                 }
153                 if (!conn->lnk)
154                         conn->lnk = lnk;
155                 break;
156         }
157         if (!conn->lnk)
158                 return SMC_CLC_DECL_NOACTLINK;
159         return 0;
160 }
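/* Balancing example, assuming a symmetric server-side lgr with two
 * active links L0 and L1: connections registered with an even conns_num
 * take the first matching link (L0), odd ones probe the remaining links
 * first and land on L1 when it is usable, so new connections alternate
 * across the link pair. On the client side only a provisional link is
 * picked here, because the final assignment is made by the server.
 */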
161
162 /* Register connection in link group by assigning an alert token
163  * registered in a search tree.
164  * Requires @conns_lock
165  * Note that '0' is a reserved value and not assigned.
166  */
167 static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
168 {
169         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
170         static atomic_t nexttoken = ATOMIC_INIT(0);
171         int rc;
172
173         if (!conn->lgr->is_smcd) {
174                 rc = smcr_lgr_conn_assign_link(conn, first);
175                 if (rc)
176                         return rc;
177         }
178         /* find a new alert_token_local value not yet used by some connection
179          * in this link group
180          */
181         sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
182         while (!conn->alert_token_local) {
183                 conn->alert_token_local = atomic_inc_return(&nexttoken);
184                 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
185                         conn->alert_token_local = 0;
186         }
187         smc_lgr_add_alert_token(conn);
188         conn->lgr->conns_num++;
189         return 0;
190 }
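/* Token draw example: nexttoken is a global counter starting at 0, so
 * the first connection ever registered gets alert token 1. A value that
 * is already in use within this lgr just causes another draw, and a
 * counter wrap to 0 is rejected by the loop condition, keeping 0 as the
 * reserved "no token" value.
 */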
191
192 /* Unregister connection and reset the alert token of the given connection
193  */
194 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
195 {
196         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
197         struct smc_link_group *lgr = conn->lgr;
198
199         rb_erase(&conn->alert_node, &lgr->conns_all);
200         lgr->conns_num--;
201         conn->alert_token_local = 0;
202         sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
203 }
204
205 /* Unregister connection from lgr
206  */
207 static void smc_lgr_unregister_conn(struct smc_connection *conn)
208 {
209         struct smc_link_group *lgr = conn->lgr;
210
211         if (!lgr)
212                 return;
213         write_lock_bh(&lgr->conns_lock);
214         if (conn->alert_token_local) {
215                 __smc_lgr_unregister_conn(conn);
216         }
217         write_unlock_bh(&lgr->conns_lock);
218         conn->lgr = NULL;
219 }
220
221 void smc_lgr_cleanup_early(struct smc_connection *conn)
222 {
223         struct smc_link_group *lgr = conn->lgr;
224         struct list_head *lgr_list;
225         spinlock_t *lgr_lock;
226
227         if (!lgr)
228                 return;
229
230         smc_conn_free(conn);
231         lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
232         spin_lock_bh(lgr_lock);
233         /* do not use this link group for new connections */
234         if (!list_empty(lgr_list))
235                 list_del_init(lgr_list);
236         spin_unlock_bh(lgr_lock);
237         smc_lgr_schedule_free_work_fast(lgr);
238 }
239
240 static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
241 {
242         int i;
243
244         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
245                 struct smc_link *lnk = &lgr->lnk[i];
246
247                 if (smc_link_usable(lnk))
248                         lnk->state = SMC_LNK_INACTIVE;
249         }
250         wake_up_interruptible_all(&lgr->llc_waiter);
251 }
252
253 static void smc_lgr_free(struct smc_link_group *lgr);
254
255 static void smc_lgr_free_work(struct work_struct *work)
256 {
257         struct smc_link_group *lgr = container_of(to_delayed_work(work),
258                                                   struct smc_link_group,
259                                                   free_work);
260         spinlock_t *lgr_lock;
261         bool conns;
262
263         smc_lgr_list_head(lgr, &lgr_lock);
264         spin_lock_bh(lgr_lock);
265         if (lgr->freeing) {
266                 spin_unlock_bh(lgr_lock);
267                 return;
268         }
269         read_lock_bh(&lgr->conns_lock);
270         conns = RB_EMPTY_ROOT(&lgr->conns_all);
271         read_unlock_bh(&lgr->conns_lock);
272         if (!conns) { /* number of lgr connections is no longer zero */
273                 spin_unlock_bh(lgr_lock);
274                 return;
275         }
276         list_del_init(&lgr->list); /* remove from smc_lgr_list */
277         lgr->freeing = 1; /* this instance does the freeing, no new schedule */
278         spin_unlock_bh(lgr_lock);
279         cancel_delayed_work(&lgr->free_work);
280
281         if (!lgr->is_smcd && !lgr->terminating)
282                 smc_llc_send_link_delete_all(lgr, true,
283                                              SMC_LLC_DEL_PROG_INIT_TERM);
284         if (lgr->is_smcd && !lgr->terminating)
285                 smc_ism_signal_shutdown(lgr);
286         if (!lgr->is_smcd)
287                 smcr_lgr_link_deactivate_all(lgr);
288         smc_lgr_free(lgr);
289 }
290
291 static void smc_lgr_terminate_work(struct work_struct *work)
292 {
293         struct smc_link_group *lgr = container_of(work, struct smc_link_group,
294                                                   terminate_work);
295
296         __smc_lgr_terminate(lgr, true);
297 }
298
299 /* return next unique link id for the lgr */
300 static u8 smcr_next_link_id(struct smc_link_group *lgr)
301 {
302         u8 link_id;
303         int i;
304
305         while (1) {
306                 link_id = ++lgr->next_link_id;
307                 if (!link_id)   /* skip zero as link_id */
308                         link_id = ++lgr->next_link_id;
309                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
310                         if (smc_link_usable(&lgr->lnk[i]) &&
311                             lgr->lnk[i].link_id == link_id)
312                                 break;  /* link_id already in use */
313                 if (i == SMC_LINKS_PER_LGR_MAX)
314                         break;          /* unique link_id found */
315         }
316         return link_id;
317 }
318
319 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
320                    u8 link_idx, struct smc_init_info *ini)
321 {
322         u8 rndvec[3];
323         int rc;
324
325         get_device(&ini->ib_dev->ibdev->dev);
326         atomic_inc(&ini->ib_dev->lnk_cnt);
327         lnk->state = SMC_LNK_ACTIVATING;
328         lnk->link_id = smcr_next_link_id(lgr);
329         lnk->lgr = lgr;
330         lnk->link_idx = link_idx;
331         lnk->smcibdev = ini->ib_dev;
332         lnk->ibport = ini->ib_port;
333         lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
334         smc_llc_link_set_uid(lnk);
335         INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
336         if (!ini->ib_dev->initialized) {
337                 rc = smc_ib_setup_per_ibdev(ini->ib_dev);
338                 if (rc)
339                         goto out;
340         }
341         get_random_bytes(rndvec, sizeof(rndvec));
342         lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
343                 (rndvec[2] << 16);
344         rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
345                                   ini->vlan_id, lnk->gid, &lnk->sgid_index);
346         if (rc)
347                 goto out;
348         rc = smc_llc_link_init(lnk);
349         if (rc)
350                 goto out;
351         rc = smc_wr_alloc_link_mem(lnk);
352         if (rc)
353                 goto clear_llc_lnk;
354         rc = smc_ib_create_protection_domain(lnk);
355         if (rc)
356                 goto free_link_mem;
357         rc = smc_ib_create_queue_pair(lnk);
358         if (rc)
359                 goto dealloc_pd;
360         rc = smc_wr_create_link(lnk);
361         if (rc)
362                 goto destroy_qp;
363         return 0;
364
365 destroy_qp:
366         smc_ib_destroy_queue_pair(lnk);
367 dealloc_pd:
368         smc_ib_dealloc_protection_domain(lnk);
369 free_link_mem:
370         smc_wr_free_link_mem(lnk);
371 clear_llc_lnk:
372         smc_llc_link_clear(lnk, false);
373 out:
374         put_device(&ini->ib_dev->ibdev->dev);
375         memset(lnk, 0, sizeof(struct smc_link));
376         lnk->state = SMC_LNK_UNUSED;
377         if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
378                 wake_up(&ini->ib_dev->lnks_deleted);
379         return rc;
380 }
381
382 /* create a new SMC link group */
383 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
384 {
385         struct smc_link_group *lgr;
386         struct list_head *lgr_list;
387         struct smc_link *lnk;
388         spinlock_t *lgr_lock;
389         u8 link_idx;
390         int rc = 0;
391         int i;
392
393         if (ini->is_smcd && ini->vlan_id) {
394                 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
395                         rc = SMC_CLC_DECL_ISMVLANERR;
396                         goto out;
397                 }
398         }
399
400         lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
401         if (!lgr) {
402                 rc = SMC_CLC_DECL_MEM;
403                 goto ism_put_vlan;
404         }
405         lgr->is_smcd = ini->is_smcd;
406         lgr->sync_err = 0;
407         lgr->terminating = 0;
408         lgr->freefast = 0;
409         lgr->freeing = 0;
410         lgr->vlan_id = ini->vlan_id;
411         mutex_init(&lgr->sndbufs_lock);
412         mutex_init(&lgr->rmbs_lock);
413         rwlock_init(&lgr->conns_lock);
414         for (i = 0; i < SMC_RMBE_SIZES; i++) {
415                 INIT_LIST_HEAD(&lgr->sndbufs[i]);
416                 INIT_LIST_HEAD(&lgr->rmbs[i]);
417         }
418         lgr->next_link_id = 0;
419         smc_lgr_list.num += SMC_LGR_NUM_INCR;
420         memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
421         INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
422         INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
423         lgr->conns_all = RB_ROOT;
424         if (ini->is_smcd) {
425                 /* SMC-D specific settings */
426                 get_device(&ini->ism_dev->dev);
427                 lgr->peer_gid = ini->ism_gid;
428                 lgr->smcd = ini->ism_dev;
429                 lgr_list = &ini->ism_dev->lgr_list;
430                 lgr_lock = &lgr->smcd->lgr_lock;
431                 lgr->peer_shutdown = 0;
432                 atomic_inc(&ini->ism_dev->lgr_cnt);
433         } else {
434                 /* SMC-R specific settings */
435                 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
436                 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
437                        SMC_SYSTEMID_LEN);
438                 memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
439                        SMC_MAX_PNETID_LEN);
440                 smc_llc_lgr_init(lgr, smc);
441
442                 link_idx = SMC_SINGLE_LINK;
443                 lnk = &lgr->lnk[link_idx];
444                 rc = smcr_link_init(lgr, lnk, link_idx, ini);
445                 if (rc)
446                         goto free_lgr;
447                 lgr_list = &smc_lgr_list.list;
448                 lgr_lock = &smc_lgr_list.lock;
449                 atomic_inc(&lgr_cnt);
450         }
451         smc->conn.lgr = lgr;
452         spin_lock_bh(lgr_lock);
453         list_add(&lgr->list, lgr_list);
454         spin_unlock_bh(lgr_lock);
455         return 0;
456
457 free_lgr:
458         kfree(lgr);
459 ism_put_vlan:
460         if (ini->is_smcd && ini->vlan_id)
461                 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
462 out:
463         if (rc < 0) {
464                 if (rc == -ENOMEM)
465                         rc = SMC_CLC_DECL_MEM;
466                 else
467                         rc = SMC_CLC_DECL_INTERR;
468         }
469         return rc;
470 }
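/* Note that negative kernel errnos are translated into positive SMC CLC
 * decline reason codes on the way out, since callers of smc_lgr_create()
 * propagate CLC codes (SMC_CLC_DECL_*) rather than errnos.
 */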
471
472 static int smc_write_space(struct smc_connection *conn)
473 {
474         int buffer_len = conn->peer_rmbe_size;
475         union smc_host_cursor prod;
476         union smc_host_cursor cons;
477         int space;
478
479         smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
480         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
481         /* determine rx_buf space */
482         space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
483         return space;
484 }
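/* Worked example: with a peer_rmbe_size of 65536 and 12000 bytes still
 * in flight between cons and prod (written, but not yet consumed by the
 * peer), smc_write_space() returns 65536 - 12000 = 53536 writable bytes.
 */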
485
486 static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
487                              struct smc_wr_buf *wr_buf)
488 {
489         struct smc_connection *conn = &smc->conn;
490         union smc_host_cursor cons, fin;
491         int rc = 0;
492         int diff;
493
494         smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
495         smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
496         /* set prod cursor to old state, enforce tx_rdma_writes() */
497         smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
498         smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
499
500         if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
501                 /* cons cursor advanced more than fin, and prod was set
502                  * to fin above, so now prod is smaller than cons. Fix that.
503                  */
504                 diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
505                 smc_curs_add(conn->sndbuf_desc->len,
506                              &conn->tx_curs_sent, diff);
507                 smc_curs_add(conn->sndbuf_desc->len,
508                              &conn->tx_curs_fin, diff);
509
510                 smp_mb__before_atomic();
511                 atomic_add(diff, &conn->sndbuf_space);
512                 smp_mb__after_atomic();
513
514                 smc_curs_add(conn->peer_rmbe_size,
515                              &conn->local_tx_ctrl.prod, diff);
516                 smc_curs_add(conn->peer_rmbe_size,
517                              &conn->local_tx_ctrl_fin, diff);
518         }
519         /* recalculate, value is used by tx_rdma_writes() */
520         atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
521
522         if (smc->sk.sk_state != SMC_INIT &&
523             smc->sk.sk_state != SMC_CLOSED) {
524                 rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
525                 if (!rc) {
526                         schedule_delayed_work(&conn->tx_work, 0);
527                         smc->sk.sk_data_ready(&smc->sk);
528                 }
529         } else {
530                 smc_wr_tx_put_slot(conn->lnk,
531                                    (struct smc_wr_tx_pend_priv *)pend);
532         }
533         return rc;
534 }
535
536 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
537                                   struct smc_link *from_lnk, bool is_dev_err)
538 {
539         struct smc_link *to_lnk = NULL;
540         struct smc_cdc_tx_pend *pend;
541         struct smc_connection *conn;
542         struct smc_wr_buf *wr_buf;
543         struct smc_sock *smc;
544         struct rb_node *node;
545         int i, rc = 0;
546
547         /* link is inactive, wake up tx waiters */
548         smc_wr_wakeup_tx_wait(from_lnk);
549
550         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
551                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
552                     i == from_lnk->link_idx)
553                         continue;
554                 if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
555                     from_lnk->ibport == lgr->lnk[i].ibport) {
556                         continue;
557                 }
558                 to_lnk = &lgr->lnk[i];
559                 break;
560         }
561         if (!to_lnk) {
562                 smc_lgr_terminate_sched(lgr);
563                 return NULL;
564         }
565 again:
566         read_lock_bh(&lgr->conns_lock);
567         for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
568                 conn = rb_entry(node, struct smc_connection, alert_node);
569                 if (conn->lnk != from_lnk)
570                         continue;
571                 smc = container_of(conn, struct smc_sock, conn);
572                 /* conn->lnk not yet set in SMC_INIT state */
573                 if (smc->sk.sk_state == SMC_INIT)
574                         continue;
575                 if (smc->sk.sk_state == SMC_CLOSED ||
576                     smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
577                     smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
578                     smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
579                     smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
580                     smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
581                     smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
582                     smc->sk.sk_state == SMC_PEERABORTWAIT ||
583                     smc->sk.sk_state == SMC_PROCESSABORT) {
584                         spin_lock_bh(&conn->send_lock);
585                         conn->lnk = to_lnk;
586                         spin_unlock_bh(&conn->send_lock);
587                         continue;
588                 }
589                 sock_hold(&smc->sk);
590                 read_unlock_bh(&lgr->conns_lock);
591                 /* pre-fetch buffer outside of send_lock, might sleep */
592                 rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
593                 if (rc) {
594                         smcr_link_down_cond_sched(to_lnk);
595                         return NULL;
596                 }
597                 /* avoid race with smcr_tx_sndbuf_nonempty() */
598                 spin_lock_bh(&conn->send_lock);
599                 conn->lnk = to_lnk;
600                 rc = smc_switch_cursor(smc, pend, wr_buf);
601                 spin_unlock_bh(&conn->send_lock);
602                 sock_put(&smc->sk);
603                 if (rc) {
604                         smcr_link_down_cond_sched(to_lnk);
605                         return NULL;
606                 }
607                 goto again;
608         }
609         read_unlock_bh(&lgr->conns_lock);
610         return to_lnk;
611 }
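/* Note on the walk above: conns_lock is dropped around the potentially
 * sleeping slot pre-fetch and cursor switch, so the rbtree can change
 * underneath us; restarting from rb_first() via the again label keeps
 * the iteration safe. Connections in closing states only get their
 * conn->lnk pointer flipped under send_lock, no cursor switch needed.
 */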
612
613 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
614                            struct smc_link_group *lgr)
615 {
616         int rc;
617
618         if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
619                 /* unregister rmb with peer */
620                 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
621                 if (!rc) {
622                         /* protect against smc_llc_cli_rkey_exchange() */
623                         mutex_lock(&lgr->llc_conf_mutex);
624                         smc_llc_do_delete_rkey(lgr, rmb_desc);
625                         rmb_desc->is_conf_rkey = false;
626                         mutex_unlock(&lgr->llc_conf_mutex);
627                         smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
628                 }
629         }
630
631         if (rmb_desc->is_reg_err) {
632                 /* buf registration failed, reuse not possible */
633                 mutex_lock(&lgr->rmbs_lock);
634                 list_del(&rmb_desc->list);
635                 mutex_unlock(&lgr->rmbs_lock);
636
637                 smc_buf_free(lgr, true, rmb_desc);
638         } else {
639                 rmb_desc->used = 0;
640         }
641 }
642
643 static void smc_buf_unuse(struct smc_connection *conn,
644                           struct smc_link_group *lgr)
645 {
646         if (conn->sndbuf_desc)
647                 conn->sndbuf_desc->used = 0;
648         if (conn->rmb_desc && lgr->is_smcd)
649                 conn->rmb_desc->used = 0;
650         else if (conn->rmb_desc)
651                 smcr_buf_unuse(conn->rmb_desc, lgr);
652 }
653
654 /* remove a finished connection from its link group */
655 void smc_conn_free(struct smc_connection *conn)
656 {
657         struct smc_link_group *lgr = conn->lgr;
658
659         if (!lgr)
660                 return;
661         if (lgr->is_smcd) {
662                 if (!list_empty(&lgr->list))
663                         smc_ism_unset_conn(conn);
664                 tasklet_kill(&conn->rx_tsklet);
665         } else {
666                 smc_cdc_tx_dismiss_slots(conn);
667                 if (current_work() != &conn->abort_work)
668                         cancel_work_sync(&conn->abort_work);
669         }
670         if (!list_empty(&lgr->list)) {
671                 smc_lgr_unregister_conn(conn);
672                 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
673         }
674
675         if (!lgr->conns_num)
676                 smc_lgr_schedule_free_work(lgr);
677 }
678
679 /* unregister a link from a buf_desc */
680 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
681                                 struct smc_link *lnk)
682 {
683         if (is_rmb)
684                 buf_desc->is_reg_mr[lnk->link_idx] = false;
685         if (!buf_desc->is_map_ib[lnk->link_idx])
686                 return;
687         if (is_rmb) {
688                 if (buf_desc->mr_rx[lnk->link_idx]) {
689                         smc_ib_put_memory_region(
690                                         buf_desc->mr_rx[lnk->link_idx]);
691                         buf_desc->mr_rx[lnk->link_idx] = NULL;
692                 }
693                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
694         } else {
695                 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
696         }
697         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
698         buf_desc->is_map_ib[lnk->link_idx] = false;
699 }
700
701 /* unmap all buffers of lgr for a deleted link */
702 static void smcr_buf_unmap_lgr(struct smc_link *lnk)
703 {
704         struct smc_link_group *lgr = lnk->lgr;
705         struct smc_buf_desc *buf_desc, *bf;
706         int i;
707
708         for (i = 0; i < SMC_RMBE_SIZES; i++) {
709                 mutex_lock(&lgr->rmbs_lock);
710                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
711                         smcr_buf_unmap_link(buf_desc, true, lnk);
712                 mutex_unlock(&lgr->rmbs_lock);
713                 mutex_lock(&lgr->sndbufs_lock);
714                 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
715                                          list)
716                         smcr_buf_unmap_link(buf_desc, false, lnk);
717                 mutex_unlock(&lgr->sndbufs_lock);
718         }
719 }
720
721 static void smcr_rtoken_clear_link(struct smc_link *lnk)
722 {
723         struct smc_link_group *lgr = lnk->lgr;
724         int i;
725
726         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
727                 lgr->rtokens[i][lnk->link_idx].rkey = 0;
728                 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
729         }
730 }
731
732 /* must be called under lgr->llc_conf_mutex lock */
733 void smcr_link_clear(struct smc_link *lnk, bool log)
734 {
735         struct smc_ib_device *smcibdev;
736
737         if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
738                 return;
739         lnk->peer_qpn = 0;
740         smc_llc_link_clear(lnk, log);
741         smcr_buf_unmap_lgr(lnk);
742         smcr_rtoken_clear_link(lnk);
743         smc_ib_modify_qp_reset(lnk);
744         smc_wr_free_link(lnk);
745         smc_ib_destroy_queue_pair(lnk);
746         smc_ib_dealloc_protection_domain(lnk);
747         smc_wr_free_link_mem(lnk);
748         put_device(&lnk->smcibdev->ibdev->dev);
749         smcibdev = lnk->smcibdev;
750         memset(lnk, 0, sizeof(struct smc_link));
751         lnk->state = SMC_LNK_UNUSED;
752         if (!atomic_dec_return(&smcibdev->lnk_cnt))
753                 wake_up(&smcibdev->lnks_deleted);
754 }
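/* The teardown above releases resources in roughly the reverse order of
 * smcr_link_init(): LLC state first, then the buffer mappings and
 * rtokens that still reference the link, then QP, PD and work-request
 * memory, and finally the device reference taken at link creation.
 */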
755
756 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
757                           struct smc_buf_desc *buf_desc)
758 {
759         int i;
760
761         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
762                 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
763
764         if (buf_desc->pages)
765                 __free_pages(buf_desc->pages, buf_desc->order);
766         kfree(buf_desc);
767 }
768
769 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
770                           struct smc_buf_desc *buf_desc)
771 {
772         if (is_dmb) {
773                 /* restore original buf len */
774                 buf_desc->len += sizeof(struct smcd_cdc_msg);
775                 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
776         } else {
777                 kfree(buf_desc->cpu_addr);
778         }
779         kfree(buf_desc);
780 }
781
782 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
783                          struct smc_buf_desc *buf_desc)
784 {
785         if (lgr->is_smcd)
786                 smcd_buf_free(lgr, is_rmb, buf_desc);
787         else
788                 smcr_buf_free(lgr, is_rmb, buf_desc);
789 }
790
791 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
792 {
793         struct smc_buf_desc *buf_desc, *bf_desc;
794         struct list_head *buf_list;
795         int i;
796
797         for (i = 0; i < SMC_RMBE_SIZES; i++) {
798                 if (is_rmb)
799                         buf_list = &lgr->rmbs[i];
800                 else
801                         buf_list = &lgr->sndbufs[i];
802                 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
803                                          list) {
804                         list_del(&buf_desc->list);
805                         smc_buf_free(lgr, is_rmb, buf_desc);
806                 }
807         }
808 }
809
810 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
811 {
812         /* free send buffers */
813         __smc_lgr_free_bufs(lgr, false);
814         /* free rmbs */
815         __smc_lgr_free_bufs(lgr, true);
816 }
817
818 /* remove a link group */
819 static void smc_lgr_free(struct smc_link_group *lgr)
820 {
821         int i;
822
823         if (!lgr->is_smcd) {
824                 mutex_lock(&lgr->llc_conf_mutex);
825                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
826                         if (lgr->lnk[i].state != SMC_LNK_UNUSED)
827                                 smcr_link_clear(&lgr->lnk[i], false);
828                 }
829                 mutex_unlock(&lgr->llc_conf_mutex);
830                 smc_llc_lgr_clear(lgr);
831         }
832
833         smc_lgr_free_bufs(lgr);
834         if (lgr->is_smcd) {
835                 if (!lgr->terminating) {
836                         smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
837                         put_device(&lgr->smcd->dev);
838                 }
839                 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
840                         wake_up(&lgr->smcd->lgrs_deleted);
841         } else {
842                 if (!atomic_dec_return(&lgr_cnt))
843                         wake_up(&lgrs_deleted);
844         }
845         kfree(lgr);
846 }
847
848 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
849 {
850         int i;
851
852         for (i = 0; i < SMC_RMBE_SIZES; i++) {
853                 struct smc_buf_desc *buf_desc;
854
855                 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
856                         buf_desc->len += sizeof(struct smcd_cdc_msg);
857                         smc_ism_unregister_dmb(lgr->smcd, buf_desc);
858                 }
859         }
860 }
861
862 static void smc_sk_wake_ups(struct smc_sock *smc)
863 {
864         smc->sk.sk_write_space(&smc->sk);
865         smc->sk.sk_data_ready(&smc->sk);
866         smc->sk.sk_state_change(&smc->sk);
867 }
868
869 /* kill a connection */
870 static void smc_conn_kill(struct smc_connection *conn, bool soft)
871 {
872         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
873
874         if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
875                 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
876         else
877                 smc_close_abort(conn);
878         conn->killed = 1;
879         smc->sk.sk_err = ECONNABORTED;
880         smc_sk_wake_ups(smc);
881         if (conn->lgr->is_smcd) {
882                 smc_ism_unset_conn(conn);
883                 if (soft)
884                         tasklet_kill(&conn->rx_tsklet);
885                 else
886                         tasklet_unlock_wait(&conn->rx_tsklet);
887         } else {
888                 smc_cdc_tx_dismiss_slots(conn);
889         }
890         smc_lgr_unregister_conn(conn);
891         smc_close_active_abort(smc);
892 }
893
894 static void smc_lgr_cleanup(struct smc_link_group *lgr)
895 {
896         if (lgr->is_smcd) {
897                 smc_ism_signal_shutdown(lgr);
898                 smcd_unregister_all_dmbs(lgr);
899                 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
900                 put_device(&lgr->smcd->dev);
901         } else {
902                 u32 rsn = lgr->llc_termination_rsn;
903
904                 if (!rsn)
905                         rsn = SMC_LLC_DEL_PROG_INIT_TERM;
906                 smc_llc_send_link_delete_all(lgr, false, rsn);
907                 smcr_lgr_link_deactivate_all(lgr);
908         }
909 }
910
911 /* terminate link group
912  * @soft: true if link group shutdown can take its time
913  *        false if immediate link group shutdown is required
914  */
915 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
916 {
917         struct smc_connection *conn;
918         struct smc_sock *smc;
919         struct rb_node *node;
920
921         if (lgr->terminating)
922                 return; /* lgr already terminating */
923         /* cancel free_work sync, will terminate when lgr->freeing is set */
924         cancel_delayed_work_sync(&lgr->free_work);
925         lgr->terminating = 1;
926
927         /* kill remaining link group connections */
928         read_lock_bh(&lgr->conns_lock);
929         node = rb_first(&lgr->conns_all);
930         while (node) {
931                 read_unlock_bh(&lgr->conns_lock);
932                 conn = rb_entry(node, struct smc_connection, alert_node);
933                 smc = container_of(conn, struct smc_sock, conn);
934                 sock_hold(&smc->sk); /* sock_put below */
935                 lock_sock(&smc->sk);
936                 smc_conn_kill(conn, soft);
937                 release_sock(&smc->sk);
938                 sock_put(&smc->sk); /* sock_hold above */
939                 read_lock_bh(&lgr->conns_lock);
940                 node = rb_first(&lgr->conns_all);
941         }
942         read_unlock_bh(&lgr->conns_lock);
943         smc_lgr_cleanup(lgr);
944         smc_lgr_free(lgr);
945 }
946
947 /* unlink link group and schedule termination */
948 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
949 {
950         spinlock_t *lgr_lock;
951
952         smc_lgr_list_head(lgr, &lgr_lock);
953         spin_lock_bh(lgr_lock);
954         if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
955                 spin_unlock_bh(lgr_lock);
956                 return; /* lgr already terminating */
957         }
958         list_del_init(&lgr->list);
959         lgr->freeing = 1;
960         spin_unlock_bh(lgr_lock);
961         schedule_work(&lgr->terminate_work);
962 }
963
964 /* Called when peer lgr shutdown (regularly or abnormally) is received */
965 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
966 {
967         struct smc_link_group *lgr, *l;
968         LIST_HEAD(lgr_free_list);
969
970         /* run common cleanup function and build free list */
971         spin_lock_bh(&dev->lgr_lock);
972         list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
973                 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
974                     (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
975                         if (peer_gid) /* peer triggered termination */
976                                 lgr->peer_shutdown = 1;
977                         list_move(&lgr->list, &lgr_free_list);
978                         lgr->freeing = 1;
979                 }
980         }
981         spin_unlock_bh(&dev->lgr_lock);
982
983         /* cancel the regular free workers and actually free lgrs */
984         list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
985                 list_del_init(&lgr->list);
986                 schedule_work(&lgr->terminate_work);
987         }
988 }
989
990 /* Called when an SMCD device is removed or the smc module is unloaded */
991 void smc_smcd_terminate_all(struct smcd_dev *smcd)
992 {
993         struct smc_link_group *lgr, *lg;
994         LIST_HEAD(lgr_free_list);
995
996         spin_lock_bh(&smcd->lgr_lock);
997         list_splice_init(&smcd->lgr_list, &lgr_free_list);
998         list_for_each_entry(lgr, &lgr_free_list, list)
999                 lgr->freeing = 1;
1000         spin_unlock_bh(&smcd->lgr_lock);
1001
1002         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1003                 list_del_init(&lgr->list);
1004                 __smc_lgr_terminate(lgr, false);
1005         }
1006
1007         if (atomic_read(&smcd->lgr_cnt))
1008                 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
1009 }
1010
1011 /* Called when an SMCR device is removed or the smc module is unloaded.
1012  * If smcibdev is given, all SMCR link groups using this device are terminated.
1013  * If smcibdev is NULL, all SMCR link groups are terminated.
1014  */
1015 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
1016 {
1017         struct smc_link_group *lgr, *lg;
1018         LIST_HEAD(lgr_free_list);
1019         int i;
1020
1021         spin_lock_bh(&smc_lgr_list.lock);
1022         if (!smcibdev) {
1023                 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
1024                 list_for_each_entry(lgr, &lgr_free_list, list)
1025                         lgr->freeing = 1;
1026         } else {
1027                 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
1028                         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1029                                 if (lgr->lnk[i].smcibdev == smcibdev)
1030                                         smcr_link_down_cond_sched(&lgr->lnk[i]);
1031                         }
1032                 }
1033         }
1034         spin_unlock_bh(&smc_lgr_list.lock);
1035
1036         list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
1037                 list_del_init(&lgr->list);
1038                 smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
1039                 __smc_lgr_terminate(lgr, false);
1040         }
1041
1042         if (smcibdev) {
1043                 if (atomic_read(&smcibdev->lnk_cnt))
1044                         wait_event(smcibdev->lnks_deleted,
1045                                    !atomic_read(&smcibdev->lnk_cnt));
1046         } else {
1047                 if (atomic_read(&lgr_cnt))
1048                         wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
1049         }
1050 }
1051
1052 /* set new lgr type and clear all asymmetric link tagging */
1053 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
1054 {
1055         char *lgr_type = "";
1056         int i;
1057
1058         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
1059                 if (smc_link_usable(&lgr->lnk[i]))
1060                         lgr->lnk[i].link_is_asym = false;
1061         if (lgr->type == new_type)
1062                 return;
1063         lgr->type = new_type;
1064
1065         switch (lgr->type) {
1066         case SMC_LGR_NONE:
1067                 lgr_type = "NONE";
1068                 break;
1069         case SMC_LGR_SINGLE:
1070                 lgr_type = "SINGLE";
1071                 break;
1072         case SMC_LGR_SYMMETRIC:
1073                 lgr_type = "SYMMETRIC";
1074                 break;
1075         case SMC_LGR_ASYMMETRIC_PEER:
1076                 lgr_type = "ASYMMETRIC_PEER";
1077                 break;
1078         case SMC_LGR_ASYMMETRIC_LOCAL:
1079                 lgr_type = "ASYMMETRIC_LOCAL";
1080                 break;
1081         }
1082         pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
1083                             "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
1084                             lgr_type, lgr->pnet_id);
1085 }
1086
1087 /* set new lgr type and tag a link as asymmetric */
1088 void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
1089                             enum smc_lgr_type new_type, int asym_lnk_idx)
1090 {
1091         smcr_lgr_set_type(lgr, new_type);
1092         lgr->lnk[asym_lnk_idx].link_is_asym = true;
1093 }
1094
1095 /* abort connection, abort_work scheduled from tasklet context */
1096 static void smc_conn_abort_work(struct work_struct *work)
1097 {
1098         struct smc_connection *conn = container_of(work,
1099                                                    struct smc_connection,
1100                                                    abort_work);
1101         struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
1102
1103         smc_conn_kill(conn, true);
1104         sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
1105 }
1106
1107 /* link is up - establish alternate link if applicable */
1108 static void smcr_link_up(struct smc_link_group *lgr,
1109                          struct smc_ib_device *smcibdev, u8 ibport)
1110 {
1111         struct smc_link *link = NULL;
1112
1113         if (list_empty(&lgr->list) ||
1114             lgr->type == SMC_LGR_SYMMETRIC ||
1115             lgr->type == SMC_LGR_ASYMMETRIC_PEER)
1116                 return;
1117
1118         if (lgr->role == SMC_SERV) {
1119                 /* trigger local add link processing */
1120                 link = smc_llc_usable_link(lgr);
1121                 if (!link)
1122                         return;
1123                 smc_llc_srv_add_link_local(link);
1124         } else {
1125                 /* invite server to start add link processing */
1126                 u8 gid[SMC_GID_SIZE];
1127
1128                 if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
1129                                          NULL))
1130                         return;
1131                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1132                         /* some other llc task is ongoing */
1133                         wait_event_interruptible_timeout(lgr->llc_waiter,
1134                                 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1135                                 SMC_LLC_WAIT_TIME);
1136                 }
1137                 if (list_empty(&lgr->list) ||
1138                     !smc_ib_port_active(smcibdev, ibport))
1139                         return; /* lgr or device no longer active */
1140                 link = smc_llc_usable_link(lgr);
1141                 if (!link)
1142                         return;
1143                 smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
1144                                       NULL, SMC_LLC_REQ);
1145         }
1146 }
1147
1148 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
1149 {
1150         struct smc_ib_up_work *ib_work;
1151         struct smc_link_group *lgr, *n;
1152
1153         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1154                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1155                             SMC_MAX_PNETID_LEN) ||
1156                     lgr->type == SMC_LGR_SYMMETRIC ||
1157                     lgr->type == SMC_LGR_ASYMMETRIC_PEER)
1158                         continue;
1159                 ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
1160                 if (!ib_work)
1161                         continue;
1162                 INIT_WORK(&ib_work->work, smc_link_up_work);
1163                 ib_work->lgr = lgr;
1164                 ib_work->smcibdev = smcibdev;
1165                 ib_work->ibport = ibport;
1166                 schedule_work(&ib_work->work);
1167         }
1168 }
1169
1170 /* link is down - switch connections to alternate link,
1171  * must be called under lgr->llc_conf_mutex lock
1172  */
1173 static void smcr_link_down(struct smc_link *lnk)
1174 {
1175         struct smc_link_group *lgr = lnk->lgr;
1176         struct smc_link *to_lnk;
1177         int del_link_id;
1178
1179         if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
1180                 return;
1181
1182         smc_ib_modify_qp_reset(lnk);
1183         to_lnk = smc_switch_conns(lgr, lnk, true);
1184         if (!to_lnk) { /* no backup link available */
1185                 smcr_link_clear(lnk, true);
1186                 return;
1187         }
1188         smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
1189         del_link_id = lnk->link_id;
1190
1191         if (lgr->role == SMC_SERV) {
1192                 /* trigger local delete link processing */
1193                 smc_llc_srv_delete_link_local(to_lnk, del_link_id);
1194         } else {
1195                 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
1196                         /* another llc task is ongoing */
1197                         mutex_unlock(&lgr->llc_conf_mutex);
1198                         wait_event_interruptible_timeout(lgr->llc_waiter,
1199                                 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1200                                 SMC_LLC_WAIT_TIME);
1201                         mutex_lock(&lgr->llc_conf_mutex);
1202                 }
1203                 smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
1204                                          SMC_LLC_DEL_LOST_PATH);
1205         }
1206 }
1207
1208 /* must be called under lgr->llc_conf_mutex lock */
1209 void smcr_link_down_cond(struct smc_link *lnk)
1210 {
1211         if (smc_link_downing(&lnk->state))
1212                 smcr_link_down(lnk);
1213 }
1214
1215 /* will get the lgr->llc_conf_mutex lock */
1216 void smcr_link_down_cond_sched(struct smc_link *lnk)
1217 {
1218         if (smc_link_downing(&lnk->state))
1219                 schedule_work(&lnk->link_down_wrk);
1220 }
1221
1222 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1223 {
1224         struct smc_link_group *lgr, *n;
1225         int i;
1226
1227         list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1228                 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1229                             SMC_MAX_PNETID_LEN))
1230                         continue; /* lgr is not affected */
1231                 if (list_empty(&lgr->list))
1232                         continue;
1233                 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1234                         struct smc_link *lnk = &lgr->lnk[i];
1235
1236                         if (smc_link_usable(lnk) &&
1237                             lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1238                                 smcr_link_down_cond_sched(lnk);
1239                 }
1240         }
1241 }
1242
1243 static void smc_link_up_work(struct work_struct *work)
1244 {
1245         struct smc_ib_up_work *ib_work = container_of(work,
1246                                                       struct smc_ib_up_work,
1247                                                       work);
1248         struct smc_link_group *lgr = ib_work->lgr;
1249
1250         if (list_empty(&lgr->list))
1251                 goto out;
1252         smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
1253 out:
1254         kfree(ib_work);
1255 }
1256
1257 static void smc_link_down_work(struct work_struct *work)
1258 {
1259         struct smc_link *link = container_of(work, struct smc_link,
1260                                              link_down_wrk);
1261         struct smc_link_group *lgr = link->lgr;
1262
1263         if (list_empty(&lgr->list))
1264                 return;
1265         wake_up_interruptible_all(&lgr->llc_waiter);
1266         mutex_lock(&lgr->llc_conf_mutex);
1267         smcr_link_down(link);
1268         mutex_unlock(&lgr->llc_conf_mutex);
1269 }
1270
1271 /* Determine vlan of internal TCP socket.
1272  * @ini: the determined vlan id is stored into ini->vlan_id
1273  */
1274 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1275 {
1276         struct dst_entry *dst = sk_dst_get(clcsock->sk);
1277         struct net_device *ndev;
1278         int i, nest_lvl, rc = 0;
1279
1280         ini->vlan_id = 0;
1281         if (!dst) {
1282                 rc = -ENOTCONN;
1283                 goto out;
1284         }
1285         if (!dst->dev) {
1286                 rc = -ENODEV;
1287                 goto out_rel;
1288         }
1289
1290         ndev = dst->dev;
1291         if (is_vlan_dev(ndev)) {
1292                 ini->vlan_id = vlan_dev_vlan_id(ndev);
1293                 goto out_rel;
1294         }
1295
1296         rtnl_lock();
1297         nest_lvl = ndev->lower_level;
1298         for (i = 0; i < nest_lvl; i++) {
1299                 struct list_head *lower = &ndev->adj_list.lower;
1300
1301                 if (list_empty(lower))
1302                         break;
1303                 lower = lower->next;
1304                 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
1305                 if (is_vlan_dev(ndev)) {
1306                         ini->vlan_id = vlan_dev_vlan_id(ndev);
1307                         break;
1308                 }
1309         }
1310         rtnl_unlock();
1311
1312 out_rel:
1313         dst_release(dst);
1314 out:
1315         return rc;
1316 }
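/* Example: if the socket's route points directly at a VLAN device such
 * as eth0.100, the vlan id (100) is taken right away; if it points at a
 * device stacked on top of a VLAN, the loop walks the lower-device
 * chain until it finds the VLAN device. On a plain Ethernet path
 * ini->vlan_id stays 0.
 */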
1317
1318 static bool smcr_lgr_match(struct smc_link_group *lgr,
1319                            struct smc_clc_msg_local *lcl,
1320                            enum smc_lgr_role role, u32 clcqpn)
1321 {
1322         int i;
1323
1324         if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
1325             lgr->role != role)
1326                 return false;
1327
1328         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1329                 if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
1330                         continue;
1331                 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
1332                     !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
1333                     !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
1334                         return true;
1335         }
1336         return false;
1337 }
1338
1339 static bool smcd_lgr_match(struct smc_link_group *lgr,
1340                            struct smcd_dev *smcismdev, u64 peer_gid)
1341 {
1342         return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1343 }
1344
1345 /* create a new SMC connection (and a new link group if necessary) */
1346 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1347 {
1348         struct smc_connection *conn = &smc->conn;
1349         struct list_head *lgr_list;
1350         struct smc_link_group *lgr;
1351         enum smc_lgr_role role;
1352         spinlock_t *lgr_lock;
1353         int rc = 0;
1354
1355         lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
1356         lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
1357         ini->cln_first_contact = SMC_FIRST_CONTACT;
1358         role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1359         if (role == SMC_CLNT && ini->srv_first_contact)
1360                 /* create new link group as well */
1361                 goto create;
1362
1363         /* determine if an existing link group can be reused */
1364         spin_lock_bh(lgr_lock);
1365         list_for_each_entry(lgr, lgr_list, list) {
1366                 write_lock_bh(&lgr->conns_lock);
1367                 if ((ini->is_smcd ?
1368                      smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
1369                      smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
1370                     !lgr->sync_err &&
1371                     lgr->vlan_id == ini->vlan_id &&
1372                     (role == SMC_CLNT ||
1373                      lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
1374                         /* link group found */
1375                         ini->cln_first_contact = SMC_REUSE_CONTACT;
1376                         conn->lgr = lgr;
1377                         rc = smc_lgr_register_conn(conn, false);
1378                         write_unlock_bh(&lgr->conns_lock);
1379                         if (!rc && delayed_work_pending(&lgr->free_work))
1380                                 cancel_delayed_work(&lgr->free_work);
1381                         break;
1382                 }
1383                 write_unlock_bh(&lgr->conns_lock);
1384         }
1385         spin_unlock_bh(lgr_lock);
1386         if (rc)
1387                 return rc;
1388
1389         if (role == SMC_CLNT && !ini->srv_first_contact &&
1390             ini->cln_first_contact == SMC_FIRST_CONTACT) {
1391                 /* Server reuses a link group, but Client wants to start
1392                  * a new one
1393                  * send out_of_sync decline, reason synchr. error
1394                  */
1395                 return SMC_CLC_DECL_SYNCERR;
1396         }
1397
1398 create:
1399         if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1400                 rc = smc_lgr_create(smc, ini);
1401                 if (rc)
1402                         goto out;
1403                 lgr = conn->lgr;
1404                 write_lock_bh(&lgr->conns_lock);
1405                 rc = smc_lgr_register_conn(conn, true);
1406                 write_unlock_bh(&lgr->conns_lock);
1407                 if (rc)
1408                         goto out;
1409         }
1410         conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1411         conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1412         conn->urg_state = SMC_URG_READ;
1413         INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
1414         if (ini->is_smcd) {
1415                 conn->rx_off = sizeof(struct smcd_cdc_msg);
1416                 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1417         }
1418 #ifndef KERNEL_HAS_ATOMIC64
1419         spin_lock_init(&conn->acurs_lock);
1420 #endif
1421
1422 out:
1423         return rc;
1424 }
1425
1426 /* convert the RMB size into the compressed notation - minimum 16K.
1427  * In contrast to plain ilog2, this rounds up to the next power of 2,
1428  * so the socket application gets at least its desired sndbuf / rcvbuf size.
1429  */
1430 static u8 smc_compress_bufsize(int size)
1431 {
1432         u8 compressed;
1433
1434         if (size <= SMC_BUF_MIN_SIZE)
1435                 return 0;
1436
1437         size = (size - 1) >> 14;
1438         compressed = ilog2(size) + 1;
1439         if (compressed >= SMC_RMBE_SIZES)
1440                 compressed = SMC_RMBE_SIZES - 1;
1441         return compressed;
1442 }
1443
1444 /* convert the RMB size from compressed notation into integer */
1445 int smc_uncompress_bufsize(u8 compressed)
1446 {
1447         u32 size;
1448
1449         size = 0x00000001 << (((int)compressed) + 14);
1450         return (int)size;
1451 }
1452
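/* Editor's sketch - a hypothetical, illustrative self-check of the
 * compress/uncompress round trip (not part of the original file); it
 * assumes SMC_BUF_MIN_SIZE is 16K and stays below the largest RMBE size
 * class, where the "round up" guarantee holds. Worked example:
 *
 *   smc_compress_bufsize(70000): size = (70000 - 1) >> 14 = 4,
 *   compressed = ilog2(4) + 1 = 3, and
 *   smc_uncompress_bufsize(3) = 1 << (3 + 14) = 131072 (128K),
 *   while plain ilog2 would have truncated down to the 64K class.
 */
static void __maybe_unused smc_bufsize_roundtrip_check(void)
{
	static const int sizes[] = { 16384, 16385, 70000, 262144 };
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		u8 compressed = smc_compress_bufsize(sizes[i]);

		/* the uncompressed result never undercuts the request */
		WARN_ON(smc_uncompress_bufsize(compressed) < sizes[i]);
	}
}
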
1453 /* try to reuse a sndbuf or rmb description slot for a certain
1454  * buffer size; if not available, return NULL
1455  */
1456 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1457                                              struct mutex *lock,
1458                                              struct list_head *buf_list)
1459 {
1460         struct smc_buf_desc *buf_slot;
1461
1462         mutex_lock(lock);
1463         list_for_each_entry(buf_slot, buf_list, list) {
1464                 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1465                         mutex_unlock(lock);
1466                         return buf_slot;
1467                 }
1468         }
1469         mutex_unlock(lock);
1470         return NULL;
1471 }
1472
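/* Editor's note: the list walk above runs under the list mutex, but the
 * release path appears to clear ->used without taking that mutex, so the
 * atomic cmpxchg() 0 -> 1 transition is what actually hands ownership of
 * a slot to exactly one caller.
 */
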
1473 /* one of the conditions for announcing a receiver's current window size is
1474  * that it "results in a minimum increase in the window size of 10% of the
1475  * receive buffer space" [RFC7609]
1476  */
1477 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1478 {
1479         return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1480 }
1481
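/* Editor's note - worked example for the limit above: a 16K RMB gives
 * 16384 / 10 = 1638 bytes, below the SOCK_MIN_SNDBUF / 2 cap (a few KB
 * on common configs), so 1638 applies; for large RMBs the cap wins and
 * keeps window-size announcements from becoming too infrequent.
 */
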
1482 /* map an rmb buf to a link */
1483 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1484                              struct smc_link *lnk)
1485 {
1486         int rc;
1487
1488         if (buf_desc->is_map_ib[lnk->link_idx])
1489                 return 0;
1490
1491         rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1492         if (rc)
1493                 return rc;
1494         sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1495                    buf_desc->cpu_addr, buf_desc->len);
1496
1497         /* map sg table to DMA address */
1498         rc = smc_ib_buf_map_sg(lnk, buf_desc,
1499                                is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1500         /* SMC protocol depends on mapping to one DMA address only */
1501         if (rc != 1) {
1502                 rc = -EAGAIN;
1503                 goto free_table;
1504         }
1505
1506         /* create a new memory region for the RMB */
1507         if (is_rmb) {
1508                 rc = smc_ib_get_memory_region(lnk->roce_pd,
1509                                               IB_ACCESS_REMOTE_WRITE |
1510                                               IB_ACCESS_LOCAL_WRITE,
1511                                               buf_desc, lnk->link_idx);
1512                 if (rc)
1513                         goto buf_unmap;
1514                 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1515         }
1516         buf_desc->is_map_ib[lnk->link_idx] = true;
1517         return 0;
1518
1519 buf_unmap:
1520         smc_ib_buf_unmap_sg(lnk, buf_desc,
1521                             is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1522 free_table:
1523         sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1524         return rc;
1525 }
1526
1527 /* register a new rmb on IB device,
1528  * must be called under lgr->llc_conf_mutex lock
1529  */
1530 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1531 {
1532         if (list_empty(&link->lgr->list))
1533                 return -ENOLINK;
1534         if (!rmb_desc->is_reg_mr[link->link_idx]) {
1535                 /* register memory region for new rmb */
1536                 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1537                         rmb_desc->is_reg_err = true;
1538                         return -EFAULT;
1539                 }
1540                 rmb_desc->is_reg_mr[link->link_idx] = true;
1541         }
1542         return 0;
1543 }
1544
1545 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
1546                              struct list_head *lst, bool is_rmb)
1547 {
1548         struct smc_buf_desc *buf_desc, *bf;
1549         int rc = 0;
1550
1551         mutex_lock(lock);
1552         list_for_each_entry_safe(buf_desc, bf, lst, list) {
1553                 if (!buf_desc->used)
1554                         continue;
1555                 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
1556                 if (rc)
1557                         goto out;
1558         }
1559 out:
1560         mutex_unlock(lock);
1561         return rc;
1562 }
1563
1564 /* map all used buffers of lgr for a new link */
1565 int smcr_buf_map_lgr(struct smc_link *lnk)
1566 {
1567         struct smc_link_group *lgr = lnk->lgr;
1568         int i, rc = 0;
1569
1570         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1571                 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
1572                                        &lgr->rmbs[i], true);
1573                 if (rc)
1574                         return rc;
1575                 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
1576                                        &lgr->sndbufs[i], false);
1577                 if (rc)
1578                         return rc;
1579         }
1580         return 0;
1581 }
1582
1583 /* register all used buffers of lgr for a new link,
1584  * must be called under lgr->llc_conf_mutex lock
1585  */
1586 int smcr_buf_reg_lgr(struct smc_link *lnk)
1587 {
1588         struct smc_link_group *lgr = lnk->lgr;
1589         struct smc_buf_desc *buf_desc, *bf;
1590         int i, rc = 0;
1591
1592         mutex_lock(&lgr->rmbs_lock);
1593         for (i = 0; i < SMC_RMBE_SIZES; i++) {
1594                 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
1595                         if (!buf_desc->used)
1596                                 continue;
1597                         rc = smcr_link_reg_rmb(lnk, buf_desc);
1598                         if (rc)
1599                                 goto out;
1600                 }
1601         }
1602 out:
1603         mutex_unlock(&lgr->rmbs_lock);
1604         return rc;
1605 }
1606
1607 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1608                                                 bool is_rmb, int bufsize)
1609 {
1610         struct smc_buf_desc *buf_desc;
1611
1612         /* try to alloc a new buffer */
1613         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1614         if (!buf_desc)
1615                 return ERR_PTR(-ENOMEM);
1616
1617         buf_desc->order = get_order(bufsize);
1618         buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1619                                       __GFP_NOMEMALLOC | __GFP_COMP |
1620                                       __GFP_NORETRY | __GFP_ZERO,
1621                                       buf_desc->order);
1622         if (!buf_desc->pages) {
1623                 kfree(buf_desc);
1624                 return ERR_PTR(-EAGAIN);
1625         }
1626         buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1627         buf_desc->len = bufsize;
1628         return buf_desc;
1629 }
1630
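/* Editor's note: __GFP_NOWARN | __GFP_NORETRY let the high-order page
 * allocation above fail fast and silently; the caller's loop in
 * __smc_buf_create() then falls back to the next smaller size class
 * (the ERR_PTR(-EAGAIN) case) instead of pressuring the allocator.
 */
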
1631 /* map buf_desc on all usable links,
1632  * unused buffers stay mapped as long as the link is up
1633  */
1634 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1635                                      struct smc_buf_desc *buf_desc, bool is_rmb)
1636 {
1637         int i, rc = 0;
1638
1639         /* protect against parallel link reconfiguration */
1640         mutex_lock(&lgr->llc_conf_mutex);
1641         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1642                 struct smc_link *lnk = &lgr->lnk[i];
1643
1644                 if (!smc_link_usable(lnk))
1645                         continue;
1646                 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1647                         rc = -ENOMEM;
1648                         goto out;
1649                 }
1650         }
1651 out:
1652         mutex_unlock(&lgr->llc_conf_mutex);
1653         return rc;
1654 }
1655
1656 #define SMCD_DMBE_SIZES         7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1657
1658 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1659                                                 bool is_dmb, int bufsize)
1660 {
1661         struct smc_buf_desc *buf_desc;
1662         int rc;
1663
1664         if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1665                 return ERR_PTR(-EAGAIN);
1666
1667         /* try to alloc a new DMB */
1668         buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1669         if (!buf_desc)
1670                 return ERR_PTR(-ENOMEM);
1671         if (is_dmb) {
1672                 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1673                 if (rc) {
1674                         kfree(buf_desc);
1675                         return ERR_PTR(-EAGAIN);
1676                 }
1677                 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1678                 /* CDC header is stored in the buf, so pretend it is smaller */
1679                 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1680         } else {
1681                 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1682                                              __GFP_NOWARN | __GFP_NORETRY |
1683                                              __GFP_NOMEMALLOC);
1684                 if (!buf_desc->cpu_addr) {
1685                         kfree(buf_desc);
1686                         return ERR_PTR(-EAGAIN);
1687                 }
1688                 buf_desc->len = bufsize;
1689         }
1690         return buf_desc;
1691 }
1692
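/* Editor's note: for SMC-D only the receive side (the DMB) is registered
 * with the ISM device; the send buffer is ordinary kzalloc'd memory,
 * presumably because the sender moves data directly into the peer's DMB
 * through the ISM device, so no mapping is needed on the send side.
 */
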
1693 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1694 {
1695         struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1696         struct smc_connection *conn = &smc->conn;
1697         struct smc_link_group *lgr = conn->lgr;
1698         struct list_head *buf_list;
1699         int bufsize, bufsize_short;
1700         struct mutex *lock;     /* lock buffer list */
1701         int sk_buf_size;
1702
1703         if (is_rmb)
1704                 /* use socket recv buffer size (w/o overhead) as start value */
1705                 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1706         else
1707                 /* use socket send buffer size (w/o overhead) as start value */
1708                 sk_buf_size = smc->sk.sk_sndbuf / 2;
1709
1710         for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1711              bufsize_short >= 0; bufsize_short--) {
1712
1713                 if (is_rmb) {
1714                         lock = &lgr->rmbs_lock;
1715                         buf_list = &lgr->rmbs[bufsize_short];
1716                 } else {
1717                         lock = &lgr->sndbufs_lock;
1718                         buf_list = &lgr->sndbufs[bufsize_short];
1719                 }
1720                 bufsize = smc_uncompress_bufsize(bufsize_short);
1721                 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1722                         continue;
1723
1724                 /* check for reusable slot in the link group */
1725                 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1726                 if (buf_desc) {
1727                         memset(buf_desc->cpu_addr, 0, bufsize);
1728                         break; /* found reusable slot */
1729                 }
1730
1731                 if (is_smcd)
1732                         buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1733                 else
1734                         buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1735
1736                 if (PTR_ERR(buf_desc) == -ENOMEM)
1737                         break;
1738                 if (IS_ERR(buf_desc))
1739                         continue;
1740
1741                 buf_desc->used = 1;
1742                 mutex_lock(lock);
1743                 list_add(&buf_desc->list, buf_list);
1744                 mutex_unlock(lock);
1745                 break; /* found */
1746         }
1747
1748         if (IS_ERR(buf_desc))
1749                 return -ENOMEM;
1750
1751         if (!is_smcd) {
1752                 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
1753                         smcr_buf_unuse(buf_desc, lgr);
1754                         return -ENOMEM;
1755                 }
1756         }
1757
1758         if (is_rmb) {
1759                 conn->rmb_desc = buf_desc;
1760                 conn->rmbe_size_short = bufsize_short;
1761                 smc->sk.sk_rcvbuf = bufsize * 2;
1762                 atomic_set(&conn->bytes_to_rcv, 0);
1763                 conn->rmbe_update_limit =
1764                         smc_rmb_wnd_update_limit(buf_desc->len);
1765                 if (is_smcd)
1766                         smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1767         } else {
1768                 conn->sndbuf_desc = buf_desc;
1769                 smc->sk.sk_sndbuf = bufsize * 2;
1770                 atomic_set(&conn->sndbuf_space, bufsize);
1771         }
1772         return 0;
1773 }
1774
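/* Editor's note: the halving of sk_sndbuf/sk_rcvbuf at the top of
 * __smc_buf_create() and the doubling when storing them back mirror the
 * kernel convention that sk_*buf values include bookkeeping overhead,
 * so the usable RMB payload is about half the socket buffer value.
 */
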
1775 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1776 {
1777         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1778                 return;
1779         smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1780 }
1781
1782 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1783 {
1784         if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1785                 return;
1786         smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1787 }
1788
1789 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1790 {
1791         int i;
1792
1793         if (!conn->lgr || conn->lgr->is_smcd)
1794                 return;
1795         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1796                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1797                         continue;
1798                 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1799                                        DMA_FROM_DEVICE);
1800         }
1801 }
1802
1803 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1804 {
1805         int i;
1806
1807         if (!conn->lgr || conn->lgr->is_smcd)
1808                 return;
1809         for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1810                 if (!smc_link_usable(&conn->lgr->lnk[i]))
1811                         continue;
1812                 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1813                                           DMA_FROM_DEVICE);
1814         }
1815 }
1816
1817 /* create the send and receive buffers for an SMC socket;
1818  * receive buffers are called RMBs.
1819  * (Even though the SMC protocol allows more than one RMB-element per RMB,
1820  * the Linux implementation uses just one RMB-element per RMB, i.e. it uses
1821  * an extra RMB for every connection in a link group.)
1822  */
1823 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1824 {
1825         int rc;
1826
1827         /* create send buffer */
1828         rc = __smc_buf_create(smc, is_smcd, false);
1829         if (rc)
1830                 return rc;
1831         /* create rmb */
1832         rc = __smc_buf_create(smc, is_smcd, true);
1833         if (rc)
1834                 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1835         return rc;
1836 }
1837
1838 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1839 {
1840         int i;
1841
1842         for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1843                 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1844                         return i;
1845         }
1846         return -ENOSPC;
1847 }
1848
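/* Editor's note: for_each_clear_bit() only nominates a candidate index;
 * the atomic test_and_set_bit() is what claims it, so two racing
 * reservations cannot end up with the same rtoken slot - the loser just
 * advances to the next clear bit.
 */
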
1849 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
1850                                    u32 rkey)
1851 {
1852         int i;
1853
1854         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1855                 if (test_bit(i, lgr->rtokens_used_mask) &&
1856                     lgr->rtokens[i][lnk_idx].rkey == rkey)
1857                         return i;
1858         }
1859         return -ENOENT;
1860 }
1861
1862 /* set rtoken for a new link to an existing rmb */
1863 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
1864                     __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
1865 {
1866         int rtok_idx;
1867
1868         rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
1869         if (rtok_idx == -ENOENT)
1870                 return;
1871         lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
1872         lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
1873 }
1874
1875 /* set rtoken for a new link whose link_id is given */
1876 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
1877                      __be64 nw_vaddr, __be32 nw_rkey)
1878 {
1879         u64 dma_addr = be64_to_cpu(nw_vaddr);
1880         u32 rkey = ntohl(nw_rkey);
1881         bool found = false;
1882         int link_idx;
1883
1884         for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
1885                 if (lgr->lnk[link_idx].link_id == link_id) {
1886                         found = true;
1887                         break;
1888                 }
1889         }
1890         if (!found)
1891                 return;
1892         lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
1893         lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
1894 }
1895
1896 /* add a new rtoken from peer */
1897 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1898 {
1899         struct smc_link_group *lgr = smc_get_lgr(lnk);
1900         u64 dma_addr = be64_to_cpu(nw_vaddr);
1901         u32 rkey = ntohl(nw_rkey);
1902         int i;
1903
1904         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1905                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1906                     lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1907                     test_bit(i, lgr->rtokens_used_mask)) {
1908                         /* already in list */
1909                         return i;
1910                 }
1911         }
1912         i = smc_rmb_reserve_rtoken_idx(lgr);
1913         if (i < 0)
1914                 return i;
1915         lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1916         lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1917         return i;
1918 }
1919
1920 /* delete an rtoken from all links */
1921 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1922 {
1923         struct smc_link_group *lgr = smc_get_lgr(lnk);
1924         u32 rkey = ntohl(nw_rkey);
1925         int i, j;
1926
1927         for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1928                 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1929                     test_bit(i, lgr->rtokens_used_mask)) {
1930                         for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
1931                                 lgr->rtokens[i][j].rkey = 0;
1932                                 lgr->rtokens[i][j].dma_addr = 0;
1933                         }
1934                         clear_bit(i, lgr->rtokens_used_mask);
1935                         return 0;
1936                 }
1937         }
1938         return -ENOENT;
1939 }
1940
1941 /* save rkey and dma_addr received from peer during clc handshake */
1942 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1943                             struct smc_link *lnk,
1944                             struct smc_clc_msg_accept_confirm *clc)
1945 {
1946         conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
1947                                           clc->rmb_rkey);
1948         if (conn->rtoken_idx < 0)
1949                 return conn->rtoken_idx;
1950         return 0;
1951 }
1952
1953 static void smc_core_going_away(void)
1954 {
1955         struct smc_ib_device *smcibdev;
1956         struct smcd_dev *smcd;
1957
1958         spin_lock(&smc_ib_devices.lock);
1959         list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1960                 int i;
1961
1962                 for (i = 0; i < SMC_MAX_PORTS; i++)
1963                         set_bit(i, smcibdev->ports_going_away);
1964         }
1965         spin_unlock(&smc_ib_devices.lock);
1966
1967         spin_lock(&smcd_dev_list.lock);
1968         list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1969                 smcd->going_away = 1;
1970         }
1971         spin_unlock(&smcd_dev_list.lock);
1972 }
1973
1974 /* Clean up all SMC link groups */
1975 static void smc_lgrs_shutdown(void)
1976 {
1977         struct smcd_dev *smcd;
1978
1979         smc_core_going_away();
1980
1981         smc_smcr_terminate_all(NULL);
1982
1983         spin_lock(&smcd_dev_list.lock);
1984         list_for_each_entry(smcd, &smcd_dev_list.list, list)
1985                 smc_smcd_terminate_all(smcd);
1986         spin_unlock(&smcd_dev_list.lock);
1987 }
1988
1989 static int smc_core_reboot_event(struct notifier_block *this,
1990                                  unsigned long event, void *ptr)
1991 {
1992         smc_lgrs_shutdown();
1993         smc_ib_unregister_client();
1994         return 0;
1995 }
1996
1997 static struct notifier_block smc_reboot_notifier = {
1998         .notifier_call = smc_core_reboot_event,
1999 };
2000
2001 int __init smc_core_init(void)
2002 {
2003         return register_reboot_notifier(&smc_reboot_notifier);
2004 }
2005
2006 /* Called (from smc_exit) when module is removed */
2007 void smc_core_exit(void)
2008 {
2009         unregister_reboot_notifier(&smc_reboot_notifier);
2010         smc_lgrs_shutdown();
2011 }