/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 */

#include <linux/workqueue.h>
#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME		(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}
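
/* wait (with a short timeout) until all CDC transmissions pending for
 * this connection have completed
 */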
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}
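
/* wake up a closing task blocked in smc_close_stream_wait() */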
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}
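
/* announce a write shutdown to the peer: flag this side as done writing
 * and push the new state to the peer via a CDC message
 */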
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
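
/* announce the final close to the peer; receive data still pending
 * locally turns the close into an abort
 */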
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
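
/* announce an abnormal termination to the peer */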
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	bh_lock_sock(&smc->sk);
	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	bh_unlock_sock(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}
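
/* perform the close processing of an active close, driven by the current
 * state of the local socket; waits for pending sends where appropriate
 * and informs the peer via CDC messages
 */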
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER) &&
	    !(current->flags & PF_EXITING))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			flush_work(&smc->smc_listen_work);
		sock_put(sk);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		cancel_work_sync(&conn->tx_work);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}
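
/* an abort notification has been received from the peer; move the socket
 * into a matching closing state and answer with an abort where the
 * connection is not yet closed locally
 */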
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 * Called under tasklet context.
 */
void smc_close_passive_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *rxflags =
		&smc->conn.local_rx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;
	int old_state;

	sk->sk_shutdown |= RCV_SHUTDOWN;
	if (smc->clcsock && smc->clcsock->sk)
		smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(&smc->sk, SOCK_DONE);

	old_state = sk->sk_state;

	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !rxflags->peer_conn_closed))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    (sk->sk_shutdown == SHUTDOWN_MASK)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if ((sk->sk_state == SMC_CLOSED) &&
	    (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
}
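
/* delayed work: unhash the socket and drop the final socket reference */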
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}
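
/* perform the close processing of a shutdown(SHUT_WR): announce the
 * write shutdown to the peer and adjust the socket state
 */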
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}