// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Manage RMBE
 * copy new RMBE data into user space
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <[email protected]>
 */

#include <linux/net.h>
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_core.h"
#include "smc_cdc.h"
#include "smc_tx.h" /* smc_tx_consumer_update() */
#include "smc_rx.h"

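/* Receive path overview: the RMBE (RMB element) is a ring buffer in local
 * memory that the peer fills via RDMA writes; CDC messages carry the
 * producer and consumer cursors. The functions below copy new RMBE data to
 * user space (recvmsg) or into a pipe (splice) and advertise the consumed
 * space back to the peer by updating the consumer cursor.
 */
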
/* callback implementation to wakeup consumers blocked with smc_rx_wait().
 * indirectly called by smc_cdc_msg_recv_action().
 */
static void smc_rx_wake_up(struct sock *sk)
{
        struct socket_wq *wq;

        /* derived from sock_def_readable() */
        /* called already in smc_listen_work() */
        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (skwq_has_sleeper(wq))
                wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
                                                EPOLLRDNORM | EPOLLRDBAND);
        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
        if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
            (sk->sk_state == SMC_CLOSED))
                sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
        rcu_read_unlock();
}

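/* Consumer cursor handling: advancing conn->local_tx_ctrl.cons tells the
 * peer how much RMBE space may be reused (comparable to opening the TCP
 * receive window). smc_rx_update_consumer() additionally handles urgent
 * data: with SOCK_URGINLINE the receive is ended at the urgent byte,
 * otherwise a pending urgent byte is skipped over so it is not delivered
 * in the normal data stream.
 */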
/* Update consumer cursor
 *   @conn   connection to update
 *   @cons   consumer cursor
 *   @len    number of Bytes consumed
 *   Returns:
 *   1 if we should end our receive, 0 otherwise
 */
static int smc_rx_update_consumer(struct smc_sock *smc,
                                  union smc_host_cursor cons, size_t len)
{
        struct smc_connection *conn = &smc->conn;
        struct sock *sk = &smc->sk;
        bool force = false;
        int diff, rc = 0;

        smc_curs_add(conn->rmb_desc->len, &cons, len);

        /* did we process urgent data? */
        if (conn->urg_state == SMC_URG_VALID || conn->urg_rx_skip_pend) {
                diff = smc_curs_comp(conn->rmb_desc->len, &cons,
                                     &conn->urg_curs);
                if (sock_flag(sk, SOCK_URGINLINE)) {
                        if (diff == 0) {
                                force = true;
                                rc = 1;
                                conn->urg_state = SMC_URG_READ;
                        }
                } else {
                        if (diff == 1) {
                                /* skip urgent byte */
                                force = true;
                                smc_curs_add(conn->rmb_desc->len, &cons, 1);
                                conn->urg_rx_skip_pend = false;
                        } else if (diff < -1)
                                /* we read past urgent byte */
                                conn->urg_state = SMC_URG_READ;
                }
        }

        smc_curs_copy(&conn->local_tx_ctrl.cons, &cons, conn);

        /* send consumer cursor update if required */
        /* similar to advertising new TCP rcv_wnd if required */
        smc_tx_consumer_update(conn, force);

        return rc;
}

static void smc_rx_update_cons(struct smc_sock *smc, size_t len)
{
        struct smc_connection *conn = &smc->conn;
        union smc_host_cursor cons;

        smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
        smc_rx_update_consumer(smc, cons, len);
}

struct smc_spd_priv {
        struct smc_sock *smc;
        size_t           len;
};

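/* Data spliced out of the RMBE is not copied; the pipe buffer references the
 * RMB pages directly. Each spliced chunk carries a struct smc_spd_priv so
 * that the consumer cursor is advanced only when the pipe buffer is released,
 * i.e. once the data has actually left the receive buffer.
 */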
static void smc_rx_pipe_buf_release(struct pipe_inode_info *pipe,
                                    struct pipe_buffer *buf)
{
        struct smc_spd_priv *priv = (struct smc_spd_priv *)buf->private;
        struct smc_sock *smc = priv->smc;
        struct smc_connection *conn;
        struct sock *sk = &smc->sk;

        if (sk->sk_state == SMC_CLOSED ||
            sk->sk_state == SMC_PEERFINCLOSEWAIT ||
            sk->sk_state == SMC_APPFINCLOSEWAIT)
                goto out;
        conn = &smc->conn;
        lock_sock(sk);
        smc_rx_update_cons(smc, priv->len);
        release_sock(sk);
        if (atomic_sub_and_test(priv->len, &conn->splice_pending))
                smc_rx_wake_up(sk);
out:
        kfree(priv);
        put_page(buf->page);
        sock_put(sk);
}

static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
{
        return 1;
}

static const struct pipe_buf_operations smc_pipe_ops = {
        .can_merge = 0,
        .confirm = generic_pipe_buf_confirm,
        .release = smc_rx_pipe_buf_release,
        .steal = smc_rx_pipe_buf_nosteal,
        .get = generic_pipe_buf_get
};

static void smc_rx_spd_release(struct splice_pipe_desc *spd,
                               unsigned int i)
{
        put_page(spd->pages[i]);
}

static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
                         struct smc_sock *smc)
{
        struct splice_pipe_desc spd;
        struct partial_page partial;
        struct smc_spd_priv *priv;
        int bytes;

        priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
        priv->len = len;
        priv->smc = smc;
        partial.offset = src - (char *)smc->conn.rmb_desc->cpu_addr;
        partial.len = len;
        partial.private = (unsigned long)priv;

        spd.nr_pages_max = 1;
        spd.nr_pages = 1;
        spd.pages = &smc->conn.rmb_desc->pages;
        spd.partial = &partial;
        spd.ops = &smc_pipe_ops;
        spd.spd_release = smc_rx_spd_release;

        bytes = splice_to_pipe(pipe, &spd);
        if (bytes > 0) {
                sock_hold(&smc->sk);
                get_page(smc->conn.rmb_desc->pages);
                atomic_add(bytes, &smc->conn.splice_pending);
        }

        return bytes;
}

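/* Note that each successful smc_rx_splice() takes a socket and a page
 * reference and adds the spliced length to conn->splice_pending; the matching
 * sock_put()/put_page() and the splice_pending decrement happen in
 * smc_rx_pipe_buf_release() once the pipe buffer is consumed.
 */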
static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
{
        return atomic_read(&conn->bytes_to_rcv) &&
               !atomic_read(&conn->splice_pending);
}

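/* The helper above serves as an additional wait criterion for smc_rx_wait():
 * a recvmsg() caller that finds spliced-but-unreleased bytes ahead of it in
 * the RMBE waits until those bytes are consumed before reading on.
 */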
/* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
 *   @smc    smc socket
 *   @timeo  pointer to max seconds to wait, pointer to value 0 for no timeout
 *   @fcrit  add'l criterion to evaluate as function pointer
 * Returns:
 * 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
 * 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
 */
int smc_rx_wait(struct smc_sock *smc, long *timeo,
                int (*fcrit)(struct smc_connection *conn))
{
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct smc_connection *conn = &smc->conn;
        struct sock *sk = &smc->sk;
        int rc;

        if (fcrit(conn))
                return 1;
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        add_wait_queue(sk_sleep(sk), &wait);
        rc = sk_wait_event(sk, timeo,
                           sk->sk_err ||
                           sk->sk_shutdown & RCV_SHUTDOWN ||
                           fcrit(conn) ||
                           smc_cdc_rxed_any_close_or_senddone(conn),
                           &wait);
        remove_wait_queue(sk_sleep(sk), &wait);
        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        return rc;
}

static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
                           int flags)
{
        struct smc_connection *conn = &smc->conn;
        union smc_host_cursor cons;
        struct sock *sk = &smc->sk;
        int rc = 0;

        if (sock_flag(sk, SOCK_URGINLINE) ||
            !(conn->urg_state == SMC_URG_VALID) ||
            conn->urg_state == SMC_URG_READ)
                return -EINVAL;

        if (conn->urg_state == SMC_URG_VALID) {
                if (!(flags & MSG_PEEK))
                        smc->conn.urg_state = SMC_URG_READ;
                msg->msg_flags |= MSG_OOB;
                if (len > 0) {
                        if (!(flags & MSG_TRUNC))
                                rc = memcpy_to_msg(msg, &conn->urg_rx_byte, 1);
                        len = 1;
                        smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
                        if (smc_curs_diff(conn->rmb_desc->len, &cons,
                                          &conn->urg_curs) > 1)
                                conn->urg_rx_skip_pend = true;
                        /* Urgent Byte was already accounted for, but trigger
                         * skipping the urgent byte in non-inline case
                         */
                        if (!(flags & MSG_PEEK))
                                smc_rx_update_consumer(smc, cons, 0);
                } else {
                        msg->msg_flags |= MSG_TRUNC;
                }

                return rc ? -EFAULT : len;
        }

        if (sk->sk_state == SMC_CLOSED || sk->sk_shutdown & RCV_SHUTDOWN)
                return 0;

        return -EAGAIN;
}

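/* smc_rx_recvmsg() below is the common backend for recvmsg() and splice():
 * msg is set for a recvmsg()/read() call, pipe is set for splice(). MSG_OOB
 * requests are diverted to smc_rx_recv_urg() above, which delivers the single
 * pending urgent byte held in conn->urg_rx_byte.
 */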
/* smc_rx_recvmsg - receive data from RMBE
 * @msg:	copy data to receive buffer
 * @pipe:	copy data to pipe if set - indicates splice() call
 *
 * rcvbuf consumer: main API called by socket layer.
 * Called under sk lock.
 */
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
                   struct pipe_inode_info *pipe, size_t len, int flags)
{
        size_t copylen, read_done = 0, read_remaining = len;
        size_t chunk_len, chunk_off, chunk_len_sum;
        struct smc_connection *conn = &smc->conn;
        int (*func)(struct smc_connection *conn);
        union smc_host_cursor cons;
        int readable, chunk;
        char *rcvbuf_base;
        struct sock *sk;
        int splbytes;
        long timeo;
        int target;             /* Read at least these many bytes */
        int rc;

        if (unlikely(flags & MSG_ERRQUEUE))
                return -EINVAL; /* future work for sk.sk_family == AF_SMC */

        sk = &smc->sk;
        if (sk->sk_state == SMC_LISTEN)
                return -ENOTCONN;
        if (flags & MSG_OOB)
                return smc_rx_recv_urg(smc, msg, len, flags);
        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

        /* we currently use 1 RMBE per RMB, so RMBE == RMB base addr */
        rcvbuf_base = conn->rx_off + conn->rmb_desc->cpu_addr;

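        /* Main receive loop: wait until data (or an error/shutdown condition)
         * is available, then copy it out. Because the RMBE is a ring buffer,
         * a readable region that wraps around the buffer end is copied in at
         * most two chunks (see the chunk loop below).
         */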
        do { /* while (read_remaining) */
                if (read_done >= target || (pipe && read_done))
                        break;

                if (atomic_read(&conn->bytes_to_rcv))
                        goto copy;
                else if (conn->urg_state == SMC_URG_VALID)
                        /* we received a single urgent Byte - skip */
                        smc_rx_update_cons(smc, 0);

                if (sk->sk_shutdown & RCV_SHUTDOWN ||
                    smc_cdc_rxed_any_close_or_senddone(conn) ||
                    conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
                        break;

                if (read_done) {
                        if (sk->sk_err ||
                            sk->sk_state == SMC_CLOSED ||
                            !timeo ||
                            signal_pending(current))
                                break;
                } else {
                        if (sk->sk_err) {
                                read_done = sock_error(sk);
                                break;
                        }
                        if (sk->sk_state == SMC_CLOSED) {
                                if (!sock_flag(sk, SOCK_DONE)) {
                                        /* This occurs when user tries to read
                                         * from never connected socket.
                                         */
                                        read_done = -ENOTCONN;
                                        break;
                                }
                                break;
                        }
                        if (signal_pending(current)) {
                                read_done = sock_intr_errno(timeo);
                                break;
                        }
                        if (!timeo)
                                return -EAGAIN;
                }

                if (!smc_rx_data_available(conn)) {
                        smc_rx_wait(smc, &timeo, smc_rx_data_available);
                        continue;
                }

copy:
                /* initialize variables for 1st iteration of subsequent loop */
                /* could be just 1 byte, even after waiting on data above */
                readable = atomic_read(&conn->bytes_to_rcv);
                splbytes = atomic_read(&conn->splice_pending);
                if (!readable || (msg && splbytes)) {
                        if (splbytes)
                                func = smc_rx_data_available_and_no_splice_pend;
                        else
                                func = smc_rx_data_available;
                        smc_rx_wait(smc, &timeo, func);
                        continue;
                }

                smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
                /* subsequent splice() calls pick up where previous left */
                if (splbytes)
                        smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
                if (conn->urg_state == SMC_URG_VALID &&
                    sock_flag(&smc->sk, SOCK_URGINLINE) &&
                    readable > 1)
                        readable--;     /* always stop at urgent Byte */
                /* not more than what user space asked for */
                copylen = min_t(size_t, read_remaining, readable);
                /* determine chunks where to read from rcvbuf */
                /* either unwrapped case, or 1st chunk of wrapped case */
                chunk_len = min_t(size_t, copylen, conn->rmb_desc->len -
                                  cons.count);
                chunk_len_sum = chunk_len;
                chunk_off = cons.count;
                smc_rmb_sync_sg_for_cpu(conn);
                for (chunk = 0; chunk < 2; chunk++) {
                        if (!(flags & MSG_TRUNC)) {
                                if (msg) {
                                        rc = memcpy_to_msg(msg, rcvbuf_base +
                                                           chunk_off,
                                                           chunk_len);
                                } else {
                                        rc = smc_rx_splice(pipe, rcvbuf_base +
                                                           chunk_off, chunk_len,
                                                           smc);
                                }
                                if (rc < 0) {
                                        if (!read_done)
                                                read_done = -EFAULT;
                                        smc_rmb_sync_sg_for_device(conn);
                                        goto out;
                                }
                        }
                        read_remaining -= chunk_len;
                        read_done += chunk_len;

                        if (chunk_len_sum == copylen)
                                break; /* either on 1st or 2nd iteration */
                        /* prepare next (== 2nd) iteration */
                        chunk_len = copylen - chunk_len; /* remainder */
                        chunk_len_sum += chunk_len;
                        chunk_off = 0; /* modulo offset in recv ring buffer */
                }
                smc_rmb_sync_sg_for_device(conn);

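                /* With MSG_PEEK the consumer cursor is left untouched so the
                 * same data can be read again; on the splice() path the cursor
                 * update is deferred to smc_rx_pipe_buf_release(), which is
                 * why only the msg case calls smc_rx_update_consumer() here.
                 */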
                /* update cursors */
                if (!(flags & MSG_PEEK)) {
                        /* increased in recv tasklet smc_cdc_msg_rcv() */
                        smp_mb__before_atomic();
                        atomic_sub(copylen, &conn->bytes_to_rcv);
                        /* guarantee 0 <= bytes_to_rcv <= rmb_desc->len */
                        smp_mb__after_atomic();
                        if (msg && smc_rx_update_consumer(smc, cons, copylen))
                                goto out;
                }
        } while (read_remaining);
out:
        return read_done;
}

/* Initialize receive properties on connection establishment. NB: not __init! */
void smc_rx_init(struct smc_sock *smc)
{
        smc->sk.sk_data_ready = smc_rx_wake_up;
        atomic_set(&smc->conn.splice_pending, 0);
        smc->conn.urg_state = SMC_URG_READ;
}
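
/* From user space the receive path above is exercised like any stream socket;
 * a minimal sketch (illustrative only, error handling omitted, assuming an
 * already connected AF_SMC socket descriptor fd):
 *
 *	char buf[4096];
 *	ssize_t n = recv(fd, buf, sizeof(buf), 0);	// ends up in smc_rx_recvmsg()
 *
 *	int pfd[2];
 *	pipe(pfd);
 *	splice(fd, NULL, pfd[1], NULL, 4096, 0);	// smc_rx_recvmsg() with pipe set
 */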