// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "alloc_cache.h"

#if defined(CONFIG_NET)
	struct sockaddr __user *addr;

	struct sockaddr __user *addr;
	bool seen_econnaborted;

	struct compat_msghdr __user *umsg_compat;
	struct user_msghdr __user *umsg;

	unsigned nr_multishot_loops;

	/* initialised and used only by !msg send variants */
	void __user *msg_control;
	/* used only for send zerocopy */
	struct io_kiocb *notif;

/*
 * Number of times we'll try to do receives if there's more data. If we
 * exceed this limit, then add us to the back of the queue and retry from
 * there. This helps fairness between flooding clients.
 */
#define MULTISHOT_MAX_RETRY	32

int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	if (unlikely(sqe->off || sqe->addr || sqe->rw_flags ||
		     sqe->buf_index || sqe->splice_fd_in))

	shutdown->how = READ_ONCE(sqe->len);
	req->flags |= REQ_F_FORCE_ASYNC;

int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)
	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);

	WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK);

	sock = sock_from_file(req->file);

	ret = __sys_shutdown_sock(sock, shutdown->how);
	io_req_set_res(req, ret, 0);
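
/*
 * Userspace sketch (assumes liburing; names like "ring" and "sockfd" are
 * placeholders, not part of this file). IORING_OP_SHUTDOWN carries "how"
 * in sqe->len, which io_shutdown_prep() above reads via READ_ONCE(sqe->len):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_shutdown(sqe, sockfd, SHUT_WR);
 *	io_uring_submit(&ring);
 */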

static bool io_net_retry(struct socket *sock, int flags)
	if (!(flags & MSG_WAITALL))
	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;

static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
	if (kmsg->free_iov) {
		kfree(kmsg->free_iov);
		kmsg->free_iov_nr = 0;
		kmsg->free_iov = NULL;

static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
	struct io_async_msghdr *hdr = req->async_data;

	/* can't recycle, ensure we free the iovec if we have one */
	if (unlikely(issue_flags & IO_URING_F_UNLOCKED)) {
		io_netmsg_iovec_free(hdr);

	/* Let normal cleanup path reap it if we fail adding to the cache */
	if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
		kasan_mempool_poison_object(iov);
		req->async_data = NULL;
		req->flags &= ~REQ_F_ASYNC_DATA;

static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
	struct io_ring_ctx *ctx = req->ctx;
	struct io_async_msghdr *hdr;

	hdr = io_alloc_cache_get(&ctx->netmsg_cache);
		kasan_mempool_unpoison_object(hdr->free_iov,
					      hdr->free_iov_nr * sizeof(struct iovec));
		req->flags |= REQ_F_NEED_CLEANUP;
		req->flags |= REQ_F_ASYNC_DATA;
		req->async_data = hdr;

	if (!io_alloc_async_data(req)) {
		hdr = req->async_data;
		hdr->free_iov_nr = 0;
		hdr->free_iov = NULL;

/* assign new iovec to kmsg, if we need to */
static int io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
		req->flags |= REQ_F_NEED_CLEANUP;
		kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
		kfree(kmsg->free_iov);
		kmsg->free_iov = iov;

static inline void io_mshot_prep_retry(struct io_kiocb *req,
				       struct io_async_msghdr *kmsg)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	req->flags &= ~REQ_F_BL_EMPTY;
	sr->len = 0; /* get from the provided buffer */
	req->buf_index = sr->buf_group;

static int io_compat_msg_copy_hdr(struct io_kiocb *req,
				  struct io_async_msghdr *iomsg,
				  struct compat_msghdr *msg, int ddir)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct compat_iovec __user *uiov;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
		iov = &iomsg->fast_iov;

	if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))

	uiov = compat_ptr(msg->msg_iov);
	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			if (!access_ok(uiov, sizeof(*uiov)))
			if (__get_user(clen, &uiov->iov_len))

	ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
			     nr_segs, &iov, &iomsg->msg.msg_iter, true);
	if (unlikely(ret < 0))

	return io_net_vec_assign(req, iomsg, iov);

static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
			   struct user_msghdr *msg, int ddir)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr __user *umsg = sr->umsg;

	if (iomsg->free_iov) {
		nr_segs = iomsg->free_iov_nr;
		iov = iomsg->free_iov;
		iov = &iomsg->fast_iov;

	if (!user_access_begin(umsg, sizeof(*umsg)))

	unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
	unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
	unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
	unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
	unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
	unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);

	if (req->flags & REQ_F_BUFFER_SELECT) {
		if (msg->msg_iovlen == 0) {
			sr->len = iov->iov_len = 0;
			iov->iov_base = NULL;
		} else if (msg->msg_iovlen > 1) {
			/* we only need the length for provided buffers */
			if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
			unsafe_get_user(iov->iov_len, &msg->msg_iov[0].iov_len,
			sr->len = iov->iov_len;

	ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
			     &iov, &iomsg->msg.msg_iter, false);
	if (unlikely(ret < 0))

	return io_net_vec_assign(req, iomsg, iov);

static int io_sendmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct user_msghdr msg;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
		return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
	ret = __copy_msghdr(&iomsg->msg, &msg, NULL);

	/* save msg_control as sys_sendmsg() overwrites it */
	sr->msg_control = iomsg->msg.msg_control_user;

void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
	struct io_async_msghdr *io = req->async_data;

	io_netmsg_iovec_free(io);

static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	sr->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	if (READ_ONCE(sqe->__pad3[0]))

	kmsg->msg.msg_name = NULL;
	kmsg->msg.msg_namelen = 0;
	kmsg->msg.msg_control = NULL;
	kmsg->msg.msg_controllen = 0;
	kmsg->msg.msg_ubuf = NULL;

	addr = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	addr_len = READ_ONCE(sqe->addr_len);
		ret = move_addr_to_kernel(addr, addr_len, &kmsg->addr);
		if (unlikely(ret < 0))
		kmsg->msg.msg_name = &kmsg->addr;
		kmsg->msg.msg_namelen = addr_len;
	if (!io_do_buffer_select(req)) {
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);
		if (unlikely(ret < 0))

static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));

	ret = io_sendmsg_copy_hdr(req, kmsg);
		req->flags |= REQ_F_NEED_CLEANUP;

#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)

int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (req->opcode != IORING_OP_SEND) {
		if (sqe->addr2 || sqe->file_index)

	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~SENDMSG_FLAGS)
	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_SENDMSG)
		if (!(req->flags & REQ_F_BUFFER_SELECT))
		sr->msg_flags |= MSG_WAITALL;
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;

	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
	if (req->opcode != IORING_OP_SENDMSG)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
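
/*
 * Userspace sketch of a bundle send (assumes liburing and a provided-buffer
 * ring registered as group "bgid"; illustrative only, not part of this file).
 * io_sendmsg_prep() above rejects IORING_RECVSEND_BUNDLE for IORING_OP_SENDMSG
 * and requires IOSQE_BUFFER_SELECT, which is exactly what this sets up:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_send(sqe, sockfd, NULL, 0, MSG_NOSIGNAL);
 *	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = bgid;
 *	io_uring_submit(&ring);
 *
 * The kernel then sends from as many buffers in "bgid" as it can in one go,
 * and the CQE result covers data taken from one or more of those buffers.
 */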

static void io_req_msg_cleanup(struct io_kiocb *req,
			       unsigned int issue_flags)
	req->flags &= ~REQ_F_NEED_CLEANUP;
	io_netmsg_recycle(req, issue_flags);

/*
 * For bundle completions, we need to figure out how many segments we consumed.
 * A bundle could be using a single ITER_UBUF if that's all we mapped, or it
 * could be using an ITER_IOVEC. If the latter, then if we consumed all of
 * the segments, then it's a trivial question to answer. If we have residual
 * data in the iter, then loop the segments to figure out how much we
 * transferred.
 */
static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
	/* no data is always zero segments, and a ubuf is always 1 segment */
	if (iter_is_ubuf(&kmsg->msg.msg_iter))

	iov = kmsg->free_iov;
		iov = &kmsg->fast_iov;

	/* if all data was transferred, it's basic pointer math */
	if (!iov_iter_count(&kmsg->msg.msg_iter))
		return iter_iov(&kmsg->msg.msg_iter) - iov;

	/* short transfer, count segments */
		int this_len = min_t(int, iov[nbufs].iov_len, ret);
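
/*
 * Worked example (illustrative): with three 4096-byte iovecs mapped and a
 * short transfer of ret == 6000, the segment loop above takes 4096 from the
 * first iovec and the remaining 1904 from the second, so io_bundle_nbufs()
 * reports 2 consumed buffers.
 */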

static inline bool io_send_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  unsigned issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	bool bundle_finished = *ret <= 0;

	if (!(sr->flags & IORING_RECVSEND_BUNDLE)) {
		cflags = io_put_kbuf(req, *ret, issue_flags);

	cflags = io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret), issue_flags);

	if (bundle_finished || req->flags & REQ_F_BL_EMPTY)

	/*
	 * Fill CQE for this send and see if we should keep trying to
	 * send from this socket.
	 */
	if (io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		io_mshot_prep_retry(req, kmsg);

	/* Otherwise stop bundle and use the current result. */
	io_req_set_res(req, *ret, cflags);

int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	sock = sock_from_file(req->file);

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;

	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
		if (ret > 0 && io_net_retry(sock, flags)) {
			kmsg->msg.msg_controllen = 0;
			kmsg->msg.msg_control = NULL;
			req->flags |= REQ_F_BL_NO_RECYCLE;
		if (ret == -ERESTARTSYS)

	io_req_msg_cleanup(req, issue_flags);
	else if (sr->done_io)
	io_req_set_res(req, ret, 0);

int io_send(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	sock = sock_from_file(req->file);

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;

	if (io_do_buffer_select(req)) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.max_len = min_not_zero(sr->len, INT_MAX),

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode = KBUF_MODE_FREE;

		if (!(sr->flags & IORING_RECVSEND_BUNDLE))
			arg.mode |= KBUF_MODE_EXPAND;

		ret = io_buffers_select(req, &arg, issue_flags);
		if (unlikely(ret < 0))

		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;
		sr->len = arg.out_len;

			sr->buf = arg.iovs[0].iov_base;
			ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);
			iov_iter_init(&kmsg->msg.msg_iter, ITER_SOURCE,
				      arg.iovs, ret, arg.out_len);

	/*
	 * If MSG_WAITALL is set, or this is a bundle send, then we need
	 * the full amount. If just the bundle flag is set and we do a short
	 * send, we complete the bundle sequence rather than continue on.
	 */
	if (flags & MSG_WAITALL || sr->flags & IORING_RECVSEND_BUNDLE)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
	kmsg->msg.msg_flags = flags;
	ret = sock_sendmsg(sock, &kmsg->msg);

		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
		if (ret > 0 && io_net_retry(sock, flags)) {
			req->flags |= REQ_F_BL_NO_RECYCLE;
		if (ret == -ERESTARTSYS)
	else if (sr->done_io)

	if (!io_send_finish(req, &ret, kmsg, issue_flags))

	io_req_msg_cleanup(req, issue_flags);

static int io_recvmsg_mshot_prep(struct io_kiocb *req,
				 struct io_async_msghdr *iomsg,
				 int namelen, size_t controllen)
	if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
			  (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
		if (unlikely(namelen < 0))
		if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
		if (check_add_overflow(hdr, controllen, &hdr))

		iomsg->namelen = namelen;
		iomsg->controllen = controllen;

static int io_recvmsg_copy_hdr(struct io_kiocb *req,
			       struct io_async_msghdr *iomsg)
	struct user_msghdr msg;

	iomsg->msg.msg_name = &iomsg->addr;
	iomsg->msg.msg_iter.nr_segs = 0;

	if (unlikely(req->ctx->compat)) {
		struct compat_msghdr cmsg;

		ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
		ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);

		return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
					     cmsg.msg_controllen);

	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
	ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);

	return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,

static int io_recvmsg_prep_setup(struct io_kiocb *req)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg;

	kmsg = io_msg_alloc_async(req);

	if (req->opcode == IORING_OP_RECV) {
		kmsg->msg.msg_name = NULL;
		kmsg->msg.msg_namelen = 0;
		kmsg->msg.msg_inq = 0;
		kmsg->msg.msg_control = NULL;
		kmsg->msg.msg_get_inq = 1;
		kmsg->msg.msg_controllen = 0;
		kmsg->msg.msg_iocb = NULL;
		kmsg->msg.msg_ubuf = NULL;

		if (!io_do_buffer_select(req)) {
			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
					  &kmsg->msg.msg_iter);

	ret = io_recvmsg_copy_hdr(req, kmsg);
		req->flags |= REQ_F_NEED_CLEANUP;

#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
		       IORING_RECVSEND_BUNDLE)

int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	if (unlikely(sqe->file_index || sqe->addr2))

	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
	sr->len = READ_ONCE(sqe->len);
	sr->flags = READ_ONCE(sqe->ioprio);
	if (sr->flags & ~RECVMSG_FLAGS)
	sr->msg_flags = READ_ONCE(sqe->msg_flags);
	if (sr->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
	if (sr->msg_flags & MSG_ERRQUEUE)
		req->flags |= REQ_F_CLEAR_POLLIN;
	if (req->flags & REQ_F_BUFFER_SELECT) {
		/*
		 * Store the buffer group for this multishot receive separately,
		 * as if we end up doing an io-wq based issue that selects a
		 * buffer, it has to be committed immediately and that will
		 * clear ->buf_list. This means we lose the link to the buffer
		 * list, and the eventual buffer put on completion then cannot
		 * dereference it.
		 */
		sr->buf_group = req->buf_index;
		req->buf_list = NULL;

	if (sr->flags & IORING_RECV_MULTISHOT) {
		if (!(req->flags & REQ_F_BUFFER_SELECT))
		if (sr->msg_flags & MSG_WAITALL)
		if (req->opcode == IORING_OP_RECV && sr->len)
		req->flags |= REQ_F_APOLL_MULTISHOT;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		if (req->opcode == IORING_OP_RECVMSG)

	if (req->ctx->compat)
		sr->msg_flags |= MSG_CMSG_COMPAT;

	sr->nr_multishot_loops = 0;
	return io_recvmsg_prep_setup(req);
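
/*
 * Userspace sketch of a multishot receive (assumes liburing and a provided
 * buffer ring in group "bgid"; illustrative only). This is the combination
 * io_recvmsg_prep() above accepts: IORING_RECV_MULTISHOT requires buffer
 * selection, no MSG_WAITALL, and a zero length for IORING_OP_RECV.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
 *	sqe->flags |= IOSQE_BUFFER_SELECT;
 *	sqe->buf_group = bgid;
 *	io_uring_submit(&ring);
 *
 * Each chunk of arriving data posts a CQE with IORING_CQE_F_MORE set; a CQE
 * without F_MORE means the request has terminated and must be re-armed.
 */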

/*
 * Finishes io_recv and io_recvmsg.
 *
 * Returns true if it is actually finished, or false if it should run
 * again (for multishot).
 */
static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
				  struct io_async_msghdr *kmsg,
				  bool mshot_finished, unsigned issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	unsigned int cflags = 0;

	if (kmsg->msg.msg_inq > 0)
		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (sr->flags & IORING_RECVSEND_BUNDLE) {
		cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
		/* bundle with no more immediate buffers, we're done */
		if (req->flags & REQ_F_BL_EMPTY)
		cflags |= io_put_kbuf(req, *ret, issue_flags);

	/*
	 * Fill CQE for this receive and see if we should keep trying to
	 * receive from this socket.
	 */
	if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
	    io_req_post_cqe(req, *ret, cflags | IORING_CQE_F_MORE)) {
		int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;

		io_mshot_prep_retry(req, kmsg);
		/* Known not-empty or unknown state, retry */
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
			/* mshot retries exceeded, force a requeue */
			sr->nr_multishot_loops = 0;
			mshot_retry_ret = IOU_REQUEUE;
		if (issue_flags & IO_URING_F_MULTISHOT)
			*ret = mshot_retry_ret;

	/* Finish the request / stop multishot. */
	io_req_set_res(req, *ret, cflags);

	if (issue_flags & IO_URING_F_MULTISHOT)
		*ret = IOU_STOP_MULTISHOT;

	io_req_msg_cleanup(req, issue_flags);

static int io_recvmsg_prep_multishot(struct io_async_msghdr *kmsg,
				     struct io_sr_msg *sr, void __user **buf,
	unsigned long ubuf = (unsigned long) *buf;

	hdr = sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +

	if (kmsg->controllen) {
		unsigned long control = ubuf + hdr - kmsg->controllen;

		kmsg->msg.msg_control_user = (void __user *) control;
		kmsg->msg.msg_controllen = kmsg->controllen;

	sr->buf = *buf; /* stash for later copy */
	*buf = (void __user *) (ubuf + hdr);
	kmsg->payloadlen = *len = *len - hdr;

struct io_recvmsg_multishot_hdr {
	struct io_uring_recvmsg_out msg;
	struct sockaddr_storage addr;

static int io_recvmsg_multishot(struct socket *sock, struct io_sr_msg *io,
				struct io_async_msghdr *kmsg,
				unsigned int flags, bool *finished)
	struct io_recvmsg_multishot_hdr hdr;

	kmsg->msg.msg_name = &hdr.addr;
	kmsg->msg.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
	kmsg->msg.msg_namelen = 0;

	if (sock->file->f_flags & O_NONBLOCK)
		flags |= MSG_DONTWAIT;

	err = sock_recvmsg(sock, &kmsg->msg, flags);
	*finished = err <= 0;

	hdr.msg = (struct io_uring_recvmsg_out) {
		.controllen = kmsg->controllen - kmsg->msg.msg_controllen,
		.flags = kmsg->msg.msg_flags & ~MSG_CMSG_COMPAT

	hdr.msg.payloadlen = err;
	if (err > kmsg->payloadlen)
		err = kmsg->payloadlen;

	copy_len = sizeof(struct io_uring_recvmsg_out);
	if (kmsg->msg.msg_namelen > kmsg->namelen)
		copy_len += kmsg->namelen;
		copy_len += kmsg->msg.msg_namelen;

	/* "fromlen shall refer to the value before truncation.." */
	hdr.msg.namelen = kmsg->msg.msg_namelen;

	/* ensure that there is no gap between hdr and sockaddr_storage */
	BUILD_BUG_ON(offsetof(struct io_recvmsg_multishot_hdr, addr) !=
		     sizeof(struct io_uring_recvmsg_out));
	if (copy_to_user(io->buf, &hdr, copy_len)) {

	return sizeof(struct io_uring_recvmsg_out) + kmsg->namelen +
	       kmsg->controllen + err;
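
/*
 * Resulting layout in the selected buffer for multishot IORING_OP_RECVMSG
 * (see io_recvmsg_prep_multishot() and the copy_to_user() above):
 *
 *	struct io_uring_recvmsg_out out;	lengths and msg_flags
 *	name bytes (kmsg->namelen)
 *	control bytes (kmsg->controllen)
 *	payload
 *
 * Userspace sketch for parsing it (assumes the liburing recvmsg helpers;
 * "msg_template" is the msghdr used at prep time, a placeholder here):
 *
 *	struct io_uring_recvmsg_out *out;
 *
 *	out = io_uring_recvmsg_validate(buf, cqe->res, &msg_template);
 *	void *payload = io_uring_recvmsg_payload(out, &msg_template);
 */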

int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool mshot_finished = true;

	sock = sock_from_file(req->file);

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))

	flags = sr->msg_flags;
		flags |= MSG_DONTWAIT;

	if (io_do_buffer_select(req)) {
		size_t len = sr->len;

		buf = io_buffer_select(req, &len, issue_flags);

		if (req->flags & REQ_F_APOLL_MULTISHOT) {
			ret = io_recvmsg_prep_multishot(kmsg, sr, &buf, &len);
				io_kbuf_recycle(req, issue_flags);

		iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);

	kmsg->msg.msg_get_inq = 1;
	kmsg->msg.msg_inq = -1;
	if (req->flags & REQ_F_APOLL_MULTISHOT) {
		ret = io_recvmsg_multishot(sock, sr, kmsg, flags,
		/* disable partial retry for recvmsg with cmsg attached */
		if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen)
			min_ret = iov_iter_count(&kmsg->msg.msg_iter);

		ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg,
					 kmsg->uaddr, flags);

	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;

		if (ret > 0 && io_net_retry(sock, flags)) {
			req->flags |= REQ_F_BL_NO_RECYCLE;

		if (ret == -ERESTARTSYS)
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {

	else if (sr->done_io)
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg,
			      size_t *len, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

	/*
	 * If the ring isn't locked, then don't use the peek interface
	 * to grab multiple buffers as we will lock/unlock between
	 * this selection and posting the buffers.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED) &&
	    sr->flags & IORING_RECVSEND_BUNDLE) {
		struct buf_sel_arg arg = {
			.iovs = &kmsg->fast_iov,
			.mode = KBUF_MODE_EXPAND,

		if (kmsg->free_iov) {
			arg.nr_iovs = kmsg->free_iov_nr;
			arg.iovs = kmsg->free_iov;
			arg.mode |= KBUF_MODE_FREE;

		if (kmsg->msg.msg_inq > 0)
			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);

		ret = io_buffers_peek(req, &arg);
		if (unlikely(ret < 0))

		/* special case 1 vec, can be a fast path */
			sr->buf = arg.iovs[0].iov_base;
			sr->len = arg.iovs[0].iov_len;
		iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
		if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
			kmsg->free_iov_nr = ret;
			kmsg->free_iov = arg.iovs;
			req->flags |= REQ_F_NEED_CLEANUP;

		buf = io_buffer_select(req, len, issue_flags);

		ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
				  &kmsg->msg.msg_iter);

int io_recv(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	int ret, min_ret = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	size_t len = sr->len;
	bool mshot_finished;

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))

	sock = sock_from_file(req->file);
	if (unlikely(!sock))

	flags = sr->msg_flags;
		flags |= MSG_DONTWAIT;

	if (io_do_buffer_select(req)) {
		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
		if (unlikely(ret)) {
			kmsg->msg.msg_inq = -1;

	kmsg->msg.msg_flags = 0;
	kmsg->msg.msg_inq = -1;

	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	ret = sock_recvmsg(sock, &kmsg->msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock) {
			if (issue_flags & IO_URING_F_MULTISHOT) {
				io_kbuf_recycle(req, issue_flags);
				return IOU_ISSUE_SKIP_COMPLETE;

		if (ret > 0 && io_net_retry(sock, flags)) {
			req->flags |= REQ_F_BL_NO_RECYCLE;

		if (ret == -ERESTARTSYS)
	} else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {

	mshot_finished = ret <= 0;
	else if (sr->done_io)
		io_kbuf_recycle(req, issue_flags);

	if (!io_recv_finish(req, &ret, kmsg, mshot_finished, issue_flags))
		goto retry_multishot;

void io_send_zc_cleanup(struct io_kiocb *req)
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *io = req->async_data;

	if (req_has_async_data(req))
		io_netmsg_iovec_free(io);
		io_notif_flush(zc->notif);

#define IO_ZC_FLAGS_COMMON (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_FIXED_BUF)
#define IO_ZC_FLAGS_VALID  (IO_ZC_FLAGS_COMMON | IORING_SEND_ZC_REPORT_USAGE)

int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_kiocb *notif;

	req->flags |= REQ_F_POLL_NO_LAZY;

	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
	/* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
	if (req->flags & REQ_F_CQE_SKIP)

	notif = zc->notif = io_alloc_notif(ctx);
	notif->cqe.user_data = req->cqe.user_data;
	notif->cqe.flags = IORING_CQE_F_NOTIF;
	req->flags |= REQ_F_NEED_CLEANUP;

	zc->flags = READ_ONCE(sqe->ioprio);
	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
		if (zc->flags & ~IO_ZC_FLAGS_VALID)
		if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
			struct io_notif_data *nd = io_notif_to_data(notif);

			nd->zc_report = true;
			nd->zc_used = false;
			nd->zc_copied = false;

	if (req->opcode != IORING_OP_SEND_ZC) {
		if (unlikely(sqe->addr2 || sqe->file_index))
		if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))

	zc->len = READ_ONCE(sqe->len);
	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
	zc->buf_index = READ_ONCE(sqe->buf_index);
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;

	if (unlikely(!io_msg_alloc_async(req)))
	if (req->opcode != IORING_OP_SENDMSG_ZC)
		return io_send_setup(req, sqe);
	return io_sendmsg_setup(req, sqe);
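
/*
 * Userspace sketch for IORING_OP_SEND_ZC (assumes liburing; illustrative
 * only). A zero-copy send normally produces two CQEs: the send result with
 * IORING_CQE_F_MORE set, and a later notification CQE carrying
 * IORING_CQE_F_NOTIF once the kernel has dropped its references to the
 * pages, which is when the buffer may safely be reused:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_send_zc(sqe, sockfd, buf, len, MSG_NOSIGNAL, 0);
 *	io_uring_submit(&ring);
 *
 *	io_uring_wait_cqe(&ring, &cqe);
 *	if (cqe->flags & IORING_CQE_F_NOTIF)
 *		mark_buffer_reusable(buf);	(hypothetical helper)
 */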

static int io_sg_from_iter_iovec(struct sk_buff *skb,
				 struct iov_iter *from, size_t length)
	skb_zcopy_downgrade_managed(skb);
	return zerocopy_fill_skb_from_iter(skb, from, length);

static int io_sg_from_iter(struct sk_buff *skb,
			   struct iov_iter *from, size_t length)
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int frag = shinfo->nr_frags;
	struct bvec_iter bi;
	unsigned long truesize = 0;

		shinfo->flags |= SKBFL_MANAGED_FRAG_REFS;
	else if (unlikely(!skb_zcopy_managed(skb)))
		return zerocopy_fill_skb_from_iter(skb, from, length);

	bi.bi_size = min(from->count, length);
	bi.bi_bvec_done = from->iov_offset;

	while (bi.bi_size && frag < MAX_SKB_FRAGS) {
		struct bio_vec v = mp_bvec_iter_bvec(from->bvec, bi);

		truesize += PAGE_ALIGN(v.bv_len + v.bv_offset);
		__skb_fill_page_desc_noacc(shinfo, frag++, v.bv_page,
					   v.bv_offset, v.bv_len);
		bvec_iter_advance_single(from->bvec, &bi, v.bv_len);

	shinfo->nr_frags = frag;
	from->bvec += bi.bi_idx;
	from->nr_segs -= bi.bi_idx;
	from->count -= copied;
	from->iov_offset = bi.bi_bvec_done;

	skb->data_len += copied;
	skb->truesize += truesize;

static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;

	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
		struct io_ring_ctx *ctx = req->ctx;
		struct io_rsrc_node *node;

		io_ring_submit_lock(ctx, issue_flags);
		node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
			io_req_assign_buf_node(sr->notif, node);
		io_ring_submit_unlock(ctx, issue_flags);

		ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
				      node->buf, (u64)(uintptr_t)sr->buf,
		kmsg->msg.sg_from_iter = io_sg_from_iter;
		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
		ret = io_notif_account_mem(sr->notif, sr->len);
		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;

int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))

	if (!(req->flags & REQ_F_POLLED) &&
	    (zc->flags & IORING_RECVSEND_POLL_FIRST))

		ret = io_send_zc_import(req, issue_flags);

	msg_flags = zc->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		msg_flags |= MSG_DONTWAIT;
	if (msg_flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
	msg_flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;

	kmsg->msg.msg_flags = msg_flags;
	kmsg->msg.msg_ubuf = &io_notif_to_data(zc->notif)->uarg;
	ret = sock_sendmsg(sock, &kmsg->msg);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
		if (ret > 0 && io_net_retry(sock, kmsg->msg.msg_flags)) {
			req->flags |= REQ_F_BL_NO_RECYCLE;
		if (ret == -ERESTARTSYS)

	else if (zc->done_io)

	/*
	 * If we're in io-wq we can't rely on task_work ordering guarantees,
	 * defer flushing the notif to io_send_zc_cleanup().
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(zc->notif);
		io_req_msg_cleanup(req, 0);
	io_req_set_res(req, ret, IORING_CQE_F_MORE);

int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
	struct io_async_msghdr *kmsg = req->async_data;
	struct socket *sock;
	int ret, min_ret = 0;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))
	if (!test_bit(SOCK_SUPPORT_ZC, &sock->flags))

	if (!(req->flags & REQ_F_POLLED) &&
	    (sr->flags & IORING_RECVSEND_POLL_FIRST))

	flags = sr->msg_flags;
	if (issue_flags & IO_URING_F_NONBLOCK)
		flags |= MSG_DONTWAIT;
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&kmsg->msg.msg_iter);

	kmsg->msg.msg_control_user = sr->msg_control;
	kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
	ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);

	if (unlikely(ret < min_ret)) {
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
		if (ret > 0 && io_net_retry(sock, flags)) {
			req->flags |= REQ_F_BL_NO_RECYCLE;
		if (ret == -ERESTARTSYS)

	else if (sr->done_io)

	/*
	 * If we're in io-wq we can't rely on task_work ordering guarantees,
	 * defer flushing the notif to io_send_zc_cleanup().
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		io_notif_flush(sr->notif);
		io_req_msg_cleanup(req, 0);
	io_req_set_res(req, ret, IORING_CQE_F_MORE);

void io_sendrecv_fail(struct io_kiocb *req)
	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);

		req->cqe.res = sr->done_io;

	if ((req->flags & REQ_F_NEED_CLEANUP) &&
	    (req->opcode == IORING_OP_SEND_ZC || req->opcode == IORING_OP_SENDMSG_ZC))
		req->cqe.flags |= IORING_CQE_F_MORE;

#define ACCEPT_FLAGS (IORING_ACCEPT_MULTISHOT | IORING_ACCEPT_DONTWAIT | \
		      IORING_ACCEPT_POLL_FIRST)

int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);

	if (sqe->len || sqe->buf_index)

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);
	accept->iou_flags = READ_ONCE(sqe->ioprio);
	if (accept->iou_flags & ~ACCEPT_FLAGS)

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot) {
		if (accept->flags & SOCK_CLOEXEC)
		if (accept->iou_flags & IORING_ACCEPT_MULTISHOT &&
		    accept->file_slot != IORING_FILE_INDEX_ALLOC)
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	if (accept->iou_flags & IORING_ACCEPT_MULTISHOT)
		req->flags |= REQ_F_APOLL_MULTISHOT;
	if (accept->iou_flags & IORING_ACCEPT_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;
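
/*
 * Userspace sketch of a multishot accept (assumes liburing; illustrative
 * only). This matches the validation above: IORING_ACCEPT_MULTISHOT is
 * carried in sqe->ioprio and, with fixed files, only works together with
 * IORING_FILE_INDEX_ALLOC.
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	io_uring_prep_multishot_accept(sqe, listen_fd, NULL, NULL, 0);
 *	io_uring_submit(&ring);
 *
 * Every accepted connection posts a CQE whose res is the new fd, with
 * IORING_CQE_F_MORE set for as long as the request stays armed.
 */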

int io_accept(struct io_kiocb *req, unsigned int issue_flags)
	struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	bool fixed = !!accept->file_slot;
	struct proto_accept_arg arg = {
		.flags = force_nonblock ? O_NONBLOCK : 0,

	if (!(req->flags & REQ_F_POLLED) &&
	    accept->iou_flags & IORING_ACCEPT_POLL_FIRST)

		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))

	file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && force_nonblock &&
		    !(accept->iou_flags & IORING_ACCEPT_DONTWAIT)) {
			/*
			 * if it's multishot and polled, we don't need to
			 * return EAGAIN to arm the poll infra since it
			 * has already been done
			 */
			if (issue_flags & IO_URING_F_MULTISHOT)
				return IOU_ISSUE_SKIP_COMPLETE;
		if (ret == -ERESTARTSYS)
	} else if (!fixed) {
		fd_install(fd, file);
		ret = io_fixed_fd_install(req, issue_flags, file,

		cflags |= IORING_CQE_F_SOCK_NONEMPTY;

	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
		io_req_set_res(req, ret, cflags);

	if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
		if (issue_flags & IO_URING_F_MULTISHOT)
			return IOU_ISSUE_SKIP_COMPLETE;

	io_req_set_res(req, ret, cflags);
	return IOU_STOP_MULTISHOT;

int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);

	if (sqe->addr || sqe->rw_flags || sqe->buf_index)

	sock->domain = READ_ONCE(sqe->fd);
	sock->type = READ_ONCE(sqe->off);
	sock->protocol = READ_ONCE(sqe->len);
	sock->file_slot = READ_ONCE(sqe->file_index);
	sock->nofile = rlimit(RLIMIT_NOFILE);

	sock->flags = sock->type & ~SOCK_TYPE_MASK;
	if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
	if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
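
/*
 * SQE field mapping as read above (sketch, assumes liburing): the domain
 * comes from sqe->fd, the type from sqe->off and the protocol from sqe->len.
 * io_uring_prep_socket() fills those in for you:
 *
 *	io_uring_prep_socket(sqe, AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0, 0);
 */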

int io_socket(struct io_kiocb *req, unsigned int issue_flags)
	struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
	bool fixed = !!sock->file_slot;

		fd = __get_unused_fd_flags(sock->flags, sock->nofile);
		if (unlikely(fd < 0))

	file = __sys_socket_file(sock->domain, sock->type, sock->protocol);
		ret = PTR_ERR(file);
		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
		if (ret == -ERESTARTSYS)
	} else if (!fixed) {
		fd_install(fd, file);
		ret = io_fixed_fd_install(req, issue_flags, file,
	io_req_set_res(req, ret, 0);

int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	conn->in_progress = conn->seen_econnaborted = false;

	io = io_msg_alloc_async(req);

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->addr);

int io_connect(struct io_kiocb *req, unsigned int issue_flags)
	struct io_connect *connect = io_kiocb_to_cmd(req, struct io_connect);
	struct io_async_msghdr *io = req->async_data;
	unsigned file_flags;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->addr, connect->addr_len,
	if ((ret == -EAGAIN || ret == -EINPROGRESS || ret == -ECONNABORTED)
	    && force_nonblock) {
		if (ret == -EINPROGRESS) {
			connect->in_progress = true;
		} else if (ret == -ECONNABORTED) {
			if (connect->seen_econnaborted)
			connect->seen_econnaborted = true;

	if (connect->in_progress) {
		/*
		 * At least bluetooth will return -EBADFD on a re-connect
		 * attempt, and it's (supposedly) also valid to get -EISCONN
		 * which means the previous result is good. For both of these,
		 * grab the sock_error() and use that for the completion.
		 */
		if (ret == -EBADFD || ret == -EISCONN)
			ret = sock_error(sock_from_file(req->file)->sk);

	if (ret == -ERESTARTSYS)

	io_req_msg_cleanup(req, issue_flags);
	io_req_set_res(req, ret, 0);

int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct sockaddr __user *uaddr;
	struct io_async_msghdr *io;

	if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in)

	uaddr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	bind->addr_len = READ_ONCE(sqe->addr2);

	io = io_msg_alloc_async(req);

	return move_addr_to_kernel(uaddr, bind->addr_len, &io->addr);

int io_bind(struct io_kiocb *req, unsigned int issue_flags)
	struct io_bind *bind = io_kiocb_to_cmd(req, struct io_bind);
	struct io_async_msghdr *io = req->async_data;
	struct socket *sock;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))

	ret = __sys_bind_socket(sock, &io->addr, bind->addr_len);

	io_req_set_res(req, ret, 0);

int io_listen_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);

	if (sqe->addr || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in || sqe->addr2)

	listen->backlog = READ_ONCE(sqe->len);

int io_listen(struct io_kiocb *req, unsigned int issue_flags)
	struct io_listen *listen = io_kiocb_to_cmd(req, struct io_listen);
	struct socket *sock;

	sock = sock_from_file(req->file);
	if (unlikely(!sock))

	ret = __sys_listen_socket(sock, listen->backlog);

	io_req_set_res(req, ret, 0);
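
/*
 * Userspace sketch chaining the two ops above (assumes liburing >= 2.7 for
 * the bind/listen prep helpers; illustrative only):
 *
 *	struct sockaddr_in sa = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(8080),
 *	};
 *	struct io_uring_sqe *sqe;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_bind(sqe, sockfd, (struct sockaddr *)&sa, sizeof(sa));
 *	sqe->flags |= IOSQE_IO_LINK;
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_listen(sqe, sockfd, SOMAXCONN);
 *	io_uring_submit(&ring);
 */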

void io_netmsg_cache_free(const void *entry)
	struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;

	if (kmsg->free_iov) {
		kasan_mempool_unpoison_object(kmsg->free_iov,
					      kmsg->free_iov_nr * sizeof(struct iovec));
		io_netmsg_iovec_free(kmsg);