Merge tag 'for-6.9/io_uring-20240310' of git://git.kernel.dk/linux
author Linus Torvalds <[email protected]>
Mon, 11 Mar 2024 18:35:31 +0000 (11:35 -0700)
committer Linus Torvalds <[email protected]>
Mon, 11 Mar 2024 18:35:31 +0000 (11:35 -0700)
Pull io_uring updates from Jens Axboe:

 - Make running of task_work internal loops more fair, and unify how the
   different methods deal with them (me)

 - Support for per-ring NAPI. The two minor networking patches are in a
   shared branch with netdev (Stefan)

 - Add support for truncate (Tony)

 - Export SQPOLL utilization stats (Xiaobing)

 - Multishot fixes (Pavel)

 - Fix for a race in manipulating the request flags via poll (Pavel)

 - Cleanup the multishot checking by making it generic, moving it out of
   opcode handlers (Pavel)

 - Various tweaks and cleanups (me, Kunwu, Alexander)
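
A minimal userspace sketch of the two user-visible additions (async truncate and per-ring NAPI busy polling) may help put the series in context. It assumes liburing >= 2.6 exposes io_uring_prep_ftruncate() and io_uring_register_napi() as wrappers for the new IORING_OP_FTRUNCATE opcode and IORING_REGISTER_NAPI registration; those helper names and the io_uring_napi fields are assumptions about the userspace side, not part of this merge:

/*
 * Sketch only: truncate a file asynchronously and opt the ring into NAPI
 * busy polling. Needs a 6.9+ kernel and a liburing with the 6.9 wrappers.
 */
#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_napi napi = {
		.busy_poll_to = 50,	/* busy-poll timeout in usec (assumed field name) */
		.prefer_busy_poll = 1,
	};
	int fd, ret;

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	/* Enable per-ring NAPI busy polling (IORING_REGISTER_NAPI). */
	ret = io_uring_register_napi(&ring, &napi);
	if (ret)
		fprintf(stderr, "register_napi failed: %d (pre-6.9 kernel?)\n", ret);

	fd = open("testfile", O_RDWR | O_CREAT, 0644);
	if (fd < 0)
		return 1;

	/* Queue an async ftruncate (IORING_OP_FTRUNCATE) to 4 KiB. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_ftruncate(sqe, fd, 4096);
	io_uring_submit(&ring);

	if (!io_uring_wait_cqe(&ring, &cqe)) {
		printf("ftruncate result: %d\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}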

* tag 'for-6.9/io_uring-20240310' of git://git.kernel.dk/linux: (53 commits)
  io_uring: Fix sqpoll utilization check racing with dying sqpoll
  io_uring/net: dedup io_recv_finish req completion
  io_uring: refactor DEFER_TASKRUN multishot checks
  io_uring: fix mshot io-wq checks
  io_uring/net: add io_req_msg_cleanup() helper
  io_uring/net: simplify msghd->msg_inq checking
  io_uring/kbuf: rename REQ_F_PARTIAL_IO to REQ_F_BL_NO_RECYCLE
  io_uring/net: remove dependency on REQ_F_PARTIAL_IO for sr->done_io
  io_uring/net: correctly handle multishot recvmsg retry setup
  io_uring/net: clear REQ_F_BL_EMPTY in the multishot retry handler
  io_uring: fix io_queue_proc modifying req->flags
  io_uring: fix mshot read defer taskrun cqe posting
  io_uring/net: fix overflow check in io_recvmsg_mshot_prep()
  io_uring/net: correct the type of variable
  io_uring/sqpoll: statistics of the true utilization of sq threads
  io_uring/net: move recv/recvmsg flags out of retry loop
  io_uring/kbuf: flag request if buffer pool is empty after buffer pick
  io_uring/net: improve the usercopy for sendmsg/recvmsg
  io_uring/net: move receive multishot out of the generic msghdr path
  io_uring/net: unify how recvmsg and sendmsg copy in the msghdr
  ...
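
The SQPOLL utilization counters added by the sqpoll statistics patch above are reported through the ring fd's fdinfo. A hedged sketch for inspecting them on an SQPOLL ring follows; the exact "Sq*" field names (e.g. SqTotalTime, SqWorkTime) are taken from the patch description and are assumptions, not guaranteed ABI:

/* Sketch only: create an SQPOLL ring and dump its Sq* fdinfo counters. */
#include <stdio.h>
#include <string.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_params p = {
		.flags = IORING_SETUP_SQPOLL,
		.sq_thread_idle = 1000,		/* msec before the SQ thread idles */
	};
	char path[64], line[256];
	FILE *f;

	if (io_uring_queue_init_params(8, &ring, &p))
		return 1;

	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", ring.ring_fd);
	f = fopen(path, "r");
	if (f) {
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "Sq", 2))	/* SqThread, SqTotalTime, ... */
				fputs(line, stdout);
		fclose(f);
	}
	io_uring_queue_exit(&ring);
	return 0;
}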

fs/internal.h
fs/open.c
io_uring/net.c
net/core/dev.c

diff --combined fs/internal.h
index 7d3edcdf59cc159e54e804b5751beb738cf7d6e6,8509cbd6d115d9028b0be26dc4da6d2690f09bbc..49c1fcfee4b35a8f8f653ad916ec4a4459f1ed34
@@@ -183,6 -183,7 +183,7 @@@ extern struct open_how build_open_how(i
  extern int build_open_flags(const struct open_how *how, struct open_flags *op);
  struct file *file_close_fd_locked(struct files_struct *files, unsigned fd);
  
+ long do_ftruncate(struct file *file, loff_t length, int small);
  long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
  int chmod_common(const struct path *path, umode_t mode);
  int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
@@@ -310,10 -311,3 +311,10 @@@ ssize_t __kernel_write_iter(struct fil
  struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns);
  struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap);
  void mnt_idmap_put(struct mnt_idmap *idmap);
 +struct stashed_operations {
 +      void (*put_data)(void *data);
 +      void (*init_inode)(struct inode *inode, void *data);
 +};
 +int path_from_stashed(struct dentry **stashed, unsigned long ino,
 +                    struct vfsmount *mnt, void *data, struct path *path);
 +void stashed_dentry_prune(struct dentry *dentry);
diff --combined fs/open.c
index 0a73afe04d34b15ae14f9b3e5177ac206290f6da,0410cb52bb4b73ebe94c9d4b1c9b3d789db06cc7..a7d4bb2c725f1e9bdde176f769c3a52df0627ab9
+++ b/fs/open.c
@@@ -154,49 -154,52 +154,52 @@@ COMPAT_SYSCALL_DEFINE2(truncate, const 
  }
  #endif
  
- long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+ long do_ftruncate(struct file *file, loff_t length, int small)
  {
        struct inode *inode;
        struct dentry *dentry;
-       struct fd f;
        int error;
  
-       error = -EINVAL;
-       if (length < 0)
-               goto out;
-       error = -EBADF;
-       f = fdget(fd);
-       if (!f.file)
-               goto out;
        /* explicitly opened as large or we are on 64-bit box */
-       if (f.file->f_flags & O_LARGEFILE)
+       if (file->f_flags & O_LARGEFILE)
                small = 0;
  
-       dentry = f.file->f_path.dentry;
+       dentry = file->f_path.dentry;
        inode = dentry->d_inode;
-       error = -EINVAL;
-       if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
-               goto out_putf;
+       if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE))
+               return -EINVAL;
  
-       error = -EINVAL;
        /* Cannot ftruncate over 2^31 bytes without large file support */
        if (small && length > MAX_NON_LFS)
-               goto out_putf;
+               return -EINVAL;
  
-       error = -EPERM;
        /* Check IS_APPEND on real upper inode */
-       if (IS_APPEND(file_inode(f.file)))
-               goto out_putf;
+       if (IS_APPEND(file_inode(file)))
+               return -EPERM;
        sb_start_write(inode->i_sb);
-       error = security_file_truncate(f.file);
+       error = security_file_truncate(file);
        if (!error)
-               error = do_truncate(file_mnt_idmap(f.file), dentry, length,
-                                   ATTR_MTIME | ATTR_CTIME, f.file);
+               error = do_truncate(file_mnt_idmap(file), dentry, length,
+                                   ATTR_MTIME | ATTR_CTIME, file);
        sb_end_write(inode->i_sb);
- out_putf:
+       return error;
+ }
+ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+ {
+       struct fd f;
+       int error;
+       if (length < 0)
+               return -EINVAL;
+       f = fdget(fd);
+       if (!f.file)
+               return -EBADF;
+       error = do_ftruncate(f.file, length, small);
        fdput(f);
- out:
        return error;
  }
  
@@@ -1364,7 -1367,7 +1367,7 @@@ struct file *filp_open(const char *file
  {
        struct filename *name = getname_kernel(filename);
        struct file *file = ERR_CAST(name);
 -      
 +
        if (!IS_ERR(name)) {
                file = file_open_name(name, flags, mode);
                putname(name);
diff --combined io_uring/net.c
index 161622029147ca3e6c13784274ea084d05695ccc,2892236fb021c42db6de35301d62224364ac51ba..19451f0dbf813664f9b26e0a27c12b52be080944
@@@ -78,19 -78,6 +78,6 @@@ struct io_sr_msg 
   */
  #define MULTISHOT_MAX_RETRY   32
  
- static inline bool io_check_multishot(struct io_kiocb *req,
-                                     unsigned int issue_flags)
- {
-       /*
-        * When ->locked_cq is set we only allow to post CQEs from the original
-        * task context. Usual request completions will be handled in other
-        * generic paths but multipoll may decide to post extra cqes.
-        */
-       return !(issue_flags & IO_URING_F_IOWQ) ||
-               !(issue_flags & IO_URING_F_MULTISHOT) ||
-               !req->ctx->task_complete;
- }
  int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
        struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
@@@ -204,16 -191,130 +191,130 @@@ static int io_setup_async_msg(struct io
        return -EAGAIN;
  }
  
+ #ifdef CONFIG_COMPAT
+ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
+                                 struct io_async_msghdr *iomsg,
+                                 struct compat_msghdr *msg, int ddir)
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       struct compat_iovec __user *uiov;
+       int ret;
+       if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
+               return -EFAULT;
+       uiov = compat_ptr(msg->msg_iov);
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               compat_ssize_t clen;
+               iomsg->free_iov = NULL;
+               if (msg->msg_iovlen == 0) {
+                       sr->len = 0;
+               } else if (msg->msg_iovlen > 1) {
+                       return -EINVAL;
+               } else {
+                       if (!access_ok(uiov, sizeof(*uiov)))
+                               return -EFAULT;
+                       if (__get_user(clen, &uiov->iov_len))
+                               return -EFAULT;
+                       if (clen < 0)
+                               return -EINVAL;
+                       sr->len = clen;
+               }
+               return 0;
+       }
+       iomsg->free_iov = iomsg->fast_iov;
+       ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
+                               UIO_FASTIOV, &iomsg->free_iov,
+                               &iomsg->msg.msg_iter, true);
+       if (unlikely(ret < 0))
+               return ret;
+       return 0;
+ }
+ #endif
+ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
+                          struct user_msghdr *msg, int ddir)
+ {
+       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       int ret;
+       if (!user_access_begin(sr->umsg, sizeof(*sr->umsg)))
+               return -EFAULT;
+       ret = -EFAULT;
+       unsafe_get_user(msg->msg_name, &sr->umsg->msg_name, ua_end);
+       unsafe_get_user(msg->msg_namelen, &sr->umsg->msg_namelen, ua_end);
+       unsafe_get_user(msg->msg_iov, &sr->umsg->msg_iov, ua_end);
+       unsafe_get_user(msg->msg_iovlen, &sr->umsg->msg_iovlen, ua_end);
+       unsafe_get_user(msg->msg_control, &sr->umsg->msg_control, ua_end);
+       unsafe_get_user(msg->msg_controllen, &sr->umsg->msg_controllen, ua_end);
+       msg->msg_flags = 0;
+       if (req->flags & REQ_F_BUFFER_SELECT) {
+               if (msg->msg_iovlen == 0) {
+                       sr->len = iomsg->fast_iov[0].iov_len = 0;
+                       iomsg->fast_iov[0].iov_base = NULL;
+                       iomsg->free_iov = NULL;
+               } else if (msg->msg_iovlen > 1) {
+                       ret = -EINVAL;
+                       goto ua_end;
+               } else {
+                       /* we only need the length for provided buffers */
+                       if (!access_ok(&msg->msg_iov[0].iov_len, sizeof(__kernel_size_t)))
+                               goto ua_end;
+                       unsafe_get_user(iomsg->fast_iov[0].iov_len,
+                                       &msg->msg_iov[0].iov_len, ua_end);
+                       sr->len = iomsg->fast_iov[0].iov_len;
+                       iomsg->free_iov = NULL;
+               }
+               ret = 0;
+ ua_end:
+               user_access_end();
+               return ret;
+       }
+       user_access_end();
+       iomsg->free_iov = iomsg->fast_iov;
+       ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, UIO_FASTIOV,
+                               &iomsg->free_iov, &iomsg->msg.msg_iter, false);
+       if (unlikely(ret < 0))
+               return ret;
+       return 0;
+ }
  static int io_sendmsg_copy_hdr(struct io_kiocb *req,
                               struct io_async_msghdr *iomsg)
  {
        struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+       struct user_msghdr msg;
        int ret;
  
        iomsg->msg.msg_name = &iomsg->addr;
-       iomsg->free_iov = iomsg->fast_iov;
-       ret = sendmsg_copy_msghdr(&iomsg->msg, sr->umsg, sr->msg_flags,
-                                       &iomsg->free_iov);
+       iomsg->msg.msg_iter.nr_segs = 0;
+ #ifdef CONFIG_COMPAT
+       if (unlikely(req->ctx->compat)) {
+               struct compat_msghdr cmsg;
+               ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
+               if (unlikely(ret))
+                       return ret;
+               return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
+       }
+ #endif
+       ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
+       if (unlikely(ret))
+               return ret;
+       ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
        /* save msg_control as sys_sendmsg() overwrites it */
        sr->msg_control = iomsg->msg.msg_control_user;
        return ret;
@@@ -273,6 -374,8 +374,8 @@@ int io_sendmsg_prep(struct io_kiocb *re
  {
        struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
  
+       sr->done_io = 0;
        if (req->opcode == IORING_OP_SEND) {
                if (READ_ONCE(sqe->__pad3[0]))
                        return -EINVAL;
        if (req->ctx->compat)
                sr->msg_flags |= MSG_CMSG_COMPAT;
  #endif
-       sr->done_io = 0;
        return 0;
  }
  
+ static void io_req_msg_cleanup(struct io_kiocb *req,
+                              struct io_async_msghdr *kmsg,
+                              unsigned int issue_flags)
+ {
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       io_netmsg_recycle(req, issue_flags);
+ }
  int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
  {
        struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
                        kmsg->msg.msg_controllen = 0;
                        kmsg->msg.msg_control = NULL;
                        sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
+                       req->flags |= REQ_F_BL_NO_RECYCLE;
                        return io_setup_async_msg(req, kmsg, issue_flags);
                }
                if (ret == -ERESTARTSYS)
                        ret = -EINTR;
                req_set_fail(req);
        }
-       /* fast path, check for non-NULL to avoid function call */
-       if (kmsg->free_iov)
-               kfree(kmsg->free_iov);
-       req->flags &= ~REQ_F_NEED_CLEANUP;
-       io_netmsg_recycle(req, issue_flags);
+       io_req_msg_cleanup(req, kmsg, issue_flags);
        if (ret >= 0)
                ret += sr->done_io;
        else if (sr->done_io)
@@@ -420,7 -529,7 +529,7 @@@ int io_send(struct io_kiocb *req, unsig
                        sr->len -= ret;
                        sr->buf += ret;
                        sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
+                       req->flags |= REQ_F_BL_NO_RECYCLE;
                        return io_setup_async_addr(req, &__address, issue_flags);
                }
                if (ret == -ERESTARTSYS)
        return IOU_OK;
  }
  
- static bool io_recvmsg_multishot_overflow(struct io_async_msghdr *iomsg)
+ static int io_recvmsg_mshot_prep(struct io_kiocb *req,
+                                struct io_async_msghdr *iomsg,
+                                int namelen, size_t controllen)
  {
-       int hdr;
-       if (iomsg->namelen < 0)
-               return true;
-       if (check_add_overflow((int)sizeof(struct io_uring_recvmsg_out),
-                              iomsg->namelen, &hdr))
-               return true;
-       if (check_add_overflow(hdr, (int)iomsg->controllen, &hdr))
-               return true;
+       if ((req->flags & (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) ==
+                         (REQ_F_APOLL_MULTISHOT|REQ_F_BUFFER_SELECT)) {
+               int hdr;
+               if (unlikely(namelen < 0))
+                       return -EOVERFLOW;
+               if (check_add_overflow(sizeof(struct io_uring_recvmsg_out),
+                                       namelen, &hdr))
+                       return -EOVERFLOW;
+               if (check_add_overflow(hdr, controllen, &hdr))
+                       return -EOVERFLOW;
+               iomsg->namelen = namelen;
+               iomsg->controllen = controllen;
+               return 0;
+       }
  
-       return false;
+       return 0;
  }
  
- static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
-                                struct io_async_msghdr *iomsg)
+ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
+                              struct io_async_msghdr *iomsg)
  {
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
        struct user_msghdr msg;
        int ret;
  
-       if (copy_from_user(&msg, sr->umsg, sizeof(*sr->umsg)))
-               return -EFAULT;
-       ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
-       if (ret)
-               return ret;
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               if (msg.msg_iovlen == 0) {
-                       sr->len = iomsg->fast_iov[0].iov_len = 0;
-                       iomsg->fast_iov[0].iov_base = NULL;
-                       iomsg->free_iov = NULL;
-               } else if (msg.msg_iovlen > 1) {
-                       return -EINVAL;
-               } else {
-                       if (copy_from_user(iomsg->fast_iov, msg.msg_iov, sizeof(*msg.msg_iov)))
-                               return -EFAULT;
-                       sr->len = iomsg->fast_iov[0].iov_len;
-                       iomsg->free_iov = NULL;
-               }
-               if (req->flags & REQ_F_APOLL_MULTISHOT) {
-                       iomsg->namelen = msg.msg_namelen;
-                       iomsg->controllen = msg.msg_controllen;
-                       if (io_recvmsg_multishot_overflow(iomsg))
-                               return -EOVERFLOW;
-               }
-       } else {
-               iomsg->free_iov = iomsg->fast_iov;
-               ret = __import_iovec(ITER_DEST, msg.msg_iov, msg.msg_iovlen, UIO_FASTIOV,
-                                    &iomsg->free_iov, &iomsg->msg.msg_iter,
-                                    false);
-               if (ret > 0)
-                       ret = 0;
-       }
-       return ret;
- }
+       iomsg->msg.msg_name = &iomsg->addr;
+       iomsg->msg.msg_iter.nr_segs = 0;
  
  #ifdef CONFIG_COMPAT
- static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
-                                       struct io_async_msghdr *iomsg)
- {
-       struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
-       struct compat_msghdr msg;
-       struct compat_iovec __user *uiov;
-       int ret;
-       if (copy_from_user(&msg, sr->umsg_compat, sizeof(msg)))
-               return -EFAULT;
+       if (unlikely(req->ctx->compat)) {
+               struct compat_msghdr cmsg;
  
-       ret = __get_compat_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
-       if (ret)
-               return ret;
-       uiov = compat_ptr(msg.msg_iov);
-       if (req->flags & REQ_F_BUFFER_SELECT) {
-               compat_ssize_t clen;
-               iomsg->free_iov = NULL;
-               if (msg.msg_iovlen == 0) {
-                       sr->len = 0;
-               } else if (msg.msg_iovlen > 1) {
-                       return -EINVAL;
-               } else {
-                       if (!access_ok(uiov, sizeof(*uiov)))
-                               return -EFAULT;
-                       if (__get_user(clen, &uiov->iov_len))
-                               return -EFAULT;
-                       if (clen < 0)
-                               return -EINVAL;
-                       sr->len = clen;
-               }
+               ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
+               if (unlikely(ret))
+                       return ret;
  
-               if (req->flags & REQ_F_APOLL_MULTISHOT) {
-                       iomsg->namelen = msg.msg_namelen;
-                       iomsg->controllen = msg.msg_controllen;
-                       if (io_recvmsg_multishot_overflow(iomsg))
-                               return -EOVERFLOW;
-               }
-       } else {
-               iomsg->free_iov = iomsg->fast_iov;
-               ret = __import_iovec(ITER_DEST, (struct iovec __user *)uiov, msg.msg_iovlen,
-                                  UIO_FASTIOV, &iomsg->free_iov,
-                                  &iomsg->msg.msg_iter, true);
-               if (ret < 0)
+               ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
+               if (unlikely(ret))
                        return ret;
-       }
  
-       return 0;
- }
+               return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
+                                               cmsg.msg_controllen);
+       }
  #endif
  
- static int io_recvmsg_copy_hdr(struct io_kiocb *req,
-                              struct io_async_msghdr *iomsg)
- {
-       iomsg->msg.msg_name = &iomsg->addr;
-       iomsg->msg.msg_iter.nr_segs = 0;
+       ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
+       if (unlikely(ret))
+               return ret;
  
- #ifdef CONFIG_COMPAT
-       if (req->ctx->compat)
-               return __io_compat_recvmsg_copy_hdr(req, iomsg);
- #endif
+       ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
+       if (unlikely(ret))
+               return ret;
  
-       return __io_recvmsg_copy_hdr(req, iomsg);
+       return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
+                                       msg.msg_controllen);
  }
  
  int io_recvmsg_prep_async(struct io_kiocb *req)
  {
+       struct io_async_msghdr *iomsg;
        int ret;
  
        if (!io_msg_alloc_async_prep(req))
                return -ENOMEM;
-       ret = io_recvmsg_copy_hdr(req, req->async_data);
+       iomsg = req->async_data;
+       ret = io_recvmsg_copy_hdr(req, iomsg);
        if (!ret)
                req->flags |= REQ_F_NEED_CLEANUP;
        return ret;
@@@ -582,6 -626,8 +626,8 @@@ int io_recvmsg_prep(struct io_kiocb *re
  {
        struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
  
+       sr->done_io = 0;
        if (unlikely(sqe->file_index || sqe->addr2))
                return -EINVAL;
  
        if (req->ctx->compat)
                sr->msg_flags |= MSG_CMSG_COMPAT;
  #endif
-       sr->done_io = 0;
        sr->nr_multishot_loops = 0;
        return 0;
  }
@@@ -627,6 -672,7 +672,7 @@@ static inline void io_recv_prep_retry(s
  {
        struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
  
+       req->flags &= ~REQ_F_BL_EMPTY;
        sr->done_io = 0;
        sr->len = 0; /* get from the provided buffer */
        req->buf_index = sr->buf_group;
@@@ -645,30 -691,22 +691,22 @@@ static inline bool io_recv_finish(struc
        unsigned int cflags;
  
        cflags = io_put_kbuf(req, issue_flags);
-       if (msg->msg_inq && msg->msg_inq != -1)
+       if (msg->msg_inq > 0)
                cflags |= IORING_CQE_F_SOCK_NONEMPTY;
  
-       if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
-               io_req_set_res(req, *ret, cflags);
-               *ret = IOU_OK;
-               return true;
-       }
-       if (mshot_finished)
-               goto finish;
        /*
         * Fill CQE for this receive and see if we should keep trying to
         * receive from this socket.
         */
-       if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
+       if ((req->flags & REQ_F_APOLL_MULTISHOT) && !mshot_finished &&
+           io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
                                *ret, cflags | IORING_CQE_F_MORE)) {
                struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
                int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
  
                io_recv_prep_retry(req);
                /* Known not-empty or unknown state, retry */
-               if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
+               if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq < 0) {
                        if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
                                return false;
                        /* mshot retries exceeded, force a requeue */
                        *ret = -EAGAIN;
                return true;
        }
-       /* Otherwise stop multishot but use the current result. */
- finish:
+       /* Finish the request / stop multishot. */
        io_req_set_res(req, *ret, cflags);
  
        if (issue_flags & IO_URING_F_MULTISHOT)
@@@ -803,8 -841,9 +841,9 @@@ int io_recvmsg(struct io_kiocb *req, un
            (sr->flags & IORING_RECVSEND_POLL_FIRST))
                return io_setup_async_msg(req, kmsg, issue_flags);
  
-       if (!io_check_multishot(req, issue_flags))
-               return io_setup_async_msg(req, kmsg, issue_flags);
+       flags = sr->msg_flags;
+       if (force_nonblock)
+               flags |= MSG_DONTWAIT;
  
  retry_multishot:
        if (io_do_buffer_select(req)) {
                iov_iter_ubuf(&kmsg->msg.msg_iter, ITER_DEST, buf, len);
        }
  
-       flags = sr->msg_flags;
-       if (force_nonblock)
-               flags |= MSG_DONTWAIT;
        kmsg->msg.msg_get_inq = 1;
        kmsg->msg.msg_inq = -1;
        if (req->flags & REQ_F_APOLL_MULTISHOT) {
                }
                if (ret > 0 && io_net_retry(sock, flags)) {
                        sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
+                       req->flags |= REQ_F_BL_NO_RECYCLE;
                        return io_setup_async_msg(req, kmsg, issue_flags);
                }
                if (ret == -ERESTARTSYS)
        if (!io_recv_finish(req, &ret, &kmsg->msg, mshot_finished, issue_flags))
                goto retry_multishot;
  
-       if (mshot_finished) {
-               /* fast path, check for non-NULL to avoid function call */
-               if (kmsg->free_iov)
-                       kfree(kmsg->free_iov);
-               io_netmsg_recycle(req, issue_flags);
-               req->flags &= ~REQ_F_NEED_CLEANUP;
-       }
+       if (mshot_finished)
+               io_req_msg_cleanup(req, kmsg, issue_flags);
+       else if (ret == -EAGAIN)
+               return io_setup_async_msg(req, kmsg, issue_flags);
  
        return ret;
  }
@@@ -900,9 -932,6 +932,6 @@@ int io_recv(struct io_kiocb *req, unsig
            (sr->flags & IORING_RECVSEND_POLL_FIRST))
                return -EAGAIN;
  
-       if (!io_check_multishot(req, issue_flags))
-               return -EAGAIN;
        sock = sock_from_file(req->file);
        if (unlikely(!sock))
                return -ENOTSOCK;
        msg.msg_iocb = NULL;
        msg.msg_ubuf = NULL;
  
+       flags = sr->msg_flags;
+       if (force_nonblock)
+               flags |= MSG_DONTWAIT;
  retry_multishot:
        if (io_do_buffer_select(req)) {
                void __user *buf;
        msg.msg_inq = -1;
        msg.msg_flags = 0;
  
-       flags = sr->msg_flags;
-       if (force_nonblock)
-               flags |= MSG_DONTWAIT;
        if (flags & MSG_WAITALL)
                min_ret = iov_iter_count(&msg.msg_iter);
  
                        sr->len -= ret;
                        sr->buf += ret;
                        sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
+                       req->flags |= REQ_F_BL_NO_RECYCLE;
                        return -EAGAIN;
                }
                if (ret == -ERESTARTSYS)
@@@ -1003,6 -1033,8 +1033,8 @@@ int io_send_zc_prep(struct io_kiocb *re
        struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *notif;
  
+       zc->done_io = 0;
        if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
                return -EINVAL;
        /* we don't support IOSQE_CQE_SKIP_SUCCESS just yet */
        if (zc->msg_flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
  
-       zc->done_io = 0;
  #ifdef CONFIG_COMPAT
        if (req->ctx->compat)
                zc->msg_flags |= MSG_CMSG_COMPAT;
@@@ -1196,7 -1226,7 +1226,7 @@@ int io_send_zc(struct io_kiocb *req, un
                        zc->len -= ret;
                        zc->buf += ret;
                        zc->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
+                       req->flags |= REQ_F_BL_NO_RECYCLE;
                        return io_setup_async_addr(req, &__address, issue_flags);
                }
                if (ret == -ERESTARTSYS)
@@@ -1266,7 -1296,7 +1296,7 @@@ int io_sendmsg_zc(struct io_kiocb *req
  
                if (ret > 0 && io_net_retry(sock, flags)) {
                        sr->done_io += ret;
-                       req->flags |= REQ_F_PARTIAL_IO;
+                       req->flags |= REQ_F_BL_NO_RECYCLE;
                        return io_setup_async_msg(req, kmsg, issue_flags);
                }
                if (ret == -ERESTARTSYS)
@@@ -1301,7 -1331,7 +1331,7 @@@ void io_sendrecv_fail(struct io_kiocb *
  {
        struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
  
-       if (req->flags & REQ_F_PARTIAL_IO)
+       if (sr->done_io)
                req->cqe.res = sr->done_io;
  
        if ((req->flags & REQ_F_NEED_CLEANUP) &&
@@@ -1351,8 -1381,6 +1381,6 @@@ int io_accept(struct io_kiocb *req, uns
        struct file *file;
        int ret, fd;
  
-       if (!io_check_multishot(req, issue_flags))
-               return -EAGAIN;
  retry:
        if (!fixed) {
                fd = __get_unused_fd_flags(accept->flags, accept->nofile);
                         * has already been done
                         */
                        if (issue_flags & IO_URING_F_MULTISHOT)
 -                              ret = IOU_ISSUE_SKIP_COMPLETE;
 +                              return IOU_ISSUE_SKIP_COMPLETE;
                        return ret;
                }
                if (ret == -ERESTARTSYS)
                                ret, IORING_CQE_F_MORE))
                goto retry;
  
 -      return -ECANCELED;
 +      io_req_set_res(req, ret, 0);
 +      return IOU_STOP_MULTISHOT;
  }
  
  int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --combined net/core/dev.c
index 76e6438f4858e246dfebb78364a253e77f9a86b4,ffa394f3e7968eddb48ba2fbe41ca2e301553be8..a892f72651890d4da0c1b954991130c07eb91ee9
@@@ -336,7 -336,7 +336,7 @@@ int netdev_name_node_alt_create(struct 
                return -ENOMEM;
        netdev_name_node_add(net, name_node);
        /* The node that holds dev->name acts as a head of per-device list. */
 -      list_add_tail(&name_node->list, &dev->name_node->list);
 +      list_add_tail_rcu(&name_node->list, &dev->name_node->list);
  
        return 0;
  }
@@@ -6177,8 -6177,13 +6177,13 @@@ static void __busy_poll_stop(struct nap
        clear_bit(NAPI_STATE_SCHED, &napi->state);
  }
  
- static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll,
-                          u16 budget)
+ enum {
+       NAPI_F_PREFER_BUSY_POLL = 1,
+       NAPI_F_END_ON_RESCHED   = 2,
+ };
+ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock,
+                          unsigned flags, u16 budget)
  {
        bool skip_schedule = false;
        unsigned long timeout;
  
        local_bh_disable();
  
-       if (prefer_busy_poll) {
+       if (flags & NAPI_F_PREFER_BUSY_POLL) {
                napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs);
                timeout = READ_ONCE(napi->dev->gro_flush_timeout);
                if (napi->defer_hard_irqs_count && timeout) {
        local_bh_enable();
  }
  
- void napi_busy_loop(unsigned int napi_id,
-                   bool (*loop_end)(void *, unsigned long),
-                   void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+ static void __napi_busy_loop(unsigned int napi_id,
+                     bool (*loop_end)(void *, unsigned long),
+                     void *loop_end_arg, unsigned flags, u16 budget)
  {
        unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
        int (*napi_poll)(struct napi_struct *napi, int budget);
        void *have_poll_lock = NULL;
        struct napi_struct *napi;
  
+       WARN_ON_ONCE(!rcu_read_lock_held());
  restart:
        napi_poll = NULL;
  
-       rcu_read_lock();
        napi = napi_by_id(napi_id);
        if (!napi)
-               goto out;
+               return;
  
        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                preempt_disable();
                         */
                        if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
                                   NAPIF_STATE_IN_BUSY_POLL)) {
-                               if (prefer_busy_poll)
+                               if (flags & NAPI_F_PREFER_BUSY_POLL)
                                        set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
                                goto count;
                        }
                        if (cmpxchg(&napi->state, val,
                                    val | NAPIF_STATE_IN_BUSY_POLL |
                                          NAPIF_STATE_SCHED) != val) {
-                               if (prefer_busy_poll)
+                               if (flags & NAPI_F_PREFER_BUSY_POLL)
                                        set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state);
                                goto count;
                        }
@@@ -6281,12 -6286,15 +6286,15 @@@ count
                        break;
  
                if (unlikely(need_resched())) {
+                       if (flags & NAPI_F_END_ON_RESCHED)
+                               break;
                        if (napi_poll)
-                               busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+                               busy_poll_stop(napi, have_poll_lock, flags, budget);
                        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                                preempt_enable();
                        rcu_read_unlock();
                        cond_resched();
+                       rcu_read_lock();
                        if (loop_end(loop_end_arg, start_time))
                                return;
                        goto restart;
                cpu_relax();
        }
        if (napi_poll)
-               busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
+               busy_poll_stop(napi, have_poll_lock, flags, budget);
        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                preempt_enable();
- out:
+ }
+ void napi_busy_loop_rcu(unsigned int napi_id,
+                       bool (*loop_end)(void *, unsigned long),
+                       void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+ {
+       unsigned flags = NAPI_F_END_ON_RESCHED;
+       if (prefer_busy_poll)
+               flags |= NAPI_F_PREFER_BUSY_POLL;
+       __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
+ }
+ void napi_busy_loop(unsigned int napi_id,
+                   bool (*loop_end)(void *, unsigned long),
+                   void *loop_end_arg, bool prefer_busy_poll, u16 budget)
+ {
+       unsigned flags = prefer_busy_poll ? NAPI_F_PREFER_BUSY_POLL : 0;
+       rcu_read_lock();
+       __napi_busy_loop(napi_id, loop_end, loop_end_arg, flags, budget);
        rcu_read_unlock();
  }
  EXPORT_SYMBOL(napi_busy_loop);
@@@ -9074,6 -9103,28 +9103,6 @@@ bool netdev_port_same_parent_id(struct 
  }
  EXPORT_SYMBOL(netdev_port_same_parent_id);
  
 -static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
 -{
 -#if IS_ENABLED(CONFIG_DPLL)
 -      rtnl_lock();
 -      dev->dpll_pin = dpll_pin;
 -      rtnl_unlock();
 -#endif
 -}
 -
 -void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
 -{
 -      WARN_ON(!dpll_pin);
 -      netdev_dpll_pin_assign(dev, dpll_pin);
 -}
 -EXPORT_SYMBOL(netdev_dpll_pin_set);
 -
 -void netdev_dpll_pin_clear(struct net_device *dev)
 -{
 -      netdev_dpll_pin_assign(dev, NULL);
 -}
 -EXPORT_SYMBOL(netdev_dpll_pin_clear);
 -
  /**
   *    dev_change_proto_down - set carrier according to proto_down.
   *
@@@ -11630,12 -11681,11 +11659,12 @@@ static void __init net_dev_struct_check
        CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
  
        /* TXRX read-mostly hotpath */
 +      CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
 -      CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30);
 +      CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38);
  
        /* RX read-mostly hotpath */
        CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);