Git Repo - linux.git/commitdiff
io_uring: add memory region registration
authorPavel Begunkov <[email protected]>
Fri, 15 Nov 2024 16:54:42 +0000 (16:54 +0000)
committerJens Axboe <[email protected]>
Fri, 15 Nov 2024 16:58:34 +0000 (09:58 -0700)
Regions will serve multiple purposes. First, with them we can decouple
ring/etc. object creation from registration / mapping of the memory they
will be placed in. We already have hacks that allow putting both SQ and
CQ into the same huge page; in the future we should be able to:

region = create_region(io_uring);
create_pbuf_ring(io_uring, region, offset=0);
create_pbuf_ring(io_uring, region, offset=N);

The second use case is efficiently passing parameters. The following
patch re-enables IORING_ENTER_EXT_ARG_REG on top of regions, which
optimises wait arguments. It'll also be useful for request arguments,
replacing iovecs, msghdr, etc. pointers. Eventually it would be handy
for BPF as well, if that comes to fruition.

Signed-off-by: Pavel Begunkov <[email protected]>
Link: https://lore.kernel.org/r/0798cf3a14fad19cfc96fc9feca5f3e11481691d.1731689588.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <[email protected]>
include/linux/io_uring_types.h
include/uapi/linux/io_uring.h
io_uring/io_uring.c
io_uring/register.c

index 1d3a37234acecfb9821d7b8724d0af0e1e3fb9f7..e1d69123e164b46e3644b8b38842102c9ff78ace 100644 (file)
@@ -429,6 +429,9 @@ struct io_ring_ctx {
        unsigned short                  n_sqe_pages;
        struct page                     **ring_pages;
        struct page                     **sqe_pages;
+
+       /* used for optimised request parameter and wait argument passing  */
+       struct io_mapped_region         param_region;
 };
 
 struct io_tw_state {
index 5cbfd330c688b515b4a4de0facd571c4057ddbbf..1ee35890125b0f9ae66ad1b640b3c529de6e6fd7 100644 (file)
@@ -627,6 +627,8 @@ enum io_uring_register_op {
        /* resize CQ ring */
        IORING_REGISTER_RESIZE_RINGS            = 33,
 
+       IORING_REGISTER_MEM_REGION              = 34,
+
        /* this goes last */
        IORING_REGISTER_LAST,
 
@@ -661,6 +663,12 @@ struct io_uring_region_desc {
        __u64 __resv[4];
 };
 
+struct io_uring_mem_region_reg { /* argument passed with IORING_REGISTER_MEM_REGION */
+       __u64 region_uptr; /* user pointer to a struct io_uring_region_desc; read and written back on registration */
+       __u64 flags; /* must be 0: io_register_mem_region() returns -EINVAL otherwise */
+       __u64 __resv[2]; /* reserved, must be all zeroes or registration fails with -EINVAL */
+};
+
 /*
  * Register a fully sparse file space, rather than pass in an array of all
  * -1 file descriptors.
index 286b7bb73978fe5ba86ab853a37b9be018f4416b..c640b8a4ceeed90b047c76dcf7227b415ae6dd1c 100644 (file)
@@ -2709,6 +2709,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
        io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
        io_futex_cache_free(ctx);
        io_destroy_buffers(ctx);
+       io_free_region(ctx, &ctx->param_region);
        mutex_unlock(&ctx->uring_lock);
        if (ctx->sq_creds)
                put_cred(ctx->sq_creds);
index 3c5a3cfb186bb7919f9314a6828c769bd63d79a2..2cbac3d9b2881c520d26d2d87a1edb44763ad444 100644 (file)
@@ -570,6 +570,37 @@ out:
        return ret;
 }
 
+/*
+ * Handle IORING_REGISTER_MEM_REGION: populate ctx->param_region from the
+ * struct io_uring_mem_region_reg that @uarg points to.
+ *
+ * Returns 0 on success, -EBUSY if the param region is already set,
+ * -EFAULT if any copy from/to user space fails, -EINVAL if flags or the
+ * reserved words are non-zero, or whatever non-zero value
+ * io_create_region() returns.
+ */
+static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
+{
+       struct io_uring_mem_region_reg __user *ureg = uarg;
+       struct io_uring_region_desc __user *udesc;
+       struct io_uring_mem_region_reg reg;
+       struct io_uring_region_desc desc;
+       int err;
+
+       /* refuse to register on top of an already set region */
+       if (io_region_is_set(&ctx->param_region))
+               return -EBUSY;
+
+       /* fetch the registration struct, then the descriptor it points to */
+       if (copy_from_user(&reg, ureg, sizeof(reg)))
+               return -EFAULT;
+       udesc = u64_to_user_ptr(reg.region_uptr);
+       if (copy_from_user(&desc, udesc, sizeof(desc)))
+               return -EFAULT;
+
+       /* any flag bit or non-zero reserved word is rejected */
+       if (reg.flags || memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
+               return -EINVAL;
+
+       err = io_create_region(ctx, &ctx->param_region, &desc);
+       if (err)
+               return err;
+
+       /* write the descriptor back to user space */
+       if (!copy_to_user(udesc, &desc, sizeof(desc)))
+               return 0;
+
+       /* write-back failed: drop the region that was just created */
+       io_free_region(ctx, &ctx->param_region);
+       return -EFAULT;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
        __releases(ctx->uring_lock)
@@ -764,6 +795,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                        break;
                ret = io_register_resize_rings(ctx, arg);
                break;
+       case IORING_REGISTER_MEM_REGION:
+               ret = -EINVAL;
+               if (!arg || nr_args != 1)
+                       break;
+               ret = io_register_mem_region(ctx, arg);
+               break;
        default:
                ret = -EINVAL;
                break;
This page took 0.074101 seconds and 4 git commands to generate.