[linux.git] / io_uring / register.c (Linux 6.14-rc3)
// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"

#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
                                 IORING_REGISTER_LAST + IORING_OP_LAST)

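/*
 * Handler for IORING_REGISTER_PROBE: fill an io_uring_probe with the last
 * supported opcode and a per-opcode IO_URING_OP_SUPPORTED flag, then copy
 * the result back to userspace.
 */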
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
                           unsigned nr_args)
{
        struct io_uring_probe *p;
        size_t size;
        int i, ret;

        if (nr_args > IORING_OP_LAST)
                nr_args = IORING_OP_LAST;

        size = struct_size(p, ops, nr_args);
        p = kzalloc(size, GFP_KERNEL);
        if (!p)
                return -ENOMEM;

        ret = -EFAULT;
        if (copy_from_user(p, arg, size))
                goto out;
        ret = -EINVAL;
        if (memchr_inv(p, 0, size))
                goto out;

        p->last_op = IORING_OP_LAST - 1;

        for (i = 0; i < nr_args; i++) {
                p->ops[i].op = i;
                if (io_uring_op_supported(i))
                        p->ops[i].flags = IO_URING_OP_SUPPORTED;
        }
        p->ops_len = i;

        ret = 0;
        if (copy_to_user(arg, p, size))
                ret = -EFAULT;
out:
        kfree(p);
        return ret;
}

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
        const struct cred *creds;

        creds = xa_erase(&ctx->personalities, id);
        if (creds) {
                put_cred(creds);
                return 0;
        }

        return -EINVAL;
}

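/*
 * Handler for IORING_REGISTER_PERSONALITY: stash the current task's
 * credentials in the ring and return the allocated personality id.
 */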
static int io_register_personality(struct io_ring_ctx *ctx)
{
        const struct cred *creds;
        u32 id;
        int ret;

        creds = get_current_cred();

        ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
                        XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
        if (ret < 0) {
                put_cred(creds);
                return ret;
        }
        return id;
}

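/*
 * Parse an array of io_uring_restriction entries from userspace into
 * @restrictions, rejecting unknown restriction opcodes and out-of-range
 * register/sqe opcodes.
 */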
static __cold int io_parse_restrictions(void __user *arg, unsigned int nr_args,
                                        struct io_restriction *restrictions)
{
        struct io_uring_restriction *res;
        size_t size;
        int i, ret;

        if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
                return -EINVAL;

        size = array_size(nr_args, sizeof(*res));
        if (size == SIZE_MAX)
                return -EOVERFLOW;

        res = memdup_user(arg, size);
        if (IS_ERR(res))
                return PTR_ERR(res);

        ret = -EINVAL;

        for (i = 0; i < nr_args; i++) {
                switch (res[i].opcode) {
                case IORING_RESTRICTION_REGISTER_OP:
                        if (res[i].register_op >= IORING_REGISTER_LAST)
                                goto err;
                        __set_bit(res[i].register_op, restrictions->register_op);
                        break;
                case IORING_RESTRICTION_SQE_OP:
                        if (res[i].sqe_op >= IORING_OP_LAST)
                                goto err;
                        __set_bit(res[i].sqe_op, restrictions->sqe_op);
                        break;
                case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
                        restrictions->sqe_flags_allowed = res[i].sqe_flags;
                        break;
                case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
                        restrictions->sqe_flags_required = res[i].sqe_flags;
                        break;
                default:
                        goto err;
                }
        }

        ret = 0;

err:
        kfree(res);
        return ret;
}

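/*
 * Handler for IORING_REGISTER_RESTRICTIONS. On parse failure the partially
 * filled restrictions are cleared again.
 */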
static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
                                           void __user *arg, unsigned int nr_args)
{
        int ret;

        /* Restrictions allowed only if rings started disabled */
        if (!(ctx->flags & IORING_SETUP_R_DISABLED))
                return -EBADFD;

        /* We allow only a single restrictions registration */
        if (ctx->restrictions.registered)
                return -EBUSY;

        ret = io_parse_restrictions(arg, nr_args, &ctx->restrictions);
        /* Reset all restrictions if an error happened */
        if (ret != 0)
                memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
        else
                ctx->restrictions.registered = true;
        return ret;
}

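/*
 * Handler for IORING_REGISTER_ENABLE_RINGS: enable a ring created with
 * IORING_SETUP_R_DISABLED, adopting the caller as submitter for
 * SINGLE_ISSUER rings, arming any registered restrictions and waking SQPOLL.
 */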
static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
        if (!(ctx->flags & IORING_SETUP_R_DISABLED))
                return -EBADFD;

        if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
                WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
                /*
                 * Lazy activation attempts would fail if it was polled before
                 * submitter_task is set.
                 */
                if (wq_has_sleeper(&ctx->poll_wq))
                        io_activate_pollwq(ctx);
        }

        if (ctx->restrictions.registered)
                ctx->restricted = 1;

        ctx->flags &= ~IORING_SETUP_R_DISABLED;
        if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
                wake_up(&ctx->sq_data->wait);
        return 0;
}

static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
                                         cpumask_var_t new_mask)
{
        int ret;

        if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
                ret = io_wq_cpu_affinity(current->io_uring, new_mask);
        } else {
                mutex_unlock(&ctx->uring_lock);
                ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
                mutex_lock(&ctx->uring_lock);
        }

        return ret;
}

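/*
 * Handler for IORING_REGISTER_IOWQ_AFF: copy a CPU mask from userspace
 * (including the compat bitmap layout) and hand it to
 * __io_register_iowq_aff(), which applies it to the current task's io-wq
 * or, for SQPOLL rings, via io_sqpoll_wq_cpu_affinity().
 */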
static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
                                       void __user *arg, unsigned len)
{
        cpumask_var_t new_mask;
        int ret;

        if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
                return -ENOMEM;

        cpumask_clear(new_mask);
        if (len > cpumask_size())
                len = cpumask_size();

#ifdef CONFIG_COMPAT
        if (in_compat_syscall())
                ret = compat_get_bitmap(cpumask_bits(new_mask),
                                        (const compat_ulong_t __user *)arg,
                                        len * 8 /* CHAR_BIT */);
        else
#endif
                ret = copy_from_user(new_mask, arg, len);

        if (ret) {
                free_cpumask_var(new_mask);
                return -EFAULT;
        }

        ret = __io_register_iowq_aff(ctx, new_mask);
        free_cpumask_var(new_mask);
        return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
        return __io_register_iowq_aff(ctx, NULL);
}

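/*
 * Handler for IORING_REGISTER_IOWQ_MAX_WORKERS: update the bounded/unbounded
 * io-wq worker limits (a zero entry leaves that limit untouched), report the
 * prior values back to userspace, and propagate the new limits either to the
 * SQPOLL task or to every task registered with this ring.
 */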
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
                                               void __user *arg)
        __must_hold(&ctx->uring_lock)
{
        struct io_tctx_node *node;
        struct io_uring_task *tctx = NULL;
        struct io_sq_data *sqd = NULL;
        __u32 new_count[2];
        int i, ret;

        if (copy_from_user(new_count, arg, sizeof(new_count)))
                return -EFAULT;
        for (i = 0; i < ARRAY_SIZE(new_count); i++)
                if (new_count[i] > INT_MAX)
                        return -EINVAL;

        if (ctx->flags & IORING_SETUP_SQPOLL) {
                sqd = ctx->sq_data;
                if (sqd) {
                        /*
                         * Observe the correct sqd->lock -> ctx->uring_lock
                         * ordering. Fine to drop uring_lock here, we hold
                         * a ref to the ctx.
                         */
                        refcount_inc(&sqd->refs);
                        mutex_unlock(&ctx->uring_lock);
                        mutex_lock(&sqd->lock);
                        mutex_lock(&ctx->uring_lock);
                        if (sqd->thread)
                                tctx = sqd->thread->io_uring;
                }
        } else {
                tctx = current->io_uring;
        }

        BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

        for (i = 0; i < ARRAY_SIZE(new_count); i++)
                if (new_count[i])
                        ctx->iowq_limits[i] = new_count[i];
        ctx->iowq_limits_set = true;

        if (tctx && tctx->io_wq) {
                ret = io_wq_max_workers(tctx->io_wq, new_count);
                if (ret)
                        goto err;
        } else {
                memset(new_count, 0, sizeof(new_count));
        }

        if (sqd) {
                mutex_unlock(&ctx->uring_lock);
                mutex_unlock(&sqd->lock);
                io_put_sq_data(sqd);
                mutex_lock(&ctx->uring_lock);
        }

        if (copy_to_user(arg, new_count, sizeof(new_count)))
                return -EFAULT;

        /* that's it for SQPOLL, only the SQPOLL task creates requests */
        if (sqd)
                return 0;

        /* now propagate the restriction to all registered users */
        list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
                tctx = node->task->io_uring;
                if (WARN_ON_ONCE(!tctx->io_wq))
                        continue;

                for (i = 0; i < ARRAY_SIZE(new_count); i++)
                        new_count[i] = ctx->iowq_limits[i];
                /* ignore errors, it always returns zero anyway */
                (void)io_wq_max_workers(tctx->io_wq, new_count);
        }
        return 0;
err:
        if (sqd) {
                mutex_unlock(&ctx->uring_lock);
                mutex_unlock(&sqd->lock);
                io_put_sq_data(sqd);
                mutex_lock(&ctx->uring_lock);
        }
        return ret;
}

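/*
 * Handler for IORING_REGISTER_CLOCK: select CLOCK_MONOTONIC or
 * CLOCK_BOOTTIME as the ring's clock and record the matching timekeeping
 * offset.
 */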
static int io_register_clock(struct io_ring_ctx *ctx,
                             struct io_uring_clock_register __user *arg)
{
        struct io_uring_clock_register reg;

        if (copy_from_user(&reg, arg, sizeof(reg)))
                return -EFAULT;
        if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
                return -EINVAL;

        switch (reg.clockid) {
        case CLOCK_MONOTONIC:
                ctx->clock_offset = 0;
                break;
        case CLOCK_BOOTTIME:
                ctx->clock_offset = TK_OFFS_BOOT;
                break;
        default:
                return -EINVAL;
        }

        ctx->clockid = reg.clockid;
        return 0;
}

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
        struct io_rings *rings;
        struct io_uring_sqe *sq_sqes;

        struct io_mapped_region sq_region;
        struct io_mapped_region ring_region;
};

static void io_register_free_rings(struct io_ring_ctx *ctx,
                                   struct io_uring_params *p,
                                   struct io_ring_ctx_rings *r)
{
        io_free_region(ctx, &r->sq_region);
        io_free_region(ctx, &r->ring_region);
}

#define swap_old(ctx, o, n, field)              \
        do {                                    \
                (o).field = (ctx)->field;       \
                (ctx)->field = (n).field;       \
        } while (0)

#define RESIZE_FLAGS (IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS (IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
                    IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP)

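/*
 * Handler for IORING_REGISTER_RESIZE_RINGS: allocate new SQ/CQ rings, copy
 * over any pending entries, and swap the mappings under ctx->mmap_lock and
 * the completion lock. Fails with -EOVERFLOW if the pending entries don't
 * fit in the requested sizes.
 */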
static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
        struct io_uring_region_desc rd;
        struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
        size_t size, sq_array_offset;
        unsigned i, tail, old_head;
        struct io_uring_params p;
        int ret;

        /* for single issuer, must be owner resizing */
        if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
            current != ctx->submitter_task)
                return -EEXIST;
        /* limited to DEFER_TASKRUN for now */
        if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
                return -EINVAL;
        if (copy_from_user(&p, arg, sizeof(p)))
                return -EFAULT;
        if (p.flags & ~RESIZE_FLAGS)
                return -EINVAL;

        /* properties that are always inherited */
        p.flags |= (ctx->flags & COPY_FLAGS);

        ret = io_uring_fill_params(p.sq_entries, &p);
        if (unlikely(ret))
                return ret;

        /* nothing to do, but copy params back */
        if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
                if (copy_to_user(arg, &p, sizeof(p)))
                        return -EFAULT;
                return 0;
        }

        size = rings_size(p.flags, p.sq_entries, p.cq_entries,
                          &sq_array_offset);
        if (size == SIZE_MAX)
                return -EOVERFLOW;

        memset(&rd, 0, sizeof(rd));
        rd.size = PAGE_ALIGN(size);
        if (p.flags & IORING_SETUP_NO_MMAP) {
                rd.user_addr = p.cq_off.user_addr;
                rd.flags |= IORING_MEM_REGION_TYPE_USER;
        }
        ret = io_create_region_mmap_safe(ctx, &n.ring_region, &rd, IORING_OFF_CQ_RING);
        if (ret) {
                io_register_free_rings(ctx, &p, &n);
                return ret;
        }
        n.rings = io_region_get_ptr(&n.ring_region);

        /*
         * At this point n.rings is shared with userspace, just like o.rings
         * is as well. While we don't expect userspace to modify it while
         * a resize is in progress, and it's most likely that userspace will
         * shoot itself in the foot if it does, we can't always assume good
         * intent... Use read/write once helpers from here on to indicate the
         * shared nature of it.
         */
        WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
        WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
        WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
        WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);

        if (copy_to_user(arg, &p, sizeof(p))) {
                io_register_free_rings(ctx, &p, &n);
                return -EFAULT;
        }

        if (p.flags & IORING_SETUP_SQE128)
                size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
        else
                size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
        if (size == SIZE_MAX) {
                io_register_free_rings(ctx, &p, &n);
                return -EOVERFLOW;
        }

        memset(&rd, 0, sizeof(rd));
        rd.size = PAGE_ALIGN(size);
        if (p.flags & IORING_SETUP_NO_MMAP) {
                rd.user_addr = p.sq_off.user_addr;
                rd.flags |= IORING_MEM_REGION_TYPE_USER;
        }
        ret = io_create_region_mmap_safe(ctx, &n.sq_region, &rd, IORING_OFF_SQES);
        if (ret) {
                io_register_free_rings(ctx, &p, &n);
                return ret;
        }
        n.sq_sqes = io_region_get_ptr(&n.sq_region);

        /*
         * If using SQPOLL, park the thread
         */
        if (ctx->sq_data) {
                mutex_unlock(&ctx->uring_lock);
                io_sq_thread_park(ctx->sq_data);
                mutex_lock(&ctx->uring_lock);
        }

        /*
         * We'll do the swap. Grab the ctx->mmap_lock, which will exclude
         * any new mmap's on the ring fd. Clear out existing mappings to prevent
         * mmap from seeing them, as we'll unmap them. Any attempt to mmap
         * existing rings beyond this point will fail. Not that it could proceed
         * at this point anyway, as the io_uring mmap side needs to grab the
         * ctx->mmap_lock as well. Likewise, hold the completion lock over the
         * duration of the actual swap.
         */
        mutex_lock(&ctx->mmap_lock);
        spin_lock(&ctx->completion_lock);
        o.rings = ctx->rings;
        ctx->rings = NULL;
        o.sq_sqes = ctx->sq_sqes;
        ctx->sq_sqes = NULL;

        /*
         * Now copy SQ and CQ entries, if any. If either of the destination
         * rings can't hold what is already there, then fail the operation.
         */
        tail = READ_ONCE(o.rings->sq.tail);
        old_head = READ_ONCE(o.rings->sq.head);
        if (tail - old_head > p.sq_entries)
                goto overflow;
        for (i = old_head; i < tail; i++) {
                unsigned src_head = i & (ctx->sq_entries - 1);
                unsigned dst_head = i & (p.sq_entries - 1);

                n.sq_sqes[dst_head] = o.sq_sqes[src_head];
        }
        WRITE_ONCE(n.rings->sq.head, old_head);
        WRITE_ONCE(n.rings->sq.tail, tail);

        tail = READ_ONCE(o.rings->cq.tail);
        old_head = READ_ONCE(o.rings->cq.head);
        if (tail - old_head > p.cq_entries) {
overflow:
                /* restore old rings, and return -EOVERFLOW via cleanup path */
                ctx->rings = o.rings;
                ctx->sq_sqes = o.sq_sqes;
                to_free = &n;
                ret = -EOVERFLOW;
                goto out;
        }
        for (i = old_head; i < tail; i++) {
                unsigned src_head = i & (ctx->cq_entries - 1);
                unsigned dst_head = i & (p.cq_entries - 1);

                n.rings->cqes[dst_head] = o.rings->cqes[src_head];
        }
        WRITE_ONCE(n.rings->cq.head, old_head);
        WRITE_ONCE(n.rings->cq.tail, tail);
        /* invalidate cached cqe refill */
        ctx->cqe_cached = ctx->cqe_sentinel = NULL;

        WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
        atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
        WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
        WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));

        /* all done, store old pointers and assign new ones */
        if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
                ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

        ctx->sq_entries = p.sq_entries;
        ctx->cq_entries = p.cq_entries;

        ctx->rings = n.rings;
        ctx->sq_sqes = n.sq_sqes;
        swap_old(ctx, o, n, ring_region);
        swap_old(ctx, o, n, sq_region);
        to_free = &o;
        ret = 0;
out:
        spin_unlock(&ctx->completion_lock);
        mutex_unlock(&ctx->mmap_lock);
        io_register_free_rings(ctx, &p, to_free);

        if (ctx->sq_data)
                io_sq_thread_unpark(ctx->sq_data);

        return ret;
}

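/*
 * Handler for IORING_REGISTER_MEM_REGION: map a userspace-described region
 * into the ring; with IORING_MEM_REGION_REG_WAIT_ARG (only allowed while the
 * ring is still disabled) the region also backs the CQ wait arguments.
 */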
static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
        struct io_uring_mem_region_reg __user *reg_uptr = uarg;
        struct io_uring_mem_region_reg reg;
        struct io_uring_region_desc __user *rd_uptr;
        struct io_uring_region_desc rd;
        int ret;

        if (io_region_is_set(&ctx->param_region))
                return -EBUSY;
        if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
                return -EFAULT;
        rd_uptr = u64_to_user_ptr(reg.region_uptr);
        if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
                return -EFAULT;
        if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
                return -EINVAL;
        if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
                return -EINVAL;

        /*
         * This ensures there are no waiters. Waiters are unlocked and it's
         * hard to synchronise with them, especially if we need to initialise
         * the region.
         */
        if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
            !(ctx->flags & IORING_SETUP_R_DISABLED))
                return -EINVAL;

        ret = io_create_region_mmap_safe(ctx, &ctx->param_region, &rd,
                                         IORING_MAP_OFF_PARAM_REGION);
        if (ret)
                return ret;
        if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
                io_free_region(ctx, &ctx->param_region);
                return -EFAULT;
        }

        if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
                ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
                ctx->cq_wait_size = rd.size;
        }
        return 0;
}

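/*
 * Core of io_uring_register(): called with ctx->uring_lock held, enforces
 * registered restrictions and dispatches the opcode to its handler.
 */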
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
                               void __user *arg, unsigned nr_args)
        __releases(ctx->uring_lock)
        __acquires(ctx->uring_lock)
{
        int ret;

        /*
         * We don't quiesce the refs for register anymore and so it can't be
         * dying as we're holding a file ref here.
         */
        if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
                return -ENXIO;

        if (ctx->submitter_task && ctx->submitter_task != current)
                return -EEXIST;

        if (ctx->restricted) {
                opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
                if (!test_bit(opcode, ctx->restrictions.register_op))
                        return -EACCES;
        }

        switch (opcode) {
        case IORING_REGISTER_BUFFERS:
                ret = -EFAULT;
                if (!arg)
                        break;
                ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_BUFFERS:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_sqe_buffers_unregister(ctx);
                break;
        case IORING_REGISTER_FILES:
                ret = -EFAULT;
                if (!arg)
                        break;
                ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
                break;
        case IORING_UNREGISTER_FILES:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_sqe_files_unregister(ctx);
                break;
        case IORING_REGISTER_FILES_UPDATE:
                ret = io_register_files_update(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_EVENTFD:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_eventfd_register(ctx, arg, 0);
                break;
        case IORING_REGISTER_EVENTFD_ASYNC:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_eventfd_register(ctx, arg, 1);
                break;
        case IORING_UNREGISTER_EVENTFD:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_eventfd_unregister(ctx);
                break;
        case IORING_REGISTER_PROBE:
                ret = -EINVAL;
                if (!arg || nr_args > 256)
                        break;
                ret = io_probe(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_PERSONALITY:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_register_personality(ctx);
                break;
        case IORING_UNREGISTER_PERSONALITY:
                ret = -EINVAL;
                if (arg)
                        break;
                ret = io_unregister_personality(ctx, nr_args);
                break;
        case IORING_REGISTER_ENABLE_RINGS:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_register_enable_rings(ctx);
                break;
        case IORING_REGISTER_RESTRICTIONS:
                ret = io_register_restrictions(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_FILES2:
                ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
                break;
        case IORING_REGISTER_FILES_UPDATE2:
                ret = io_register_rsrc_update(ctx, arg, nr_args,
                                              IORING_RSRC_FILE);
                break;
        case IORING_REGISTER_BUFFERS2:
                ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
                break;
        case IORING_REGISTER_BUFFERS_UPDATE:
                ret = io_register_rsrc_update(ctx, arg, nr_args,
                                              IORING_RSRC_BUFFER);
                break;
        case IORING_REGISTER_IOWQ_AFF:
                ret = -EINVAL;
                if (!arg || !nr_args)
                        break;
                ret = io_register_iowq_aff(ctx, arg, nr_args);
                break;
        case IORING_UNREGISTER_IOWQ_AFF:
                ret = -EINVAL;
                if (arg || nr_args)
                        break;
                ret = io_unregister_iowq_aff(ctx);
                break;
        case IORING_REGISTER_IOWQ_MAX_WORKERS:
                ret = -EINVAL;
                if (!arg || nr_args != 2)
                        break;
                ret = io_register_iowq_max_workers(ctx, arg);
                break;
        case IORING_REGISTER_RING_FDS:
                ret = io_ringfd_register(ctx, arg, nr_args);
                break;
        case IORING_UNREGISTER_RING_FDS:
                ret = io_ringfd_unregister(ctx, arg, nr_args);
                break;
        case IORING_REGISTER_PBUF_RING:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_pbuf_ring(ctx, arg);
                break;
        case IORING_UNREGISTER_PBUF_RING:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_unregister_pbuf_ring(ctx, arg);
                break;
        case IORING_REGISTER_SYNC_CANCEL:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_sync_cancel(ctx, arg);
                break;
        case IORING_REGISTER_FILE_ALLOC_RANGE:
                ret = -EINVAL;
                if (!arg || nr_args)
                        break;
                ret = io_register_file_alloc_range(ctx, arg);
                break;
        case IORING_REGISTER_PBUF_STATUS:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_pbuf_status(ctx, arg);
                break;
        case IORING_REGISTER_NAPI:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_napi(ctx, arg);
                break;
        case IORING_UNREGISTER_NAPI:
                ret = -EINVAL;
                if (nr_args != 1)
                        break;
                ret = io_unregister_napi(ctx, arg);
                break;
        case IORING_REGISTER_CLOCK:
                ret = -EINVAL;
                if (!arg || nr_args)
                        break;
                ret = io_register_clock(ctx, arg);
                break;
        case IORING_REGISTER_CLONE_BUFFERS:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_clone_buffers(ctx, arg);
                break;
        case IORING_REGISTER_RESIZE_RINGS:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_resize_rings(ctx, arg);
                break;
        case IORING_REGISTER_MEM_REGION:
                ret = -EINVAL;
                if (!arg || nr_args != 1)
                        break;
                ret = io_register_mem_region(ctx, arg);
                break;
        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

/*
 * Given an 'fd' value, return the ctx associated with it. If 'registered' is
 * true, then the registered index is used. Otherwise, the normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
        struct file *file;

        if (registered) {
                /*
                 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
                 * need only dereference our task private array to find it.
                 */
                struct io_uring_task *tctx = current->io_uring;

                if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
                        return ERR_PTR(-EINVAL);
                fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
                file = tctx->registered_rings[fd];
                if (file)
                        get_file(file);
        } else {
                file = fget(fd);
        }

        if (unlikely(!file))
                return ERR_PTR(-EBADF);
        if (io_is_uring_fops(file))
                return file;
        fput(file);
        return ERR_PTR(-EOPNOTSUPP);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
                                   unsigned int nr_args)
{
        switch (opcode) {
        case IORING_REGISTER_SEND_MSG_RING: {
                struct io_uring_sqe sqe;

                if (!arg || nr_args != 1)
                        return -EINVAL;
                if (copy_from_user(&sqe, arg, sizeof(sqe)))
                        return -EFAULT;
                /* no flags supported */
                if (sqe.flags)
                        return -EINVAL;
                if (sqe.opcode == IORING_OP_MSG_RING)
                        return io_uring_sync_msg_ring(&sqe);
        }
        }

        return -EINVAL;
}

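/*
 * io_uring_register() syscall entry: resolve the ring file (normal or
 * registered fd), handle "blind" opcodes when fd == -1, and otherwise call
 * __io_uring_register() under the ring's uring_lock.
 */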
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
                void __user *, arg, unsigned int, nr_args)
{
        struct io_ring_ctx *ctx;
        long ret = -EBADF;
        struct file *file;
        bool use_registered_ring;

        use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
        opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

        if (opcode >= IORING_REGISTER_LAST)
                return -EINVAL;

        if (fd == -1)
                return io_uring_register_blind(opcode, arg, nr_args);

        file = io_uring_register_get_file(fd, use_registered_ring);
        if (IS_ERR(file))
                return PTR_ERR(file);
        ctx = file->private_data;

        mutex_lock(&ctx->uring_lock);
        ret = __io_uring_register(ctx, opcode, arg, nr_args);

        trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
                                ctx->buf_table.nr, ret);
        mutex_unlock(&ctx->uring_lock);

        fput(file);
        return ret;
}