// SPDX-License-Identifier: GPL-2.0
/*
 * Code related to the io_uring_register() syscall
 *
 * Copyright (C) 2023 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/refcount.h>
#include <linux/bits.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring_types.h>

#include "io_uring.h"
#include "opdef.h"
#include "tctx.h"
#include "rsrc.h"
#include "sqpoll.h"
#include "register.h"
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
#include "eventfd.h"
#include "msg_ring.h"
#include "memmap.h"

#define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
				 IORING_REGISTER_LAST + IORING_OP_LAST)

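/*
 * Fill in a probe structure describing which opcodes this kernel supports
 * and copy the result back to userspace.
 */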
static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg,
			   unsigned nr_args)
{
	struct io_uring_probe *p;
	size_t size;
	int i, ret;

	if (nr_args > IORING_OP_LAST)
		nr_args = IORING_OP_LAST;

	size = struct_size(p, ops, nr_args);
	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	ret = -EFAULT;
	if (copy_from_user(p, arg, size))
		goto out;
	ret = -EINVAL;
	if (memchr_inv(p, 0, size))
		goto out;

	p->last_op = IORING_OP_LAST - 1;

	for (i = 0; i < nr_args; i++) {
		p->ops[i].op = i;
		if (io_uring_op_supported(i))
			p->ops[i].flags = IO_URING_OP_SUPPORTED;
	}
	p->ops_len = i;

	ret = 0;
	if (copy_to_user(arg, p, size))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}

int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
	const struct cred *creds;

	creds = xa_erase(&ctx->personalities, id);
	if (creds) {
		put_cred(creds);
		return 0;
	}

	return -EINVAL;
}

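/*
 * Register the current task's credentials with the ring, returning an id
 * that submissions can later reference via sqe->personality.
 */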
static int io_register_personality(struct io_ring_ctx *ctx)
{
	const struct cred *creds;
	u32 id;
	int ret;

	creds = get_current_cred();

	ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
			XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
	if (ret < 0) {
		put_cred(creds);
		return ret;
	}
	return id;
}

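/*
 * Register restrictions on which register opcodes, SQE opcodes, and SQE
 * flags are allowed. Only permitted while the ring is disabled, and only
 * one registration is allowed per ring.
 */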
static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
					   void __user *arg, unsigned int nr_args)
{
	struct io_uring_restriction *res;
	size_t size;
	int i, ret;

	/* Restrictions allowed only if rings started disabled */
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	/* We allow only a single restrictions registration */
	if (ctx->restrictions.registered)
		return -EBUSY;

	if (!arg || nr_args > IORING_MAX_RESTRICTIONS)
		return -EINVAL;

	size = array_size(nr_args, sizeof(*res));
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	res = memdup_user(arg, size);
	if (IS_ERR(res))
		return PTR_ERR(res);

	ret = 0;

	for (i = 0; i < nr_args; i++) {
		switch (res[i].opcode) {
		case IORING_RESTRICTION_REGISTER_OP:
			if (res[i].register_op >= IORING_REGISTER_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].register_op,
				  ctx->restrictions.register_op);
			break;
		case IORING_RESTRICTION_SQE_OP:
			if (res[i].sqe_op >= IORING_OP_LAST) {
				ret = -EINVAL;
				goto out;
			}

			__set_bit(res[i].sqe_op, ctx->restrictions.sqe_op);
			break;
		case IORING_RESTRICTION_SQE_FLAGS_ALLOWED:
			ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags;
			break;
		case IORING_RESTRICTION_SQE_FLAGS_REQUIRED:
			ctx->restrictions.sqe_flags_required = res[i].sqe_flags;
			break;
		default:
			ret = -EINVAL;
			goto out;
		}
	}

out:
	/* Reset all restrictions if an error happened */
	if (ret != 0)
		memset(&ctx->restrictions, 0, sizeof(ctx->restrictions));
	else
		ctx->restrictions.registered = true;

	kfree(res);
	return ret;
}

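/*
 * Enable a ring that was created with IORING_SETUP_R_DISABLED, applying any
 * previously registered restrictions and waking the SQPOLL thread, if any.
 */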
static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
	if (!(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EBADFD;

	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER && !ctx->submitter_task) {
		WRITE_ONCE(ctx->submitter_task, get_task_struct(current));
		/*
		 * Lazy activation attempts would fail if it was polled before
		 * submitter_task is set.
		 */
		if (wq_has_sleeper(&ctx->poll_wq))
			io_activate_pollwq(ctx);
	}

	if (ctx->restrictions.registered)
		ctx->restricted = 1;

	ctx->flags &= ~IORING_SETUP_R_DISABLED;
	if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
		wake_up(&ctx->sq_data->wait);
	return 0;
}

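/*
 * Apply an io-wq CPU affinity mask, either to the current task's io-wq or,
 * for SQPOLL rings, to the SQPOLL thread. The uring_lock is dropped around
 * the SQPOLL update.
 */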
static __cold int __io_register_iowq_aff(struct io_ring_ctx *ctx,
					 cpumask_var_t new_mask)
{
	int ret;

	if (!(ctx->flags & IORING_SETUP_SQPOLL)) {
		ret = io_wq_cpu_affinity(current->io_uring, new_mask);
	} else {
		mutex_unlock(&ctx->uring_lock);
		ret = io_sqpoll_wq_cpu_affinity(ctx, new_mask);
		mutex_lock(&ctx->uring_lock);
	}

	return ret;
}

static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx,
				       void __user *arg, unsigned len)
{
	cpumask_var_t new_mask;
	int ret;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_clear(new_mask);
	if (len > cpumask_size())
		len = cpumask_size();

#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(new_mask),
					(const compat_ulong_t __user *)arg,
					len * 8 /* CHAR_BIT */);
	else
#endif
		ret = copy_from_user(new_mask, arg, len);

	if (ret) {
		free_cpumask_var(new_mask);
		return -EFAULT;
	}

	ret = __io_register_iowq_aff(ctx, new_mask);
	free_cpumask_var(new_mask);
	return ret;
}

static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	return __io_register_iowq_aff(ctx, NULL);
}

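/*
 * Update the maximum number of bounded/unbounded io-wq workers and copy the
 * previous limits back to userspace. For non-SQPOLL rings, the new limits
 * are also propagated to all tasks registered with the ring.
 */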
static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
					       void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_tctx_node *node;
	struct io_uring_task *tctx = NULL;
	struct io_sq_data *sqd = NULL;
	__u32 new_count[2];
	int i, ret;

	if (copy_from_user(new_count, arg, sizeof(new_count)))
		return -EFAULT;
	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i] > INT_MAX)
			return -EINVAL;

	if (ctx->flags & IORING_SETUP_SQPOLL) {
		sqd = ctx->sq_data;
		if (sqd) {
			/*
			 * Observe the correct sqd->lock -> ctx->uring_lock
			 * ordering. Fine to drop uring_lock here, we hold
			 * a ref to the ctx.
			 */
			refcount_inc(&sqd->refs);
			mutex_unlock(&ctx->uring_lock);
			mutex_lock(&sqd->lock);
			mutex_lock(&ctx->uring_lock);
			if (sqd->thread)
				tctx = sqd->thread->io_uring;
		}
	} else {
		tctx = current->io_uring;
	}

	BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits));

	for (i = 0; i < ARRAY_SIZE(new_count); i++)
		if (new_count[i])
			ctx->iowq_limits[i] = new_count[i];
	ctx->iowq_limits_set = true;

	if (tctx && tctx->io_wq) {
		ret = io_wq_max_workers(tctx->io_wq, new_count);
		if (ret)
			goto err;
	} else {
		memset(new_count, 0, sizeof(new_count));
	}

	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}

	if (copy_to_user(arg, new_count, sizeof(new_count)))
		return -EFAULT;

	/* that's it for SQPOLL, only the SQPOLL task creates requests */
	if (sqd)
		return 0;

	/* now propagate the restriction to all registered users */
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		tctx = node->task->io_uring;
		if (WARN_ON_ONCE(!tctx->io_wq))
			continue;

		for (i = 0; i < ARRAY_SIZE(new_count); i++)
			new_count[i] = ctx->iowq_limits[i];
		/* ignore errors, it always returns zero anyway */
		(void)io_wq_max_workers(tctx->io_wq, new_count);
	}
	return 0;
err:
	if (sqd) {
		mutex_unlock(&ctx->uring_lock);
		mutex_unlock(&sqd->lock);
		io_put_sq_data(sqd);
		mutex_lock(&ctx->uring_lock);
	}
	return ret;
}

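/*
 * Select the clock source used by this ring (stored in ctx->clockid and
 * ctx->clock_offset); only CLOCK_MONOTONIC and CLOCK_BOOTTIME are accepted.
 */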
static int io_register_clock(struct io_ring_ctx *ctx,
			     struct io_uring_clock_register __user *arg)
{
	struct io_uring_clock_register reg;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;

	switch (reg.clockid) {
	case CLOCK_MONOTONIC:
		ctx->clock_offset = 0;
		break;
	case CLOCK_BOOTTIME:
		ctx->clock_offset = TK_OFFS_BOOT;
		break;
	default:
		return -EINVAL;
	}

	ctx->clockid = reg.clockid;
	return 0;
}

/*
 * State to maintain until we can swap. Both new and old state, used for
 * either mapping or freeing.
 */
struct io_ring_ctx_rings {
	unsigned short n_ring_pages;
	unsigned short n_sqe_pages;
	struct page **ring_pages;
	struct page **sqe_pages;
	struct io_uring_sqe *sq_sqes;
	struct io_rings *rings;
};

static void io_register_free_rings(struct io_uring_params *p,
				   struct io_ring_ctx_rings *r)
{
	if (!(p->flags & IORING_SETUP_NO_MMAP)) {
		io_pages_unmap(r->rings, &r->ring_pages, &r->n_ring_pages,
				true);
		io_pages_unmap(r->sq_sqes, &r->sqe_pages, &r->n_sqe_pages,
				true);
	} else {
		io_pages_free(&r->ring_pages, r->n_ring_pages);
		io_pages_free(&r->sqe_pages, r->n_sqe_pages);
		vunmap(r->rings);
		vunmap(r->sq_sqes);
	}
}

#define swap_old(ctx, o, n, field)		\
	do {					\
		(o).field = (ctx)->field;	\
		(ctx)->field = (n).field;	\
	} while (0)

#define RESIZE_FLAGS	(IORING_SETUP_CQSIZE | IORING_SETUP_CLAMP)
#define COPY_FLAGS	(IORING_SETUP_NO_SQARRAY | IORING_SETUP_SQE128 | \
			 IORING_SETUP_CQE32 | IORING_SETUP_NO_MMAP)

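/*
 * Resize the SQ/CQ rings: allocate new rings, copy over any pending SQ and
 * CQ entries, and swap the new rings in under the resize_lock and
 * completion_lock. Limited to DEFER_TASKRUN rings for now.
 */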
static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
	struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
	size_t size, sq_array_offset;
	unsigned i, tail, old_head;
	struct io_uring_params p;
	void *ptr;
	int ret;

	/* for single issuer, must be owner resizing */
	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER &&
	    current != ctx->submitter_task)
		return -EEXIST;
	/* limited to DEFER_TASKRUN for now */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
		return -EINVAL;
	if (copy_from_user(&p, arg, sizeof(p)))
		return -EFAULT;
	if (p.flags & ~RESIZE_FLAGS)
		return -EINVAL;

	/* properties that are always inherited */
	p.flags |= (ctx->flags & COPY_FLAGS);

	ret = io_uring_fill_params(p.sq_entries, &p);
	if (unlikely(ret))
		return ret;

	/* nothing to do, but copy params back */
	if (p.sq_entries == ctx->sq_entries && p.cq_entries == ctx->cq_entries) {
		if (copy_to_user(arg, &p, sizeof(p)))
			return -EFAULT;
		return 0;
	}

	size = rings_size(p.flags, p.sq_entries, p.cq_entries,
			  &sq_array_offset);
	if (size == SIZE_MAX)
		return -EOVERFLOW;

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		n.rings = io_pages_map(&n.ring_pages, &n.n_ring_pages, size);
	else
		n.rings = __io_uaddr_map(&n.ring_pages, &n.n_ring_pages,
					 p.cq_off.user_addr, size);
	if (IS_ERR(n.rings))
		return PTR_ERR(n.rings);

	/*
	 * At this point n.rings is shared with userspace, just like o.rings
	 * is as well. While we don't expect userspace to modify it while
	 * a resize is in progress, and it's most likely that userspace will
	 * shoot itself in the foot if it does, we can't always assume good
	 * intent... Use read/write once helpers from here on to indicate the
	 * shared nature of it.
	 */
	WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
	WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
	WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
	WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);

	if (copy_to_user(arg, &p, sizeof(p))) {
		io_register_free_rings(&p, &n);
		return -EFAULT;
	}

	if (p.flags & IORING_SETUP_SQE128)
		size = array_size(2 * sizeof(struct io_uring_sqe), p.sq_entries);
	else
		size = array_size(sizeof(struct io_uring_sqe), p.sq_entries);
	if (size == SIZE_MAX) {
		io_register_free_rings(&p, &n);
		return -EOVERFLOW;
	}

	if (!(p.flags & IORING_SETUP_NO_MMAP))
		ptr = io_pages_map(&n.sqe_pages, &n.n_sqe_pages, size);
	else
		ptr = __io_uaddr_map(&n.sqe_pages, &n.n_sqe_pages,
				     p.sq_off.user_addr,
				     size);
	if (IS_ERR(ptr)) {
		io_register_free_rings(&p, &n);
		return PTR_ERR(ptr);
	}

	/*
	 * If using SQPOLL, park the thread
	 */
	if (ctx->sq_data) {
		mutex_unlock(&ctx->uring_lock);
		io_sq_thread_park(ctx->sq_data);
		mutex_lock(&ctx->uring_lock);
	}

	/*
	 * We'll do the swap. Grab the ctx->resize_lock, which will exclude
	 * any new mmap's on the ring fd. Clear out existing mappings to prevent
	 * mmap from seeing them, as we'll unmap them. Any attempt to mmap
	 * existing rings beyond this point will fail. Not that it could proceed
	 * at this point anyway, as the io_uring mmap side needs to grab the
	 * ctx->resize_lock as well. Likewise, hold the completion lock over the
	 * duration of the actual swap.
	 */
	mutex_lock(&ctx->resize_lock);
	spin_lock(&ctx->completion_lock);
	o.rings = ctx->rings;
	ctx->rings = NULL;
	o.sq_sqes = ctx->sq_sqes;
	ctx->sq_sqes = NULL;

	/*
	 * Now copy SQ and CQ entries, if any. If either of the destination
	 * rings can't hold what is already there, then fail the operation.
	 */
	n.sq_sqes = ptr;
	tail = READ_ONCE(o.rings->sq.tail);
	old_head = READ_ONCE(o.rings->sq.head);
	if (tail - old_head > p.sq_entries)
		goto overflow;
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->sq_entries - 1);
		unsigned dst_head = i & (p.sq_entries - 1);

		n.sq_sqes[dst_head] = o.sq_sqes[src_head];
	}
	WRITE_ONCE(n.rings->sq.head, READ_ONCE(o.rings->sq.head));
	WRITE_ONCE(n.rings->sq.tail, READ_ONCE(o.rings->sq.tail));

	tail = READ_ONCE(o.rings->cq.tail);
	old_head = READ_ONCE(o.rings->cq.head);
	if (tail - old_head > p.cq_entries) {
overflow:
		/* restore old rings, and return -EOVERFLOW via cleanup path */
		ctx->rings = o.rings;
		ctx->sq_sqes = o.sq_sqes;
		to_free = &n;
		ret = -EOVERFLOW;
		goto out;
	}
	for (i = old_head; i < tail; i++) {
		unsigned src_head = i & (ctx->cq_entries - 1);
		unsigned dst_head = i & (p.cq_entries - 1);

		n.rings->cqes[dst_head] = o.rings->cqes[src_head];
	}
	WRITE_ONCE(n.rings->cq.head, READ_ONCE(o.rings->cq.head));
	WRITE_ONCE(n.rings->cq.tail, READ_ONCE(o.rings->cq.tail));
	/* invalidate cached cqe refill */
	ctx->cqe_cached = ctx->cqe_sentinel = NULL;

	WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
	atomic_set(&n.rings->sq_flags, atomic_read(&o.rings->sq_flags));
	WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
	WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));

	/* all done, store old pointers and assign new ones */
	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
		ctx->sq_array = (u32 *)((char *)n.rings + sq_array_offset);

	ctx->sq_entries = p.sq_entries;
	ctx->cq_entries = p.cq_entries;

	ctx->rings = n.rings;
	ctx->sq_sqes = n.sq_sqes;
	swap_old(ctx, o, n, n_ring_pages);
	swap_old(ctx, o, n, n_sqe_pages);
	swap_old(ctx, o, n, ring_pages);
	swap_old(ctx, o, n, sqe_pages);
	to_free = &o;
	ret = 0;
out:
	spin_unlock(&ctx->completion_lock);
	mutex_unlock(&ctx->resize_lock);
	io_register_free_rings(&p, to_free);

	if (ctx->sq_data)
		io_sq_thread_unpark(ctx->sq_data);

	return ret;
}

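/*
 * Register a user memory region with the ring. If requested, the region is
 * used as the CQ wait argument area (IORING_MEM_REGION_REG_WAIT_ARG), which
 * is only allowed while the ring is still disabled.
 */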
static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *reg_uptr = uarg;
	struct io_uring_mem_region_reg reg;
	struct io_uring_region_desc __user *rd_uptr;
	struct io_uring_region_desc rd;
	int ret;

	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;
	if (copy_from_user(&reg, reg_uptr, sizeof(reg)))
		return -EFAULT;
	rd_uptr = u64_to_user_ptr(reg.region_uptr);
	if (copy_from_user(&rd, rd_uptr, sizeof(rd)))
		return -EFAULT;

	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags & ~IORING_MEM_REGION_REG_WAIT_ARG)
		return -EINVAL;

	/*
	 * This ensures there are no waiters. Waiters are unlocked and it's
	 * hard to synchronise with them, especially if we need to initialise
	 * the region.
	 */
	if ((reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) &&
	    !(ctx->flags & IORING_SETUP_R_DISABLED))
		return -EINVAL;

	ret = io_create_region(ctx, &ctx->param_region, &rd);
	if (ret)
		return ret;
	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
		io_free_region(ctx, &ctx->param_region);
		return -EFAULT;
	}

	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
		ctx->cq_wait_size = rd.size;
	}
	return 0;
}

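/*
 * Dispatch a single io_uring_register() opcode. Called with uring_lock held;
 * some opcodes may drop and reacquire it.
 */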
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
			       void __user *arg, unsigned nr_args)
	__releases(ctx->uring_lock)
	__acquires(ctx->uring_lock)
{
	int ret;

	/*
	 * We don't quiesce the refs for register anymore and so it can't be
	 * dying as we're holding a file ref here.
	 */
	if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
		return -ENXIO;

	if (ctx->submitter_task && ctx->submitter_task != current)
		return -EEXIST;

	if (ctx->restricted) {
		opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
		if (!test_bit(opcode, ctx->restrictions.register_op))
			return -EACCES;
	}

	switch (opcode) {
	case IORING_REGISTER_BUFFERS:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_BUFFERS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_buffers_unregister(ctx);
		break;
	case IORING_REGISTER_FILES:
		ret = -EFAULT;
		if (!arg)
			break;
		ret = io_sqe_files_register(ctx, arg, nr_args, NULL);
		break;
	case IORING_UNREGISTER_FILES:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_sqe_files_unregister(ctx);
		break;
	case IORING_REGISTER_FILES_UPDATE:
		ret = io_register_files_update(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_EVENTFD:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 0);
		break;
	case IORING_REGISTER_EVENTFD_ASYNC:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_eventfd_register(ctx, arg, 1);
		break;
	case IORING_UNREGISTER_EVENTFD:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_eventfd_unregister(ctx);
		break;
	case IORING_REGISTER_PROBE:
		ret = -EINVAL;
		if (!arg || nr_args > 256)
			break;
		ret = io_probe(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_personality(ctx);
		break;
	case IORING_UNREGISTER_PERSONALITY:
		ret = -EINVAL;
		if (arg)
			break;
		ret = io_unregister_personality(ctx, nr_args);
		break;
	case IORING_REGISTER_ENABLE_RINGS:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_register_enable_rings(ctx);
		break;
	case IORING_REGISTER_RESTRICTIONS:
		ret = io_register_restrictions(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_FILES2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_FILES_UPDATE2:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_FILE);
		break;
	case IORING_REGISTER_BUFFERS2:
		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_BUFFERS_UPDATE:
		ret = io_register_rsrc_update(ctx, arg, nr_args,
					      IORING_RSRC_BUFFER);
		break;
	case IORING_REGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (!arg || !nr_args)
			break;
		ret = io_register_iowq_aff(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_IOWQ_AFF:
		ret = -EINVAL;
		if (arg || nr_args)
			break;
		ret = io_unregister_iowq_aff(ctx);
		break;
	case IORING_REGISTER_IOWQ_MAX_WORKERS:
		ret = -EINVAL;
		if (!arg || nr_args != 2)
			break;
		ret = io_register_iowq_max_workers(ctx, arg);
		break;
	case IORING_REGISTER_RING_FDS:
		ret = io_ringfd_register(ctx, arg, nr_args);
		break;
	case IORING_UNREGISTER_RING_FDS:
		ret = io_ringfd_unregister(ctx, arg, nr_args);
		break;
	case IORING_REGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_ring(ctx, arg);
		break;
	case IORING_UNREGISTER_PBUF_RING:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_unregister_pbuf_ring(ctx, arg);
		break;
	case IORING_REGISTER_SYNC_CANCEL:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_sync_cancel(ctx, arg);
		break;
	case IORING_REGISTER_FILE_ALLOC_RANGE:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_file_alloc_range(ctx, arg);
		break;
	case IORING_REGISTER_PBUF_STATUS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_pbuf_status(ctx, arg);
		break;
	case IORING_REGISTER_NAPI:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_napi(ctx, arg);
		break;
	case IORING_UNREGISTER_NAPI:
		ret = -EINVAL;
		if (nr_args != 1)
			break;
		ret = io_unregister_napi(ctx, arg);
		break;
	case IORING_REGISTER_CLOCK:
		ret = -EINVAL;
		if (!arg || nr_args)
			break;
		ret = io_register_clock(ctx, arg);
		break;
	case IORING_REGISTER_CLONE_BUFFERS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_clone_buffers(ctx, arg);
		break;
	case IORING_REGISTER_RESIZE_RINGS:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_resize_rings(ctx, arg);
		break;
	case IORING_REGISTER_MEM_REGION:
		ret = -EINVAL;
		if (!arg || nr_args != 1)
			break;
		ret = io_register_mem_region(ctx, arg);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * Given an 'fd' value, return the io_uring file associated with it. If
 * 'registered' is true, then the registered index is used. Otherwise, the
 * normal fd table.
 * Caller must call fput() on the returned file, unless it's an ERR_PTR.
 */
struct file *io_uring_register_get_file(unsigned int fd, bool registered)
{
	struct file *file;

	if (registered) {
		/*
		 * Ring fd has been registered via IORING_REGISTER_RING_FDS, we
		 * need only dereference our task private array to find it.
		 */
		struct io_uring_task *tctx = current->io_uring;

		if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
			return ERR_PTR(-EINVAL);
		fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
		file = tctx->registered_rings[fd];
	} else {
		file = fget(fd);
	}

	if (unlikely(!file))
		return ERR_PTR(-EBADF);
	if (io_is_uring_fops(file))
		return file;
	fput(file);
	return ERR_PTR(-EOPNOTSUPP);
}

/*
 * "blind" registration opcodes are ones where there's no ring given, and
 * hence the source fd must be -1.
 */
static int io_uring_register_blind(unsigned int opcode, void __user *arg,
				   unsigned int nr_args)
{
	switch (opcode) {
	case IORING_REGISTER_SEND_MSG_RING: {
		struct io_uring_sqe sqe;

		if (!arg || nr_args != 1)
			return -EINVAL;
		if (copy_from_user(&sqe, arg, sizeof(sqe)))
			return -EFAULT;
		/* no flags supported */
		if (sqe.flags)
			return -EINVAL;
		if (sqe.opcode == IORING_OP_MSG_RING)
			return io_uring_sync_msg_ring(&sqe);
	}
	}

	return -EINVAL;
}

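/*
 * Syscall entry point: resolve the ring from 'fd' (or handle ring-less
 * "blind" opcodes when fd == -1), then dispatch the opcode under uring_lock.
 */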
SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
		void __user *, arg, unsigned int, nr_args)
{
	struct io_ring_ctx *ctx;
	long ret = -EBADF;
	struct file *file;
	bool use_registered_ring;

	use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
	opcode &= ~IORING_REGISTER_USE_REGISTERED_RING;

	if (opcode >= IORING_REGISTER_LAST)
		return -EINVAL;

	if (fd == -1)
		return io_uring_register_blind(opcode, arg, nr_args);

	file = io_uring_register_get_file(fd, use_registered_ring);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	mutex_lock(&ctx->uring_lock);
	ret = __io_uring_register(ctx, opcode, arg, nr_args);

	trace_io_uring_register(ctx, opcode, ctx->file_table.data.nr,
				ctx->buf_table.nr, ret);
	mutex_unlock(&ctx->uring_lock);
	if (!use_registered_ring)
		fput(file);
	return ret;
928}