1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/kernel.h>
3 #include <linux/errno.h>
4 #include <linux/file.h>
5 #include <linux/io_uring.h>
7 #include <trace/events/io_uring.h>
9 #include <uapi/linux/io_uring.h>
21 struct list_head list;
22 /* head of the link, used by linked timeouts only */
23 struct io_kiocb *head;
24 /* for linked completions */
25 struct io_kiocb *prev;
28 struct io_timeout_rem {
/*
 * Report whether @req is a timeout that does not use completion sequence
 * tracking: true when the timeout's ->off is zero, or when the request's
 * flags include IORING_TIMEOUT_MULTISHOT.
 */
38 static inline bool io_is_timeout_noseq(struct io_kiocb *req)
40 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
41 struct io_timeout_data *data = req->async_data;
43 return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
/*
 * Calls req_ref_put_and_test() on @req. What is done when that call
 * returns true is not visible in this excerpt.
 */
46 static inline void io_put_req(struct io_kiocb *req)
48 if (req_ref_put_and_test(req)) {
/*
 * Decide whether a fired timeout is done or should fire again.
 *
 * The first test singles out requests without IORING_TIMEOUT_MULTISHOT.
 * The second test looks at ->off and, only when ->off and ->repeats are
 * both non-zero, pre-decrements ->repeats as a side effect of the check.
 * The return statements attached to both tests are not visible in this
 * excerpt.
 */
54 static inline bool io_timeout_finish(struct io_timeout *timeout,
55 struct io_timeout_data *data)
57 if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
60 if (!timeout->off || (timeout->repeats && --timeout->repeats))
66 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
/*
 * Task-work handler for a fired timeout (installed by io_timeout_fn()).
 *
 * When io_timeout_finish() returns false and io_req_post_cqe() succeeds in
 * posting an -ETIME CQE flagged IORING_CQE_F_MORE, the request is re-armed:
 * under timeout_lock it is linked in after the current last entry of
 * ctx->timeout_list and its hrtimer is started again with the stored
 * timespec and mode.
 *
 * io_req_task_complete() is the last visible statement.
 * NOTE(review): the re-arm path presumably returns before reaching it;
 * that return is not visible in this excerpt - confirm.
 */
68 static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
70 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
71 struct io_timeout_data *data = req->async_data;
72 struct io_ring_ctx *ctx = req->ctx;
74 if (!io_timeout_finish(timeout, data)) {
75 if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
77 raw_spin_lock_irq(&ctx->timeout_lock);
78 list_add(&timeout->list, ctx->timeout_list.prev);
79 hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
80 raw_spin_unlock_irq(&ctx->timeout_lock);
85 io_req_task_complete(req, ts);
/*
 * Drain @list of killed timeouts: each entry is unlinked with
 * list_del_init(), mapped back to its request, and queued for task-work
 * completion with result @err.
 *
 * The computation of the bool return value is not visible in this excerpt.
 */
88 static __cold bool io_flush_killed_timeouts(struct list_head *list, int err)
93 while (!list_empty(list)) {
94 struct io_timeout *timeout;
97 timeout = list_first_entry(list, struct io_timeout, list);
98 list_del_init(&timeout->list);
99 req = cmd_to_io_kiocb(timeout);
102 io_req_queue_tw_complete(req, err);
/*
 * Cancel the hrtimer of @req and, if that worked, account for the timeout
 * and move it onto @list for later completion by the caller.
 *
 * Must be called with ctx->timeout_lock held (see __must_hold).
 */
108 static void io_kill_timeout(struct io_kiocb *req, struct list_head *list)
109 __must_hold(&req->ctx->timeout_lock)
111 struct io_timeout_data *io = req->async_data;
/* -1 from hrtimer_try_to_cancel() means the callback is running right now */
113 if (hrtimer_try_to_cancel(&io->timer) != -1) {
114 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
/*
 * Increment written as read + set rather than a single atomic RMW.
 * NOTE(review): presumably safe because writers hold timeout_lock - confirm.
 */
116 atomic_set(&req->ctx->cq_timeouts,
117 atomic_read(&req->ctx->cq_timeouts) + 1);
118 list_move_tail(&timeout->list, list);
/*
 * Complete sequence-based timeouts whose target has been reached.
 *
 * Under timeout_lock the current sequence is computed as
 * cached_cq_tail - cq_timeouts, ctx->timeout_list is walked, and for each
 * entry events_got is compared against events_needed (both taken relative
 * to cq_last_tm_flush) before the entry is handed to io_kill_timeout().
 * The sequence is then stored in cq_last_tm_flush and, once the lock is
 * dropped, the collected entries are flushed with result 0.
 *
 * What happens for noseq entries and for entries that still need more
 * events is not visible in this excerpt.
 */
122 __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
124 struct io_timeout *timeout, *tmp;
128 raw_spin_lock_irq(&ctx->timeout_lock);
129 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
131 list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
132 struct io_kiocb *req = cmd_to_io_kiocb(timeout);
133 u32 events_needed, events_got;
135 if (io_is_timeout_noseq(req))
139 * Since seq can easily wrap around over time, subtract
140 * the last seq at which timeouts were flushed before comparing.
141 * Assuming not more than 2^31-1 events have happened since,
142 * these subtractions won't have wrapped, so we can check if
143 * target is in [last_seq, current_seq] by comparing the two.
145 events_needed = timeout->target_seq - ctx->cq_last_tm_flush;
146 events_got = seq - ctx->cq_last_tm_flush;
147 if (events_got < events_needed)
150 io_kill_timeout(req, &list);
152 ctx->cq_last_tm_flush = seq;
153 raw_spin_unlock_irq(&ctx->timeout_lock);
154 io_flush_killed_timeouts(&list, 0);
/*
 * Task-work callback (installed by io_fail_links()) that fails linked
 * requests starting at @link.
 *
 * After io_tw_lock(), the visible lines remember link->link in nxt, set
 * the result (initialised to -ECANCELED) with io_req_set_res() and
 * complete the request. The loop construct and the statement guarded by
 * the REQ_F_FAIL test are not visible in this excerpt.
 */
157 static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
159 io_tw_lock(link->ctx, ts);
161 struct io_kiocb *nxt = link->link;
162 long res = -ECANCELED;
164 if (link->flags & REQ_F_FAIL)
167 io_req_set_res(link, res, 0);
168 io_req_task_complete(link, ts);
/*
 * Arrange for the requests linked after @req to be failed.
 *
 * ignore_cqes is derived from REQ_F_SKIP_LINK_CQES on @req. The visible
 * lines set or clear REQ_F_CQE_SKIP on a link, emit the fail_link
 * tracepoint, and schedule io_req_tw_fail_links() through task work.
 * The conditions and loop selecting between those lines are not visible
 * in this excerpt.
 *
 * Must be called with ctx->completion_lock held (see __must_hold).
 */
173 static void io_fail_links(struct io_kiocb *req)
174 __must_hold(&req->ctx->completion_lock)
176 struct io_kiocb *link = req->link;
177 bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES;
184 link->flags |= REQ_F_CQE_SKIP;
186 link->flags &= ~REQ_F_CQE_SKIP;
187 trace_io_uring_fail_link(req, link);
192 link->io_task_work.func = io_req_tw_fail_links;
193 io_req_task_work_add(link);
/*
 * Unlink the request that directly follows @req in its link chain by
 * pointing req->link at that request's own ->link.
 * Any further handling of the removed request is not visible in this
 * excerpt.
 */
197 static inline void io_remove_next_linked(struct io_kiocb *req)
199 struct io_kiocb *nxt = req->link;
201 req->link = nxt->link;
/*
 * Disarm a linked timeout attached to @req.
 *
 * REQ_F_ARM_LTIMEOUT case: the flag is cleared and, if the following link
 * is an IORING_OP_LINK_TIMEOUT, it is unlinked and queued for completion
 * with -ECANCELED. (The line assigning @link in this branch is not
 * visible in this excerpt.)
 *
 * REQ_F_LINK_TIMEOUT case: the timeout is disarmed through
 * io_disarm_linked_timeout() under timeout_lock and then queued for
 * completion with -ECANCELED.
 *
 * The final test selects requests with REQ_F_FAIL set and REQ_F_HARDLINK
 * clear; the action taken is not visible in this excerpt.
 *
 * Must be called with ctx->completion_lock held (see __must_hold).
 */
205 void io_disarm_next(struct io_kiocb *req)
206 __must_hold(&req->ctx->completion_lock)
208 struct io_kiocb *link = NULL;
210 if (req->flags & REQ_F_ARM_LTIMEOUT) {
212 req->flags &= ~REQ_F_ARM_LTIMEOUT;
213 if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
214 io_remove_next_linked(req);
215 io_req_queue_tw_complete(link, -ECANCELED);
217 } else if (req->flags & REQ_F_LINK_TIMEOUT) {
218 struct io_ring_ctx *ctx = req->ctx;
220 raw_spin_lock_irq(&ctx->timeout_lock);
221 link = io_disarm_linked_timeout(req);
222 raw_spin_unlock_irq(&ctx->timeout_lock);
224 io_req_queue_tw_complete(link, -ECANCELED);
226 if (unlikely((req->flags & REQ_F_FAIL) &&
227 !(req->flags & REQ_F_HARDLINK)))
/*
 * Detach the linked timeout @link from @req: remove it from the link
 * chain, clear its back pointer (timeout->head), and, if its hrtimer
 * could be cancelled, take it off the list it is queued on.
 *
 * The return statements are not visible in this excerpt.
 *
 * Must be called with both completion_lock and timeout_lock held
 * (see __must_hold).
 */
231 struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
232 struct io_kiocb *link)
233 __must_hold(&req->ctx->completion_lock)
234 __must_hold(&req->ctx->timeout_lock)
236 struct io_timeout_data *io = link->async_data;
237 struct io_timeout *timeout = io_kiocb_to_cmd(link, struct io_timeout);
239 io_remove_next_linked(req);
240 timeout->head = NULL;
241 if (hrtimer_try_to_cancel(&io->timer) != -1) {
242 list_del(&timeout->list);
/*
 * hrtimer callback for regular timeouts.
 *
 * Under timeout_lock (irqsave variant) the timeout is removed from its
 * list and ctx->cq_timeouts is bumped by one. The request result is then
 * set to -ETIME and completion is deferred to io_timeout_complete() via
 * task work. The timer is not restarted from here (HRTIMER_NORESTART).
 *
 * The statement guarded by the IORING_TIMEOUT_ETIME_SUCCESS test is not
 * visible in this excerpt.
 */
249 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
251 struct io_timeout_data *data = container_of(timer,
252 struct io_timeout_data, timer);
253 struct io_kiocb *req = data->req;
254 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
255 struct io_ring_ctx *ctx = req->ctx;
258 raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
259 list_del_init(&timeout->list);
260 atomic_set(&req->ctx->cq_timeouts,
261 atomic_read(&req->ctx->cq_timeouts) + 1);
262 raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
264 if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
267 io_req_set_res(req, -ETIME, 0);
268 req->io_task_work.func = io_timeout_complete;
269 io_req_task_work_add(req);
270 return HRTIMER_NORESTART;
/*
 * Look up a timeout on ctx->timeout_list that matches @cd (using
 * io_cancel_req_match()), cancel its hrtimer and unlink it from the list.
 *
 * Visible error returns:
 *   ERR_PTR(-ENOENT)   - presumably when no entry matched; the guarding
 *                        condition is not visible in this excerpt
 *   ERR_PTR(-EALREADY) - hrtimer_try_to_cancel() returned -1
 * The success return is not visible in this excerpt.
 *
 * Must be called with ctx->timeout_lock held (see __must_hold).
 */
273 static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
274 struct io_cancel_data *cd)
275 __must_hold(&ctx->timeout_lock)
277 struct io_timeout *timeout;
278 struct io_timeout_data *io;
279 struct io_kiocb *req = NULL;
281 list_for_each_entry(timeout, &ctx->timeout_list, list) {
282 struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
284 if (io_cancel_req_match(tmp, cd)) {
290 return ERR_PTR(-ENOENT);
292 io = req->async_data;
293 if (hrtimer_try_to_cancel(&io->timer) == -1)
294 return ERR_PTR(-EALREADY);
295 timeout = io_kiocb_to_cmd(req, struct io_timeout);
296 list_del_init(&timeout->list);
/*
 * Cancel the timeout matching @cd: it is extracted under timeout_lock and
 * then failed with -ECANCELED through io_req_task_queue_fail().
 *
 * Handling of a failed extraction and the return value are not visible in
 * this excerpt.
 *
 * Must be called with ctx->completion_lock held (see __must_hold).
 */
300 int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
301 __must_hold(&ctx->completion_lock)
303 struct io_kiocb *req;
305 raw_spin_lock_irq(&ctx->timeout_lock);
306 req = io_timeout_extract(ctx, cd);
307 raw_spin_unlock_irq(&ctx->timeout_lock);
311 io_req_task_queue_fail(req, -ECANCELED);
/*
 * Task-work handler for a fired linked timeout (installed by
 * io_link_timeout_fn()).
 *
 * On the first visible path, when io_should_terminate_tw() is false, the
 * request identified by prev->cqe.user_data is cancelled through
 * io_try_cancel(); the timeout then completes with that call's result, or
 * with -ETIME when the result is 0. On the second visible path the timeout
 * completes with plain -ETIME.
 *
 * The condition choosing between the two paths is not visible in this
 * excerpt.
 */
315 static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
317 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
318 struct io_kiocb *prev = timeout->prev;
322 if (!io_should_terminate_tw()) {
323 struct io_cancel_data cd = {
325 .data = prev->cqe.user_data,
328 ret = io_try_cancel(req->tctx, &cd, 0);
332 io_req_set_res(req, ret ?: -ETIME, 0);
333 io_req_task_complete(req, ts);
336 io_req_set_res(req, -ETIME, 0);
337 io_req_task_complete(req, ts);
/*
 * hrtimer callback for linked timeouts.
 *
 * Under timeout_lock (irqsave variant) the back pointer timeout->head is
 * read into prev and cleared. The visible lines then unlink the timeout
 * from prev's chain, try to take a reference on prev with
 * req_ref_inc_not_zero(), remove the timeout from its list and store prev
 * in timeout->prev. Completion is deferred to io_req_task_link_timeout()
 * via task work, and the timer is not restarted (HRTIMER_NORESTART).
 *
 * The condition guarding the unlink and the statement executed when the
 * reference cannot be taken are not visible in this excerpt.
 */
341 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
343 struct io_timeout_data *data = container_of(timer,
344 struct io_timeout_data, timer);
345 struct io_kiocb *prev, *req = data->req;
346 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
347 struct io_ring_ctx *ctx = req->ctx;
350 raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
351 prev = timeout->head;
352 timeout->head = NULL;
355 * We don't expect the list to be empty, that will only happen if we
356 * race with the completion of the linked work.
359 io_remove_next_linked(prev);
360 if (!req_ref_inc_not_zero(prev))
363 list_del(&timeout->list);
364 timeout->prev = prev;
365 raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
367 req->io_task_work.func = io_req_task_link_timeout;
368 io_req_task_work_add(req);
369 return HRTIMER_NORESTART;
/*
 * Translate the clock selection bits of data->flags (masked with
 * IORING_TIMEOUT_CLOCK_MASK) into a clockid_t:
 *   IORING_TIMEOUT_BOOTTIME -> CLOCK_BOOTTIME
 *   IORING_TIMEOUT_REALTIME -> CLOCK_REALTIME
 * CLOCK_MONOTONIC is the remaining return value.
 */
372 static clockid_t io_timeout_get_clock(struct io_timeout_data *data)
374 switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) {
375 case IORING_TIMEOUT_BOOTTIME:
376 return CLOCK_BOOTTIME;
377 case IORING_TIMEOUT_REALTIME:
378 return CLOCK_REALTIME;
380 /* can't happen, vetted at prep time */
384 return CLOCK_MONOTONIC;
/*
 * Re-arm the linked timeout whose cqe.user_data equals @user_data.
 *
 * ctx->ltimeout_list is searched for the request; its hrtimer is
 * cancelled, re-initialised with the clock from io_timeout_get_clock()
 * and @mode, given io_link_timeout_fn() as callback and started with *@ts.
 *
 * The return values (including the not-found and cancel-failed cases) are
 * not visible in this excerpt.
 *
 * Must be called with ctx->timeout_lock held (see __must_hold).
 */
388 static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
389 struct timespec64 *ts, enum hrtimer_mode mode)
390 __must_hold(&ctx->timeout_lock)
392 struct io_timeout_data *io;
393 struct io_timeout *timeout;
394 struct io_kiocb *req = NULL;
396 list_for_each_entry(timeout, &ctx->ltimeout_list, list) {
397 struct io_kiocb *tmp = cmd_to_io_kiocb(timeout);
399 if (user_data == tmp->cqe.user_data) {
407 io = req->async_data;
408 if (hrtimer_try_to_cancel(&io->timer) == -1)
410 hrtimer_init(&io->timer, io_timeout_get_clock(io), mode);
411 io->timer.function = io_link_timeout_fn;
412 hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode);
/*
 * Update the expiry of the regular timeout identified by @user_data.
 *
 * The request is pulled off the timeout list with io_timeout_extract(),
 * turned into a noseq timeout by clearing ->off, appended to the tail of
 * ctx->timeout_list, and its hrtimer is re-initialised (clock from
 * io_timeout_get_clock(), @mode), pointed at io_timeout_fn() and started
 * with *@ts.
 *
 * NOTE(review): io_timeout_extract() can return an ERR_PTR; the check for
 * that and the return values are not visible in this excerpt - confirm
 * the check precedes the first dereference of @timeout.
 *
 * Must be called with ctx->timeout_lock held (see __must_hold).
 */
416 static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data,
417 struct timespec64 *ts, enum hrtimer_mode mode)
418 __must_hold(&ctx->timeout_lock)
420 struct io_cancel_data cd = { .ctx = ctx, .data = user_data, };
421 struct io_kiocb *req = io_timeout_extract(ctx, &cd);
422 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
423 struct io_timeout_data *data;
428 timeout->off = 0; /* noseq */
429 data = req->async_data;
430 list_add_tail(&timeout->list, &ctx->timeout_list);
431 hrtimer_init(&data->timer, io_timeout_get_clock(data), mode);
432 data->timer.function = io_timeout_fn;
433 hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode);
/*
 * Validate and decode the SQE of a timeout remove/update request into the
 * request's struct io_timeout_rem.
 *
 * Checks visible here:
 *   - REQ_F_FIXED_FILE / REQ_F_BUFFER_SELECT set on the request
 *   - non-zero buf_index, len or splice_fd_in
 *   - with an update flag set: more than one clock bit, the
 *     IORING_LINK_TIMEOUT_UPDATE bit, flags outside
 *     IORING_TIMEOUT_UPDATE_MASK | IORING_TIMEOUT_ABS, failure to copy the
 *     timespec from sqe->addr2, and negative tv_sec / tv_nsec
 *   - without an update flag: any non-zero flags
 *
 * tr->ltimeout starts out false, and tr->addr / tr->flags are read from
 * the SQE with READ_ONCE(). The statements executed when each check hits
 * (including return codes) are not visible in this excerpt.
 */
437 int io_timeout_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
439 struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
441 if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
443 if (sqe->buf_index || sqe->len || sqe->splice_fd_in)
446 tr->ltimeout = false;
447 tr->addr = READ_ONCE(sqe->addr);
448 tr->flags = READ_ONCE(sqe->timeout_flags);
449 if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) {
450 if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
452 if (tr->flags & IORING_LINK_TIMEOUT_UPDATE)
454 if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS))
456 if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2)))
458 if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0)
460 } else if (tr->flags) {
461 /* timeout removal doesn't support flags */
/*
 * Map io_uring timeout @flags to an hrtimer mode: HRTIMER_MODE_ABS when
 * IORING_TIMEOUT_ABS is set. The alternative operand of the conditional
 * is not visible in this excerpt.
 */
468 static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags)
470 return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS
475 * Remove or update an existing timeout command
/*
 * Issue handler for timeout remove/update.
 *
 * Without IORING_TIMEOUT_UPDATE the timeout identified by tr->addr is
 * cancelled via io_timeout_cancel() under completion_lock. Otherwise the
 * timeout is re-armed under timeout_lock with the mode derived from
 * tr->flags, through either io_linked_timeout_update() or
 * io_timeout_update(); the condition choosing between the two is not
 * visible in this excerpt.
 *
 * The outcome is stored on the request with io_req_set_res(). The return
 * value is not visible in this excerpt.
 */
477 int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
479 struct io_timeout_rem *tr = io_kiocb_to_cmd(req, struct io_timeout_rem);
480 struct io_ring_ctx *ctx = req->ctx;
483 if (!(tr->flags & IORING_TIMEOUT_UPDATE)) {
484 struct io_cancel_data cd = { .ctx = ctx, .data = tr->addr, };
486 spin_lock(&ctx->completion_lock);
487 ret = io_timeout_cancel(ctx, &cd);
488 spin_unlock(&ctx->completion_lock);
490 enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
492 raw_spin_lock_irq(&ctx->timeout_lock);
494 ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
496 ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
497 raw_spin_unlock_irq(&ctx->timeout_lock);
502 io_req_set_res(req, ret, 0);
/*
 * Common SQE preparation behind io_timeout_prep() and
 * io_link_timeout_prep(); @is_timeout_link selects the linked variant.
 *
 * Checks visible here:
 *   - buf_index and splice_fd_in must be zero and len must be 1
 *   - a non-zero off combined with @is_timeout_link
 *   - flags outside ABS | CLOCK_MASK | ETIME_SUCCESS | MULTISHOT
 *   - more than one clock bit
 *   - MULTISHOT and ABS both set
 *   - async data already present (WARN_ON_ONCE) or failing to allocate
 *   - failure to copy the timespec from sqe->addr, or negative fields
 *   - for linked timeouts, the last request of the submit link itself
 *     being an IORING_OP_LINK_TIMEOUT
 *
 * Set-up visible here: the list node is initialised,
 * ctx->off_timeout_used is latched when off is non-zero, ->repeats is
 * taken from off for multishot requests with off > 0, the hrtimer mode is
 * derived from the flags and the timer is initialised with the selected
 * clock. For linked timeouts the last request of the submit link becomes
 * timeout->head and is flagged REQ_F_ARM_LTIMEOUT.
 *
 * The statements executed when a check hits (including return codes) and
 * the stores into timeout->off / data->flags are not visible in this
 * excerpt.
 */
506 static int __io_timeout_prep(struct io_kiocb *req,
507 const struct io_uring_sqe *sqe,
508 bool is_timeout_link)
510 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
511 struct io_timeout_data *data;
513 u32 off = READ_ONCE(sqe->off);
515 if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in)
517 if (off && is_timeout_link)
519 flags = READ_ONCE(sqe->timeout_flags);
520 if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
521 IORING_TIMEOUT_ETIME_SUCCESS |
522 IORING_TIMEOUT_MULTISHOT))
524 /* more than one clock specified is invalid, obviously */
525 if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
527 /* multishot requests only make sense with rel values */
528 if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
531 INIT_LIST_HEAD(&timeout->list);
533 if (unlikely(off && !req->ctx->off_timeout_used))
534 req->ctx->off_timeout_used = true;
536 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
539 timeout->repeats = 0;
540 if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
541 timeout->repeats = off;
543 if (WARN_ON_ONCE(req_has_async_data(req)))
545 if (io_alloc_async_data(req))
548 data = req->async_data;
552 if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr)))
555 if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0)
558 data->mode = io_translate_timeout_mode(flags);
559 hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode);
561 if (is_timeout_link) {
562 struct io_submit_link *link = &req->ctx->submit_state.link;
566 if (link->last->opcode == IORING_OP_LINK_TIMEOUT)
568 timeout->head = link->last;
569 link->last->flags |= REQ_F_ARM_LTIMEOUT;
/* Prep entry point for regular timeouts: __io_timeout_prep() with is_timeout_link = false. */
574 int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
576 return __io_timeout_prep(req, sqe, false);
/* Prep entry point for linked timeouts: __io_timeout_prep() with is_timeout_link = true. */
579 int io_link_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
581 return __io_timeout_prep(req, sqe, true);
/*
 * Issue handler that arms a timeout request.
 *
 * Everything runs under timeout_lock. A noseq request takes the current
 * list tail as its insertion point. Otherwise the request's target_seq
 * is set to tail + off, where tail is cached_cq_tail - cq_timeouts;
 * cq_last_tm_flush is updated to tail, and ctx->timeout_list is scanned
 * backwards to find the insertion point, passing over noseq entries and
 * comparing off against each entry's remaining distance
 * (target_seq - tail).
 *
 * The request is then linked in after the chosen entry, io_timeout_fn()
 * is installed as timer callback and the hrtimer is started with the
 * stored timespec and mode.
 *
 * Returns IOU_ISSUE_SKIP_COMPLETE. The statements controlling the scan
 * (skip / stop) are not visible in this excerpt.
 */
584 int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
586 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
587 struct io_ring_ctx *ctx = req->ctx;
588 struct io_timeout_data *data = req->async_data;
589 struct list_head *entry;
590 u32 tail, off = timeout->off;
592 raw_spin_lock_irq(&ctx->timeout_lock);
595 * sqe->off holds how many events that need to occur for this
596 * timeout event to be satisfied. If it isn't set, then this is
597 * a pure timeout request, sequence isn't used.
599 if (io_is_timeout_noseq(req)) {
600 entry = ctx->timeout_list.prev;
604 tail = data_race(ctx->cached_cq_tail) - atomic_read(&ctx->cq_timeouts);
605 timeout->target_seq = tail + off;
607 /* Update the last seq here in case io_flush_timeouts() hasn't.
608 * This is safe because ->completion_lock is held, and submissions
609 * and completions are never mixed in the same ->completion_lock section.
611 ctx->cq_last_tm_flush = tail;
614 * Insertion sort, ensuring the first entry in the list is always
615 * the one we need first.
617 list_for_each_prev(entry, &ctx->timeout_list) {
618 struct io_timeout *nextt = list_entry(entry, struct io_timeout, list);
619 struct io_kiocb *nxt = cmd_to_io_kiocb(nextt);
621 if (io_is_timeout_noseq(nxt))
623 /* nxt.seq is behind @tail, otherwise would've been completed */
624 if (off >= nextt->target_seq - tail)
628 list_add(&timeout->list, entry);
629 data->timer.function = io_timeout_fn;
630 hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
631 raw_spin_unlock_irq(&ctx->timeout_lock);
632 return IOU_ISSUE_SKIP_COMPLETE;
/*
 * Arm a linked timeout request.
 *
 * Under timeout_lock the visible lines install io_link_timeout_fn() as
 * the hrtimer callback, start the timer with the stored timespec and add
 * the timeout to the tail of ctx->ltimeout_list.
 *
 * The condition guarding the arming and the statement that drops the
 * submission reference after unlocking are not visible in this excerpt.
 */
635 void io_queue_linked_timeout(struct io_kiocb *req)
637 struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
638 struct io_ring_ctx *ctx = req->ctx;
640 raw_spin_lock_irq(&ctx->timeout_lock);
642 * If the back reference is NULL, then our linked request finished
643 * before we got a chance to setup the timer
646 struct io_timeout_data *data = req->async_data;
648 data->timer.function = io_link_timeout_fn;
649 hrtimer_start(&data->timer, timespec64_to_ktime(data->ts),
651 list_add_tail(&timeout->list, &ctx->ltimeout_list);
653 raw_spin_unlock_irq(&ctx->timeout_lock);
654 /* drop submission reference */
/*
 * Decide whether the link chain headed by @head matches the cancellation
 * criteria (used by io_kill_timeouts()).
 *
 * Visible checks: a non-NULL @tctx that differs from head->tctx, and a
 * walk over the chain testing each request for REQ_F_INFLIGHT. The
 * remaining parameter(s) and all return statements are not visible in
 * this excerpt.
 *
 * Must be called with ctx->timeout_lock held (see __must_hold).
 */
658 static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
660 __must_hold(&head->ctx->timeout_lock)
662 struct io_kiocb *req;
664 if (tctx && head->tctx != tctx)
669 io_for_each_link(req, head) {
670 if (req->flags & REQ_F_INFLIGHT)
676 /* Returns true if we found and killed one or more timeouts */
677 __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
680 struct io_timeout *timeout, *tmp;
684 * completion_lock is needed for io_match_task(). Take it before
685 * timeout_lock first to keep locking ordering.
687 spin_lock(&ctx->completion_lock);
688 raw_spin_lock_irq(&ctx->timeout_lock);
689 list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
690 struct io_kiocb *req = cmd_to_io_kiocb(timeout);
692 if (io_match_task(req, tctx, cancel_all))
693 io_kill_timeout(req, &list);
695 raw_spin_unlock_irq(&ctx->timeout_lock);
696 spin_unlock(&ctx->completion_lock);
698 return io_flush_killed_timeouts(&list, -ECANCELED);