// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleanout of stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)

struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};

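/*
 * Look up a NAPI id in the RCU hash list and, if found, refresh its stale
 * timeout. Callers must hold the RCU read lock or ctx->napi_lock.
 */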
static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		e->timeout = jiffies + NAPI_TIMEOUT;
		return e;
	}

	return NULL;
}

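/*
 * Start tracking the NAPI id of @sock for busy polling, or refresh the
 * timeout of an id that is already tracked by @ctx.
 */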
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Non-NAPI IDs can be rejected. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		e->timeout = jiffies + NAPI_TIMEOUT;
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

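/*
 * Walk the hash table under ctx->napi_lock and drop every entry whose
 * stale timeout has expired.
 */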
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

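/*
 * Return true once @bp_usec of busy polling has elapsed since @start_time.
 * A zero budget ends the loop immediately.
 */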
static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
					     unsigned long bp_usec)
{
	if (bp_usec) {
		unsigned long end_time = start_time + bp_usec;
		unsigned long now = busy_loop_current_time();

		return time_after(now, end_time);
	}

	return true;
}

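/*
 * Loop end callback for napi_busy_loop_rcu(): stop polling on a pending
 * signal, on available completions or work, or once the poll timeout expires.
 */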
static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
		return true;

	return false;
}

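/*
 * Busy poll every tracked NAPI id once. Returns true if any entry has
 * outlived NAPI_TIMEOUT so the caller can prune stale entries.
 */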
static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

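/*
 * Busy poll loop used when a task blocks waiting for completions; keeps
 * polling until io_napi_busy_loop_should_end() reports a stop condition.
 */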
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* Singular lists use a different napi loop end check function and are
	 * only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}

/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	LIST_HEAD(napi_list);
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ctx->napi_busy_poll_to,
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}

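/*
 * Usage sketch (userspace, not part of this file): assuming liburing's
 * io_uring_register_napi() wrapper for this registration opcode is
 * available, enabling busy polling could look like:
 *
 *	struct io_uring_napi napi = {
 *		.busy_poll_to	  = 50,	// busy poll timeout in usec
 *		.prefer_busy_poll = 1,
 *	};
 *	io_uring_register_napi(&ring, &napi);
 */
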
/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified, copy the busy poll timeout and
 * prefer busy poll setting to the passed in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ctx->napi_busy_poll_to,
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}

/*
 * __io_napi_adjust_timeout() - adjust busy loop timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @ts: pointer to timespec or NULL
 *
 * Adjust the busy loop timeout according to timespec and busy poll timeout.
 * If the specified NAPI timeout is bigger than the wait timeout, then adjust
 * the NAPI timeout accordingly.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      struct timespec64 *ts)
{
	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);

	if (ts) {
		struct timespec64 poll_to_ts;

		poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
		if (timespec64_compare(ts, &poll_to_ts) < 0) {
			s64 poll_to_ns = timespec64_to_ns(ts);

			if (poll_to_ns > 0) {
				/* Convert the wait time to usec, rounding up. */
				u64 val = poll_to_ns + 999;

				do_div(val, 1000);
				poll_to = val;
			}
		}
	}

	iowq->napi_busy_poll_to = poll_to;
}

/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop if napi is enabled and the ring does not use
 * SQPOLL.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);

	if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled)
		io_napi_blocking_busy_loop(ctx, iowq);
}

/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Execute the napi busy poll loop on behalf of the sqpoll thread.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	LIST_HEAD(napi_list);
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_to))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	rcu_read_lock();
	is_stale = __io_napi_do_busy_loop(ctx, NULL);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif