// SPDX-License-Identifier: GPL-2.0

#include "io_uring.h"
#include "napi.h"

#ifdef CONFIG_NET_RX_BUSY_POLL

/* Timeout for cleaning out stale entries. */
#define NAPI_TIMEOUT		(60 * SEC_CONVERSION)

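/*
 * One entry per NAPI ID that requests issued on this ring have touched.
 * Entries sit on both a hash table (lookup by NAPI ID) and a list
 * (iteration while busy polling), and are freed via RCU once they have
 * not been refreshed for NAPI_TIMEOUT.
 */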
struct io_napi_entry {
	unsigned int		napi_id;
	struct list_head	list;

	unsigned long		timeout;
	struct hlist_node	node;

	struct rcu_head		rcu;
};

static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
					       unsigned int napi_id)
{
	struct io_napi_entry *e;

	hlist_for_each_entry_rcu(e, hash_list, node) {
		if (e->napi_id != napi_id)
			continue;
		return e;
	}

	return NULL;
}

static inline ktime_t net_to_ktime(unsigned long t)
{
	/* busy_loop_current_time() reports ns >> 10 (roughly usecs); undo the shift */
	return ns_to_ktime(t << 10);
}

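/*
 * Track the NAPI ID of @sock's receive queue so that subsequent waits on
 * this ring can busy poll it. Refreshes the timeout of an existing entry,
 * or allocates and hashes a new one.
 */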
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
	struct hlist_head *hash_list;
	unsigned int napi_id;
	struct sock *sk;
	struct io_napi_entry *e;

	sk = sock->sk;
	if (!sk)
		return;

	napi_id = READ_ONCE(sk->sk_napi_id);

	/* Values below MIN_NAPI_ID are not valid NAPI IDs; nothing to track. */
	if (napi_id < MIN_NAPI_ID)
		return;

	hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];

	rcu_read_lock();
	e = io_napi_hash_find(hash_list, napi_id);
	if (e) {
		e->timeout = jiffies + NAPI_TIMEOUT;
		rcu_read_unlock();
		return;
	}
	rcu_read_unlock();

	e = kmalloc(sizeof(*e), GFP_NOWAIT);
	if (!e)
		return;

	e->napi_id = napi_id;
	e->timeout = jiffies + NAPI_TIMEOUT;

	spin_lock(&ctx->napi_lock);
	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
		spin_unlock(&ctx->napi_lock);
		kfree(e);
		return;
	}

	hlist_add_tail_rcu(&e->node, hash_list);
	list_add_tail(&e->list, &ctx->napi_list);
	spin_unlock(&ctx->napi_lock);
}

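/*
 * Drop entries whose timeout has expired, i.e. entries that no request has
 * refreshed for NAPI_TIMEOUT. Freed via RCU since readers walk the hash
 * table and list without holding napi_lock.
 */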
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		if (time_after(jiffies, e->timeout)) {
			list_del(&e->list);
			hash_del_rcu(&e->node);
			kfree_rcu(e, rcu);
		}
	}
	spin_unlock(&ctx->napi_lock);
}

static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
{
	if (is_stale)
		__io_napi_remove_stale(ctx);
}

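/*
 * Returns true once @bp (the busy poll time budget) has elapsed since
 * @start_time, or immediately if no budget was set.
 */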
static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
					     ktime_t bp)
{
	if (bp) {
		ktime_t end_time = ktime_add(start_time, bp);
		ktime_t now = net_to_ktime(busy_loop_current_time());

		return ktime_after(now, end_time);
	}

	return true;
}

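/*
 * Loop end callback passed to napi_busy_loop_rcu(): stop polling on a
 * pending signal, when the ring has completions or other work to run, or
 * once the busy poll time budget is exhausted.
 */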
static bool io_napi_busy_loop_should_end(void *data,
					 unsigned long start_time)
{
	struct io_wait_queue *iowq = data;

	if (signal_pending(current))
		return true;
	if (io_should_wake(iowq) || io_has_work(iowq->ctx))
		return true;
	if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
				      iowq->napi_busy_poll_dt))
		return true;

	return false;
}

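/*
 * Busy poll every tracked NAPI ID once. A loop end check is only used when
 * @loop_end_arg is set (the single-entry case). Returns true if any entry
 * has gone stale so the caller can prune the list.
 */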
static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
				   void *loop_end_arg)
{
	struct io_napi_entry *e;
	bool (*loop_end)(void *, unsigned long) = NULL;
	bool is_stale = false;

	if (loop_end_arg)
		loop_end = io_napi_busy_loop_should_end;

	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
		napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
				   ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);

		if (time_after(jiffies, e->timeout))
			is_stale = true;
	}

	return is_stale;
}

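/*
 * Busy poll loop used when a task is about to block waiting for completions:
 * keep polling the tracked NAPI IDs until the wait should end or the poll
 * time budget runs out.
 */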
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
				       struct io_wait_queue *iowq)
{
	unsigned long start_time = busy_loop_current_time();
	void *loop_end_arg = NULL;
	bool is_stale = false;

	/* A single-entry list uses a different napi loop end check function,
	 * and the loop is only executed once.
	 */
	if (list_is_singular(&ctx->napi_list))
		loop_end_arg = iowq;

	rcu_read_lock();
	do {
		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
}

/*
 * io_napi_init() - Init napi settings
 * @ctx: pointer to io-uring context structure
 *
 * Init napi settings in the io-uring context.
 */
void io_napi_init(struct io_ring_ctx *ctx)
{
	u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;

	INIT_LIST_HEAD(&ctx->napi_list);
	spin_lock_init(&ctx->napi_lock);
	ctx->napi_prefer_busy_poll = false;
	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
}

/*
 * io_napi_free() - Deallocate napi
 * @ctx: pointer to io-uring context structure
 *
 * Free the napi list and the hash table in the io-uring context.
 */
void io_napi_free(struct io_ring_ctx *ctx)
{
	struct io_napi_entry *e;
	unsigned int i;

	spin_lock(&ctx->napi_lock);
	hash_for_each(ctx->napi_ht, i, e, node) {
		hash_del_rcu(&e->node);
		kfree_rcu(e, rcu);
	}
	spin_unlock(&ctx->napi_lock);
}

/*
 * io_register_napi() - Register napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Register napi in the io-uring context.
 */
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};
	struct io_uring_napi napi;

	if (ctx->flags & IORING_SETUP_IOPOLL)
		return -EINVAL;
	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
		return -EINVAL;

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
	WRITE_ONCE(ctx->napi_enabled, true);
	return 0;
}

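/*
 * Not part of the kernel build; a minimal userspace sketch of this interface,
 * assuming liburing's io_uring_register_napi()/io_uring_unregister_napi()
 * helpers are available (liburing 2.6+):
 *
 *	struct io_uring_napi napi = {
 *		.busy_poll_to     = 100,	// busy poll budget in usecs
 *		.prefer_busy_poll = 1,
 *	};
 *
 *	io_uring_register_napi(&ring, &napi);	// napi gets the old settings back
 *	// ... submit and wait as usual; waits now busy poll tracked NAPI IDs
 *	io_uring_unregister_napi(&ring, &napi);
 */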
/*
 * io_unregister_napi() - Unregister napi with io-uring
 * @ctx: pointer to io-uring context structure
 * @arg: pointer to io_uring_napi structure
 *
 * Unregister napi. If arg has been specified, copy the busy poll timeout and
 * prefer busy poll setting to the passed-in structure.
 */
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
	const struct io_uring_napi curr = {
		.busy_poll_to	  = ktime_to_us(ctx->napi_busy_poll_dt),
		.prefer_busy_poll = ctx->napi_prefer_busy_poll
	};

	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
	WRITE_ONCE(ctx->napi_enabled, false);
	return 0;
}

/*
 * __io_napi_adjust_timeout() - adjust busy loop timeout
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 * @to_wait: how long the caller intends to wait, or 0 for no wait limit
 *
 * Adjust the busy loop timeout according to the wait timeout and the busy
 * poll timeout: if the configured NAPI busy poll time is longer than the
 * wait timeout, clamp it to the wait timeout.
 */
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
			      ktime_t to_wait)
{
	ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);

	if (to_wait)
		poll_dt = min(poll_dt, to_wait);

	iowq->napi_busy_poll_dt = poll_dt;
}

/*
 * __io_napi_busy_loop() - execute busy poll loop
 * @ctx: pointer to io-uring context structure
 * @iowq: pointer to io wait queue
 *
 * Execute the busy poll loop before blocking for completions. Only used for
 * non-SQPOLL rings; SQPOLL rings busy poll from the SQ thread instead.
 */
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
	iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);

	if (!(ctx->flags & IORING_SETUP_SQPOLL))
		io_napi_blocking_busy_loop(ctx, iowq);
}

/*
 * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll
 * @ctx: pointer to io-uring context structure
 *
 * Execute the napi busy poll loop once over all tracked NAPI IDs. Returns 1
 * if polling was done, 0 otherwise.
 */
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
{
	bool is_stale = false;

	if (!READ_ONCE(ctx->napi_busy_poll_dt))
		return 0;
	if (list_empty_careful(&ctx->napi_list))
		return 0;

	rcu_read_lock();
	is_stale = __io_napi_do_busy_loop(ctx, NULL);
	rcu_read_unlock();

	io_napi_remove_stale(ctx, is_stale);
	return 1;
}

#endif