/*
 * QEMU aio implementation
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "block/block.h"
#include "qemu/rcu.h"
#include "qemu/rcu_queue.h"
#include "qemu/sockets.h"
#include "qemu/cutils.h"
#include "trace.h"
#ifdef CONFIG_EPOLL_CREATE1
#include <sys/epoll.h>
#endif

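/*
 * One AioHandler is allocated for every file descriptor registered with
 * aio_set_fd_handler(); it records the callbacks to invoke and which of the
 * context's handler lists the node currently sits on.
 */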
struct AioHandler
{
    GPollFD pfd;
    IOHandler *io_read;
    IOHandler *io_write;
    AioPollFn *io_poll;
    IOHandler *io_poll_begin;
    IOHandler *io_poll_end;
    void *opaque;
    bool is_external;
    QLIST_ENTRY(AioHandler) node;
    QLIST_ENTRY(AioHandler) node_ready; /* only used during aio_poll() */
    QLIST_ENTRY(AioHandler) node_deleted;
};

/* Add a handler to a ready list */
static void add_ready_handler(AioHandlerList *ready_list,
                              AioHandler *node,
                              int revents)
{
    QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
    node->pfd.revents = revents;
    QLIST_INSERT_HEAD(ready_list, node, node_ready);
}

#ifdef CONFIG_EPOLL_CREATE1

/* The fd number threshold to switch to epoll */
#define EPOLL_ENABLE_THRESHOLD 64

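/*
 * Tear down the epoll file descriptor and mark epoll as unavailable for this
 * context; subsequent iterations fall back to qemu_poll_ns().
 */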
static void aio_epoll_disable(AioContext *ctx)
{
    ctx->epoll_enabled = false;
    if (!ctx->epoll_available) {
        return;
    }
    ctx->epoll_available = false;
    close(ctx->epollfd);
}

static inline int epoll_events_from_pfd(int pfd_events)
{
    return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
           (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
           (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
           (pfd_events & G_IO_ERR ? EPOLLERR : 0);
}

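/*
 * Register every live handler with the epoll instance.  Returns false on the
 * first epoll_ctl() failure so the caller can disable epoll and keep using
 * ppoll instead.
 */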
static bool aio_epoll_try_enable(AioContext *ctx)
{
    AioHandler *node;
    struct epoll_event event;

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int r;
        if (QLIST_IS_INSERTED(node, node_deleted) || !node->pfd.events) {
            continue;
        }
        event.events = epoll_events_from_pfd(node->pfd.events);
        event.data.ptr = node;
        r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
        if (r) {
            return false;
        }
    }
    ctx->epoll_enabled = true;
    return true;
}

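/*
 * Mirror a handler change into the epoll set: add, modify or delete the fd
 * depending on node->pfd.events.  On failure epoll is disabled entirely.
 */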
static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
    struct epoll_event event;
    int r;
    int ctl;

    if (!ctx->epoll_enabled) {
        return;
    }
    if (!node->pfd.events) {
        ctl = EPOLL_CTL_DEL;
    } else {
        event.data.ptr = node;
        event.events = epoll_events_from_pfd(node->pfd.events);
        ctl = is_new ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
    }

    r = epoll_ctl(ctx->epollfd, ctl, node->pfd.fd, &event);
    if (r) {
        aio_epoll_disable(ctx);
    }
}

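/*
 * Wait for events with epoll.  The timed wait itself is done by
 * qemu_poll_ns() on the epoll fd so that the nanosecond timeout is honoured;
 * epoll_wait() then harvests the ready fds into @ready_list.  Returns the
 * number of handlers added to @ready_list, zero on timeout, or a negative
 * value on error.
 */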
static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
                     int64_t timeout)
{
    GPollFD pfd = {
        .fd = ctx->epollfd,
        .events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR,
    };
    AioHandler *node;
    int i, ret = 0;
    struct epoll_event events[128];

    if (timeout > 0) {
        ret = qemu_poll_ns(&pfd, 1, timeout);
        if (ret > 0) {
            timeout = 0;
        }
    }
    if (timeout <= 0 || ret > 0) {
        ret = epoll_wait(ctx->epollfd, events,
                         ARRAY_SIZE(events),
                         timeout);
        if (ret <= 0) {
            goto out;
        }
        for (i = 0; i < ret; i++) {
            int ev = events[i].events;
            int revents = (ev & EPOLLIN ? G_IO_IN : 0) |
                          (ev & EPOLLOUT ? G_IO_OUT : 0) |
                          (ev & EPOLLHUP ? G_IO_HUP : 0) |
                          (ev & EPOLLERR ? G_IO_ERR : 0);

            node = events[i].data.ptr;
            add_ready_handler(ready_list, node, revents);
        }
    }
out:
    return ret;
}

static bool aio_epoll_enabled(AioContext *ctx)
{
    /* Fall back to ppoll when external clients are disabled. */
    return !aio_external_disabled(ctx) && ctx->epoll_enabled;
}

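/*
 * Decide whether this iteration should wait with epoll instead of ppoll.
 * epoll is enabled lazily once the number of pollfds reaches
 * EPOLL_ENABLE_THRESHOLD, since it only pays off for large fd counts.
 */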
static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    if (!ctx->epoll_available) {
        return false;
    }
    if (aio_epoll_enabled(ctx)) {
        return true;
    }
    if (npfd >= EPOLL_ENABLE_THRESHOLD) {
        if (aio_epoll_try_enable(ctx)) {
            return true;
        } else {
            aio_epoll_disable(ctx);
        }
    }
    return false;
}

#else

static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
{
}

static int aio_epoll(AioContext *ctx, AioHandlerList *ready_list,
                     int64_t timeout)
{
    assert(false);
}

static bool aio_epoll_enabled(AioContext *ctx)
{
    return false;
}

static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
                                 unsigned npfd, int64_t timeout)
{
    return false;
}

#endif

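/* Return the registered handler for @fd, skipping nodes already marked deleted. */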
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
    AioHandler *node;

    QLIST_FOREACH(node, &ctx->aio_handlers, node) {
        if (node->pfd.fd == fd) {
            if (!QLIST_IS_INSERTED(node, node_deleted)) {
                return node;
            }
        }
    }

    return NULL;
}

static bool aio_remove_fd_handler(AioContext *ctx, AioHandler *node)
{
    /* If the GSource is in the process of being destroyed then
     * g_source_remove_poll() causes an assertion failure.  Skip
     * removal in that case, because glib cleans up its state during
     * destruction anyway.
     */
    if (!g_source_is_destroyed(&ctx->source)) {
        g_source_remove_poll(&ctx->source, &node->pfd);
    }

    /* If a read is in progress, just mark the node as deleted */
    if (qemu_lockcnt_count(&ctx->list_lock)) {
        QLIST_INSERT_HEAD_RCU(&ctx->deleted_aio_handlers, node, node_deleted);
        node->pfd.revents = 0;
        return false;
    }
    /* Otherwise, delete it for real.  We can't just mark it as
     * deleted because deleted nodes are only cleaned up while
     * no one is walking the handlers list.
     */
    QLIST_REMOVE(node, node);
    return true;
}

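/*
 * Register, update or remove the handler for @fd.  Passing NULL for io_read,
 * io_write and io_poll unregisters the fd.  Illustrative sketch of a typical
 * caller (my_ctx, my_fd and my_read_cb are hypothetical names, not part of
 * this file):
 *
 *     aio_set_fd_handler(my_ctx, my_fd, true, my_read_cb, NULL, NULL, opaque);
 *     ...
 *     aio_set_fd_handler(my_ctx, my_fd, true, NULL, NULL, NULL, NULL);
 */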
void aio_set_fd_handler(AioContext *ctx,
                        int fd,
                        bool is_external,
                        IOHandler *io_read,
                        IOHandler *io_write,
                        AioPollFn *io_poll,
                        void *opaque)
{
    AioHandler *node;
    AioHandler *new_node = NULL;
    bool is_new = false;
    bool deleted = false;
    int poll_disable_change;

    qemu_lockcnt_lock(&ctx->list_lock);

    node = find_aio_handler(ctx, fd);

    /* Are we deleting the fd handler? */
    if (!io_read && !io_write && !io_poll) {
        if (node == NULL) {
            qemu_lockcnt_unlock(&ctx->list_lock);
            return;
        }
        /* Clean events in order to unregister fd from the ctx epoll. */
        node->pfd.events = 0;

        poll_disable_change = -!node->io_poll;
    } else {
        poll_disable_change = !io_poll - (node && !node->io_poll);
        if (node == NULL) {
            is_new = true;
        }
        /* Alloc and insert if it's not already there */
        new_node = g_new0(AioHandler, 1);

        /* Update handler with latest information */
        new_node->io_read = io_read;
        new_node->io_write = io_write;
        new_node->io_poll = io_poll;
        new_node->opaque = opaque;
        new_node->is_external = is_external;

        if (is_new) {
            new_node->pfd.fd = fd;
        } else {
            new_node->pfd = node->pfd;
        }
        g_source_add_poll(&ctx->source, &new_node->pfd);

        new_node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
        new_node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);

        QLIST_INSERT_HEAD_RCU(&ctx->aio_handlers, new_node, node);
    }
    if (node) {
        deleted = aio_remove_fd_handler(ctx, node);
    }

    /* No need to order poll_disable_cnt writes against other updates;
     * the counter is only used to avoid wasting time and latency on
     * iterated polling when the system call will be ultimately necessary.
     * Changing handlers is a rare event, and a little wasted polling until
     * the aio_notify below is not an issue.
     */
    atomic_set(&ctx->poll_disable_cnt,
               atomic_read(&ctx->poll_disable_cnt) + poll_disable_change);

    if (new_node) {
        aio_epoll_update(ctx, new_node, is_new);
    } else if (node) {
        /* Unregister deleted fd_handler */
        aio_epoll_update(ctx, node, false);
    }
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);

    if (deleted) {
        g_free(node);
    }
}

void aio_set_fd_poll(AioContext *ctx, int fd,
                     IOHandler *io_poll_begin,
                     IOHandler *io_poll_end)
{
    AioHandler *node = find_aio_handler(ctx, fd);

    if (!node) {
        return;
    }

    node->io_poll_begin = io_poll_begin;
    node->io_poll_end = io_poll_end;
}

void aio_set_event_notifier(AioContext *ctx,
                            EventNotifier *notifier,
                            bool is_external,
                            EventNotifierHandler *io_read,
                            AioPollFn *io_poll)
{
    aio_set_fd_handler(ctx, event_notifier_get_fd(notifier), is_external,
                       (IOHandler *)io_read, NULL, io_poll, notifier);
}

void aio_set_event_notifier_poll(AioContext *ctx,
                                 EventNotifier *notifier,
                                 EventNotifierHandler *io_poll_begin,
                                 EventNotifierHandler *io_poll_end)
{
    aio_set_fd_poll(ctx, event_notifier_get_fd(notifier),
                    (IOHandler *)io_poll_begin,
                    (IOHandler *)io_poll_end);
}

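/*
 * Switch all handlers into or out of polling mode by calling their
 * io_poll_begin()/io_poll_end() callbacks.  When leaving polling mode the
 * handlers are polled one final time; returns true if that last poll made
 * progress.
 */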
static bool poll_set_started(AioContext *ctx, bool started)
{
    AioHandler *node;
    bool progress = false;

    if (started == ctx->poll_started) {
        return false;
    }

    ctx->poll_started = started;

    qemu_lockcnt_inc(&ctx->list_lock);
    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        IOHandler *fn;

        if (QLIST_IS_INSERTED(node, node_deleted)) {
            continue;
        }

        if (started) {
            fn = node->io_poll_begin;
        } else {
            fn = node->io_poll_end;
        }

        if (fn) {
            fn(node->opaque);
        }

        /* Poll one last time in case ->io_poll_end() raced with the event */
        if (!started) {
            progress = node->io_poll(node->opaque) || progress;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return progress;
}


bool aio_prepare(AioContext *ctx)
{
    /* Poll mode cannot be used with glib's event loop, disable it. */
    poll_set_started(ctx, false);

    return false;
}

bool aio_pending(AioContext *ctx)
{
    AioHandler *node;
    bool result = false;

    /*
     * We have to walk very carefully in case aio_set_fd_handler is
     * called while we're walking.
     */
    qemu_lockcnt_inc(&ctx->list_lock);

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        int revents;

        revents = node->pfd.revents & node->pfd.events;
        if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
        if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
            aio_node_check(ctx, node->is_external)) {
            result = true;
            break;
        }
    }
    qemu_lockcnt_dec(&ctx->list_lock);

    return result;
}

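/*
 * Free handlers that aio_remove_fd_handler() could only mark as deleted.
 * Freeing is skipped while another walker of the handler list is active;
 * the outermost caller does it instead.
 */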
static void aio_free_deleted_handlers(AioContext *ctx)
{
    AioHandler *node;

    if (QLIST_EMPTY_RCU(&ctx->deleted_aio_handlers)) {
        return;
    }
    if (!qemu_lockcnt_dec_if_lock(&ctx->list_lock)) {
        return; /* we are nested, let the parent do the freeing */
    }

    while ((node = QLIST_FIRST_RCU(&ctx->deleted_aio_handlers))) {
        QLIST_REMOVE(node, node);
        QLIST_REMOVE(node, node_deleted);
        g_free(node);
    }

    qemu_lockcnt_inc_and_unlock(&ctx->list_lock);
}

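/*
 * Invoke the read/write callbacks that match this handler's pending revents.
 * Dispatching the internal ctx->notifier does not count as progress.
 */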
static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
{
    bool progress = false;
    int revents;

    revents = node->pfd.revents & node->pfd.events;
    node->pfd.revents = 0;

    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_read) {
        node->io_read(node->opaque);

        /* aio_notify() does not count as progress */
        if (node->opaque != &ctx->notifier) {
            progress = true;
        }
    }
    if (!QLIST_IS_INSERTED(node, node_deleted) &&
        (revents & (G_IO_OUT | G_IO_ERR)) &&
        aio_node_check(ctx, node->is_external) &&
        node->io_write) {
        node->io_write(node->opaque);
        progress = true;
    }

    return progress;
}

/*
 * If we have a list of ready handlers then this is more efficient than
 * scanning all handlers with aio_dispatch_handlers().
 */
static bool aio_dispatch_ready_handlers(AioContext *ctx,
                                        AioHandlerList *ready_list)
{
    bool progress = false;
    AioHandler *node;

    while ((node = QLIST_FIRST(ready_list))) {
        QLIST_REMOVE(node, node_ready);
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}

/* Slower than aio_dispatch_ready_handlers() but only used via glib */
static bool aio_dispatch_handlers(AioContext *ctx)
{
    AioHandler *node, *tmp;
    bool progress = false;

    QLIST_FOREACH_SAFE_RCU(node, &ctx->aio_handlers, node, tmp) {
        progress = aio_dispatch_handler(ctx, node) || progress;
    }

    return progress;
}

void aio_dispatch(AioContext *ctx)
{
    qemu_lockcnt_inc(&ctx->list_lock);
    aio_bh_poll(ctx);
    aio_dispatch_handlers(ctx);
    aio_free_deleted_handlers(ctx);
    qemu_lockcnt_dec(&ctx->list_lock);

    timerlistgroup_run_timers(&ctx->tlg);
}

/* These thread-local variables are used only in a small part of aio_poll
 * around the call to the poll() system call.  In particular they are not
 * used while aio_poll is performing callbacks, which makes it much easier
 * to think about reentrancy!
 *
 * Stack-allocated arrays would be perfect but they have size limitations;
 * heap allocation is expensive enough that we want to reuse arrays across
 * calls to aio_poll().  And because poll() has to be called without holding
 * any lock, the arrays cannot be stored in AioContext.  Thread-local data
 * has none of the disadvantages of these three options.
 */
static __thread GPollFD *pollfds;
static __thread AioHandler **nodes;
static __thread unsigned npfd, nalloc;
static __thread Notifier pollfds_cleanup_notifier;

static void pollfds_cleanup(Notifier *n, void *unused)
{
    g_assert(npfd == 0);
    g_free(pollfds);
    g_free(nodes);
    nalloc = 0;
}

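/*
 * Append @node to the thread-local pollfds[]/nodes[] arrays, growing both
 * geometrically as needed.
 */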
static void add_pollfd(AioHandler *node)
{
    if (npfd == nalloc) {
        if (nalloc == 0) {
            pollfds_cleanup_notifier.notify = pollfds_cleanup;
            qemu_thread_atexit_add(&pollfds_cleanup_notifier);
            nalloc = 8;
        } else {
            g_assert(nalloc <= INT_MAX);
            nalloc *= 2;
        }
        pollfds = g_renew(GPollFD, pollfds, nalloc);
        nodes = g_renew(AioHandler *, nodes, nalloc);
    }
    nodes[npfd] = node;
    pollfds[npfd] = (GPollFD) {
        .fd = node->pfd.fd,
        .events = node->pfd.events,
    };
    npfd++;
}

static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout)
{
    bool progress = false;
    AioHandler *node;

    /*
     * Optimization: ->io_poll() handlers often contain RCU read critical
     * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock()
     * -> rcu_read_lock() -> ... sequences with expensive memory
     * synchronization primitives.  Make the entire polling loop an RCU
     * critical section because nested rcu_read_lock()/rcu_read_unlock() calls
     * are cheap.
     */
    RCU_READ_LOCK_GUARD();

    QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
        if (!QLIST_IS_INSERTED(node, node_deleted) && node->io_poll &&
            aio_node_check(ctx, node->is_external) &&
            node->io_poll(node->opaque)) {
            /*
             * Polling was successful, exit try_poll_mode immediately
             * to adjust the next polling time.
             */
            *timeout = 0;
            if (node->opaque != &ctx->notifier) {
                progress = true;
            }
        }

        /* Caller handles freeing deleted nodes.  Don't do it here. */
    }

    return progress;
}

/* run_poll_handlers:
 * @ctx: the AioContext
 * @max_ns: maximum time to poll for, in nanoseconds
 *
 * Polls for a given time.
 *
 * Note that ctx->notify_me must be non-zero so this function can detect
 * aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool run_poll_handlers(AioContext *ctx, int64_t max_ns, int64_t *timeout)
{
    bool progress;
    int64_t start_time, elapsed_time;

    assert(ctx->notify_me);
    assert(qemu_lockcnt_count(&ctx->list_lock) > 0);

    trace_run_poll_handlers_begin(ctx, max_ns, *timeout);

    start_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    do {
        progress = run_poll_handlers_once(ctx, timeout);
        elapsed_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time;
        max_ns = qemu_soonest_timeout(*timeout, max_ns);
        assert(!(max_ns && progress));
    } while (elapsed_time < max_ns && !atomic_read(&ctx->poll_disable_cnt));

    /* If time has passed with no successful polling, adjust *timeout to
     * keep the same ending time.
     */
    if (*timeout != -1) {
        *timeout -= MIN(*timeout, elapsed_time);
    }

    trace_run_poll_handlers_end(ctx, progress, *timeout);
    return progress;
}

/* try_poll_mode:
 * @ctx: the AioContext
 * @timeout: timeout for blocking wait, computed by the caller and updated if
 *           polling succeeds.
 *
 * ctx->notify_me must be non-zero so this function can detect aio_notify().
 *
 * Note that the caller must have incremented ctx->list_lock.
 *
 * Returns: true if progress was made, false otherwise
 */
static bool try_poll_mode(AioContext *ctx, int64_t *timeout)
{
    int64_t max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);

    if (max_ns && !atomic_read(&ctx->poll_disable_cnt)) {
        poll_set_started(ctx, true);

        if (run_poll_handlers(ctx, max_ns, timeout)) {
            return true;
        }
    }

    if (poll_set_started(ctx, false)) {
        *timeout = 0;
        return true;
    }

    return false;
}

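/*
 * Run one iteration of the event loop: optionally busy-poll via
 * try_poll_mode(), then wait with ppoll()/epoll, and finally dispatch bottom
 * halves, ready fd handlers and expired timers.  Must be called from the
 * AioContext's home thread.
 */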
bool aio_poll(AioContext *ctx, bool blocking)
{
    AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
    AioHandler *node;
    int i;
    int ret = 0;
    bool progress;
    int64_t timeout;
    int64_t start = 0;

    assert(in_aio_context_home_thread(ctx));

    /* aio_notify can avoid the expensive event_notifier_set if
     * everything (file descriptors, bottom halves, timers) will
     * be re-evaluated before the next blocking poll().  This is
     * already true when aio_poll is called with blocking == false;
     * if blocking == true, it is only true after poll() returns,
     * so disable the optimization now.
     */
    if (blocking) {
        atomic_add(&ctx->notify_me, 2);
    }

    qemu_lockcnt_inc(&ctx->list_lock);

    if (ctx->poll_max_ns) {
        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }

    timeout = blocking ? aio_compute_timeout(ctx) : 0;
    progress = try_poll_mode(ctx, &timeout);
    assert(!(timeout && progress));

    /* If polling is allowed, non-blocking aio_poll does not need the
     * system call---a single round of run_poll_handlers_once suffices.
     */
    if (timeout || atomic_read(&ctx->poll_disable_cnt)) {
        assert(npfd == 0);

        /* fill pollfds */

        if (!aio_epoll_enabled(ctx)) {
            QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
                if (!QLIST_IS_INSERTED(node, node_deleted) && node->pfd.events
                    && aio_node_check(ctx, node->is_external)) {
                    add_pollfd(node);
                }
            }
        }

        /* wait until next event */
        if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
            npfd = 0; /* pollfds[] is not being used */
            ret = aio_epoll(ctx, &ready_list, timeout);
        } else {
            ret = qemu_poll_ns(pollfds, npfd, timeout);
        }
    }

    if (blocking) {
        atomic_sub(&ctx->notify_me, 2);
        aio_notify_accept(ctx);
    }

    /* Adjust polling time */
    if (ctx->poll_max_ns) {
        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;

        if (block_ns <= ctx->poll_ns) {
            /* This is the sweet spot, no adjustment needed */
        } else if (block_ns > ctx->poll_max_ns) {
            /* We'd have to poll for too long, poll less */
            int64_t old = ctx->poll_ns;

            if (ctx->poll_shrink) {
                ctx->poll_ns /= ctx->poll_shrink;
            } else {
                ctx->poll_ns = 0;
            }

            trace_poll_shrink(ctx, old, ctx->poll_ns);
        } else if (ctx->poll_ns < ctx->poll_max_ns &&
                   block_ns < ctx->poll_max_ns) {
            /* There is room to grow, poll longer */
            int64_t old = ctx->poll_ns;
            int64_t grow = ctx->poll_grow;

            if (grow == 0) {
                grow = 2;
            }

            if (ctx->poll_ns) {
                ctx->poll_ns *= grow;
            } else {
                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
            }

            if (ctx->poll_ns > ctx->poll_max_ns) {
                ctx->poll_ns = ctx->poll_max_ns;
            }

            trace_poll_grow(ctx, old, ctx->poll_ns);
        }
    }

    /* if we have any readable fds, dispatch event */
    if (ret > 0) {
        for (i = 0; i < npfd; i++) {
            int revents = pollfds[i].revents;

            if (revents) {
                add_ready_handler(&ready_list, nodes[i], revents);
            }
        }
    }

    npfd = 0;

    progress |= aio_bh_poll(ctx);

    if (ret > 0) {
        progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
    }

    aio_free_deleted_handlers(ctx);

    qemu_lockcnt_dec(&ctx->list_lock);

    progress |= timerlistgroup_run_timers(&ctx->tlg);

    return progress;
}

void aio_context_setup(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
    assert(!ctx->epollfd);
    ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
    if (ctx->epollfd == -1) {
        fprintf(stderr, "Failed to create epoll instance: %s", strerror(errno));
        ctx->epoll_available = false;
    } else {
        ctx->epoll_available = true;
    }
#endif
}

void aio_context_destroy(AioContext *ctx)
{
#ifdef CONFIG_EPOLL_CREATE1
    aio_epoll_disable(ctx);
#endif
}

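/*
 * Configure adaptive polling: @max_ns caps the busy-poll time (0 disables
 * polling), @grow scales poll_ns up when there is room to poll longer
 * (0 means double) and @shrink scales it down when polling took too long
 * (0 resets poll_ns to 0).
 */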
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
                                 int64_t grow, int64_t shrink, Error **errp)
{
    /* No thread synchronization here, it doesn't matter if an incorrect value
     * is used once.
     */
    ctx->poll_max_ns = max_ns;
    ctx->poll_ns = 0;
    ctx->poll_grow = grow;
    ctx->poll_shrink = shrink;

    aio_notify(ctx);
}