typedef struct AioHandler AioHandler;
typedef void QEMUBHFunc(void *opaque);
+typedef bool AioPollFn(void *opaque); /* type of the io_poll callbacks */
typedef void IOHandler(void *opaque);
+struct Coroutine; /* forward declaration for the aio_co_*() prototypes */
struct ThreadPool;
struct LinuxAioState;
struct AioContext {
GSource source;
- /* Protects all fields from multi-threaded access */
+ /* Used by AioContext users to protect from multi-threaded access. */
QemuRecMutex lock;
- /* The list of registered AIO handlers */
+ /* The list of registered AIO handlers. Protected by ctx->list_lock. */
QLIST_HEAD(, AioHandler) aio_handlers;
- /* This is a simple lock used to protect the aio_handlers list.
- * Specifically, it's used to ensure that no callbacks are removed while
- * we're walking and dispatching callbacks.
- */
- int walking_handlers;
-
/* Used to avoid unnecessary event_notifier_set calls in aio_notify;
* accessed with atomic primitives. If this field is 0, everything
* (file descriptors, bottom halves, timers) will be re-evaluated
*/
uint32_t notify_me;
- /* lock to protect between bh's adders and deleter */
- QemuMutex bh_lock;
+ /* A lock that synchronizes QEMUBH and AioHandler adders with deleters,
+ * and ensures that no callbacks are removed while we're walking and
+ * dispatching them.
+ */
+ QemuLockCnt list_lock;
/* Anchor of the list of Bottom Halves belonging to the context */
struct QEMUBH *first_bh;
- /* A simple lock used to protect the first_bh list, and ensure that
- * no callbacks are removed while we're walking and dispatching callbacks.
- */
- int walking_bh;
-
/* Used by aio_notify.
*
* "notified" is used to avoid expensive event_notifier_test_and_clear
bool notified;
EventNotifier notifier;
- /* Thread pool for performing work and receiving completion callbacks */
+ QSLIST_HEAD(, Coroutine) scheduled_coroutines;
+ QEMUBH *co_schedule_bh;
+
+ /* Thread pool for performing work and receiving completion callbacks.
+ * Has its own locking.
+ */
struct ThreadPool *thread_pool;
#ifdef CONFIG_LINUX_AIO
struct LinuxAioState *linux_aio;
#endif
- /* TimerLists for calling timers - one per clock type */
+ /* TimerLists for calling timers - one per clock type. Has its own
+ * locking.
+ */
QEMUTimerListGroup tlg;
int external_disable_cnt;
+ /* Number of AioHandlers without .io_poll() */
+ int poll_disable_cnt;
+
+ /* Polling mode parameters */
+ int64_t poll_ns; /* current polling time in nanoseconds */
+ int64_t poll_max_ns; /* maximum polling time in nanoseconds */
+ int64_t poll_grow; /* polling time growth factor */
+ int64_t poll_shrink; /* polling time shrink factor */
+
+ /* Are we in polling mode or monitoring file descriptors? */
+ bool poll_started;
+
/* epoll(7) state used when built with CONFIG_EPOLL */
int epollfd;
bool epoll_enabled;
* automatically takes care of calling aio_context_acquire and
* aio_context_release.
*
- * Access to timers and BHs from a thread that has not acquired AioContext
- * is possible. Access to callbacks for now must be done while the AioContext
- * is owned by the thread (FIXME).
+ * Note that this is separate from bdrv_drained_begin/bdrv_drained_end. A
+ * thread still has to call those to avoid being interrupted by the guest.
+ *
+ * Bottom halves, timers and callbacks can be created or removed without
+ * acquiring the AioContext.
*/
void aio_context_acquire(AioContext *ctx);
/* Dispatch any pending callbacks from the GSource attached to the AioContext.
*
* This is used internally in the implementation of the GSource.
- *
- * @dispatch_fds: true to process fds, false to skip them
- * (can be used as an optimization by callers that know there
- * are no fds ready)
+ *
+ * @ctx: the AioContext whose pending callbacks are dispatched
*/
-bool aio_dispatch(AioContext *ctx, bool dispatch_fds);
+void aio_dispatch(AioContext *ctx);
/* Make progress in completing AIO work. This can issue new pending
* aio as a result of executing I/O completion or bh callbacks.
bool is_external,
IOHandler *io_read,
IOHandler *io_write,
+ AioPollFn *io_poll,
void *opaque);
+/* Set polling begin/end callbacks for a file descriptor that has already been
+ * registered with aio_set_fd_handler. Do nothing if the file descriptor is
+ * not registered.
+ *
+ * @fd: the previously registered file descriptor
+ * @io_poll_begin: the polling begin callback
+ * @io_poll_end: the polling end callback
+ */
+void aio_set_fd_poll(AioContext *ctx, int fd,
+ IOHandler *io_poll_begin,
+ IOHandler *io_poll_end);
+
/* Register an event notifier and associated callbacks. Behaves very similarly
* to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
* will be invoked when using aio_poll().
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *notifier,
bool is_external,
- EventNotifierHandler *io_read);
+ EventNotifierHandler *io_read,
+ AioPollFn *io_poll);
+
+/* Set polling begin/end callbacks for an event notifier that has already been
+ * registered with aio_set_event_notifier. Do nothing if the event notifier is
+ * not registered.
+ *
+ * @notifier: the previously registered event notifier
+ * @io_poll_begin: the polling begin callback
+ * @io_poll_end: the polling end callback
+ */
+void aio_set_event_notifier_poll(AioContext *ctx,
+ EventNotifier *notifier,
+ EventNotifierHandler *io_poll_begin,
+ EventNotifierHandler *io_poll_end);
/* Return a GSource that lets the main loop poll the file descriptors attached
* to this AioContext.
/* Return the ThreadPool bound to this AioContext */
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+/* Set up the LinuxAioState bound to this AioContext */
+struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
+
/* Return the LinuxAioState bound to this AioContext */
struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
/**
- * aio_timer_new:
+ * aio_timer_new_with_attrs:
* @ctx: the aio context
* @type: the clock type
* @scale: the scale
+ * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_<id> values
+ * to assign
* @cb: the callback to call on timer expiry
* @opaque: the opaque pointer to pass to the callback
*
- * Allocate a new timer attached to the context @ctx.
+ * Allocate a new timer (with attributes) attached to the context @ctx.
* The function is responsible for memory allocation.
*
- * The preferred interface is aio_timer_init. Use that
- * unless you really need dynamic memory allocation.
+ * The preferred interface is aio_timer_init or aio_timer_init_with_attrs.
+ * Use one of those unless you really need dynamic memory allocation.
+ *
+ * Returns: a pointer to the new timer
+ */
+static inline QEMUTimer *aio_timer_new_with_attrs(AioContext *ctx,
+ QEMUClockType type,
+ int scale, int attributes,
+ QEMUTimerCB *cb, void *opaque)
+{
+ return timer_new_full(&ctx->tlg, type, scale, attributes, cb, opaque);
+}
+
+/**
+ * aio_timer_new:
+ * @ctx: the aio context
+ * @type: the clock type
+ * @scale: the scale
+ * @cb: the callback to call on timer expiry
+ * @opaque: the opaque pointer to pass to the callback
+ *
+ * Allocate a new timer attached to the context @ctx.
+ * See aio_timer_new_with_attrs for details.
*
* Returns: a pointer to the new timer
*/
int scale,
QEMUTimerCB *cb, void *opaque)
{
- return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
+ return timer_new_full(&ctx->tlg, type, scale, 0, cb, opaque);
+}
+
+/**
+ * aio_timer_init_with_attrs:
+ * @ctx: the aio context
+ * @ts: the timer
+ * @type: the clock type
+ * @scale: the scale
+ * @attributes: 0, or one or more OR'ed QEMU_TIMER_ATTR_<id> values
+ * to assign
+ * @cb: the callback to call on timer expiry
+ * @opaque: the opaque pointer to pass to the callback
+ *
+ * Initialise a new timer (with attributes) attached to the context @ctx.
+ * The caller is responsible for memory allocation.
+ */
+static inline void aio_timer_init_with_attrs(AioContext *ctx,
+ QEMUTimer *ts, QEMUClockType type,
+ int scale, int attributes,
+ QEMUTimerCB *cb, void *opaque)
+{
+ timer_init_full(ts, &ctx->tlg, type, scale, attributes, cb, opaque);
}
/**
* @opaque: the opaque pointer to pass to the callback
*
* Initialise a new timer attached to the context @ctx.
- * The caller is responsible for memory allocation.
+ * See aio_timer_init_with_attrs for details; this is the same call with
+ * the attributes argument fixed to 0.
*/
static inline void aio_timer_init(AioContext *ctx,
QEMUTimer *ts, QEMUClockType type,
int scale,
QEMUTimerCB *cb, void *opaque)
{
- timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque);
+ timer_init_full(ts, &ctx->tlg, type, scale, 0, cb, opaque);
}
/**
*/
static inline void aio_enable_external(AioContext *ctx)
{
- assert(ctx->external_disable_cnt > 0);
- atomic_dec(&ctx->external_disable_cnt);
+ int old;
+
+ old = atomic_fetch_dec(&ctx->external_disable_cnt);
+ assert(old > 0);
+ if (old == 1) {
+ /* old == 1 means this call took the count to 0 (assuming
+ * atomic_fetch_dec returns the pre-decrement value -- confirm).
+ * Kick event loop so it re-arms file descriptors */
+ aio_notify(ctx);
+ }
}
/**
return !is_external || !atomic_read(&ctx->external_disable_cnt);
}
+/**
+ * aio_co_schedule:
+ * @ctx: the aio context
+ * @co: the coroutine
+ *
+ * Start a coroutine on a remote AioContext.
+ *
+ * The coroutine must not be entered by anyone else while aio_co_schedule()
+ * is active. In addition the coroutine must have yielded unless @ctx
+ * is the context in which the coroutine is running (i.e. the value of
+ * qemu_get_current_aio_context() from the coroutine itself).
+ */
+void aio_co_schedule(AioContext *ctx, struct Coroutine *co);
+
+/**
+ * aio_co_wake:
+ * @co: the coroutine
+ *
+ * Restart a coroutine on the AioContext where it was running last, thus
+ * preventing coroutines from jumping from one context to another when they
+ * go to sleep.
+ *
+ * aio_co_wake may be executed either in coroutine or non-coroutine
+ * context. The coroutine must not be entered by anyone else while
+ * aio_co_wake() is active.
+ */
+void aio_co_wake(struct Coroutine *co);
+
+/**
+ * aio_co_enter:
+ * @ctx: the context to run the coroutine
+ * @co: the coroutine to run
+ *
+ * Enter a coroutine in the specified AioContext.
+ */
+void aio_co_enter(AioContext *ctx, struct Coroutine *co);
+
/**
* Return the AioContext whose event loop runs in the current thread.
*
AioContext *qemu_get_current_aio_context(void);
/**
+ * in_aio_context_home_thread:
* @ctx: the aio context
*
- * Return whether we are running in the I/O thread that manages @ctx.
+ * Return whether we are running in the thread that normally runs @ctx. Note
+ * that acquiring/releasing @ctx does not affect the outcome: each AioContext
+ * still only has one home thread that is responsible for running it.
*/
-static inline bool aio_context_in_iothread(AioContext *ctx)
+static inline bool in_aio_context_home_thread(AioContext *ctx)
{
return ctx == qemu_get_current_aio_context();
}
*/
void aio_context_setup(AioContext *ctx);
+/**
+ * aio_context_destroy:
+ * @ctx: the aio context
+ *
+ * Destroy the aio context.
+ */
+void aio_context_destroy(AioContext *ctx);
+
+/**
+ * aio_context_set_poll_params:
+ * @ctx: the aio context
+ * @max_ns: how long to busy poll for, in nanoseconds
+ * @grow: polling time growth factor
+ * @shrink: polling time shrink factor
+ * @errp: error out-parameter (NOTE(review): the conditions under which it
+ * is set are not visible in this header -- confirm)
+ *
+ * Poll mode can be disabled by setting @max_ns to 0.
+ */
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+ int64_t grow, int64_t shrink,
+ Error **errp);
+
#endif