The previous patch fixed a race condition in which coroutines could be
executed twice, or entered after the coroutine had been deleted.

We can detect the common scenarios in which this happens, and print an
error message and abort before we corrupt memory / data, or segfault.

This patch aborts if an attempt is made to enter a coroutine while it is
still pending execution, either in a specific AioContext bh or pending
execution via a timer.  It also aborts if a coroutine is scheduled again
before a prior scheduled run has occurred.
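
For illustration, a minimal sketch of the misuse that the new check in
aio_co_schedule() catches; example_co, example_double_schedule and ctx
are illustrative placeholders, not code from this patch:

    #include "qemu/osdep.h"
    #include "qemu/coroutine.h"
    #include "block/aio.h"

    static void coroutine_fn example_co(void *opaque)
    {
        /* not reached in this sketch; the second schedule aborts first */
    }

    void example_double_schedule(AioContext *ctx)
    {
        Coroutine *co = qemu_coroutine_create(example_co, NULL);

        aio_co_schedule(ctx, co);   /* co->scheduled = "aio_co_schedule" */
        aio_co_schedule(ctx, co);   /* cmpxchg sees non-NULL -> abort()  */
    }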

We cannot rely on the existing co->caller check for recursive re-entry
to catch this, as the coroutine may run and exit with
COROUTINE_TERMINATE before the scheduled run executes.  (This is the
scenario that occurred and was fixed by the previous patch.)
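
As a rough sketch of that sequence, using the same placeholders as
above and assuming a caller that schedules a coroutine and then enters
it directly before the bh has run:

    void example_schedule_then_enter(AioContext *ctx)
    {
        Coroutine *co = qemu_coroutine_create(example_co, NULL);

        aio_co_schedule(ctx, co);   /* run queued in ctx->co_schedule_bh */

        /* Previously: co ran here, hit COROUTINE_TERMINATE and was
         * freed, leaving co->caller NULL, so nothing stopped the bottom
         * half from later entering the freed coroutine.  With this
         * patch, the entry below aborts because co->scheduled is still
         * set by aio_co_schedule(). */
        qemu_coroutine_enter(co);
    }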

This patch also reorders the Coroutine struct members in an attempt to
improve cache locality.

Signed-off-by: Jeff Cody <[email protected]>
Reviewed-by: Stefan Hajnoczi <[email protected]>
size_t locks_held;
+ /* Only used when the coroutine has yielded. */
+ AioContext *ctx;
+
+ /* Used to catch and abort on illegal co-routine entry.
+ * Will contain the name of the function that had first
+ * scheduled the coroutine. */
+ const char *scheduled;
+
+ QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
+
/* Coroutines that should be woken up when we yield or terminate.
* Only used when the coroutine is running.
*/
QSIMPLEQ_HEAD(, Coroutine) co_queue_wakeup;
- /* Only used when the coroutine has yielded. */
- AioContext *ctx;
- QSIMPLEQ_ENTRY(Coroutine) co_queue_next;
QSLIST_ENTRY(Coroutine) co_scheduled_next;
};
QSLIST_REMOVE_HEAD(&straight, co_scheduled_next);
trace_aio_co_schedule_bh_cb(ctx, co);
aio_context_acquire(ctx);
+
+ /* Protected by write barrier in qemu_aio_coroutine_enter */
+ atomic_set(&co->scheduled, NULL);
qemu_coroutine_enter(co);
aio_context_release(ctx);
}
void aio_co_schedule(AioContext *ctx, Coroutine *co)
{
trace_aio_co_schedule(ctx, co);
+ const char *scheduled = atomic_cmpxchg(&co->scheduled, NULL,
+ __func__);
+
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
QSLIST_INSERT_HEAD_ATOMIC(&ctx->scheduled_coroutines,
co, co_scheduled_next);
qemu_bh_schedule(ctx->co_schedule_bh);
#include "qemu/osdep.h"
#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
#include "qemu/timer.h"
#include "block/aio.h"
{
CoSleepCB *sleep_cb = opaque;
+ /* Write of schedule protected by barrier write in aio_co_schedule */
+ atomic_set(&sleep_cb->co->scheduled, NULL);
aio_co_wake(sleep_cb->co);
}
CoSleepCB sleep_cb = {
.co = qemu_coroutine_self(),
};
+
+ const char *scheduled = atomic_cmpxchg(&sleep_cb.co->scheduled, NULL,
+ __func__);
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
qemu_coroutine_yield();
Coroutine *self = qemu_coroutine_self();
CoroutineAction ret;
+ /* Cannot rely on the read barrier for co in aio_co_wake(), as there are
+ * callers outside of aio_co_wake() */
+ const char *scheduled = atomic_mb_read(&co->scheduled);
+
trace_qemu_aio_coroutine_enter(ctx, self, co, co->entry_arg);
+ /* if the Coroutine has already been scheduled, entering it again will
+ * cause us to enter it twice, potentially even after the coroutine has
+ * been deleted */
+ if (scheduled) {
+ fprintf(stderr,
+ "%s: Co-routine was already scheduled in '%s'\n",
+ __func__, scheduled);
+ abort();
+ }
+
if (co->caller) {
fprintf(stderr, "Co-routine re-entered recursively\n");
abort();