/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
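/* With 512-byte sectors (BDRV_SECTOR_BITS == 9) this works out to
* 32768 * 512 bytes = 16 MiB. */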
+static AioWait drain_all_aio_wait;
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
-void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
- if (c == ignore) {
+ if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
continue;
}
if (c->role->drained_begin) {
c->role->drained_begin(c);
}
}
}
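/* A rough sketch of what ignore_bds_parents relies on: a BdrvChildRole whose
* parent is itself a BlockDriverState advertises that via parent_is_bds, so
* bdrv_drain_all_begin() can skip such parents here and drain them directly
* as nodes instead. Taking the child_file role in block.c as an assumed
* example, this looks roughly like:
*
*     const BdrvChildRole child_file = {
*         .parent_is_bds   = true,
*         .drained_begin   = bdrv_child_cb_drained_begin,
*         ...
*     };
*/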
-void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
- if (c == ignore) {
+ if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
continue;
}
if (c->role->drained_end) {
c->role->drained_end(c);
}
}
}
-static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore)
+static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents)
{
BdrvChild *c, *next;
bool busy = false;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
- if (c == ignore) {
+ if (c == ignore || (ignore_bds_parents && c->role->parent_is_bds)) {
continue;
}
if (c->role->drained_poll) {
busy |= c->role->drained_poll(c);
}
}

return busy;
}
bool recursive;
bool poll;
BdrvChild *parent;
+ bool ignore_bds_parents;
} BdrvCoDrainData;
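/* For reference, a sketch of the resulting bounce structure, assembled from
* the fields used in this file (the actual definition may differ in order):
*
*     typedef struct {
*         Coroutine *co;
*         BlockDriverState *bs;
*         bool done;
*         bool begin;
*         bool recursive;
*         bool poll;
*         BdrvChild *parent;
*         bool ignore_bds_parents;
*     } BdrvCoDrainData;
*/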
static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
/* Set data->done before reading bs->wakeup. */
atomic_mb_set(&data->done, true);
- bdrv_wakeup(bs);
+ bdrv_dec_in_flight(bs);
+
+ if (data->begin) {
+ g_free(data);
+ }
}
/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
{
- BdrvCoDrainData data = { .bs = bs, .done = false, .begin = begin};
+ BdrvCoDrainData *data;
if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
(!begin && !bs->drv->bdrv_co_drain_end)) {
return;
}
- data.co = qemu_coroutine_create(bdrv_drain_invoke_entry, &data);
- bdrv_coroutine_enter(bs, data.co);
- BDRV_POLL_WHILE(bs, !data.done);
+ data = g_new(BdrvCoDrainData, 1);
+ *data = (BdrvCoDrainData) {
+ .bs = bs,
+ .done = false,
+ .begin = begin
+ };
+
+ /* Make sure the driver callback completes during the polling phase for
+ * drain_begin. */
+ bdrv_inc_in_flight(bs);
+ data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
+ aio_co_schedule(bdrv_get_aio_context(bs), data->co);
+
+ if (!begin) {
+ BDRV_POLL_WHILE(bs, !data->done);
+ g_free(data);
+ }
}
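/* A sketch of the resulting lifecycle: for begin, the coroutine is only
* scheduled here; it runs, sets data->done, drops the in-flight reference and
* frees data during the caller's polling phase (BDRV_POLL_WHILE() in
* bdrv_do_drained_begin() or AIO_WAIT_WHILE() in bdrv_drain_all_begin()). For
* end, the driver callback must have finished before we return, so we poll
* for data->done right here and free data ourselves. */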
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent)
+ BdrvChild *ignore_parent, bool ignore_bds_parents)
{
BdrvChild *child, *next;
- if (bdrv_parent_drained_poll(bs, ignore_parent)) {
+ if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
return true;
}
if (atomic_read(&bs->in_flight)) {
return true;
}
if (recursive) {
+ assert(!ignore_bds_parents);
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- if (bdrv_drain_poll(child->bs, recursive, child)) {
+ if (bdrv_drain_poll(child->bs, recursive, child, false)) {
return true;
}
}
* have executed. */
while (aio_poll(bs->aio_context, false));
- return bdrv_drain_poll(bs, recursive, ignore_parent);
+ return bdrv_drain_poll(bs, recursive, ignore_parent, false);
}
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool poll);
+ BdrvChild *parent, bool ignore_bds_parents,
+ bool poll);
static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent);
+ BdrvChild *parent, bool ignore_bds_parents);
static void bdrv_co_drain_bh_cb(void *opaque)
{
Coroutine *co = data->co;
BlockDriverState *bs = data->bs;
- bdrv_dec_in_flight(bs);
- if (data->begin) {
- bdrv_do_drained_begin(bs, data->recursive, data->parent, data->poll);
+ if (bs) {
+ bdrv_dec_in_flight(bs);
+ if (data->begin) {
+ bdrv_do_drained_begin(bs, data->recursive, data->parent,
+ data->ignore_bds_parents, data->poll);
+ } else {
+ bdrv_do_drained_end(bs, data->recursive, data->parent,
+ data->ignore_bds_parents);
+ }
} else {
- bdrv_do_drained_end(bs, data->recursive, data->parent);
+ assert(data->begin);
+ bdrv_drain_all_begin();
}
data->done = true;
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin, bool recursive,
- BdrvChild *parent, bool poll)
+ BdrvChild *parent,
+ bool ignore_bds_parents,
+ bool poll)
{
BdrvCoDrainData data;
.begin = begin,
.recursive = recursive,
.parent = parent,
+ .ignore_bds_parents = ignore_bds_parents,
.poll = poll,
};
- bdrv_inc_in_flight(bs);
+ if (bs) {
+ bdrv_inc_in_flight(bs);
+ }
aio_bh_schedule_oneshot(bdrv_get_aio_context(bs),
bdrv_co_drain_bh_cb, &data);
}
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent)
+ BdrvChild *parent, bool ignore_bds_parents)
{
assert(!qemu_in_coroutine());
aio_disable_external(bdrv_get_aio_context(bs));
}
- bdrv_parent_drained_begin(bs, parent);
+ bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
bdrv_drain_invoke(bs, true);
}
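/* A usage sketch for this non-polling variant, assuming a BDS parent's
* BdrvChildRole.drained_begin callback (e.g. bdrv_child_cb_drained_begin in
* block.c), which must not poll while the graph is being changed:
*
*     static void bdrv_child_cb_drained_begin(BdrvChild *child)
*     {
*         BlockDriverState *bs = child->opaque;
*         bdrv_do_drained_begin_quiesce(bs, NULL, false);
*     }
*/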
static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
- BdrvChild *parent, bool poll)
+ BdrvChild *parent, bool ignore_bds_parents,
+ bool poll)
{
BdrvChild *child, *next;
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, recursive, parent, poll);
+ bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
+ poll);
return;
}
- bdrv_do_drained_begin_quiesce(bs, parent);
+ bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
if (recursive) {
+ assert(!ignore_bds_parents);
bs->recursive_quiesce_counter++;
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- bdrv_do_drained_begin(child->bs, true, child, false);
+ bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
+ false);
}
}
* nodes.
*/
if (poll) {
+ assert(!ignore_bds_parents);
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
}
}
void bdrv_drained_begin(BlockDriverState *bs)
{
- bdrv_do_drained_begin(bs, false, NULL, true);
+ bdrv_do_drained_begin(bs, false, NULL, false, true);
}
void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
- bdrv_do_drained_begin(bs, true, NULL, true);
+ bdrv_do_drained_begin(bs, true, NULL, false, true);
}
-void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
- BdrvChild *parent)
+static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+ BdrvChild *parent, bool ignore_bds_parents)
{
BdrvChild *child, *next;
int old_quiesce_counter;
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false, recursive, parent, false);
+ bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
+ false);
return;
}
assert(bs->quiesce_counter > 0);
/* Re-enable things in child-to-parent order */
bdrv_drain_invoke(bs, false);
- bdrv_parent_drained_end(bs, parent);
+ bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
if (old_quiesce_counter == 1) {
aio_enable_external(bdrv_get_aio_context(bs));
}
if (recursive) {
+ assert(!ignore_bds_parents);
bs->recursive_quiesce_counter--;
QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
- bdrv_do_drained_end(child->bs, true, child);
+ bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
}
}
}
void bdrv_drained_end(BlockDriverState *bs)
{
- bdrv_do_drained_end(bs, false, NULL);
+ bdrv_do_drained_end(bs, false, NULL, false);
}
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
- bdrv_do_drained_end(bs, true, NULL);
+ bdrv_do_drained_end(bs, true, NULL, false);
}
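/* A caller-side sketch of the unchanged public API:
*
*     bdrv_subtree_drained_begin(bs);
*     ... no request on bs or its children is in flight here ...
*     bdrv_subtree_drained_end(bs);
*/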
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
int i;
for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_begin(child->bs, true, child, true);
+ bdrv_do_drained_begin(child->bs, true, child, false, true);
}
}
int i;
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
- bdrv_do_drained_end(child->bs, true, child);
+ bdrv_do_drained_end(child->bs, true, child, false);
}
}
}
}
+unsigned int bdrv_drain_all_count = 0;
+
+static bool bdrv_drain_all_poll(void)
+{
+ BlockDriverState *bs = NULL;
+ bool result = false;
+
+ /* Execute pending BHs first (may modify the graph) and check everything
+ * else only after the BHs have executed. */
+ while (aio_poll(qemu_get_aio_context(), false));
+
+ /* bdrv_drain_poll() can't make changes to the graph and we are holding the
+ * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
+ while ((bs = bdrv_next_all_states(bs))) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+ result |= bdrv_drain_poll(bs, false, NULL, true);
+ aio_context_release(aio_context);
+ }
+
+ return result;
+}
+
/*
* Wait for pending requests to complete across all BlockDriverStates
*
*/
void bdrv_drain_all_begin(void)
{
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockDriverState *bs = NULL;
- /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
- * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
- * nodes in several different AioContexts, so make sure we're in the main
- * context. */
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
+ return;
+ }
+
+ /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
+ * loop AioContext, so make sure we're in the main context. */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ assert(bdrv_drain_all_count < INT_MAX);
+ bdrv_drain_all_count++;
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* Quiesce all nodes, without polling in-flight requests yet. The graph
+ * cannot change during this loop. */
+ while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, true, NULL, true);
+ bdrv_do_drained_begin(bs, false, NULL, true, false);
aio_context_release(aio_context);
}
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* Now poll the in-flight requests */
+ AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
+
+ while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
}
}
void bdrv_drain_all_end(void)
{
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockDriverState *bs = NULL;
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, true, NULL);
+ bdrv_do_drained_end(bs, false, NULL, true);
aio_context_release(aio_context);
}
+
+ assert(bdrv_drain_all_count > 0);
+ bdrv_drain_all_count--;
}
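/* A sketch of what this enables for callers: a drain_all section may now be
* entered from coroutine context, and because the quiescing loop and the
* polling loop are separate and polling goes through the global
* drain_all_aio_wait waiter, the caller may change the graph while drained:
*
*     bdrv_drain_all_begin();
*     ... detach, replace or delete nodes ...
*     bdrv_drain_all_end();
*/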
void bdrv_drain_all(void)
void bdrv_wakeup(BlockDriverState *bs)
{
aio_wait_kick(bdrv_get_aio_wait(bs));
+ aio_wait_kick(&drain_all_aio_wait);
}
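/* The additional kick is needed because AIO_WAIT_WHILE(&drain_all_aio_wait,
* NULL, bdrv_drain_all_poll()) in bdrv_drain_all_begin() is not tied to any
* single node, so a request completing on any BlockDriverState must be able
* to wake that waiter as well. */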
void bdrv_dec_in_flight(BlockDriverState *bs)