X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/aa8e26de9617756febcbf794dda965df307fdaaa..0513f9841f14b918bd7aa7f1c8fb0ac3b0f6dea0:/tests/test-bdrv-drain.c diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index c9f29c8b10..9dffd86c47 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -100,6 +100,13 @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, nperm, nshared); } +static int bdrv_test_change_backing_file(BlockDriverState *bs, + const char *backing_file, + const char *backing_fmt) +{ + return 0; +} + static BlockDriver bdrv_test = { .format_name = "test", .instance_size = sizeof(BDRVTestState), @@ -111,6 +118,8 @@ static BlockDriver bdrv_test = { .bdrv_co_drain_end = bdrv_test_co_drain_end, .bdrv_child_perm = bdrv_test_child_perm, + + .bdrv_change_backing_file = bdrv_test_change_backing_file, }; static void aio_ret_cb(void *opaque, int ret) @@ -204,14 +213,9 @@ static void test_drv_cb_common(enum drain_type drain_type, bool recursive) BlockAIOCB *acb; int aio_ret; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -295,7 +299,7 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) BlockBackend *blk; BlockDriverState *bs, *backing; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); blk_insert_bs(blk, bs, &error_abort); @@ -358,7 +362,7 @@ static void test_nested(void) BDRVTestState *s, *backing_s; enum drain_type outer, inner; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; @@ -407,13 +411,13 @@ static void test_multiparent(void) BlockDriverState *bs_a, *bs_b, *backing; BDRVTestState *a_s, *b_s, *backing_s; - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; blk_insert_bs(blk_a, bs_a, &error_abort); - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -480,13 +484,13 @@ static void test_graph_change_drain_subtree(void) BlockDriverState *bs_a, *bs_b, *backing; BDRVTestState *a_s, *b_s, *backing_s; - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; blk_insert_bs(blk_a, bs_a, &error_abort); - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -560,7 +564,7 @@ static void test_graph_change_drain_all(void) BDRVTestState *a_s, *b_s; /* Create node A with a BlockBackend */ - blk_a = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, &error_abort); a_s = bs_a->opaque; @@ -576,7 +580,7 @@ static void test_graph_change_drain_all(void) g_assert_cmpint(a_s->drain_count, ==, 1); /* Create node B with a BlockBackend */ - blk_b = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, &error_abort); b_s = bs_b->opaque; @@ -670,30 +674,27 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) AioContext *ctx_a = iothread_get_aio_context(a); AioContext *ctx_b = iothread_get_aio_context(b); - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); /* bdrv_drain_all() may only be called from the main loop thread */ if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) { goto out; } - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, &error_abort); s = bs->opaque; blk_insert_bs(blk, bs, &error_abort); - blk_set_aio_context(blk, ctx_a); + blk_set_aio_context(blk, ctx_a, &error_abort); aio_context_acquire(ctx_a); s->bh_indirection_ctx = ctx_b; aio_ret = -EINPROGRESS; + qemu_event_reset(&done_event); + if (drain_thread == 0) { acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret); } else { @@ -723,7 +724,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) * but the drain in this thread can continue immediately after * bdrv_dec_in_flight() and aio_ret might be assigned only slightly * later. */ - qemu_event_reset(&done_event); do_drain_begin(drain_type, bs); g_assert_cmpint(bs->in_flight, ==, 0); @@ -743,7 +743,6 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) } break; case 1: - qemu_event_reset(&done_event); aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data); qemu_event_wait(&done_event); break; @@ -752,7 +751,7 @@ static void test_iothread_common(enum drain_type drain_type, int drain_thread) } aio_context_acquire(ctx_a); - blk_set_aio_context(blk, qemu_get_aio_context()); + blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort); aio_context_release(ctx_a); bdrv_unref(bs); @@ -893,7 +892,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, bdrv_set_backing_hd(src, src_backing, &error_abort); bdrv_unref(src_backing); - blk_src = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_src, src_overlay, &error_abort); switch (drain_node) { @@ -913,15 +912,16 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, if (use_iothread) { iothread = iothread_new(); ctx = iothread_get_aio_context(iothread); - blk_set_aio_context(blk_src, ctx); + blk_set_aio_context(blk_src, ctx, &error_abort); } else { ctx = qemu_get_aio_context(); } target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR, &error_abort); - blk_target = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk_target, target, &error_abort); + blk_set_allow_aio_context_change(blk_target, true); aio_context_acquire(ctx); tjob = block_job_create("job0", &test_job_driver, NULL, src, @@ -982,7 +982,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_false(job->job.paused); g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */ - do_drain_begin(drain_type, target); + do_drain_begin_unlocked(drain_type, target); if (drain_type == BDRV_DRAIN_ALL) { /* bdrv_drain_all() drains both src and target */ @@ -993,7 +993,7 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_true(job->job.paused); g_assert_false(job->job.busy); /* The job is paused */ - do_drain_end(drain_type, target); + do_drain_end_unlocked(drain_type, target); if (use_iothread) { /* paused is reset in the I/O thread, wait for it */ @@ -1011,7 +1011,8 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO)); if (use_iothread) { - blk_set_aio_context(blk_src, qemu_get_aio_context()); + blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort); + assert(blk_get_aio_context(blk_target) == qemu_get_aio_context()); } aio_context_release(ctx); @@ -1148,13 +1149,7 @@ static void coroutine_fn test_co_delete_by_drain(void *opaque) BlockDriverState *bs = blk_bs(blk); BDRVTestTopState *tts = bs->opaque; void *buffer = g_malloc(65536); - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = buffer, - .iov_len = 65536, - }; - - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536); /* Pretend some internal write operation from parent to child. * Important: We have to read from the child, not from the parent! @@ -1221,7 +1216,7 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, &error_abort); bdrv_attach_child(bs, null_bs, "null-child", &child_file, &error_abort); - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs, &error_abort); /* Referenced by blk now */ @@ -1365,12 +1360,7 @@ static void test_detach_indirect(bool by_parent_cb) BdrvChild *child_a, *child_b; BlockAIOCB *acb; - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = NULL, - .iov_len = 0, - }; - qemu_iovec_init_external(&qiov, &iov, 1); + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); if (!by_parent_cb) { detach_by_driver_cb_role = child_file; @@ -1389,7 +1379,7 @@ static void test_detach_indirect(bool by_parent_cb) c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort); /* blk is a BB for parent-a */ - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, parent_a, &error_abort); bdrv_unref(parent_a); @@ -1457,12 +1447,6 @@ static void test_detach_indirect(bool by_parent_cb) bdrv_unref(parent_b); blk_unref(blk); - /* XXX Once bdrv_close() unref's children instead of just detaching them, - * this won't be necessary any more. */ - bdrv_unref(a); - bdrv_unref(a); - bdrv_unref(c); - g_assert_cmpint(a->refcnt, ==, 1); g_assert_cmpint(b->refcnt, ==, 1); g_assert_cmpint(c->refcnt, ==, 1); @@ -1487,7 +1471,7 @@ static void test_append_to_drained(void) BlockDriverState *base, *overlay; BDRVTestState *base_s, *overlay_s; - blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL); + blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort); base_s = base->opaque; blk_insert_bs(blk, base, &error_abort); @@ -1522,6 +1506,640 @@ static void test_append_to_drained(void) blk_unref(blk); } +static void test_set_aio_context(void) +{ + BlockDriverState *bs; + IOThread *a = iothread_new(); + IOThread *b = iothread_new(); + AioContext *ctx_a = iothread_get_aio_context(a); + AioContext *ctx_b = iothread_get_aio_context(b); + + bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR, + &error_abort); + + bdrv_drained_begin(bs); + bdrv_try_set_aio_context(bs, ctx_a, &error_abort); + + aio_context_acquire(ctx_a); + bdrv_drained_end(bs); + + bdrv_drained_begin(bs); + bdrv_try_set_aio_context(bs, ctx_b, &error_abort); + aio_context_release(ctx_a); + aio_context_acquire(ctx_b); + bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort); + aio_context_release(ctx_b); + bdrv_drained_end(bs); + + bdrv_unref(bs); + iothread_join(a); + iothread_join(b); +} + + +typedef struct TestDropBackingBlockJob { + BlockJob common; + bool should_complete; + bool *did_complete; + BlockDriverState *detach_also; +} TestDropBackingBlockJob; + +static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp) +{ + TestDropBackingBlockJob *s = + container_of(job, TestDropBackingBlockJob, common.job); + + while (!s->should_complete) { + job_sleep_ns(job, 0); + } + + return 0; +} + +static void test_drop_backing_job_commit(Job *job) +{ + TestDropBackingBlockJob *s = + container_of(job, TestDropBackingBlockJob, common.job); + + bdrv_set_backing_hd(blk_bs(s->common.blk), NULL, &error_abort); + bdrv_set_backing_hd(s->detach_also, NULL, &error_abort); + + *s->did_complete = true; +} + +static const BlockJobDriver test_drop_backing_job_driver = { + .job_driver = { + .instance_size = sizeof(TestDropBackingBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_drop_backing_job_run, + .commit = test_drop_backing_job_commit, + } +}; + +/** + * Creates a child node with three parent nodes on it, and then runs a + * block job on the final one, parent-node-2. + * + * The job is then asked to complete before a section where the child + * is drained. + * + * Ending this section will undrain the child's parents, first + * parent-node-2, then parent-node-1, then parent-node-0 -- the parent + * list is in reverse order of how they were added. Ending the drain + * on parent-node-2 will resume the job, thus completing it and + * scheduling job_exit(). + * + * Ending the drain on parent-node-1 will poll the AioContext, which + * lets job_exit() and thus test_drop_backing_job_commit() run. That + * function first removes the child as parent-node-2's backing file. + * + * In old (and buggy) implementations, there are two problems with + * that: + * (A) bdrv_drain_invoke() polls for every node that leaves the + * drained section. This means that job_exit() is scheduled + * before the child has left the drained section. Its + * quiesce_counter is therefore still 1 when it is removed from + * parent-node-2. + * + * (B) bdrv_replace_child_noperm() calls drained_end() on the old + * child's parents as many times as the child is quiesced. This + * means it will call drained_end() on parent-node-2 once. + * Because parent-node-2 is no longer quiesced at this point, this + * will fail. + * + * bdrv_replace_child_noperm() therefore must call drained_end() on + * the parent only if it really is still drained because the child is + * drained. + * + * If removing child from parent-node-2 was successful (as it should + * be), test_drop_backing_job_commit() will then also remove the child + * from parent-node-0. + * + * With an old version of our drain infrastructure ((A) above), that + * resulted in the following flow: + * + * 1. child attempts to leave its drained section. The call recurses + * to its parents. + * + * 2. parent-node-2 leaves the drained section. Polling in + * bdrv_drain_invoke() will schedule job_exit(). + * + * 3. parent-node-1 leaves the drained section. Polling in + * bdrv_drain_invoke() will run job_exit(), thus disconnecting + * parent-node-0 from the child node. + * + * 4. bdrv_parent_drained_end() uses a QLIST_FOREACH_SAFE() loop to + * iterate over the parents. Thus, it now accesses the BdrvChild + * object that used to connect parent-node-0 and the child node. + * However, that object no longer exists, so it accesses a dangling + * pointer. + * + * The solution is to only poll once when running a bdrv_drained_end() + * operation, specifically at the end when all drained_end() + * operations for all involved nodes have been scheduled. + * Note that this also solves (A) above, thus hiding (B). + */ +static void test_blockjob_commit_by_drained_end(void) +{ + BlockDriverState *bs_child, *bs_parents[3]; + TestDropBackingBlockJob *job; + bool job_has_completed = false; + int i; + + bs_child = bdrv_new_open_driver(&bdrv_test, "child-node", BDRV_O_RDWR, + &error_abort); + + for (i = 0; i < 3; i++) { + char name[32]; + snprintf(name, sizeof(name), "parent-node-%i", i); + bs_parents[i] = bdrv_new_open_driver(&bdrv_test, name, BDRV_O_RDWR, + &error_abort); + bdrv_set_backing_hd(bs_parents[i], bs_child, &error_abort); + } + + job = block_job_create("job", &test_drop_backing_job_driver, NULL, + bs_parents[2], 0, BLK_PERM_ALL, 0, 0, NULL, NULL, + &error_abort); + + job->detach_also = bs_parents[0]; + job->did_complete = &job_has_completed; + + job_start(&job->common.job); + + job->should_complete = true; + bdrv_drained_begin(bs_child); + g_assert(!job_has_completed); + bdrv_drained_end(bs_child); + g_assert(job_has_completed); + + bdrv_unref(bs_parents[0]); + bdrv_unref(bs_parents[1]); + bdrv_unref(bs_parents[2]); + bdrv_unref(bs_child); +} + + +typedef struct TestSimpleBlockJob { + BlockJob common; + bool should_complete; + bool *did_complete; +} TestSimpleBlockJob; + +static int coroutine_fn test_simple_job_run(Job *job, Error **errp) +{ + TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); + + while (!s->should_complete) { + job_sleep_ns(job, 0); + } + + return 0; +} + +static void test_simple_job_clean(Job *job) +{ + TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job); + *s->did_complete = true; +} + +static const BlockJobDriver test_simple_job_driver = { + .job_driver = { + .instance_size = sizeof(TestSimpleBlockJob), + .free = block_job_free, + .user_resume = block_job_user_resume, + .drain = block_job_drain, + .run = test_simple_job_run, + .clean = test_simple_job_clean, + }, +}; + +static int drop_intermediate_poll_update_filename(BdrvChild *child, + BlockDriverState *new_base, + const char *filename, + Error **errp) +{ + /* + * We are free to poll here, which may change the block graph, if + * it is not drained. + */ + + /* If the job is not drained: Complete it, schedule job_exit() */ + aio_poll(qemu_get_current_aio_context(), false); + /* If the job is not drained: Run job_exit(), finish the job */ + aio_poll(qemu_get_current_aio_context(), false); + + return 0; +} + +/** + * Test a poll in the midst of bdrv_drop_intermediate(). + * + * bdrv_drop_intermediate() calls BdrvChildRole.update_filename(), + * which can yield or poll. This may lead to graph changes, unless + * the whole subtree in question is drained. + * + * We test this on the following graph: + * + * Job + * + * | + * job-node + * | + * v + * + * job-node + * + * | + * backing + * | + * v + * + * node-2 --chain--> node-1 --chain--> node-0 + * + * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0). + * + * This first updates node-2's backing filename by invoking + * drop_intermediate_poll_update_filename(), which polls twice. This + * causes the job to finish, which in turns causes the job-node to be + * deleted. + * + * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it + * already has a pointer to the BdrvChild edge between job-node and + * node-1. When it tries to handle that edge, we probably get a + * segmentation fault because the object no longer exists. + * + * + * The solution is for bdrv_drop_intermediate() to drain top's + * subtree. This prevents graph changes from happening just because + * BdrvChildRole.update_filename() yields or polls. Thus, the block + * job is paused during that drained section and must finish before or + * after. + * + * (In addition, bdrv_replace_child() must keep the job paused.) + */ +static void test_drop_intermediate_poll(void) +{ + static BdrvChildRole chain_child_role; + BlockDriverState *chain[3]; + TestSimpleBlockJob *job; + BlockDriverState *job_node; + bool job_has_completed = false; + int i; + int ret; + + chain_child_role = child_backing; + chain_child_role.update_filename = drop_intermediate_poll_update_filename; + + for (i = 0; i < 3; i++) { + char name[32]; + snprintf(name, 32, "node-%i", i); + + chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort); + } + + job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR, + &error_abort); + bdrv_set_backing_hd(job_node, chain[1], &error_abort); + + /* + * Establish the chain last, so the chain links are the first + * elements in the BDS.parents lists + */ + for (i = 0; i < 3; i++) { + if (i) { + /* Takes the reference to chain[i - 1] */ + chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1], + "chain", &chain_child_role, + &error_abort); + } + } + + job = block_job_create("job", &test_simple_job_driver, NULL, job_node, + 0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort); + + /* The job has a reference now */ + bdrv_unref(job_node); + + job->did_complete = &job_has_completed; + + job_start(&job->common.job); + job->should_complete = true; + + g_assert(!job_has_completed); + ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); + g_assert(ret == 0); + g_assert(job_has_completed); + + bdrv_unref(chain[2]); +} + + +typedef struct BDRVReplaceTestState { + bool was_drained; + bool was_undrained; + bool has_read; + + int drain_count; + + bool yield_before_read; + Coroutine *io_co; + Coroutine *drain_co; +} BDRVReplaceTestState; + +static void bdrv_replace_test_close(BlockDriverState *bs) +{ +} + +/** + * If @bs has a backing file: + * Yield if .yield_before_read is true (and wait for drain_begin to + * wake us up). + * Forward the read to bs->backing. Set .has_read to true. + * If drain_begin has woken us, wake it in turn. + * + * Otherwise: + * Set .has_read to true and return success. + */ +static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, + uint64_t offset, + uint64_t bytes, + QEMUIOVector *qiov, + int flags) +{ + BDRVReplaceTestState *s = bs->opaque; + + if (bs->backing) { + int ret; + + g_assert(!s->drain_count); + + s->io_co = qemu_coroutine_self(); + if (s->yield_before_read) { + s->yield_before_read = false; + qemu_coroutine_yield(); + } + s->io_co = NULL; + + ret = bdrv_preadv(bs->backing, offset, qiov); + s->has_read = true; + + /* Wake up drain_co if it runs */ + if (s->drain_co) { + aio_co_wake(s->drain_co); + } + + return ret; + } + + s->has_read = true; + return 0; +} + +/** + * If .drain_count is 0, wake up .io_co if there is one; and set + * .was_drained. + * Increment .drain_count. + */ +static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) +{ + BDRVReplaceTestState *s = bs->opaque; + + if (!s->drain_count) { + /* Keep waking io_co up until it is done */ + s->drain_co = qemu_coroutine_self(); + while (s->io_co) { + aio_co_wake(s->io_co); + s->io_co = NULL; + qemu_coroutine_yield(); + } + s->drain_co = NULL; + + s->was_drained = true; + } + s->drain_count++; +} + +/** + * Reduce .drain_count, set .was_undrained once it reaches 0. + * If .drain_count reaches 0 and the node has a backing file, issue a + * read request. + */ +static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) +{ + BDRVReplaceTestState *s = bs->opaque; + + g_assert(s->drain_count > 0); + if (!--s->drain_count) { + int ret; + + s->was_undrained = true; + + if (bs->backing) { + char data; + QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); + + /* Queue a read request post-drain */ + ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); + g_assert(ret >= 0); + } + } +} + +static BlockDriver bdrv_replace_test = { + .format_name = "replace_test", + .instance_size = sizeof(BDRVReplaceTestState), + + .bdrv_close = bdrv_replace_test_close, + .bdrv_co_preadv = bdrv_replace_test_co_preadv, + + .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, + .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, + + .bdrv_child_perm = bdrv_format_default_perms, +}; + +static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque) +{ + int ret; + char data; + + ret = blk_co_pread(opaque, 0, 1, &data, 0); + g_assert(ret >= 0); +} + +/** + * We test two things: + * (1) bdrv_replace_child_noperm() must not undrain the parent if both + * children are drained. + * (2) bdrv_replace_child_noperm() must never flush I/O requests to a + * drained child. If the old child is drained, it must flush I/O + * requests after the new one has been attached. If the new child + * is drained, it must flush I/O requests before the old one is + * detached. + * + * To do so, we create one parent node and two child nodes; then + * attach one of the children (old_child_bs) to the parent, then + * drain both old_child_bs and new_child_bs according to + * old_drain_count and new_drain_count, respectively, and finally + * we invoke bdrv_replace_node() to replace old_child_bs by + * new_child_bs. + * + * The test block driver we use here (bdrv_replace_test) has a read + * function that: + * - For the parent node, can optionally yield, and then forwards the + * read to bdrv_preadv(), + * - For the child node, just returns immediately. + * + * If the read yields, the drain_begin function will wake it up. + * + * The drain_end function issues a read on the parent once it is fully + * undrained (which simulates requests starting to come in again). + */ +static void do_test_replace_child_mid_drain(int old_drain_count, + int new_drain_count) +{ + BlockBackend *parent_blk; + BlockDriverState *parent_bs; + BlockDriverState *old_child_bs, *new_child_bs; + BDRVReplaceTestState *parent_s; + BDRVReplaceTestState *old_child_s, *new_child_s; + Coroutine *io_co; + int i; + + parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0, + &error_abort); + parent_s = parent_bs->opaque; + + parent_blk = blk_new(qemu_get_aio_context(), + BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL); + blk_insert_bs(parent_blk, parent_bs, &error_abort); + + old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0, + &error_abort); + new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0, + &error_abort); + old_child_s = old_child_bs->opaque; + new_child_s = new_child_bs->opaque; + + /* So that we can read something */ + parent_bs->total_sectors = 1; + old_child_bs->total_sectors = 1; + new_child_bs->total_sectors = 1; + + bdrv_ref(old_child_bs); + parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child", + &child_backing, &error_abort); + + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_begin(old_child_bs); + } + for (i = 0; i < new_drain_count; i++) { + bdrv_drained_begin(new_child_bs); + } + + if (!old_drain_count) { + /* + * Start a read operation that will yield, so it will not + * complete before the node is drained. + */ + parent_s->yield_before_read = true; + io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co, + parent_blk); + qemu_coroutine_enter(io_co); + } + + /* If we have started a read operation, it should have yielded */ + g_assert(!parent_s->has_read); + + /* Reset drained status so we can see what bdrv_replace_node() does */ + parent_s->was_drained = false; + parent_s->was_undrained = false; + + g_assert(parent_bs->quiesce_counter == old_drain_count); + bdrv_replace_node(old_child_bs, new_child_bs, &error_abort); + g_assert(parent_bs->quiesce_counter == new_drain_count); + + if (!old_drain_count && !new_drain_count) { + /* + * From undrained to undrained drains and undrains the parent, + * because bdrv_replace_node() contains a drained section for + * @old_child_bs. + */ + g_assert(parent_s->was_drained && parent_s->was_undrained); + } else if (!old_drain_count && new_drain_count) { + /* + * From undrained to drained should drain the parent and keep + * it that way. + */ + g_assert(parent_s->was_drained && !parent_s->was_undrained); + } else if (old_drain_count && !new_drain_count) { + /* + * From drained to undrained should undrain the parent and + * keep it that way. + */ + g_assert(!parent_s->was_drained && parent_s->was_undrained); + } else /* if (old_drain_count && new_drain_count) */ { + /* + * From drained to drained must not undrain the parent at any + * point + */ + g_assert(!parent_s->was_drained && !parent_s->was_undrained); + } + + if (!old_drain_count || !new_drain_count) { + /* + * If !old_drain_count, we have started a read request before + * bdrv_replace_node(). If !new_drain_count, the parent must + * have been undrained at some point, and + * bdrv_replace_test_co_drain_end() starts a read request + * then. + */ + g_assert(parent_s->has_read); + } else { + /* + * If the parent was never undrained, there is no way to start + * a read request. + */ + g_assert(!parent_s->has_read); + } + + /* A drained child must have not received any request */ + g_assert(!(old_drain_count && old_child_s->has_read)); + g_assert(!(new_drain_count && new_child_s->has_read)); + + for (i = 0; i < new_drain_count; i++) { + bdrv_drained_end(new_child_bs); + } + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_end(old_child_bs); + } + + /* + * By now, bdrv_replace_test_co_drain_end() must have been called + * at some point while the new child was attached to the parent. + */ + g_assert(parent_s->has_read); + g_assert(new_child_s->has_read); + + blk_unref(parent_blk); + bdrv_unref(parent_bs); + bdrv_unref(old_child_bs); + bdrv_unref(new_child_bs); +} + +static void test_replace_child_mid_drain(void) +{ + int old_drain_count, new_drain_count; + + for (old_drain_count = 0; old_drain_count < 2; old_drain_count++) { + for (new_drain_count = 0; new_drain_count < 2; new_drain_count++) { + do_test_replace_child_mid_drain(old_drain_count, new_drain_count); + } + } +} + int main(int argc, char **argv) { int ret; @@ -1603,6 +2221,17 @@ int main(int argc, char **argv) g_test_add_func("/bdrv-drain/attach/drain", test_append_to_drained); + g_test_add_func("/bdrv-drain/set_aio_context", test_set_aio_context); + + g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end", + test_blockjob_commit_by_drained_end); + + g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll", + test_drop_intermediate_poll); + + g_test_add_func("/bdrv-drain/replace_child/mid-drain", + test_replace_child_mid_drain); + ret = g_test_run(); qemu_event_destroy(&done_event); return ret;