]> Git Repo - qemu.git/blob - tests/test-bdrv-drain.c
block: Pass BdrvChildRole to bdrv_child_perm()
[qemu.git] / tests / test-bdrv-drain.c
1 /*
2  * Block node draining tests
3  *
4  * Copyright (c) 2017 Kevin Wolf <[email protected]>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 #include "qemu/osdep.h"
26 #include "block/block.h"
27 #include "block/blockjob_int.h"
28 #include "sysemu/block-backend.h"
29 #include "qapi/error.h"
30 #include "qemu/main-loop.h"
31 #include "iothread.h"
32
33 static QemuEvent done_event;
34
35 typedef struct BDRVTestState {
36     int drain_count;
37     AioContext *bh_indirection_ctx;
38     bool sleep_in_drain_begin;
39 } BDRVTestState;
40
41 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
42 {
43     BDRVTestState *s = bs->opaque;
44     s->drain_count++;
45     if (s->sleep_in_drain_begin) {
46         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
47     }
48 }
49
50 static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
51 {
52     BDRVTestState *s = bs->opaque;
53     s->drain_count--;
54 }
55
56 static void bdrv_test_close(BlockDriverState *bs)
57 {
58     BDRVTestState *s = bs->opaque;
59     g_assert_cmpint(s->drain_count, >, 0);
60 }
61
62 static void co_reenter_bh(void *opaque)
63 {
64     aio_co_wake(opaque);
65 }
66
67 static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
68                                             uint64_t offset, uint64_t bytes,
69                                             QEMUIOVector *qiov, int flags)
70 {
71     BDRVTestState *s = bs->opaque;
72
73     /* We want this request to stay until the polling loop in drain waits for
74      * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
75      * first and polls its result, too, but it shouldn't accidentally complete
76      * this request yet. */
77     qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
78
79     if (s->bh_indirection_ctx) {
80         aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh,
81                                 qemu_coroutine_self());
82         qemu_coroutine_yield();
83     }
84
85     return 0;
86 }
87
88 static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c,
89                                  const BdrvChildClass *child_class,
90                                  BdrvChildRole role,
91                                  BlockReopenQueue *reopen_queue,
92                                  uint64_t perm, uint64_t shared,
93                                  uint64_t *nperm, uint64_t *nshared)
94 {
95     /*
96      * bdrv_format_default_perms() accepts only these two, so disguise
97      * detach_by_driver_cb_parent as one of them.
98      */
99     if (child_class != &child_file && child_class != &child_backing) {
100         child_class = &child_file;
101     }
102
103     bdrv_format_default_perms(bs, c, child_class, role, reopen_queue,
104                               perm, shared, nperm, nshared);
105 }
106
107 static int bdrv_test_change_backing_file(BlockDriverState *bs,
108                                          const char *backing_file,
109                                          const char *backing_fmt)
110 {
111     return 0;
112 }
113
114 static BlockDriver bdrv_test = {
115     .format_name            = "test",
116     .instance_size          = sizeof(BDRVTestState),
117
118     .bdrv_close             = bdrv_test_close,
119     .bdrv_co_preadv         = bdrv_test_co_preadv,
120
121     .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
122     .bdrv_co_drain_end      = bdrv_test_co_drain_end,
123
124     .bdrv_child_perm        = bdrv_test_child_perm,
125
126     .bdrv_change_backing_file = bdrv_test_change_backing_file,
127 };
128
/*
 * AIO completion callback: store the request's return value in the int
 * that @opaque points to.
 */
static void aio_ret_cb(void *opaque, int ret)
{
    *(int *)opaque = ret;
}
134
/* Arguments and completion flag shared with call_in_coroutine_entry(). */
typedef struct CallInCoroutineData {
    /* Function to run inside the coroutine */
    void (*entry)(void);
    /* Set to true once entry() has returned */
    bool done;
} CallInCoroutineData;
139
140 static coroutine_fn void call_in_coroutine_entry(void *opaque)
141 {
142     CallInCoroutineData *data = opaque;
143
144     data->entry();
145     data->done = true;
146 }
147
148 static void call_in_coroutine(void (*entry)(void))
149 {
150     Coroutine *co;
151     CallInCoroutineData data = {
152         .entry  = entry,
153         .done   = false,
154     };
155
156     co = qemu_coroutine_create(call_in_coroutine_entry, &data);
157     qemu_coroutine_enter(co);
158     while (!data.done) {
159         aio_poll(qemu_get_aio_context(), true);
160     }
161 }
162
/* Which drain API do_drain_begin()/do_drain_end() should call. */
enum drain_type {
    BDRV_DRAIN_ALL = 0,     /* bdrv_drain_all_begin() / bdrv_drain_all_end() */
    BDRV_DRAIN,             /* bdrv_drained_begin() / bdrv_drained_end() */
    BDRV_SUBTREE_DRAIN,     /* bdrv_subtree_drained_begin() / ..._end() */
    DRAIN_TYPE_MAX,         /* number of drain types; used as a loop bound */
};
169
170 static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
171 {
172     switch (drain_type) {
173     case BDRV_DRAIN_ALL:        bdrv_drain_all_begin(); break;
174     case BDRV_DRAIN:            bdrv_drained_begin(bs); break;
175     case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_begin(bs); break;
176     default:                    g_assert_not_reached();
177     }
178 }
179
180 static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
181 {
182     switch (drain_type) {
183     case BDRV_DRAIN_ALL:        bdrv_drain_all_end(); break;
184     case BDRV_DRAIN:            bdrv_drained_end(bs); break;
185     case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_end(bs); break;
186     default:                    g_assert_not_reached();
187     }
188 }
189
190 static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs)
191 {
192     if (drain_type != BDRV_DRAIN_ALL) {
193         aio_context_acquire(bdrv_get_aio_context(bs));
194     }
195     do_drain_begin(drain_type, bs);
196     if (drain_type != BDRV_DRAIN_ALL) {
197         aio_context_release(bdrv_get_aio_context(bs));
198     }
199 }
200
201 static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs)
202 {
203     if (drain_type != BDRV_DRAIN_ALL) {
204         aio_context_acquire(bdrv_get_aio_context(bs));
205     }
206     do_drain_end(drain_type, bs);
207     if (drain_type != BDRV_DRAIN_ALL) {
208         aio_context_release(bdrv_get_aio_context(bs));
209     }
210 }
211
212 static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
213 {
214     BlockBackend *blk;
215     BlockDriverState *bs, *backing;
216     BDRVTestState *s, *backing_s;
217     BlockAIOCB *acb;
218     int aio_ret;
219
220     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
221
222     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
223     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
224                               &error_abort);
225     s = bs->opaque;
226     blk_insert_bs(blk, bs, &error_abort);
227
228     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
229     backing_s = backing->opaque;
230     bdrv_set_backing_hd(bs, backing, &error_abort);
231
232     /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
233     g_assert_cmpint(s->drain_count, ==, 0);
234     g_assert_cmpint(backing_s->drain_count, ==, 0);
235
236     do_drain_begin(drain_type, bs);
237
238     g_assert_cmpint(s->drain_count, ==, 1);
239     g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
240
241     do_drain_end(drain_type, bs);
242
243     g_assert_cmpint(s->drain_count, ==, 0);
244     g_assert_cmpint(backing_s->drain_count, ==, 0);
245
246     /* Now do the same while a request is pending */
247     aio_ret = -EINPROGRESS;
248     acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
249     g_assert(acb != NULL);
250     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
251
252     g_assert_cmpint(s->drain_count, ==, 0);
253     g_assert_cmpint(backing_s->drain_count, ==, 0);
254
255     do_drain_begin(drain_type, bs);
256
257     g_assert_cmpint(aio_ret, ==, 0);
258     g_assert_cmpint(s->drain_count, ==, 1);
259     g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
260
261     do_drain_end(drain_type, bs);
262
263     g_assert_cmpint(s->drain_count, ==, 0);
264     g_assert_cmpint(backing_s->drain_count, ==, 0);
265
266     bdrv_unref(backing);
267     bdrv_unref(bs);
268     blk_unref(blk);
269 }
270
271 static void test_drv_cb_drain_all(void)
272 {
273     test_drv_cb_common(BDRV_DRAIN_ALL, true);
274 }
275
276 static void test_drv_cb_drain(void)
277 {
278     test_drv_cb_common(BDRV_DRAIN, false);
279 }
280
281 static void test_drv_cb_drain_subtree(void)
282 {
283     test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
284 }
285
286 static void test_drv_cb_co_drain_all(void)
287 {
288     call_in_coroutine(test_drv_cb_drain_all);
289 }
290
291 static void test_drv_cb_co_drain(void)
292 {
293     call_in_coroutine(test_drv_cb_drain);
294 }
295
296 static void test_drv_cb_co_drain_subtree(void)
297 {
298     call_in_coroutine(test_drv_cb_drain_subtree);
299 }
300
301 static void test_quiesce_common(enum drain_type drain_type, bool recursive)
302 {
303     BlockBackend *blk;
304     BlockDriverState *bs, *backing;
305
306     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
307     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
308                               &error_abort);
309     blk_insert_bs(blk, bs, &error_abort);
310
311     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
312     bdrv_set_backing_hd(bs, backing, &error_abort);
313
314     g_assert_cmpint(bs->quiesce_counter, ==, 0);
315     g_assert_cmpint(backing->quiesce_counter, ==, 0);
316
317     do_drain_begin(drain_type, bs);
318
319     g_assert_cmpint(bs->quiesce_counter, ==, 1);
320     g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
321
322     do_drain_end(drain_type, bs);
323
324     g_assert_cmpint(bs->quiesce_counter, ==, 0);
325     g_assert_cmpint(backing->quiesce_counter, ==, 0);
326
327     bdrv_unref(backing);
328     bdrv_unref(bs);
329     blk_unref(blk);
330 }
331
332 static void test_quiesce_drain_all(void)
333 {
334     test_quiesce_common(BDRV_DRAIN_ALL, true);
335 }
336
337 static void test_quiesce_drain(void)
338 {
339     test_quiesce_common(BDRV_DRAIN, false);
340 }
341
342 static void test_quiesce_drain_subtree(void)
343 {
344     test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
345 }
346
347 static void test_quiesce_co_drain_all(void)
348 {
349     call_in_coroutine(test_quiesce_drain_all);
350 }
351
352 static void test_quiesce_co_drain(void)
353 {
354     call_in_coroutine(test_quiesce_drain);
355 }
356
357 static void test_quiesce_co_drain_subtree(void)
358 {
359     call_in_coroutine(test_quiesce_drain_subtree);
360 }
361
362 static void test_nested(void)
363 {
364     BlockBackend *blk;
365     BlockDriverState *bs, *backing;
366     BDRVTestState *s, *backing_s;
367     enum drain_type outer, inner;
368
369     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
370     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
371                               &error_abort);
372     s = bs->opaque;
373     blk_insert_bs(blk, bs, &error_abort);
374
375     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
376     backing_s = backing->opaque;
377     bdrv_set_backing_hd(bs, backing, &error_abort);
378
379     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
380         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
381             int backing_quiesce = (outer != BDRV_DRAIN) +
382                                   (inner != BDRV_DRAIN);
383
384             g_assert_cmpint(bs->quiesce_counter, ==, 0);
385             g_assert_cmpint(backing->quiesce_counter, ==, 0);
386             g_assert_cmpint(s->drain_count, ==, 0);
387             g_assert_cmpint(backing_s->drain_count, ==, 0);
388
389             do_drain_begin(outer, bs);
390             do_drain_begin(inner, bs);
391
392             g_assert_cmpint(bs->quiesce_counter, ==, 2);
393             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
394             g_assert_cmpint(s->drain_count, ==, 2);
395             g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
396
397             do_drain_end(inner, bs);
398             do_drain_end(outer, bs);
399
400             g_assert_cmpint(bs->quiesce_counter, ==, 0);
401             g_assert_cmpint(backing->quiesce_counter, ==, 0);
402             g_assert_cmpint(s->drain_count, ==, 0);
403             g_assert_cmpint(backing_s->drain_count, ==, 0);
404         }
405     }
406
407     bdrv_unref(backing);
408     bdrv_unref(bs);
409     blk_unref(blk);
410 }
411
412 static void test_multiparent(void)
413 {
414     BlockBackend *blk_a, *blk_b;
415     BlockDriverState *bs_a, *bs_b, *backing;
416     BDRVTestState *a_s, *b_s, *backing_s;
417
418     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
419     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
420                                 &error_abort);
421     a_s = bs_a->opaque;
422     blk_insert_bs(blk_a, bs_a, &error_abort);
423
424     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
425     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
426                                 &error_abort);
427     b_s = bs_b->opaque;
428     blk_insert_bs(blk_b, bs_b, &error_abort);
429
430     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
431     backing_s = backing->opaque;
432     bdrv_set_backing_hd(bs_a, backing, &error_abort);
433     bdrv_set_backing_hd(bs_b, backing, &error_abort);
434
435     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
436     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
437     g_assert_cmpint(backing->quiesce_counter, ==, 0);
438     g_assert_cmpint(a_s->drain_count, ==, 0);
439     g_assert_cmpint(b_s->drain_count, ==, 0);
440     g_assert_cmpint(backing_s->drain_count, ==, 0);
441
442     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
443
444     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
445     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
446     g_assert_cmpint(backing->quiesce_counter, ==, 1);
447     g_assert_cmpint(a_s->drain_count, ==, 1);
448     g_assert_cmpint(b_s->drain_count, ==, 1);
449     g_assert_cmpint(backing_s->drain_count, ==, 1);
450
451     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
452
453     g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
454     g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
455     g_assert_cmpint(backing->quiesce_counter, ==, 2);
456     g_assert_cmpint(a_s->drain_count, ==, 2);
457     g_assert_cmpint(b_s->drain_count, ==, 2);
458     g_assert_cmpint(backing_s->drain_count, ==, 2);
459
460     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
461
462     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
463     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
464     g_assert_cmpint(backing->quiesce_counter, ==, 1);
465     g_assert_cmpint(a_s->drain_count, ==, 1);
466     g_assert_cmpint(b_s->drain_count, ==, 1);
467     g_assert_cmpint(backing_s->drain_count, ==, 1);
468
469     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
470
471     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
472     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
473     g_assert_cmpint(backing->quiesce_counter, ==, 0);
474     g_assert_cmpint(a_s->drain_count, ==, 0);
475     g_assert_cmpint(b_s->drain_count, ==, 0);
476     g_assert_cmpint(backing_s->drain_count, ==, 0);
477
478     bdrv_unref(backing);
479     bdrv_unref(bs_a);
480     bdrv_unref(bs_b);
481     blk_unref(blk_a);
482     blk_unref(blk_b);
483 }
484
485 static void test_graph_change_drain_subtree(void)
486 {
487     BlockBackend *blk_a, *blk_b;
488     BlockDriverState *bs_a, *bs_b, *backing;
489     BDRVTestState *a_s, *b_s, *backing_s;
490
491     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
492     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
493                                 &error_abort);
494     a_s = bs_a->opaque;
495     blk_insert_bs(blk_a, bs_a, &error_abort);
496
497     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
498     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
499                                 &error_abort);
500     b_s = bs_b->opaque;
501     blk_insert_bs(blk_b, bs_b, &error_abort);
502
503     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
504     backing_s = backing->opaque;
505     bdrv_set_backing_hd(bs_a, backing, &error_abort);
506
507     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
508     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
509     g_assert_cmpint(backing->quiesce_counter, ==, 0);
510     g_assert_cmpint(a_s->drain_count, ==, 0);
511     g_assert_cmpint(b_s->drain_count, ==, 0);
512     g_assert_cmpint(backing_s->drain_count, ==, 0);
513
514     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
515     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
516     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
517     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
518     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
519
520     bdrv_set_backing_hd(bs_b, backing, &error_abort);
521     g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
522     g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
523     g_assert_cmpint(backing->quiesce_counter, ==, 5);
524     g_assert_cmpint(a_s->drain_count, ==, 5);
525     g_assert_cmpint(b_s->drain_count, ==, 5);
526     g_assert_cmpint(backing_s->drain_count, ==, 5);
527
528     bdrv_set_backing_hd(bs_b, NULL, &error_abort);
529     g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
530     g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
531     g_assert_cmpint(backing->quiesce_counter, ==, 3);
532     g_assert_cmpint(a_s->drain_count, ==, 3);
533     g_assert_cmpint(b_s->drain_count, ==, 2);
534     g_assert_cmpint(backing_s->drain_count, ==, 3);
535
536     bdrv_set_backing_hd(bs_b, backing, &error_abort);
537     g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
538     g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
539     g_assert_cmpint(backing->quiesce_counter, ==, 5);
540     g_assert_cmpint(a_s->drain_count, ==, 5);
541     g_assert_cmpint(b_s->drain_count, ==, 5);
542     g_assert_cmpint(backing_s->drain_count, ==, 5);
543
544     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
545     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
546     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
547     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
548     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
549
550     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
551     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
552     g_assert_cmpint(backing->quiesce_counter, ==, 0);
553     g_assert_cmpint(a_s->drain_count, ==, 0);
554     g_assert_cmpint(b_s->drain_count, ==, 0);
555     g_assert_cmpint(backing_s->drain_count, ==, 0);
556
557     bdrv_unref(backing);
558     bdrv_unref(bs_a);
559     bdrv_unref(bs_b);
560     blk_unref(blk_a);
561     blk_unref(blk_b);
562 }
563
564 static void test_graph_change_drain_all(void)
565 {
566     BlockBackend *blk_a, *blk_b;
567     BlockDriverState *bs_a, *bs_b;
568     BDRVTestState *a_s, *b_s;
569
570     /* Create node A with a BlockBackend */
571     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
572     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
573                                 &error_abort);
574     a_s = bs_a->opaque;
575     blk_insert_bs(blk_a, bs_a, &error_abort);
576
577     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
578     g_assert_cmpint(a_s->drain_count, ==, 0);
579
580     /* Call bdrv_drain_all_begin() */
581     bdrv_drain_all_begin();
582
583     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
584     g_assert_cmpint(a_s->drain_count, ==, 1);
585
586     /* Create node B with a BlockBackend */
587     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
588     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
589                                 &error_abort);
590     b_s = bs_b->opaque;
591     blk_insert_bs(blk_b, bs_b, &error_abort);
592
593     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
594     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
595     g_assert_cmpint(a_s->drain_count, ==, 1);
596     g_assert_cmpint(b_s->drain_count, ==, 1);
597
598     /* Unref and finally delete node A */
599     blk_unref(blk_a);
600
601     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
602     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
603     g_assert_cmpint(a_s->drain_count, ==, 1);
604     g_assert_cmpint(b_s->drain_count, ==, 1);
605
606     bdrv_unref(bs_a);
607
608     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
609     g_assert_cmpint(b_s->drain_count, ==, 1);
610
611     /* End the drained section */
612     bdrv_drain_all_end();
613
614     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
615     g_assert_cmpint(b_s->drain_count, ==, 0);
616
617     bdrv_unref(bs_b);
618     blk_unref(blk_b);
619 }
620
621 struct test_iothread_data {
622     BlockDriverState *bs;
623     enum drain_type drain_type;
624     int *aio_ret;
625 };
626
627 static void test_iothread_drain_entry(void *opaque)
628 {
629     struct test_iothread_data *data = opaque;
630
631     aio_context_acquire(bdrv_get_aio_context(data->bs));
632     do_drain_begin(data->drain_type, data->bs);
633     g_assert_cmpint(*data->aio_ret, ==, 0);
634     do_drain_end(data->drain_type, data->bs);
635     aio_context_release(bdrv_get_aio_context(data->bs));
636
637     qemu_event_set(&done_event);
638 }
639
640 static void test_iothread_aio_cb(void *opaque, int ret)
641 {
642     int *aio_ret = opaque;
643     *aio_ret = ret;
644     qemu_event_set(&done_event);
645 }
646
647 static void test_iothread_main_thread_bh(void *opaque)
648 {
649     struct test_iothread_data *data = opaque;
650
651     /* Test that the AioContext is not yet locked in a random BH that is
652      * executed during drain, otherwise this would deadlock. */
653     aio_context_acquire(bdrv_get_aio_context(data->bs));
654     bdrv_flush(data->bs);
655     aio_context_release(bdrv_get_aio_context(data->bs));
656 }
657
658 /*
659  * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
660  * The request involves a BH on iothread 2 before it can complete.
661  *
662  * @drain_thread = 0 means that do_drain_begin/end are called from the main
663  * thread, @drain_thread = 1 means that they are called from iothread 1. Drain
664  * for this BDS cannot be called from iothread 2 because only the main thread
665  * may do cross-AioContext polling.
666  */
667 static void test_iothread_common(enum drain_type drain_type, int drain_thread)
668 {
669     BlockBackend *blk;
670     BlockDriverState *bs;
671     BDRVTestState *s;
672     BlockAIOCB *acb;
673     int aio_ret;
674     struct test_iothread_data data;
675
676     IOThread *a = iothread_new();
677     IOThread *b = iothread_new();
678     AioContext *ctx_a = iothread_get_aio_context(a);
679     AioContext *ctx_b = iothread_get_aio_context(b);
680
681     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
682
683     /* bdrv_drain_all() may only be called from the main loop thread */
684     if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) {
685         goto out;
686     }
687
688     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
689     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
690                               &error_abort);
691     s = bs->opaque;
692     blk_insert_bs(blk, bs, &error_abort);
693     blk_set_disable_request_queuing(blk, true);
694
695     blk_set_aio_context(blk, ctx_a, &error_abort);
696     aio_context_acquire(ctx_a);
697
698     s->bh_indirection_ctx = ctx_b;
699
700     aio_ret = -EINPROGRESS;
701     qemu_event_reset(&done_event);
702
703     if (drain_thread == 0) {
704         acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret);
705     } else {
706         acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
707     }
708     g_assert(acb != NULL);
709     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
710
711     aio_context_release(ctx_a);
712
713     data = (struct test_iothread_data) {
714         .bs         = bs,
715         .drain_type = drain_type,
716         .aio_ret    = &aio_ret,
717     };
718
719     switch (drain_thread) {
720     case 0:
721         if (drain_type != BDRV_DRAIN_ALL) {
722             aio_context_acquire(ctx_a);
723         }
724
725         aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data);
726
727         /* The request is running on the IOThread a. Draining its block device
728          * will make sure that it has completed as far as the BDS is concerned,
729          * but the drain in this thread can continue immediately after
730          * bdrv_dec_in_flight() and aio_ret might be assigned only slightly
731          * later. */
732         do_drain_begin(drain_type, bs);
733         g_assert_cmpint(bs->in_flight, ==, 0);
734
735         if (drain_type != BDRV_DRAIN_ALL) {
736             aio_context_release(ctx_a);
737         }
738         qemu_event_wait(&done_event);
739         if (drain_type != BDRV_DRAIN_ALL) {
740             aio_context_acquire(ctx_a);
741         }
742
743         g_assert_cmpint(aio_ret, ==, 0);
744         do_drain_end(drain_type, bs);
745
746         if (drain_type != BDRV_DRAIN_ALL) {
747             aio_context_release(ctx_a);
748         }
749         break;
750     case 1:
751         aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
752         qemu_event_wait(&done_event);
753         break;
754     default:
755         g_assert_not_reached();
756     }
757
758     aio_context_acquire(ctx_a);
759     blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
760     aio_context_release(ctx_a);
761
762     bdrv_unref(bs);
763     blk_unref(blk);
764
765 out:
766     iothread_join(a);
767     iothread_join(b);
768 }
769
770 static void test_iothread_drain_all(void)
771 {
772     test_iothread_common(BDRV_DRAIN_ALL, 0);
773     test_iothread_common(BDRV_DRAIN_ALL, 1);
774 }
775
776 static void test_iothread_drain(void)
777 {
778     test_iothread_common(BDRV_DRAIN, 0);
779     test_iothread_common(BDRV_DRAIN, 1);
780 }
781
782 static void test_iothread_drain_subtree(void)
783 {
784     test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
785     test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
786 }
787
788
789 typedef struct TestBlockJob {
790     BlockJob common;
791     int run_ret;
792     int prepare_ret;
793     bool running;
794     bool should_complete;
795 } TestBlockJob;
796
797 static int test_job_prepare(Job *job)
798 {
799     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
800
801     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
802     blk_flush(s->common.blk);
803     return s->prepare_ret;
804 }
805
806 static void test_job_commit(Job *job)
807 {
808     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
809
810     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
811     blk_flush(s->common.blk);
812 }
813
814 static void test_job_abort(Job *job)
815 {
816     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
817
818     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
819     blk_flush(s->common.blk);
820 }
821
822 static int coroutine_fn test_job_run(Job *job, Error **errp)
823 {
824     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
825
826     /* We are running the actual job code past the pause point in
827      * job_co_entry(). */
828     s->running = true;
829
830     job_transition_to_ready(&s->common.job);
831     while (!s->should_complete) {
832         /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
833          * emulate some actual activity (probably some I/O) here so that drain
834          * has to wait for this activity to stop. */
835         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
836
837         job_pause_point(&s->common.job);
838     }
839
840     return s->run_ret;
841 }
842
843 static void test_job_complete(Job *job, Error **errp)
844 {
845     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
846     s->should_complete = true;
847 }
848
849 BlockJobDriver test_job_driver = {
850     .job_driver = {
851         .instance_size  = sizeof(TestBlockJob),
852         .free           = block_job_free,
853         .user_resume    = block_job_user_resume,
854         .run            = test_job_run,
855         .complete       = test_job_complete,
856         .prepare        = test_job_prepare,
857         .commit         = test_job_commit,
858         .abort          = test_job_abort,
859     },
860 };
861
/* How the test job is configured to end. */
enum test_job_result {
    TEST_JOB_SUCCESS = 0,       /* no error code is configured */
    TEST_JOB_FAIL_RUN,          /* run_ret is set to -EIO */
    TEST_JOB_FAIL_PREPARE,      /* prepare_ret is set to -EIO */
};
867
/* Which node of the source chain the blockjob test drains. */
enum test_job_drain_node {
    TEST_JOB_DRAIN_SRC = 0,         /* the job's source node itself */
    TEST_JOB_DRAIN_SRC_CHILD,       /* the source's backing node */
    TEST_JOB_DRAIN_SRC_PARENT,      /* the overlay on top of the source */
};
873
874 static void test_blockjob_common_drain_node(enum drain_type drain_type,
875                                             bool use_iothread,
876                                             enum test_job_result result,
877                                             enum test_job_drain_node drain_node)
878 {
879     BlockBackend *blk_src, *blk_target;
880     BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs;
881     BlockJob *job;
882     TestBlockJob *tjob;
883     IOThread *iothread = NULL;
884     AioContext *ctx;
885     int ret;
886
887     src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
888                                &error_abort);
889     src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing",
890                                        BDRV_O_RDWR, &error_abort);
891     src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay",
892                                        BDRV_O_RDWR, &error_abort);
893
894     bdrv_set_backing_hd(src_overlay, src, &error_abort);
895     bdrv_unref(src);
896     bdrv_set_backing_hd(src, src_backing, &error_abort);
897     bdrv_unref(src_backing);
898
899     blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
900     blk_insert_bs(blk_src, src_overlay, &error_abort);
901
902     switch (drain_node) {
903     case TEST_JOB_DRAIN_SRC:
904         drain_bs = src;
905         break;
906     case TEST_JOB_DRAIN_SRC_CHILD:
907         drain_bs = src_backing;
908         break;
909     case TEST_JOB_DRAIN_SRC_PARENT:
910         drain_bs = src_overlay;
911         break;
912     default:
913         g_assert_not_reached();
914     }
915
916     if (use_iothread) {
917         iothread = iothread_new();
918         ctx = iothread_get_aio_context(iothread);
919         blk_set_aio_context(blk_src, ctx, &error_abort);
920     } else {
921         ctx = qemu_get_aio_context();
922     }
923
924     target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
925                                   &error_abort);
926     blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
927     blk_insert_bs(blk_target, target, &error_abort);
928     blk_set_allow_aio_context_change(blk_target, true);
929
930     aio_context_acquire(ctx);
931     tjob = block_job_create("job0", &test_job_driver, NULL, src,
932                             0, BLK_PERM_ALL,
933                             0, 0, NULL, NULL, &error_abort);
934     job = &tjob->common;
935     block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
936
937     switch (result) {
938     case TEST_JOB_SUCCESS:
939         break;
940     case TEST_JOB_FAIL_RUN:
941         tjob->run_ret = -EIO;
942         break;
943     case TEST_JOB_FAIL_PREPARE:
944         tjob->prepare_ret = -EIO;
945         break;
946     }
947
948     job_start(&job->job);
949     aio_context_release(ctx);
950
951     if (use_iothread) {
952         /* job_co_entry() is run in the I/O thread, wait for the actual job
953          * code to start (we don't want to catch the job in the pause point in
954          * job_co_entry(). */
955         while (!tjob->running) {
956             aio_poll(qemu_get_aio_context(), false);
957         }
958     }
959
960     g_assert_cmpint(job->job.pause_count, ==, 0);
961     g_assert_false(job->job.paused);
962     g_assert_true(tjob->running);
963     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
964
965     do_drain_begin_unlocked(drain_type, drain_bs);
966
967     if (drain_type == BDRV_DRAIN_ALL) {
968         /* bdrv_drain_all() drains both src and target */
969         g_assert_cmpint(job->job.pause_count, ==, 2);
970     } else {
971         g_assert_cmpint(job->job.pause_count, ==, 1);
972     }
973     g_assert_true(job->job.paused);
974     g_assert_false(job->job.busy); /* The job is paused */
975
976     do_drain_end_unlocked(drain_type, drain_bs);
977
978     if (use_iothread) {
979         /* paused is reset in the I/O thread, wait for it */
980         while (job->job.paused) {
981             aio_poll(qemu_get_aio_context(), false);
982         }
983     }
984
985     g_assert_cmpint(job->job.pause_count, ==, 0);
986     g_assert_false(job->job.paused);
987     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
988
989     do_drain_begin_unlocked(drain_type, target);
990
991     if (drain_type == BDRV_DRAIN_ALL) {
992         /* bdrv_drain_all() drains both src and target */
993         g_assert_cmpint(job->job.pause_count, ==, 2);
994     } else {
995         g_assert_cmpint(job->job.pause_count, ==, 1);
996     }
997     g_assert_true(job->job.paused);
998     g_assert_false(job->job.busy); /* The job is paused */
999
1000     do_drain_end_unlocked(drain_type, target);
1001
1002     if (use_iothread) {
1003         /* paused is reset in the I/O thread, wait for it */
1004         while (job->job.paused) {
1005             aio_poll(qemu_get_aio_context(), false);
1006         }
1007     }
1008
1009     g_assert_cmpint(job->job.pause_count, ==, 0);
1010     g_assert_false(job->job.paused);
1011     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
1012
1013     aio_context_acquire(ctx);
1014     ret = job_complete_sync(&job->job, &error_abort);
1015     g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
1016
1017     if (use_iothread) {
1018         blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort);
1019         assert(blk_get_aio_context(blk_target) == qemu_get_aio_context());
1020     }
1021     aio_context_release(ctx);
1022
1023     blk_unref(blk_src);
1024     blk_unref(blk_target);
1025     bdrv_unref(src_overlay);
1026     bdrv_unref(target);
1027
1028     if (iothread) {
1029         iothread_join(iothread);
1030     }
1031 }
1032
1033 static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
1034                                  enum test_job_result result)
1035 {
1036     test_blockjob_common_drain_node(drain_type, use_iothread, result,
1037                                     TEST_JOB_DRAIN_SRC);
1038     test_blockjob_common_drain_node(drain_type, use_iothread, result,
1039                                     TEST_JOB_DRAIN_SRC_CHILD);
1040     if (drain_type == BDRV_SUBTREE_DRAIN) {
1041         test_blockjob_common_drain_node(drain_type, use_iothread, result,
1042                                         TEST_JOB_DRAIN_SRC_PARENT);
1043     }
1044 }
1045
1046 static void test_blockjob_drain_all(void)
1047 {
1048     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
1049 }
1050
1051 static void test_blockjob_drain(void)
1052 {
1053     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
1054 }
1055
1056 static void test_blockjob_drain_subtree(void)
1057 {
1058     test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
1059 }
1060
1061 static void test_blockjob_error_drain_all(void)
1062 {
1063     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
1064     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE);
1065 }
1066
1067 static void test_blockjob_error_drain(void)
1068 {
1069     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN);
1070     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
1071 }
1072
1073 static void test_blockjob_error_drain_subtree(void)
1074 {
1075     test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
1076     test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
1077 }
1078
1079 static void test_blockjob_iothread_drain_all(void)
1080 {
1081     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
1082 }
1083
1084 static void test_blockjob_iothread_drain(void)
1085 {
1086     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
1087 }
1088
1089 static void test_blockjob_iothread_drain_subtree(void)
1090 {
1091     test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
1092 }
1093
1094 static void test_blockjob_iothread_error_drain_all(void)
1095 {
1096     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
1097     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_PREPARE);
1098 }
1099
1100 static void test_blockjob_iothread_error_drain(void)
1101 {
1102     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN);
1103     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
1104 }
1105
1106 static void test_blockjob_iothread_error_drain_subtree(void)
1107 {
1108     test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
1109     test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
1110 }
1111
1112
1113 typedef struct BDRVTestTopState {
1114     BdrvChild *wait_child;
1115 } BDRVTestTopState;
1116
1117 static void bdrv_test_top_close(BlockDriverState *bs)
1118 {
1119     BdrvChild *c, *next_c;
1120     QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
1121         bdrv_unref_child(bs, c);
1122     }
1123 }
1124
1125 static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs,
1126                                                 uint64_t offset, uint64_t bytes,
1127                                                 QEMUIOVector *qiov, int flags)
1128 {
1129     BDRVTestTopState *tts = bs->opaque;
1130     return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags);
1131 }
1132
1133 static BlockDriver bdrv_test_top_driver = {
1134     .format_name            = "test_top_driver",
1135     .instance_size          = sizeof(BDRVTestTopState),
1136
1137     .bdrv_close             = bdrv_test_top_close,
1138     .bdrv_co_preadv         = bdrv_test_top_co_preadv,
1139
1140     .bdrv_child_perm        = bdrv_format_default_perms,
1141 };
1142
1143 typedef struct TestCoDeleteByDrainData {
1144     BlockBackend *blk;
1145     bool detach_instead_of_delete;
1146     bool done;
1147 } TestCoDeleteByDrainData;
1148
1149 static void coroutine_fn test_co_delete_by_drain(void *opaque)
1150 {
1151     TestCoDeleteByDrainData *dbdd = opaque;
1152     BlockBackend *blk = dbdd->blk;
1153     BlockDriverState *bs = blk_bs(blk);
1154     BDRVTestTopState *tts = bs->opaque;
1155     void *buffer = g_malloc(65536);
1156     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);
1157
1158     /* Pretend some internal write operation from parent to child.
1159      * Important: We have to read from the child, not from the parent!
1160      * Draining works by first propagating it all up the tree to the
1161      * root and then waiting for drainage from root to the leaves
1162      * (protocol nodes).  If we have a request waiting on the root,
1163      * everything will be drained before we go back down the tree, but
1164      * we do not want that.  We want to be in the middle of draining
1165      * when this following requests returns. */
1166     bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0);
1167
1168     g_assert_cmpint(bs->refcnt, ==, 1);
1169
1170     if (!dbdd->detach_instead_of_delete) {
1171         blk_unref(blk);
1172     } else {
1173         BdrvChild *c, *next_c;
1174         QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
1175             bdrv_unref_child(bs, c);
1176         }
1177     }
1178
1179     dbdd->done = true;
1180     g_free(buffer);
1181 }
1182
1183 /**
1184  * Test what happens when some BDS has some children, you drain one of
1185  * them and this results in the BDS being deleted.
1186  *
1187  * If @detach_instead_of_delete is set, the BDS is not going to be
1188  * deleted but will only detach all of its children.
1189  */
1190 static void do_test_delete_by_drain(bool detach_instead_of_delete,
1191                                     enum drain_type drain_type)
1192 {
1193     BlockBackend *blk;
1194     BlockDriverState *bs, *child_bs, *null_bs;
1195     BDRVTestTopState *tts;
1196     TestCoDeleteByDrainData dbdd;
1197     Coroutine *co;
1198
1199     bs = bdrv_new_open_driver(&bdrv_test_top_driver, "top", BDRV_O_RDWR,
1200                               &error_abort);
1201     bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
1202     tts = bs->opaque;
1203
1204     null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
1205                         &error_abort);
1206     bdrv_attach_child(bs, null_bs, "null-child", &child_file, 0, &error_abort);
1207
1208     /* This child will be the one to pass to requests through to, and
1209      * it will stall until a drain occurs */
1210     child_bs = bdrv_new_open_driver(&bdrv_test, "child", BDRV_O_RDWR,
1211                                     &error_abort);
1212     child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
1213     /* Takes our reference to child_bs */
1214     tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", &child_file,
1215                                         0, &error_abort);
1216
1217     /* This child is just there to be deleted
1218      * (for detach_instead_of_delete == true) */
1219     null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
1220                         &error_abort);
1221     bdrv_attach_child(bs, null_bs, "null-child", &child_file, 0, &error_abort);
1222
1223     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
1224     blk_insert_bs(blk, bs, &error_abort);
1225
1226     /* Referenced by blk now */
1227     bdrv_unref(bs);
1228
1229     g_assert_cmpint(bs->refcnt, ==, 1);
1230     g_assert_cmpint(child_bs->refcnt, ==, 1);
1231     g_assert_cmpint(null_bs->refcnt, ==, 1);
1232
1233
1234     dbdd = (TestCoDeleteByDrainData){
1235         .blk = blk,
1236         .detach_instead_of_delete = detach_instead_of_delete,
1237         .done = false,
1238     };
1239     co = qemu_coroutine_create(test_co_delete_by_drain, &dbdd);
1240     qemu_coroutine_enter(co);
1241
1242     /* Drain the child while the read operation is still pending.
1243      * This should result in the operation finishing and
1244      * test_co_delete_by_drain() resuming.  Thus, @bs will be deleted
1245      * and the coroutine will exit while this drain operation is still
1246      * in progress. */
1247     switch (drain_type) {
1248     case BDRV_DRAIN:
1249         bdrv_ref(child_bs);
1250         bdrv_drain(child_bs);
1251         bdrv_unref(child_bs);
1252         break;
1253     case BDRV_SUBTREE_DRAIN:
1254         /* Would have to ref/unref bs here for !detach_instead_of_delete, but
1255          * then the whole test becomes pointless because the graph changes
1256          * don't occur during the drain any more. */
1257         assert(detach_instead_of_delete);
1258         bdrv_subtree_drained_begin(bs);
1259         bdrv_subtree_drained_end(bs);
1260         break;
1261     case BDRV_DRAIN_ALL:
1262         bdrv_drain_all_begin();
1263         bdrv_drain_all_end();
1264         break;
1265     default:
1266         g_assert_not_reached();
1267     }
1268
1269     while (!dbdd.done) {
1270         aio_poll(qemu_get_aio_context(), true);
1271     }
1272
1273     if (detach_instead_of_delete) {
1274         /* Here, the reference has not passed over to the coroutine,
1275          * so we have to delete the BB ourselves */
1276         blk_unref(blk);
1277     }
1278 }
1279
1280 static void test_delete_by_drain(void)
1281 {
1282     do_test_delete_by_drain(false, BDRV_DRAIN);
1283 }
1284
1285 static void test_detach_by_drain_all(void)
1286 {
1287     do_test_delete_by_drain(true, BDRV_DRAIN_ALL);
1288 }
1289
1290 static void test_detach_by_drain(void)
1291 {
1292     do_test_delete_by_drain(true, BDRV_DRAIN);
1293 }
1294
1295 static void test_detach_by_drain_subtree(void)
1296 {
1297     do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
1298 }
1299
1300
1301 struct detach_by_parent_data {
1302     BlockDriverState *parent_b;
1303     BdrvChild *child_b;
1304     BlockDriverState *c;
1305     BdrvChild *child_c;
1306     bool by_parent_cb;
1307 };
1308 static struct detach_by_parent_data detach_by_parent_data;
1309
1310 static void detach_indirect_bh(void *opaque)
1311 {
1312     struct detach_by_parent_data *data = opaque;
1313
1314     bdrv_unref_child(data->parent_b, data->child_b);
1315
1316     bdrv_ref(data->c);
1317     data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C",
1318                                       &child_file, 0, &error_abort);
1319 }
1320
1321 static void detach_by_parent_aio_cb(void *opaque, int ret)
1322 {
1323     struct detach_by_parent_data *data = &detach_by_parent_data;
1324
1325     g_assert_cmpint(ret, ==, 0);
1326     if (data->by_parent_cb) {
1327         detach_indirect_bh(data);
1328     }
1329 }
1330
1331 static void detach_by_driver_cb_drained_begin(BdrvChild *child)
1332 {
1333     aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
1334                             detach_indirect_bh, &detach_by_parent_data);
1335     child_file.drained_begin(child);
1336 }
1337
1338 static BdrvChildClass detach_by_driver_cb_class;
1339
1340 /*
1341  * Initial graph:
1342  *
1343  * PA     PB
1344  *    \ /   \
1345  *     A     B     C
1346  *
1347  * by_parent_cb == true:  Test that parent callbacks don't poll
1348  *
1349  *     PA has a pending write request whose callback changes the child nodes of
1350  *     PB: It removes B and adds C instead. The subtree of PB is drained, which
1351  *     will indirectly drain the write request, too.
1352  *
1353  * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll
1354  *
1355  *     PA's BdrvChildClass has a .drained_begin callback that schedules a BH
1356  *     that does the same graph change. If bdrv_drain_invoke() calls it, the
1357  *     state is messed up, but if it is only polled in the single
1358  *     BDRV_POLL_WHILE() at the end of the drain, this should work fine.
1359  */
1360 static void test_detach_indirect(bool by_parent_cb)
1361 {
1362     BlockBackend *blk;
1363     BlockDriverState *parent_a, *parent_b, *a, *b, *c;
1364     BdrvChild *child_a, *child_b;
1365     BlockAIOCB *acb;
1366
1367     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
1368
1369     if (!by_parent_cb) {
1370         detach_by_driver_cb_class = child_file;
1371         detach_by_driver_cb_class.drained_begin =
1372             detach_by_driver_cb_drained_begin;
1373     }
1374
1375     /* Create all involved nodes */
1376     parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
1377                                     &error_abort);
1378     parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0,
1379                                     &error_abort);
1380
1381     a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort);
1382     b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort);
1383     c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort);
1384
1385     /* blk is a BB for parent-a */
1386     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
1387     blk_insert_bs(blk, parent_a, &error_abort);
1388     bdrv_unref(parent_a);
1389
1390     /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver
1391      * callback must not return immediately. */
1392     if (!by_parent_cb) {
1393         BDRVTestState *s = parent_a->opaque;
1394         s->sleep_in_drain_begin = true;
1395     }
1396
1397     /* Set child relationships */
1398     bdrv_ref(b);
1399     bdrv_ref(a);
1400     child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_file, 0,
1401                                 &error_abort);
1402     child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_backing, 0,
1403                                 &error_abort);
1404
1405     bdrv_ref(a);
1406     bdrv_attach_child(parent_a, a, "PA-A",
1407                       by_parent_cb ? &child_file : &detach_by_driver_cb_class,
1408                       0, &error_abort);
1409
1410     g_assert_cmpint(parent_a->refcnt, ==, 1);
1411     g_assert_cmpint(parent_b->refcnt, ==, 1);
1412     g_assert_cmpint(a->refcnt, ==, 3);
1413     g_assert_cmpint(b->refcnt, ==, 2);
1414     g_assert_cmpint(c->refcnt, ==, 1);
1415
1416     g_assert(QLIST_FIRST(&parent_b->children) == child_a);
1417     g_assert(QLIST_NEXT(child_a, next) == child_b);
1418     g_assert(QLIST_NEXT(child_b, next) == NULL);
1419
1420     /* Start the evil write request */
1421     detach_by_parent_data = (struct detach_by_parent_data) {
1422         .parent_b = parent_b,
1423         .child_b = child_b,
1424         .c = c,
1425         .by_parent_cb = by_parent_cb,
1426     };
1427     acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
1428     g_assert(acb != NULL);
1429
1430     /* Drain and check the expected result */
1431     bdrv_subtree_drained_begin(parent_b);
1432
1433     g_assert(detach_by_parent_data.child_c != NULL);
1434
1435     g_assert_cmpint(parent_a->refcnt, ==, 1);
1436     g_assert_cmpint(parent_b->refcnt, ==, 1);
1437     g_assert_cmpint(a->refcnt, ==, 3);
1438     g_assert_cmpint(b->refcnt, ==, 1);
1439     g_assert_cmpint(c->refcnt, ==, 2);
1440
1441     g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c);
1442     g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a);
1443     g_assert(QLIST_NEXT(child_a, next) == NULL);
1444
1445     g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
1446     g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
1447     g_assert_cmpint(a->quiesce_counter, ==, 1);
1448     g_assert_cmpint(b->quiesce_counter, ==, 0);
1449     g_assert_cmpint(c->quiesce_counter, ==, 1);
1450
1451     bdrv_subtree_drained_end(parent_b);
1452
1453     bdrv_unref(parent_b);
1454     blk_unref(blk);
1455
1456     g_assert_cmpint(a->refcnt, ==, 1);
1457     g_assert_cmpint(b->refcnt, ==, 1);
1458     g_assert_cmpint(c->refcnt, ==, 1);
1459     bdrv_unref(a);
1460     bdrv_unref(b);
1461     bdrv_unref(c);
1462 }
1463
/* Graph change triggered from the parent's AIO completion callback. */
static void test_detach_by_parent_cb(void)
{
    const bool by_parent_cb = true;

    test_detach_indirect(by_parent_cb);
}
1468
/* Graph change triggered from a BH scheduled in .drained_begin. */
static void test_detach_by_driver_cb(void)
{
    const bool by_parent_cb = false;

    test_detach_indirect(by_parent_cb);
}
1473
1474 static void test_append_to_drained(void)
1475 {
1476     BlockBackend *blk;
1477     BlockDriverState *base, *overlay;
1478     BDRVTestState *base_s, *overlay_s;
1479
1480     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
1481     base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
1482     base_s = base->opaque;
1483     blk_insert_bs(blk, base, &error_abort);
1484
1485     overlay = bdrv_new_open_driver(&bdrv_test, "overlay", BDRV_O_RDWR,
1486                                    &error_abort);
1487     overlay_s = overlay->opaque;
1488
1489     do_drain_begin(BDRV_DRAIN, base);
1490     g_assert_cmpint(base->quiesce_counter, ==, 1);
1491     g_assert_cmpint(base_s->drain_count, ==, 1);
1492     g_assert_cmpint(base->in_flight, ==, 0);
1493
1494     /* Takes ownership of overlay, so we don't have to unref it later */
1495     bdrv_append(overlay, base, &error_abort);
1496     g_assert_cmpint(base->in_flight, ==, 0);
1497     g_assert_cmpint(overlay->in_flight, ==, 0);
1498
1499     g_assert_cmpint(base->quiesce_counter, ==, 1);
1500     g_assert_cmpint(base_s->drain_count, ==, 1);
1501     g_assert_cmpint(overlay->quiesce_counter, ==, 1);
1502     g_assert_cmpint(overlay_s->drain_count, ==, 1);
1503
1504     do_drain_end(BDRV_DRAIN, base);
1505
1506     g_assert_cmpint(base->quiesce_counter, ==, 0);
1507     g_assert_cmpint(base_s->drain_count, ==, 0);
1508     g_assert_cmpint(overlay->quiesce_counter, ==, 0);
1509     g_assert_cmpint(overlay_s->drain_count, ==, 0);
1510
1511     bdrv_unref(base);
1512     blk_unref(blk);
1513 }
1514
1515 static void test_set_aio_context(void)
1516 {
1517     BlockDriverState *bs;
1518     IOThread *a = iothread_new();
1519     IOThread *b = iothread_new();
1520     AioContext *ctx_a = iothread_get_aio_context(a);
1521     AioContext *ctx_b = iothread_get_aio_context(b);
1522
1523     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
1524                               &error_abort);
1525
1526     bdrv_drained_begin(bs);
1527     bdrv_try_set_aio_context(bs, ctx_a, &error_abort);
1528
1529     aio_context_acquire(ctx_a);
1530     bdrv_drained_end(bs);
1531
1532     bdrv_drained_begin(bs);
1533     bdrv_try_set_aio_context(bs, ctx_b, &error_abort);
1534     aio_context_release(ctx_a);
1535     aio_context_acquire(ctx_b);
1536     bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort);
1537     aio_context_release(ctx_b);
1538     bdrv_drained_end(bs);
1539
1540     bdrv_unref(bs);
1541     iothread_join(a);
1542     iothread_join(b);
1543 }
1544
1545
1546 typedef struct TestDropBackingBlockJob {
1547     BlockJob common;
1548     bool should_complete;
1549     bool *did_complete;
1550     BlockDriverState *detach_also;
1551 } TestDropBackingBlockJob;
1552
1553 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp)
1554 {
1555     TestDropBackingBlockJob *s =
1556         container_of(job, TestDropBackingBlockJob, common.job);
1557
1558     while (!s->should_complete) {
1559         job_sleep_ns(job, 0);
1560     }
1561
1562     return 0;
1563 }
1564
1565 static void test_drop_backing_job_commit(Job *job)
1566 {
1567     TestDropBackingBlockJob *s =
1568         container_of(job, TestDropBackingBlockJob, common.job);
1569
1570     bdrv_set_backing_hd(blk_bs(s->common.blk), NULL, &error_abort);
1571     bdrv_set_backing_hd(s->detach_also, NULL, &error_abort);
1572
1573     *s->did_complete = true;
1574 }
1575
1576 static const BlockJobDriver test_drop_backing_job_driver = {
1577     .job_driver = {
1578         .instance_size  = sizeof(TestDropBackingBlockJob),
1579         .free           = block_job_free,
1580         .user_resume    = block_job_user_resume,
1581         .run            = test_drop_backing_job_run,
1582         .commit         = test_drop_backing_job_commit,
1583     }
1584 };
1585
1586 /**
1587  * Creates a child node with three parent nodes on it, and then runs a
1588  * block job on the final one, parent-node-2.
1589  *
1590  * The job is then asked to complete before a section where the child
1591  * is drained.
1592  *
1593  * Ending this section will undrain the child's parents, first
1594  * parent-node-2, then parent-node-1, then parent-node-0 -- the parent
1595  * list is in reverse order of how they were added.  Ending the drain
1596  * on parent-node-2 will resume the job, thus completing it and
1597  * scheduling job_exit().
1598  *
1599  * Ending the drain on parent-node-1 will poll the AioContext, which
1600  * lets job_exit() and thus test_drop_backing_job_commit() run.  That
1601  * function first removes the child as parent-node-2's backing file.
1602  *
1603  * In old (and buggy) implementations, there are two problems with
1604  * that:
1605  * (A) bdrv_drain_invoke() polls for every node that leaves the
1606  *     drained section.  This means that job_exit() is scheduled
1607  *     before the child has left the drained section.  Its
1608  *     quiesce_counter is therefore still 1 when it is removed from
1609  *     parent-node-2.
1610  *
1611  * (B) bdrv_replace_child_noperm() calls drained_end() on the old
1612  *     child's parents as many times as the child is quiesced.  This
1613  *     means it will call drained_end() on parent-node-2 once.
1614  *     Because parent-node-2 is no longer quiesced at this point, this
1615  *     will fail.
1616  *
1617  * bdrv_replace_child_noperm() therefore must call drained_end() on
1618  * the parent only if it really is still drained because the child is
1619  * drained.
1620  *
1621  * If removing child from parent-node-2 was successful (as it should
1622  * be), test_drop_backing_job_commit() will then also remove the child
1623  * from parent-node-0.
1624  *
1625  * With an old version of our drain infrastructure ((A) above), that
1626  * resulted in the following flow:
1627  *
1628  * 1. child attempts to leave its drained section.  The call recurses
1629  *    to its parents.
1630  *
1631  * 2. parent-node-2 leaves the drained section.  Polling in
1632  *    bdrv_drain_invoke() will schedule job_exit().
1633  *
1634  * 3. parent-node-1 leaves the drained section.  Polling in
1635  *    bdrv_drain_invoke() will run job_exit(), thus disconnecting
1636  *    parent-node-0 from the child node.
1637  *
1638  * 4. bdrv_parent_drained_end() uses a QLIST_FOREACH_SAFE() loop to
1639  *    iterate over the parents.  Thus, it now accesses the BdrvChild
1640  *    object that used to connect parent-node-0 and the child node.
1641  *    However, that object no longer exists, so it accesses a dangling
1642  *    pointer.
1643  *
1644  * The solution is to only poll once when running a bdrv_drained_end()
1645  * operation, specifically at the end when all drained_end()
1646  * operations for all involved nodes have been scheduled.
1647  * Note that this also solves (A) above, thus hiding (B).
1648  */
1649 static void test_blockjob_commit_by_drained_end(void)
1650 {
1651     BlockDriverState *bs_child, *bs_parents[3];
1652     TestDropBackingBlockJob *job;
1653     bool job_has_completed = false;
1654     int i;
1655
1656     bs_child = bdrv_new_open_driver(&bdrv_test, "child-node", BDRV_O_RDWR,
1657                                     &error_abort);
1658
1659     for (i = 0; i < 3; i++) {
1660         char name[32];
1661         snprintf(name, sizeof(name), "parent-node-%i", i);
1662         bs_parents[i] = bdrv_new_open_driver(&bdrv_test, name, BDRV_O_RDWR,
1663                                              &error_abort);
1664         bdrv_set_backing_hd(bs_parents[i], bs_child, &error_abort);
1665     }
1666
1667     job = block_job_create("job", &test_drop_backing_job_driver, NULL,
1668                            bs_parents[2], 0, BLK_PERM_ALL, 0, 0, NULL, NULL,
1669                            &error_abort);
1670
1671     job->detach_also = bs_parents[0];
1672     job->did_complete = &job_has_completed;
1673
1674     job_start(&job->common.job);
1675
1676     job->should_complete = true;
1677     bdrv_drained_begin(bs_child);
1678     g_assert(!job_has_completed);
1679     bdrv_drained_end(bs_child);
1680     g_assert(job_has_completed);
1681
1682     bdrv_unref(bs_parents[0]);
1683     bdrv_unref(bs_parents[1]);
1684     bdrv_unref(bs_parents[2]);
1685     bdrv_unref(bs_child);
1686 }
1687
1688
1689 typedef struct TestSimpleBlockJob {
1690     BlockJob common;
1691     bool should_complete;
1692     bool *did_complete;
1693 } TestSimpleBlockJob;
1694
1695 static int coroutine_fn test_simple_job_run(Job *job, Error **errp)
1696 {
1697     TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
1698
1699     while (!s->should_complete) {
1700         job_sleep_ns(job, 0);
1701     }
1702
1703     return 0;
1704 }
1705
1706 static void test_simple_job_clean(Job *job)
1707 {
1708     TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
1709     *s->did_complete = true;
1710 }
1711
1712 static const BlockJobDriver test_simple_job_driver = {
1713     .job_driver = {
1714         .instance_size  = sizeof(TestSimpleBlockJob),
1715         .free           = block_job_free,
1716         .user_resume    = block_job_user_resume,
1717         .run            = test_simple_job_run,
1718         .clean          = test_simple_job_clean,
1719     },
1720 };
1721
1722 static int drop_intermediate_poll_update_filename(BdrvChild *child,
1723                                                   BlockDriverState *new_base,
1724                                                   const char *filename,
1725                                                   Error **errp)
1726 {
1727     /*
1728      * We are free to poll here, which may change the block graph, if
1729      * it is not drained.
1730      */
1731
1732     /* If the job is not drained: Complete it, schedule job_exit() */
1733     aio_poll(qemu_get_current_aio_context(), false);
1734     /* If the job is not drained: Run job_exit(), finish the job */
1735     aio_poll(qemu_get_current_aio_context(), false);
1736
1737     return 0;
1738 }
1739
1740 /**
1741  * Test a poll in the midst of bdrv_drop_intermediate().
1742  *
1743  * bdrv_drop_intermediate() calls BdrvChildClass.update_filename(),
1744  * which can yield or poll.  This may lead to graph changes, unless
1745  * the whole subtree in question is drained.
1746  *
1747  * We test this on the following graph:
1748  *
1749  *                    Job
1750  *
1751  *                     |
1752  *                  job-node
1753  *                     |
1754  *                     v
1755  *
1756  *                  job-node
1757  *
1758  *                     |
1759  *                  backing
1760  *                     |
1761  *                     v
1762  *
1763  * node-2 --chain--> node-1 --chain--> node-0
1764  *
1765  * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
1766  *
1767  * This first updates node-2's backing filename by invoking
1768  * drop_intermediate_poll_update_filename(), which polls twice.  This
 * causes the job to finish, which in turn causes the job-node to be
1770  * deleted.
1771  *
1772  * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
1773  * already has a pointer to the BdrvChild edge between job-node and
1774  * node-1.  When it tries to handle that edge, we probably get a
1775  * segmentation fault because the object no longer exists.
1776  *
1777  *
1778  * The solution is for bdrv_drop_intermediate() to drain top's
1779  * subtree.  This prevents graph changes from happening just because
1780  * BdrvChildClass.update_filename() yields or polls.  Thus, the block
1781  * job is paused during that drained section and must finish before or
1782  * after.
1783  *
1784  * (In addition, bdrv_replace_child() must keep the job paused.)
1785  */
1786 static void test_drop_intermediate_poll(void)
1787 {
1788     static BdrvChildClass chain_child_class;
1789     BlockDriverState *chain[3];
1790     TestSimpleBlockJob *job;
1791     BlockDriverState *job_node;
1792     bool job_has_completed = false;
1793     int i;
1794     int ret;
1795
1796     chain_child_class = child_backing;
1797     chain_child_class.update_filename = drop_intermediate_poll_update_filename;
1798
1799     for (i = 0; i < 3; i++) {
1800         char name[32];
1801         snprintf(name, 32, "node-%i", i);
1802
1803         chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort);
1804     }
1805
1806     job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR,
1807                                     &error_abort);
1808     bdrv_set_backing_hd(job_node, chain[1], &error_abort);
1809
1810     /*
1811      * Establish the chain last, so the chain links are the first
1812      * elements in the BDS.parents lists
1813      */
1814     for (i = 0; i < 3; i++) {
1815         if (i) {
1816             /* Takes the reference to chain[i - 1] */
1817             chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1],
1818                                                   "chain", &chain_child_class,
1819                                                   0, &error_abort);
1820         }
1821     }
1822
1823     job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
1824                            0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
1825
1826     /* The job has a reference now */
1827     bdrv_unref(job_node);
1828
1829     job->did_complete = &job_has_completed;
1830
1831     job_start(&job->common.job);
1832     job->should_complete = true;
1833
1834     g_assert(!job_has_completed);
1835     ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
1836     g_assert(ret == 0);
1837     g_assert(job_has_completed);
1838
1839     bdrv_unref(chain[2]);
1840 }
1841
1842
/**
 * Per-node state (bs->opaque) of the "replace_test" block driver.
 */
typedef struct BDRVReplaceTestState {
    /* Set by drain_begin when it is called with .drain_count == 0 */
    bool was_drained;
    /* Set by drain_end when .drain_count drops back to 0 */
    bool was_undrained;
    /* Set by the read function once it has handled a request */
    bool has_read;

    /* Incremented by drain_begin, decremented by drain_end */
    int drain_count;

    /* If set, the next read on a node with a backing child yields once */
    bool yield_before_read;
    /* Coroutine of a read on its way to (or waiting in) the yield, else NULL */
    Coroutine *io_co;
    /* Coroutine executing drain_begin while it waits for .io_co, else NULL */
    Coroutine *drain_co;
} BDRVReplaceTestState;
1854
/**
 * .bdrv_close implementation of the replace_test driver; intentionally
 * empty.
 */
static void bdrv_replace_test_close(BlockDriverState *bs)
{
}
1858
1859 /**
1860  * If @bs has a backing file:
1861  *   Yield if .yield_before_read is true (and wait for drain_begin to
1862  *   wake us up).
1863  *   Forward the read to bs->backing.  Set .has_read to true.
1864  *   If drain_begin has woken us, wake it in turn.
1865  *
1866  * Otherwise:
1867  *   Set .has_read to true and return success.
1868  */
1869 static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
1870                                                     uint64_t offset,
1871                                                     uint64_t bytes,
1872                                                     QEMUIOVector *qiov,
1873                                                     int flags)
1874 {
1875     BDRVReplaceTestState *s = bs->opaque;
1876
1877     if (bs->backing) {
1878         int ret;
1879
1880         g_assert(!s->drain_count);
1881
1882         s->io_co = qemu_coroutine_self();
1883         if (s->yield_before_read) {
1884             s->yield_before_read = false;
1885             qemu_coroutine_yield();
1886         }
1887         s->io_co = NULL;
1888
1889         ret = bdrv_preadv(bs->backing, offset, qiov);
1890         s->has_read = true;
1891
1892         /* Wake up drain_co if it runs */
1893         if (s->drain_co) {
1894             aio_co_wake(s->drain_co);
1895         }
1896
1897         return ret;
1898     }
1899
1900     s->has_read = true;
1901     return 0;
1902 }
1903
1904 /**
1905  * If .drain_count is 0, wake up .io_co if there is one; and set
1906  * .was_drained.
1907  * Increment .drain_count.
1908  */
1909 static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
1910 {
1911     BDRVReplaceTestState *s = bs->opaque;
1912
1913     if (!s->drain_count) {
1914         /* Keep waking io_co up until it is done */
1915         s->drain_co = qemu_coroutine_self();
1916         while (s->io_co) {
1917             aio_co_wake(s->io_co);
1918             s->io_co = NULL;
1919             qemu_coroutine_yield();
1920         }
1921         s->drain_co = NULL;
1922
1923         s->was_drained = true;
1924     }
1925     s->drain_count++;
1926 }
1927
1928 /**
1929  * Reduce .drain_count, set .was_undrained once it reaches 0.
1930  * If .drain_count reaches 0 and the node has a backing file, issue a
1931  * read request.
1932  */
1933 static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
1934 {
1935     BDRVReplaceTestState *s = bs->opaque;
1936
1937     g_assert(s->drain_count > 0);
1938     if (!--s->drain_count) {
1939         int ret;
1940
1941         s->was_undrained = true;
1942
1943         if (bs->backing) {
1944             char data;
1945             QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
1946
1947             /* Queue a read request post-drain */
1948             ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
1949             g_assert(ret >= 0);
1950         }
1951     }
1952 }
1953
1954 static BlockDriver bdrv_replace_test = {
1955     .format_name            = "replace_test",
1956     .instance_size          = sizeof(BDRVReplaceTestState),
1957
1958     .bdrv_close             = bdrv_replace_test_close,
1959     .bdrv_co_preadv         = bdrv_replace_test_co_preadv,
1960
1961     .bdrv_co_drain_begin    = bdrv_replace_test_co_drain_begin,
1962     .bdrv_co_drain_end      = bdrv_replace_test_co_drain_end,
1963
1964     .bdrv_child_perm        = bdrv_format_default_perms,
1965 };
1966
1967 static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque)
1968 {
1969     int ret;
1970     char data;
1971
1972     ret = blk_co_pread(opaque, 0, 1, &data, 0);
1973     g_assert(ret >= 0);
1974 }
1975
1976 /**
1977  * We test two things:
1978  * (1) bdrv_replace_child_noperm() must not undrain the parent if both
1979  *     children are drained.
1980  * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
1981  *     drained child.  If the old child is drained, it must flush I/O
1982  *     requests after the new one has been attached.  If the new child
1983  *     is drained, it must flush I/O requests before the old one is
1984  *     detached.
1985  *
1986  * To do so, we create one parent node and two child nodes; then
1987  * attach one of the children (old_child_bs) to the parent, then
1988  * drain both old_child_bs and new_child_bs according to
1989  * old_drain_count and new_drain_count, respectively, and finally
1990  * we invoke bdrv_replace_node() to replace old_child_bs by
1991  * new_child_bs.
1992  *
1993  * The test block driver we use here (bdrv_replace_test) has a read
1994  * function that:
1995  * - For the parent node, can optionally yield, and then forwards the
1996  *   read to bdrv_preadv(),
1997  * - For the child node, just returns immediately.
1998  *
1999  * If the read yields, the drain_begin function will wake it up.
2000  *
2001  * The drain_end function issues a read on the parent once it is fully
2002  * undrained (which simulates requests starting to come in again).
2003  */
2004 static void do_test_replace_child_mid_drain(int old_drain_count,
2005                                             int new_drain_count)
2006 {
2007     BlockBackend *parent_blk;
2008     BlockDriverState *parent_bs;
2009     BlockDriverState *old_child_bs, *new_child_bs;
2010     BDRVReplaceTestState *parent_s;
2011     BDRVReplaceTestState *old_child_s, *new_child_s;
2012     Coroutine *io_co;
2013     int i;
2014
2015     parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0,
2016                                      &error_abort);
2017     parent_s = parent_bs->opaque;
2018
2019     parent_blk = blk_new(qemu_get_aio_context(),
2020                          BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
2021     blk_insert_bs(parent_blk, parent_bs, &error_abort);
2022
2023     old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0,
2024                                         &error_abort);
2025     new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0,
2026                                         &error_abort);
2027     old_child_s = old_child_bs->opaque;
2028     new_child_s = new_child_bs->opaque;
2029
2030     /* So that we can read something */
2031     parent_bs->total_sectors = 1;
2032     old_child_bs->total_sectors = 1;
2033     new_child_bs->total_sectors = 1;
2034
2035     bdrv_ref(old_child_bs);
2036     parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child",
2037                                            &child_backing, 0, &error_abort);
2038
2039     for (i = 0; i < old_drain_count; i++) {
2040         bdrv_drained_begin(old_child_bs);
2041     }
2042     for (i = 0; i < new_drain_count; i++) {
2043         bdrv_drained_begin(new_child_bs);
2044     }
2045
2046     if (!old_drain_count) {
2047         /*
2048          * Start a read operation that will yield, so it will not
2049          * complete before the node is drained.
2050          */
2051         parent_s->yield_before_read = true;
2052         io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co,
2053                                       parent_blk);
2054         qemu_coroutine_enter(io_co);
2055     }
2056
2057     /* If we have started a read operation, it should have yielded */
2058     g_assert(!parent_s->has_read);
2059
2060     /* Reset drained status so we can see what bdrv_replace_node() does */
2061     parent_s->was_drained = false;
2062     parent_s->was_undrained = false;
2063
2064     g_assert(parent_bs->quiesce_counter == old_drain_count);
2065     bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
2066     g_assert(parent_bs->quiesce_counter == new_drain_count);
2067
2068     if (!old_drain_count && !new_drain_count) {
2069         /*
2070          * From undrained to undrained drains and undrains the parent,
2071          * because bdrv_replace_node() contains a drained section for
2072          * @old_child_bs.
2073          */
2074         g_assert(parent_s->was_drained && parent_s->was_undrained);
2075     } else if (!old_drain_count && new_drain_count) {
2076         /*
2077          * From undrained to drained should drain the parent and keep
2078          * it that way.
2079          */
2080         g_assert(parent_s->was_drained && !parent_s->was_undrained);
2081     } else if (old_drain_count && !new_drain_count) {
2082         /*
2083          * From drained to undrained should undrain the parent and
2084          * keep it that way.
2085          */
2086         g_assert(!parent_s->was_drained && parent_s->was_undrained);
2087     } else /* if (old_drain_count && new_drain_count) */ {
2088         /*
2089          * From drained to drained must not undrain the parent at any
2090          * point
2091          */
2092         g_assert(!parent_s->was_drained && !parent_s->was_undrained);
2093     }
2094
2095     if (!old_drain_count || !new_drain_count) {
2096         /*
2097          * If !old_drain_count, we have started a read request before
2098          * bdrv_replace_node().  If !new_drain_count, the parent must
2099          * have been undrained at some point, and
2100          * bdrv_replace_test_co_drain_end() starts a read request
2101          * then.
2102          */
2103         g_assert(parent_s->has_read);
2104     } else {
2105         /*
2106          * If the parent was never undrained, there is no way to start
2107          * a read request.
2108          */
2109         g_assert(!parent_s->has_read);
2110     }
2111
2112     /* A drained child must have not received any request */
2113     g_assert(!(old_drain_count && old_child_s->has_read));
2114     g_assert(!(new_drain_count && new_child_s->has_read));
2115
2116     for (i = 0; i < new_drain_count; i++) {
2117         bdrv_drained_end(new_child_bs);
2118     }
2119     for (i = 0; i < old_drain_count; i++) {
2120         bdrv_drained_end(old_child_bs);
2121     }
2122
2123     /*
2124      * By now, bdrv_replace_test_co_drain_end() must have been called
2125      * at some point while the new child was attached to the parent.
2126      */
2127     g_assert(parent_s->has_read);
2128     g_assert(new_child_s->has_read);
2129
2130     blk_unref(parent_blk);
2131     bdrv_unref(parent_bs);
2132     bdrv_unref(old_child_bs);
2133     bdrv_unref(new_child_bs);
2134 }
2135
/**
 * Run do_test_replace_child_mid_drain() for every combination of an
 * undrained (0) or drained (1) old and new child, in the order
 * (0, 0), (0, 1), (1, 0), (1, 1).
 */
static void test_replace_child_mid_drain(void)
{
    int combo;

    for (combo = 0; combo < 4; combo++) {
        /* High bit selects the old child's drain count, low bit the new's */
        int old_drain_count = combo / 2;
        int new_drain_count = combo % 2;

        do_test_replace_child_mid_drain(old_drain_count, new_drain_count);
    }
}
2146
2147 int main(int argc, char **argv)
2148 {
2149     int ret;
2150
2151     bdrv_init();
2152     qemu_init_main_loop(&error_abort);
2153
2154     g_test_init(&argc, &argv, NULL);
2155     qemu_event_init(&done_event, false);
2156
2157     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
2158     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
2159     g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
2160                     test_drv_cb_drain_subtree);
2161
2162     g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
2163                     test_drv_cb_co_drain_all);
2164     g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
2165     g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
2166                     test_drv_cb_co_drain_subtree);
2167
2168
2169     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
2170     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
2171     g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
2172                     test_quiesce_drain_subtree);
2173
2174     g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
2175                     test_quiesce_co_drain_all);
2176     g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
2177     g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
2178                     test_quiesce_co_drain_subtree);
2179
2180     g_test_add_func("/bdrv-drain/nested", test_nested);
2181     g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
2182
2183     g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
2184                     test_graph_change_drain_subtree);
2185     g_test_add_func("/bdrv-drain/graph-change/drain_all",
2186                     test_graph_change_drain_all);
2187
2188     g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
2189     g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
2190     g_test_add_func("/bdrv-drain/iothread/drain_subtree",
2191                     test_iothread_drain_subtree);
2192
2193     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
2194     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
2195     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
2196                     test_blockjob_drain_subtree);
2197
2198     g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
2199                     test_blockjob_error_drain_all);
2200     g_test_add_func("/bdrv-drain/blockjob/error/drain",
2201                     test_blockjob_error_drain);
2202     g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
2203                     test_blockjob_error_drain_subtree);
2204
2205     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
2206                     test_blockjob_iothread_drain_all);
2207     g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
2208                     test_blockjob_iothread_drain);
2209     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
2210                     test_blockjob_iothread_drain_subtree);
2211
2212     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
2213                     test_blockjob_iothread_error_drain_all);
2214     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
2215                     test_blockjob_iothread_error_drain);
2216     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
2217                     test_blockjob_iothread_error_drain_subtree);
2218
2219     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
2220     g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
2221     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
2222     g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
2223     g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
2224     g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
2225
2226     g_test_add_func("/bdrv-drain/attach/drain", test_append_to_drained);
2227
2228     g_test_add_func("/bdrv-drain/set_aio_context", test_set_aio_context);
2229
2230     g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
2231                     test_blockjob_commit_by_drained_end);
2232
2233     g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
2234                     test_drop_intermediate_poll);
2235
2236     g_test_add_func("/bdrv-drain/replace_child/mid-drain",
2237                     test_replace_child_mid_drain);
2238
2239     ret = g_test_run();
2240     qemu_event_destroy(&done_event);
2241     return ret;
2242 }
This page took 0.149945 seconds and 4 git commands to generate.