]> Git Repo - qemu.git/blob - tests/test-bdrv-drain.c
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
[qemu.git] / tests / test-bdrv-drain.c
1 /*
2  * Block node draining tests
3  *
4  * Copyright (c) 2017 Kevin Wolf <[email protected]>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 #include "qemu/osdep.h"
26 #include "block/block.h"
27 #include "block/blockjob_int.h"
28 #include "sysemu/block-backend.h"
29 #include "qapi/error.h"
30 #include "qemu/main-loop.h"
31 #include "iothread.h"
32
33 static QemuEvent done_event;
34
35 typedef struct BDRVTestState {
36     int drain_count;
37     AioContext *bh_indirection_ctx;
38     bool sleep_in_drain_begin;
39 } BDRVTestState;
40
41 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
42 {
43     BDRVTestState *s = bs->opaque;
44     s->drain_count++;
45     if (s->sleep_in_drain_begin) {
46         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
47     }
48 }
49
50 static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
51 {
52     BDRVTestState *s = bs->opaque;
53     s->drain_count--;
54 }
55
56 static void bdrv_test_close(BlockDriverState *bs)
57 {
58     BDRVTestState *s = bs->opaque;
59     g_assert_cmpint(s->drain_count, >, 0);
60 }
61
62 static void co_reenter_bh(void *opaque)
63 {
64     aio_co_wake(opaque);
65 }
66
67 static int coroutine_fn bdrv_test_co_preadv(BlockDriverState *bs,
68                                             uint64_t offset, uint64_t bytes,
69                                             QEMUIOVector *qiov, int flags)
70 {
71     BDRVTestState *s = bs->opaque;
72
73     /* We want this request to stay until the polling loop in drain waits for
74      * it to complete. We need to sleep a while as bdrv_drain_invoke() comes
75      * first and polls its result, too, but it shouldn't accidentally complete
76      * this request yet. */
77     qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
78
79     if (s->bh_indirection_ctx) {
80         aio_bh_schedule_oneshot(s->bh_indirection_ctx, co_reenter_bh,
81                                 qemu_coroutine_self());
82         qemu_coroutine_yield();
83     }
84
85     return 0;
86 }
87
88 static int bdrv_test_change_backing_file(BlockDriverState *bs,
89                                          const char *backing_file,
90                                          const char *backing_fmt)
91 {
92     return 0;
93 }
94
95 static BlockDriver bdrv_test = {
96     .format_name            = "test",
97     .instance_size          = sizeof(BDRVTestState),
98
99     .bdrv_close             = bdrv_test_close,
100     .bdrv_co_preadv         = bdrv_test_co_preadv,
101
102     .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
103     .bdrv_co_drain_end      = bdrv_test_co_drain_end,
104
105     .bdrv_child_perm        = bdrv_default_perms,
106
107     .bdrv_change_backing_file = bdrv_test_change_backing_file,
108 };
109
/* AIO completion callback: stores @ret in the int that @opaque points to */
static void aio_ret_cb(void *opaque, int ret)
{
    *(int *)opaque = ret;
}
115
/* Argument block handed to call_in_coroutine_entry() */
typedef struct CallInCoroutineData CallInCoroutineData;

struct CallInCoroutineData {
    /* Function to run inside the coroutine */
    void (*entry)(void);
    /* Set to true once @entry has returned */
    bool done;
};
120
121 static coroutine_fn void call_in_coroutine_entry(void *opaque)
122 {
123     CallInCoroutineData *data = opaque;
124
125     data->entry();
126     data->done = true;
127 }
128
129 static void call_in_coroutine(void (*entry)(void))
130 {
131     Coroutine *co;
132     CallInCoroutineData data = {
133         .entry  = entry,
134         .done   = false,
135     };
136
137     co = qemu_coroutine_create(call_in_coroutine_entry, &data);
138     qemu_coroutine_enter(co);
139     while (!data.done) {
140         aio_poll(qemu_get_aio_context(), true);
141     }
142 }
143
/* Selects which drain API do_drain_begin()/do_drain_end() call */
enum drain_type {
    /* bdrv_drain_all_begin() / bdrv_drain_all_end() */
    BDRV_DRAIN_ALL,
    /* bdrv_drained_begin() / bdrv_drained_end() */
    BDRV_DRAIN,
    /* bdrv_subtree_drained_begin() / bdrv_subtree_drained_end() */
    BDRV_SUBTREE_DRAIN,
    /* Number of drain types; used as a loop bound, not a valid type */
    DRAIN_TYPE_MAX,
};
150
151 static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
152 {
153     switch (drain_type) {
154     case BDRV_DRAIN_ALL:        bdrv_drain_all_begin(); break;
155     case BDRV_DRAIN:            bdrv_drained_begin(bs); break;
156     case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_begin(bs); break;
157     default:                    g_assert_not_reached();
158     }
159 }
160
161 static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
162 {
163     switch (drain_type) {
164     case BDRV_DRAIN_ALL:        bdrv_drain_all_end(); break;
165     case BDRV_DRAIN:            bdrv_drained_end(bs); break;
166     case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_end(bs); break;
167     default:                    g_assert_not_reached();
168     }
169 }
170
171 static void do_drain_begin_unlocked(enum drain_type drain_type, BlockDriverState *bs)
172 {
173     if (drain_type != BDRV_DRAIN_ALL) {
174         aio_context_acquire(bdrv_get_aio_context(bs));
175     }
176     do_drain_begin(drain_type, bs);
177     if (drain_type != BDRV_DRAIN_ALL) {
178         aio_context_release(bdrv_get_aio_context(bs));
179     }
180 }
181
182 static void do_drain_end_unlocked(enum drain_type drain_type, BlockDriverState *bs)
183 {
184     if (drain_type != BDRV_DRAIN_ALL) {
185         aio_context_acquire(bdrv_get_aio_context(bs));
186     }
187     do_drain_end(drain_type, bs);
188     if (drain_type != BDRV_DRAIN_ALL) {
189         aio_context_release(bdrv_get_aio_context(bs));
190     }
191 }
192
193 static void test_drv_cb_common(enum drain_type drain_type, bool recursive)
194 {
195     BlockBackend *blk;
196     BlockDriverState *bs, *backing;
197     BDRVTestState *s, *backing_s;
198     BlockAIOCB *acb;
199     int aio_ret;
200
201     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
202
203     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
204     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
205                               &error_abort);
206     s = bs->opaque;
207     blk_insert_bs(blk, bs, &error_abort);
208
209     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
210     backing_s = backing->opaque;
211     bdrv_set_backing_hd(bs, backing, &error_abort);
212
213     /* Simple bdrv_drain_all_begin/end pair, check that CBs are called */
214     g_assert_cmpint(s->drain_count, ==, 0);
215     g_assert_cmpint(backing_s->drain_count, ==, 0);
216
217     do_drain_begin(drain_type, bs);
218
219     g_assert_cmpint(s->drain_count, ==, 1);
220     g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
221
222     do_drain_end(drain_type, bs);
223
224     g_assert_cmpint(s->drain_count, ==, 0);
225     g_assert_cmpint(backing_s->drain_count, ==, 0);
226
227     /* Now do the same while a request is pending */
228     aio_ret = -EINPROGRESS;
229     acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
230     g_assert(acb != NULL);
231     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
232
233     g_assert_cmpint(s->drain_count, ==, 0);
234     g_assert_cmpint(backing_s->drain_count, ==, 0);
235
236     do_drain_begin(drain_type, bs);
237
238     g_assert_cmpint(aio_ret, ==, 0);
239     g_assert_cmpint(s->drain_count, ==, 1);
240     g_assert_cmpint(backing_s->drain_count, ==, !!recursive);
241
242     do_drain_end(drain_type, bs);
243
244     g_assert_cmpint(s->drain_count, ==, 0);
245     g_assert_cmpint(backing_s->drain_count, ==, 0);
246
247     bdrv_unref(backing);
248     bdrv_unref(bs);
249     blk_unref(blk);
250 }
251
252 static void test_drv_cb_drain_all(void)
253 {
254     test_drv_cb_common(BDRV_DRAIN_ALL, true);
255 }
256
257 static void test_drv_cb_drain(void)
258 {
259     test_drv_cb_common(BDRV_DRAIN, false);
260 }
261
262 static void test_drv_cb_drain_subtree(void)
263 {
264     test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
265 }
266
267 static void test_drv_cb_co_drain_all(void)
268 {
269     call_in_coroutine(test_drv_cb_drain_all);
270 }
271
272 static void test_drv_cb_co_drain(void)
273 {
274     call_in_coroutine(test_drv_cb_drain);
275 }
276
277 static void test_drv_cb_co_drain_subtree(void)
278 {
279     call_in_coroutine(test_drv_cb_drain_subtree);
280 }
281
282 static void test_quiesce_common(enum drain_type drain_type, bool recursive)
283 {
284     BlockBackend *blk;
285     BlockDriverState *bs, *backing;
286
287     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
288     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
289                               &error_abort);
290     blk_insert_bs(blk, bs, &error_abort);
291
292     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
293     bdrv_set_backing_hd(bs, backing, &error_abort);
294
295     g_assert_cmpint(bs->quiesce_counter, ==, 0);
296     g_assert_cmpint(backing->quiesce_counter, ==, 0);
297
298     do_drain_begin(drain_type, bs);
299
300     g_assert_cmpint(bs->quiesce_counter, ==, 1);
301     g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
302
303     do_drain_end(drain_type, bs);
304
305     g_assert_cmpint(bs->quiesce_counter, ==, 0);
306     g_assert_cmpint(backing->quiesce_counter, ==, 0);
307
308     bdrv_unref(backing);
309     bdrv_unref(bs);
310     blk_unref(blk);
311 }
312
313 static void test_quiesce_drain_all(void)
314 {
315     test_quiesce_common(BDRV_DRAIN_ALL, true);
316 }
317
318 static void test_quiesce_drain(void)
319 {
320     test_quiesce_common(BDRV_DRAIN, false);
321 }
322
323 static void test_quiesce_drain_subtree(void)
324 {
325     test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
326 }
327
328 static void test_quiesce_co_drain_all(void)
329 {
330     call_in_coroutine(test_quiesce_drain_all);
331 }
332
333 static void test_quiesce_co_drain(void)
334 {
335     call_in_coroutine(test_quiesce_drain);
336 }
337
338 static void test_quiesce_co_drain_subtree(void)
339 {
340     call_in_coroutine(test_quiesce_drain_subtree);
341 }
342
343 static void test_nested(void)
344 {
345     BlockBackend *blk;
346     BlockDriverState *bs, *backing;
347     BDRVTestState *s, *backing_s;
348     enum drain_type outer, inner;
349
350     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
351     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
352                               &error_abort);
353     s = bs->opaque;
354     blk_insert_bs(blk, bs, &error_abort);
355
356     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
357     backing_s = backing->opaque;
358     bdrv_set_backing_hd(bs, backing, &error_abort);
359
360     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
361         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
362             int backing_quiesce = (outer != BDRV_DRAIN) +
363                                   (inner != BDRV_DRAIN);
364
365             g_assert_cmpint(bs->quiesce_counter, ==, 0);
366             g_assert_cmpint(backing->quiesce_counter, ==, 0);
367             g_assert_cmpint(s->drain_count, ==, 0);
368             g_assert_cmpint(backing_s->drain_count, ==, 0);
369
370             do_drain_begin(outer, bs);
371             do_drain_begin(inner, bs);
372
373             g_assert_cmpint(bs->quiesce_counter, ==, 2);
374             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
375             g_assert_cmpint(s->drain_count, ==, 2);
376             g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
377
378             do_drain_end(inner, bs);
379             do_drain_end(outer, bs);
380
381             g_assert_cmpint(bs->quiesce_counter, ==, 0);
382             g_assert_cmpint(backing->quiesce_counter, ==, 0);
383             g_assert_cmpint(s->drain_count, ==, 0);
384             g_assert_cmpint(backing_s->drain_count, ==, 0);
385         }
386     }
387
388     bdrv_unref(backing);
389     bdrv_unref(bs);
390     blk_unref(blk);
391 }
392
393 static void test_multiparent(void)
394 {
395     BlockBackend *blk_a, *blk_b;
396     BlockDriverState *bs_a, *bs_b, *backing;
397     BDRVTestState *a_s, *b_s, *backing_s;
398
399     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
400     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
401                                 &error_abort);
402     a_s = bs_a->opaque;
403     blk_insert_bs(blk_a, bs_a, &error_abort);
404
405     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
406     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
407                                 &error_abort);
408     b_s = bs_b->opaque;
409     blk_insert_bs(blk_b, bs_b, &error_abort);
410
411     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
412     backing_s = backing->opaque;
413     bdrv_set_backing_hd(bs_a, backing, &error_abort);
414     bdrv_set_backing_hd(bs_b, backing, &error_abort);
415
416     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
417     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
418     g_assert_cmpint(backing->quiesce_counter, ==, 0);
419     g_assert_cmpint(a_s->drain_count, ==, 0);
420     g_assert_cmpint(b_s->drain_count, ==, 0);
421     g_assert_cmpint(backing_s->drain_count, ==, 0);
422
423     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
424
425     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
426     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
427     g_assert_cmpint(backing->quiesce_counter, ==, 1);
428     g_assert_cmpint(a_s->drain_count, ==, 1);
429     g_assert_cmpint(b_s->drain_count, ==, 1);
430     g_assert_cmpint(backing_s->drain_count, ==, 1);
431
432     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
433
434     g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
435     g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
436     g_assert_cmpint(backing->quiesce_counter, ==, 2);
437     g_assert_cmpint(a_s->drain_count, ==, 2);
438     g_assert_cmpint(b_s->drain_count, ==, 2);
439     g_assert_cmpint(backing_s->drain_count, ==, 2);
440
441     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
442
443     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
444     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
445     g_assert_cmpint(backing->quiesce_counter, ==, 1);
446     g_assert_cmpint(a_s->drain_count, ==, 1);
447     g_assert_cmpint(b_s->drain_count, ==, 1);
448     g_assert_cmpint(backing_s->drain_count, ==, 1);
449
450     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
451
452     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
453     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
454     g_assert_cmpint(backing->quiesce_counter, ==, 0);
455     g_assert_cmpint(a_s->drain_count, ==, 0);
456     g_assert_cmpint(b_s->drain_count, ==, 0);
457     g_assert_cmpint(backing_s->drain_count, ==, 0);
458
459     bdrv_unref(backing);
460     bdrv_unref(bs_a);
461     bdrv_unref(bs_b);
462     blk_unref(blk_a);
463     blk_unref(blk_b);
464 }
465
466 static void test_graph_change_drain_subtree(void)
467 {
468     BlockBackend *blk_a, *blk_b;
469     BlockDriverState *bs_a, *bs_b, *backing;
470     BDRVTestState *a_s, *b_s, *backing_s;
471
472     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
473     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
474                                 &error_abort);
475     a_s = bs_a->opaque;
476     blk_insert_bs(blk_a, bs_a, &error_abort);
477
478     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
479     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
480                                 &error_abort);
481     b_s = bs_b->opaque;
482     blk_insert_bs(blk_b, bs_b, &error_abort);
483
484     backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
485     backing_s = backing->opaque;
486     bdrv_set_backing_hd(bs_a, backing, &error_abort);
487
488     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
489     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
490     g_assert_cmpint(backing->quiesce_counter, ==, 0);
491     g_assert_cmpint(a_s->drain_count, ==, 0);
492     g_assert_cmpint(b_s->drain_count, ==, 0);
493     g_assert_cmpint(backing_s->drain_count, ==, 0);
494
495     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
496     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
497     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
498     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
499     do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
500
501     bdrv_set_backing_hd(bs_b, backing, &error_abort);
502     g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
503     g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
504     g_assert_cmpint(backing->quiesce_counter, ==, 5);
505     g_assert_cmpint(a_s->drain_count, ==, 5);
506     g_assert_cmpint(b_s->drain_count, ==, 5);
507     g_assert_cmpint(backing_s->drain_count, ==, 5);
508
509     bdrv_set_backing_hd(bs_b, NULL, &error_abort);
510     g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
511     g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
512     g_assert_cmpint(backing->quiesce_counter, ==, 3);
513     g_assert_cmpint(a_s->drain_count, ==, 3);
514     g_assert_cmpint(b_s->drain_count, ==, 2);
515     g_assert_cmpint(backing_s->drain_count, ==, 3);
516
517     bdrv_set_backing_hd(bs_b, backing, &error_abort);
518     g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
519     g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
520     g_assert_cmpint(backing->quiesce_counter, ==, 5);
521     g_assert_cmpint(a_s->drain_count, ==, 5);
522     g_assert_cmpint(b_s->drain_count, ==, 5);
523     g_assert_cmpint(backing_s->drain_count, ==, 5);
524
525     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
526     do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
527     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
528     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
529     do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
530
531     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
532     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
533     g_assert_cmpint(backing->quiesce_counter, ==, 0);
534     g_assert_cmpint(a_s->drain_count, ==, 0);
535     g_assert_cmpint(b_s->drain_count, ==, 0);
536     g_assert_cmpint(backing_s->drain_count, ==, 0);
537
538     bdrv_unref(backing);
539     bdrv_unref(bs_a);
540     bdrv_unref(bs_b);
541     blk_unref(blk_a);
542     blk_unref(blk_b);
543 }
544
545 static void test_graph_change_drain_all(void)
546 {
547     BlockBackend *blk_a, *blk_b;
548     BlockDriverState *bs_a, *bs_b;
549     BDRVTestState *a_s, *b_s;
550
551     /* Create node A with a BlockBackend */
552     blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
553     bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
554                                 &error_abort);
555     a_s = bs_a->opaque;
556     blk_insert_bs(blk_a, bs_a, &error_abort);
557
558     g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
559     g_assert_cmpint(a_s->drain_count, ==, 0);
560
561     /* Call bdrv_drain_all_begin() */
562     bdrv_drain_all_begin();
563
564     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
565     g_assert_cmpint(a_s->drain_count, ==, 1);
566
567     /* Create node B with a BlockBackend */
568     blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
569     bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
570                                 &error_abort);
571     b_s = bs_b->opaque;
572     blk_insert_bs(blk_b, bs_b, &error_abort);
573
574     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
575     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
576     g_assert_cmpint(a_s->drain_count, ==, 1);
577     g_assert_cmpint(b_s->drain_count, ==, 1);
578
579     /* Unref and finally delete node A */
580     blk_unref(blk_a);
581
582     g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
583     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
584     g_assert_cmpint(a_s->drain_count, ==, 1);
585     g_assert_cmpint(b_s->drain_count, ==, 1);
586
587     bdrv_unref(bs_a);
588
589     g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
590     g_assert_cmpint(b_s->drain_count, ==, 1);
591
592     /* End the drained section */
593     bdrv_drain_all_end();
594
595     g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
596     g_assert_cmpint(b_s->drain_count, ==, 0);
597
598     bdrv_unref(bs_b);
599     blk_unref(blk_b);
600 }
601
602 struct test_iothread_data {
603     BlockDriverState *bs;
604     enum drain_type drain_type;
605     int *aio_ret;
606 };
607
608 static void test_iothread_drain_entry(void *opaque)
609 {
610     struct test_iothread_data *data = opaque;
611
612     aio_context_acquire(bdrv_get_aio_context(data->bs));
613     do_drain_begin(data->drain_type, data->bs);
614     g_assert_cmpint(*data->aio_ret, ==, 0);
615     do_drain_end(data->drain_type, data->bs);
616     aio_context_release(bdrv_get_aio_context(data->bs));
617
618     qemu_event_set(&done_event);
619 }
620
621 static void test_iothread_aio_cb(void *opaque, int ret)
622 {
623     int *aio_ret = opaque;
624     *aio_ret = ret;
625     qemu_event_set(&done_event);
626 }
627
628 static void test_iothread_main_thread_bh(void *opaque)
629 {
630     struct test_iothread_data *data = opaque;
631
632     /* Test that the AioContext is not yet locked in a random BH that is
633      * executed during drain, otherwise this would deadlock. */
634     aio_context_acquire(bdrv_get_aio_context(data->bs));
635     bdrv_flush(data->bs);
636     aio_context_release(bdrv_get_aio_context(data->bs));
637 }
638
639 /*
640  * Starts an AIO request on a BDS that runs in the AioContext of iothread 1.
641  * The request involves a BH on iothread 2 before it can complete.
642  *
643  * @drain_thread = 0 means that do_drain_begin/end are called from the main
644  * thread, @drain_thread = 1 means that they are called from iothread 1. Drain
645  * for this BDS cannot be called from iothread 2 because only the main thread
646  * may do cross-AioContext polling.
647  */
648 static void test_iothread_common(enum drain_type drain_type, int drain_thread)
649 {
650     BlockBackend *blk;
651     BlockDriverState *bs;
652     BDRVTestState *s;
653     BlockAIOCB *acb;
654     int aio_ret;
655     struct test_iothread_data data;
656
657     IOThread *a = iothread_new();
658     IOThread *b = iothread_new();
659     AioContext *ctx_a = iothread_get_aio_context(a);
660     AioContext *ctx_b = iothread_get_aio_context(b);
661
662     QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);
663
664     /* bdrv_drain_all() may only be called from the main loop thread */
665     if (drain_type == BDRV_DRAIN_ALL && drain_thread != 0) {
666         goto out;
667     }
668
669     blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
670     bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
671                               &error_abort);
672     s = bs->opaque;
673     blk_insert_bs(blk, bs, &error_abort);
674     blk_set_disable_request_queuing(blk, true);
675
676     blk_set_aio_context(blk, ctx_a, &error_abort);
677     aio_context_acquire(ctx_a);
678
679     s->bh_indirection_ctx = ctx_b;
680
681     aio_ret = -EINPROGRESS;
682     qemu_event_reset(&done_event);
683
684     if (drain_thread == 0) {
685         acb = blk_aio_preadv(blk, 0, &qiov, 0, test_iothread_aio_cb, &aio_ret);
686     } else {
687         acb = blk_aio_preadv(blk, 0, &qiov, 0, aio_ret_cb, &aio_ret);
688     }
689     g_assert(acb != NULL);
690     g_assert_cmpint(aio_ret, ==, -EINPROGRESS);
691
692     aio_context_release(ctx_a);
693
694     data = (struct test_iothread_data) {
695         .bs         = bs,
696         .drain_type = drain_type,
697         .aio_ret    = &aio_ret,
698     };
699
700     switch (drain_thread) {
701     case 0:
702         if (drain_type != BDRV_DRAIN_ALL) {
703             aio_context_acquire(ctx_a);
704         }
705
706         aio_bh_schedule_oneshot(ctx_a, test_iothread_main_thread_bh, &data);
707
708         /* The request is running on the IOThread a. Draining its block device
709          * will make sure that it has completed as far as the BDS is concerned,
710          * but the drain in this thread can continue immediately after
711          * bdrv_dec_in_flight() and aio_ret might be assigned only slightly
712          * later. */
713         do_drain_begin(drain_type, bs);
714         g_assert_cmpint(bs->in_flight, ==, 0);
715
716         if (drain_type != BDRV_DRAIN_ALL) {
717             aio_context_release(ctx_a);
718         }
719         qemu_event_wait(&done_event);
720         if (drain_type != BDRV_DRAIN_ALL) {
721             aio_context_acquire(ctx_a);
722         }
723
724         g_assert_cmpint(aio_ret, ==, 0);
725         do_drain_end(drain_type, bs);
726
727         if (drain_type != BDRV_DRAIN_ALL) {
728             aio_context_release(ctx_a);
729         }
730         break;
731     case 1:
732         aio_bh_schedule_oneshot(ctx_a, test_iothread_drain_entry, &data);
733         qemu_event_wait(&done_event);
734         break;
735     default:
736         g_assert_not_reached();
737     }
738
739     aio_context_acquire(ctx_a);
740     blk_set_aio_context(blk, qemu_get_aio_context(), &error_abort);
741     aio_context_release(ctx_a);
742
743     bdrv_unref(bs);
744     blk_unref(blk);
745
746 out:
747     iothread_join(a);
748     iothread_join(b);
749 }
750
751 static void test_iothread_drain_all(void)
752 {
753     test_iothread_common(BDRV_DRAIN_ALL, 0);
754     test_iothread_common(BDRV_DRAIN_ALL, 1);
755 }
756
757 static void test_iothread_drain(void)
758 {
759     test_iothread_common(BDRV_DRAIN, 0);
760     test_iothread_common(BDRV_DRAIN, 1);
761 }
762
763 static void test_iothread_drain_subtree(void)
764 {
765     test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
766     test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
767 }
768
769
770 typedef struct TestBlockJob {
771     BlockJob common;
772     int run_ret;
773     int prepare_ret;
774     bool running;
775     bool should_complete;
776 } TestBlockJob;
777
778 static int test_job_prepare(Job *job)
779 {
780     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
781
782     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
783     blk_flush(s->common.blk);
784     return s->prepare_ret;
785 }
786
787 static void test_job_commit(Job *job)
788 {
789     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
790
791     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
792     blk_flush(s->common.blk);
793 }
794
795 static void test_job_abort(Job *job)
796 {
797     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
798
799     /* Provoke an AIO_WAIT_WHILE() call to verify there is no deadlock */
800     blk_flush(s->common.blk);
801 }
802
803 static int coroutine_fn test_job_run(Job *job, Error **errp)
804 {
805     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
806
807     /* We are running the actual job code past the pause point in
808      * job_co_entry(). */
809     s->running = true;
810
811     job_transition_to_ready(&s->common.job);
812     while (!s->should_complete) {
813         /* Avoid job_sleep_ns() because it marks the job as !busy. We want to
814          * emulate some actual activity (probably some I/O) here so that drain
815          * has to wait for this activity to stop. */
816         qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
817
818         job_pause_point(&s->common.job);
819     }
820
821     return s->run_ret;
822 }
823
824 static void test_job_complete(Job *job, Error **errp)
825 {
826     TestBlockJob *s = container_of(job, TestBlockJob, common.job);
827     s->should_complete = true;
828 }
829
830 BlockJobDriver test_job_driver = {
831     .job_driver = {
832         .instance_size  = sizeof(TestBlockJob),
833         .free           = block_job_free,
834         .user_resume    = block_job_user_resume,
835         .run            = test_job_run,
836         .complete       = test_job_complete,
837         .prepare        = test_job_prepare,
838         .commit         = test_job_commit,
839         .abort          = test_job_abort,
840     },
841 };
842
/* Outcome that a block job test case configures for its job */
enum test_job_result {
    /* Leave run_ret and prepare_ret untouched */
    TEST_JOB_SUCCESS,
    /* Make the .run callback return -EIO */
    TEST_JOB_FAIL_RUN,
    /* Make the .prepare callback return -EIO */
    TEST_JOB_FAIL_PREPARE,
};
848
/* Node that a block job test case drains first */
enum test_job_drain_node {
    /* The job's source node */
    TEST_JOB_DRAIN_SRC,
    /* The backing node of the source node */
    TEST_JOB_DRAIN_SRC_CHILD,
    /* The overlay node on top of the source node */
    TEST_JOB_DRAIN_SRC_PARENT,
};
854
855 static void test_blockjob_common_drain_node(enum drain_type drain_type,
856                                             bool use_iothread,
857                                             enum test_job_result result,
858                                             enum test_job_drain_node drain_node)
859 {
860     BlockBackend *blk_src, *blk_target;
861     BlockDriverState *src, *src_backing, *src_overlay, *target, *drain_bs;
862     BlockJob *job;
863     TestBlockJob *tjob;
864     IOThread *iothread = NULL;
865     AioContext *ctx;
866     int ret;
867
868     src = bdrv_new_open_driver(&bdrv_test, "source", BDRV_O_RDWR,
869                                &error_abort);
870     src_backing = bdrv_new_open_driver(&bdrv_test, "source-backing",
871                                        BDRV_O_RDWR, &error_abort);
872     src_overlay = bdrv_new_open_driver(&bdrv_test, "source-overlay",
873                                        BDRV_O_RDWR, &error_abort);
874
875     bdrv_set_backing_hd(src_overlay, src, &error_abort);
876     bdrv_unref(src);
877     bdrv_set_backing_hd(src, src_backing, &error_abort);
878     bdrv_unref(src_backing);
879
880     blk_src = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
881     blk_insert_bs(blk_src, src_overlay, &error_abort);
882
883     switch (drain_node) {
884     case TEST_JOB_DRAIN_SRC:
885         drain_bs = src;
886         break;
887     case TEST_JOB_DRAIN_SRC_CHILD:
888         drain_bs = src_backing;
889         break;
890     case TEST_JOB_DRAIN_SRC_PARENT:
891         drain_bs = src_overlay;
892         break;
893     default:
894         g_assert_not_reached();
895     }
896
897     if (use_iothread) {
898         iothread = iothread_new();
899         ctx = iothread_get_aio_context(iothread);
900         blk_set_aio_context(blk_src, ctx, &error_abort);
901     } else {
902         ctx = qemu_get_aio_context();
903     }
904
905     target = bdrv_new_open_driver(&bdrv_test, "target", BDRV_O_RDWR,
906                                   &error_abort);
907     blk_target = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
908     blk_insert_bs(blk_target, target, &error_abort);
909     blk_set_allow_aio_context_change(blk_target, true);
910
911     aio_context_acquire(ctx);
912     tjob = block_job_create("job0", &test_job_driver, NULL, src,
913                             0, BLK_PERM_ALL,
914                             0, 0, NULL, NULL, &error_abort);
915     job = &tjob->common;
916     block_job_add_bdrv(job, "target", target, 0, BLK_PERM_ALL, &error_abort);
917
918     switch (result) {
919     case TEST_JOB_SUCCESS:
920         break;
921     case TEST_JOB_FAIL_RUN:
922         tjob->run_ret = -EIO;
923         break;
924     case TEST_JOB_FAIL_PREPARE:
925         tjob->prepare_ret = -EIO;
926         break;
927     }
928
929     job_start(&job->job);
930     aio_context_release(ctx);
931
932     if (use_iothread) {
933         /* job_co_entry() is run in the I/O thread, wait for the actual job
934          * code to start (we don't want to catch the job in the pause point in
935          * job_co_entry(). */
936         while (!tjob->running) {
937             aio_poll(qemu_get_aio_context(), false);
938         }
939     }
940
941     g_assert_cmpint(job->job.pause_count, ==, 0);
942     g_assert_false(job->job.paused);
943     g_assert_true(tjob->running);
944     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
945
946     do_drain_begin_unlocked(drain_type, drain_bs);
947
948     if (drain_type == BDRV_DRAIN_ALL) {
949         /* bdrv_drain_all() drains both src and target */
950         g_assert_cmpint(job->job.pause_count, ==, 2);
951     } else {
952         g_assert_cmpint(job->job.pause_count, ==, 1);
953     }
954     g_assert_true(job->job.paused);
955     g_assert_false(job->job.busy); /* The job is paused */
956
957     do_drain_end_unlocked(drain_type, drain_bs);
958
959     if (use_iothread) {
960         /* paused is reset in the I/O thread, wait for it */
961         while (job->job.paused) {
962             aio_poll(qemu_get_aio_context(), false);
963         }
964     }
965
966     g_assert_cmpint(job->job.pause_count, ==, 0);
967     g_assert_false(job->job.paused);
968     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
969
970     do_drain_begin_unlocked(drain_type, target);
971
972     if (drain_type == BDRV_DRAIN_ALL) {
973         /* bdrv_drain_all() drains both src and target */
974         g_assert_cmpint(job->job.pause_count, ==, 2);
975     } else {
976         g_assert_cmpint(job->job.pause_count, ==, 1);
977     }
978     g_assert_true(job->job.paused);
979     g_assert_false(job->job.busy); /* The job is paused */
980
981     do_drain_end_unlocked(drain_type, target);
982
983     if (use_iothread) {
984         /* paused is reset in the I/O thread, wait for it */
985         while (job->job.paused) {
986             aio_poll(qemu_get_aio_context(), false);
987         }
988     }
989
990     g_assert_cmpint(job->job.pause_count, ==, 0);
991     g_assert_false(job->job.paused);
992     g_assert_true(job->job.busy); /* We're in qemu_co_sleep_ns() */
993
994     aio_context_acquire(ctx);
995     ret = job_complete_sync(&job->job, &error_abort);
996     g_assert_cmpint(ret, ==, (result == TEST_JOB_SUCCESS ? 0 : -EIO));
997
998     if (use_iothread) {
999         blk_set_aio_context(blk_src, qemu_get_aio_context(), &error_abort);
1000         assert(blk_get_aio_context(blk_target) == qemu_get_aio_context());
1001     }
1002     aio_context_release(ctx);
1003
1004     blk_unref(blk_src);
1005     blk_unref(blk_target);
1006     bdrv_unref(src_overlay);
1007     bdrv_unref(target);
1008
1009     if (iothread) {
1010         iothread_join(iothread);
1011     }
1012 }
1013
1014 static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
1015                                  enum test_job_result result)
1016 {
1017     test_blockjob_common_drain_node(drain_type, use_iothread, result,
1018                                     TEST_JOB_DRAIN_SRC);
1019     test_blockjob_common_drain_node(drain_type, use_iothread, result,
1020                                     TEST_JOB_DRAIN_SRC_CHILD);
1021     if (drain_type == BDRV_SUBTREE_DRAIN) {
1022         test_blockjob_common_drain_node(drain_type, use_iothread, result,
1023                                         TEST_JOB_DRAIN_SRC_PARENT);
1024     }
1025 }
1026
/* Block job drain test: BDRV_DRAIN_ALL, no iothread, job expected to succeed */
static void test_blockjob_drain_all(void)
{
    test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_SUCCESS);
}
1031
/* Block job drain test: BDRV_DRAIN, no iothread, job expected to succeed */
static void test_blockjob_drain(void)
{
    test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
}
1036
/*
 * Block job drain test: BDRV_SUBTREE_DRAIN, no iothread, job expected to
 * succeed.
 */
static void test_blockjob_drain_subtree(void)
{
    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
}
1041
/*
 * Block job drain test: BDRV_DRAIN_ALL, no iothread.  Run once with the job
 * result TEST_JOB_FAIL_RUN and once with TEST_JOB_FAIL_PREPARE.
 */
static void test_blockjob_error_drain_all(void)
{
    test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
    test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_PREPARE);
}
1047
/*
 * Block job drain test: BDRV_DRAIN, no iothread.  Run once with the job
 * result TEST_JOB_FAIL_RUN and once with TEST_JOB_FAIL_PREPARE.
 */
static void test_blockjob_error_drain(void)
{
    test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_RUN);
    test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
}
1053
/*
 * Block job drain test: BDRV_SUBTREE_DRAIN, no iothread.  Run once with the
 * job result TEST_JOB_FAIL_RUN and once with TEST_JOB_FAIL_PREPARE.
 */
static void test_blockjob_error_drain_subtree(void)
{
    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
}
1059
/* Block job drain test: BDRV_DRAIN_ALL, with an iothread, job succeeds */
static void test_blockjob_iothread_drain_all(void)
{
    test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
}
1064
/* Block job drain test: BDRV_DRAIN, with an iothread, job succeeds */
static void test_blockjob_iothread_drain(void)
{
    test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
}
1069
/* Block job drain test: BDRV_SUBTREE_DRAIN, with an iothread, job succeeds */
static void test_blockjob_iothread_drain_subtree(void)
{
    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
}
1074
/*
 * Block job drain test: BDRV_DRAIN_ALL, with an iothread.  Run once with the
 * job result TEST_JOB_FAIL_RUN and once with TEST_JOB_FAIL_PREPARE.
 */
static void test_blockjob_iothread_error_drain_all(void)
{
    test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
    test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_PREPARE);
}
1080
/*
 * Block job drain test: BDRV_DRAIN, with an iothread.  Run once with the
 * job result TEST_JOB_FAIL_RUN and once with TEST_JOB_FAIL_PREPARE.
 */
static void test_blockjob_iothread_error_drain(void)
{
    test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_RUN);
    test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
}
1086
/*
 * Block job drain test: BDRV_SUBTREE_DRAIN, with an iothread.  Run once with
 * the job result TEST_JOB_FAIL_RUN and once with TEST_JOB_FAIL_PREPARE.
 */
static void test_blockjob_iothread_error_drain_subtree(void)
{
    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
}
1092
1093
/* Per-node state of the "test_top_driver" block driver */
typedef struct BDRVTestTopState {
    /* Child that bdrv_test_top_co_preadv() forwards read requests to */
    BdrvChild *wait_child;
} BDRVTestTopState;
1097
1098 static void bdrv_test_top_close(BlockDriverState *bs)
1099 {
1100     BdrvChild *c, *next_c;
1101     QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
1102         bdrv_unref_child(bs, c);
1103     }
1104 }
1105
1106 static int coroutine_fn bdrv_test_top_co_preadv(BlockDriverState *bs,
1107                                                 uint64_t offset, uint64_t bytes,
1108                                                 QEMUIOVector *qiov, int flags)
1109 {
1110     BDRVTestTopState *tts = bs->opaque;
1111     return bdrv_co_preadv(tts->wait_child, offset, bytes, qiov, flags);
1112 }
1113
/*
 * Test block driver used for the top node in the delete/detach-by-drain
 * tests.  Reads are forwarded to BDRVTestTopState.wait_child and closing
 * the node unrefs all of its children.
 */
static BlockDriver bdrv_test_top_driver = {
    .format_name            = "test_top_driver",
    .instance_size          = sizeof(BDRVTestTopState),

    .bdrv_close             = bdrv_test_top_close,
    .bdrv_co_preadv         = bdrv_test_top_co_preadv,

    .bdrv_child_perm        = bdrv_default_perms,
};
1123
/* Arguments and completion flag for the test_co_delete_by_drain() coroutine */
typedef struct TestCoDeleteByDrainData {
    /* BlockBackend whose root node is the node under test */
    BlockBackend *blk;
    /* If true, unref the node's children instead of unref'ing @blk */
    bool detach_instead_of_delete;
    /* Set to true by the coroutine right before it finishes */
    bool done;
} TestCoDeleteByDrainData;
1129
/*
 * Coroutine body for the delete/detach-by-drain tests.
 *
 * @opaque is a TestCoDeleteByDrainData.  The coroutine issues a read on
 * the top node's wait_child and, once that read has returned, either
 * unrefs the BlockBackend or unrefs all children of the top node,
 * depending on detach_instead_of_delete.  Finally it sets ->done.
 */
static void coroutine_fn test_co_delete_by_drain(void *opaque)
{
    TestCoDeleteByDrainData *dbdd = opaque;
    BlockBackend *blk = dbdd->blk;
    BlockDriverState *bs = blk_bs(blk);
    BDRVTestTopState *tts = bs->opaque;
    void *buffer = g_malloc(65536);
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buffer, 65536);

    /* Pretend some internal read operation from parent to child.
     * Important: We have to read from the child, not from the parent!
     * Draining works by first propagating it all up the tree to the
     * root and then waiting for drainage from root to the leaves
     * (protocol nodes).  If we have a request waiting on the root,
     * everything will be drained before we go back down the tree, but
     * we do not want that.  We want to be in the middle of draining
     * when the following request returns. */
    bdrv_co_preadv(tts->wait_child, 0, 65536, &qiov, 0);

    /* The top node must still have exactly one reference here */
    g_assert_cmpint(bs->refcnt, ==, 1);

    if (!dbdd->detach_instead_of_delete) {
        blk_unref(blk);
    } else {
        BdrvChild *c, *next_c;
        QLIST_FOREACH_SAFE(c, &bs->children, next, next_c) {
            bdrv_unref_child(bs, c);
        }
    }

    /* Lets the caller's polling loop on dbdd.done terminate */
    dbdd->done = true;
    g_free(buffer);
}
1163
/**
 * Test what happens when some BDS has some children, you drain one of
 * them and this results in the BDS being deleted.
 *
 * If @detach_instead_of_delete is set, the BDS is not going to be
 * deleted but will only detach all of its children.
 *
 * @drain_type selects which drain operation triggers the graph change.
 * BDRV_SUBTREE_DRAIN is only supported together with
 * @detach_instead_of_delete (asserted below).
 */
static void do_test_delete_by_drain(bool detach_instead_of_delete,
                                    enum drain_type drain_type)
{
    BlockBackend *blk;
    BlockDriverState *bs, *child_bs, *null_bs;
    BDRVTestTopState *tts;
    TestCoDeleteByDrainData dbdd;
    Coroutine *co;

    /* Top node; gets three children: null-co, the wait child, null-co */
    bs = bdrv_new_open_driver(&bdrv_test_top_driver, "top", BDRV_O_RDWR,
                              &error_abort);
    bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
    tts = bs->opaque;

    null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                        &error_abort);
    bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds,
                      BDRV_CHILD_DATA, &error_abort);

    /* This child is the one that requests are passed through to, and
     * it will stall until a drain occurs */
    child_bs = bdrv_new_open_driver(&bdrv_test, "child", BDRV_O_RDWR,
                                    &error_abort);
    child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS;
    /* Takes our reference to child_bs */
    tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child",
                                        &child_of_bds,
                                        BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
                                        &error_abort);

    /* This child is just there to be deleted
     * (for detach_instead_of_delete == true) */
    null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                        &error_abort);
    bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA,
                      &error_abort);

    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    blk_insert_bs(blk, bs, &error_abort);

    /* Referenced by blk now */
    bdrv_unref(bs);

    /* null_bs refers to the second null-co node at this point */
    g_assert_cmpint(bs->refcnt, ==, 1);
    g_assert_cmpint(child_bs->refcnt, ==, 1);
    g_assert_cmpint(null_bs->refcnt, ==, 1);


    dbdd = (TestCoDeleteByDrainData){
        .blk = blk,
        .detach_instead_of_delete = detach_instead_of_delete,
        .done = false,
    };
    /* Start the coroutine that issues the read on the wait child */
    co = qemu_coroutine_create(test_co_delete_by_drain, &dbdd);
    qemu_coroutine_enter(co);

    /* Drain the child while the read operation is still pending.
     * This should result in the operation finishing and
     * test_co_delete_by_drain() resuming.  Thus, @bs will be deleted
     * and the coroutine will exit while this drain operation is still
     * in progress. */
    switch (drain_type) {
    case BDRV_DRAIN:
        /* Hold a reference to child_bs across the drain */
        bdrv_ref(child_bs);
        bdrv_drain(child_bs);
        bdrv_unref(child_bs);
        break;
    case BDRV_SUBTREE_DRAIN:
        /* Would have to ref/unref bs here for !detach_instead_of_delete, but
         * then the whole test becomes pointless because the graph changes
         * don't occur during the drain any more. */
        assert(detach_instead_of_delete);
        bdrv_subtree_drained_begin(bs);
        bdrv_subtree_drained_end(bs);
        break;
    case BDRV_DRAIN_ALL:
        bdrv_drain_all_begin();
        bdrv_drain_all_end();
        break;
    default:
        g_assert_not_reached();
    }

    /* Wait until the coroutine has reported completion */
    while (!dbdd.done) {
        aio_poll(qemu_get_aio_context(), true);
    }

    if (detach_instead_of_delete) {
        /* Here, the reference has not passed over to the coroutine,
         * so we have to delete the BB ourselves */
        blk_unref(blk);
    }
}
1264
/* Delete the top node (via blk_unref) during a BDRV_DRAIN of its child */
static void test_delete_by_drain(void)
{
    do_test_delete_by_drain(false, BDRV_DRAIN);
}
1269
/* Detach all children of the top node during a BDRV_DRAIN_ALL section */
static void test_detach_by_drain_all(void)
{
    do_test_delete_by_drain(true, BDRV_DRAIN_ALL);
}
1274
/* Detach all children of the top node during a BDRV_DRAIN of its child */
static void test_detach_by_drain(void)
{
    do_test_delete_by_drain(true, BDRV_DRAIN);
}
1279
/* Detach all children of the top node during a subtree drain of that node */
static void test_detach_by_drain_subtree(void)
{
    do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
}
1284
1285
/* Shared state for the test_detach_indirect() graph change */
struct detach_by_parent_data {
    /* Node whose children are changed */
    BlockDriverState *parent_b;
    /* Child of parent_b that gets removed */
    BdrvChild *child_b;
    /* Node that gets attached to parent_b instead */
    BlockDriverState *c;
    /* Result of attaching c; filled in by detach_indirect_bh() */
    BdrvChild *child_c;
    /* If true, the AIO completion callback performs the graph change */
    bool by_parent_cb;
};
/* Single global instance; (re)initialised by test_detach_indirect() */
static struct detach_by_parent_data detach_by_parent_data;
1294
1295 static void detach_indirect_bh(void *opaque)
1296 {
1297     struct detach_by_parent_data *data = opaque;
1298
1299     bdrv_unref_child(data->parent_b, data->child_b);
1300
1301     bdrv_ref(data->c);
1302     data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C",
1303                                       &child_of_bds, BDRV_CHILD_DATA,
1304                                       &error_abort);
1305 }
1306
1307 static void detach_by_parent_aio_cb(void *opaque, int ret)
1308 {
1309     struct detach_by_parent_data *data = &detach_by_parent_data;
1310
1311     g_assert_cmpint(ret, ==, 0);
1312     if (data->by_parent_cb) {
1313         detach_indirect_bh(data);
1314     }
1315 }
1316
/*
 * .drained_begin callback for detach_by_driver_cb_class: schedule
 * detach_indirect_bh() as a one-shot BH in the current AioContext, then
 * chain to the drained_begin callback of child_of_bds.
 */
static void detach_by_driver_cb_drained_begin(BdrvChild *child)
{
    aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
                            detach_indirect_bh, &detach_by_parent_data);
    child_of_bds.drained_begin(child);
}
1323
/*
 * Copy of child_of_bds with .drained_begin overridden; set up by
 * test_detach_indirect() when by_parent_cb is false.
 */
static BdrvChildClass detach_by_driver_cb_class;
1325
/*
 * Initial graph:
 *
 * PA     PB
 *    \ /   \
 *     A     B     C
 *
 * by_parent_cb == true:  Test that parent callbacks don't poll
 *
 *     PA has a pending read request whose callback changes the child nodes of
 *     PB: It removes B and adds C instead. The subtree of PB is drained, which
 *     will indirectly drain the read request, too.
 *
 * by_parent_cb == false: Test that bdrv_drain_invoke() doesn't poll
 *
 *     PA's BdrvChildClass has a .drained_begin callback that schedules a BH
 *     that does the same graph change. If bdrv_drain_invoke() calls it, the
 *     state is messed up, but if it is only polled in the single
 *     BDRV_POLL_WHILE() at the end of the drain, this should work fine.
 */
static void test_detach_indirect(bool by_parent_cb)
{
    BlockBackend *blk;
    BlockDriverState *parent_a, *parent_b, *a, *b, *c;
    BdrvChild *child_a, *child_b;
    BlockAIOCB *acb;

    /* Zero-length I/O vector: the request transfers no data */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0);

    if (!by_parent_cb) {
        /* Child class for the PA-A link that schedules the BH on drain */
        detach_by_driver_cb_class = child_of_bds;
        detach_by_driver_cb_class.drained_begin =
            detach_by_driver_cb_drained_begin;
    }

    /* Create all involved nodes */
    parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
                                    &error_abort);
    parent_b = bdrv_new_open_driver(&bdrv_test, "parent-b", 0,
                                    &error_abort);

    a = bdrv_new_open_driver(&bdrv_test, "a", BDRV_O_RDWR, &error_abort);
    b = bdrv_new_open_driver(&bdrv_test, "b", BDRV_O_RDWR, &error_abort);
    c = bdrv_new_open_driver(&bdrv_test, "c", BDRV_O_RDWR, &error_abort);

    /* blk is a BB for parent-a */
    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    blk_insert_bs(blk, parent_a, &error_abort);
    bdrv_unref(parent_a);

    /* If we want to get bdrv_drain_invoke() to call aio_poll(), the driver
     * callback must not return immediately. */
    if (!by_parent_cb) {
        BDRVTestState *s = parent_a->opaque;
        s->sleep_in_drain_begin = true;
    }

    /* Set child relationships */
    bdrv_ref(b);
    bdrv_ref(a);
    child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds,
                                BDRV_CHILD_DATA, &error_abort);
    child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_of_bds,
                                BDRV_CHILD_COW, &error_abort);

    bdrv_ref(a);
    bdrv_attach_child(parent_a, a, "PA-A",
                      by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class,
                      BDRV_CHILD_DATA, &error_abort);

    /* Reference counts before the graph change */
    g_assert_cmpint(parent_a->refcnt, ==, 1);
    g_assert_cmpint(parent_b->refcnt, ==, 1);
    g_assert_cmpint(a->refcnt, ==, 3);
    g_assert_cmpint(b->refcnt, ==, 2);
    g_assert_cmpint(c->refcnt, ==, 1);

    /* PB's child list is expected to be: child_a, child_b */
    g_assert(QLIST_FIRST(&parent_b->children) == child_a);
    g_assert(QLIST_NEXT(child_a, next) == child_b);
    g_assert(QLIST_NEXT(child_b, next) == NULL);

    /* Start the evil read request */
    detach_by_parent_data = (struct detach_by_parent_data) {
        .parent_b = parent_b,
        .child_b = child_b,
        .c = c,
        .by_parent_cb = by_parent_cb,
    };
    acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
    g_assert(acb != NULL);

    /* Drain and check the expected result */
    bdrv_subtree_drained_begin(parent_b);

    /* The graph change must have happened by now */
    g_assert(detach_by_parent_data.child_c != NULL);

    /* B lost PB's reference, C gained one */
    g_assert_cmpint(parent_a->refcnt, ==, 1);
    g_assert_cmpint(parent_b->refcnt, ==, 1);
    g_assert_cmpint(a->refcnt, ==, 3);
    g_assert_cmpint(b->refcnt, ==, 1);
    g_assert_cmpint(c->refcnt, ==, 2);

    /* PB's child list is expected to be: child_c, child_a */
    g_assert(QLIST_FIRST(&parent_b->children) == detach_by_parent_data.child_c);
    g_assert(QLIST_NEXT(detach_by_parent_data.child_c, next) == child_a);
    g_assert(QLIST_NEXT(child_a, next) == NULL);

    /* Everything except the detached node B is quiesced exactly once */
    g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
    g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
    g_assert_cmpint(a->quiesce_counter, ==, 1);
    g_assert_cmpint(b->quiesce_counter, ==, 0);
    g_assert_cmpint(c->quiesce_counter, ==, 1);

    bdrv_subtree_drained_end(parent_b);

    bdrv_unref(parent_b);
    blk_unref(blk);

    /* Only the references created by this function are left */
    g_assert_cmpint(a->refcnt, ==, 1);
    g_assert_cmpint(b->refcnt, ==, 1);
    g_assert_cmpint(c->refcnt, ==, 1);
    bdrv_unref(a);
    bdrv_unref(b);
    bdrv_unref(c);
}
1449
/* Detach-indirect test with the graph change done by the AIO callback */
static void test_detach_by_parent_cb(void)
{
    test_detach_indirect(true);
}
1454
/*
 * Detach-indirect test with the graph change done by a BH that the
 * .drained_begin child class callback schedules.
 */
static void test_detach_by_driver_cb(void)
{
    test_detach_indirect(false);
}
1459
/*
 * Append an overlay on top of a node that is currently drained and check
 * that the overlay ends up drained as well, and that both nodes are
 * undrained again when the drained section ends.
 */
static void test_append_to_drained(void)
{
    BlockBackend *blk;
    BlockDriverState *base, *overlay;
    BDRVTestState *base_s, *overlay_s;

    blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
    base = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
    base_s = base->opaque;
    blk_insert_bs(blk, base, &error_abort);

    overlay = bdrv_new_open_driver(&bdrv_test, "overlay", BDRV_O_RDWR,
                                   &error_abort);
    overlay_s = overlay->opaque;

    /* Drain only base; overlay is not part of the graph yet */
    do_drain_begin(BDRV_DRAIN, base);
    g_assert_cmpint(base->quiesce_counter, ==, 1);
    g_assert_cmpint(base_s->drain_count, ==, 1);
    g_assert_cmpint(base->in_flight, ==, 0);

    /* Takes ownership of overlay, so we don't have to unref it later */
    bdrv_append(overlay, base, &error_abort);
    g_assert_cmpint(base->in_flight, ==, 0);
    g_assert_cmpint(overlay->in_flight, ==, 0);

    /* base is unchanged; overlay is expected to be drained once, too */
    g_assert_cmpint(base->quiesce_counter, ==, 1);
    g_assert_cmpint(base_s->drain_count, ==, 1);
    g_assert_cmpint(overlay->quiesce_counter, ==, 1);
    g_assert_cmpint(overlay_s->drain_count, ==, 1);

    do_drain_end(BDRV_DRAIN, base);

    /* Ending the drain on base must undrain both nodes */
    g_assert_cmpint(base->quiesce_counter, ==, 0);
    g_assert_cmpint(base_s->drain_count, ==, 0);
    g_assert_cmpint(overlay->quiesce_counter, ==, 0);
    g_assert_cmpint(overlay_s->drain_count, ==, 0);

    bdrv_unref(base);
    blk_unref(blk);
}
1500
/*
 * Move a drained node between AioContexts: first from the main context
 * to iothread a, then (in a second drained section) to iothread b and
 * back to the main context.  All operations use &error_abort, so any
 * failure aborts the test.
 */
static void test_set_aio_context(void)
{
    BlockDriverState *bs;
    IOThread *a = iothread_new();
    IOThread *b = iothread_new();
    AioContext *ctx_a = iothread_get_aio_context(a);
    AioContext *ctx_b = iothread_get_aio_context(b);

    bs = bdrv_new_open_driver(&bdrv_test, "test-node", BDRV_O_RDWR,
                              &error_abort);

    /* First drained section: main context -> ctx_a */
    bdrv_drained_begin(bs);
    bdrv_try_set_aio_context(bs, ctx_a, &error_abort);

    /* The section is ended while holding ctx_a */
    aio_context_acquire(ctx_a);
    bdrv_drained_end(bs);

    /* Second drained section: ctx_a -> ctx_b -> main context */
    bdrv_drained_begin(bs);
    bdrv_try_set_aio_context(bs, ctx_b, &error_abort);
    aio_context_release(ctx_a);
    aio_context_acquire(ctx_b);
    bdrv_try_set_aio_context(bs, qemu_get_aio_context(), &error_abort);
    aio_context_release(ctx_b);
    bdrv_drained_end(bs);

    bdrv_unref(bs);
    iothread_join(a);
    iothread_join(b);
}
1530
1531
/* Block job that drops backing links when it commits */
typedef struct TestDropBackingBlockJob {
    BlockJob common;
    /* The job's run loop keeps sleeping until this becomes true */
    bool should_complete;
    /* Set to true through this pointer at the end of the commit callback */
    bool *did_complete;
    /* Second node whose backing link the commit callback removes */
    BlockDriverState *detach_also;
} TestDropBackingBlockJob;
1538
1539 static int coroutine_fn test_drop_backing_job_run(Job *job, Error **errp)
1540 {
1541     TestDropBackingBlockJob *s =
1542         container_of(job, TestDropBackingBlockJob, common.job);
1543
1544     while (!s->should_complete) {
1545         job_sleep_ns(job, 0);
1546     }
1547
1548     return 0;
1549 }
1550
1551 static void test_drop_backing_job_commit(Job *job)
1552 {
1553     TestDropBackingBlockJob *s =
1554         container_of(job, TestDropBackingBlockJob, common.job);
1555
1556     bdrv_set_backing_hd(blk_bs(s->common.blk), NULL, &error_abort);
1557     bdrv_set_backing_hd(s->detach_also, NULL, &error_abort);
1558
1559     *s->did_complete = true;
1560 }
1561
/* Job driver for TestDropBackingBlockJob */
static const BlockJobDriver test_drop_backing_job_driver = {
    .job_driver = {
        .instance_size  = sizeof(TestDropBackingBlockJob),
        .free           = block_job_free,
        .user_resume    = block_job_user_resume,
        .run            = test_drop_backing_job_run,
        .commit         = test_drop_backing_job_commit,
    }
};
1571
/**
 * Creates a child node with three parent nodes on it, and then runs a
 * block job on the final one, parent-node-2.
 *
 * The job is then asked to complete before a section where the child
 * is drained.
 *
 * Ending this section will undrain the child's parents, first
 * parent-node-2, then parent-node-1, then parent-node-0 -- the parent
 * list is in reverse order of how they were added.  Ending the drain
 * on parent-node-2 will resume the job, thus completing it and
 * scheduling job_exit().
 *
 * Ending the drain on parent-node-1 will poll the AioContext, which
 * lets job_exit() and thus test_drop_backing_job_commit() run.  That
 * function first removes the child as parent-node-2's backing file.
 *
 * In old (and buggy) implementations, there are two problems with
 * that:
 * (A) bdrv_drain_invoke() polls for every node that leaves the
 *     drained section.  This means that job_exit() is scheduled
 *     before the child has left the drained section.  Its
 *     quiesce_counter is therefore still 1 when it is removed from
 *     parent-node-2.
 *
 * (B) bdrv_replace_child_noperm() calls drained_end() on the old
 *     child's parents as many times as the child is quiesced.  This
 *     means it will call drained_end() on parent-node-2 once.
 *     Because parent-node-2 is no longer quiesced at this point, this
 *     will fail.
 *
 * bdrv_replace_child_noperm() therefore must call drained_end() on
 * the parent only if it really is still drained because the child is
 * drained.
 *
 * If removing child from parent-node-2 was successful (as it should
 * be), test_drop_backing_job_commit() will then also remove the child
 * from parent-node-0.
 *
 * With an old version of our drain infrastructure ((A) above), that
 * resulted in the following flow:
 *
 * 1. child attempts to leave its drained section.  The call recurses
 *    to its parents.
 *
 * 2. parent-node-2 leaves the drained section.  Polling in
 *    bdrv_drain_invoke() will schedule job_exit().
 *
 * 3. parent-node-1 leaves the drained section.  Polling in
 *    bdrv_drain_invoke() will run job_exit(), thus disconnecting
 *    parent-node-0 from the child node.
 *
 * 4. bdrv_parent_drained_end() uses a QLIST_FOREACH_SAFE() loop to
 *    iterate over the parents.  Thus, it now accesses the BdrvChild
 *    object that used to connect parent-node-0 and the child node.
 *    However, that object no longer exists, so it accesses a dangling
 *    pointer.
 *
 * The solution is to only poll once when running a bdrv_drained_end()
 * operation, specifically at the end when all drained_end()
 * operations for all involved nodes have been scheduled.
 * Note that this also solves (A) above, thus hiding (B).
 */
static void test_blockjob_commit_by_drained_end(void)
{
    BlockDriverState *bs_child, *bs_parents[3];
    TestDropBackingBlockJob *job;
    bool job_has_completed = false;
    int i;

    bs_child = bdrv_new_open_driver(&bdrv_test, "child-node", BDRV_O_RDWR,
                                    &error_abort);

    /* Create parent-node-0..2, each with bs_child as its backing node */
    for (i = 0; i < 3; i++) {
        char name[32];
        snprintf(name, sizeof(name), "parent-node-%i", i);
        bs_parents[i] = bdrv_new_open_driver(&bdrv_test, name, BDRV_O_RDWR,
                                             &error_abort);
        bdrv_set_backing_hd(bs_parents[i], bs_child, &error_abort);
    }

    /* The job runs on the parent that was added last */
    job = block_job_create("job", &test_drop_backing_job_driver, NULL,
                           bs_parents[2], 0, BLK_PERM_ALL, 0, 0, NULL, NULL,
                           &error_abort);

    job->detach_also = bs_parents[0];
    job->did_complete = &job_has_completed;

    job_start(&job->common.job);

    /* The commit callback must run while the drained section ends */
    job->should_complete = true;
    bdrv_drained_begin(bs_child);
    g_assert(!job_has_completed);
    bdrv_drained_end(bs_child);
    g_assert(job_has_completed);

    bdrv_unref(bs_parents[0]);
    bdrv_unref(bs_parents[1]);
    bdrv_unref(bs_parents[2]);
    bdrv_unref(bs_child);
}
1673
1674
/* Minimal block job that only reports when it has been cleaned up */
typedef struct TestSimpleBlockJob {
    BlockJob common;
    /* The job's run loop keeps sleeping until this becomes true */
    bool should_complete;
    /* Set to true through this pointer by the clean callback */
    bool *did_complete;
} TestSimpleBlockJob;
1680
1681 static int coroutine_fn test_simple_job_run(Job *job, Error **errp)
1682 {
1683     TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
1684
1685     while (!s->should_complete) {
1686         job_sleep_ns(job, 0);
1687     }
1688
1689     return 0;
1690 }
1691
1692 static void test_simple_job_clean(Job *job)
1693 {
1694     TestSimpleBlockJob *s = container_of(job, TestSimpleBlockJob, common.job);
1695     *s->did_complete = true;
1696 }
1697
/* Job driver for TestSimpleBlockJob */
static const BlockJobDriver test_simple_job_driver = {
    .job_driver = {
        .instance_size  = sizeof(TestSimpleBlockJob),
        .free           = block_job_free,
        .user_resume    = block_job_user_resume,
        .run            = test_simple_job_run,
        .clean          = test_simple_job_clean,
    },
};
1707
1708 static int drop_intermediate_poll_update_filename(BdrvChild *child,
1709                                                   BlockDriverState *new_base,
1710                                                   const char *filename,
1711                                                   Error **errp)
1712 {
1713     /*
1714      * We are free to poll here, which may change the block graph, if
1715      * it is not drained.
1716      */
1717
1718     /* If the job is not drained: Complete it, schedule job_exit() */
1719     aio_poll(qemu_get_current_aio_context(), false);
1720     /* If the job is not drained: Run job_exit(), finish the job */
1721     aio_poll(qemu_get_current_aio_context(), false);
1722
1723     return 0;
1724 }
1725
1726 /**
1727  * Test a poll in the midst of bdrv_drop_intermediate().
1728  *
1729  * bdrv_drop_intermediate() calls BdrvChildClass.update_filename(),
1730  * which can yield or poll.  This may lead to graph changes, unless
1731  * the whole subtree in question is drained.
1732  *
1733  * We test this on the following graph:
1734  *
1735  *                    Job
1736  *
1737  *                     |
1738  *                  job-node
1739  *                     |
1740  *                     v
1741  *
1742  *                  job-node
1743  *
1744  *                     |
1745  *                  backing
1746  *                     |
1747  *                     v
1748  *
1749  * node-2 --chain--> node-1 --chain--> node-0
1750  *
1751  * We drop node-1 with bdrv_drop_intermediate(top=node-1, base=node-0).
1752  *
1753  * This first updates node-2's backing filename by invoking
1754  * drop_intermediate_poll_update_filename(), which polls twice.  This
1755  * causes the job to finish, which in turns causes the job-node to be
1756  * deleted.
1757  *
1758  * bdrv_drop_intermediate() uses a QLIST_FOREACH_SAFE() loop, so it
1759  * already has a pointer to the BdrvChild edge between job-node and
1760  * node-1.  When it tries to handle that edge, we probably get a
1761  * segmentation fault because the object no longer exists.
1762  *
1763  *
1764  * The solution is for bdrv_drop_intermediate() to drain top's
1765  * subtree.  This prevents graph changes from happening just because
1766  * BdrvChildClass.update_filename() yields or polls.  Thus, the block
1767  * job is paused during that drained section and must finish before or
1768  * after.
1769  *
1770  * (In addition, bdrv_replace_child() must keep the job paused.)
1771  */
1772 static void test_drop_intermediate_poll(void)
1773 {
1774     static BdrvChildClass chain_child_class;
1775     BlockDriverState *chain[3];
1776     TestSimpleBlockJob *job;
1777     BlockDriverState *job_node;
1778     bool job_has_completed = false;
1779     int i;
1780     int ret;
1781
1782     chain_child_class = child_of_bds;
1783     chain_child_class.update_filename = drop_intermediate_poll_update_filename;
1784
1785     for (i = 0; i < 3; i++) {
1786         char name[32];
1787         snprintf(name, 32, "node-%i", i);
1788
1789         chain[i] = bdrv_new_open_driver(&bdrv_test, name, 0, &error_abort);
1790     }
1791
1792     job_node = bdrv_new_open_driver(&bdrv_test, "job-node", BDRV_O_RDWR,
1793                                     &error_abort);
1794     bdrv_set_backing_hd(job_node, chain[1], &error_abort);
1795
1796     /*
1797      * Establish the chain last, so the chain links are the first
1798      * elements in the BDS.parents lists
1799      */
1800     for (i = 0; i < 3; i++) {
1801         if (i) {
1802             /* Takes the reference to chain[i - 1] */
1803             chain[i]->backing = bdrv_attach_child(chain[i], chain[i - 1],
1804                                                   "chain", &chain_child_class,
1805                                                   BDRV_CHILD_COW, &error_abort);
1806         }
1807     }
1808
1809     job = block_job_create("job", &test_simple_job_driver, NULL, job_node,
1810                            0, BLK_PERM_ALL, 0, 0, NULL, NULL, &error_abort);
1811
1812     /* The job has a reference now */
1813     bdrv_unref(job_node);
1814
1815     job->did_complete = &job_has_completed;
1816
1817     job_start(&job->common.job);
1818     job->should_complete = true;
1819
1820     g_assert(!job_has_completed);
1821     ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
1822     g_assert(ret == 0);
1823     g_assert(job_has_completed);
1824
1825     bdrv_unref(chain[2]);
1826 }
1827
1828
1829 typedef struct BDRVReplaceTestState {
1830     bool was_drained;
1831     bool was_undrained;
1832     bool has_read;
1833
1834     int drain_count;
1835
1836     bool yield_before_read;
1837     Coroutine *io_co;
1838     Coroutine *drain_co;
1839 } BDRVReplaceTestState;
1840
1841 static void bdrv_replace_test_close(BlockDriverState *bs)
1842 {
1843 }
1844
1845 /**
1846  * If @bs has a backing file:
1847  *   Yield if .yield_before_read is true (and wait for drain_begin to
1848  *   wake us up).
1849  *   Forward the read to bs->backing.  Set .has_read to true.
1850  *   If drain_begin has woken us, wake it in turn.
1851  *
1852  * Otherwise:
1853  *   Set .has_read to true and return success.
1854  */
1855 static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
1856                                                     uint64_t offset,
1857                                                     uint64_t bytes,
1858                                                     QEMUIOVector *qiov,
1859                                                     int flags)
1860 {
1861     BDRVReplaceTestState *s = bs->opaque;
1862
1863     if (bs->backing) {
1864         int ret;
1865
1866         g_assert(!s->drain_count);
1867
1868         s->io_co = qemu_coroutine_self();
1869         if (s->yield_before_read) {
1870             s->yield_before_read = false;
1871             qemu_coroutine_yield();
1872         }
1873         s->io_co = NULL;
1874
1875         ret = bdrv_preadv(bs->backing, offset, qiov);
1876         s->has_read = true;
1877
1878         /* Wake up drain_co if it runs */
1879         if (s->drain_co) {
1880             aio_co_wake(s->drain_co);
1881         }
1882
1883         return ret;
1884     }
1885
1886     s->has_read = true;
1887     return 0;
1888 }
1889
1890 /**
1891  * If .drain_count is 0, wake up .io_co if there is one; and set
1892  * .was_drained.
1893  * Increment .drain_count.
1894  */
1895 static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
1896 {
1897     BDRVReplaceTestState *s = bs->opaque;
1898
1899     if (!s->drain_count) {
1900         /* Keep waking io_co up until it is done */
1901         s->drain_co = qemu_coroutine_self();
1902         while (s->io_co) {
1903             aio_co_wake(s->io_co);
1904             s->io_co = NULL;
1905             qemu_coroutine_yield();
1906         }
1907         s->drain_co = NULL;
1908
1909         s->was_drained = true;
1910     }
1911     s->drain_count++;
1912 }
1913
1914 /**
1915  * Reduce .drain_count, set .was_undrained once it reaches 0.
1916  * If .drain_count reaches 0 and the node has a backing file, issue a
1917  * read request.
1918  */
1919 static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
1920 {
1921     BDRVReplaceTestState *s = bs->opaque;
1922
1923     g_assert(s->drain_count > 0);
1924     if (!--s->drain_count) {
1925         int ret;
1926
1927         s->was_undrained = true;
1928
1929         if (bs->backing) {
1930             char data;
1931             QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
1932
1933             /* Queue a read request post-drain */
1934             ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
1935             g_assert(ret >= 0);
1936         }
1937     }
1938 }
1939
1940 static BlockDriver bdrv_replace_test = {
1941     .format_name            = "replace_test",
1942     .instance_size          = sizeof(BDRVReplaceTestState),
1943
1944     .bdrv_close             = bdrv_replace_test_close,
1945     .bdrv_co_preadv         = bdrv_replace_test_co_preadv,
1946
1947     .bdrv_co_drain_begin    = bdrv_replace_test_co_drain_begin,
1948     .bdrv_co_drain_end      = bdrv_replace_test_co_drain_end,
1949
1950     .bdrv_child_perm        = bdrv_default_perms,
1951 };
1952
1953 static void coroutine_fn test_replace_child_mid_drain_read_co(void *opaque)
1954 {
1955     int ret;
1956     char data;
1957
1958     ret = blk_co_pread(opaque, 0, 1, &data, 0);
1959     g_assert(ret >= 0);
1960 }
1961
1962 /**
1963  * We test two things:
1964  * (1) bdrv_replace_child_noperm() must not undrain the parent if both
1965  *     children are drained.
1966  * (2) bdrv_replace_child_noperm() must never flush I/O requests to a
1967  *     drained child.  If the old child is drained, it must flush I/O
1968  *     requests after the new one has been attached.  If the new child
1969  *     is drained, it must flush I/O requests before the old one is
1970  *     detached.
1971  *
1972  * To do so, we create one parent node and two child nodes; then
1973  * attach one of the children (old_child_bs) to the parent, then
1974  * drain both old_child_bs and new_child_bs according to
1975  * old_drain_count and new_drain_count, respectively, and finally
1976  * we invoke bdrv_replace_node() to replace old_child_bs by
1977  * new_child_bs.
1978  *
1979  * The test block driver we use here (bdrv_replace_test) has a read
1980  * function that:
1981  * - For the parent node, can optionally yield, and then forwards the
1982  *   read to bdrv_preadv(),
1983  * - For the child node, just returns immediately.
1984  *
1985  * If the read yields, the drain_begin function will wake it up.
1986  *
1987  * The drain_end function issues a read on the parent once it is fully
1988  * undrained (which simulates requests starting to come in again).
1989  */
1990 static void do_test_replace_child_mid_drain(int old_drain_count,
1991                                             int new_drain_count)
1992 {
1993     BlockBackend *parent_blk;
1994     BlockDriverState *parent_bs;
1995     BlockDriverState *old_child_bs, *new_child_bs;
1996     BDRVReplaceTestState *parent_s;
1997     BDRVReplaceTestState *old_child_s, *new_child_s;
1998     Coroutine *io_co;
1999     int i;
2000
2001     parent_bs = bdrv_new_open_driver(&bdrv_replace_test, "parent", 0,
2002                                      &error_abort);
2003     parent_s = parent_bs->opaque;
2004
2005     parent_blk = blk_new(qemu_get_aio_context(),
2006                          BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL);
2007     blk_insert_bs(parent_blk, parent_bs, &error_abort);
2008
2009     old_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "old-child", 0,
2010                                         &error_abort);
2011     new_child_bs = bdrv_new_open_driver(&bdrv_replace_test, "new-child", 0,
2012                                         &error_abort);
2013     old_child_s = old_child_bs->opaque;
2014     new_child_s = new_child_bs->opaque;
2015
2016     /* So that we can read something */
2017     parent_bs->total_sectors = 1;
2018     old_child_bs->total_sectors = 1;
2019     new_child_bs->total_sectors = 1;
2020
2021     bdrv_ref(old_child_bs);
2022     parent_bs->backing = bdrv_attach_child(parent_bs, old_child_bs, "child",
2023                                            &child_of_bds, BDRV_CHILD_COW,
2024                                            &error_abort);
2025
2026     for (i = 0; i < old_drain_count; i++) {
2027         bdrv_drained_begin(old_child_bs);
2028     }
2029     for (i = 0; i < new_drain_count; i++) {
2030         bdrv_drained_begin(new_child_bs);
2031     }
2032
2033     if (!old_drain_count) {
2034         /*
2035          * Start a read operation that will yield, so it will not
2036          * complete before the node is drained.
2037          */
2038         parent_s->yield_before_read = true;
2039         io_co = qemu_coroutine_create(test_replace_child_mid_drain_read_co,
2040                                       parent_blk);
2041         qemu_coroutine_enter(io_co);
2042     }
2043
2044     /* If we have started a read operation, it should have yielded */
2045     g_assert(!parent_s->has_read);
2046
2047     /* Reset drained status so we can see what bdrv_replace_node() does */
2048     parent_s->was_drained = false;
2049     parent_s->was_undrained = false;
2050
2051     g_assert(parent_bs->quiesce_counter == old_drain_count);
2052     bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
2053     g_assert(parent_bs->quiesce_counter == new_drain_count);
2054
2055     if (!old_drain_count && !new_drain_count) {
2056         /*
2057          * From undrained to undrained drains and undrains the parent,
2058          * because bdrv_replace_node() contains a drained section for
2059          * @old_child_bs.
2060          */
2061         g_assert(parent_s->was_drained && parent_s->was_undrained);
2062     } else if (!old_drain_count && new_drain_count) {
2063         /*
2064          * From undrained to drained should drain the parent and keep
2065          * it that way.
2066          */
2067         g_assert(parent_s->was_drained && !parent_s->was_undrained);
2068     } else if (old_drain_count && !new_drain_count) {
2069         /*
2070          * From drained to undrained should undrain the parent and
2071          * keep it that way.
2072          */
2073         g_assert(!parent_s->was_drained && parent_s->was_undrained);
2074     } else /* if (old_drain_count && new_drain_count) */ {
2075         /*
2076          * From drained to drained must not undrain the parent at any
2077          * point
2078          */
2079         g_assert(!parent_s->was_drained && !parent_s->was_undrained);
2080     }
2081
2082     if (!old_drain_count || !new_drain_count) {
2083         /*
2084          * If !old_drain_count, we have started a read request before
2085          * bdrv_replace_node().  If !new_drain_count, the parent must
2086          * have been undrained at some point, and
2087          * bdrv_replace_test_co_drain_end() starts a read request
2088          * then.
2089          */
2090         g_assert(parent_s->has_read);
2091     } else {
2092         /*
2093          * If the parent was never undrained, there is no way to start
2094          * a read request.
2095          */
2096         g_assert(!parent_s->has_read);
2097     }
2098
2099     /* A drained child must have not received any request */
2100     g_assert(!(old_drain_count && old_child_s->has_read));
2101     g_assert(!(new_drain_count && new_child_s->has_read));
2102
2103     for (i = 0; i < new_drain_count; i++) {
2104         bdrv_drained_end(new_child_bs);
2105     }
2106     for (i = 0; i < old_drain_count; i++) {
2107         bdrv_drained_end(old_child_bs);
2108     }
2109
2110     /*
2111      * By now, bdrv_replace_test_co_drain_end() must have been called
2112      * at some point while the new child was attached to the parent.
2113      */
2114     g_assert(parent_s->has_read);
2115     g_assert(new_child_s->has_read);
2116
2117     blk_unref(parent_blk);
2118     bdrv_unref(parent_bs);
2119     bdrv_unref(old_child_bs);
2120     bdrv_unref(new_child_bs);
2121 }
2122
/*
 * Run do_test_replace_child_mid_drain() for every combination of an
 * undrained (0) or singly drained (1) old and new child.
 */
static void test_replace_child_mid_drain(void)
{
    int combo;

    /* Bit 1 selects the old child's drain count, bit 0 the new child's */
    for (combo = 0; combo < 4; combo++) {
        do_test_replace_child_mid_drain(combo >> 1, combo & 1);
    }
}
2133
2134 int main(int argc, char **argv)
2135 {
2136     int ret;
2137
2138     bdrv_init();
2139     qemu_init_main_loop(&error_abort);
2140
2141     g_test_init(&argc, &argv, NULL);
2142     qemu_event_init(&done_event, false);
2143
2144     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
2145     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
2146     g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
2147                     test_drv_cb_drain_subtree);
2148
2149     g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
2150                     test_drv_cb_co_drain_all);
2151     g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
2152     g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
2153                     test_drv_cb_co_drain_subtree);
2154
2155
2156     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
2157     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
2158     g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
2159                     test_quiesce_drain_subtree);
2160
2161     g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
2162                     test_quiesce_co_drain_all);
2163     g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
2164     g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
2165                     test_quiesce_co_drain_subtree);
2166
2167     g_test_add_func("/bdrv-drain/nested", test_nested);
2168     g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
2169
2170     g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
2171                     test_graph_change_drain_subtree);
2172     g_test_add_func("/bdrv-drain/graph-change/drain_all",
2173                     test_graph_change_drain_all);
2174
2175     g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
2176     g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
2177     g_test_add_func("/bdrv-drain/iothread/drain_subtree",
2178                     test_iothread_drain_subtree);
2179
2180     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
2181     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
2182     g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
2183                     test_blockjob_drain_subtree);
2184
2185     g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
2186                     test_blockjob_error_drain_all);
2187     g_test_add_func("/bdrv-drain/blockjob/error/drain",
2188                     test_blockjob_error_drain);
2189     g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
2190                     test_blockjob_error_drain_subtree);
2191
2192     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
2193                     test_blockjob_iothread_drain_all);
2194     g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
2195                     test_blockjob_iothread_drain);
2196     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
2197                     test_blockjob_iothread_drain_subtree);
2198
2199     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
2200                     test_blockjob_iothread_error_drain_all);
2201     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
2202                     test_blockjob_iothread_error_drain);
2203     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
2204                     test_blockjob_iothread_error_drain_subtree);
2205
2206     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
2207     g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
2208     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
2209     g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
2210     g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
2211     g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
2212
2213     g_test_add_func("/bdrv-drain/attach/drain", test_append_to_drained);
2214
2215     g_test_add_func("/bdrv-drain/set_aio_context", test_set_aio_context);
2216
2217     g_test_add_func("/bdrv-drain/blockjob/commit_by_drained_end",
2218                     test_blockjob_commit_by_drained_end);
2219
2220     g_test_add_func("/bdrv-drain/bdrv_drop_intermediate/poll",
2221                     test_drop_intermediate_poll);
2222
2223     g_test_add_func("/bdrv-drain/replace_child/mid-drain",
2224                     test_replace_child_mid_drain);
2225
2226     ret = g_test_run();
2227     qemu_event_destroy(&done_event);
2228     return ret;
2229 }
This page took 0.138448 seconds and 4 git commands to generate.