]> Git Repo - qemu.git/blame - block.c
GS and IO CODE macros for blockjob_int.h
[qemu.git] / block.c
CommitLineData
fc01f7e7
FB
1/*
2 * QEMU System Emulator block driver
5fafdf24 3 *
fc01f7e7 4 * Copyright (c) 2003 Fabrice Bellard
c20555e1 5 * Copyright (c) 2020 Virtuozzo International GmbH.
5fafdf24 6 *
fc01f7e7
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
e688df6b 25
d38ea87a 26#include "qemu/osdep.h"
0ab8ed18 27#include "block/trace.h"
737e150e
PB
28#include "block/block_int.h"
29#include "block/blockjob.h"
0c9b70d5 30#include "block/fuse.h"
cd7fca95 31#include "block/nbd.h"
609f45ea 32#include "block/qdict.h"
d49b6836 33#include "qemu/error-report.h"
5e5733e5 34#include "block/module_block.h"
db725815 35#include "qemu/main-loop.h"
1de7afc9 36#include "qemu/module.h"
e688df6b 37#include "qapi/error.h"
452fcdbc 38#include "qapi/qmp/qdict.h"
7b1b5d19 39#include "qapi/qmp/qjson.h"
e59a0cf1 40#include "qapi/qmp/qnull.h"
fc81fa1e 41#include "qapi/qmp/qstring.h"
e1d74bc6
KW
42#include "qapi/qobject-output-visitor.h"
43#include "qapi/qapi-visit-block-core.h"
bfb197e0 44#include "sysemu/block-backend.h"
1de7afc9 45#include "qemu/notify.h"
922a01a0 46#include "qemu/option.h"
10817bf0 47#include "qemu/coroutine.h"
c13163fb 48#include "block/qapi.h"
1de7afc9 49#include "qemu/timer.h"
f348b6d1
VB
50#include "qemu/cutils.h"
51#include "qemu/id.h"
0bc329fb
HR
52#include "qemu/range.h"
53#include "qemu/rcu.h"
21c2283e 54#include "block/coroutines.h"
fc01f7e7 55
71e72a19 56#ifdef CONFIG_BSD
7674e7bf 57#include <sys/ioctl.h>
72cf2d4f 58#include <sys/queue.h>
feccdcee 59#if defined(HAVE_SYS_DISK_H)
7674e7bf
FB
60#include <sys/disk.h>
61#endif
c5e97233 62#endif
7674e7bf 63
49dc768d
AL
64#ifdef _WIN32
65#include <windows.h>
66#endif
67
1c9805a3
SH
68#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
69
3b491a90 70/* Protected by BQL */
dc364f4c
BC
71static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
72 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
73
3b491a90 74/* Protected by BQL */
2c1d04e0
HR
75static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
76 QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
77
3b491a90 78/* Protected by BQL */
8a22f02a
SH
79static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
ea2384d3 81
5b363937
HR
82static BlockDriverState *bdrv_open_inherit(const char *filename,
83 const char *reference,
84 QDict *options, int flags,
85 BlockDriverState *parent,
bd86fb99 86 const BdrvChildClass *child_class,
272c02ea 87 BdrvChildRole child_role,
5b363937 88 Error **errp);
f3930ed0 89
bfb8aa6d
KW
90static bool bdrv_recurse_has_child(BlockDriverState *bs,
91 BlockDriverState *child);
92
b0a9f6fe 93static void bdrv_child_free(BdrvChild *child);
be64bbb0 94static void bdrv_replace_child_noperm(BdrvChild **child,
b0a9f6fe
HR
95 BlockDriverState *new_bs,
96 bool free_empty_child);
e9238278
VSO
97static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
98 BdrvChild *child,
99 Transaction *tran);
160333e1
VSO
100static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
101 Transaction *tran);
0978623e 102
72373e40
VSO
103static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
104 BlockReopenQueue *queue,
ecd30d2d 105 Transaction *change_child_tran, Error **errp);
53e96d1e
VSO
106static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
107static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
108
fa8fc1d0
EGE
109static bool bdrv_backing_overridden(BlockDriverState *bs);
110
eb852011
MA
111/* If non-zero, use only whitelisted block drivers */
112static int use_bdrv_whitelist;
113
9e0b22f4
SH
114#ifdef _WIN32
/*
 * Return 1 if @filename starts with an ASCII letter directly followed by
 * a colon (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* Do not look at filename[1]: the string may be empty */
        return 0;
    }

    return filename[1] == ':';
}
121
/*
 * Return 1 if @filename is either a bare drive specification ("d:") or
 * starts with one of the device prefixes "\\.\" and "//./"; return 0
 * otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }

    return (strstart(filename, "\\\\.\\", NULL) ||
            strstart(filename, "//./", NULL)) ? 1 : 0;
}
132#endif
133
339064d5
KW
134size_t bdrv_opt_mem_align(BlockDriverState *bs)
135{
136 if (!bs || !bs->drv) {
459b4e66 137 /* page size or 4k (hdd sector size) should be on the safe side */
038adc2f 138 return MAX(4096, qemu_real_host_page_size);
339064d5 139 }
384a48fb 140 IO_CODE();
339064d5
KW
141
142 return bs->bl.opt_mem_alignment;
143}
144
4196d2f0
DL
145size_t bdrv_min_mem_align(BlockDriverState *bs)
146{
147 if (!bs || !bs->drv) {
459b4e66 148 /* page size or 4k (hdd sector size) should be on the safe side */
038adc2f 149 return MAX(4096, qemu_real_host_page_size);
4196d2f0 150 }
384a48fb 151 IO_CODE();
4196d2f0
DL
152
153 return bs->bl.min_mem_alignment;
154}
155
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a colon
 * occurs before the first directory separator.
 *
 * On Windows, drive specifications are never treated as a protocol and
 * the backslash counts as a separator too.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* The first delimiter found decides: only ':' means "protocol" */
    return path[strcspn(path, delimiters)] == ':';
}
173
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * On POSIX hosts that means it starts with '/'.  On Windows, drive
 * specifications and paths starting with '\' are absolute as well.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (path[0] == '\\') {
        return 1;
    }
#endif
    return path[0] == '/';
}
186
/*
 * Build a path for @filename, interpreting it relative to @base_path.
 *
 * If @filename is absolute, a duplicate of it is returned.  Otherwise the
 * result is the leading part of @base_path - everything up to and including
 * its last directory separator, but never less than its "<protocol>:"
 * prefix if it has one - followed by @filename.  URLs are supported.
 *
 * The returned string is newly allocated (g_strdup()/g_malloc()).
 */
char *path_combine(const char *base_path, const char *filename)
{
    const char *prefix_end = base_path;
    const char *last_sep;
    char *combined;
    int prefix_len;

    if (path_is_absolute(filename)) {
        return g_strdup(filename);
    }

    /* Always keep the protocol part of the base, if there is one */
    if (path_has_protocol(base_path)) {
        const char *colon = strchr(base_path, ':');

        if (colon) {
            prefix_end = colon + 1;
        }
    }

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!last_sep || backslash > last_sep) {
            last_sep = backslash;
        }
    }
#endif

    /* Extend the kept prefix up to the last directory separator */
    if (last_sep && last_sep + 1 > prefix_end) {
        prefix_end = last_sep + 1;
    }

    prefix_len = prefix_end - base_path;

    combined = g_malloc(prefix_len + strlen(filename) + 1);
    memcpy(combined, base_path, prefix_len);
    strcpy(combined + prefix_len, filename);

    return combined;
}
235
03c320d8
HR
236/*
237 * Helper function for bdrv_parse_filename() implementations to remove optional
238 * protocol prefixes (especially "file:") from a filename and for putting the
239 * stripped filename into the options QDict if there is such a prefix.
240 */
241void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
242 QDict *options)
243{
244 if (strstart(filename, prefix, &filename)) {
245 /* Stripping the explicit protocol prefix may result in a protocol
246 * prefix being (wrongly) detected (if the filename contains a colon) */
247 if (path_has_protocol(filename)) {
18cf67c5 248 GString *fat_filename;
03c320d8
HR
249
250 /* This means there is some colon before the first slash; therefore,
251 * this cannot be an absolute path */
252 assert(!path_is_absolute(filename));
253
254 /* And we can thus fix the protocol detection issue by prefixing it
255 * by "./" */
18cf67c5
MA
256 fat_filename = g_string_new("./");
257 g_string_append(fat_filename, filename);
03c320d8 258
18cf67c5 259 assert(!path_has_protocol(fat_filename->str));
03c320d8 260
18cf67c5
MA
261 qdict_put(options, "filename",
262 qstring_from_gstring(fat_filename));
03c320d8
HR
263 } else {
264 /* If no protocol prefix was detected, we can use the shortened
265 * filename as-is */
266 qdict_put_str(options, "filename", filename);
267 }
268 }
269}
270
271
9c5e6594
KW
272/* Returns whether the image file is opened as read-only. Note that this can
273 * return false and writing to the image file is still not possible because the
274 * image is inactivated. */
93ed524e
JC
275bool bdrv_is_read_only(BlockDriverState *bs)
276{
384a48fb 277 IO_CODE();
975da073 278 return !(bs->open_flags & BDRV_O_RDWR);
93ed524e
JC
279}
280
54a32bfe
KW
281int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
282 bool ignore_allow_rdw, Error **errp)
fe5241bf 283{
384a48fb
EGE
284 IO_CODE();
285
e2b8247a
JC
286 /* Do not set read_only if copy_on_read is enabled */
287 if (bs->copy_on_read && read_only) {
288 error_setg(errp, "Can't set node '%s' to r/o with copy-on-read enabled",
289 bdrv_get_device_or_node_name(bs));
290 return -EINVAL;
291 }
292
d6fcdf06 293 /* Do not clear read_only if it is prohibited */
54a32bfe
KW
294 if (!read_only && !(bs->open_flags & BDRV_O_ALLOW_RDWR) &&
295 !ignore_allow_rdw)
296 {
d6fcdf06
JC
297 error_setg(errp, "Node '%s' is read only",
298 bdrv_get_device_or_node_name(bs));
299 return -EPERM;
300 }
301
45803a03
JC
302 return 0;
303}
304
eaa2410f
KW
305/*
306 * Called by a driver that can only provide a read-only image.
307 *
308 * Returns 0 if the node is already read-only or it could switch the node to
309 * read-only because BDRV_O_AUTO_RDONLY is set.
310 *
311 * Returns -EACCES if the node is read-write and BDRV_O_AUTO_RDONLY is not set
312 * or bdrv_can_set_read_only() forbids making the node read-only. If @errmsg
313 * is not NULL, it is used as the error message for the Error object.
314 */
315int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
316 Error **errp)
45803a03
JC
317{
318 int ret = 0;
384a48fb 319 IO_CODE();
45803a03 320
eaa2410f
KW
321 if (!(bs->open_flags & BDRV_O_RDWR)) {
322 return 0;
323 }
324 if (!(bs->open_flags & BDRV_O_AUTO_RDONLY)) {
325 goto fail;
45803a03
JC
326 }
327
eaa2410f
KW
328 ret = bdrv_can_set_read_only(bs, true, false, NULL);
329 if (ret < 0) {
330 goto fail;
eeae6a59
KW
331 }
332
eaa2410f
KW
333 bs->open_flags &= ~BDRV_O_RDWR;
334
e2b8247a 335 return 0;
eaa2410f
KW
336
337fail:
338 error_setg(errp, "%s", errmsg ?: "Image is read-only");
339 return -EACCES;
fe5241bf
JC
340}
341
645ae7d8
HR
342/*
343 * If @backing is empty, this function returns NULL without setting
344 * @errp. In all other cases, NULL will only be returned with @errp
345 * set.
346 *
347 * Therefore, a return value of NULL without @errp set means that
348 * there is no backing file; if @errp is set, there is one but its
349 * absolute filename cannot be generated.
350 */
351char *bdrv_get_full_backing_filename_from_filename(const char *backed,
352 const char *backing,
353 Error **errp)
dc5a1371 354{
645ae7d8
HR
355 if (backing[0] == '\0') {
356 return NULL;
357 } else if (path_has_protocol(backing) || path_is_absolute(backing)) {
358 return g_strdup(backing);
9f07429e
HR
359 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
360 error_setg(errp, "Cannot use relative backing file names for '%s'",
361 backed);
645ae7d8 362 return NULL;
dc5a1371 363 } else {
645ae7d8 364 return path_combine(backed, backing);
dc5a1371
PB
365 }
366}
367
9f4793d8
HR
368/*
369 * If @filename is empty or NULL, this function returns NULL without
370 * setting @errp. In all other cases, NULL will only be returned with
371 * @errp set.
372 */
373static char *bdrv_make_absolute_filename(BlockDriverState *relative_to,
374 const char *filename, Error **errp)
0a82855a 375{
8df68616 376 char *dir, *full_name;
9f4793d8 377
8df68616
HR
378 if (!filename || filename[0] == '\0') {
379 return NULL;
380 } else if (path_has_protocol(filename) || path_is_absolute(filename)) {
381 return g_strdup(filename);
382 }
9f07429e 383
8df68616
HR
384 dir = bdrv_dirname(relative_to, errp);
385 if (!dir) {
386 return NULL;
387 }
f30c66ba 388
8df68616
HR
389 full_name = g_strconcat(dir, filename, NULL);
390 g_free(dir);
391 return full_name;
9f4793d8
HR
392}
393
394char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp)
395{
f791bf7f 396 GLOBAL_STATE_CODE();
9f4793d8 397 return bdrv_make_absolute_filename(bs, bs->backing_file, errp);
0a82855a
HR
398}
399
0eb7217e
SH
400void bdrv_register(BlockDriver *bdrv)
401{
a15f08dc 402 assert(bdrv->format_name);
f791bf7f 403 GLOBAL_STATE_CODE();
8a22f02a 404 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
ea2384d3 405}
b338082b 406
e4e9986b
MA
407BlockDriverState *bdrv_new(void)
408{
409 BlockDriverState *bs;
410 int i;
411
f791bf7f
EGE
412 GLOBAL_STATE_CODE();
413
5839e53b 414 bs = g_new0(BlockDriverState, 1);
e4654d2d 415 QLIST_INIT(&bs->dirty_bitmaps);
fbe40ff7
FZ
416 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
417 QLIST_INIT(&bs->op_blockers[i]);
418 }
3783fa3d 419 qemu_co_mutex_init(&bs->reqs_lock);
2119882c 420 qemu_mutex_init(&bs->dirty_bitmap_mutex);
9fcb0251 421 bs->refcnt = 1;
dcd04228 422 bs->aio_context = qemu_get_aio_context();
d7d512f6 423
3ff2f67a
EY
424 qemu_co_queue_init(&bs->flush_queue);
425
0bc329fb
HR
426 qemu_co_mutex_init(&bs->bsc_modify_lock);
427 bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
428
0f12264e
KW
429 for (i = 0; i < bdrv_drain_all_count; i++) {
430 bdrv_drained_begin(bs);
431 }
432
2c1d04e0
HR
433 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
434
b338082b
FB
435 return bs;
436}
437
88d88798 438static BlockDriver *bdrv_do_find_format(const char *format_name)
ea2384d3
FB
439{
440 BlockDriver *drv1;
88d88798 441
8a22f02a
SH
442 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
443 if (!strcmp(drv1->format_name, format_name)) {
ea2384d3 444 return drv1;
8a22f02a 445 }
ea2384d3 446 }
88d88798 447
ea2384d3
FB
448 return NULL;
449}
450
88d88798
MM
451BlockDriver *bdrv_find_format(const char *format_name)
452{
453 BlockDriver *drv1;
454 int i;
455
f791bf7f
EGE
456 GLOBAL_STATE_CODE();
457
88d88798
MM
458 drv1 = bdrv_do_find_format(format_name);
459 if (drv1) {
460 return drv1;
461 }
462
463 /* The driver isn't registered, maybe we need to load a module */
464 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
465 if (!strcmp(block_driver_modules[i].format_name, format_name)) {
466 block_module_load_one(block_driver_modules[i].library_name);
467 break;
468 }
469 }
470
471 return bdrv_do_find_format(format_name);
472}
473
9ac404c5 474static int bdrv_format_is_whitelisted(const char *format_name, bool read_only)
eb852011 475{
b64ec4e4
FZ
476 static const char *whitelist_rw[] = {
477 CONFIG_BDRV_RW_WHITELIST
859aef02 478 NULL
b64ec4e4
FZ
479 };
480 static const char *whitelist_ro[] = {
481 CONFIG_BDRV_RO_WHITELIST
859aef02 482 NULL
eb852011
MA
483 };
484 const char **p;
485
b64ec4e4 486 if (!whitelist_rw[0] && !whitelist_ro[0]) {
eb852011 487 return 1; /* no whitelist, anything goes */
b64ec4e4 488 }
eb852011 489
b64ec4e4 490 for (p = whitelist_rw; *p; p++) {
9ac404c5 491 if (!strcmp(format_name, *p)) {
eb852011
MA
492 return 1;
493 }
494 }
b64ec4e4
FZ
495 if (read_only) {
496 for (p = whitelist_ro; *p; p++) {
9ac404c5 497 if (!strcmp(format_name, *p)) {
b64ec4e4
FZ
498 return 1;
499 }
500 }
501 }
eb852011
MA
502 return 0;
503}
504
9ac404c5
AS
505int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
506{
f791bf7f 507 GLOBAL_STATE_CODE();
9ac404c5
AS
508 return bdrv_format_is_whitelisted(drv->format_name, read_only);
509}
510
e6ff69bf
DB
511bool bdrv_uses_whitelist(void)
512{
513 return use_bdrv_whitelist;
514}
515
5b7e1542
ZYW
516typedef struct CreateCo {
517 BlockDriver *drv;
518 char *filename;
83d0521a 519 QemuOpts *opts;
5b7e1542 520 int ret;
cc84d90f 521 Error *err;
5b7e1542
ZYW
522} CreateCo;
523
524static void coroutine_fn bdrv_create_co_entry(void *opaque)
525{
cc84d90f
HR
526 Error *local_err = NULL;
527 int ret;
528
5b7e1542
ZYW
529 CreateCo *cco = opaque;
530 assert(cco->drv);
531
b92902df
ML
532 ret = cco->drv->bdrv_co_create_opts(cco->drv,
533 cco->filename, cco->opts, &local_err);
621ff94d 534 error_propagate(&cco->err, local_err);
cc84d90f 535 cco->ret = ret;
5b7e1542
ZYW
536}
537
0e7e1989 538int bdrv_create(BlockDriver *drv, const char* filename,
83d0521a 539 QemuOpts *opts, Error **errp)
ea2384d3 540{
5b7e1542
ZYW
541 int ret;
542
f791bf7f
EGE
543 GLOBAL_STATE_CODE();
544
5b7e1542
ZYW
545 Coroutine *co;
546 CreateCo cco = {
547 .drv = drv,
548 .filename = g_strdup(filename),
83d0521a 549 .opts = opts,
5b7e1542 550 .ret = NOT_DONE,
cc84d90f 551 .err = NULL,
5b7e1542
ZYW
552 };
553
efc75e2a 554 if (!drv->bdrv_co_create_opts) {
cc84d90f 555 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
80168bff
LC
556 ret = -ENOTSUP;
557 goto out;
5b7e1542
ZYW
558 }
559
560 if (qemu_in_coroutine()) {
561 /* Fast-path if already in coroutine context */
562 bdrv_create_co_entry(&cco);
563 } else {
0b8b8753
PB
564 co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
565 qemu_coroutine_enter(co);
5b7e1542 566 while (cco.ret == NOT_DONE) {
b47ec2c4 567 aio_poll(qemu_get_aio_context(), true);
5b7e1542
ZYW
568 }
569 }
570
571 ret = cco.ret;
cc84d90f 572 if (ret < 0) {
84d18f06 573 if (cco.err) {
cc84d90f
HR
574 error_propagate(errp, cco.err);
575 } else {
576 error_setg_errno(errp, -ret, "Could not create image");
577 }
578 }
0e7e1989 579
80168bff
LC
580out:
581 g_free(cco.filename);
5b7e1542 582 return ret;
ea2384d3
FB
583}
584
fd17146c
HR
585/**
586 * Helper function for bdrv_create_file_fallback(): Resize @blk to at
587 * least the given @minimum_size.
588 *
589 * On success, return @blk's actual length.
590 * Otherwise, return -errno.
591 */
592static int64_t create_file_fallback_truncate(BlockBackend *blk,
593 int64_t minimum_size, Error **errp)
84a12e66 594{
cc84d90f 595 Error *local_err = NULL;
fd17146c 596 int64_t size;
cc84d90f 597 int ret;
84a12e66 598
8c6242b6
KW
599 ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
600 &local_err);
fd17146c
HR
601 if (ret < 0 && ret != -ENOTSUP) {
602 error_propagate(errp, local_err);
603 return ret;
604 }
605
606 size = blk_getlength(blk);
607 if (size < 0) {
608 error_free(local_err);
609 error_setg_errno(errp, -size,
610 "Failed to inquire the new image file's length");
611 return size;
612 }
613
614 if (size < minimum_size) {
615 /* Need to grow the image, but we failed to do that */
616 error_propagate(errp, local_err);
617 return -ENOTSUP;
618 }
619
620 error_free(local_err);
621 local_err = NULL;
622
623 return size;
624}
625
626/**
627 * Helper function for bdrv_create_file_fallback(): Zero the first
628 * sector to remove any potentially pre-existing image header.
629 */
630static int create_file_fallback_zero_first_sector(BlockBackend *blk,
631 int64_t current_size,
632 Error **errp)
633{
634 int64_t bytes_to_clear;
635 int ret;
636
637 bytes_to_clear = MIN(current_size, BDRV_SECTOR_SIZE);
638 if (bytes_to_clear) {
639 ret = blk_pwrite_zeroes(blk, 0, bytes_to_clear, BDRV_REQ_MAY_UNMAP);
640 if (ret < 0) {
641 error_setg_errno(errp, -ret,
642 "Failed to clear the new image's first sector");
643 return ret;
644 }
645 }
646
647 return 0;
648}
649
5a5e7f8c
ML
650/**
651 * Simple implementation of bdrv_co_create_opts for protocol drivers
652 * which only support creation via opening a file
653 * (usually existing raw storage device)
654 */
655int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
656 const char *filename,
657 QemuOpts *opts,
658 Error **errp)
fd17146c
HR
659{
660 BlockBackend *blk;
eeea1faa 661 QDict *options;
fd17146c
HR
662 int64_t size = 0;
663 char *buf = NULL;
664 PreallocMode prealloc;
665 Error *local_err = NULL;
666 int ret;
667
b4ad82aa
EGE
668 GLOBAL_STATE_CODE();
669
fd17146c
HR
670 size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
671 buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
672 prealloc = qapi_enum_parse(&PreallocMode_lookup, buf,
673 PREALLOC_MODE_OFF, &local_err);
674 g_free(buf);
675 if (local_err) {
676 error_propagate(errp, local_err);
677 return -EINVAL;
678 }
679
680 if (prealloc != PREALLOC_MODE_OFF) {
681 error_setg(errp, "Unsupported preallocation mode '%s'",
682 PreallocMode_str(prealloc));
683 return -ENOTSUP;
684 }
685
eeea1faa 686 options = qdict_new();
fd17146c
HR
687 qdict_put_str(options, "driver", drv->format_name);
688
689 blk = blk_new_open(filename, NULL, options,
690 BDRV_O_RDWR | BDRV_O_RESIZE, errp);
691 if (!blk) {
692 error_prepend(errp, "Protocol driver '%s' does not support image "
693 "creation, and opening the image failed: ",
694 drv->format_name);
695 return -EINVAL;
696 }
697
698 size = create_file_fallback_truncate(blk, size, errp);
699 if (size < 0) {
700 ret = size;
701 goto out;
702 }
703
704 ret = create_file_fallback_zero_first_sector(blk, size, errp);
705 if (ret < 0) {
706 goto out;
707 }
708
709 ret = 0;
710out:
711 blk_unref(blk);
712 return ret;
713}
714
715int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
716{
729222af 717 QemuOpts *protocol_opts;
fd17146c 718 BlockDriver *drv;
729222af
SG
719 QDict *qdict;
720 int ret;
fd17146c 721
f791bf7f
EGE
722 GLOBAL_STATE_CODE();
723
b65a5e12 724 drv = bdrv_find_protocol(filename, true, errp);
84a12e66 725 if (drv == NULL) {
16905d71 726 return -ENOENT;
84a12e66
CH
727 }
728
729222af
SG
729 if (!drv->create_opts) {
730 error_setg(errp, "Driver '%s' does not support image creation",
731 drv->format_name);
732 return -ENOTSUP;
733 }
734
735 /*
736 * 'opts' contains a QemuOptsList with a combination of format and protocol
737 * default values.
738 *
739 * The format properly removes its options, but the default values remain
740 * in 'opts->list'. So if the protocol has options with the same name
741 * (e.g. rbd has 'cluster_size' as qcow2), it will see the default values
742 * of the format, since for overlapping options, the format wins.
743 *
744 * To avoid this issue, lets convert QemuOpts to QDict, in this way we take
745 * only the set options, and then convert it back to QemuOpts, using the
746 * create_opts of the protocol. So the new QemuOpts, will contain only the
747 * protocol defaults.
748 */
749 qdict = qemu_opts_to_qdict(opts, NULL);
750 protocol_opts = qemu_opts_from_qdict(drv->create_opts, qdict, errp);
751 if (protocol_opts == NULL) {
752 ret = -EINVAL;
753 goto out;
754 }
755
756 ret = bdrv_create(drv, filename, protocol_opts, errp);
757out:
758 qemu_opts_del(protocol_opts);
759 qobject_unref(qdict);
760 return ret;
84a12e66
CH
761}
762
e1d7f8bb
DHB
763int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
764{
765 Error *local_err = NULL;
766 int ret;
767
384a48fb 768 IO_CODE();
e1d7f8bb
DHB
769 assert(bs != NULL);
770
771 if (!bs->drv) {
772 error_setg(errp, "Block node '%s' is not opened", bs->filename);
773 return -ENOMEDIUM;
774 }
775
776 if (!bs->drv->bdrv_co_delete_file) {
777 error_setg(errp, "Driver '%s' does not support image deletion",
778 bs->drv->format_name);
779 return -ENOTSUP;
780 }
781
782 ret = bs->drv->bdrv_co_delete_file(bs, &local_err);
783 if (ret < 0) {
784 error_propagate(errp, local_err);
785 }
786
787 return ret;
788}
789
a890f08e
ML
790void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs)
791{
792 Error *local_err = NULL;
793 int ret;
384a48fb 794 IO_CODE();
a890f08e
ML
795
796 if (!bs) {
797 return;
798 }
799
800 ret = bdrv_co_delete_file(bs, &local_err);
801 /*
802 * ENOTSUP will happen if the block driver doesn't support
803 * the 'bdrv_co_delete_file' interface. This is a predictable
804 * scenario and shouldn't be reported back to the user.
805 */
806 if (ret == -ENOTSUP) {
807 error_free(local_err);
808 } else if (ret < 0) {
809 error_report_err(local_err);
810 }
811}
812
892b7de8
ET
813/**
814 * Try to get @bs's logical and physical block size.
815 * On success, store them in @bsz struct and return 0.
816 * On failure return -errno.
817 * @bs must not be empty.
818 */
819int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
820{
821 BlockDriver *drv = bs->drv;
93393e69 822 BlockDriverState *filtered = bdrv_filter_bs(bs);
f791bf7f 823 GLOBAL_STATE_CODE();
892b7de8
ET
824
825 if (drv && drv->bdrv_probe_blocksizes) {
826 return drv->bdrv_probe_blocksizes(bs, bsz);
93393e69
HR
827 } else if (filtered) {
828 return bdrv_probe_blocksizes(filtered, bsz);
892b7de8
ET
829 }
830
831 return -ENOTSUP;
832}
833
834/**
835 * Try to get @bs's geometry (cyls, heads, sectors).
836 * On success, store them in @geo struct and return 0.
837 * On failure return -errno.
838 * @bs must not be empty.
839 */
840int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
841{
842 BlockDriver *drv = bs->drv;
93393e69 843 BlockDriverState *filtered = bdrv_filter_bs(bs);
f791bf7f 844 GLOBAL_STATE_CODE();
892b7de8
ET
845
846 if (drv && drv->bdrv_probe_geometry) {
847 return drv->bdrv_probe_geometry(bs, geo);
93393e69
HR
848 } else if (filtered) {
849 return bdrv_probe_geometry(filtered, geo);
892b7de8
ET
850 }
851
852 return -ENOTSUP;
853}
854
eba25057
JM
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the new file
 * @size: size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp if that
 * variable is not set.  On Windows, @size must be at least MAX_PATH.
 *
 * Return 0 upon success, otherwise a negative errno value.
 * NOTE(review): the Windows branch returns a negated GetLastError() code,
 * which is presumably not an errno value - confirm against callers.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }

    /* The template must fit into the caller's buffer untruncated */
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }

    if (close(fd) != 0) {
        /*
         * Remember why close() failed before cleaning up: unlink() may
         * overwrite errno, and the caller must see the close() error.
         */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }

    return 0;
#endif
}
fc01f7e7 890
84a12e66
CH
891/*
892 * Detect host devices. By convention, /dev/cdrom[N] is always
893 * recognized as a host CDROM.
894 */
895static BlockDriver *find_hdev_driver(const char *filename)
896{
897 int score_max = 0, score;
898 BlockDriver *drv = NULL, *d;
899
900 QLIST_FOREACH(d, &bdrv_drivers, list) {
901 if (d->bdrv_probe_device) {
902 score = d->bdrv_probe_device(filename);
903 if (score > score_max) {
904 score_max = score;
905 drv = d;
906 }
907 }
908 }
909
910 return drv;
911}
912
88d88798
MM
913static BlockDriver *bdrv_do_find_protocol(const char *protocol)
914{
915 BlockDriver *drv1;
916
917 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
918 if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
919 return drv1;
920 }
921 }
922
923 return NULL;
924}
925
98289620 926BlockDriver *bdrv_find_protocol(const char *filename,
b65a5e12
HR
927 bool allow_protocol_prefix,
928 Error **errp)
83f64091
FB
929{
930 BlockDriver *drv1;
931 char protocol[128];
1cec71e3 932 int len;
83f64091 933 const char *p;
88d88798 934 int i;
19cb3738 935
f791bf7f 936 GLOBAL_STATE_CODE();
66f82cee
KW
937 /* TODO Drivers without bdrv_file_open must be specified explicitly */
938
39508e7a
CH
939 /*
940 * XXX(hch): we really should not let host device detection
941 * override an explicit protocol specification, but moving this
942 * later breaks access to device names with colons in them.
943 * Thanks to the brain-dead persistent naming schemes on udev-
944 * based Linux systems those actually are quite common.
945 */
946 drv1 = find_hdev_driver(filename);
947 if (drv1) {
948 return drv1;
949 }
950
98289620 951 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
ef810437 952 return &bdrv_file;
84a12e66 953 }
98289620 954
9e0b22f4
SH
955 p = strchr(filename, ':');
956 assert(p != NULL);
1cec71e3
AL
957 len = p - filename;
958 if (len > sizeof(protocol) - 1)
959 len = sizeof(protocol) - 1;
960 memcpy(protocol, filename, len);
961 protocol[len] = '\0';
88d88798
MM
962
963 drv1 = bdrv_do_find_protocol(protocol);
964 if (drv1) {
965 return drv1;
966 }
967
968 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
969 if (block_driver_modules[i].protocol_name &&
970 !strcmp(block_driver_modules[i].protocol_name, protocol)) {
971 block_module_load_one(block_driver_modules[i].library_name);
972 break;
8a22f02a 973 }
83f64091 974 }
b65a5e12 975
88d88798
MM
976 drv1 = bdrv_do_find_protocol(protocol);
977 if (!drv1) {
978 error_setg(errp, "Unknown protocol '%s'", protocol);
979 }
980 return drv1;
83f64091
FB
981}
982
c6684249
MA
983/*
984 * Guess image format by probing its contents.
985 * This is not a good idea when your image is raw (CVE-2008-2004), but
986 * we do it anyway for backward compatibility.
987 *
988 * @buf contains the image's first @buf_size bytes.
7cddd372
KW
989 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
990 * but can be smaller if the image file is smaller)
c6684249
MA
991 * @filename is its filename.
992 *
993 * For all block drivers, call the bdrv_probe() method to get its
994 * probing score.
995 * Return the first block driver with the highest probing score.
996 */
38f3ef57
KW
997BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
998 const char *filename)
c6684249
MA
999{
1000 int score_max = 0, score;
1001 BlockDriver *drv = NULL, *d;
967d7905 1002 IO_CODE();
c6684249
MA
1003
1004 QLIST_FOREACH(d, &bdrv_drivers, list) {
1005 if (d->bdrv_probe) {
1006 score = d->bdrv_probe(buf, buf_size, filename);
1007 if (score > score_max) {
1008 score_max = score;
1009 drv = d;
1010 }
1011 }
1012 }
1013
1014 return drv;
1015}
1016
5696c6e3 1017static int find_image_format(BlockBackend *file, const char *filename,
34b5d2c6 1018 BlockDriver **pdrv, Error **errp)
f3a5d3f8 1019{
c6684249 1020 BlockDriver *drv;
7cddd372 1021 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
f500a6d3 1022 int ret = 0;
f8ea0b00 1023
08a00559 1024 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
5696c6e3 1025 if (blk_is_sg(file) || !blk_is_inserted(file) || blk_getlength(file) == 0) {
ef810437 1026 *pdrv = &bdrv_raw;
c98ac35d 1027 return ret;
1a396859 1028 }
f8ea0b00 1029
5696c6e3 1030 ret = blk_pread(file, 0, buf, sizeof(buf));
83f64091 1031 if (ret < 0) {
34b5d2c6
HR
1032 error_setg_errno(errp, -ret, "Could not read image for determining its "
1033 "format");
c98ac35d
SW
1034 *pdrv = NULL;
1035 return ret;
83f64091
FB
1036 }
1037
c6684249 1038 drv = bdrv_probe_all(buf, ret, filename);
c98ac35d 1039 if (!drv) {
34b5d2c6
HR
1040 error_setg(errp, "Could not determine image format: No compatible "
1041 "driver found");
c98ac35d
SW
1042 ret = -ENOENT;
1043 }
1044 *pdrv = drv;
1045 return ret;
ea2384d3
FB
1046}
1047
51762288
SH
1048/**
1049 * Set the current 'total_sectors' value
65a9bb25 1050 * Return 0 on success, -errno on error.
51762288 1051 */
3d9f2d2a 1052int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
51762288
SH
1053{
1054 BlockDriver *drv = bs->drv;
967d7905 1055 IO_CODE();
51762288 1056
d470ad42
HR
1057 if (!drv) {
1058 return -ENOMEDIUM;
1059 }
1060
396759ad 1061 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
b192af8a 1062 if (bdrv_is_sg(bs))
396759ad
NB
1063 return 0;
1064
51762288
SH
1065 /* query actual device if possible, otherwise just trust the hint */
1066 if (drv->bdrv_getlength) {
1067 int64_t length = drv->bdrv_getlength(bs);
1068 if (length < 0) {
1069 return length;
1070 }
7e382003 1071 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
51762288
SH
1072 }
1073
1074 bs->total_sectors = hint;
8b117001
VSO
1075
1076 if (bs->total_sectors * BDRV_SECTOR_SIZE > BDRV_MAX_LENGTH) {
1077 return -EFBIG;
1078 }
1079
51762288
SH
1080 return 0;
1081}
1082
cddff5ba
KW
1083/**
1084 * Combines a QDict of new block driver @options with any missing options taken
1085 * from @old_options, so that leaving out an option defaults to its old value.
1086 */
1087static void bdrv_join_options(BlockDriverState *bs, QDict *options,
1088 QDict *old_options)
1089{
1090 if (bs->drv && bs->drv->bdrv_join_options) {
1091 bs->drv->bdrv_join_options(options, old_options);
1092 } else {
1093 qdict_join(options, old_options, false);
1094 }
1095}
1096
543770bd
AG
1097static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
1098 int open_flags,
1099 Error **errp)
1100{
1101 Error *local_err = NULL;
1102 char *value = qemu_opt_get_del(opts, "detect-zeroes");
1103 BlockdevDetectZeroesOptions detect_zeroes =
1104 qapi_enum_parse(&BlockdevDetectZeroesOptions_lookup, value,
1105 BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, &local_err);
1106 g_free(value);
1107 if (local_err) {
1108 error_propagate(errp, local_err);
1109 return detect_zeroes;
1110 }
1111
1112 if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
1113 !(open_flags & BDRV_O_UNMAP))
1114 {
1115 error_setg(errp, "setting detect-zeroes to unmap is not allowed "
1116 "without setting discard operation to unmap");
1117 }
1118
1119 return detect_zeroes;
1120}
1121
f80f2673
AM
1122/**
1123 * Set open flags for aio engine
1124 *
1125 * Return 0 on success, -1 if the engine specified is invalid
1126 */
1127int bdrv_parse_aio(const char *mode, int *flags)
1128{
1129 if (!strcmp(mode, "threads")) {
1130 /* do nothing, default */
1131 } else if (!strcmp(mode, "native")) {
1132 *flags |= BDRV_O_NATIVE_AIO;
1133#ifdef CONFIG_LINUX_IO_URING
1134 } else if (!strcmp(mode, "io_uring")) {
1135 *flags |= BDRV_O_IO_URING;
1136#endif
1137 } else {
1138 return -1;
1139 }
1140
1141 return 0;
1142}
1143
9e8f1835
PB
1144/**
1145 * Set open flags for a given discard mode
1146 *
1147 * Return 0 on success, -1 if the discard mode was invalid.
1148 */
1149int bdrv_parse_discard_flags(const char *mode, int *flags)
1150{
1151 *flags &= ~BDRV_O_UNMAP;
1152
1153 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
1154 /* do nothing */
1155 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
1156 *flags |= BDRV_O_UNMAP;
1157 } else {
1158 return -1;
1159 }
1160
1161 return 0;
1162}
1163
c3993cdc
SH
1164/**
1165 * Set open flags for a given cache mode
1166 *
1167 * Return 0 on success, -1 if the cache mode was invalid.
1168 */
53e8ae01 1169int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
c3993cdc
SH
1170{
1171 *flags &= ~BDRV_O_CACHE_MASK;
1172
1173 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
53e8ae01
KW
1174 *writethrough = false;
1175 *flags |= BDRV_O_NOCACHE;
92196b2f 1176 } else if (!strcmp(mode, "directsync")) {
53e8ae01 1177 *writethrough = true;
92196b2f 1178 *flags |= BDRV_O_NOCACHE;
c3993cdc 1179 } else if (!strcmp(mode, "writeback")) {
53e8ae01 1180 *writethrough = false;
c3993cdc 1181 } else if (!strcmp(mode, "unsafe")) {
53e8ae01 1182 *writethrough = false;
c3993cdc
SH
1183 *flags |= BDRV_O_NO_FLUSH;
1184 } else if (!strcmp(mode, "writethrough")) {
53e8ae01 1185 *writethrough = true;
c3993cdc
SH
1186 } else {
1187 return -1;
1188 }
1189
1190 return 0;
1191}
1192
b5411555
KW
1193static char *bdrv_child_get_parent_desc(BdrvChild *c)
1194{
1195 BlockDriverState *parent = c->opaque;
2c0a3acb 1196 return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
b5411555
KW
1197}
1198
20018e12
KW
1199static void bdrv_child_cb_drained_begin(BdrvChild *child)
1200{
1201 BlockDriverState *bs = child->opaque;
6cd5c9d7 1202 bdrv_do_drained_begin_quiesce(bs, NULL, false);
20018e12
KW
1203}
1204
89bd0305
KW
1205static bool bdrv_child_cb_drained_poll(BdrvChild *child)
1206{
1207 BlockDriverState *bs = child->opaque;
6cd5c9d7 1208 return bdrv_drain_poll(bs, false, NULL, false);
89bd0305
KW
1209}
1210
e037c09c
HR
1211static void bdrv_child_cb_drained_end(BdrvChild *child,
1212 int *drained_end_counter)
20018e12
KW
1213{
1214 BlockDriverState *bs = child->opaque;
e037c09c 1215 bdrv_drained_end_no_poll(bs, drained_end_counter);
20018e12
KW
1216}
1217
38701b6a
KW
1218static int bdrv_child_cb_inactivate(BdrvChild *child)
1219{
1220 BlockDriverState *bs = child->opaque;
1221 assert(bs->open_flags & BDRV_O_INACTIVE);
1222 return 0;
1223}
1224
5d231849
KW
1225static bool bdrv_child_cb_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1226 GSList **ignore, Error **errp)
1227{
1228 BlockDriverState *bs = child->opaque;
1229 return bdrv_can_set_aio_context(bs, ctx, ignore, errp);
1230}
1231
53a7d041
KW
1232static void bdrv_child_cb_set_aio_ctx(BdrvChild *child, AioContext *ctx,
1233 GSList **ignore)
1234{
1235 BlockDriverState *bs = child->opaque;
1236 return bdrv_set_aio_context_ignore(bs, ctx, ignore);
1237}
1238
b1e6fc08 1239/*
73176bee
KW
1240 * Returns the options and flags that a temporary snapshot should get, based on
1241 * the originally requested flags (the originally requested image will have
1242 * flags like a backing file)
b1e6fc08 1243 */
73176bee
KW
1244static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
1245 int parent_flags, QDict *parent_options)
b1e6fc08 1246{
73176bee
KW
1247 *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
1248
1249 /* For temporary files, unconditional cache=unsafe is fine */
73176bee
KW
1250 qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
1251 qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
41869044 1252
3f48686f 1253 /* Copy the read-only and discard options from the parent */
f87a0e29 1254 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
3f48686f 1255 qdict_copy_default(child_options, parent_options, BDRV_OPT_DISCARD);
f87a0e29 1256
41869044
KW
1257 /* aio=native doesn't work for cache.direct=off, so disable it for the
1258 * temporary snapshot */
1259 *child_flags &= ~BDRV_O_NATIVE_AIO;
b1e6fc08
KW
1260}
1261
db95dbba
KW
1262static void bdrv_backing_attach(BdrvChild *c)
1263{
1264 BlockDriverState *parent = c->opaque;
1265 BlockDriverState *backing_hd = c->bs;
1266
1267 assert(!parent->backing_blocker);
1268 error_setg(&parent->backing_blocker,
1269 "node is used as backing hd of '%s'",
1270 bdrv_get_device_or_node_name(parent));
1271
f30c66ba
HR
1272 bdrv_refresh_filename(backing_hd);
1273
db95dbba 1274 parent->open_flags &= ~BDRV_O_NO_BACKING;
db95dbba
KW
1275
1276 bdrv_op_block_all(backing_hd, parent->backing_blocker);
1277 /* Otherwise we won't be able to commit or stream */
1278 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1279 parent->backing_blocker);
1280 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_STREAM,
1281 parent->backing_blocker);
1282 /*
1283 * We do backup in 3 ways:
1284 * 1. drive backup
1285 * The target bs is new opened, and the source is top BDS
1286 * 2. blockdev backup
1287 * Both the source and the target are top BDSes.
1288 * 3. internal backup(used for block replication)
1289 * Both the source and the target are backing file
1290 *
1291 * In case 1 and 2, neither the source nor the target is the backing file.
1292 * In case 3, we will block the top BDS, so there is only one block job
1293 * for the top BDS and its backing chain.
1294 */
1295 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
1296 parent->backing_blocker);
1297 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
1298 parent->backing_blocker);
ca2f1234 1299}
d736f119 1300
db95dbba
KW
1301static void bdrv_backing_detach(BdrvChild *c)
1302{
1303 BlockDriverState *parent = c->opaque;
1304
1305 assert(parent->backing_blocker);
1306 bdrv_op_unblock_all(c->bs, parent->backing_blocker);
1307 error_free(parent->backing_blocker);
1308 parent->backing_blocker = NULL;
48e08288 1309}
d736f119 1310
6858eba0
KW
1311static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
1312 const char *filename, Error **errp)
1313{
1314 BlockDriverState *parent = c->opaque;
e94d3dba 1315 bool read_only = bdrv_is_read_only(parent);
6858eba0
KW
1316 int ret;
1317
e94d3dba
AG
1318 if (read_only) {
1319 ret = bdrv_reopen_set_read_only(parent, false, errp);
61f09cea
KW
1320 if (ret < 0) {
1321 return ret;
1322 }
1323 }
1324
6858eba0 1325 ret = bdrv_change_backing_file(parent, filename,
e54ee1b3
EB
1326 base->drv ? base->drv->format_name : "",
1327 false);
6858eba0 1328 if (ret < 0) {
64730694 1329 error_setg_errno(errp, -ret, "Could not update backing file link");
6858eba0
KW
1330 }
1331
e94d3dba
AG
1332 if (read_only) {
1333 bdrv_reopen_set_read_only(parent, true, NULL);
61f09cea
KW
1334 }
1335
6858eba0
KW
1336 return ret;
1337}
1338
fae8bd39
HR
1339/*
1340 * Returns the options and flags that a generic child of a BDS should
1341 * get, based on the given options and flags for the parent BDS.
1342 */
00ff7ffd
HR
1343static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format,
1344 int *child_flags, QDict *child_options,
1345 int parent_flags, QDict *parent_options)
fae8bd39
HR
1346{
1347 int flags = parent_flags;
1348
1349 /*
1350 * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL.
1351 * Generally, the question to answer is: Should this child be
1352 * format-probed by default?
1353 */
1354
1355 /*
1356 * Pure and non-filtered data children of non-format nodes should
1357 * be probed by default (even when the node itself has BDRV_O_PROTOCOL
1358 * set). This only affects a very limited set of drivers (namely
1359 * quorum and blkverify when this comment was written).
1360 * Force-clear BDRV_O_PROTOCOL then.
1361 */
1362 if (!parent_is_format &&
1363 (role & BDRV_CHILD_DATA) &&
1364 !(role & (BDRV_CHILD_METADATA | BDRV_CHILD_FILTERED)))
1365 {
1366 flags &= ~BDRV_O_PROTOCOL;
1367 }
1368
1369 /*
1370 * All children of format nodes (except for COW children) and all
1371 * metadata children in general should never be format-probed.
1372 * Force-set BDRV_O_PROTOCOL then.
1373 */
1374 if ((parent_is_format && !(role & BDRV_CHILD_COW)) ||
1375 (role & BDRV_CHILD_METADATA))
1376 {
1377 flags |= BDRV_O_PROTOCOL;
1378 }
1379
1380 /*
1381 * If the cache mode isn't explicitly set, inherit direct and no-flush from
1382 * the parent.
1383 */
1384 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
1385 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
1386 qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE);
1387
1388 if (role & BDRV_CHILD_COW) {
1389 /* backing files are opened read-only by default */
1390 qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
1391 qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off");
1392 } else {
1393 /* Inherit the read-only option from the parent if it's not set */
1394 qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
1395 qdict_copy_default(child_options, parent_options,
1396 BDRV_OPT_AUTO_READ_ONLY);
1397 }
1398
1399 /*
1400 * bdrv_co_pdiscard() respects unmap policy for the parent, so we
1401 * can default to enable it on lower layers regardless of the
1402 * parent option.
1403 */
1404 qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
1405
1406 /* Clear flags that only apply to the top layer */
1407 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
1408
1409 if (role & BDRV_CHILD_METADATA) {
1410 flags &= ~BDRV_O_NO_IO;
1411 }
1412 if (role & BDRV_CHILD_COW) {
1413 flags &= ~BDRV_O_TEMPORARY;
1414 }
1415
1416 *child_flags = flags;
1417}
1418
ca2f1234
HR
1419static void bdrv_child_cb_attach(BdrvChild *child)
1420{
1421 BlockDriverState *bs = child->opaque;
1422
696bf4c7 1423 assert_bdrv_graph_writable(bs);
a225369b
HR
1424 QLIST_INSERT_HEAD(&bs->children, child, next);
1425
ca2f1234
HR
1426 if (child->role & BDRV_CHILD_COW) {
1427 bdrv_backing_attach(child);
1428 }
1429
1430 bdrv_apply_subtree_drain(child, bs);
1431}
1432
48e08288
HR
1433static void bdrv_child_cb_detach(BdrvChild *child)
1434{
1435 BlockDriverState *bs = child->opaque;
1436
1437 if (child->role & BDRV_CHILD_COW) {
1438 bdrv_backing_detach(child);
1439 }
1440
1441 bdrv_unapply_subtree_drain(child, bs);
a225369b 1442
696bf4c7 1443 assert_bdrv_graph_writable(bs);
a225369b 1444 QLIST_REMOVE(child, next);
48e08288
HR
1445}
1446
43483550
HR
1447static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
1448 const char *filename, Error **errp)
1449{
1450 if (c->role & BDRV_CHILD_COW) {
1451 return bdrv_backing_update_filename(c, base, filename, errp);
1452 }
1453 return 0;
1454}
1455
fb62b588 1456AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
3ca1f322
VSO
1457{
1458 BlockDriverState *bs = c->opaque;
384a48fb 1459 IO_CODE();
3ca1f322
VSO
1460
1461 return bdrv_get_aio_context(bs);
1462}
1463
43483550
HR
1464const BdrvChildClass child_of_bds = {
1465 .parent_is_bds = true,
1466 .get_parent_desc = bdrv_child_get_parent_desc,
1467 .inherit_options = bdrv_inherited_options,
1468 .drained_begin = bdrv_child_cb_drained_begin,
1469 .drained_poll = bdrv_child_cb_drained_poll,
1470 .drained_end = bdrv_child_cb_drained_end,
1471 .attach = bdrv_child_cb_attach,
1472 .detach = bdrv_child_cb_detach,
1473 .inactivate = bdrv_child_cb_inactivate,
1474 .can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
1475 .set_aio_ctx = bdrv_child_cb_set_aio_ctx,
1476 .update_filename = bdrv_child_cb_update_filename,
fb62b588 1477 .get_parent_aio_context = child_of_bds_get_parent_aio_context,
43483550
HR
1478};
1479
3ca1f322
VSO
1480AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
1481{
384a48fb 1482 IO_CODE();
3ca1f322
VSO
1483 return c->klass->get_parent_aio_context(c);
1484}
1485
7b272452
KW
1486static int bdrv_open_flags(BlockDriverState *bs, int flags)
1487{
61de4c68 1488 int open_flags = flags;
7b272452
KW
1489
1490 /*
1491 * Clear flags that are internal to the block layer before opening the
1492 * image.
1493 */
20cca275 1494 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
7b272452 1495
7b272452
KW
1496 return open_flags;
1497}
1498
91a097e7
KW
1499static void update_flags_from_options(int *flags, QemuOpts *opts)
1500{
2a3d4331 1501 *flags &= ~(BDRV_O_CACHE_MASK | BDRV_O_RDWR | BDRV_O_AUTO_RDONLY);
91a097e7 1502
57f9db9a 1503 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
91a097e7
KW
1504 *flags |= BDRV_O_NO_FLUSH;
1505 }
1506
57f9db9a 1507 if (qemu_opt_get_bool_del(opts, BDRV_OPT_CACHE_DIRECT, false)) {
91a097e7
KW
1508 *flags |= BDRV_O_NOCACHE;
1509 }
f87a0e29 1510
57f9db9a 1511 if (!qemu_opt_get_bool_del(opts, BDRV_OPT_READ_ONLY, false)) {
f87a0e29
AG
1512 *flags |= BDRV_O_RDWR;
1513 }
1514
e35bdc12
KW
1515 if (qemu_opt_get_bool_del(opts, BDRV_OPT_AUTO_READ_ONLY, false)) {
1516 *flags |= BDRV_O_AUTO_RDONLY;
1517 }
91a097e7
KW
1518}
1519
1520static void update_options_from_flags(QDict *options, int flags)
1521{
91a097e7 1522 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
46f5ac20 1523 qdict_put_bool(options, BDRV_OPT_CACHE_DIRECT, flags & BDRV_O_NOCACHE);
91a097e7
KW
1524 }
1525 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
46f5ac20
EB
1526 qdict_put_bool(options, BDRV_OPT_CACHE_NO_FLUSH,
1527 flags & BDRV_O_NO_FLUSH);
91a097e7 1528 }
f87a0e29 1529 if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
46f5ac20 1530 qdict_put_bool(options, BDRV_OPT_READ_ONLY, !(flags & BDRV_O_RDWR));
f87a0e29 1531 }
e35bdc12
KW
1532 if (!qdict_haskey(options, BDRV_OPT_AUTO_READ_ONLY)) {
1533 qdict_put_bool(options, BDRV_OPT_AUTO_READ_ONLY,
1534 flags & BDRV_O_AUTO_RDONLY);
1535 }
91a097e7
KW
1536}
1537
636ea370
KW
1538static void bdrv_assign_node_name(BlockDriverState *bs,
1539 const char *node_name,
1540 Error **errp)
6913c0c2 1541{
15489c76 1542 char *gen_node_name = NULL;
6913c0c2 1543
15489c76
JC
1544 if (!node_name) {
1545 node_name = gen_node_name = id_generate(ID_BLOCK);
1546 } else if (!id_wellformed(node_name)) {
1547 /*
1548 * Check for empty string or invalid characters, but not if it is
1549 * generated (generated names use characters not available to the user)
1550 */
785ec4b1 1551 error_setg(errp, "Invalid node-name: '%s'", node_name);
636ea370 1552 return;
6913c0c2
BC
1553 }
1554
0c5e94ee 1555 /* takes care of avoiding namespaces collisions */
7f06d47e 1556 if (blk_by_name(node_name)) {
0c5e94ee
BC
1557 error_setg(errp, "node-name=%s is conflicting with a device id",
1558 node_name);
15489c76 1559 goto out;
0c5e94ee
BC
1560 }
1561
6913c0c2
BC
1562 /* takes care of avoiding duplicates node names */
1563 if (bdrv_find_node(node_name)) {
785ec4b1 1564 error_setg(errp, "Duplicate nodes with node-name='%s'", node_name);
15489c76 1565 goto out;
6913c0c2
BC
1566 }
1567
824808dd
KW
1568 /* Make sure that the node name isn't truncated */
1569 if (strlen(node_name) >= sizeof(bs->node_name)) {
1570 error_setg(errp, "Node name too long");
1571 goto out;
1572 }
1573
6913c0c2
BC
1574 /* copy node name into the bs and insert it into the graph list */
1575 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
1576 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
15489c76
JC
1577out:
1578 g_free(gen_node_name);
6913c0c2
BC
1579}
1580
01a56501
KW
1581static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
1582 const char *node_name, QDict *options,
1583 int open_flags, Error **errp)
1584{
1585 Error *local_err = NULL;
0f12264e 1586 int i, ret;
01a56501
KW
1587
1588 bdrv_assign_node_name(bs, node_name, &local_err);
1589 if (local_err) {
1590 error_propagate(errp, local_err);
1591 return -EINVAL;
1592 }
1593
1594 bs->drv = drv;
1595 bs->opaque = g_malloc0(drv->instance_size);
1596
1597 if (drv->bdrv_file_open) {
1598 assert(!drv->bdrv_needs_filename || bs->filename[0]);
1599 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
680c7f96 1600 } else if (drv->bdrv_open) {
01a56501 1601 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
680c7f96
KW
1602 } else {
1603 ret = 0;
01a56501
KW
1604 }
1605
1606 if (ret < 0) {
1607 if (local_err) {
1608 error_propagate(errp, local_err);
1609 } else if (bs->filename[0]) {
1610 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1611 } else {
1612 error_setg_errno(errp, -ret, "Could not open image");
1613 }
180ca19a 1614 goto open_failed;
01a56501
KW
1615 }
1616
1617 ret = refresh_total_sectors(bs, bs->total_sectors);
1618 if (ret < 0) {
1619 error_setg_errno(errp, -ret, "Could not refresh total sector count");
180ca19a 1620 return ret;
01a56501
KW
1621 }
1622
1e4c797c 1623 bdrv_refresh_limits(bs, NULL, &local_err);
01a56501
KW
1624 if (local_err) {
1625 error_propagate(errp, local_err);
180ca19a 1626 return -EINVAL;
01a56501
KW
1627 }
1628
1629 assert(bdrv_opt_mem_align(bs) != 0);
1630 assert(bdrv_min_mem_align(bs) != 0);
1631 assert(is_power_of_2(bs->bl.request_alignment));
1632
0f12264e
KW
1633 for (i = 0; i < bs->quiesce_counter; i++) {
1634 if (drv->bdrv_co_drain_begin) {
1635 drv->bdrv_co_drain_begin(bs);
1636 }
1637 }
1638
01a56501 1639 return 0;
180ca19a
MP
1640open_failed:
1641 bs->drv = NULL;
1642 if (bs->file != NULL) {
1643 bdrv_unref_child(bs, bs->file);
1644 bs->file = NULL;
1645 }
01a56501
KW
1646 g_free(bs->opaque);
1647 bs->opaque = NULL;
01a56501
KW
1648 return ret;
1649}
1650
621d1737
VSO
1651/*
1652 * Create and open a block node.
1653 *
1654 * @options is a QDict of options to pass to the block drivers, or NULL for an
1655 * empty set of options. The reference to the QDict belongs to the block layer
1656 * after the call (even on failure), so if the caller intends to reuse the
1657 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
1658 */
1659BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
1660 const char *node_name,
1661 QDict *options, int flags,
1662 Error **errp)
680c7f96
KW
1663{
1664 BlockDriverState *bs;
1665 int ret;
1666
f791bf7f
EGE
1667 GLOBAL_STATE_CODE();
1668
680c7f96
KW
1669 bs = bdrv_new();
1670 bs->open_flags = flags;
621d1737
VSO
1671 bs->options = options ?: qdict_new();
1672 bs->explicit_options = qdict_clone_shallow(bs->options);
680c7f96
KW
1673 bs->opaque = NULL;
1674
1675 update_options_from_flags(bs->options, flags);
1676
1677 ret = bdrv_open_driver(bs, drv, node_name, bs->options, flags, errp);
1678 if (ret < 0) {
cb3e7f08 1679 qobject_unref(bs->explicit_options);
180ca19a 1680 bs->explicit_options = NULL;
cb3e7f08 1681 qobject_unref(bs->options);
180ca19a 1682 bs->options = NULL;
680c7f96
KW
1683 bdrv_unref(bs);
1684 return NULL;
1685 }
1686
1687 return bs;
1688}
1689
621d1737
VSO
1690/* Create and open a block node. */
1691BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
1692 int flags, Error **errp)
1693{
f791bf7f 1694 GLOBAL_STATE_CODE();
621d1737
VSO
1695 return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
1696}
1697
c5f3014b 1698QemuOptsList bdrv_runtime_opts = {
18edf289
KW
1699 .name = "bdrv_common",
1700 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
1701 .desc = {
1702 {
1703 .name = "node-name",
1704 .type = QEMU_OPT_STRING,
1705 .help = "Node name of the block device node",
1706 },
62392ebb
KW
1707 {
1708 .name = "driver",
1709 .type = QEMU_OPT_STRING,
1710 .help = "Block driver to use for the node",
1711 },
91a097e7
KW
1712 {
1713 .name = BDRV_OPT_CACHE_DIRECT,
1714 .type = QEMU_OPT_BOOL,
1715 .help = "Bypass software writeback cache on the host",
1716 },
1717 {
1718 .name = BDRV_OPT_CACHE_NO_FLUSH,
1719 .type = QEMU_OPT_BOOL,
1720 .help = "Ignore flush requests",
1721 },
f87a0e29
AG
1722 {
1723 .name = BDRV_OPT_READ_ONLY,
1724 .type = QEMU_OPT_BOOL,
1725 .help = "Node is opened in read-only mode",
1726 },
e35bdc12
KW
1727 {
1728 .name = BDRV_OPT_AUTO_READ_ONLY,
1729 .type = QEMU_OPT_BOOL,
1730 .help = "Node can become read-only if opening read-write fails",
1731 },
692e01a2
KW
1732 {
1733 .name = "detect-zeroes",
1734 .type = QEMU_OPT_STRING,
1735 .help = "try to optimize zero writes (off, on, unmap)",
1736 },
818584a4 1737 {
415bbca8 1738 .name = BDRV_OPT_DISCARD,
818584a4
KW
1739 .type = QEMU_OPT_STRING,
1740 .help = "discard operation (ignore/off, unmap/on)",
1741 },
5a9347c6
FZ
1742 {
1743 .name = BDRV_OPT_FORCE_SHARE,
1744 .type = QEMU_OPT_BOOL,
1745 .help = "always accept other writers (default: off)",
1746 },
18edf289
KW
1747 { /* end of list */ }
1748 },
1749};
1750
5a5e7f8c
ML
1751QemuOptsList bdrv_create_opts_simple = {
1752 .name = "simple-create-opts",
1753 .head = QTAILQ_HEAD_INITIALIZER(bdrv_create_opts_simple.head),
fd17146c
HR
1754 .desc = {
1755 {
1756 .name = BLOCK_OPT_SIZE,
1757 .type = QEMU_OPT_SIZE,
1758 .help = "Virtual disk size"
1759 },
1760 {
1761 .name = BLOCK_OPT_PREALLOC,
1762 .type = QEMU_OPT_STRING,
1763 .help = "Preallocation mode (allowed values: off)"
1764 },
1765 { /* end of list */ }
1766 }
1767};
1768
57915332
KW
1769/*
1770 * Common part for opening disk images and files
b6ad491a
KW
1771 *
1772 * Removes all processed options from *options.
57915332 1773 */
5696c6e3 1774static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
82dc8b41 1775 QDict *options, Error **errp)
57915332
KW
1776{
1777 int ret, open_flags;
035fccdf 1778 const char *filename;
62392ebb 1779 const char *driver_name = NULL;
6913c0c2 1780 const char *node_name = NULL;
818584a4 1781 const char *discard;
18edf289 1782 QemuOpts *opts;
62392ebb 1783 BlockDriver *drv;
34b5d2c6 1784 Error *local_err = NULL;
307261b2 1785 bool ro;
57915332 1786
6405875c 1787 assert(bs->file == NULL);
707ff828 1788 assert(options != NULL && bs->options != options);
57915332 1789
62392ebb 1790 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
af175e85 1791 if (!qemu_opts_absorb_qdict(opts, options, errp)) {
62392ebb
KW
1792 ret = -EINVAL;
1793 goto fail_opts;
1794 }
1795
9b7e8691
AG
1796 update_flags_from_options(&bs->open_flags, opts);
1797
62392ebb
KW
1798 driver_name = qemu_opt_get(opts, "driver");
1799 drv = bdrv_find_format(driver_name);
1800 assert(drv != NULL);
1801
5a9347c6
FZ
1802 bs->force_share = qemu_opt_get_bool(opts, BDRV_OPT_FORCE_SHARE, false);
1803
1804 if (bs->force_share && (bs->open_flags & BDRV_O_RDWR)) {
1805 error_setg(errp,
1806 BDRV_OPT_FORCE_SHARE
1807 "=on can only be used with read-only images");
1808 ret = -EINVAL;
1809 goto fail_opts;
1810 }
1811
45673671 1812 if (file != NULL) {
f30c66ba 1813 bdrv_refresh_filename(blk_bs(file));
5696c6e3 1814 filename = blk_bs(file)->filename;
45673671 1815 } else {
129c7d1c
MA
1816 /*
1817 * Caution: while qdict_get_try_str() is fine, getting
1818 * non-string types would require more care. When @options
1819 * come from -blockdev or blockdev_add, its members are typed
1820 * according to the QAPI schema, but when they come from
1821 * -drive, they're all QString.
1822 */
45673671
KW
1823 filename = qdict_get_try_str(options, "filename");
1824 }
1825
4a008240 1826 if (drv->bdrv_needs_filename && (!filename || !filename[0])) {
765003db
KW
1827 error_setg(errp, "The '%s' block driver requires a file name",
1828 drv->format_name);
18edf289
KW
1829 ret = -EINVAL;
1830 goto fail_opts;
6913c0c2 1831 }
6913c0c2 1832
82dc8b41
KW
1833 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
1834 drv->format_name);
62392ebb 1835
307261b2
VSO
1836 ro = bdrv_is_read_only(bs);
1837
1838 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
1839 if (!ro && bdrv_is_whitelisted(drv, true)) {
8be25de6
KW
1840 ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
1841 } else {
1842 ret = -ENOTSUP;
1843 }
1844 if (ret < 0) {
1845 error_setg(errp,
307261b2 1846 !ro && bdrv_is_whitelisted(drv, true)
8be25de6
KW
1847 ? "Driver '%s' can only be used for read-only devices"
1848 : "Driver '%s' is not whitelisted",
1849 drv->format_name);
1850 goto fail_opts;
1851 }
b64ec4e4 1852 }
57915332 1853
d3faa13e 1854 /* bdrv_new() and bdrv_close() make it so */
d73415a3 1855 assert(qatomic_read(&bs->copy_on_read) == 0);
d3faa13e 1856
82dc8b41 1857 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
307261b2 1858 if (!ro) {
0ebd24e0
KW
1859 bdrv_enable_copy_on_read(bs);
1860 } else {
1861 error_setg(errp, "Can't use copy-on-read on read-only device");
18edf289
KW
1862 ret = -EINVAL;
1863 goto fail_opts;
0ebd24e0 1864 }
53fec9d3
SH
1865 }
1866
415bbca8 1867 discard = qemu_opt_get(opts, BDRV_OPT_DISCARD);
818584a4
KW
1868 if (discard != NULL) {
1869 if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
1870 error_setg(errp, "Invalid discard option");
1871 ret = -EINVAL;
1872 goto fail_opts;
1873 }
1874 }
1875
543770bd
AG
1876 bs->detect_zeroes =
1877 bdrv_parse_detect_zeroes(opts, bs->open_flags, &local_err);
1878 if (local_err) {
1879 error_propagate(errp, local_err);
1880 ret = -EINVAL;
1881 goto fail_opts;
692e01a2
KW
1882 }
1883
c2ad1b0c
KW
1884 if (filename != NULL) {
1885 pstrcpy(bs->filename, sizeof(bs->filename), filename);
1886 } else {
1887 bs->filename[0] = '\0';
1888 }
91af7014 1889 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
57915332 1890
66f82cee 1891 /* Open the image, either directly or using a protocol */
82dc8b41 1892 open_flags = bdrv_open_flags(bs, bs->open_flags);
01a56501 1893 node_name = qemu_opt_get(opts, "node-name");
57915332 1894
01a56501
KW
1895 assert(!drv->bdrv_file_open || file == NULL);
1896 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
51762288 1897 if (ret < 0) {
01a56501 1898 goto fail_opts;
3baca891
KW
1899 }
1900
18edf289 1901 qemu_opts_del(opts);
57915332
KW
1902 return 0;
1903
18edf289
KW
1904fail_opts:
1905 qemu_opts_del(opts);
57915332
KW
1906 return ret;
1907}
1908
5e5c4f63
KW
1909static QDict *parse_json_filename(const char *filename, Error **errp)
1910{
1911 QObject *options_obj;
1912 QDict *options;
1913 int ret;
1914
1915 ret = strstart(filename, "json:", &filename);
1916 assert(ret);
1917
5577fff7 1918 options_obj = qobject_from_json(filename, errp);
5e5c4f63 1919 if (!options_obj) {
5577fff7 1920 error_prepend(errp, "Could not parse the JSON options: ");
5e5c4f63
KW
1921 return NULL;
1922 }
1923
7dc847eb 1924 options = qobject_to(QDict, options_obj);
ca6b6e1e 1925 if (!options) {
cb3e7f08 1926 qobject_unref(options_obj);
5e5c4f63
KW
1927 error_setg(errp, "Invalid JSON object given");
1928 return NULL;
1929 }
1930
5e5c4f63
KW
1931 qdict_flatten(options);
1932
1933 return options;
1934}
1935
de3b53f0
KW
1936static void parse_json_protocol(QDict *options, const char **pfilename,
1937 Error **errp)
1938{
1939 QDict *json_options;
1940 Error *local_err = NULL;
1941
1942 /* Parse json: pseudo-protocol */
1943 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1944 return;
1945 }
1946
1947 json_options = parse_json_filename(*pfilename, &local_err);
1948 if (local_err) {
1949 error_propagate(errp, local_err);
1950 return;
1951 }
1952
1953 /* Options given in the filename have lower priority than options
1954 * specified directly */
1955 qdict_join(options, json_options, false);
cb3e7f08 1956 qobject_unref(json_options);
de3b53f0
KW
1957 *pfilename = NULL;
1958}
1959
b6ce07aa 1960/*
f54120ff
KW
1961 * Fills in default options for opening images and converts the legacy
1962 * filename/flags pair to option QDict entries.
53a29513
HR
1963 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1964 * block driver has been specified explicitly.
b6ce07aa 1965 */
de3b53f0 1966static int bdrv_fill_options(QDict **options, const char *filename,
053e1578 1967 int *flags, Error **errp)
ea2384d3 1968{
c2ad1b0c 1969 const char *drvname;
53a29513 1970 bool protocol = *flags & BDRV_O_PROTOCOL;
e3fa4bfa 1971 bool parse_filename = false;
053e1578 1972 BlockDriver *drv = NULL;
34b5d2c6 1973 Error *local_err = NULL;
83f64091 1974
129c7d1c
MA
1975 /*
1976 * Caution: while qdict_get_try_str() is fine, getting non-string
1977 * types would require more care. When @options come from
1978 * -blockdev or blockdev_add, its members are typed according to
1979 * the QAPI schema, but when they come from -drive, they're all
1980 * QString.
1981 */
53a29513 1982 drvname = qdict_get_try_str(*options, "driver");
053e1578
HR
1983 if (drvname) {
1984 drv = bdrv_find_format(drvname);
1985 if (!drv) {
1986 error_setg(errp, "Unknown driver '%s'", drvname);
1987 return -ENOENT;
1988 }
1989 /* If the user has explicitly specified the driver, this choice should
1990 * override the BDRV_O_PROTOCOL flag */
1991 protocol = drv->bdrv_file_open;
53a29513
HR
1992 }
1993
1994 if (protocol) {
1995 *flags |= BDRV_O_PROTOCOL;
1996 } else {
1997 *flags &= ~BDRV_O_PROTOCOL;
1998 }
1999
91a097e7
KW
2000 /* Translate cache options from flags into options */
2001 update_options_from_flags(*options, *flags);
2002
035fccdf 2003 /* Fetch the file name from the options QDict if necessary */
17b005f1 2004 if (protocol && filename) {
f54120ff 2005 if (!qdict_haskey(*options, "filename")) {
46f5ac20 2006 qdict_put_str(*options, "filename", filename);
f54120ff
KW
2007 parse_filename = true;
2008 } else {
2009 error_setg(errp, "Can't specify 'file' and 'filename' options at "
2010 "the same time");
2011 return -EINVAL;
2012 }
035fccdf
KW
2013 }
2014
c2ad1b0c 2015 /* Find the right block driver */
129c7d1c 2016 /* See cautionary note on accessing @options above */
f54120ff 2017 filename = qdict_get_try_str(*options, "filename");
f54120ff 2018
053e1578
HR
2019 if (!drvname && protocol) {
2020 if (filename) {
2021 drv = bdrv_find_protocol(filename, parse_filename, errp);
17b005f1 2022 if (!drv) {
053e1578 2023 return -EINVAL;
17b005f1 2024 }
053e1578
HR
2025
2026 drvname = drv->format_name;
46f5ac20 2027 qdict_put_str(*options, "driver", drvname);
053e1578
HR
2028 } else {
2029 error_setg(errp, "Must specify either driver or file");
2030 return -EINVAL;
98289620 2031 }
c2ad1b0c
KW
2032 }
2033
17b005f1 2034 assert(drv || !protocol);
c2ad1b0c 2035
f54120ff 2036 /* Driver-specific filename parsing */
17b005f1 2037 if (drv && drv->bdrv_parse_filename && parse_filename) {
5acd9d81 2038 drv->bdrv_parse_filename(filename, *options, &local_err);
84d18f06 2039 if (local_err) {
34b5d2c6 2040 error_propagate(errp, local_err);
f54120ff 2041 return -EINVAL;
6963a30d 2042 }
cd5d031e
HR
2043
2044 if (!drv->bdrv_needs_filename) {
2045 qdict_del(*options, "filename");
cd5d031e 2046 }
6963a30d
KW
2047 }
2048
f54120ff
KW
2049 return 0;
2050}
2051
148eb13c
KW
2052typedef struct BlockReopenQueueEntry {
2053 bool prepared;
69b736e7 2054 bool perms_checked;
148eb13c 2055 BDRVReopenState state;
859443b0 2056 QTAILQ_ENTRY(BlockReopenQueueEntry) entry;
148eb13c
KW
2057} BlockReopenQueueEntry;
2058
2059/*
2060 * Return the flags that @bs will have after the reopens in @q have
2061 * successfully completed. If @q is NULL (or @bs is not contained in @q),
2062 * return the current flags.
2063 */
2064static int bdrv_reopen_get_flags(BlockReopenQueue *q, BlockDriverState *bs)
2065{
2066 BlockReopenQueueEntry *entry;
2067
2068 if (q != NULL) {
859443b0 2069 QTAILQ_FOREACH(entry, q, entry) {
148eb13c
KW
2070 if (entry->state.bs == bs) {
2071 return entry->state.flags;
2072 }
2073 }
2074 }
2075
2076 return bs->open_flags;
2077}
2078
2079/* Returns whether the image file can be written to after the reopen queue @q
2080 * has been successfully applied, or right now if @q is NULL. */
cc022140
HR
2081static bool bdrv_is_writable_after_reopen(BlockDriverState *bs,
2082 BlockReopenQueue *q)
148eb13c
KW
2083{
2084 int flags = bdrv_reopen_get_flags(q, bs);
2085
2086 return (flags & (BDRV_O_RDWR | BDRV_O_INACTIVE)) == BDRV_O_RDWR;
2087}
2088
cc022140
HR
2089/*
2090 * Return whether the BDS can be written to. This is not necessarily
2091 * the same as !bdrv_is_read_only(bs), as inactivated images may not
2092 * be written to but do not count as read-only images.
2093 */
2094bool bdrv_is_writable(BlockDriverState *bs)
2095{
384a48fb 2096 IO_CODE();
cc022140
HR
2097 return bdrv_is_writable_after_reopen(bs, NULL);
2098}
2099
3bf416ba
VSO
2100static char *bdrv_child_user_desc(BdrvChild *c)
2101{
da261b69 2102 return c->klass->get_parent_desc(c);
3bf416ba
VSO
2103}
2104
30ebb9aa
VSO
2105/*
2106 * Check that @a allows everything that @b needs. @a and @b must reference same
2107 * child node.
2108 */
3bf416ba
VSO
2109static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
2110{
30ebb9aa
VSO
2111 const char *child_bs_name;
2112 g_autofree char *a_user = NULL;
2113 g_autofree char *b_user = NULL;
2114 g_autofree char *perms = NULL;
2115
2116 assert(a->bs);
2117 assert(a->bs == b->bs);
862fded9 2118 GLOBAL_STATE_CODE();
3bf416ba
VSO
2119
2120 if ((b->perm & a->shared_perm) == b->perm) {
2121 return true;
2122 }
2123
30ebb9aa
VSO
2124 child_bs_name = bdrv_get_node_name(b->bs);
2125 a_user = bdrv_child_user_desc(a);
2126 b_user = bdrv_child_user_desc(b);
2127 perms = bdrv_perm_names(b->perm & ~a->shared_perm);
2128
2129 error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
2130 "both required by %s (uses node '%s' as '%s' child) and "
2131 "unshared by %s (uses node '%s' as '%s' child).",
2132 child_bs_name, perms,
2133 b_user, child_bs_name, b->name,
2134 a_user, child_bs_name, a->name);
3bf416ba
VSO
2135
2136 return false;
2137}
2138
9397c14f 2139static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
3bf416ba
VSO
2140{
2141 BdrvChild *a, *b;
862fded9 2142 GLOBAL_STATE_CODE();
3bf416ba
VSO
2143
2144 /*
2145 * During the loop we'll look at each pair twice. That's correct because
2146 * bdrv_a_allow_b() is asymmetric and we should check each pair in both
2147 * directions.
2148 */
2149 QLIST_FOREACH(a, &bs->parents, next_parent) {
2150 QLIST_FOREACH(b, &bs->parents, next_parent) {
9397c14f 2151 if (a == b) {
3bf416ba
VSO
2152 continue;
2153 }
2154
2155 if (!bdrv_a_allow_b(a, b, errp)) {
2156 return true;
2157 }
2158 }
2159 }
2160
2161 return false;
2162}
2163
ffd1a5a2 2164static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs,
e5d8a406
HR
2165 BdrvChild *c, BdrvChildRole role,
2166 BlockReopenQueue *reopen_queue,
ffd1a5a2
FZ
2167 uint64_t parent_perm, uint64_t parent_shared,
2168 uint64_t *nperm, uint64_t *nshared)
2169{
0b3ca76e 2170 assert(bs->drv && bs->drv->bdrv_child_perm);
e5d8a406 2171 bs->drv->bdrv_child_perm(bs, c, role, reopen_queue,
0b3ca76e
AG
2172 parent_perm, parent_shared,
2173 nperm, nshared);
e0995dc3 2174 /* TODO Take force_share from reopen_queue */
ffd1a5a2
FZ
2175 if (child_bs && child_bs->force_share) {
2176 *nshared = BLK_PERM_ALL;
2177 }
2178}
2179
bd57f8f7
VSO
2180/*
2181 * Adds the whole subtree of @bs (including @bs itself) to the @list (except for
2182 * nodes that are already in the @list, of course) so that final list is
2183 * topologically sorted. Return the result (GSList @list object is updated, so
2184 * don't use old reference after function call).
2185 *
2186 * On function start @list must be already topologically sorted and for any node
2187 * in the @list the whole subtree of the node must be in the @list as well. The
2188 * simplest way to satisfy this criteria: use only result of
2189 * bdrv_topological_dfs() or NULL as @list parameter.
2190 */
2191static GSList *bdrv_topological_dfs(GSList *list, GHashTable *found,
2192 BlockDriverState *bs)
2193{
2194 BdrvChild *child;
2195 g_autoptr(GHashTable) local_found = NULL;
2196
2197 if (!found) {
2198 assert(!list);
2199 found = local_found = g_hash_table_new(NULL, NULL);
2200 }
2201
2202 if (g_hash_table_contains(found, bs)) {
2203 return list;
2204 }
2205 g_hash_table_add(found, bs);
2206
2207 QLIST_FOREACH(child, &bs->children, next) {
2208 list = bdrv_topological_dfs(list, found, child->bs);
2209 }
2210
2211 return g_slist_prepend(list, bs);
2212}
2213
ecb776bd
VSO
2214typedef struct BdrvChildSetPermState {
2215 BdrvChild *child;
2216 uint64_t old_perm;
2217 uint64_t old_shared_perm;
2218} BdrvChildSetPermState;
b0defa83
VSO
2219
2220static void bdrv_child_set_perm_abort(void *opaque)
2221{
ecb776bd
VSO
2222 BdrvChildSetPermState *s = opaque;
2223
862fded9
EGE
2224 GLOBAL_STATE_CODE();
2225
ecb776bd
VSO
2226 s->child->perm = s->old_perm;
2227 s->child->shared_perm = s->old_shared_perm;
b0defa83
VSO
2228}
2229
2230static TransactionActionDrv bdrv_child_set_pem_drv = {
2231 .abort = bdrv_child_set_perm_abort,
ecb776bd 2232 .clean = g_free,
b0defa83
VSO
2233};
2234
ecb776bd
VSO
2235static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
2236 uint64_t shared, Transaction *tran)
b0defa83 2237{
ecb776bd 2238 BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
862fded9 2239 GLOBAL_STATE_CODE();
ecb776bd
VSO
2240
2241 *s = (BdrvChildSetPermState) {
2242 .child = c,
2243 .old_perm = c->perm,
2244 .old_shared_perm = c->shared_perm,
2245 };
b0defa83
VSO
2246
2247 c->perm = perm;
2248 c->shared_perm = shared;
2249
ecb776bd 2250 tran_add(tran, &bdrv_child_set_pem_drv, s);
b0defa83
VSO
2251}
2252
2513ef59
VSO
2253static void bdrv_drv_set_perm_commit(void *opaque)
2254{
2255 BlockDriverState *bs = opaque;
2256 uint64_t cumulative_perms, cumulative_shared_perms;
2257
2258 if (bs->drv->bdrv_set_perm) {
2259 bdrv_get_cumulative_perm(bs, &cumulative_perms,
2260 &cumulative_shared_perms);
2261 bs->drv->bdrv_set_perm(bs, cumulative_perms, cumulative_shared_perms);
2262 }
2263}
2264
2265static void bdrv_drv_set_perm_abort(void *opaque)
2266{
2267 BlockDriverState *bs = opaque;
2268
2269 if (bs->drv->bdrv_abort_perm_update) {
2270 bs->drv->bdrv_abort_perm_update(bs);
2271 }
2272}
2273
2274TransactionActionDrv bdrv_drv_set_perm_drv = {
2275 .abort = bdrv_drv_set_perm_abort,
2276 .commit = bdrv_drv_set_perm_commit,
2277};
2278
2279static int bdrv_drv_set_perm(BlockDriverState *bs, uint64_t perm,
2280 uint64_t shared_perm, Transaction *tran,
2281 Error **errp)
2282{
2283 if (!bs->drv) {
2284 return 0;
2285 }
2286
2287 if (bs->drv->bdrv_check_perm) {
2288 int ret = bs->drv->bdrv_check_perm(bs, perm, shared_perm, errp);
2289 if (ret < 0) {
2290 return ret;
2291 }
2292 }
2293
2294 if (tran) {
2295 tran_add(tran, &bdrv_drv_set_perm_drv, bs);
2296 }
2297
2298 return 0;
2299}
2300
0978623e
VSO
2301typedef struct BdrvReplaceChildState {
2302 BdrvChild *child;
82b54cf5 2303 BdrvChild **childp;
0978623e 2304 BlockDriverState *old_bs;
b0a9f6fe 2305 bool free_empty_child;
0978623e
VSO
2306} BdrvReplaceChildState;
2307
2308static void bdrv_replace_child_commit(void *opaque)
2309{
2310 BdrvReplaceChildState *s = opaque;
2311
b0a9f6fe
HR
2312 if (s->free_empty_child && !s->child->bs) {
2313 bdrv_child_free(s->child);
2314 }
0978623e
VSO
2315 bdrv_unref(s->old_bs);
2316}
2317
2318static void bdrv_replace_child_abort(void *opaque)
2319{
2320 BdrvReplaceChildState *s = opaque;
2321 BlockDriverState *new_bs = s->child->bs;
2322
82b54cf5
HR
2323 /*
2324 * old_bs reference is transparently moved from @s to s->child.
2325 *
2326 * Pass &s->child here instead of s->childp, because:
2327 * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
2328 * modify the BdrvChild * pointer we indirectly pass to it, i.e. it
2329 * will not modify s->child. From that perspective, it does not matter
2330 * whether we pass s->childp or &s->child.
82b54cf5
HR
2331 * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use
2332 * it here.
2333 * (3) If new_bs is NULL, *s->childp will have been NULLed by
2334 * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
2335 * must not pass a NULL *s->childp here.
82b54cf5
HR
2336 *
2337 * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
2338 * any case, there is no reason to pass it anyway.
2339 */
b0a9f6fe
HR
2340 bdrv_replace_child_noperm(&s->child, s->old_bs, true);
2341 /*
2342 * The child was pre-existing, so s->old_bs must be non-NULL, and
2343 * s->child thus must not have been freed
2344 */
2345 assert(s->child != NULL);
2346 if (!new_bs) {
2347 /* As described above, *s->childp was cleared, so restore it */
2348 assert(s->childp != NULL);
2349 *s->childp = s->child;
2350 }
0978623e
VSO
2351 bdrv_unref(new_bs);
2352}
2353
2354static TransactionActionDrv bdrv_replace_child_drv = {
2355 .commit = bdrv_replace_child_commit,
2356 .abort = bdrv_replace_child_abort,
2357 .clean = g_free,
2358};
2359
2360/*
4bf021db 2361 * bdrv_replace_child_tran
0978623e
VSO
2362 *
2363 * Note: real unref of old_bs is done only on commit.
4bf021db
VSO
2364 *
2365 * The function doesn't update permissions, caller is responsible for this.
82b54cf5 2366 *
b0a9f6fe
HR
2367 * (*childp)->bs must not be NULL.
2368 *
82b54cf5
HR
2369 * Note that if new_bs == NULL, @childp is stored in a state object attached
2370 * to @tran, so that the old child can be reinstated in the abort handler.
2371 * Therefore, if @new_bs can be NULL, @childp must stay valid until the
2372 * transaction is committed or aborted.
2373 *
b0a9f6fe
HR
2374 * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
2375 * freed (on commit). @free_empty_child should only be false if the
2376 * caller will free the BDrvChild themselves (which may be important
2377 * if this is in turn called in another transactional context).
0978623e 2378 */
82b54cf5
HR
2379static void bdrv_replace_child_tran(BdrvChild **childp,
2380 BlockDriverState *new_bs,
b0a9f6fe
HR
2381 Transaction *tran,
2382 bool free_empty_child)
0978623e
VSO
2383{
2384 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
2385 *s = (BdrvReplaceChildState) {
82b54cf5
HR
2386 .child = *childp,
2387 .childp = new_bs == NULL ? childp : NULL,
2388 .old_bs = (*childp)->bs,
b0a9f6fe 2389 .free_empty_child = free_empty_child,
0978623e
VSO
2390 };
2391 tran_add(tran, &bdrv_replace_child_drv, s);
2392
b0a9f6fe
HR
2393 /* The abort handler relies on this */
2394 assert(s->old_bs != NULL);
2395
0978623e
VSO
2396 if (new_bs) {
2397 bdrv_ref(new_bs);
2398 }
b0a9f6fe
HR
2399 /*
2400 * Pass free_empty_child=false, we will free the child (if
2401 * necessary) in bdrv_replace_child_commit() (if our
2402 * @free_empty_child parameter was true).
2403 */
2404 bdrv_replace_child_noperm(childp, new_bs, false);
82b54cf5 2405 /* old_bs reference is transparently moved from *childp to @s */
0978623e
VSO
2406}
2407
33a610c3 2408/*
c20555e1
VSO
2409 * Refresh permissions in @bs subtree. The function is intended to be called
2410 * after some graph modification that was done without permission update.
33a610c3 2411 */
c20555e1
VSO
2412static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
2413 Transaction *tran, Error **errp)
33a610c3
KW
2414{
2415 BlockDriver *drv = bs->drv;
2416 BdrvChild *c;
2417 int ret;
c20555e1 2418 uint64_t cumulative_perms, cumulative_shared_perms;
862fded9 2419 GLOBAL_STATE_CODE();
c20555e1
VSO
2420
2421 bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
33a610c3
KW
2422
2423 /* Write permissions never work with read-only images */
2424 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
cc022140 2425 !bdrv_is_writable_after_reopen(bs, q))
33a610c3 2426 {
481e0eee
HR
2427 if (!bdrv_is_writable_after_reopen(bs, NULL)) {
2428 error_setg(errp, "Block node is read-only");
2429 } else {
c20555e1
VSO
2430 error_setg(errp, "Read-only block node '%s' cannot support "
2431 "read-write users", bdrv_get_node_name(bs));
481e0eee
HR
2432 }
2433
33a610c3
KW
2434 return -EPERM;
2435 }
2436
9c60a5d1
KW
2437 /*
2438 * Unaligned requests will automatically be aligned to bl.request_alignment
2439 * and without RESIZE we can't extend requests to write to space beyond the
2440 * end of the image, so it's required that the image size is aligned.
2441 */
2442 if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
2443 !(cumulative_perms & BLK_PERM_RESIZE))
2444 {
2445 if ((bs->total_sectors * BDRV_SECTOR_SIZE) % bs->bl.request_alignment) {
2446 error_setg(errp, "Cannot get 'write' permission without 'resize': "
2447 "Image size is not a multiple of request "
2448 "alignment");
2449 return -EPERM;
2450 }
2451 }
2452
33a610c3
KW
2453 /* Check this node */
2454 if (!drv) {
2455 return 0;
2456 }
2457
b1d2bbeb 2458 ret = bdrv_drv_set_perm(bs, cumulative_perms, cumulative_shared_perms, tran,
2513ef59
VSO
2459 errp);
2460 if (ret < 0) {
2461 return ret;
33a610c3
KW
2462 }
2463
78e421c9 2464 /* Drivers that never have children can omit .bdrv_child_perm() */
33a610c3 2465 if (!drv->bdrv_child_perm) {
78e421c9 2466 assert(QLIST_EMPTY(&bs->children));
33a610c3
KW
2467 return 0;
2468 }
2469
2470 /* Check all children */
2471 QLIST_FOREACH(c, &bs->children, next) {
2472 uint64_t cur_perm, cur_shared;
9eab1544 2473
e5d8a406 2474 bdrv_child_perm(bs, c->bs, c, c->role, q,
ffd1a5a2
FZ
2475 cumulative_perms, cumulative_shared_perms,
2476 &cur_perm, &cur_shared);
ecb776bd 2477 bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
bd57f8f7
VSO
2478 }
2479
2480 return 0;
2481}
3ef45e02 2482
25409807
VSO
2483static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
2484 Transaction *tran, Error **errp)
bd57f8f7
VSO
2485{
2486 int ret;
b1d2bbeb 2487 BlockDriverState *bs;
862fded9 2488 GLOBAL_STATE_CODE();
bd57f8f7 2489
b1d2bbeb
VSO
2490 for ( ; list; list = list->next) {
2491 bs = list->data;
2492
9397c14f 2493 if (bdrv_parent_perms_conflict(bs, errp)) {
b1d2bbeb 2494 return -EINVAL;
bd57f8f7
VSO
2495 }
2496
c20555e1 2497 ret = bdrv_node_refresh_perm(bs, q, tran, errp);
33a610c3
KW
2498 if (ret < 0) {
2499 return ret;
2500 }
2501 }
2502
2503 return 0;
2504}
2505
c7a0f2be
KW
2506void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
2507 uint64_t *shared_perm)
33a610c3
KW
2508{
2509 BdrvChild *c;
2510 uint64_t cumulative_perms = 0;
2511 uint64_t cumulative_shared_perms = BLK_PERM_ALL;
2512
b4ad82aa
EGE
2513 GLOBAL_STATE_CODE();
2514
33a610c3
KW
2515 QLIST_FOREACH(c, &bs->parents, next_parent) {
2516 cumulative_perms |= c->perm;
2517 cumulative_shared_perms &= c->shared_perm;
2518 }
2519
2520 *perm = cumulative_perms;
2521 *shared_perm = cumulative_shared_perms;
2522}
2523
5176196c 2524char *bdrv_perm_names(uint64_t perm)
d083319f
KW
2525{
2526 struct perm_name {
2527 uint64_t perm;
2528 const char *name;
2529 } permissions[] = {
2530 { BLK_PERM_CONSISTENT_READ, "consistent read" },
2531 { BLK_PERM_WRITE, "write" },
2532 { BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
2533 { BLK_PERM_RESIZE, "resize" },
d083319f
KW
2534 { 0, NULL }
2535 };
2536
e2a7423a 2537 GString *result = g_string_sized_new(30);
d083319f
KW
2538 struct perm_name *p;
2539
2540 for (p = permissions; p->name; p++) {
2541 if (perm & p->perm) {
e2a7423a
AG
2542 if (result->len > 0) {
2543 g_string_append(result, ", ");
2544 }
2545 g_string_append(result, p->name);
d083319f
KW
2546 }
2547 }
2548
e2a7423a 2549 return g_string_free(result, FALSE);
d083319f
KW
2550}
2551
33a610c3 2552
071b474f 2553static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
bb87e4d1
VSO
2554{
2555 int ret;
b1d2bbeb
VSO
2556 Transaction *tran = tran_new();
2557 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
862fded9 2558 GLOBAL_STATE_CODE();
bb87e4d1 2559
b1d2bbeb
VSO
2560 ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
2561 tran_finalize(tran, ret);
bb87e4d1 2562
b1d2bbeb 2563 return ret;
bb87e4d1
VSO
2564}
2565
33a610c3
KW
2566int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
2567 Error **errp)
2568{
1046779e 2569 Error *local_err = NULL;
83928dc4 2570 Transaction *tran = tran_new();
33a610c3
KW
2571 int ret;
2572
b4ad82aa
EGE
2573 GLOBAL_STATE_CODE();
2574
ecb776bd 2575 bdrv_child_set_perm(c, perm, shared, tran);
83928dc4
VSO
2576
2577 ret = bdrv_refresh_perms(c->bs, &local_err);
2578
2579 tran_finalize(tran, ret);
2580
33a610c3 2581 if (ret < 0) {
071b474f
VSO
2582 if ((perm & ~c->perm) || (c->shared_perm & ~shared)) {
2583 /* tighten permissions */
1046779e
HR
2584 error_propagate(errp, local_err);
2585 } else {
2586 /*
2587 * Our caller may intend to only loosen restrictions and
2588 * does not expect this function to fail. Errors are not
2589 * fatal in such a case, so we can just hide them from our
2590 * caller.
2591 */
2592 error_free(local_err);
2593 ret = 0;
2594 }
33a610c3
KW
2595 }
2596
83928dc4 2597 return ret;
d5e6f437
KW
2598}
2599
c1087f12
HR
2600int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp)
2601{
2602 uint64_t parent_perms, parent_shared;
2603 uint64_t perms, shared;
2604
b4ad82aa
EGE
2605 GLOBAL_STATE_CODE();
2606
c1087f12 2607 bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared);
e5d8a406 2608 bdrv_child_perm(bs, c->bs, c, c->role, NULL,
bf8e925e 2609 parent_perms, parent_shared, &perms, &shared);
c1087f12
HR
2610
2611 return bdrv_child_try_set_perm(c, perms, shared, errp);
2612}
2613
87278af1
HR
2614/*
2615 * Default implementation for .bdrv_child_perm() for block filters:
2616 * Forward CONSISTENT_READ, WRITE, WRITE_UNCHANGED, and RESIZE to the
2617 * filtered child.
2618 */
2619static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
87278af1
HR
2620 BdrvChildRole role,
2621 BlockReopenQueue *reopen_queue,
2622 uint64_t perm, uint64_t shared,
2623 uint64_t *nperm, uint64_t *nshared)
6a1b9ee1 2624{
862fded9 2625 GLOBAL_STATE_CODE();
e444fa83
KW
2626 *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
2627 *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
6a1b9ee1
KW
2628}
2629
70082db4 2630static void bdrv_default_perms_for_cow(BlockDriverState *bs, BdrvChild *c,
70082db4
HR
2631 BdrvChildRole role,
2632 BlockReopenQueue *reopen_queue,
2633 uint64_t perm, uint64_t shared,
2634 uint64_t *nperm, uint64_t *nshared)
2635{
e5d8a406 2636 assert(role & BDRV_CHILD_COW);
862fded9 2637 GLOBAL_STATE_CODE();
70082db4
HR
2638
2639 /*
2640 * We want consistent read from backing files if the parent needs it.
2641 * No other operations are performed on backing files.
2642 */
2643 perm &= BLK_PERM_CONSISTENT_READ;
2644
2645 /*
2646 * If the parent can deal with changing data, we're okay with a
2647 * writable and resizable backing file.
2648 * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too?
2649 */
2650 if (shared & BLK_PERM_WRITE) {
2651 shared = BLK_PERM_WRITE | BLK_PERM_RESIZE;
2652 } else {
2653 shared = 0;
2654 }
2655
64631f36 2656 shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
70082db4
HR
2657
2658 if (bs->open_flags & BDRV_O_INACTIVE) {
2659 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2660 }
2661
2662 *nperm = perm;
2663 *nshared = shared;
2664}
2665
6f838a4b 2666static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c,
6f838a4b
HR
2667 BdrvChildRole role,
2668 BlockReopenQueue *reopen_queue,
2669 uint64_t perm, uint64_t shared,
2670 uint64_t *nperm, uint64_t *nshared)
2671{
2672 int flags;
2673
862fded9 2674 GLOBAL_STATE_CODE();
e5d8a406 2675 assert(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA));
6f838a4b
HR
2676
2677 flags = bdrv_reopen_get_flags(reopen_queue, bs);
2678
2679 /*
2680 * Apart from the modifications below, the same permissions are
2681 * forwarded and left alone as for filters
2682 */
e5d8a406 2683 bdrv_filter_default_perms(bs, c, role, reopen_queue,
6f838a4b
HR
2684 perm, shared, &perm, &shared);
2685
f889054f
HR
2686 if (role & BDRV_CHILD_METADATA) {
2687 /* Format drivers may touch metadata even if the guest doesn't write */
2688 if (bdrv_is_writable_after_reopen(bs, reopen_queue)) {
2689 perm |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2690 }
2691
2692 /*
2693 * bs->file always needs to be consistent because of the
2694 * metadata. We can never allow other users to resize or write
2695 * to it.
2696 */
2697 if (!(flags & BDRV_O_NO_IO)) {
2698 perm |= BLK_PERM_CONSISTENT_READ;
2699 }
2700 shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
6f838a4b
HR
2701 }
2702
f889054f
HR
2703 if (role & BDRV_CHILD_DATA) {
2704 /*
2705 * Technically, everything in this block is a subset of the
2706 * BDRV_CHILD_METADATA path taken above, and so this could
2707 * be an "else if" branch. However, that is not obvious, and
2708 * this function is not performance critical, therefore we let
2709 * this be an independent "if".
2710 */
2711
2712 /*
2713 * We cannot allow other users to resize the file because the
2714 * format driver might have some assumptions about the size
2715 * (e.g. because it is stored in metadata, or because the file
2716 * is split into fixed-size data files).
2717 */
2718 shared &= ~BLK_PERM_RESIZE;
2719
2720 /*
2721 * WRITE_UNCHANGED often cannot be performed as such on the
2722 * data file. For example, the qcow2 driver may still need to
2723 * write copied clusters on copy-on-read.
2724 */
2725 if (perm & BLK_PERM_WRITE_UNCHANGED) {
2726 perm |= BLK_PERM_WRITE;
2727 }
2728
2729 /*
2730 * If the data file is written to, the format driver may
2731 * expect to be able to resize it by writing beyond the EOF.
2732 */
2733 if (perm & BLK_PERM_WRITE) {
2734 perm |= BLK_PERM_RESIZE;
2735 }
6f838a4b 2736 }
6f838a4b
HR
2737
2738 if (bs->open_flags & BDRV_O_INACTIVE) {
2739 shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
2740 }
2741
2742 *nperm = perm;
2743 *nshared = shared;
2744}
2745
2519f549 2746void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
e5d8a406 2747 BdrvChildRole role, BlockReopenQueue *reopen_queue,
2519f549
HR
2748 uint64_t perm, uint64_t shared,
2749 uint64_t *nperm, uint64_t *nshared)
2750{
b4ad82aa 2751 GLOBAL_STATE_CODE();
2519f549
HR
2752 if (role & BDRV_CHILD_FILTERED) {
2753 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
2754 BDRV_CHILD_COW)));
e5d8a406 2755 bdrv_filter_default_perms(bs, c, role, reopen_queue,
2519f549
HR
2756 perm, shared, nperm, nshared);
2757 } else if (role & BDRV_CHILD_COW) {
2758 assert(!(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA)));
e5d8a406 2759 bdrv_default_perms_for_cow(bs, c, role, reopen_queue,
2519f549
HR
2760 perm, shared, nperm, nshared);
2761 } else if (role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA)) {
e5d8a406 2762 bdrv_default_perms_for_storage(bs, c, role, reopen_queue,
2519f549
HR
2763 perm, shared, nperm, nshared);
2764 } else {
2765 g_assert_not_reached();
2766 }
2767}
2768
7b1d9c4d
HR
2769uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
2770{
2771 static const uint64_t permissions[] = {
2772 [BLOCK_PERMISSION_CONSISTENT_READ] = BLK_PERM_CONSISTENT_READ,
2773 [BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE,
2774 [BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED,
2775 [BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE,
7b1d9c4d
HR
2776 };
2777
2778 QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
2779 QEMU_BUILD_BUG_ON(1UL << ARRAY_SIZE(permissions) != BLK_PERM_ALL + 1);
2780
2781 assert(qapi_perm < BLOCK_PERMISSION__MAX);
2782
2783 return permissions[qapi_perm];
2784}
2785
b0a9f6fe
HR
2786/**
2787 * Replace (*childp)->bs by @new_bs.
2788 *
2789 * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents
2790 * generally cannot handle a BdrvChild with .bs == NULL, so clearing
2791 * BdrvChild.bs should generally immediately be followed by the
2792 * BdrvChild pointer being cleared as well.
2793 *
2794 * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
2795 * freed. @free_empty_child should only be false if the caller will
2796 * free the BdrvChild themselves (this may be important in a
2797 * transactional context, where it may only be freed on commit).
2798 */
be64bbb0 2799static void bdrv_replace_child_noperm(BdrvChild **childp,
b0a9f6fe
HR
2800 BlockDriverState *new_bs,
2801 bool free_empty_child)
e9740bc6 2802{
be64bbb0 2803 BdrvChild *child = *childp;
e9740bc6 2804 BlockDriverState *old_bs = child->bs;
debc2927
HR
2805 int new_bs_quiesce_counter;
2806 int drain_saldo;
e9740bc6 2807
2cad1ebe 2808 assert(!child->frozen);
bfb8aa6d 2809 assert(old_bs != new_bs);
2cad1ebe 2810
bb2614e9
FZ
2811 if (old_bs && new_bs) {
2812 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
2813 }
debc2927
HR
2814
2815 new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
2816 drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
2817
2818 /*
2819 * If the new child node is drained but the old one was not, flush
2820 * all outstanding requests to the old child node.
2821 */
bd86fb99 2822 while (drain_saldo > 0 && child->klass->drained_begin) {
debc2927
HR
2823 bdrv_parent_drained_begin_single(child, true);
2824 drain_saldo--;
2825 }
2826
e9740bc6 2827 if (old_bs) {
d736f119
KW
2828 /* Detach first so that the recursive drain sections coming from @child
2829 * are already gone and we only end the drain sections that came from
2830 * elsewhere. */
bd86fb99
HR
2831 if (child->klass->detach) {
2832 child->klass->detach(child);
d736f119 2833 }
696bf4c7 2834 assert_bdrv_graph_writable(old_bs);
e9740bc6
KW
2835 QLIST_REMOVE(child, next_parent);
2836 }
36fe1331
KW
2837
2838 child->bs = new_bs;
b0a9f6fe
HR
2839 if (!new_bs) {
2840 *childp = NULL;
2841 }
36fe1331 2842
e9740bc6 2843 if (new_bs) {
696bf4c7 2844 assert_bdrv_graph_writable(new_bs);
e9740bc6 2845 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
debc2927
HR
2846
2847 /*
2848 * Detaching the old node may have led to the new node's
2849 * quiesce_counter having been decreased. Not a problem, we
2850 * just need to recognize this here and then invoke
2851 * drained_end appropriately more often.
2852 */
2853 assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
2854 drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
33a610c3 2855
d736f119
KW
2856 /* Attach only after starting new drained sections, so that recursive
2857 * drain sections coming from @child don't get an extra .drained_begin
2858 * callback. */
bd86fb99
HR
2859 if (child->klass->attach) {
2860 child->klass->attach(child);
8ee03995
KW
2861 }
2862 }
debc2927
HR
2863
2864 /*
2865 * If the old child node was drained but the new one is not, allow
2866 * requests to come in only after the new node has been attached.
2867 */
bd86fb99 2868 while (drain_saldo < 0 && child->klass->drained_end) {
debc2927
HR
2869 bdrv_parent_drained_end_single(child);
2870 drain_saldo++;
2871 }
b0a9f6fe
HR
2872
2873 if (free_empty_child && !child->bs) {
2874 bdrv_child_free(child);
2875 }
8ee03995 2876}
33a610c3 2877
04c9c3a5
HR
2878/**
2879 * Free the given @child.
2880 *
2881 * The child must be empty (i.e. `child->bs == NULL`) and it must be
2882 * unused (i.e. not in a children list).
2883 */
2884static void bdrv_child_free(BdrvChild *child)
df581792 2885{
548a74c0 2886 assert(!child->bs);
a225369b 2887 assert(!child->next.le_prev); /* not in children list */
04c9c3a5
HR
2888
2889 g_free(child->name);
2890 g_free(child);
548a74c0 2891}
d5e6f437 2892
548a74c0
VSO
2893typedef struct BdrvAttachChildCommonState {
2894 BdrvChild **child;
2895 AioContext *old_parent_ctx;
2896 AioContext *old_child_ctx;
2897} BdrvAttachChildCommonState;
2898
2899static void bdrv_attach_child_common_abort(void *opaque)
2900{
2901 BdrvAttachChildCommonState *s = opaque;
2902 BdrvChild *child = *s->child;
2903 BlockDriverState *bs = child->bs;
2904
b0a9f6fe
HR
2905 /*
2906 * Pass free_empty_child=false, because we still need the child
2907 * for the AioContext operations on the parent below; those
2908 * BdrvChildClass methods all work on a BdrvChild object, so we
2909 * need to keep it as an empty shell (after this function, it will
2910 * not be attached to any parent, and it will not have a .bs).
2911 */
2912 bdrv_replace_child_noperm(s->child, NULL, false);
548a74c0
VSO
2913
2914 if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
2915 bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
2916 }
2917
2918 if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
26518061 2919 GSList *ignore;
548a74c0 2920
26518061
HR
2921 /* No need to ignore `child`, because it has been detached already */
2922 ignore = NULL;
548a74c0
VSO
2923 child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
2924 &error_abort);
2925 g_slist_free(ignore);
548a74c0 2926
26518061
HR
2927 ignore = NULL;
2928 child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
548a74c0 2929 g_slist_free(ignore);
d5e6f437
KW
2930 }
2931
548a74c0 2932 bdrv_unref(bs);
04c9c3a5 2933 bdrv_child_free(child);
548a74c0
VSO
2934}
2935
2936static TransactionActionDrv bdrv_attach_child_common_drv = {
2937 .abort = bdrv_attach_child_common_abort,
2938 .clean = g_free,
2939};
2940
2941/*
2942 * Common part of attaching bdrv child to bs or to blk or to job
f8d2ad78
VSO
2943 *
2944 * Resulting new child is returned through @child.
2945 * At start *@child must be NULL.
2946 * @child is saved to a new entry of @tran, so that *@child could be reverted to
2947 * NULL on abort(). So referenced variable must live at least until transaction
2948 * end.
7ec390d5
VSO
2949 *
2950 * Function doesn't update permissions, caller is responsible for this.
548a74c0
VSO
2951 */
2952static int bdrv_attach_child_common(BlockDriverState *child_bs,
2953 const char *child_name,
2954 const BdrvChildClass *child_class,
2955 BdrvChildRole child_role,
2956 uint64_t perm, uint64_t shared_perm,
2957 void *opaque, BdrvChild **child,
2958 Transaction *tran, Error **errp)
2959{
2960 BdrvChild *new_child;
2961 AioContext *parent_ctx;
2962 AioContext *child_ctx = bdrv_get_aio_context(child_bs);
2963
2964 assert(child);
2965 assert(*child == NULL);
da261b69 2966 assert(child_class->get_parent_desc);
548a74c0
VSO
2967
2968 new_child = g_new(BdrvChild, 1);
2969 *new_child = (BdrvChild) {
d5e6f437
KW
2970 .bs = NULL,
2971 .name = g_strdup(child_name),
bd86fb99 2972 .klass = child_class,
258b7765 2973 .role = child_role,
d5e6f437
KW
2974 .perm = perm,
2975 .shared_perm = shared_perm,
2976 .opaque = opaque,
df581792
KW
2977 };
2978
548a74c0
VSO
2979 /*
2980 * If the AioContexts don't match, first try to move the subtree of
132ada80 2981 * child_bs into the AioContext of the new parent. If this doesn't work,
548a74c0
VSO
2982 * try moving the parent into the AioContext of child_bs instead.
2983 */
2984 parent_ctx = bdrv_child_get_parent_aio_context(new_child);
2985 if (child_ctx != parent_ctx) {
2986 Error *local_err = NULL;
2987 int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err);
2988
bd86fb99 2989 if (ret < 0 && child_class->can_set_aio_ctx) {
548a74c0
VSO
2990 GSList *ignore = g_slist_prepend(NULL, new_child);
2991 if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore,
2992 NULL))
2993 {
132ada80
KW
2994 error_free(local_err);
2995 ret = 0;
2996 g_slist_free(ignore);
548a74c0
VSO
2997 ignore = g_slist_prepend(NULL, new_child);
2998 child_class->set_aio_ctx(new_child, child_ctx, &ignore);
132ada80
KW
2999 }
3000 g_slist_free(ignore);
3001 }
548a74c0 3002
132ada80
KW
3003 if (ret < 0) {
3004 error_propagate(errp, local_err);
04c9c3a5 3005 bdrv_child_free(new_child);
548a74c0 3006 return ret;
132ada80
KW
3007 }
3008 }
3009
548a74c0 3010 bdrv_ref(child_bs);
b0a9f6fe
HR
3011 bdrv_replace_child_noperm(&new_child, child_bs, true);
3012 /* child_bs was non-NULL, so new_child must not have been freed */
3013 assert(new_child != NULL);
548a74c0
VSO
3014
3015 *child = new_child;
b4b059f6 3016
548a74c0
VSO
3017 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
3018 *s = (BdrvAttachChildCommonState) {
3019 .child = child,
3020 .old_parent_ctx = parent_ctx,
3021 .old_child_ctx = child_ctx,
3022 };
3023 tran_add(tran, &bdrv_attach_child_common_drv, s);
3024
3025 return 0;
3026}
3027
f8d2ad78
VSO
3028/*
3029 * Variable referenced by @child must live at least until transaction end.
3030 * (see bdrv_attach_child_common() doc for details)
7ec390d5
VSO
3031 *
3032 * Function doesn't update permissions, caller is responsible for this.
f8d2ad78 3033 */
aa5a04c7
VSO
3034static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
3035 BlockDriverState *child_bs,
3036 const char *child_name,
3037 const BdrvChildClass *child_class,
3038 BdrvChildRole child_role,
3039 BdrvChild **child,
3040 Transaction *tran,
3041 Error **errp)
3042{
3043 int ret;
3044 uint64_t perm, shared_perm;
3045
3046 assert(parent_bs->drv);
3047
bfb8aa6d
KW
3048 if (bdrv_recurse_has_child(child_bs, parent_bs)) {
3049 error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
3050 child_bs->node_name, child_name, parent_bs->node_name);
3051 return -EINVAL;
3052 }
3053
aa5a04c7
VSO
3054 bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
3055 bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
3056 perm, shared_perm, &perm, &shared_perm);
3057
3058 ret = bdrv_attach_child_common(child_bs, child_name, child_class,
3059 child_role, perm, shared_perm, parent_bs,
3060 child, tran, errp);
3061 if (ret < 0) {
3062 return ret;
3063 }
3064
aa5a04c7
VSO
3065 return 0;
3066}
3067
be64bbb0 3068static void bdrv_detach_child(BdrvChild **childp)
548a74c0 3069{
be64bbb0 3070 BlockDriverState *old_bs = (*childp)->bs;
4954aace 3071
b0a9f6fe 3072 bdrv_replace_child_noperm(childp, NULL, true);
4954aace
VSO
3073
3074 if (old_bs) {
3075 /*
3076 * Update permissions for old node. We're just taking a parent away, so
3077 * we're loosening restrictions. Errors of permission update are not
3078 * fatal in this case, ignore them.
3079 */
3080 bdrv_refresh_perms(old_bs, NULL);
3081
3082 /*
3083 * When the parent requiring a non-default AioContext is removed, the
3084 * node moves back to the main AioContext
3085 */
3086 bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
3087 }
548a74c0
VSO
3088}
3089
3090/*
3091 * This function steals the reference to child_bs from the caller.
3092 * That reference is later dropped by bdrv_root_unref_child().
3093 *
3094 * On failure NULL is returned, errp is set and the reference to
3095 * child_bs is also dropped.
3096 *
3097 * The caller must hold the AioContext lock @child_bs, but not that of @ctx
3098 * (unless @child_bs is already in @ctx).
3099 */
3100BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
3101 const char *child_name,
3102 const BdrvChildClass *child_class,
3103 BdrvChildRole child_role,
3104 uint64_t perm, uint64_t shared_perm,
3105 void *opaque, Error **errp)
3106{
3107 int ret;
3108 BdrvChild *child = NULL;
3109 Transaction *tran = tran_new();
3110
b4ad82aa
EGE
3111 GLOBAL_STATE_CODE();
3112
548a74c0
VSO
3113 ret = bdrv_attach_child_common(child_bs, child_name, child_class,
3114 child_role, perm, shared_perm, opaque,
3115 &child, tran, errp);
3116 if (ret < 0) {
e878bb12 3117 goto out;
548a74c0
VSO
3118 }
3119
3120 ret = bdrv_refresh_perms(child_bs, errp);
548a74c0 3121
e878bb12
KW
3122out:
3123 tran_finalize(tran, ret);
f8d2ad78
VSO
3124 /* child is unset on failure by bdrv_attach_child_common_abort() */
3125 assert((ret < 0) == !child);
3126
548a74c0 3127 bdrv_unref(child_bs);
b4b059f6 3128 return child;
df581792
KW
3129}
3130
b441dc71
AG
3131/*
3132 * This function transfers the reference to child_bs from the caller
3133 * to parent_bs. That reference is later dropped by parent_bs on
3134 * bdrv_close() or if someone calls bdrv_unref_child().
3135 *
3136 * On failure NULL is returned, errp is set and the reference to
3137 * child_bs is also dropped.
132ada80
KW
3138 *
3139 * If @parent_bs and @child_bs are in different AioContexts, the caller must
3140 * hold the AioContext lock for @child_bs, but not for @parent_bs.
b441dc71 3141 */
98292c61
WC
3142BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
3143 BlockDriverState *child_bs,
3144 const char *child_name,
bd86fb99 3145 const BdrvChildClass *child_class,
258b7765 3146 BdrvChildRole child_role,
8b2ff529 3147 Error **errp)
f21d96d0 3148{
aa5a04c7
VSO
3149 int ret;
3150 BdrvChild *child = NULL;
3151 Transaction *tran = tran_new();
f68c598b 3152
f791bf7f
EGE
3153 GLOBAL_STATE_CODE();
3154
aa5a04c7
VSO
3155 ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
3156 child_role, &child, tran, errp);
3157 if (ret < 0) {
3158 goto out;
3159 }
d5e6f437 3160
aa5a04c7
VSO
3161 ret = bdrv_refresh_perms(parent_bs, errp);
3162 if (ret < 0) {
3163 goto out;
d5e6f437
KW
3164 }
3165
aa5a04c7
VSO
3166out:
3167 tran_finalize(tran, ret);
f8d2ad78
VSO
3168 /* child is unset on failure by bdrv_attach_child_common_abort() */
3169 assert((ret < 0) == !child);
aa5a04c7
VSO
3170
3171 bdrv_unref(child_bs);
3172
f21d96d0
KW
3173 return child;
3174}
3175
7b99a266 3176/* Callers must ensure that child->frozen is false. */
f21d96d0 3177void bdrv_root_unref_child(BdrvChild *child)
33a60407 3178{
779020cb
KW
3179 BlockDriverState *child_bs;
3180
f791bf7f
EGE
3181 GLOBAL_STATE_CODE();
3182
f21d96d0 3183 child_bs = child->bs;
be64bbb0 3184 bdrv_detach_child(&child);
f21d96d0
KW
3185 bdrv_unref(child_bs);
3186}
3187
332b3a17
VSO
3188typedef struct BdrvSetInheritsFrom {
3189 BlockDriverState *bs;
3190 BlockDriverState *old_inherits_from;
3191} BdrvSetInheritsFrom;
3192
3193static void bdrv_set_inherits_from_abort(void *opaque)
3194{
3195 BdrvSetInheritsFrom *s = opaque;
3196
3197 s->bs->inherits_from = s->old_inherits_from;
3198}
3199
3200static TransactionActionDrv bdrv_set_inherits_from_drv = {
3201 .abort = bdrv_set_inherits_from_abort,
3202 .clean = g_free,
3203};
3204
3205/* @tran is allowed to be NULL. In this case no rollback is possible */
3206static void bdrv_set_inherits_from(BlockDriverState *bs,
3207 BlockDriverState *new_inherits_from,
3208 Transaction *tran)
3209{
3210 if (tran) {
3211 BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
3212
3213 *s = (BdrvSetInheritsFrom) {
3214 .bs = bs,
3215 .old_inherits_from = bs->inherits_from,
3216 };
3217
3218 tran_add(tran, &bdrv_set_inherits_from_drv, s);
3219 }
3220
3221 bs->inherits_from = new_inherits_from;
3222}
3223
3cf746b3
HR
3224/**
3225 * Clear all inherits_from pointers from children and grandchildren of
3226 * @root that point to @root, where necessary.
332b3a17 3227 * @tran is allowed to be NULL. In this case no rollback is possible
3cf746b3 3228 */
332b3a17
VSO
3229static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
3230 Transaction *tran)
f21d96d0 3231{
3cf746b3 3232 BdrvChild *c;
4e4bf5c4 3233
3cf746b3
HR
3234 if (child->bs->inherits_from == root) {
3235 /*
3236 * Remove inherits_from only when the last reference between root and
3237 * child->bs goes away.
3238 */
3239 QLIST_FOREACH(c, &root->children, next) {
4e4bf5c4
KW
3240 if (c != child && c->bs == child->bs) {
3241 break;
3242 }
3243 }
3244 if (c == NULL) {
332b3a17 3245 bdrv_set_inherits_from(child->bs, NULL, tran);
4e4bf5c4 3246 }
33a60407
KW
3247 }
3248
3cf746b3 3249 QLIST_FOREACH(c, &child->bs->children, next) {
332b3a17 3250 bdrv_unset_inherits_from(root, c, tran);
3cf746b3
HR
3251 }
3252}
3253
7b99a266 3254/* Callers must ensure that child->frozen is false. */
3cf746b3
HR
3255void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
3256{
f791bf7f 3257 GLOBAL_STATE_CODE();
3cf746b3
HR
3258 if (child == NULL) {
3259 return;
3260 }
3261
332b3a17 3262 bdrv_unset_inherits_from(parent, child, NULL);
f21d96d0 3263 bdrv_root_unref_child(child);
33a60407
KW
3264}
3265
5c8cab48
KW
3266
3267static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
3268{
3269 BdrvChild *c;
3270 QLIST_FOREACH(c, &bs->parents, next_parent) {
bd86fb99
HR
3271 if (c->klass->change_media) {
3272 c->klass->change_media(c, load);
5c8cab48
KW
3273 }
3274 }
3275}
3276
0065c455
AG
3277/* Return true if you can reach parent going through child->inherits_from
3278 * recursively. If parent or child are NULL, return false */
3279static bool bdrv_inherits_from_recursive(BlockDriverState *child,
3280 BlockDriverState *parent)
3281{
3282 while (child && child != parent) {
3283 child = child->inherits_from;
3284 }
3285
3286 return child != NULL;
3287}
3288
25191e5f
HR
3289/*
3290 * Return the BdrvChildRole for @bs's backing child. bs->backing is
3291 * mostly used for COW backing children (role = COW), but also for
3292 * filtered children (role = FILTERED | PRIMARY).
3293 */
3294static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
3295{
3296 if (bs->drv && bs->drv->is_filter) {
3297 return BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3298 } else {
3299 return BDRV_CHILD_COW;
3300 }
3301}
3302
5db15a57 3303/*
e9238278
VSO
3304 * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
3305 * callers which don't need their own reference any more must call bdrv_unref().
7ec390d5
VSO
3306 *
3307 * Function doesn't update permissions, caller is responsible for this.
5db15a57 3308 */
e9238278
VSO
3309static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
3310 BlockDriverState *child_bs,
3311 bool is_backing,
3312 Transaction *tran, Error **errp)
8d24cce1 3313{
a1e708fc 3314 int ret = 0;
e9238278
VSO
3315 bool update_inherits_from =
3316 bdrv_inherits_from_recursive(child_bs, parent_bs);
3317 BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
3318 BdrvChildRole role;
0065c455 3319
e9238278
VSO
3320 if (!parent_bs->drv) {
3321 /*
3322 * Node without drv is an object without a class :/. TODO: finally fix
3323 * qcow2 driver to never clear bs->drv and implement format corruption
3324 * handling in other way.
3325 */
3326 error_setg(errp, "Node corrupted");
3327 return -EINVAL;
3328 }
3329
3330 if (child && child->frozen) {
3331 error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
3332 child->name, parent_bs->node_name, child->bs->node_name);
a1e708fc 3333 return -EPERM;
2cad1ebe
AG
3334 }
3335
25f78d9e
VSO
3336 if (is_backing && !parent_bs->drv->is_filter &&
3337 !parent_bs->drv->supports_backing)
3338 {
3339 error_setg(errp, "Driver '%s' of node '%s' does not support backing "
3340 "files", parent_bs->drv->format_name, parent_bs->node_name);
3341 return -EINVAL;
3342 }
3343
e9238278
VSO
3344 if (parent_bs->drv->is_filter) {
3345 role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
3346 } else if (is_backing) {
3347 role = BDRV_CHILD_COW;
3348 } else {
3349 /*
3350 * We only can use same role as it is in existing child. We don't have
3351 * infrastructure to determine role of file child in generic way
3352 */
3353 if (!child) {
3354 error_setg(errp, "Cannot set file child to format node without "
3355 "file child");
3356 return -EINVAL;
3357 }
3358 role = child->role;
826b6ca0
FZ
3359 }
3360
e9238278
VSO
3361 if (child) {
3362 bdrv_unset_inherits_from(parent_bs, child, tran);
3363 bdrv_remove_file_or_backing_child(parent_bs, child, tran);
3364 }
3365
3366 if (!child_bs) {
8d24cce1
FZ
3367 goto out;
3368 }
12fa4af6 3369
e9238278
VSO
3370 ret = bdrv_attach_child_noperm(parent_bs, child_bs,
3371 is_backing ? "backing" : "file",
3372 &child_of_bds, role,
3373 is_backing ? &parent_bs->backing :
3374 &parent_bs->file,
3375 tran, errp);
160333e1
VSO
3376 if (ret < 0) {
3377 return ret;
a1e708fc
VSO
3378 }
3379
160333e1
VSO
3380
3381 /*
e9238278 3382 * If inherits_from pointed recursively to bs then let's update it to
160333e1
VSO
3383 * point directly to bs (else it will become NULL).
3384 */
a1e708fc 3385 if (update_inherits_from) {
e9238278 3386 bdrv_set_inherits_from(child_bs, parent_bs, tran);
0065c455 3387 }
826b6ca0 3388
8d24cce1 3389out:
e9238278 3390 bdrv_refresh_limits(parent_bs, tran, NULL);
160333e1
VSO
3391
3392 return 0;
3393}
3394
e9238278
VSO
3395static int bdrv_set_backing_noperm(BlockDriverState *bs,
3396 BlockDriverState *backing_hd,
3397 Transaction *tran, Error **errp)
3398{
3399 return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
3400}
3401
160333e1
VSO
3402int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
3403 Error **errp)
3404{
3405 int ret;
3406 Transaction *tran = tran_new();
3407
f791bf7f 3408 GLOBAL_STATE_CODE();
c0829cb1
VSO
3409 bdrv_drained_begin(bs);
3410
160333e1
VSO
3411 ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
3412 if (ret < 0) {
3413 goto out;
3414 }
3415
3416 ret = bdrv_refresh_perms(bs, errp);
3417out:
3418 tran_finalize(tran, ret);
a1e708fc 3419
c0829cb1
VSO
3420 bdrv_drained_end(bs);
3421
a1e708fc 3422 return ret;
8d24cce1
FZ
3423}
3424
31ca6d07
KW
3425/*
3426 * Opens the backing file for a BlockDriverState if not yet open
3427 *
d9b7b057
KW
3428 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
3429 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3430 * itself, all options starting with "${bdref_key}." are considered part of the
3431 * BlockdevRef.
3432 *
3433 * TODO Can this be unified with bdrv_open_image()?
31ca6d07 3434 */
d9b7b057
KW
3435int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
3436 const char *bdref_key, Error **errp)
9156df12 3437{
6b6833c1 3438 char *backing_filename = NULL;
d9b7b057
KW
3439 char *bdref_key_dot;
3440 const char *reference = NULL;
317fc44e 3441 int ret = 0;
998c2019 3442 bool implicit_backing = false;
8d24cce1 3443 BlockDriverState *backing_hd;
d9b7b057
KW
3444 QDict *options;
3445 QDict *tmp_parent_options = NULL;
34b5d2c6 3446 Error *local_err = NULL;
9156df12 3447
f791bf7f
EGE
3448 GLOBAL_STATE_CODE();
3449
760e0063 3450 if (bs->backing != NULL) {
1ba4b6a5 3451 goto free_exit;
9156df12
PB
3452 }
3453
31ca6d07 3454 /* NULL means an empty set of options */
d9b7b057
KW
3455 if (parent_options == NULL) {
3456 tmp_parent_options = qdict_new();
3457 parent_options = tmp_parent_options;
31ca6d07
KW
3458 }
3459
9156df12 3460 bs->open_flags &= ~BDRV_O_NO_BACKING;
d9b7b057
KW
3461
3462 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3463 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
3464 g_free(bdref_key_dot);
3465
129c7d1c
MA
3466 /*
3467 * Caution: while qdict_get_try_str() is fine, getting non-string
3468 * types would require more care. When @parent_options come from
3469 * -blockdev or blockdev_add, its members are typed according to
3470 * the QAPI schema, but when they come from -drive, they're all
3471 * QString.
3472 */
d9b7b057
KW
3473 reference = qdict_get_try_str(parent_options, bdref_key);
3474 if (reference || qdict_haskey(options, "file.filename")) {
6b6833c1 3475 /* keep backing_filename NULL */
1cb6f506 3476 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
cb3e7f08 3477 qobject_unref(options);
1ba4b6a5 3478 goto free_exit;
dbecebdd 3479 } else {
998c2019
HR
3480 if (qdict_size(options) == 0) {
3481 /* If the user specifies options that do not modify the
3482 * backing file's behavior, we might still consider it the
3483 * implicit backing file. But it's easier this way, and
3484 * just specifying some of the backing BDS's options is
3485 * only possible with -drive anyway (otherwise the QAPI
3486 * schema forces the user to specify everything). */
3487 implicit_backing = !strcmp(bs->auto_backing_file, bs->backing_file);
3488 }
3489
6b6833c1 3490 backing_filename = bdrv_get_full_backing_filename(bs, &local_err);
9f07429e
HR
3491 if (local_err) {
3492 ret = -EINVAL;
3493 error_propagate(errp, local_err);
cb3e7f08 3494 qobject_unref(options);
9f07429e
HR
3495 goto free_exit;
3496 }
9156df12
PB
3497 }
3498
8ee79e70
KW
3499 if (!bs->drv || !bs->drv->supports_backing) {
3500 ret = -EINVAL;
3501 error_setg(errp, "Driver doesn't support backing files");
cb3e7f08 3502 qobject_unref(options);
8ee79e70
KW
3503 goto free_exit;
3504 }
3505
6bff597b
PK
3506 if (!reference &&
3507 bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
46f5ac20 3508 qdict_put_str(options, "driver", bs->backing_format);
9156df12
PB
3509 }
3510
6b6833c1 3511 backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs,
25191e5f 3512 &child_of_bds, bdrv_backing_role(bs), errp);
5b363937 3513 if (!backing_hd) {
9156df12 3514 bs->open_flags |= BDRV_O_NO_BACKING;
e43bfd9c 3515 error_prepend(errp, "Could not open backing file: ");
5b363937 3516 ret = -EINVAL;
1ba4b6a5 3517 goto free_exit;
9156df12 3518 }
df581792 3519
998c2019
HR
3520 if (implicit_backing) {
3521 bdrv_refresh_filename(backing_hd);
3522 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
3523 backing_hd->filename);
3524 }
3525
5db15a57
KW
3526 /* Hook up the backing file link; drop our reference, bs owns the
3527 * backing_hd reference now */
dc9c10a1 3528 ret = bdrv_set_backing_hd(bs, backing_hd, errp);
5db15a57 3529 bdrv_unref(backing_hd);
dc9c10a1 3530 if (ret < 0) {
12fa4af6
KW
3531 goto free_exit;
3532 }
d80ac658 3533
d9b7b057
KW
3534 qdict_del(parent_options, bdref_key);
3535
1ba4b6a5
BC
3536free_exit:
3537 g_free(backing_filename);
cb3e7f08 3538 qobject_unref(tmp_parent_options);
1ba4b6a5 3539 return ret;
9156df12
PB
3540}
3541
2d6b86af
KW
3542static BlockDriverState *
3543bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key,
bd86fb99 3544 BlockDriverState *parent, const BdrvChildClass *child_class,
272c02ea 3545 BdrvChildRole child_role, bool allow_none, Error **errp)
da557aac 3546{
2d6b86af 3547 BlockDriverState *bs = NULL;
da557aac 3548 QDict *image_options;
da557aac
HR
3549 char *bdref_key_dot;
3550 const char *reference;
3551
bd86fb99 3552 assert(child_class != NULL);
f67503e5 3553
da557aac
HR
3554 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
3555 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
3556 g_free(bdref_key_dot);
3557
129c7d1c
MA
3558 /*
3559 * Caution: while qdict_get_try_str() is fine, getting non-string
3560 * types would require more care. When @options come from
3561 * -blockdev or blockdev_add, its members are typed according to
3562 * the QAPI schema, but when they come from -drive, they're all
3563 * QString.
3564 */
da557aac
HR
3565 reference = qdict_get_try_str(options, bdref_key);
3566 if (!filename && !reference && !qdict_size(image_options)) {
b4b059f6 3567 if (!allow_none) {
da557aac
HR
3568 error_setg(errp, "A block device must be specified for \"%s\"",
3569 bdref_key);
da557aac 3570 }
cb3e7f08 3571 qobject_unref(image_options);
da557aac
HR
3572 goto done;
3573 }
3574
5b363937 3575 bs = bdrv_open_inherit(filename, reference, image_options, 0,
272c02ea 3576 parent, child_class, child_role, errp);
5b363937 3577 if (!bs) {
df581792
KW
3578 goto done;
3579 }
3580
da557aac
HR
3581done:
3582 qdict_del(options, bdref_key);
2d6b86af
KW
3583 return bs;
3584}
3585
3586/*
3587 * Opens a disk image whose options are given as BlockdevRef in another block
3588 * device's options.
3589 *
3590 * If allow_none is true, no image will be opened if filename is false and no
3591 * BlockdevRef is given. NULL will be returned, but errp remains unset.
3592 *
3593 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
3594 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
3595 * itself, all options starting with "${bdref_key}." are considered part of the
3596 * BlockdevRef.
3597 *
3598 * The BlockdevRef will be removed from the options QDict.
3599 */
3600BdrvChild *bdrv_open_child(const char *filename,
3601 QDict *options, const char *bdref_key,
3602 BlockDriverState *parent,
bd86fb99 3603 const BdrvChildClass *child_class,
258b7765 3604 BdrvChildRole child_role,
2d6b86af
KW
3605 bool allow_none, Error **errp)
3606{
3607 BlockDriverState *bs;
3608
f791bf7f
EGE
3609 GLOBAL_STATE_CODE();
3610
bd86fb99 3611 bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class,
272c02ea 3612 child_role, allow_none, errp);
2d6b86af
KW
3613 if (bs == NULL) {
3614 return NULL;
3615 }
3616
258b7765
HR
3617 return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
3618 errp);
b4b059f6
KW
3619}
3620
bd86fb99
HR
3621/*
3622 * TODO Future callers may need to specify parent/child_class in order for
3623 * option inheritance to work. Existing callers use it for the root node.
3624 */
e1d74bc6
KW
3625BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp)
3626{
3627 BlockDriverState *bs = NULL;
e1d74bc6
KW
3628 QObject *obj = NULL;
3629 QDict *qdict = NULL;
3630 const char *reference = NULL;
3631 Visitor *v = NULL;
3632
f791bf7f
EGE
3633 GLOBAL_STATE_CODE();
3634
e1d74bc6
KW
3635 if (ref->type == QTYPE_QSTRING) {
3636 reference = ref->u.reference;
3637 } else {
3638 BlockdevOptions *options = &ref->u.definition;
3639 assert(ref->type == QTYPE_QDICT);
3640
3641 v = qobject_output_visitor_new(&obj);
1f584248 3642 visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
e1d74bc6
KW
3643 visit_complete(v, &obj);
3644
7dc847eb 3645 qdict = qobject_to(QDict, obj);
e1d74bc6
KW
3646 qdict_flatten(qdict);
3647
3648 /* bdrv_open_inherit() defaults to the values in bdrv_flags (for
3649 * compatibility with other callers) rather than what we want as the
3650 * real defaults. Apply the defaults here instead. */
3651 qdict_set_default_str(qdict, BDRV_OPT_CACHE_DIRECT, "off");
3652 qdict_set_default_str(qdict, BDRV_OPT_CACHE_NO_FLUSH, "off");
3653 qdict_set_default_str(qdict, BDRV_OPT_READ_ONLY, "off");
e35bdc12
KW
3654 qdict_set_default_str(qdict, BDRV_OPT_AUTO_READ_ONLY, "off");
3655
e1d74bc6
KW
3656 }
3657
272c02ea 3658 bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp);
e1d74bc6 3659 obj = NULL;
cb3e7f08 3660 qobject_unref(obj);
e1d74bc6
KW
3661 visit_free(v);
3662 return bs;
3663}
3664
66836189
HR
3665static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
3666 int flags,
3667 QDict *snapshot_options,
3668 Error **errp)
b998875d
KW
3669{
3670 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1ba4b6a5 3671 char *tmp_filename = g_malloc0(PATH_MAX + 1);
b998875d 3672 int64_t total_size;
83d0521a 3673 QemuOpts *opts = NULL;
ff6ed714 3674 BlockDriverState *bs_snapshot = NULL;
b998875d
KW
3675 int ret;
3676
3677 /* if snapshot, we create a temporary backing file and open it
3678 instead of opening 'filename' directly */
3679
3680 /* Get the required size from the image */
f187743a
KW
3681 total_size = bdrv_getlength(bs);
3682 if (total_size < 0) {
3683 error_setg_errno(errp, -total_size, "Could not get image size");
1ba4b6a5 3684 goto out;
f187743a 3685 }
b998875d
KW
3686
3687 /* Create the temporary image */
1ba4b6a5 3688 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
b998875d
KW
3689 if (ret < 0) {
3690 error_setg_errno(errp, -ret, "Could not get temporary filename");
1ba4b6a5 3691 goto out;
b998875d
KW
3692 }
3693
ef810437 3694 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
c282e1fd 3695 &error_abort);
39101f25 3696 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
e43bfd9c 3697 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
83d0521a 3698 qemu_opts_del(opts);
b998875d 3699 if (ret < 0) {
e43bfd9c
MA
3700 error_prepend(errp, "Could not create temporary overlay '%s': ",
3701 tmp_filename);
1ba4b6a5 3702 goto out;
b998875d
KW
3703 }
3704
73176bee 3705 /* Prepare options QDict for the temporary file */
46f5ac20
EB
3706 qdict_put_str(snapshot_options, "file.driver", "file");
3707 qdict_put_str(snapshot_options, "file.filename", tmp_filename);
3708 qdict_put_str(snapshot_options, "driver", "qcow2");
b998875d 3709
5b363937 3710 bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
73176bee 3711 snapshot_options = NULL;
5b363937 3712 if (!bs_snapshot) {
1ba4b6a5 3713 goto out;
b998875d
KW
3714 }
3715
934aee14
VSO
3716 ret = bdrv_append(bs_snapshot, bs, errp);
3717 if (ret < 0) {
ff6ed714 3718 bs_snapshot = NULL;
b2c2832c
KW
3719 goto out;
3720 }
1ba4b6a5
BC
3721
3722out:
cb3e7f08 3723 qobject_unref(snapshot_options);
1ba4b6a5 3724 g_free(tmp_filename);
ff6ed714 3725 return bs_snapshot;
b998875d
KW
3726}
3727
b6ce07aa
KW
3728/*
3729 * Opens a disk image (raw, qcow2, vmdk, ...)
de9c0cec
KW
3730 *
3731 * options is a QDict of options to pass to the block drivers, or NULL for an
3732 * empty set of options. The reference to the QDict belongs to the block layer
3733 * after the call (even on failure), so if the caller intends to reuse the
cb3e7f08 3734 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
f67503e5
HR
3735 *
3736 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
3737 * If it is not NULL, the referenced BDS will be reused.
ddf5636d
HR
3738 *
3739 * The reference parameter may be used to specify an existing block device which
3740 * should be opened. If specified, neither options nor a filename may be given,
3741 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
b6ce07aa 3742 */
5b363937
HR
3743static BlockDriverState *bdrv_open_inherit(const char *filename,
3744 const char *reference,
3745 QDict *options, int flags,
3746 BlockDriverState *parent,
bd86fb99 3747 const BdrvChildClass *child_class,
272c02ea 3748 BdrvChildRole child_role,
5b363937 3749 Error **errp)
ea2384d3 3750{
b6ce07aa 3751 int ret;
5696c6e3 3752 BlockBackend *file = NULL;
9a4f4c31 3753 BlockDriverState *bs;
ce343771 3754 BlockDriver *drv = NULL;
2f624b80 3755 BdrvChild *child;
74fe54f2 3756 const char *drvname;
3e8c2e57 3757 const char *backing;
34b5d2c6 3758 Error *local_err = NULL;
73176bee 3759 QDict *snapshot_options = NULL;
b1e6fc08 3760 int snapshot_flags = 0;
712e7874 3761
bd86fb99
HR
3762 assert(!child_class || !flags);
3763 assert(!child_class == !parent);
f67503e5 3764
ddf5636d
HR
3765 if (reference) {
3766 bool options_non_empty = options ? qdict_size(options) : false;
cb3e7f08 3767 qobject_unref(options);
ddf5636d 3768
ddf5636d
HR
3769 if (filename || options_non_empty) {
3770 error_setg(errp, "Cannot reference an existing block device with "
3771 "additional options or a new filename");
5b363937 3772 return NULL;
ddf5636d
HR
3773 }
3774
3775 bs = bdrv_lookup_bs(reference, reference, errp);
3776 if (!bs) {
5b363937 3777 return NULL;
ddf5636d 3778 }
76b22320 3779
ddf5636d 3780 bdrv_ref(bs);
5b363937 3781 return bs;
ddf5636d
HR
3782 }
3783
5b363937 3784 bs = bdrv_new();
f67503e5 3785
de9c0cec
KW
3786 /* NULL means an empty set of options */
3787 if (options == NULL) {
3788 options = qdict_new();
3789 }
3790
145f598e 3791 /* json: syntax counts as explicit options, as if in the QDict */
de3b53f0
KW
3792 parse_json_protocol(options, &filename, &local_err);
3793 if (local_err) {
de3b53f0
KW
3794 goto fail;
3795 }
3796
145f598e
KW
3797 bs->explicit_options = qdict_clone_shallow(options);
3798
bd86fb99 3799 if (child_class) {
3cdc69d3
HR
3800 bool parent_is_format;
3801
3802 if (parent->drv) {
3803 parent_is_format = parent->drv->is_format;
3804 } else {
3805 /*
3806 * parent->drv is not set yet because this node is opened for
3807 * (potential) format probing. That means that @parent is going
3808 * to be a format node.
3809 */
3810 parent_is_format = true;
3811 }
3812
bddcec37 3813 bs->inherits_from = parent;
3cdc69d3
HR
3814 child_class->inherit_options(child_role, parent_is_format,
3815 &flags, options,
bd86fb99 3816 parent->open_flags, parent->options);
f3930ed0
KW
3817 }
3818
de3b53f0 3819 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
dfde483e 3820 if (ret < 0) {
462f5bcf
KW
3821 goto fail;
3822 }
3823
129c7d1c
MA
3824 /*
3825 * Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
3826 * Caution: getting a boolean member of @options requires care.
3827 * When @options come from -blockdev or blockdev_add, members are
3828 * typed according to the QAPI schema, but when they come from
3829 * -drive, they're all QString.
3830 */
f87a0e29
AG
3831 if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
3832 !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
3833 flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
3834 } else {
3835 flags &= ~BDRV_O_RDWR;
14499ea5
AG
3836 }
3837
3838 if (flags & BDRV_O_SNAPSHOT) {
3839 snapshot_options = qdict_new();
3840 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
3841 flags, options);
f87a0e29
AG
3842 /* Let bdrv_backing_options() override "read-only" */
3843 qdict_del(options, BDRV_OPT_READ_ONLY);
00ff7ffd
HR
3844 bdrv_inherited_options(BDRV_CHILD_COW, true,
3845 &flags, options, flags, options);
14499ea5
AG
3846 }
3847
62392ebb
KW
3848 bs->open_flags = flags;
3849 bs->options = options;
3850 options = qdict_clone_shallow(options);
3851
76c591b0 3852 /* Find the right image format driver */
129c7d1c 3853 /* See cautionary note on accessing @options above */
76c591b0
KW
3854 drvname = qdict_get_try_str(options, "driver");
3855 if (drvname) {
3856 drv = bdrv_find_format(drvname);
76c591b0
KW
3857 if (!drv) {
3858 error_setg(errp, "Unknown driver: '%s'", drvname);
76c591b0
KW
3859 goto fail;
3860 }
3861 }
3862
3863 assert(drvname || !(flags & BDRV_O_PROTOCOL));
76c591b0 3864
129c7d1c 3865 /* See cautionary note on accessing @options above */
3e8c2e57 3866 backing = qdict_get_try_str(options, "backing");
e59a0cf1
HR
3867 if (qobject_to(QNull, qdict_get(options, "backing")) != NULL ||
3868 (backing && *backing == '\0'))
3869 {
4f7be280
HR
3870 if (backing) {
3871 warn_report("Use of \"backing\": \"\" is deprecated; "
3872 "use \"backing\": null instead");
3873 }
3e8c2e57 3874 flags |= BDRV_O_NO_BACKING;
ae0f57f0
KW
3875 qdict_del(bs->explicit_options, "backing");
3876 qdict_del(bs->options, "backing");
3e8c2e57
AG
3877 qdict_del(options, "backing");
3878 }
3879
5696c6e3 3880 /* Open image file without format layer. This BlockBackend is only used for
4e4bf5c4
KW
3881 * probing, the block drivers will do their own bdrv_open_child() for the
3882 * same BDS, which is why we put the node name back into options. */
f4788adc 3883 if ((flags & BDRV_O_PROTOCOL) == 0) {
5696c6e3
KW
3884 BlockDriverState *file_bs;
3885
3886 file_bs = bdrv_open_child_bs(filename, options, "file", bs,
58944401
HR
3887 &child_of_bds, BDRV_CHILD_IMAGE,
3888 true, &local_err);
1fdd6933 3889 if (local_err) {
f4788adc
KW
3890 goto fail;
3891 }
5696c6e3 3892 if (file_bs != NULL) {
dacaa162
KW
3893 /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
3894 * looking at the header to guess the image format. This works even
3895 * in cases where a guest would not see a consistent state. */
d861ab3a 3896 file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
d7086422 3897 blk_insert_bs(file, file_bs, &local_err);
5696c6e3 3898 bdrv_unref(file_bs);
d7086422
KW
3899 if (local_err) {
3900 goto fail;
3901 }
5696c6e3 3902
46f5ac20 3903 qdict_put_str(options, "file", bdrv_get_node_name(file_bs));
4e4bf5c4 3904 }
f500a6d3
KW
3905 }
3906
76c591b0 3907 /* Image format probing */
38f3ef57 3908 bs->probed = !drv;
76c591b0 3909 if (!drv && file) {
cf2ab8fc 3910 ret = find_image_format(file, filename, &drv, &local_err);
17b005f1 3911 if (ret < 0) {
8bfea15d 3912 goto fail;
2a05cbe4 3913 }
62392ebb
KW
3914 /*
3915 * This option update would logically belong in bdrv_fill_options(),
3916 * but we first need to open bs->file for the probing to work, while
3917 * opening bs->file already requires the (mostly) final set of options
3918 * so that cache mode etc. can be inherited.
3919 *
3920 * Adding the driver later is somewhat ugly, but it's not an option
3921 * that would ever be inherited, so it's correct. We just need to make
3922 * sure to update both bs->options (which has the full effective
3923 * options for bs) and options (which has file.* already removed).
3924 */
46f5ac20
EB
3925 qdict_put_str(bs->options, "driver", drv->format_name);
3926 qdict_put_str(options, "driver", drv->format_name);
76c591b0 3927 } else if (!drv) {
17b005f1 3928 error_setg(errp, "Must specify either driver or file");
8bfea15d 3929 goto fail;
ea2384d3 3930 }
b6ce07aa 3931
53a29513
HR
3932 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
3933 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
3934 /* file must be NULL if a protocol BDS is about to be created
3935 * (the inverse results in an error message from bdrv_open_common()) */
3936 assert(!(flags & BDRV_O_PROTOCOL) || !file);
3937
b6ce07aa 3938 /* Open the image */
82dc8b41 3939 ret = bdrv_open_common(bs, file, options, &local_err);
b6ce07aa 3940 if (ret < 0) {
8bfea15d 3941 goto fail;
6987307c
CH
3942 }
3943
4e4bf5c4 3944 if (file) {
5696c6e3 3945 blk_unref(file);
f500a6d3
KW
3946 file = NULL;
3947 }
3948
b6ce07aa 3949 /* If there is a backing file, use it */
9156df12 3950 if ((flags & BDRV_O_NO_BACKING) == 0) {
d9b7b057 3951 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
b6ce07aa 3952 if (ret < 0) {
b6ad491a 3953 goto close_and_fail;
b6ce07aa 3954 }
b6ce07aa
KW
3955 }
3956
50196d7a
AG
3957 /* Remove all children options and references
3958 * from bs->options and bs->explicit_options */
2f624b80
AG
3959 QLIST_FOREACH(child, &bs->children, next) {
3960 char *child_key_dot;
3961 child_key_dot = g_strdup_printf("%s.", child->name);
3962 qdict_extract_subqdict(bs->explicit_options, NULL, child_key_dot);
3963 qdict_extract_subqdict(bs->options, NULL, child_key_dot);
50196d7a
AG
3964 qdict_del(bs->explicit_options, child->name);
3965 qdict_del(bs->options, child->name);
2f624b80
AG
3966 g_free(child_key_dot);
3967 }
3968
b6ad491a 3969 /* Check if any unknown options were used */
7ad2757f 3970 if (qdict_size(options) != 0) {
b6ad491a 3971 const QDictEntry *entry = qdict_first(options);
5acd9d81
HR
3972 if (flags & BDRV_O_PROTOCOL) {
3973 error_setg(errp, "Block protocol '%s' doesn't support the option "
3974 "'%s'", drv->format_name, entry->key);
3975 } else {
d0e46a55
HR
3976 error_setg(errp,
3977 "Block format '%s' does not support the option '%s'",
3978 drv->format_name, entry->key);
5acd9d81 3979 }
b6ad491a 3980
b6ad491a
KW
3981 goto close_and_fail;
3982 }
b6ad491a 3983
c01c214b 3984 bdrv_parent_cb_change_media(bs, true);
b6ce07aa 3985
cb3e7f08 3986 qobject_unref(options);
8961be33 3987 options = NULL;
dd62f1ca
KW
3988
3989 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
3990 * temporary snapshot afterwards. */
3991 if (snapshot_flags) {
66836189
HR
3992 BlockDriverState *snapshot_bs;
3993 snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
3994 snapshot_options, &local_err);
73176bee 3995 snapshot_options = NULL;
dd62f1ca
KW
3996 if (local_err) {
3997 goto close_and_fail;
3998 }
5b363937
HR
3999 /* We are not going to return bs but the overlay on top of it
4000 * (snapshot_bs); thus, we have to drop the strong reference to bs
4001 * (which we obtained by calling bdrv_new()). bs will not be deleted,
4002 * though, because the overlay still has a reference to it. */
4003 bdrv_unref(bs);
4004 bs = snapshot_bs;
dd62f1ca
KW
4005 }
4006
5b363937 4007 return bs;
b6ce07aa 4008
8bfea15d 4009fail:
5696c6e3 4010 blk_unref(file);
cb3e7f08
MAL
4011 qobject_unref(snapshot_options);
4012 qobject_unref(bs->explicit_options);
4013 qobject_unref(bs->options);
4014 qobject_unref(options);
de9c0cec 4015 bs->options = NULL;
998cbd6a 4016 bs->explicit_options = NULL;
5b363937 4017 bdrv_unref(bs);
621ff94d 4018 error_propagate(errp, local_err);
5b363937 4019 return NULL;
de9c0cec 4020
b6ad491a 4021close_and_fail:
5b363937 4022 bdrv_unref(bs);
cb3e7f08
MAL
4023 qobject_unref(snapshot_options);
4024 qobject_unref(options);
621ff94d 4025 error_propagate(errp, local_err);
5b363937 4026 return NULL;
b6ce07aa
KW
4027}
4028
5b363937
HR
4029BlockDriverState *bdrv_open(const char *filename, const char *reference,
4030 QDict *options, int flags, Error **errp)
f3930ed0 4031{
f791bf7f
EGE
4032 GLOBAL_STATE_CODE();
4033
5b363937 4034 return bdrv_open_inherit(filename, reference, options, flags, NULL,
272c02ea 4035 NULL, 0, errp);
f3930ed0
KW
4036}
4037
faf116b4
AG
/*
 * Look up @str in @list, an array of C strings terminated by a NULL entry.
 *
 * Returns true when some entry compares equal to @str, false otherwise.
 * A NULL @str or a NULL @list is treated as "not found".
 */
static bool is_str_in_list(const char *str, const char *const *list)
{
    const char *const *item;

    if (!str || !list) {
        return false;
    }

    for (item = list; *item; item++) {
        if (strcmp(str, *item) == 0) {
            return true;
        }
    }

    return false;
}
4051
4052/*
4053 * Check that every option set in @bs->options is also set in
4054 * @new_opts.
4055 *
4056 * Options listed in the common_options list and in
4057 * @bs->drv->mutable_opts are skipped.
4058 *
4059 * Return 0 on success, otherwise return -EINVAL and set @errp.
4060 */
4061static int bdrv_reset_options_allowed(BlockDriverState *bs,
4062 const QDict *new_opts, Error **errp)
4063{
4064 const QDictEntry *e;
4065 /* These options are common to all block drivers and are handled
4066 * in bdrv_reopen_prepare() so they can be left out of @new_opts */
4067 const char *const common_options[] = {
4068 "node-name", "discard", "cache.direct", "cache.no-flush",
4069 "read-only", "auto-read-only", "detect-zeroes", NULL
4070 };
4071
4072 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
4073 if (!qdict_haskey(new_opts, e->key) &&
4074 !is_str_in_list(e->key, common_options) &&
4075 !is_str_in_list(e->key, bs->drv->mutable_opts)) {
4076 error_setg(errp, "Option '%s' cannot be reset "
4077 "to its default value", e->key);
4078 return -EINVAL;
4079 }
4080 }
4081
4082 return 0;
4083}
4084
cb828c31
AG
4085/*
4086 * Returns true if @child can be reached recursively from @bs
4087 */
4088static bool bdrv_recurse_has_child(BlockDriverState *bs,
4089 BlockDriverState *child)
4090{
4091 BdrvChild *c;
4092
4093 if (bs == child) {
4094 return true;
4095 }
4096
4097 QLIST_FOREACH(c, &bs->children, next) {
4098 if (bdrv_recurse_has_child(c->bs, child)) {
4099 return true;
4100 }
4101 }
4102
4103 return false;
4104}
4105
e971aa12
JC
4106/*
4107 * Adds a BlockDriverState to a simple queue for an atomic, transactional
4108 * reopen of multiple devices.
4109 *
859443b0 4110 * bs_queue can either be an existing BlockReopenQueue that has had QTAILQ_INIT
e971aa12
JC
4111 * already performed, or alternatively may be NULL a new BlockReopenQueue will
4112 * be created and initialized. This newly created BlockReopenQueue should be
4113 * passed back in for subsequent calls that are intended to be of the same
4114 * atomic 'set'.
4115 *
4116 * bs is the BlockDriverState to add to the reopen queue.
4117 *
4d2cb092
KW
4118 * options contains the changed options for the associated bs
4119 * (the BlockReopenQueue takes ownership)
4120 *
e971aa12
JC
4121 * flags contains the open flags for the associated bs
4122 *
4123 * returns a pointer to bs_queue, which is either the newly allocated
4124 * bs_queue, or the existing bs_queue being used.
4125 *
1a63a907 4126 * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
e971aa12 4127 */
28518102
KW
4128static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
4129 BlockDriverState *bs,
4130 QDict *options,
bd86fb99 4131 const BdrvChildClass *klass,
272c02ea 4132 BdrvChildRole role,
3cdc69d3 4133 bool parent_is_format,
28518102 4134 QDict *parent_options,
077e8e20
AG
4135 int parent_flags,
4136 bool keep_old_opts)
e971aa12
JC
4137{
4138 assert(bs != NULL);
4139
4140 BlockReopenQueueEntry *bs_entry;
67251a31 4141 BdrvChild *child;
9aa09ddd
AG
4142 QDict *old_options, *explicit_options, *options_copy;
4143 int flags;
4144 QemuOpts *opts;
67251a31 4145
1a63a907
KW
4146 /* Make sure that the caller remembered to use a drained section. This is
4147 * important to avoid graph changes between the recursive queuing here and
4148 * bdrv_reopen_multiple(). */
4149 assert(bs->quiesce_counter > 0);
4150
e971aa12
JC
4151 if (bs_queue == NULL) {
4152 bs_queue = g_new0(BlockReopenQueue, 1);
859443b0 4153 QTAILQ_INIT(bs_queue);
e971aa12
JC
4154 }
4155
4d2cb092
KW
4156 if (!options) {
4157 options = qdict_new();
4158 }
4159
5b7ba05f 4160 /* Check if this BlockDriverState is already in the queue */
859443b0 4161 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
5b7ba05f
AG
4162 if (bs == bs_entry->state.bs) {
4163 break;
4164 }
4165 }
4166
28518102
KW
4167 /*
4168 * Precedence of options:
4169 * 1. Explicitly passed in options (highest)
9aa09ddd
AG
4170 * 2. Retained from explicitly set options of bs
4171 * 3. Inherited from parent node
4172 * 4. Retained from effective options of bs
28518102
KW
4173 */
4174
145f598e 4175 /* Old explicitly set values (don't overwrite by inherited value) */
077e8e20
AG
4176 if (bs_entry || keep_old_opts) {
4177 old_options = qdict_clone_shallow(bs_entry ?
4178 bs_entry->state.explicit_options :
4179 bs->explicit_options);
4180 bdrv_join_options(bs, options, old_options);
4181 qobject_unref(old_options);
5b7ba05f 4182 }
145f598e
KW
4183
4184 explicit_options = qdict_clone_shallow(options);
4185
28518102
KW
4186 /* Inherit from parent node */
4187 if (parent_options) {
9aa09ddd 4188 flags = 0;
3cdc69d3 4189 klass->inherit_options(role, parent_is_format, &flags, options,
272c02ea 4190 parent_flags, parent_options);
9aa09ddd
AG
4191 } else {
4192 flags = bdrv_get_flags(bs);
28518102
KW
4193 }
4194
077e8e20
AG
4195 if (keep_old_opts) {
4196 /* Old values are used for options that aren't set yet */
4197 old_options = qdict_clone_shallow(bs->options);
4198 bdrv_join_options(bs, options, old_options);
4199 qobject_unref(old_options);
4200 }
4d2cb092 4201
9aa09ddd
AG
4202 /* We have the final set of options so let's update the flags */
4203 options_copy = qdict_clone_shallow(options);
4204 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
4205 qemu_opts_absorb_qdict(opts, options_copy, NULL);
4206 update_flags_from_options(&flags, opts);
4207 qemu_opts_del(opts);
4208 qobject_unref(options_copy);
4209
fd452021 4210 /* bdrv_open_inherit() sets and clears some additional flags internally */
f1f25a2e 4211 flags &= ~BDRV_O_PROTOCOL;
fd452021
KW
4212 if (flags & BDRV_O_RDWR) {
4213 flags |= BDRV_O_ALLOW_RDWR;
4214 }
f1f25a2e 4215
1857c97b
KW
4216 if (!bs_entry) {
4217 bs_entry = g_new0(BlockReopenQueueEntry, 1);
859443b0 4218 QTAILQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1857c97b 4219 } else {
cb3e7f08
MAL
4220 qobject_unref(bs_entry->state.options);
4221 qobject_unref(bs_entry->state.explicit_options);
1857c97b
KW
4222 }
4223
4224 bs_entry->state.bs = bs;
4225 bs_entry->state.options = options;
4226 bs_entry->state.explicit_options = explicit_options;
4227 bs_entry->state.flags = flags;
4228
8546632e
AG
4229 /*
4230 * If keep_old_opts is false then it means that unspecified
4231 * options must be reset to their original value. We don't allow
4232 * resetting 'backing' but we need to know if the option is
4233 * missing in order to decide if we have to return an error.
4234 */
4235 if (!keep_old_opts) {
4236 bs_entry->state.backing_missing =
4237 !qdict_haskey(options, "backing") &&
4238 !qdict_haskey(options, "backing.driver");
4239 }
4240
67251a31 4241 QLIST_FOREACH(child, &bs->children, next) {
8546632e
AG
4242 QDict *new_child_options = NULL;
4243 bool child_keep_old = keep_old_opts;
67251a31 4244
4c9dfe5d
KW
4245 /* reopen can only change the options of block devices that were
4246 * implicitly created and inherited options. For other (referenced)
4247 * block devices, a syntax like "backing.foo" results in an error. */
67251a31
KW
4248 if (child->bs->inherits_from != bs) {
4249 continue;
4250 }
4251
8546632e
AG
4252 /* Check if the options contain a child reference */
4253 if (qdict_haskey(options, child->name)) {
4254 const char *childref = qdict_get_try_str(options, child->name);
4255 /*
4256 * The current child must not be reopened if the child
4257 * reference is null or points to a different node.
4258 */
4259 if (g_strcmp0(childref, child->bs->node_name)) {
4260 continue;
4261 }
4262 /*
4263 * If the child reference points to the current child then
4264 * reopen it with its existing set of options (note that
4265 * it can still inherit new options from the parent).
4266 */
4267 child_keep_old = true;
4268 } else {
4269 /* Extract child options ("child-name.*") */
4270 char *child_key_dot = g_strdup_printf("%s.", child->name);
4271 qdict_extract_subqdict(explicit_options, NULL, child_key_dot);
4272 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
4273 g_free(child_key_dot);
4274 }
4c9dfe5d 4275
9aa09ddd 4276 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options,
3cdc69d3
HR
4277 child->klass, child->role, bs->drv->is_format,
4278 options, flags, child_keep_old);
e971aa12
JC
4279 }
4280
e971aa12
JC
4281 return bs_queue;
4282}
4283
28518102
KW
4284BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
4285 BlockDriverState *bs,
077e8e20 4286 QDict *options, bool keep_old_opts)
28518102 4287{
f791bf7f
EGE
4288 GLOBAL_STATE_CODE();
4289
3cdc69d3
HR
4290 return bdrv_reopen_queue_child(bs_queue, bs, options, NULL, 0, false,
4291 NULL, 0, keep_old_opts);
28518102
KW
4292}
4293
ab5b5228
AG
4294void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
4295{
f791bf7f 4296 GLOBAL_STATE_CODE();
ab5b5228
AG
4297 if (bs_queue) {
4298 BlockReopenQueueEntry *bs_entry, *next;
4299 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
4300 qobject_unref(bs_entry->state.explicit_options);
4301 qobject_unref(bs_entry->state.options);
4302 g_free(bs_entry);
4303 }
4304 g_free(bs_queue);
4305 }
4306}
4307
e971aa12
JC
4308/*
4309 * Reopen multiple BlockDriverStates atomically & transactionally.
4310 *
4311 * The queue passed in (bs_queue) must have been built up previous
4312 * via bdrv_reopen_queue().
4313 *
4314 * Reopens all BDS specified in the queue, with the appropriate
4315 * flags. All devices are prepared for reopen, and failure of any
50d6a8a3 4316 * device will cause all device changes to be abandoned, and intermediate
e971aa12
JC
4317 * data cleaned up.
4318 *
4319 * If all devices prepare successfully, then the changes are committed
4320 * to all devices.
4321 *
1a63a907
KW
4322 * All affected nodes must be drained between bdrv_reopen_queue() and
4323 * bdrv_reopen_multiple().
6cf42ca2
KW
4324 *
4325 * To be called from the main thread, with all other AioContexts unlocked.
e971aa12 4326 */
5019aece 4327int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
e971aa12
JC
4328{
4329 int ret = -1;
4330 BlockReopenQueueEntry *bs_entry, *next;
6cf42ca2 4331 AioContext *ctx;
72373e40
VSO
4332 Transaction *tran = tran_new();
4333 g_autoptr(GHashTable) found = NULL;
4334 g_autoptr(GSList) refresh_list = NULL;
e971aa12 4335
6cf42ca2 4336 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
e971aa12
JC
4337 assert(bs_queue != NULL);
4338
a2aabf88 4339 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
6cf42ca2
KW
4340 ctx = bdrv_get_aio_context(bs_entry->state.bs);
4341 aio_context_acquire(ctx);
a2aabf88 4342 ret = bdrv_flush(bs_entry->state.bs);
6cf42ca2 4343 aio_context_release(ctx);
a2aabf88
VSO
4344 if (ret < 0) {
4345 error_setg_errno(errp, -ret, "Error flushing drive");
e3fc91aa 4346 goto abort;
a2aabf88
VSO
4347 }
4348 }
4349
859443b0 4350 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
1a63a907 4351 assert(bs_entry->state.bs->quiesce_counter > 0);
6cf42ca2
KW
4352 ctx = bdrv_get_aio_context(bs_entry->state.bs);
4353 aio_context_acquire(ctx);
72373e40 4354 ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
6cf42ca2 4355 aio_context_release(ctx);
72373e40
VSO
4356 if (ret < 0) {
4357 goto abort;
e971aa12
JC
4358 }
4359 bs_entry->prepared = true;
4360 }
4361
72373e40 4362 found = g_hash_table_new(NULL, NULL);
859443b0 4363 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
69b736e7 4364 BDRVReopenState *state = &bs_entry->state;
72373e40
VSO
4365
4366 refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
4367 if (state->old_backing_bs) {
4368 refresh_list = bdrv_topological_dfs(refresh_list, found,
4369 state->old_backing_bs);
cb828c31 4370 }
ecd30d2d
AG
4371 if (state->old_file_bs) {
4372 refresh_list = bdrv_topological_dfs(refresh_list, found,
4373 state->old_file_bs);
4374 }
72373e40
VSO
4375 }
4376
4377 /*
4378 * Note that file-posix driver rely on permission update done during reopen
4379 * (even if no permission changed), because it wants "new" permissions for
4380 * reconfiguring the fd and that's why it does it in raw_check_perm(), not
4381 * in raw_reopen_prepare() which is called with "old" permissions.
4382 */
4383 ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
4384 if (ret < 0) {
4385 goto abort;
69b736e7
KW
4386 }
4387
fcd6a4f4
VSO
4388 /*
4389 * If we reach this point, we have success and just need to apply the
4390 * changes.
4391 *
4392 * Reverse order is used to comfort qcow2 driver: on commit it need to write
4393 * IN_USE flag to the image, to mark bitmaps in the image as invalid. But
4394 * children are usually goes after parents in reopen-queue, so go from last
4395 * to first element.
e971aa12 4396 */
fcd6a4f4 4397 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
6cf42ca2
KW
4398 ctx = bdrv_get_aio_context(bs_entry->state.bs);
4399 aio_context_acquire(ctx);
e971aa12 4400 bdrv_reopen_commit(&bs_entry->state);
6cf42ca2 4401 aio_context_release(ctx);
e971aa12
JC
4402 }
4403
72373e40 4404 tran_commit(tran);
69b736e7 4405
72373e40
VSO
4406 QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
4407 BlockDriverState *bs = bs_entry->state.bs;
74ad9a3b 4408
72373e40 4409 if (bs->drv->bdrv_reopen_commit_post) {
6cf42ca2
KW
4410 ctx = bdrv_get_aio_context(bs);
4411 aio_context_acquire(ctx);
72373e40 4412 bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
6cf42ca2 4413 aio_context_release(ctx);
69b736e7
KW
4414 }
4415 }
17e1e2be 4416
72373e40
VSO
4417 ret = 0;
4418 goto cleanup;
17e1e2be 4419
72373e40
VSO
4420abort:
4421 tran_abort(tran);
4422 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
4423 if (bs_entry->prepared) {
6cf42ca2
KW
4424 ctx = bdrv_get_aio_context(bs_entry->state.bs);
4425 aio_context_acquire(ctx);
72373e40 4426 bdrv_reopen_abort(&bs_entry->state);
6cf42ca2 4427 aio_context_release(ctx);
17e1e2be
PK
4428 }
4429 }
72373e40 4430
e971aa12 4431cleanup:
ab5b5228 4432 bdrv_reopen_queue_free(bs_queue);
40840e41 4433
e971aa12
JC
4434 return ret;
4435}
4436
6cf42ca2
KW
4437int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
4438 Error **errp)
6e1000a8 4439{
6cf42ca2 4440 AioContext *ctx = bdrv_get_aio_context(bs);
6e1000a8 4441 BlockReopenQueue *queue;
6cf42ca2 4442 int ret;
6e1000a8 4443
f791bf7f
EGE
4444 GLOBAL_STATE_CODE();
4445
6e1000a8 4446 bdrv_subtree_drained_begin(bs);
6cf42ca2
KW
4447 if (ctx != qemu_get_aio_context()) {
4448 aio_context_release(ctx);
4449 }
4450
4451 queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
5019aece 4452 ret = bdrv_reopen_multiple(queue, errp);
6cf42ca2
KW
4453
4454 if (ctx != qemu_get_aio_context()) {
4455 aio_context_acquire(ctx);
4456 }
6e1000a8
AG
4457 bdrv_subtree_drained_end(bs);
4458
4459 return ret;
4460}
4461
6cf42ca2
KW
4462int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
4463 Error **errp)
4464{
4465 QDict *opts = qdict_new();
4466
f791bf7f
EGE
4467 GLOBAL_STATE_CODE();
4468
6cf42ca2
KW
4469 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
4470
4471 return bdrv_reopen(bs, opts, true, errp);
4472}
4473
cb828c31
AG
4474/*
4475 * Take a BDRVReopenState and check if the value of 'backing' in the
4476 * reopen_state->options QDict is valid or not.
4477 *
4478 * If 'backing' is missing from the QDict then return 0.
4479 *
4480 * If 'backing' contains the node name of the backing file of
4481 * reopen_state->bs then return 0.
4482 *
4483 * If 'backing' contains a different node name (or is null) then check
4484 * whether the current backing file can be replaced with the new one.
4485 * If that's the case then reopen_state->replace_backing_bs is set to
4486 * true and reopen_state->new_backing_bs contains a pointer to the new
4487 * backing BlockDriverState (or NULL).
4488 *
4489 * Return 0 on success, otherwise return < 0 and set @errp.
4490 */
ecd30d2d
AG
4491static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
4492 bool is_backing, Transaction *tran,
4493 Error **errp)
cb828c31
AG
4494{
4495 BlockDriverState *bs = reopen_state->bs;
ecd30d2d
AG
4496 BlockDriverState *new_child_bs;
4497 BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) :
4498 child_bs(bs->file);
4499 const char *child_name = is_backing ? "backing" : "file";
cb828c31
AG
4500 QObject *value;
4501 const char *str;
4502
ecd30d2d 4503 value = qdict_get(reopen_state->options, child_name);
cb828c31
AG
4504 if (value == NULL) {
4505 return 0;
4506 }
4507
4508 switch (qobject_type(value)) {
4509 case QTYPE_QNULL:
ecd30d2d
AG
4510 assert(is_backing); /* The 'file' option does not allow a null value */
4511 new_child_bs = NULL;
cb828c31
AG
4512 break;
4513 case QTYPE_QSTRING:
410f44f5 4514 str = qstring_get_str(qobject_to(QString, value));
ecd30d2d
AG
4515 new_child_bs = bdrv_lookup_bs(NULL, str, errp);
4516 if (new_child_bs == NULL) {
cb828c31 4517 return -EINVAL;
ecd30d2d
AG
4518 } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
4519 error_setg(errp, "Making '%s' a %s child of '%s' would create a "
4520 "cycle", str, child_name, bs->node_name);
cb828c31
AG
4521 return -EINVAL;
4522 }
4523 break;
4524 default:
ecd30d2d
AG
4525 /*
4526 * The options QDict has been flattened, so 'backing' and 'file'
4527 * do not allow any other data type here.
4528 */
cb828c31
AG
4529 g_assert_not_reached();
4530 }
4531
ecd30d2d
AG
4532 if (old_child_bs == new_child_bs) {
4533 return 0;
4534 }
4535
4536 if (old_child_bs) {
4537 if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
cbfdb98c
VSO
4538 return 0;
4539 }
4540
ecd30d2d
AG
4541 if (old_child_bs->implicit) {
4542 error_setg(errp, "Cannot replace implicit %s child of %s",
4543 child_name, bs->node_name);
cbfdb98c
VSO
4544 return -EPERM;
4545 }
4546 }
4547
ecd30d2d 4548 if (bs->drv->is_filter && !old_child_bs) {
25f78d9e
VSO
4549 /*
4550 * Filters always have a file or a backing child, so we are trying to
4551 * change wrong child
4552 */
4553 error_setg(errp, "'%s' is a %s filter node that does not support a "
ecd30d2d 4554 "%s child", bs->node_name, bs->drv->format_name, child_name);
1d42f48c
HR
4555 return -EINVAL;
4556 }
4557
ecd30d2d
AG
4558 if (is_backing) {
4559 reopen_state->old_backing_bs = old_child_bs;
4560 } else {
4561 reopen_state->old_file_bs = old_child_bs;
4562 }
4563
4564 return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
4565 tran, errp);
cb828c31
AG
4566}
4567
e971aa12
JC
4568/*
4569 * Prepares a BlockDriverState for reopen. All changes are staged in the
4570 * 'opaque' field of the BDRVReopenState, which is used and allocated by
4571 * the block driver layer .bdrv_reopen_prepare()
4572 *
4573 * bs is the BlockDriverState to reopen
4574 * flags are the new open flags
4575 * queue is the reopen queue
4576 *
4577 * Returns 0 on success, non-zero on error. On error errp will be set
4578 * as well.
4579 *
4580 * On failure, bdrv_reopen_abort() will be called to clean up any data.
4581 * It is the responsibility of the caller to then call the abort() or
4582 * commit() for any other BDS that have been left in a prepare() state
4583 *
4584 */
53e96d1e 4585static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
72373e40 4586 BlockReopenQueue *queue,
ecd30d2d 4587 Transaction *change_child_tran, Error **errp)
e971aa12
JC
4588{
4589 int ret = -1;
e6d79c41 4590 int old_flags;
e971aa12
JC
4591 Error *local_err = NULL;
4592 BlockDriver *drv;
ccf9dc07 4593 QemuOpts *opts;
4c8350fe 4594 QDict *orig_reopen_opts;
593b3071 4595 char *discard = NULL;
3d8ce171 4596 bool read_only;
9ad08c44 4597 bool drv_prepared = false;
e971aa12
JC
4598
4599 assert(reopen_state != NULL);
4600 assert(reopen_state->bs->drv != NULL);
4601 drv = reopen_state->bs->drv;
4602
4c8350fe
AG
4603 /* This function and each driver's bdrv_reopen_prepare() remove
4604 * entries from reopen_state->options as they are processed, so
4605 * we need to make a copy of the original QDict. */
4606 orig_reopen_opts = qdict_clone_shallow(reopen_state->options);
4607
ccf9dc07
KW
4608 /* Process generic block layer options */
4609 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
af175e85 4610 if (!qemu_opts_absorb_qdict(opts, reopen_state->options, errp)) {
ccf9dc07
KW
4611 ret = -EINVAL;
4612 goto error;
4613 }
4614
e6d79c41
AG
4615 /* This was already called in bdrv_reopen_queue_child() so the flags
4616 * are up-to-date. This time we simply want to remove the options from
4617 * QemuOpts in order to indicate that they have been processed. */
4618 old_flags = reopen_state->flags;
91a097e7 4619 update_flags_from_options(&reopen_state->flags, opts);
e6d79c41 4620 assert(old_flags == reopen_state->flags);
91a097e7 4621
415bbca8 4622 discard = qemu_opt_get_del(opts, BDRV_OPT_DISCARD);
593b3071
AG
4623 if (discard != NULL) {
4624 if (bdrv_parse_discard_flags(discard, &reopen_state->flags) != 0) {
4625 error_setg(errp, "Invalid discard option");
4626 ret = -EINVAL;
4627 goto error;
4628 }
4629 }
4630
543770bd
AG
4631 reopen_state->detect_zeroes =
4632 bdrv_parse_detect_zeroes(opts, reopen_state->flags, &local_err);
4633 if (local_err) {
4634 error_propagate(errp, local_err);
4635 ret = -EINVAL;
4636 goto error;
4637 }
4638
57f9db9a
AG
4639 /* All other options (including node-name and driver) must be unchanged.
4640 * Put them back into the QDict, so that they are checked at the end
4641 * of this function. */
4642 qemu_opts_to_qdict(opts, reopen_state->options);
ccf9dc07 4643
3d8ce171
JC
4644 /* If we are to stay read-only, do not allow permission change
4645 * to r/w. Attempting to set to r/w may fail if either BDRV_O_ALLOW_RDWR is
4646 * not set, or if the BDS still has copy_on_read enabled */
4647 read_only = !(reopen_state->flags & BDRV_O_RDWR);
54a32bfe 4648 ret = bdrv_can_set_read_only(reopen_state->bs, read_only, true, &local_err);
3d8ce171
JC
4649 if (local_err) {
4650 error_propagate(errp, local_err);
e971aa12
JC
4651 goto error;
4652 }
4653
e971aa12 4654 if (drv->bdrv_reopen_prepare) {
faf116b4
AG
4655 /*
4656 * If a driver-specific option is missing, it means that we
4657 * should reset it to its default value.
4658 * But not all options allow that, so we need to check it first.
4659 */
4660 ret = bdrv_reset_options_allowed(reopen_state->bs,
4661 reopen_state->options, errp);
4662 if (ret) {
4663 goto error;
4664 }
4665
e971aa12
JC
4666 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
4667 if (ret) {
4668 if (local_err != NULL) {
4669 error_propagate(errp, local_err);
4670 } else {
f30c66ba 4671 bdrv_refresh_filename(reopen_state->bs);
d8b6895f
LC
4672 error_setg(errp, "failed while preparing to reopen image '%s'",
4673 reopen_state->bs->filename);
e971aa12
JC
4674 }
4675 goto error;
4676 }
4677 } else {
4678 /* It is currently mandatory to have a bdrv_reopen_prepare()
4679 * handler for each supported drv. */
81e5f78a
AG
4680 error_setg(errp, "Block format '%s' used by node '%s' "
4681 "does not support reopening files", drv->format_name,
4682 bdrv_get_device_or_node_name(reopen_state->bs));
e971aa12
JC
4683 ret = -1;
4684 goto error;
4685 }
4686
9ad08c44
HR
4687 drv_prepared = true;
4688
bacd9b87
AG
4689 /*
4690 * We must provide the 'backing' option if the BDS has a backing
4691 * file or if the image file has a backing file name as part of
4692 * its metadata. Otherwise the 'backing' option can be omitted.
4693 */
4694 if (drv->supports_backing && reopen_state->backing_missing &&
1d42f48c 4695 (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
8546632e
AG
4696 error_setg(errp, "backing is missing for '%s'",
4697 reopen_state->bs->node_name);
4698 ret = -EINVAL;
4699 goto error;
4700 }
4701
cb828c31
AG
4702 /*
4703 * Allow changing the 'backing' option. The new value can be
4704 * either a reference to an existing node (using its node name)
4705 * or NULL to simply detach the current backing file.
4706 */
ecd30d2d
AG
4707 ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
4708 change_child_tran, errp);
cb828c31
AG
4709 if (ret < 0) {
4710 goto error;
4711 }
4712 qdict_del(reopen_state->options, "backing");
4713
ecd30d2d
AG
4714 /* Allow changing the 'file' option. In this case NULL is not allowed */
4715 ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
4716 change_child_tran, errp);
4717 if (ret < 0) {
4718 goto error;
4719 }
4720 qdict_del(reopen_state->options, "file");
4721
4d2cb092
KW
4722 /* Options that are not handled are only okay if they are unchanged
4723 * compared to the old state. It is expected that some options are only
4724 * used for the initial open, but not reopen (e.g. filename) */
4725 if (qdict_size(reopen_state->options)) {
4726 const QDictEntry *entry = qdict_first(reopen_state->options);
4727
4728 do {
54fd1b0d
HR
4729 QObject *new = entry->value;
4730 QObject *old = qdict_get(reopen_state->bs->options, entry->key);
4731
db905283
AG
4732 /* Allow child references (child_name=node_name) as long as they
4733 * point to the current child (i.e. everything stays the same). */
4734 if (qobject_type(new) == QTYPE_QSTRING) {
4735 BdrvChild *child;
4736 QLIST_FOREACH(child, &reopen_state->bs->children, next) {
4737 if (!strcmp(child->name, entry->key)) {
4738 break;
4739 }
4740 }
4741
4742 if (child) {
410f44f5
MA
4743 if (!strcmp(child->bs->node_name,
4744 qstring_get_str(qobject_to(QString, new)))) {
db905283
AG
4745 continue; /* Found child with this name, skip option */
4746 }
4747 }
4748 }
4749
129c7d1c 4750 /*
54fd1b0d
HR
4751 * TODO: When using -drive to specify blockdev options, all values
4752 * will be strings; however, when using -blockdev, blockdev-add or
4753 * filenames using the json:{} pseudo-protocol, they will be
4754 * correctly typed.
4755 * In contrast, reopening options are (currently) always strings
4756 * (because you can only specify them through qemu-io; all other
4757 * callers do not specify any options).
4758 * Therefore, when using anything other than -drive to create a BDS,
4759 * this cannot detect non-string options as unchanged, because
4760 * qobject_is_equal() always returns false for objects of different
4761 * type. In the future, this should be remedied by correctly typing
4762 * all options. For now, this is not too big of an issue because
4763 * the user can simply omit options which cannot be changed anyway,
4764 * so they will stay unchanged.
129c7d1c 4765 */
54fd1b0d 4766 if (!qobject_is_equal(new, old)) {
4d2cb092
KW
4767 error_setg(errp, "Cannot change the option '%s'", entry->key);
4768 ret = -EINVAL;
4769 goto error;
4770 }
4771 } while ((entry = qdict_next(reopen_state->options, entry)));
4772 }
4773
e971aa12
JC
4774 ret = 0;
4775
4c8350fe
AG
4776 /* Restore the original reopen_state->options QDict */
4777 qobject_unref(reopen_state->options);
4778 reopen_state->options = qobject_ref(orig_reopen_opts);
4779
e971aa12 4780error:
9ad08c44
HR
4781 if (ret < 0 && drv_prepared) {
4782 /* drv->bdrv_reopen_prepare() has succeeded, so we need to
4783 * call drv->bdrv_reopen_abort() before signaling an error
4784 * (bdrv_reopen_multiple() will not call bdrv_reopen_abort()
4785 * when the respective bdrv_reopen_prepare() has failed) */
4786 if (drv->bdrv_reopen_abort) {
4787 drv->bdrv_reopen_abort(reopen_state);
4788 }
4789 }
ccf9dc07 4790 qemu_opts_del(opts);
4c8350fe 4791 qobject_unref(orig_reopen_opts);
593b3071 4792 g_free(discard);
e971aa12
JC
4793 return ret;
4794}
4795
4796/*
4797 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
4798 * makes them final by swapping the staging BlockDriverState contents into
4799 * the active BlockDriverState contents.
4800 */
53e96d1e 4801static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
e971aa12
JC
4802{
4803 BlockDriver *drv;
50bf65ba 4804 BlockDriverState *bs;
50196d7a 4805 BdrvChild *child;
e971aa12
JC
4806
4807 assert(reopen_state != NULL);
50bf65ba
VSO
4808 bs = reopen_state->bs;
4809 drv = bs->drv;
e971aa12
JC
4810 assert(drv != NULL);
4811
4812 /* If there are any driver level actions to take */
4813 if (drv->bdrv_reopen_commit) {
4814 drv->bdrv_reopen_commit(reopen_state);
4815 }
4816
4817 /* set BDS specific flags now */
cb3e7f08 4818 qobject_unref(bs->explicit_options);
4c8350fe 4819 qobject_unref(bs->options);
ab5b5228
AG
4820 qobject_ref(reopen_state->explicit_options);
4821 qobject_ref(reopen_state->options);
145f598e 4822
50bf65ba 4823 bs->explicit_options = reopen_state->explicit_options;
4c8350fe 4824 bs->options = reopen_state->options;
50bf65ba 4825 bs->open_flags = reopen_state->flags;
543770bd 4826 bs->detect_zeroes = reopen_state->detect_zeroes;
355ef4ac 4827
50196d7a
AG
4828 /* Remove child references from bs->options and bs->explicit_options.
4829 * Child options were already removed in bdrv_reopen_queue_child() */
4830 QLIST_FOREACH(child, &bs->children, next) {
4831 qdict_del(bs->explicit_options, child->name);
4832 qdict_del(bs->options, child->name);
4833 }
3d0e8743
VSO
4834 /* backing is probably removed, so it's not handled by previous loop */
4835 qdict_del(bs->explicit_options, "backing");
4836 qdict_del(bs->options, "backing");
4837
1e4c797c 4838 bdrv_refresh_limits(bs, NULL, NULL);
e971aa12
JC
4839}
4840
4841/*
4842 * Abort the reopen, and delete and free the staged changes in
4843 * reopen_state
4844 */
53e96d1e 4845static void bdrv_reopen_abort(BDRVReopenState *reopen_state)
e971aa12
JC
4846{
4847 BlockDriver *drv;
4848
4849 assert(reopen_state != NULL);
4850 drv = reopen_state->bs->drv;
4851 assert(drv != NULL);
4852
4853 if (drv->bdrv_reopen_abort) {
4854 drv->bdrv_reopen_abort(reopen_state);
4855 }
4856}
4857
4858
64dff520 4859static void bdrv_close(BlockDriverState *bs)
fc01f7e7 4860{
33384421 4861 BdrvAioNotifier *ban, *ban_next;
50a3efb0 4862 BdrvChild *child, *next;
33384421 4863
f791bf7f 4864 GLOBAL_STATE_CODE();
30f55fb8 4865 assert(!bs->refcnt);
99b7e775 4866
fc27291d 4867 bdrv_drained_begin(bs); /* complete I/O */
58fda173 4868 bdrv_flush(bs);
53ec73e2 4869 bdrv_drain(bs); /* in case flush left pending I/O */
fc27291d 4870
3cbc002c 4871 if (bs->drv) {
3c005293 4872 if (bs->drv->bdrv_close) {
7b99a266 4873 /* Must unfreeze all children, so bdrv_unref_child() works */
3c005293
VSO
4874 bs->drv->bdrv_close(bs);
4875 }
9a4f4c31 4876 bs->drv = NULL;
50a3efb0 4877 }
9a7dedbc 4878
50a3efb0 4879 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
dd4118c7 4880 bdrv_unref_child(bs, child);
b338082b 4881 }
98f90dba 4882
dd4118c7
AG
4883 bs->backing = NULL;
4884 bs->file = NULL;
50a3efb0
AG
4885 g_free(bs->opaque);
4886 bs->opaque = NULL;
d73415a3 4887 qatomic_set(&bs->copy_on_read, 0);
50a3efb0
AG
4888 bs->backing_file[0] = '\0';
4889 bs->backing_format[0] = '\0';
4890 bs->total_sectors = 0;
4891 bs->encrypted = false;
4892 bs->sg = false;
cb3e7f08
MAL
4893 qobject_unref(bs->options);
4894 qobject_unref(bs->explicit_options);
50a3efb0
AG
4895 bs->options = NULL;
4896 bs->explicit_options = NULL;
cb3e7f08 4897 qobject_unref(bs->full_open_options);
50a3efb0 4898 bs->full_open_options = NULL;
0bc329fb
HR
4899 g_free(bs->block_status_cache);
4900 bs->block_status_cache = NULL;
50a3efb0 4901
cca43ae1
VSO
4902 bdrv_release_named_dirty_bitmaps(bs);
4903 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
4904
33384421
HR
4905 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4906 g_free(ban);
4907 }
4908 QLIST_INIT(&bs->aio_notifiers);
fc27291d 4909 bdrv_drained_end(bs);
1a6d3bd2
GK
4910
4911 /*
4912 * If we're still inside some bdrv_drain_all_begin()/end() sections, end
4913 * them now since this BDS won't exist anymore when bdrv_drain_all_end()
4914 * gets called.
4915 */
4916 if (bs->quiesce_counter) {
4917 bdrv_drain_all_end_quiesce(bs);
4918 }
b338082b
FB
4919}
4920
2bc93fed
MK
4921void bdrv_close_all(void)
4922{
b3b5299d 4923 assert(job_next(NULL) == NULL);
f791bf7f 4924 GLOBAL_STATE_CODE();
ca9bd24c
HR
4925
4926 /* Drop references from requests still in flight, such as canceled block
4927 * jobs whose AIO context has not been polled yet */
4928 bdrv_drain_all();
2bc93fed 4929
ca9bd24c
HR
4930 blk_remove_all_bs();
4931 blockdev_close_all_bdrv_states();
ed78cda3 4932
a1a2af07 4933 assert(QTAILQ_EMPTY(&all_bdrv_states));
2bc93fed
MK
4934}
4935
d0ac0380
KW
4936static bool should_update_child(BdrvChild *c, BlockDriverState *to)
4937{
2f30b7c3
VSO
4938 GQueue *queue;
4939 GHashTable *found;
4940 bool ret;
d0ac0380 4941
bd86fb99 4942 if (c->klass->stay_at_node) {
d0ac0380
KW
4943 return false;
4944 }
4945
ec9f10fe
HR
4946 /* If the child @c belongs to the BDS @to, replacing the current
4947 * c->bs by @to would mean to create a loop.
4948 *
4949 * Such a case occurs when appending a BDS to a backing chain.
4950 * For instance, imagine the following chain:
4951 *
4952 * guest device -> node A -> further backing chain...
4953 *
4954 * Now we create a new BDS B which we want to put on top of this
4955 * chain, so we first attach A as its backing node:
4956 *
4957 * node B
4958 * |
4959 * v
4960 * guest device -> node A -> further backing chain...
4961 *
4962 * Finally we want to replace A by B. When doing that, we want to
4963 * replace all pointers to A by pointers to B -- except for the
4964 * pointer from B because (1) that would create a loop, and (2)
4965 * that pointer should simply stay intact:
4966 *
4967 * guest device -> node B
4968 * |
4969 * v
4970 * node A -> further backing chain...
4971 *
4972 * In general, when replacing a node A (c->bs) by a node B (@to),
4973 * if A is a child of B, that means we cannot replace A by B there
4974 * because that would create a loop. Silently detaching A from B
4975 * is also not really an option. So overall just leaving A in
2f30b7c3
VSO
4976 * place there is the most sensible choice.
4977 *
4978 * We would also create a loop in any cases where @c is only
4979 * indirectly referenced by @to. Prevent this by returning false
4980 * if @c is found (by breadth-first search) anywhere in the whole
4981 * subtree of @to.
4982 */
4983
4984 ret = true;
4985 found = g_hash_table_new(NULL, NULL);
4986 g_hash_table_add(found, to);
4987 queue = g_queue_new();
4988 g_queue_push_tail(queue, to);
4989
4990 while (!g_queue_is_empty(queue)) {
4991 BlockDriverState *v = g_queue_pop_head(queue);
4992 BdrvChild *c2;
4993
4994 QLIST_FOREACH(c2, &v->children, next) {
4995 if (c2 == c) {
4996 ret = false;
4997 break;
4998 }
4999
5000 if (g_hash_table_contains(found, c2->bs)) {
5001 continue;
5002 }
5003
5004 g_queue_push_tail(queue, c2->bs);
5005 g_hash_table_add(found, c2->bs);
d0ac0380
KW
5006 }
5007 }
5008
2f30b7c3
VSO
5009 g_queue_free(queue);
5010 g_hash_table_destroy(found);
5011
5012 return ret;
d0ac0380
KW
5013}
5014
46541ee5
VSO
5015typedef struct BdrvRemoveFilterOrCowChild {
5016 BdrvChild *child;
82b54cf5 5017 BlockDriverState *bs;
46541ee5
VSO
5018 bool is_backing;
5019} BdrvRemoveFilterOrCowChild;
5020
5021static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
5022{
5023 BdrvRemoveFilterOrCowChild *s = opaque;
5024 BlockDriverState *parent_bs = s->child->opaque;
5025
46541ee5
VSO
5026 if (s->is_backing) {
5027 parent_bs->backing = s->child;
5028 } else {
5029 parent_bs->file = s->child;
5030 }
5031
5032 /*
4bf021db 5033 * We don't have to restore child->bs here to undo bdrv_replace_child_tran()
46541ee5
VSO
5034 * because that function is transactionable and it registered own completion
5035 * entries in @tran, so .abort() for bdrv_replace_child_safe() will be
5036 * called automatically.
5037 */
5038}
5039
5040static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
5041{
5042 BdrvRemoveFilterOrCowChild *s = opaque;
5043
5044 bdrv_child_free(s->child);
5045}
5046
82b54cf5
HR
5047static void bdrv_remove_filter_or_cow_child_clean(void *opaque)
5048{
5049 BdrvRemoveFilterOrCowChild *s = opaque;
5050
5051 /* Drop the bs reference after the transaction is done */
5052 bdrv_unref(s->bs);
5053 g_free(s);
5054}
5055
46541ee5
VSO
5056static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
5057 .abort = bdrv_remove_filter_or_cow_child_abort,
5058 .commit = bdrv_remove_filter_or_cow_child_commit,
82b54cf5 5059 .clean = bdrv_remove_filter_or_cow_child_clean,
46541ee5
VSO
5060};
5061
5062/*
5b995019 5063 * A function to remove backing or file child of @bs.
7ec390d5 5064 * Function doesn't update permissions, caller is responsible for this.
46541ee5 5065 */
5b995019
VSO
5066static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
5067 BdrvChild *child,
5068 Transaction *tran)
46541ee5 5069{
562bda8b 5070 BdrvChild **childp;
46541ee5 5071 BdrvRemoveFilterOrCowChild *s;
5b995019 5072
46541ee5
VSO
5073 if (!child) {
5074 return;
5075 }
5076
82b54cf5
HR
5077 /*
5078 * Keep a reference to @bs so @childp will stay valid throughout the
5079 * transaction (required by bdrv_replace_child_tran())
5080 */
5081 bdrv_ref(bs);
562bda8b
HR
5082 if (child == bs->backing) {
5083 childp = &bs->backing;
5084 } else if (child == bs->file) {
5085 childp = &bs->file;
5086 } else {
5087 g_assert_not_reached();
5088 }
5089
46541ee5 5090 if (child->bs) {
b0a9f6fe
HR
5091 /*
5092 * Pass free_empty_child=false, we will free the child in
5093 * bdrv_remove_filter_or_cow_child_commit()
5094 */
5095 bdrv_replace_child_tran(childp, NULL, tran, false);
46541ee5
VSO
5096 }
5097
5098 s = g_new(BdrvRemoveFilterOrCowChild, 1);
5099 *s = (BdrvRemoveFilterOrCowChild) {
5100 .child = child,
82b54cf5 5101 .bs = bs,
562bda8b 5102 .is_backing = (childp == &bs->backing),
46541ee5
VSO
5103 };
5104 tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
46541ee5
VSO
5105}
5106
5b995019
VSO
5107/*
5108 * A function to remove backing-chain child of @bs if exists: cow child for
5109 * format nodes (always .backing) and filter child for filters (may be .file or
5110 * .backing)
5111 */
5112static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
5113 Transaction *tran)
5114{
5115 bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran);
5116}
5117
117caba9
VSO
5118static int bdrv_replace_node_noperm(BlockDriverState *from,
5119 BlockDriverState *to,
5120 bool auto_skip, Transaction *tran,
5121 Error **errp)
5122{
5123 BdrvChild *c, *next;
5124
82b54cf5
HR
5125 assert(to != NULL);
5126
117caba9
VSO
5127 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
5128 assert(c->bs == from);
5129 if (!should_update_child(c, to)) {
5130 if (auto_skip) {
5131 continue;
5132 }
5133 error_setg(errp, "Should not change '%s' link to '%s'",
5134 c->name, from->node_name);
5135 return -EINVAL;
5136 }
5137 if (c->frozen) {
5138 error_setg(errp, "Cannot change '%s' link to '%s'",
5139 c->name, from->node_name);
5140 return -EPERM;
5141 }
82b54cf5
HR
5142
5143 /*
5144 * Passing a pointer to the local variable @c is fine here, because
5145 * @to is not NULL, and so &c will not be attached to the transaction.
5146 */
b0a9f6fe 5147 bdrv_replace_child_tran(&c, to, tran, true);
117caba9
VSO
5148 }
5149
5150 return 0;
5151}
5152
313274bb
VSO
5153/*
5154 * With auto_skip=true bdrv_replace_node_common skips updating from parents
5155 * if it creates a parent-child relation loop or if parent is block-job.
5156 *
5157 * With auto_skip=false the error is returned if from has a parent which should
5158 * not be updated.
3108a15c
VSO
5159 *
5160 * With @detach_subchain=true @to must be in a backing chain of @from. In this
5161 * case backing link of the cow-parent of @to is removed.
82b54cf5
HR
5162 *
5163 * @to must not be NULL.
313274bb 5164 */
a1e708fc
VSO
5165static int bdrv_replace_node_common(BlockDriverState *from,
5166 BlockDriverState *to,
3108a15c
VSO
5167 bool auto_skip, bool detach_subchain,
5168 Error **errp)
dd62f1ca 5169{
3bb0e298
VSO
5170 Transaction *tran = tran_new();
5171 g_autoptr(GHashTable) found = NULL;
5172 g_autoptr(GSList) refresh_list = NULL;
2d369d6e 5173 BlockDriverState *to_cow_parent = NULL;
234ac1a9
KW
5174 int ret;
5175
82b54cf5
HR
5176 assert(to != NULL);
5177
3108a15c
VSO
5178 if (detach_subchain) {
5179 assert(bdrv_chain_contains(from, to));
5180 assert(from != to);
5181 for (to_cow_parent = from;
5182 bdrv_filter_or_cow_bs(to_cow_parent) != to;
5183 to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
5184 {
5185 ;
5186 }
5187 }
5188
234ac1a9
KW
5189 /* Make sure that @from doesn't go away until we have successfully attached
5190 * all of its parents to @to. */
5191 bdrv_ref(from);
dd62f1ca 5192
f871abd6 5193 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
30dd65f3 5194 assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
f871abd6
KW
5195 bdrv_drained_begin(from);
5196
3bb0e298
VSO
5197 /*
5198 * Do the replacement without permission update.
5199 * Replacement may influence the permissions, we should calculate new
5200 * permissions based on new graph. If we fail, we'll roll-back the
5201 * replacement.
5202 */
117caba9
VSO
5203 ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
5204 if (ret < 0) {
5205 goto out;
234ac1a9
KW
5206 }
5207
3108a15c
VSO
5208 if (detach_subchain) {
5209 bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
5210 }
5211
3bb0e298 5212 found = g_hash_table_new(NULL, NULL);
234ac1a9 5213
3bb0e298
VSO
5214 refresh_list = bdrv_topological_dfs(refresh_list, found, to);
5215 refresh_list = bdrv_topological_dfs(refresh_list, found, from);
9bd910e2 5216
3bb0e298
VSO
5217 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
5218 if (ret < 0) {
5219 goto out;
dd62f1ca 5220 }
234ac1a9 5221
a1e708fc
VSO
5222 ret = 0;
5223
234ac1a9 5224out:
3bb0e298
VSO
5225 tran_finalize(tran, ret);
5226
f871abd6 5227 bdrv_drained_end(from);
234ac1a9 5228 bdrv_unref(from);
a1e708fc
VSO
5229
5230 return ret;
dd62f1ca
KW
5231}
5232
82b54cf5
HR
5233/**
5234 * Replace node @from by @to (where neither may be NULL).
5235 */
a1e708fc
VSO
5236int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
5237 Error **errp)
313274bb 5238{
f791bf7f
EGE
5239 GLOBAL_STATE_CODE();
5240
3108a15c
VSO
5241 return bdrv_replace_node_common(from, to, true, false, errp);
5242}
5243
5244int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
5245{
f791bf7f
EGE
5246 GLOBAL_STATE_CODE();
5247
3108a15c
VSO
5248 return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
5249 errp);
313274bb
VSO
5250}
5251
4ddc07ca
PB
5252/*
5253 * Add new bs contents at the top of an image chain while the chain is
5254 * live, while keeping required fields on the top layer.
5255 *
5256 * This will modify the BlockDriverState fields, and swap contents
5257 * between bs_new and bs_top. Both bs_new and bs_top are modified.
5258 *
2272edcf
VSO
5259 * bs_new must not be attached to a BlockBackend and must not have backing
5260 * child.
4ddc07ca
PB
5261 *
5262 * This function does not create any image files.
5263 */
a1e708fc
VSO
5264int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
5265 Error **errp)
4ddc07ca 5266{
2272edcf
VSO
5267 int ret;
5268 Transaction *tran = tran_new();
5269
f791bf7f
EGE
5270 GLOBAL_STATE_CODE();
5271
2272edcf
VSO
5272 assert(!bs_new->backing);
5273
5274 ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
5275 &child_of_bds, bdrv_backing_role(bs_new),
5276 &bs_new->backing, tran, errp);
a1e708fc 5277 if (ret < 0) {
2272edcf 5278 goto out;
b2c2832c 5279 }
dd62f1ca 5280
2272edcf 5281 ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
a1e708fc 5282 if (ret < 0) {
2272edcf 5283 goto out;
234ac1a9 5284 }
4ddc07ca 5285
2272edcf
VSO
5286 ret = bdrv_refresh_perms(bs_new, errp);
5287out:
5288 tran_finalize(tran, ret);
5289
1e4c797c 5290 bdrv_refresh_limits(bs_top, NULL, NULL);
2272edcf
VSO
5291
5292 return ret;
8802d1fd
JC
5293}
5294
bd8f4c42
VSO
5295/* Not for empty child */
5296int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
5297 Error **errp)
5298{
5299 int ret;
5300 Transaction *tran = tran_new();
5301 g_autoptr(GHashTable) found = NULL;
5302 g_autoptr(GSList) refresh_list = NULL;
5303 BlockDriverState *old_bs = child->bs;
5304
f791bf7f
EGE
5305 GLOBAL_STATE_CODE();
5306
bd8f4c42
VSO
5307 bdrv_ref(old_bs);
5308 bdrv_drained_begin(old_bs);
5309 bdrv_drained_begin(new_bs);
5310
b0a9f6fe
HR
5311 bdrv_replace_child_tran(&child, new_bs, tran, true);
5312 /* @new_bs must have been non-NULL, so @child must not have been freed */
5313 assert(child != NULL);
bd8f4c42
VSO
5314
5315 found = g_hash_table_new(NULL, NULL);
5316 refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);
5317 refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs);
5318
5319 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
5320
5321 tran_finalize(tran, ret);
5322
5323 bdrv_drained_end(old_bs);
5324 bdrv_drained_end(new_bs);
5325 bdrv_unref(old_bs);
5326
5327 return ret;
5328}
5329
4f6fd349 5330static void bdrv_delete(BlockDriverState *bs)
b338082b 5331{
3718d8ab 5332 assert(bdrv_op_blocker_is_empty(bs));
4f6fd349 5333 assert(!bs->refcnt);
f791bf7f 5334 GLOBAL_STATE_CODE();
18846dee 5335
1b7bdbc1 5336 /* remove from list, if necessary */
63eaaae0
KW
5337 if (bs->node_name[0] != '\0') {
5338 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
5339 }
2c1d04e0
HR
5340 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
5341
30c321f9
AK
5342 bdrv_close(bs);
5343
7267c094 5344 g_free(bs);
fc01f7e7
FB
5345}
5346
96796fae
VSO
5347
5348/*
5349 * Replace @bs by newly created block node.
5350 *
5351 * @options is a QDict of options to pass to the block drivers, or NULL for an
5352 * empty set of options. The reference to the QDict belongs to the block layer
5353 * after the call (even on failure), so if the caller intends to reuse the
5354 * dictionary, it needs to use qobject_ref() before calling bdrv_open.
5355 */
5356BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
8872ef78
AS
5357 int flags, Error **errp)
5358{
f053b7e8
VSO
5359 ERRP_GUARD();
5360 int ret;
b11c8739
VSO
5361 BlockDriverState *new_node_bs = NULL;
5362 const char *drvname, *node_name;
5363 BlockDriver *drv;
5364
5365 drvname = qdict_get_try_str(options, "driver");
5366 if (!drvname) {
5367 error_setg(errp, "driver is not specified");
5368 goto fail;
5369 }
5370
5371 drv = bdrv_find_format(drvname);
5372 if (!drv) {
5373 error_setg(errp, "Unknown driver: '%s'", drvname);
5374 goto fail;
5375 }
8872ef78 5376
b11c8739
VSO
5377 node_name = qdict_get_try_str(options, "node-name");
5378
f791bf7f
EGE
5379 GLOBAL_STATE_CODE();
5380
b11c8739
VSO
5381 new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
5382 errp);
5383 options = NULL; /* bdrv_new_open_driver() eats options */
5384 if (!new_node_bs) {
8872ef78 5385 error_prepend(errp, "Could not create node: ");
b11c8739 5386 goto fail;
8872ef78
AS
5387 }
5388
5389 bdrv_drained_begin(bs);
f053b7e8 5390 ret = bdrv_replace_node(bs, new_node_bs, errp);
8872ef78
AS
5391 bdrv_drained_end(bs);
5392
f053b7e8
VSO
5393 if (ret < 0) {
5394 error_prepend(errp, "Could not replace node: ");
b11c8739 5395 goto fail;
8872ef78
AS
5396 }
5397
5398 return new_node_bs;
b11c8739
VSO
5399
5400fail:
5401 qobject_unref(options);
5402 bdrv_unref(new_node_bs);
5403 return NULL;
8872ef78
AS
5404}
5405
e97fc193
AL
5406/*
5407 * Run consistency checks on an image
5408 *
e076f338 5409 * Returns 0 if the check could be completed (it doesn't mean that the image is
a1c7273b 5410 * free of errors) or -errno when an internal error occurred. The results of the
e076f338 5411 * check are stored in res.
e97fc193 5412 */
21c2283e
VSO
5413int coroutine_fn bdrv_co_check(BlockDriverState *bs,
5414 BdrvCheckResult *res, BdrvCheckMode fix)
e97fc193 5415{
908bcd54
HR
5416 if (bs->drv == NULL) {
5417 return -ENOMEDIUM;
5418 }
2fd61638 5419 if (bs->drv->bdrv_co_check == NULL) {
e97fc193
AL
5420 return -ENOTSUP;
5421 }
5422
e076f338 5423 memset(res, 0, sizeof(*res));
2fd61638
PB
5424 return bs->drv->bdrv_co_check(bs, res, fix);
5425}
5426
756e6736
KW
5427/*
5428 * Return values:
5429 * 0 - success
5430 * -EINVAL - backing format specified, but no file
5431 * -ENOSPC - can't update the backing file because no space is left in the
5432 * image file header
5433 * -ENOTSUP - format driver doesn't support changing the backing file
5434 */
e54ee1b3 5435int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
497a30db 5436 const char *backing_fmt, bool require)
756e6736
KW
5437{
5438 BlockDriver *drv = bs->drv;
469ef350 5439 int ret;
756e6736 5440
f791bf7f
EGE
5441 GLOBAL_STATE_CODE();
5442
d470ad42
HR
5443 if (!drv) {
5444 return -ENOMEDIUM;
5445 }
5446
5f377794
PB
5447 /* Backing file format doesn't make sense without a backing file */
5448 if (backing_fmt && !backing_file) {
5449 return -EINVAL;
5450 }
5451
497a30db
EB
5452 if (require && backing_file && !backing_fmt) {
5453 return -EINVAL;
e54ee1b3
EB
5454 }
5455
756e6736 5456 if (drv->bdrv_change_backing_file != NULL) {
469ef350 5457 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
756e6736 5458 } else {
469ef350 5459 ret = -ENOTSUP;
756e6736 5460 }
469ef350
PB
5461
5462 if (ret == 0) {
5463 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
5464 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
998c2019
HR
5465 pstrcpy(bs->auto_backing_file, sizeof(bs->auto_backing_file),
5466 backing_file ?: "");
469ef350
PB
5467 }
5468 return ret;
756e6736
KW
5469}
5470
6ebdcee2 5471/*
dcf3f9b2
HR
5472 * Finds the first non-filter node above bs in the chain between
5473 * active and bs. The returned node is either an immediate parent of
5474 * bs, or there are only filter nodes between the two.
6ebdcee2
JC
5475 *
5476 * Returns NULL if bs is not found in active's image chain,
5477 * or if active == bs.
4caf0fcd
JC
5478 *
5479 * Returns the bottommost base image if bs == NULL.
6ebdcee2
JC
5480 */
5481BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
5482 BlockDriverState *bs)
5483{
f791bf7f
EGE
5484
5485 GLOBAL_STATE_CODE();
5486
dcf3f9b2
HR
5487 bs = bdrv_skip_filters(bs);
5488 active = bdrv_skip_filters(active);
5489
5490 while (active) {
5491 BlockDriverState *next = bdrv_backing_chain_next(active);
5492 if (bs == next) {
5493 return active;
5494 }
5495 active = next;
6ebdcee2
JC
5496 }
5497
dcf3f9b2 5498 return NULL;
4caf0fcd 5499}
6ebdcee2 5500
4caf0fcd
JC
5501/* Given a BDS, searches for the base layer. */
5502BlockDriverState *bdrv_find_base(BlockDriverState *bs)
5503{
f791bf7f
EGE
5504 GLOBAL_STATE_CODE();
5505
4caf0fcd 5506 return bdrv_find_overlay(bs, NULL);
6ebdcee2
JC
5507}
5508
2cad1ebe 5509/*
7b99a266
HR
5510 * Return true if at least one of the COW (backing) and filter links
5511 * between @bs and @base is frozen. @errp is set if that's the case.
0f0998f6 5512 * @base must be reachable from @bs, or NULL.
2cad1ebe
AG
5513 */
5514bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
5515 Error **errp)
5516{
5517 BlockDriverState *i;
7b99a266 5518 BdrvChild *child;
2cad1ebe 5519
f791bf7f
EGE
5520 GLOBAL_STATE_CODE();
5521
7b99a266
HR
5522 for (i = bs; i != base; i = child_bs(child)) {
5523 child = bdrv_filter_or_cow_child(i);
5524
5525 if (child && child->frozen) {
2cad1ebe 5526 error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
7b99a266 5527 child->name, i->node_name, child->bs->node_name);
2cad1ebe
AG
5528 return true;
5529 }
5530 }
5531
5532 return false;
5533}
5534
5535/*
7b99a266 5536 * Freeze all COW (backing) and filter links between @bs and @base.
2cad1ebe
AG
5537 * If any of the links is already frozen the operation is aborted and
5538 * none of the links are modified.
0f0998f6 5539 * @base must be reachable from @bs, or NULL.
2cad1ebe
AG
5540 * Returns 0 on success. On failure returns < 0 and sets @errp.
5541 */
5542int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
5543 Error **errp)
5544{
5545 BlockDriverState *i;
7b99a266 5546 BdrvChild *child;
2cad1ebe 5547
f791bf7f
EGE
5548 GLOBAL_STATE_CODE();
5549
2cad1ebe
AG
5550 if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
5551 return -EPERM;
5552 }
5553
7b99a266
HR
5554 for (i = bs; i != base; i = child_bs(child)) {
5555 child = bdrv_filter_or_cow_child(i);
5556 if (child && child->bs->never_freeze) {
e5182c1c 5557 error_setg(errp, "Cannot freeze '%s' link to '%s'",
7b99a266 5558 child->name, child->bs->node_name);
e5182c1c
HR
5559 return -EPERM;
5560 }
5561 }
5562
7b99a266
HR
5563 for (i = bs; i != base; i = child_bs(child)) {
5564 child = bdrv_filter_or_cow_child(i);
5565 if (child) {
5566 child->frozen = true;
0f0998f6 5567 }
2cad1ebe
AG
5568 }
5569
5570 return 0;
5571}
5572
5573/*
7b99a266
HR
5574 * Unfreeze all COW (backing) and filter links between @bs and @base.
5575 * The caller must ensure that all links are frozen before using this
5576 * function.
0f0998f6 5577 * @base must be reachable from @bs, or NULL.
2cad1ebe
AG
5578 */
5579void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
5580{
5581 BlockDriverState *i;
7b99a266 5582 BdrvChild *child;
2cad1ebe 5583
f791bf7f
EGE
5584 GLOBAL_STATE_CODE();
5585
7b99a266
HR
5586 for (i = bs; i != base; i = child_bs(child)) {
5587 child = bdrv_filter_or_cow_child(i);
5588 if (child) {
5589 assert(child->frozen);
5590 child->frozen = false;
0f0998f6 5591 }
2cad1ebe
AG
5592 }
5593}
5594
6ebdcee2
JC
5595/*
5596 * Drops images above 'base' up to and including 'top', and sets the image
5597 * above 'top' to have base as its backing file.
5598 *
5599 * Requires that the overlay to 'top' is opened r/w, so that the backing file
5600 * information in 'bs' can be properly updated.
5601 *
5602 * E.g., this will convert the following chain:
5603 * bottom <- base <- intermediate <- top <- active
5604 *
5605 * to
5606 *
5607 * bottom <- base <- active
5608 *
5609 * It is allowed for bottom==base, in which case it converts:
5610 *
5611 * base <- intermediate <- top <- active
5612 *
5613 * to
5614 *
5615 * base <- active
5616 *
54e26900
JC
5617 * If backing_file_str is non-NULL, it will be used when modifying top's
5618 * overlay image metadata.
5619 *
6ebdcee2
JC
5620 * Error conditions:
5621 * if active == top, that is considered an error
5622 *
5623 */
bde70715
KW
5624int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
5625 const char *backing_file_str)
6ebdcee2 5626{
6bd858b3
AG
5627 BlockDriverState *explicit_top = top;
5628 bool update_inherits_from;
d669ed6a 5629 BdrvChild *c;
12fa4af6 5630 Error *local_err = NULL;
6ebdcee2 5631 int ret = -EIO;
d669ed6a
VSO
5632 g_autoptr(GSList) updated_children = NULL;
5633 GSList *p;
6ebdcee2 5634
f791bf7f
EGE
5635 GLOBAL_STATE_CODE();
5636
6858eba0 5637 bdrv_ref(top);
637d54a5 5638 bdrv_subtree_drained_begin(top);
6858eba0 5639
6ebdcee2
JC
5640 if (!top->drv || !base->drv) {
5641 goto exit;
5642 }
5643
5db15a57
KW
5644 /* Make sure that base is in the backing chain of top */
5645 if (!bdrv_chain_contains(top, base)) {
6ebdcee2
JC
5646 goto exit;
5647 }
5648
6bd858b3
AG
5649 /* If 'base' recursively inherits from 'top' then we should set
5650 * base->inherits_from to top->inherits_from after 'top' and all
5651 * other intermediate nodes have been dropped.
5652 * If 'top' is an implicit node (e.g. "commit_top") we should skip
5653 * it because no one inherits from it. We use explicit_top for that. */
dcf3f9b2 5654 explicit_top = bdrv_skip_implicit_filters(explicit_top);
6bd858b3
AG
5655 update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
5656
6ebdcee2 5657 /* success - we can delete the intermediate states, and link top->base */
f30c66ba
HR
5658 if (!backing_file_str) {
5659 bdrv_refresh_filename(base);
5660 backing_file_str = base->filename;
5661 }
61f09cea 5662
d669ed6a
VSO
5663 QLIST_FOREACH(c, &top->parents, next_parent) {
5664 updated_children = g_slist_prepend(updated_children, c);
5665 }
5666
3108a15c
VSO
5667 /*
5668 * It seems correct to pass detach_subchain=true here, but it triggers
5669 * one more yet not fixed bug, when due to nested aio_poll loop we switch to
5670 * another drained section, which modify the graph (for example, removing
5671 * the child, which we keep in updated_children list). So, it's a TODO.
5672 *
5673 * Note, bug triggered if pass detach_subchain=true here and run
5674 * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
5675 * That's a FIXME.
5676 */
5677 bdrv_replace_node_common(top, base, false, false, &local_err);
d669ed6a
VSO
5678 if (local_err) {
5679 error_report_err(local_err);
5680 goto exit;
5681 }
5682
5683 for (p = updated_children; p; p = p->next) {
5684 c = p->data;
12fa4af6 5685
bd86fb99
HR
5686 if (c->klass->update_filename) {
5687 ret = c->klass->update_filename(c, base, backing_file_str,
5688 &local_err);
61f09cea 5689 if (ret < 0) {
d669ed6a
VSO
5690 /*
5691 * TODO: Actually, we want to rollback all previous iterations
5692 * of this loop, and (which is almost impossible) previous
5693 * bdrv_replace_node()...
5694 *
5695 * Note, that c->klass->update_filename may lead to permission
5696 * update, so it's a bad idea to call it inside permission
5697 * update transaction of bdrv_replace_node.
5698 */
61f09cea
KW
5699 error_report_err(local_err);
5700 goto exit;
5701 }
5702 }
12fa4af6 5703 }
6ebdcee2 5704
6bd858b3
AG
5705 if (update_inherits_from) {
5706 base->inherits_from = explicit_top->inherits_from;
5707 }
5708
6ebdcee2 5709 ret = 0;
6ebdcee2 5710exit:
637d54a5 5711 bdrv_subtree_drained_end(top);
6858eba0 5712 bdrv_unref(top);
6ebdcee2
JC
5713 return ret;
5714}
5715
081e4650
HR
5716/**
5717 * Implementation of BlockDriver.bdrv_get_allocated_file_size() that
5718 * sums the size of all data-bearing children. (This excludes backing
5719 * children.)
5720 */
5721static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
5722{
5723 BdrvChild *child;
5724 int64_t child_size, sum = 0;
5725
5726 QLIST_FOREACH(child, &bs->children, next) {
5727 if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
5728 BDRV_CHILD_FILTERED))
5729 {
5730 child_size = bdrv_get_allocated_file_size(child->bs);
5731 if (child_size < 0) {
5732 return child_size;
5733 }
5734 sum += child_size;
5735 }
5736 }
5737
5738 return sum;
5739}
5740
61007b31
SH
5741/**
5742 * Length of a allocated file in bytes. Sparse files are counted by actual
5743 * allocated space. Return < 0 if error or unknown.
5744 */
5745int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
71d0770c 5746{
61007b31 5747 BlockDriver *drv = bs->drv;
384a48fb
EGE
5748 IO_CODE();
5749
61007b31
SH
5750 if (!drv) {
5751 return -ENOMEDIUM;
8f4754ed 5752 }
61007b31
SH
5753 if (drv->bdrv_get_allocated_file_size) {
5754 return drv->bdrv_get_allocated_file_size(bs);
5755 }
081e4650
HR
5756
5757 if (drv->bdrv_file_open) {
5758 /*
5759 * Protocol drivers default to -ENOTSUP (most of their data is
5760 * not stored in any of their children (if they even have any),
5761 * so there is no generic way to figure it out).
5762 */
5763 return -ENOTSUP;
5764 } else if (drv->is_filter) {
5765 /* Filter drivers default to the size of their filtered child */
5766 return bdrv_get_allocated_file_size(bdrv_filter_bs(bs));
5767 } else {
5768 /* Other drivers default to summing their children's sizes */
5769 return bdrv_sum_allocated_file_size(bs);
1c9805a3
SH
5770 }
5771}
e7a8a783 5772
90880ff1
SH
5773/*
5774 * bdrv_measure:
5775 * @drv: Format driver
5776 * @opts: Creation options for new image
5777 * @in_bs: Existing image containing data for new image (may be NULL)
5778 * @errp: Error object
5779 * Returns: A #BlockMeasureInfo (free using qapi_free_BlockMeasureInfo())
5780 * or NULL on error
5781 *
5782 * Calculate file size required to create a new image.
5783 *
5784 * If @in_bs is given then space for allocated clusters and zero clusters
5785 * from that image are included in the calculation. If @opts contains a
5786 * backing file that is shared by @in_bs then backing clusters may be omitted
5787 * from the calculation.
5788 *
5789 * If @in_bs is NULL then the calculation includes no allocated clusters
5790 * unless a preallocation option is given in @opts.
5791 *
5792 * Note that @in_bs may use a different BlockDriver from @drv.
5793 *
5794 * If an error occurs the @errp pointer is set.
5795 */
5796BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
5797 BlockDriverState *in_bs, Error **errp)
5798{
384a48fb 5799 IO_CODE();
90880ff1
SH
5800 if (!drv->bdrv_measure) {
5801 error_setg(errp, "Block driver '%s' does not support size measurement",
5802 drv->format_name);
5803 return NULL;
5804 }
5805
5806 return drv->bdrv_measure(opts, in_bs, errp);
5807}
5808
61007b31
SH
5809/**
5810 * Return number of sectors on success, -errno on error.
1c9805a3 5811 */
61007b31 5812int64_t bdrv_nb_sectors(BlockDriverState *bs)
1c9805a3 5813{
61007b31 5814 BlockDriver *drv = bs->drv;
384a48fb 5815 IO_CODE();
498e386c 5816
61007b31
SH
5817 if (!drv)
5818 return -ENOMEDIUM;
2572b37a 5819
61007b31
SH
5820 if (drv->has_variable_length) {
5821 int ret = refresh_total_sectors(bs, bs->total_sectors);
5822 if (ret < 0) {
5823 return ret;
1c9805a3
SH
5824 }
5825 }
61007b31 5826 return bs->total_sectors;
1c9805a3 5827}
b338082b 5828
61007b31
SH
5829/**
5830 * Return length in bytes on success, -errno on error.
5831 * The length is always a multiple of BDRV_SECTOR_SIZE.
8d3b1a2d 5832 */
61007b31 5833int64_t bdrv_getlength(BlockDriverState *bs)
8d3b1a2d 5834{
61007b31 5835 int64_t ret = bdrv_nb_sectors(bs);
384a48fb 5836 IO_CODE();
8d3b1a2d 5837
122860ba
EB
5838 if (ret < 0) {
5839 return ret;
5840 }
5841 if (ret > INT64_MAX / BDRV_SECTOR_SIZE) {
5842 return -EFBIG;
5843 }
5844 return ret * BDRV_SECTOR_SIZE;
fc01f7e7
FB
5845}
5846
61007b31
SH
5847/* return 0 as number of sectors if no device present or error */
5848void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
07d27a44 5849{
61007b31 5850 int64_t nb_sectors = bdrv_nb_sectors(bs);
384a48fb 5851 IO_CODE();
07d27a44 5852
61007b31 5853 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
07d27a44
MA
5854}
5855
54115412 5856bool bdrv_is_sg(BlockDriverState *bs)
f08145fe 5857{
384a48fb 5858 IO_CODE();
61007b31 5859 return bs->sg;
f08145fe
KW
5860}
5861
ae23f786
HR
5862/**
5863 * Return whether the given node supports compressed writes.
5864 */
5865bool bdrv_supports_compressed_writes(BlockDriverState *bs)
5866{
5867 BlockDriverState *filtered;
384a48fb 5868 IO_CODE();
ae23f786
HR
5869
5870 if (!bs->drv || !block_driver_can_compress(bs->drv)) {
5871 return false;
5872 }
5873
5874 filtered = bdrv_filter_bs(bs);
5875 if (filtered) {
5876 /*
5877 * Filters can only forward compressed writes, so we have to
5878 * check the child.
5879 */
5880 return bdrv_supports_compressed_writes(filtered);
5881 }
5882
5883 return true;
5884}
5885
61007b31 5886const char *bdrv_get_format_name(BlockDriverState *bs)
40b4f539 5887{
384a48fb 5888 IO_CODE();
61007b31 5889 return bs->drv ? bs->drv->format_name : NULL;
40b4f539
KW
5890}
5891
/*
 * qsort() comparator for an array of C strings: @a and @b each point at an
 * array element (a char pointer); the pointed-to strings are compared
 * with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
5896
61007b31 5897void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
9ac404c5 5898 void *opaque, bool read_only)
40b4f539 5899{
61007b31
SH
5900 BlockDriver *drv;
5901 int count = 0;
5902 int i;
5903 const char **formats = NULL;
40b4f539 5904
f791bf7f
EGE
5905 GLOBAL_STATE_CODE();
5906
61007b31
SH
5907 QLIST_FOREACH(drv, &bdrv_drivers, list) {
5908 if (drv->format_name) {
5909 bool found = false;
5910 int i = count;
9ac404c5
AS
5911
5912 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, read_only)) {
5913 continue;
5914 }
5915
61007b31
SH
5916 while (formats && i && !found) {
5917 found = !strcmp(formats[--i], drv->format_name);
5918 }
e2a305fb 5919
61007b31
SH
5920 if (!found) {
5921 formats = g_renew(const char *, formats, count + 1);
5922 formats[count++] = drv->format_name;
5923 }
6c5a42ac 5924 }
61007b31 5925 }
6c5a42ac 5926
eb0df69f
HR
5927 for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); i++) {
5928 const char *format_name = block_driver_modules[i].format_name;
5929
5930 if (format_name) {
5931 bool found = false;
5932 int j = count;
5933
9ac404c5
AS
5934 if (use_bdrv_whitelist &&
5935 !bdrv_format_is_whitelisted(format_name, read_only)) {
5936 continue;
5937 }
5938
eb0df69f
HR
5939 while (formats && j && !found) {
5940 found = !strcmp(formats[--j], format_name);
5941 }
5942
5943 if (!found) {
5944 formats = g_renew(const char *, formats, count + 1);
5945 formats[count++] = format_name;
5946 }
5947 }
5948 }
5949
61007b31 5950 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
40b4f539 5951
61007b31
SH
5952 for (i = 0; i < count; i++) {
5953 it(opaque, formats[i]);
5954 }
40b4f539 5955
61007b31
SH
5956 g_free(formats);
5957}
40b4f539 5958
61007b31
SH
5959/* This function is to find a node in the bs graph */
5960BlockDriverState *bdrv_find_node(const char *node_name)
5961{
5962 BlockDriverState *bs;
391827eb 5963
61007b31 5964 assert(node_name);
f791bf7f 5965 GLOBAL_STATE_CODE();
40b4f539 5966
61007b31
SH
5967 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
5968 if (!strcmp(node_name, bs->node_name)) {
5969 return bs;
40b4f539
KW
5970 }
5971 }
61007b31 5972 return NULL;
40b4f539
KW
5973}
5974
61007b31 5975/* Put this QMP function here so it can access the static graph_bdrv_states. */
facda544
PK
5976BlockDeviceInfoList *bdrv_named_nodes_list(bool flat,
5977 Error **errp)
40b4f539 5978{
9812e712 5979 BlockDeviceInfoList *list;
61007b31 5980 BlockDriverState *bs;
40b4f539 5981
f791bf7f
EGE
5982 GLOBAL_STATE_CODE();
5983
61007b31
SH
5984 list = NULL;
5985 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
facda544 5986 BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, flat, errp);
61007b31
SH
5987 if (!info) {
5988 qapi_free_BlockDeviceInfoList(list);
5989 return NULL;
301db7c2 5990 }
9812e712 5991 QAPI_LIST_PREPEND(list, info);
301db7c2
RH
5992 }
5993
61007b31
SH
5994 return list;
5995}
40b4f539 5996
5d3b4e99
VSO
5997typedef struct XDbgBlockGraphConstructor {
5998 XDbgBlockGraph *graph;
5999 GHashTable *graph_nodes;
6000} XDbgBlockGraphConstructor;
6001
6002static XDbgBlockGraphConstructor *xdbg_graph_new(void)
6003{
6004 XDbgBlockGraphConstructor *gr = g_new(XDbgBlockGraphConstructor, 1);
6005
6006 gr->graph = g_new0(XDbgBlockGraph, 1);
6007 gr->graph_nodes = g_hash_table_new(NULL, NULL);
6008
6009 return gr;
6010}
6011
6012static XDbgBlockGraph *xdbg_graph_finalize(XDbgBlockGraphConstructor *gr)
6013{
6014 XDbgBlockGraph *graph = gr->graph;
6015
6016 g_hash_table_destroy(gr->graph_nodes);
6017 g_free(gr);
6018
6019 return graph;
6020}
6021
6022static uintptr_t xdbg_graph_node_num(XDbgBlockGraphConstructor *gr, void *node)
6023{
6024 uintptr_t ret = (uintptr_t)g_hash_table_lookup(gr->graph_nodes, node);
6025
6026 if (ret != 0) {
6027 return ret;
6028 }
6029
6030 /*
6031 * Start counting from 1, not 0, because 0 interferes with not-found (NULL)
6032 * answer of g_hash_table_lookup.
6033 */
6034 ret = g_hash_table_size(gr->graph_nodes) + 1;
6035 g_hash_table_insert(gr->graph_nodes, node, (void *)ret);
6036
6037 return ret;
6038}
6039
6040static void xdbg_graph_add_node(XDbgBlockGraphConstructor *gr, void *node,
6041 XDbgBlockGraphNodeType type, const char *name)
6042{
6043 XDbgBlockGraphNode *n;
6044
6045 n = g_new0(XDbgBlockGraphNode, 1);
6046
6047 n->id = xdbg_graph_node_num(gr, node);
6048 n->type = type;
6049 n->name = g_strdup(name);
6050
9812e712 6051 QAPI_LIST_PREPEND(gr->graph->nodes, n);
5d3b4e99
VSO
6052}
6053
6054static void xdbg_graph_add_edge(XDbgBlockGraphConstructor *gr, void *parent,
6055 const BdrvChild *child)
6056{
cdb1cec8 6057 BlockPermission qapi_perm;
5d3b4e99 6058 XDbgBlockGraphEdge *edge;
862fded9 6059 GLOBAL_STATE_CODE();
5d3b4e99 6060
5d3b4e99
VSO
6061 edge = g_new0(XDbgBlockGraphEdge, 1);
6062
6063 edge->parent = xdbg_graph_node_num(gr, parent);
6064 edge->child = xdbg_graph_node_num(gr, child->bs);
6065 edge->name = g_strdup(child->name);
6066
cdb1cec8
HR
6067 for (qapi_perm = 0; qapi_perm < BLOCK_PERMISSION__MAX; qapi_perm++) {
6068 uint64_t flag = bdrv_qapi_perm_to_blk_perm(qapi_perm);
6069
6070 if (flag & child->perm) {
9812e712 6071 QAPI_LIST_PREPEND(edge->perm, qapi_perm);
5d3b4e99 6072 }
cdb1cec8 6073 if (flag & child->shared_perm) {
9812e712 6074 QAPI_LIST_PREPEND(edge->shared_perm, qapi_perm);
5d3b4e99
VSO
6075 }
6076 }
6077
9812e712 6078 QAPI_LIST_PREPEND(gr->graph->edges, edge);
5d3b4e99
VSO
6079}
6080
6081
6082XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
6083{
6084 BlockBackend *blk;
6085 BlockJob *job;
6086 BlockDriverState *bs;
6087 BdrvChild *child;
6088 XDbgBlockGraphConstructor *gr = xdbg_graph_new();
6089
f791bf7f
EGE
6090 GLOBAL_STATE_CODE();
6091
5d3b4e99
VSO
6092 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
6093 char *allocated_name = NULL;
6094 const char *name = blk_name(blk);
6095
6096 if (!*name) {
6097 name = allocated_name = blk_get_attached_dev_id(blk);
6098 }
6099 xdbg_graph_add_node(gr, blk, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND,
6100 name);
6101 g_free(allocated_name);
6102 if (blk_root(blk)) {
6103 xdbg_graph_add_edge(gr, blk, blk_root(blk));
6104 }
6105 }
6106
6107 for (job = block_job_next(NULL); job; job = block_job_next(job)) {
6108 GSList *el;
6109
6110 xdbg_graph_add_node(gr, job, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_JOB,
6111 job->job.id);
6112 for (el = job->nodes; el; el = el->next) {
6113 xdbg_graph_add_edge(gr, job, (BdrvChild *)el->data);
6114 }
6115 }
6116
6117 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
6118 xdbg_graph_add_node(gr, bs, X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_DRIVER,
6119 bs->node_name);
6120 QLIST_FOREACH(child, &bs->children, next) {
6121 xdbg_graph_add_edge(gr, bs, child);
6122 }
6123 }
6124
6125 return xdbg_graph_finalize(gr);
6126}
6127
61007b31
SH
6128BlockDriverState *bdrv_lookup_bs(const char *device,
6129 const char *node_name,
6130 Error **errp)
6131{
6132 BlockBackend *blk;
6133 BlockDriverState *bs;
40b4f539 6134
f791bf7f
EGE
6135 GLOBAL_STATE_CODE();
6136
61007b31
SH
6137 if (device) {
6138 blk = blk_by_name(device);
40b4f539 6139
61007b31 6140 if (blk) {
9f4ed6fb
AG
6141 bs = blk_bs(blk);
6142 if (!bs) {
5433c24f 6143 error_setg(errp, "Device '%s' has no medium", device);
5433c24f
HR
6144 }
6145
9f4ed6fb 6146 return bs;
61007b31
SH
6147 }
6148 }
40b4f539 6149
61007b31
SH
6150 if (node_name) {
6151 bs = bdrv_find_node(node_name);
6d519a5f 6152
61007b31
SH
6153 if (bs) {
6154 return bs;
6155 }
40b4f539
KW
6156 }
6157
785ec4b1 6158 error_setg(errp, "Cannot find device=\'%s\' nor node-name=\'%s\'",
61007b31
SH
6159 device ? device : "",
6160 node_name ? node_name : "");
6161 return NULL;
40b4f539
KW
6162}
6163
61007b31
SH
6164/* If 'base' is in the same chain as 'top', return true. Otherwise,
6165 * return false. If either argument is NULL, return false. */
6166bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
83f64091 6167{
f791bf7f
EGE
6168
6169 GLOBAL_STATE_CODE();
6170
61007b31 6171 while (top && top != base) {
dcf3f9b2 6172 top = bdrv_filter_or_cow_bs(top);
02c50efe 6173 }
61007b31
SH
6174
6175 return top != NULL;
02c50efe
FZ
6176}
6177
61007b31 6178BlockDriverState *bdrv_next_node(BlockDriverState *bs)
02c50efe 6179{
f791bf7f 6180 GLOBAL_STATE_CODE();
61007b31
SH
6181 if (!bs) {
6182 return QTAILQ_FIRST(&graph_bdrv_states);
02c50efe 6183 }
61007b31 6184 return QTAILQ_NEXT(bs, node_list);
83f64091
FB
6185}
6186
0f12264e
KW
6187BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
6188{
f791bf7f 6189 GLOBAL_STATE_CODE();
0f12264e
KW
6190 if (!bs) {
6191 return QTAILQ_FIRST(&all_bdrv_states);
6192 }
6193 return QTAILQ_NEXT(bs, bs_list);
6194}
6195
61007b31 6196const char *bdrv_get_node_name(const BlockDriverState *bs)
83f64091 6197{
384a48fb 6198 IO_CODE();
61007b31 6199 return bs->node_name;
beac80cd
FB
6200}
6201
1f0c461b 6202const char *bdrv_get_parent_name(const BlockDriverState *bs)
4c265bf9
KW
6203{
6204 BdrvChild *c;
6205 const char *name;
967d7905 6206 IO_CODE();
4c265bf9
KW
6207
6208 /* If multiple parents have a name, just pick the first one. */
6209 QLIST_FOREACH(c, &bs->parents, next_parent) {
bd86fb99
HR
6210 if (c->klass->get_name) {
6211 name = c->klass->get_name(c);
4c265bf9
KW
6212 if (name && *name) {
6213 return name;
6214 }
6215 }
6216 }
6217
6218 return NULL;
6219}
6220
61007b31
SH
6221/* TODO check what callers really want: bs->node_name or blk_name() */
6222const char *bdrv_get_device_name(const BlockDriverState *bs)
beac80cd 6223{
384a48fb 6224 IO_CODE();
4c265bf9 6225 return bdrv_get_parent_name(bs) ?: "";
f141eafe 6226}
83f64091 6227
61007b31
SH
6228/* This can be used to identify nodes that might not have a device
6229 * name associated. Since node and device names live in the same
6230 * namespace, the result is unambiguous. The exception is if both are
6231 * absent, then this returns an empty (non-null) string. */
6232const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
f141eafe 6233{
384a48fb 6234 IO_CODE();
4c265bf9 6235 return bdrv_get_parent_name(bs) ?: bs->node_name;
beac80cd 6236}
beac80cd 6237
61007b31 6238int bdrv_get_flags(BlockDriverState *bs)
0b5a2445 6239{
f791bf7f 6240 GLOBAL_STATE_CODE();
61007b31 6241 return bs->open_flags;
0b5a2445
PB
6242}
6243
61007b31 6244int bdrv_has_zero_init_1(BlockDriverState *bs)
68485420 6245{
f791bf7f 6246 GLOBAL_STATE_CODE();
61007b31 6247 return 1;
0b5a2445
PB
6248}
6249
61007b31 6250int bdrv_has_zero_init(BlockDriverState *bs)
0b5a2445 6251{
93393e69 6252 BlockDriverState *filtered;
f791bf7f 6253 GLOBAL_STATE_CODE();
93393e69 6254
d470ad42
HR
6255 if (!bs->drv) {
6256 return 0;
6257 }
0b5a2445 6258
61007b31
SH
6259 /* If BS is a copy on write image, it is initialized to
6260 the contents of the base image, which may not be zeroes. */
34778172 6261 if (bdrv_cow_child(bs)) {
61007b31
SH
6262 return 0;
6263 }
6264 if (bs->drv->bdrv_has_zero_init) {
6265 return bs->drv->bdrv_has_zero_init(bs);
0b5a2445 6266 }
93393e69
HR
6267
6268 filtered = bdrv_filter_bs(bs);
6269 if (filtered) {
6270 return bdrv_has_zero_init(filtered);
5a612c00 6271 }
61007b31
SH
6272
6273 /* safe default */
6274 return 0;
68485420
KW
6275}
6276
61007b31 6277bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
68485420 6278{
384a48fb 6279 IO_CODE();
2f0342ef 6280 if (!(bs->open_flags & BDRV_O_UNMAP)) {
61007b31
SH
6281 return false;
6282 }
68485420 6283
e24d813b 6284 return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
68485420
KW
6285}
6286
61007b31
SH
6287void bdrv_get_backing_filename(BlockDriverState *bs,
6288 char *filename, int filename_size)
016f5cf6 6289{
384a48fb 6290 IO_CODE();
61007b31
SH
6291 pstrcpy(filename, filename_size, bs->backing_file);
6292}
d318aea9 6293
61007b31
SH
6294int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
6295{
8b117001 6296 int ret;
61007b31 6297 BlockDriver *drv = bs->drv;
384a48fb 6298 IO_CODE();
5a612c00
MP
6299 /* if bs->drv == NULL, bs is closed, so there's nothing to do here */
6300 if (!drv) {
61007b31 6301 return -ENOMEDIUM;
5a612c00
MP
6302 }
6303 if (!drv->bdrv_get_info) {
93393e69
HR
6304 BlockDriverState *filtered = bdrv_filter_bs(bs);
6305 if (filtered) {
6306 return bdrv_get_info(filtered, bdi);
5a612c00 6307 }
61007b31 6308 return -ENOTSUP;
5a612c00 6309 }
61007b31 6310 memset(bdi, 0, sizeof(*bdi));
8b117001
VSO
6311 ret = drv->bdrv_get_info(bs, bdi);
6312 if (ret < 0) {
6313 return ret;
6314 }
6315
6316 if (bdi->cluster_size > BDRV_MAX_ALIGNMENT) {
6317 return -EINVAL;
6318 }
6319
6320 return 0;
61007b31 6321}
016f5cf6 6322
1bf6e9ca
AS
6323ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
6324 Error **errp)
61007b31
SH
6325{
6326 BlockDriver *drv = bs->drv;
384a48fb 6327 IO_CODE();
61007b31 6328 if (drv && drv->bdrv_get_specific_info) {
1bf6e9ca 6329 return drv->bdrv_get_specific_info(bs, errp);
61007b31
SH
6330 }
6331 return NULL;
016f5cf6
AG
6332}
6333
d9245599
AN
6334BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
6335{
6336 BlockDriver *drv = bs->drv;
384a48fb 6337 IO_CODE();
d9245599
AN
6338 if (!drv || !drv->bdrv_get_specific_stats) {
6339 return NULL;
6340 }
6341 return drv->bdrv_get_specific_stats(bs);
6342}
6343
a31939e6 6344void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
4265d620 6345{
384a48fb 6346 IO_CODE();
61007b31
SH
6347 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
6348 return;
6349 }
4265d620 6350
61007b31 6351 bs->drv->bdrv_debug_event(bs, event);
4265d620
PB
6352}
6353
d10529a2 6354static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
4265d620 6355{
61007b31 6356 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
f706a92f 6357 bs = bdrv_primary_bs(bs);
61007b31 6358 }
4265d620 6359
61007b31 6360 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
d10529a2
VSO
6361 assert(bs->drv->bdrv_debug_remove_breakpoint);
6362 return bs;
6363 }
6364
6365 return NULL;
6366}
6367
6368int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
6369 const char *tag)
6370{
f791bf7f 6371 GLOBAL_STATE_CODE();
d10529a2
VSO
6372 bs = bdrv_find_debug_node(bs);
6373 if (bs) {
61007b31
SH
6374 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
6375 }
4265d620 6376
61007b31 6377 return -ENOTSUP;
4265d620
PB
6378}
6379
61007b31 6380int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
ea2384d3 6381{
f791bf7f 6382 GLOBAL_STATE_CODE();
d10529a2
VSO
6383 bs = bdrv_find_debug_node(bs);
6384 if (bs) {
61007b31
SH
6385 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
6386 }
6387
6388 return -ENOTSUP;
eb852011
MA
6389}
6390
61007b31 6391int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
ce1a14dc 6392{
f791bf7f 6393 GLOBAL_STATE_CODE();
61007b31 6394 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
f706a92f 6395 bs = bdrv_primary_bs(bs);
61007b31 6396 }
ce1a14dc 6397
61007b31
SH
6398 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
6399 return bs->drv->bdrv_debug_resume(bs, tag);
6400 }
ce1a14dc 6401
61007b31 6402 return -ENOTSUP;
f197fe2b
FZ
6403}
6404
61007b31 6405bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
ce1a14dc 6406{
f791bf7f 6407 GLOBAL_STATE_CODE();
61007b31 6408 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
f706a92f 6409 bs = bdrv_primary_bs(bs);
f197fe2b 6410 }
19cb3738 6411
61007b31
SH
6412 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
6413 return bs->drv->bdrv_debug_is_suspended(bs, tag);
6414 }
f9f05dc5 6415
61007b31
SH
6416 return false;
6417}
f9f05dc5 6418
61007b31
SH
6419/* backing_file can either be relative, or absolute, or a protocol. If it is
6420 * relative, it must be relative to the chain. So, passing in bs->filename
6421 * from a BDS as backing_file should not be done, as that may be relative to
6422 * the CWD rather than the chain. */
6423BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
6424 const char *backing_file)
f9f05dc5 6425{
61007b31
SH
6426 char *filename_full = NULL;
6427 char *backing_file_full = NULL;
6428 char *filename_tmp = NULL;
6429 int is_protocol = 0;
0b877d09 6430 bool filenames_refreshed = false;
61007b31
SH
6431 BlockDriverState *curr_bs = NULL;
6432 BlockDriverState *retval = NULL;
dcf3f9b2 6433 BlockDriverState *bs_below;
f9f05dc5 6434
f791bf7f
EGE
6435 GLOBAL_STATE_CODE();
6436
61007b31
SH
6437 if (!bs || !bs->drv || !backing_file) {
6438 return NULL;
f9f05dc5
KW
6439 }
6440
61007b31
SH
6441 filename_full = g_malloc(PATH_MAX);
6442 backing_file_full = g_malloc(PATH_MAX);
f9f05dc5 6443
61007b31 6444 is_protocol = path_has_protocol(backing_file);
f9f05dc5 6445
dcf3f9b2
HR
6446 /*
6447 * Being largely a legacy function, skip any filters here
6448 * (because filters do not have normal filenames, so they cannot
6449 * match anyway; and allowing json:{} filenames is a bit out of
6450 * scope).
6451 */
6452 for (curr_bs = bdrv_skip_filters(bs);
6453 bdrv_cow_child(curr_bs) != NULL;
6454 curr_bs = bs_below)
6455 {
6456 bs_below = bdrv_backing_chain_next(curr_bs);
f9f05dc5 6457
0b877d09
HR
6458 if (bdrv_backing_overridden(curr_bs)) {
6459 /*
6460 * If the backing file was overridden, we can only compare
6461 * directly against the backing node's filename.
6462 */
6463
6464 if (!filenames_refreshed) {
6465 /*
6466 * This will automatically refresh all of the
6467 * filenames in the rest of the backing chain, so we
6468 * only need to do this once.
6469 */
6470 bdrv_refresh_filename(bs_below);
6471 filenames_refreshed = true;
6472 }
6473
6474 if (strcmp(backing_file, bs_below->filename) == 0) {
6475 retval = bs_below;
6476 break;
6477 }
6478 } else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
6479 /*
6480 * If either of the filename paths is actually a protocol, then
6481 * compare unmodified paths; otherwise make paths relative.
6482 */
6b6833c1
HR
6483 char *backing_file_full_ret;
6484
61007b31 6485 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
dcf3f9b2 6486 retval = bs_below;
61007b31
SH
6487 break;
6488 }
418661e0 6489 /* Also check against the full backing filename for the image */
6b6833c1
HR
6490 backing_file_full_ret = bdrv_get_full_backing_filename(curr_bs,
6491 NULL);
6492 if (backing_file_full_ret) {
6493 bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
6494 g_free(backing_file_full_ret);
6495 if (equal) {
dcf3f9b2 6496 retval = bs_below;
418661e0
JC
6497 break;
6498 }
418661e0 6499 }
61007b31
SH
6500 } else {
6501 /* If not an absolute filename path, make it relative to the current
6502 * image's filename path */
2d9158ce
HR
6503 filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
6504 NULL);
6505 /* We are going to compare canonicalized absolute pathnames */
6506 if (!filename_tmp || !realpath(filename_tmp, filename_full)) {
6507 g_free(filename_tmp);
61007b31
SH
6508 continue;
6509 }
2d9158ce 6510 g_free(filename_tmp);
07f07615 6511
61007b31
SH
6512 /* We need to make sure the backing filename we are comparing against
6513 * is relative to the current image filename (or absolute) */
2d9158ce
HR
6514 filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
6515 if (!filename_tmp || !realpath(filename_tmp, backing_file_full)) {
6516 g_free(filename_tmp);
61007b31
SH
6517 continue;
6518 }
2d9158ce 6519 g_free(filename_tmp);
eb489bb1 6520
61007b31 6521 if (strcmp(backing_file_full, filename_full) == 0) {
dcf3f9b2 6522 retval = bs_below;
61007b31
SH
6523 break;
6524 }
6525 }
eb489bb1
KW
6526 }
6527
61007b31
SH
6528 g_free(filename_full);
6529 g_free(backing_file_full);
61007b31
SH
6530 return retval;
6531}
6532
61007b31
SH
6533void bdrv_init(void)
6534{
e5f05f8c
KW
6535#ifdef CONFIG_BDRV_WHITELIST_TOOLS
6536 use_bdrv_whitelist = 1;
6537#endif
61007b31
SH
6538 module_call_init(MODULE_INIT_BLOCK);
6539}
29cdb251 6540
61007b31
SH
6541void bdrv_init_with_whitelist(void)
6542{
6543 use_bdrv_whitelist = 1;
6544 bdrv_init();
07f07615
PB
6545}
6546
a94750d9 6547int bdrv_activate(BlockDriverState *bs, Error **errp)
0f15423c 6548{
4417ab7a 6549 BdrvChild *child, *parent;
5a8a30db
KW
6550 Error *local_err = NULL;
6551 int ret;
9c98f145 6552 BdrvDirtyBitmap *bm;
5a8a30db 6553
f791bf7f
EGE
6554 GLOBAL_STATE_CODE();
6555
3456a8d1 6556 if (!bs->drv) {
5416645f 6557 return -ENOMEDIUM;
3456a8d1
KW
6558 }
6559
16e977d5 6560 QLIST_FOREACH(child, &bs->children, next) {
11d0c9b3 6561 bdrv_activate(child->bs, &local_err);
0d1c5c91 6562 if (local_err) {
0d1c5c91 6563 error_propagate(errp, local_err);
5416645f 6564 return -EINVAL;
0d1c5c91 6565 }
5a8a30db 6566 }
0d1c5c91 6567
dafe0960
KW
6568 /*
6569 * Update permissions, they may differ for inactive nodes.
6570 *
6571 * Note that the required permissions of inactive images are always a
6572 * subset of the permissions required after activating the image. This
6573 * allows us to just get the permissions upfront without restricting
11d0c9b3 6574 * bdrv_co_invalidate_cache().
dafe0960
KW
6575 *
6576 * It also means that in error cases, we don't have to try and revert to
6577 * the old permissions (which is an operation that could fail, too). We can
6578 * just keep the extended permissions for the next time that an activation
6579 * of the image is tried.
6580 */
7bb4941a
KW
6581 if (bs->open_flags & BDRV_O_INACTIVE) {
6582 bs->open_flags &= ~BDRV_O_INACTIVE;
071b474f 6583 ret = bdrv_refresh_perms(bs, errp);
7bb4941a 6584 if (ret < 0) {
0d1c5c91 6585 bs->open_flags |= BDRV_O_INACTIVE;
5416645f 6586 return ret;
0d1c5c91 6587 }
3456a8d1 6588
11d0c9b3
EGE
6589 ret = bdrv_invalidate_cache(bs, errp);
6590 if (ret < 0) {
6591 bs->open_flags |= BDRV_O_INACTIVE;
6592 return ret;
7bb4941a 6593 }
9c98f145 6594
7bb4941a
KW
6595 FOR_EACH_DIRTY_BITMAP(bs, bm) {
6596 bdrv_dirty_bitmap_skip_store(bm, false);
6597 }
6598
6599 ret = refresh_total_sectors(bs, bs->total_sectors);
6600 if (ret < 0) {
6601 bs->open_flags |= BDRV_O_INACTIVE;
6602 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5416645f 6603 return ret;
7bb4941a 6604 }
5a8a30db 6605 }
4417ab7a
KW
6606
6607 QLIST_FOREACH(parent, &bs->parents, next_parent) {
bd86fb99
HR
6608 if (parent->klass->activate) {
6609 parent->klass->activate(parent, &local_err);
4417ab7a 6610 if (local_err) {
78fc3b3a 6611 bs->open_flags |= BDRV_O_INACTIVE;
4417ab7a 6612 error_propagate(errp, local_err);
5416645f 6613 return -EINVAL;
4417ab7a
KW
6614 }
6615 }
6616 }
5416645f
VSO
6617
6618 return 0;
0f15423c
AL
6619}
6620
11d0c9b3
EGE
6621int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
6622{
6623 Error *local_err = NULL;
6624
6625 assert(!(bs->open_flags & BDRV_O_INACTIVE));
6626
6627 if (bs->drv->bdrv_co_invalidate_cache) {
6628 bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
6629 if (local_err) {
6630 error_propagate(errp, local_err);
6631 return -EINVAL;
6632 }
6633 }
6634
6635 return 0;
6636}
6637
3b717194 6638void bdrv_activate_all(Error **errp)
0f15423c 6639{
7c8eece4 6640 BlockDriverState *bs;
88be7b4b 6641 BdrvNextIterator it;
0f15423c 6642
f791bf7f
EGE
6643 GLOBAL_STATE_CODE();
6644
88be7b4b 6645 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
ed78cda3 6646 AioContext *aio_context = bdrv_get_aio_context(bs);
5416645f 6647 int ret;
ed78cda3
SH
6648
6649 aio_context_acquire(aio_context);
a94750d9 6650 ret = bdrv_activate(bs, errp);
ed78cda3 6651 aio_context_release(aio_context);
5416645f 6652 if (ret < 0) {
5e003f17 6653 bdrv_next_cleanup(&it);
5a8a30db
KW
6654 return;
6655 }
0f15423c
AL
6656 }
6657}
6658
9e37271f
KW
6659static bool bdrv_has_bds_parent(BlockDriverState *bs, bool only_active)
6660{
6661 BdrvChild *parent;
6662
6663 QLIST_FOREACH(parent, &bs->parents, next_parent) {
bd86fb99 6664 if (parent->klass->parent_is_bds) {
9e37271f
KW
6665 BlockDriverState *parent_bs = parent->opaque;
6666 if (!only_active || !(parent_bs->open_flags & BDRV_O_INACTIVE)) {
6667 return true;
6668 }
6669 }
6670 }
6671
6672 return false;
6673}
6674
6675static int bdrv_inactivate_recurse(BlockDriverState *bs)
76b1c7fe 6676{
cfa1a572 6677 BdrvChild *child, *parent;
76b1c7fe 6678 int ret;
a13de40a 6679 uint64_t cumulative_perms, cumulative_shared_perms;
76b1c7fe 6680
d470ad42
HR
6681 if (!bs->drv) {
6682 return -ENOMEDIUM;
6683 }
6684
9e37271f
KW
6685 /* Make sure that we don't inactivate a child before its parent.
6686 * It will be covered by recursion from the yet active parent. */
6687 if (bdrv_has_bds_parent(bs, true)) {
6688 return 0;
6689 }
6690
6691 assert(!(bs->open_flags & BDRV_O_INACTIVE));
6692
6693 /* Inactivate this node */
6694 if (bs->drv->bdrv_inactivate) {
76b1c7fe
KW
6695 ret = bs->drv->bdrv_inactivate(bs);
6696 if (ret < 0) {
6697 return ret;
6698 }
6699 }
6700
9e37271f 6701 QLIST_FOREACH(parent, &bs->parents, next_parent) {
bd86fb99
HR
6702 if (parent->klass->inactivate) {
6703 ret = parent->klass->inactivate(parent);
9e37271f
KW
6704 if (ret < 0) {
6705 return ret;
cfa1a572
KW
6706 }
6707 }
9e37271f 6708 }
9c5e6594 6709
a13de40a
VSO
6710 bdrv_get_cumulative_perm(bs, &cumulative_perms,
6711 &cumulative_shared_perms);
6712 if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
6713 /* Our inactive parents still need write access. Inactivation failed. */
6714 return -EPERM;
6715 }
6716
9e37271f 6717 bs->open_flags |= BDRV_O_INACTIVE;
7d5b5261 6718
bb87e4d1
VSO
6719 /*
6720 * Update permissions, they may differ for inactive nodes.
6721 * We only tried to loosen restrictions, so errors are not fatal, ignore
6722 * them.
6723 */
071b474f 6724 bdrv_refresh_perms(bs, NULL);
9e37271f
KW
6725
6726 /* Recursively inactivate children */
38701b6a 6727 QLIST_FOREACH(child, &bs->children, next) {
9e37271f 6728 ret = bdrv_inactivate_recurse(child->bs);
38701b6a
KW
6729 if (ret < 0) {
6730 return ret;
6731 }
6732 }
6733
76b1c7fe
KW
6734 return 0;
6735}
6736
6737int bdrv_inactivate_all(void)
6738{
79720af6 6739 BlockDriverState *bs = NULL;
88be7b4b 6740 BdrvNextIterator it;
aad0b7a0 6741 int ret = 0;
bd6458e4 6742 GSList *aio_ctxs = NULL, *ctx;
76b1c7fe 6743
f791bf7f
EGE
6744 GLOBAL_STATE_CODE();
6745
88be7b4b 6746 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
bd6458e4
PB
6747 AioContext *aio_context = bdrv_get_aio_context(bs);
6748
6749 if (!g_slist_find(aio_ctxs, aio_context)) {
6750 aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
6751 aio_context_acquire(aio_context);
6752 }
aad0b7a0 6753 }
76b1c7fe 6754
9e37271f
KW
6755 for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
6756 /* Nodes with BDS parents are covered by recursion from the last
6757 * parent that gets inactivated. Don't inactivate them a second
6758 * time if that has already happened. */
6759 if (bdrv_has_bds_parent(bs, false)) {
6760 continue;
6761 }
6762 ret = bdrv_inactivate_recurse(bs);
6763 if (ret < 0) {
6764 bdrv_next_cleanup(&it);
6765 goto out;
76b1c7fe
KW
6766 }
6767 }
6768
aad0b7a0 6769out:
bd6458e4
PB
6770 for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
6771 AioContext *aio_context = ctx->data;
6772 aio_context_release(aio_context);
aad0b7a0 6773 }
bd6458e4 6774 g_slist_free(aio_ctxs);
aad0b7a0
FZ
6775
6776 return ret;
76b1c7fe
KW
6777}
6778
19cb3738
FB
6779/**************************************************************/
6780/* removable device support */
6781
6782/**
6783 * Return TRUE if the media is present
6784 */
e031f750 6785bool bdrv_is_inserted(BlockDriverState *bs)
19cb3738
FB
6786{
6787 BlockDriver *drv = bs->drv;
28d7a789 6788 BdrvChild *child;
384a48fb 6789 IO_CODE();
a1aff5bf 6790
e031f750
HR
6791 if (!drv) {
6792 return false;
6793 }
28d7a789
HR
6794 if (drv->bdrv_is_inserted) {
6795 return drv->bdrv_is_inserted(bs);
6796 }
6797 QLIST_FOREACH(child, &bs->children, next) {
6798 if (!bdrv_is_inserted(child->bs)) {
6799 return false;
6800 }
e031f750 6801 }
28d7a789 6802 return true;
19cb3738
FB
6803}
6804
19cb3738
FB
6805/**
6806 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
6807 */
f36f3949 6808void bdrv_eject(BlockDriverState *bs, bool eject_flag)
19cb3738
FB
6809{
6810 BlockDriver *drv = bs->drv;
384a48fb 6811 IO_CODE();
19cb3738 6812
822e1cd1
MA
6813 if (drv && drv->bdrv_eject) {
6814 drv->bdrv_eject(bs, eject_flag);
19cb3738
FB
6815 }
6816}
6817
19cb3738
FB
6818/**
6819 * Lock or unlock the media (if it is locked, the user won't be able
6820 * to eject it manually).
6821 */
025e849a 6822void bdrv_lock_medium(BlockDriverState *bs, bool locked)
19cb3738
FB
6823{
6824 BlockDriver *drv = bs->drv;
384a48fb 6825 IO_CODE();
025e849a 6826 trace_bdrv_lock_medium(bs, locked);
b8c6d095 6827
025e849a
MA
6828 if (drv && drv->bdrv_lock_medium) {
6829 drv->bdrv_lock_medium(bs, locked);
19cb3738
FB
6830 }
6831}
985a03b0 6832
9fcb0251
FZ
6833/* Get a reference to bs */
6834void bdrv_ref(BlockDriverState *bs)
6835{
f791bf7f 6836 GLOBAL_STATE_CODE();
9fcb0251
FZ
6837 bs->refcnt++;
6838}
6839
6840/* Release a previously grabbed reference to bs.
6841 * If after releasing, reference count is zero, the BlockDriverState is
6842 * deleted. */
6843void bdrv_unref(BlockDriverState *bs)
6844{
f791bf7f 6845 GLOBAL_STATE_CODE();
9a4d5ca6
JC
6846 if (!bs) {
6847 return;
6848 }
9fcb0251
FZ
6849 assert(bs->refcnt > 0);
6850 if (--bs->refcnt == 0) {
6851 bdrv_delete(bs);
6852 }
6853}
6854
fbe40ff7
FZ
6855struct BdrvOpBlocker {
6856 Error *reason;
6857 QLIST_ENTRY(BdrvOpBlocker) list;
6858};
6859
6860bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
6861{
6862 BdrvOpBlocker *blocker;
f791bf7f 6863 GLOBAL_STATE_CODE();
fbe40ff7
FZ
6864 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6865 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
6866 blocker = QLIST_FIRST(&bs->op_blockers[op]);
4b576648
MA
6867 error_propagate_prepend(errp, error_copy(blocker->reason),
6868 "Node '%s' is busy: ",
6869 bdrv_get_device_or_node_name(bs));
fbe40ff7
FZ
6870 return true;
6871 }
6872 return false;
6873}
6874
6875void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
6876{
6877 BdrvOpBlocker *blocker;
f791bf7f 6878 GLOBAL_STATE_CODE();
fbe40ff7
FZ
6879 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6880
5839e53b 6881 blocker = g_new0(BdrvOpBlocker, 1);
fbe40ff7
FZ
6882 blocker->reason = reason;
6883 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
6884}
6885
6886void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
6887{
6888 BdrvOpBlocker *blocker, *next;
f791bf7f 6889 GLOBAL_STATE_CODE();
fbe40ff7
FZ
6890 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
6891 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
6892 if (blocker->reason == reason) {
6893 QLIST_REMOVE(blocker, list);
6894 g_free(blocker);
6895 }
6896 }
6897}
6898
6899void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
6900{
6901 int i;
f791bf7f 6902 GLOBAL_STATE_CODE();
fbe40ff7
FZ
6903 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6904 bdrv_op_block(bs, i, reason);
6905 }
6906}
6907
6908void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
6909{
6910 int i;
f791bf7f 6911 GLOBAL_STATE_CODE();
fbe40ff7
FZ
6912 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6913 bdrv_op_unblock(bs, i, reason);
6914 }
6915}
6916
6917bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
6918{
6919 int i;
f791bf7f 6920 GLOBAL_STATE_CODE();
fbe40ff7
FZ
6921 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
6922 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
6923 return false;
6924 }
6925 }
6926 return true;
6927}
6928
d92ada22
LC
6929void bdrv_img_create(const char *filename, const char *fmt,
6930 const char *base_filename, const char *base_fmt,
9217283d
FZ
6931 char *options, uint64_t img_size, int flags, bool quiet,
6932 Error **errp)
f88e1a42 6933{
83d0521a
CL
6934 QemuOptsList *create_opts = NULL;
6935 QemuOpts *opts = NULL;
6936 const char *backing_fmt, *backing_file;
6937 int64_t size;
f88e1a42 6938 BlockDriver *drv, *proto_drv;
cc84d90f 6939 Error *local_err = NULL;
f88e1a42
JS
6940 int ret = 0;
6941
f791bf7f
EGE
6942 GLOBAL_STATE_CODE();
6943
f88e1a42
JS
6944 /* Find driver and parse its options */
6945 drv = bdrv_find_format(fmt);
6946 if (!drv) {
71c79813 6947 error_setg(errp, "Unknown file format '%s'", fmt);
d92ada22 6948 return;
f88e1a42
JS
6949 }
6950
b65a5e12 6951 proto_drv = bdrv_find_protocol(filename, true, errp);
f88e1a42 6952 if (!proto_drv) {
d92ada22 6953 return;
f88e1a42
JS
6954 }
6955
c6149724
HR
6956 if (!drv->create_opts) {
6957 error_setg(errp, "Format driver '%s' does not support image creation",
6958 drv->format_name);
6959 return;
6960 }
6961
5a5e7f8c
ML
6962 if (!proto_drv->create_opts) {
6963 error_setg(errp, "Protocol driver '%s' does not support image creation",
6964 proto_drv->format_name);
6965 return;
6966 }
6967
f6dc1c31 6968 /* Create parameter list */
c282e1fd 6969 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5a5e7f8c 6970 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
f88e1a42 6971
83d0521a 6972 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
f88e1a42
JS
6973
6974 /* Parse -o options */
6975 if (options) {
a5f9b9df 6976 if (!qemu_opts_do_parse(opts, options, NULL, errp)) {
f88e1a42
JS
6977 goto out;
6978 }
6979 }
6980
f6dc1c31
KW
6981 if (!qemu_opt_get(opts, BLOCK_OPT_SIZE)) {
6982 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
6983 } else if (img_size != UINT64_C(-1)) {
6984 error_setg(errp, "The image size must be specified only once");
6985 goto out;
6986 }
6987
f88e1a42 6988 if (base_filename) {
235e59cf 6989 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename,
3882578b 6990 NULL)) {
71c79813
LC
6991 error_setg(errp, "Backing file not supported for file format '%s'",
6992 fmt);
f88e1a42
JS
6993 goto out;
6994 }
6995 }
6996
6997 if (base_fmt) {
3882578b 6998 if (!qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, NULL)) {
71c79813
LC
6999 error_setg(errp, "Backing file format not supported for file "
7000 "format '%s'", fmt);
f88e1a42
JS
7001 goto out;
7002 }
7003 }
7004
83d0521a
CL
7005 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
7006 if (backing_file) {
7007 if (!strcmp(filename, backing_file)) {
71c79813
LC
7008 error_setg(errp, "Error: Trying to create an image with the "
7009 "same filename as the backing file");
792da93a
JS
7010 goto out;
7011 }
975a7bd2
CK
7012 if (backing_file[0] == '\0') {
7013 error_setg(errp, "Expected backing file name, got empty string");
7014 goto out;
7015 }
792da93a
JS
7016 }
7017
83d0521a 7018 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
f88e1a42 7019
6e6e55f5
JS
7020 /* The size for the image must always be specified, unless we have a backing
7021 * file and we have not been forbidden from opening it. */
a8b42a1c 7022 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
6e6e55f5
JS
7023 if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
7024 BlockDriverState *bs;
645ae7d8 7025 char *full_backing;
6e6e55f5
JS
7026 int back_flags;
7027 QDict *backing_options = NULL;
7028
645ae7d8
HR
7029 full_backing =
7030 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
7031 &local_err);
6e6e55f5 7032 if (local_err) {
6e6e55f5
JS
7033 goto out;
7034 }
645ae7d8 7035 assert(full_backing);
29168018 7036
d5b23994
HR
7037 /*
7038 * No need to do I/O here, which allows us to open encrypted
7039 * backing images without needing the secret
7040 */
6e6e55f5
JS
7041 back_flags = flags;
7042 back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
d5b23994 7043 back_flags |= BDRV_O_NO_IO;
f88e1a42 7044
cc954f01 7045 backing_options = qdict_new();
6e6e55f5 7046 if (backing_fmt) {
6e6e55f5
JS
7047 qdict_put_str(backing_options, "driver", backing_fmt);
7048 }
cc954f01 7049 qdict_put_bool(backing_options, BDRV_OPT_FORCE_SHARE, true);
e6641719 7050
6e6e55f5
JS
7051 bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
7052 &local_err);
7053 g_free(full_backing);
add8200d
EB
7054 if (!bs) {
7055 error_append_hint(&local_err, "Could not open backing image.\n");
6e6e55f5
JS
7056 goto out;
7057 } else {
d9f059aa 7058 if (!backing_fmt) {
497a30db
EB
7059 error_setg(&local_err,
7060 "Backing file specified without backing format");
7061 error_append_hint(&local_err, "Detected format of %s.",
7062 bs->drv->format_name);
7063 goto out;
d9f059aa 7064 }
6e6e55f5
JS
7065 if (size == -1) {
7066 /* Opened BS, have no size */
7067 size = bdrv_getlength(bs);
7068 if (size < 0) {
7069 error_setg_errno(errp, -size, "Could not get size of '%s'",
7070 backing_file);
7071 bdrv_unref(bs);
7072 goto out;
7073 }
7074 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
52bf1e72 7075 }
66f6b814 7076 bdrv_unref(bs);
f88e1a42 7077 }
d9f059aa
EB
7078 /* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
7079 } else if (backing_file && !backing_fmt) {
497a30db
EB
7080 error_setg(&local_err,
7081 "Backing file specified without backing format");
7082 goto out;
d9f059aa 7083 }
6e6e55f5
JS
7084
7085 if (size == -1) {
7086 error_setg(errp, "Image creation needs a size parameter");
7087 goto out;
f88e1a42
JS
7088 }
7089
f382d43a 7090 if (!quiet) {
fe646693 7091 printf("Formatting '%s', fmt=%s ", filename, fmt);
43c5d8f8 7092 qemu_opts_print(opts, " ");
f382d43a 7093 puts("");
4e2f4418 7094 fflush(stdout);
f382d43a 7095 }
83d0521a 7096
c282e1fd 7097 ret = bdrv_create(drv, filename, opts, &local_err);
83d0521a 7098
cc84d90f
HR
7099 if (ret == -EFBIG) {
7100 /* This is generally a better message than whatever the driver would
7101 * deliver (especially because of the cluster_size_hint), since that
7102 * is most probably not much different from "image too large". */
7103 const char *cluster_size_hint = "";
83d0521a 7104 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
cc84d90f 7105 cluster_size_hint = " (try using a larger cluster size)";
f88e1a42 7106 }
cc84d90f
HR
7107 error_setg(errp, "The image size is too large for file format '%s'"
7108 "%s", fmt, cluster_size_hint);
7109 error_free(local_err);
7110 local_err = NULL;
f88e1a42
JS
7111 }
7112
7113out:
83d0521a
CL
7114 qemu_opts_del(opts);
7115 qemu_opts_free(create_opts);
621ff94d 7116 error_propagate(errp, local_err);
f88e1a42 7117}
85d126f3
SH
7118
7119AioContext *bdrv_get_aio_context(BlockDriverState *bs)
7120{
384a48fb 7121 IO_CODE();
33f2a757 7122 return bs ? bs->aio_context : qemu_get_aio_context();
dcd04228
SH
7123}
7124
e336fd4c
KW
7125AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs)
7126{
7127 Coroutine *self = qemu_coroutine_self();
7128 AioContext *old_ctx = qemu_coroutine_get_aio_context(self);
7129 AioContext *new_ctx;
384a48fb 7130 IO_CODE();
e336fd4c
KW
7131
7132 /*
7133 * Increase bs->in_flight to ensure that this operation is completed before
7134 * moving the node to a different AioContext. Read new_ctx only afterwards.
7135 */
7136 bdrv_inc_in_flight(bs);
7137
7138 new_ctx = bdrv_get_aio_context(bs);
7139 aio_co_reschedule_self(new_ctx);
7140 return old_ctx;
7141}
7142
7143void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
7144{
384a48fb 7145 IO_CODE();
e336fd4c
KW
7146 aio_co_reschedule_self(old_ctx);
7147 bdrv_dec_in_flight(bs);
7148}
7149
18c6ac1c
KW
7150void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
7151{
7152 AioContext *ctx = bdrv_get_aio_context(bs);
7153
7154 /* In the main thread, bs->aio_context won't change concurrently */
7155 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
7156
7157 /*
7158 * We're in coroutine context, so we already hold the lock of the main
7159 * loop AioContext. Don't lock it twice to avoid deadlocks.
7160 */
7161 assert(qemu_in_coroutine());
7162 if (ctx != qemu_get_aio_context()) {
7163 aio_context_acquire(ctx);
7164 }
7165}
7166
7167void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
7168{
7169 AioContext *ctx = bdrv_get_aio_context(bs);
7170
7171 assert(qemu_in_coroutine());
7172 if (ctx != qemu_get_aio_context()) {
7173 aio_context_release(ctx);
7174 }
7175}
7176
052a7572
FZ
7177void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
7178{
384a48fb 7179 IO_CODE();
052a7572
FZ
7180 aio_co_enter(bdrv_get_aio_context(bs), co);
7181}
7182
e8a095da
SH
7183static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
7184{
7185 QLIST_REMOVE(ban, list);
7186 g_free(ban);
7187}
7188
a3a683c3 7189static void bdrv_detach_aio_context(BlockDriverState *bs)
dcd04228 7190{
e8a095da 7191 BdrvAioNotifier *baf, *baf_tmp;
33384421 7192
e8a095da
SH
7193 assert(!bs->walking_aio_notifiers);
7194 bs->walking_aio_notifiers = true;
7195 QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
7196 if (baf->deleted) {
7197 bdrv_do_remove_aio_context_notifier(baf);
7198 } else {
7199 baf->detach_aio_context(baf->opaque);
7200 }
33384421 7201 }
e8a095da
SH
7202 /* Never mind iterating again to check for ->deleted. bdrv_close() will
7203 * remove remaining aio notifiers if we aren't called again.
7204 */
7205 bs->walking_aio_notifiers = false;
33384421 7206
1bffe1ae 7207 if (bs->drv && bs->drv->bdrv_detach_aio_context) {
dcd04228
SH
7208 bs->drv->bdrv_detach_aio_context(bs);
7209 }
dcd04228 7210
e64f25f3
KW
7211 if (bs->quiesce_counter) {
7212 aio_enable_external(bs->aio_context);
7213 }
dcd04228
SH
7214 bs->aio_context = NULL;
7215}
7216
a3a683c3
KW
7217static void bdrv_attach_aio_context(BlockDriverState *bs,
7218 AioContext *new_context)
dcd04228 7219{
e8a095da 7220 BdrvAioNotifier *ban, *ban_tmp;
33384421 7221
e64f25f3
KW
7222 if (bs->quiesce_counter) {
7223 aio_disable_external(new_context);
7224 }
7225
dcd04228
SH
7226 bs->aio_context = new_context;
7227
1bffe1ae 7228 if (bs->drv && bs->drv->bdrv_attach_aio_context) {
dcd04228
SH
7229 bs->drv->bdrv_attach_aio_context(bs, new_context);
7230 }
33384421 7231
e8a095da
SH
7232 assert(!bs->walking_aio_notifiers);
7233 bs->walking_aio_notifiers = true;
7234 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
7235 if (ban->deleted) {
7236 bdrv_do_remove_aio_context_notifier(ban);
7237 } else {
7238 ban->attached_aio_context(new_context, ban->opaque);
7239 }
33384421 7240 }
e8a095da 7241 bs->walking_aio_notifiers = false;
dcd04228
SH
7242}
7243
42a65f02
KW
7244/*
7245 * Changes the AioContext used for fd handlers, timers, and BHs by this
7246 * BlockDriverState and all its children and parents.
7247 *
43eaaaef
HR
7248 * Must be called from the main AioContext.
7249 *
42a65f02
KW
7250 * The caller must own the AioContext lock for the old AioContext of bs, but it
7251 * must not own the AioContext lock for new_context (unless new_context is the
7252 * same as the current context of bs).
7253 *
7254 * @ignore will accumulate all visited BdrvChild object. The caller is
7255 * responsible for freeing the list afterwards.
7256 */
53a7d041
KW
7257void bdrv_set_aio_context_ignore(BlockDriverState *bs,
7258 AioContext *new_context, GSList **ignore)
dcd04228 7259{
e037c09c 7260 AioContext *old_context = bdrv_get_aio_context(bs);
722d8e73
SL
7261 GSList *children_to_process = NULL;
7262 GSList *parents_to_process = NULL;
7263 GSList *entry;
7264 BdrvChild *child, *parent;
0d83708a 7265
43eaaaef
HR
7266 g_assert(qemu_get_current_aio_context() == qemu_get_aio_context());
7267
e037c09c 7268 if (old_context == new_context) {
57830a49
DP
7269 return;
7270 }
7271
d70d5954 7272 bdrv_drained_begin(bs);
0d83708a
KW
7273
7274 QLIST_FOREACH(child, &bs->children, next) {
53a7d041
KW
7275 if (g_slist_find(*ignore, child)) {
7276 continue;
7277 }
7278 *ignore = g_slist_prepend(*ignore, child);
722d8e73 7279 children_to_process = g_slist_prepend(children_to_process, child);
53a7d041 7280 }
722d8e73
SL
7281
7282 QLIST_FOREACH(parent, &bs->parents, next_parent) {
7283 if (g_slist_find(*ignore, parent)) {
53a7d041
KW
7284 continue;
7285 }
722d8e73
SL
7286 *ignore = g_slist_prepend(*ignore, parent);
7287 parents_to_process = g_slist_prepend(parents_to_process, parent);
7288 }
7289
7290 for (entry = children_to_process;
7291 entry != NULL;
7292 entry = g_slist_next(entry)) {
7293 child = entry->data;
7294 bdrv_set_aio_context_ignore(child->bs, new_context, ignore);
7295 }
7296 g_slist_free(children_to_process);
7297
7298 for (entry = parents_to_process;
7299 entry != NULL;
7300 entry = g_slist_next(entry)) {
7301 parent = entry->data;
7302 assert(parent->klass->set_aio_ctx);
7303 parent->klass->set_aio_ctx(parent, new_context, ignore);
0d83708a 7304 }
722d8e73 7305 g_slist_free(parents_to_process);
0d83708a 7306
dcd04228
SH
7307 bdrv_detach_aio_context(bs);
7308
e037c09c 7309 /* Acquire the new context, if necessary */
43eaaaef 7310 if (qemu_get_aio_context() != new_context) {
e037c09c
HR
7311 aio_context_acquire(new_context);
7312 }
7313
dcd04228 7314 bdrv_attach_aio_context(bs, new_context);
e037c09c
HR
7315
7316 /*
7317 * If this function was recursively called from
7318 * bdrv_set_aio_context_ignore(), there may be nodes in the
7319 * subtree that have not yet been moved to the new AioContext.
7320 * Release the old one so bdrv_drained_end() can poll them.
7321 */
43eaaaef 7322 if (qemu_get_aio_context() != old_context) {
e037c09c
HR
7323 aio_context_release(old_context);
7324 }
7325
d70d5954 7326 bdrv_drained_end(bs);
e037c09c 7327
43eaaaef 7328 if (qemu_get_aio_context() != old_context) {
e037c09c
HR
7329 aio_context_acquire(old_context);
7330 }
43eaaaef 7331 if (qemu_get_aio_context() != new_context) {
e037c09c
HR
7332 aio_context_release(new_context);
7333 }
85d126f3 7334}
d616b224 7335
5d231849
KW
7336static bool bdrv_parent_can_set_aio_context(BdrvChild *c, AioContext *ctx,
7337 GSList **ignore, Error **errp)
7338{
7339 if (g_slist_find(*ignore, c)) {
7340 return true;
7341 }
7342 *ignore = g_slist_prepend(*ignore, c);
7343
bd86fb99
HR
7344 /*
7345 * A BdrvChildClass that doesn't handle AioContext changes cannot
7346 * tolerate any AioContext changes
7347 */
7348 if (!c->klass->can_set_aio_ctx) {
5d231849
KW
7349 char *user = bdrv_child_user_desc(c);
7350 error_setg(errp, "Changing iothreads is not supported by %s", user);
7351 g_free(user);
7352 return false;
7353 }
bd86fb99 7354 if (!c->klass->can_set_aio_ctx(c, ctx, ignore, errp)) {
5d231849
KW
7355 assert(!errp || *errp);
7356 return false;
7357 }
7358 return true;
7359}
7360
7361bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
7362 GSList **ignore, Error **errp)
7363{
f791bf7f 7364 GLOBAL_STATE_CODE();
5d231849
KW
7365 if (g_slist_find(*ignore, c)) {
7366 return true;
7367 }
7368 *ignore = g_slist_prepend(*ignore, c);
7369 return bdrv_can_set_aio_context(c->bs, ctx, ignore, errp);
7370}
7371
7372/* @ignore will accumulate all visited BdrvChild object. The caller is
7373 * responsible for freeing the list afterwards. */
7374bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
7375 GSList **ignore, Error **errp)
7376{
7377 BdrvChild *c;
7378
7379 if (bdrv_get_aio_context(bs) == ctx) {
7380 return true;
7381 }
7382
f791bf7f
EGE
7383 GLOBAL_STATE_CODE();
7384
5d231849
KW
7385 QLIST_FOREACH(c, &bs->parents, next_parent) {
7386 if (!bdrv_parent_can_set_aio_context(c, ctx, ignore, errp)) {
7387 return false;
7388 }
7389 }
7390 QLIST_FOREACH(c, &bs->children, next) {
7391 if (!bdrv_child_can_set_aio_context(c, ctx, ignore, errp)) {
7392 return false;
7393 }
7394 }
7395
7396 return true;
7397}
7398
7399int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
7400 BdrvChild *ignore_child, Error **errp)
7401{
7402 GSList *ignore;
7403 bool ret;
7404
f791bf7f
EGE
7405 GLOBAL_STATE_CODE();
7406
5d231849
KW
7407 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
7408 ret = bdrv_can_set_aio_context(bs, ctx, &ignore, errp);
7409 g_slist_free(ignore);
7410
7411 if (!ret) {
7412 return -EPERM;
7413 }
7414
53a7d041
KW
7415 ignore = ignore_child ? g_slist_prepend(NULL, ignore_child) : NULL;
7416 bdrv_set_aio_context_ignore(bs, ctx, &ignore);
7417 g_slist_free(ignore);
7418
5d231849
KW
7419 return 0;
7420}
7421
7422int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
7423 Error **errp)
7424{
f791bf7f 7425 GLOBAL_STATE_CODE();
5d231849
KW
7426 return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
7427}
7428
33384421
HR
7429void bdrv_add_aio_context_notifier(BlockDriverState *bs,
7430 void (*attached_aio_context)(AioContext *new_context, void *opaque),
7431 void (*detach_aio_context)(void *opaque), void *opaque)
7432{
7433 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
7434 *ban = (BdrvAioNotifier){
7435 .attached_aio_context = attached_aio_context,
7436 .detach_aio_context = detach_aio_context,
7437 .opaque = opaque
7438 };
f791bf7f 7439 GLOBAL_STATE_CODE();
33384421
HR
7440
7441 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
7442}
7443
7444void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
7445 void (*attached_aio_context)(AioContext *,
7446 void *),
7447 void (*detach_aio_context)(void *),
7448 void *opaque)
7449{
7450 BdrvAioNotifier *ban, *ban_next;
f791bf7f 7451 GLOBAL_STATE_CODE();
33384421
HR
7452
7453 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
7454 if (ban->attached_aio_context == attached_aio_context &&
7455 ban->detach_aio_context == detach_aio_context &&
e8a095da
SH
7456 ban->opaque == opaque &&
7457 ban->deleted == false)
33384421 7458 {
e8a095da
SH
7459 if (bs->walking_aio_notifiers) {
7460 ban->deleted = true;
7461 } else {
7462 bdrv_do_remove_aio_context_notifier(ban);
7463 }
33384421
HR
7464 return;
7465 }
7466 }
7467
7468 abort();
7469}
7470
77485434 7471int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
d1402b50 7472 BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
a3579bfa 7473 bool force,
d1402b50 7474 Error **errp)
6f176b48 7475{
f791bf7f 7476 GLOBAL_STATE_CODE();
d470ad42 7477 if (!bs->drv) {
d1402b50 7478 error_setg(errp, "Node is ejected");
d470ad42
HR
7479 return -ENOMEDIUM;
7480 }
c282e1fd 7481 if (!bs->drv->bdrv_amend_options) {
d1402b50
HR
7482 error_setg(errp, "Block driver '%s' does not support option amendment",
7483 bs->drv->format_name);
6f176b48
HR
7484 return -ENOTSUP;
7485 }
a3579bfa
ML
7486 return bs->drv->bdrv_amend_options(bs, opts, status_cb,
7487 cb_opaque, force, errp);
6f176b48 7488}
f6186f49 7489
5d69b5ab
HR
7490/*
7491 * This function checks whether the given @to_replace is allowed to be
7492 * replaced by a node that always shows the same data as @bs. This is
7493 * used for example to verify whether the mirror job can replace
7494 * @to_replace by the target mirrored from @bs.
7495 * To be replaceable, @bs and @to_replace may either be guaranteed to
7496 * always show the same data (because they are only connected through
7497 * filters), or some driver may allow replacing one of its children
7498 * because it can guarantee that this child's data is not visible at
7499 * all (for example, for dissenting quorum children that have no other
7500 * parents).
7501 */
7502bool bdrv_recurse_can_replace(BlockDriverState *bs,
7503 BlockDriverState *to_replace)
7504{
93393e69
HR
7505 BlockDriverState *filtered;
7506
b4ad82aa
EGE
7507 GLOBAL_STATE_CODE();
7508
5d69b5ab
HR
7509 if (!bs || !bs->drv) {
7510 return false;
7511 }
7512
7513 if (bs == to_replace) {
7514 return true;
7515 }
7516
7517 /* See what the driver can do */
7518 if (bs->drv->bdrv_recurse_can_replace) {
7519 return bs->drv->bdrv_recurse_can_replace(bs, to_replace);
7520 }
7521
7522 /* For filters without an own implementation, we can recurse on our own */
93393e69
HR
7523 filtered = bdrv_filter_bs(bs);
7524 if (filtered) {
7525 return bdrv_recurse_can_replace(filtered, to_replace);
5d69b5ab
HR
7526 }
7527
7528 /* Safe default */
7529 return false;
7530}
7531
810803a8
HR
7532/*
7533 * Check whether the given @node_name can be replaced by a node that
7534 * has the same data as @parent_bs. If so, return @node_name's BDS;
7535 * NULL otherwise.
7536 *
7537 * @node_name must be a (recursive) *child of @parent_bs (or this
7538 * function will return NULL).
7539 *
7540 * The result (whether the node can be replaced or not) is only valid
7541 * for as long as no graph or permission changes occur.
7542 */
e12f3784
WC
7543BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
7544 const char *node_name, Error **errp)
09158f00
BC
7545{
7546 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5a7e7a0b
SH
7547 AioContext *aio_context;
7548
f791bf7f
EGE
7549 GLOBAL_STATE_CODE();
7550
09158f00 7551 if (!to_replace_bs) {
785ec4b1 7552 error_setg(errp, "Failed to find node with node-name='%s'", node_name);
09158f00
BC
7553 return NULL;
7554 }
7555
5a7e7a0b
SH
7556 aio_context = bdrv_get_aio_context(to_replace_bs);
7557 aio_context_acquire(aio_context);
7558
09158f00 7559 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5a7e7a0b
SH
7560 to_replace_bs = NULL;
7561 goto out;
09158f00
BC
7562 }
7563
7564 /* We don't want arbitrary node of the BDS chain to be replaced only the top
7565 * most non filter in order to prevent data corruption.
7566 * Another benefit is that this tests exclude backing files which are
7567 * blocked by the backing blockers.
7568 */
810803a8
HR
7569 if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) {
7570 error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', "
7571 "because it cannot be guaranteed that doing so would not "
7572 "lead to an abrupt change of visible data",
7573 node_name, parent_bs->node_name);
5a7e7a0b
SH
7574 to_replace_bs = NULL;
7575 goto out;
09158f00
BC
7576 }
7577
5a7e7a0b
SH
7578out:
7579 aio_context_release(aio_context);
09158f00
BC
7580 return to_replace_bs;
7581}
448ad91d 7582
97e2f021
HR
7583/**
7584 * Iterates through the list of runtime option keys that are said to
7585 * be "strong" for a BDS. An option is called "strong" if it changes
7586 * a BDS's data. For example, the null block driver's "size" and
7587 * "read-zeroes" options are strong, but its "latency-ns" option is
7588 * not.
7589 *
7590 * If a key returned by this function ends with a dot, all options
7591 * starting with that prefix are strong.
7592 */
7593static const char *const *strong_options(BlockDriverState *bs,
7594 const char *const *curopt)
7595{
7596 static const char *const global_options[] = {
7597 "driver", "filename", NULL
7598 };
7599
7600 if (!curopt) {
7601 return &global_options[0];
7602 }
7603
7604 curopt++;
7605 if (curopt == &global_options[ARRAY_SIZE(global_options) - 1] && bs->drv) {
7606 curopt = bs->drv->strong_runtime_opts;
7607 }
7608
7609 return (curopt && *curopt) ? curopt : NULL;
7610}
7611
7612/**
7613 * Copies all strong runtime options from bs->options to the given
7614 * QDict. The set of strong option keys is determined by invoking
7615 * strong_options().
7616 *
7617 * Returns true iff any strong option was present in bs->options (and
7618 * thus copied to the target QDict) with the exception of "filename"
7619 * and "driver". The caller is expected to use this value to decide
7620 * whether the existence of strong options prevents the generation of
7621 * a plain filename.
7622 */
7623static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
7624{
7625 bool found_any = false;
7626 const char *const *option_name = NULL;
7627
7628 if (!bs->drv) {
7629 return false;
7630 }
7631
7632 while ((option_name = strong_options(bs, option_name))) {
7633 bool option_given = false;
7634
7635 assert(strlen(*option_name) > 0);
7636 if ((*option_name)[strlen(*option_name) - 1] != '.') {
7637 QObject *entry = qdict_get(bs->options, *option_name);
7638 if (!entry) {
7639 continue;
7640 }
7641
7642 qdict_put_obj(d, *option_name, qobject_ref(entry));
7643 option_given = true;
7644 } else {
7645 const QDictEntry *entry;
7646 for (entry = qdict_first(bs->options); entry;
7647 entry = qdict_next(bs->options, entry))
7648 {
7649 if (strstart(qdict_entry_key(entry), *option_name, NULL)) {
7650 qdict_put_obj(d, qdict_entry_key(entry),
7651 qobject_ref(qdict_entry_value(entry)));
7652 option_given = true;
7653 }
7654 }
7655 }
7656
7657 /* While "driver" and "filename" need to be included in a JSON filename,
7658 * their existence does not prohibit generation of a plain filename. */
7659 if (!found_any && option_given &&
7660 strcmp(*option_name, "driver") && strcmp(*option_name, "filename"))
7661 {
7662 found_any = true;
7663 }
7664 }
7665
62a01a27
HR
7666 if (!qdict_haskey(d, "driver")) {
7667 /* Drivers created with bdrv_new_open_driver() may not have a
7668 * @driver option. Add it here. */
7669 qdict_put_str(d, "driver", bs->drv->format_name);
7670 }
7671
97e2f021
HR
7672 return found_any;
7673}
7674
90993623
HR
7675/* Note: This function may return false positives; it may return true
7676 * even if opening the backing file specified by bs's image header
7677 * would result in exactly bs->backing. */
fa8fc1d0 7678static bool bdrv_backing_overridden(BlockDriverState *bs)
90993623 7679{
b4ad82aa 7680 GLOBAL_STATE_CODE();
90993623
HR
7681 if (bs->backing) {
7682 return strcmp(bs->auto_backing_file,
7683 bs->backing->bs->filename);
7684 } else {
7685 /* No backing BDS, so if the image header reports any backing
7686 * file, it must have been suppressed */
7687 return bs->auto_backing_file[0] != '\0';
7688 }
7689}
7690
91af7014
HR
7691/* Updates the following BDS fields:
7692 * - exact_filename: A filename which may be used for opening a block device
7693 * which (mostly) equals the given BDS (even without any
7694 * other options; so reading and writing must return the same
7695 * results, but caching etc. may be different)
7696 * - full_open_options: Options which, when given when opening a block device
7697 * (without a filename), result in a BDS (mostly)
7698 * equalling the given one
7699 * - filename: If exact_filename is set, it is copied here. Otherwise,
7700 * full_open_options is converted to a JSON object, prefixed with
7701 * "json:" (for use through the JSON pseudo protocol) and put here.
7702 */
7703void bdrv_refresh_filename(BlockDriverState *bs)
7704{
7705 BlockDriver *drv = bs->drv;
e24518e3 7706 BdrvChild *child;
52f72d6f 7707 BlockDriverState *primary_child_bs;
91af7014 7708 QDict *opts;
90993623 7709 bool backing_overridden;
998b3a1e
HR
7710 bool generate_json_filename; /* Whether our default implementation should
7711 fill exact_filename (false) or not (true) */
91af7014 7712
f791bf7f
EGE
7713 GLOBAL_STATE_CODE();
7714
91af7014
HR
7715 if (!drv) {
7716 return;
7717 }
7718
e24518e3
HR
7719 /* This BDS's file name may depend on any of its children's file names, so
7720 * refresh those first */
7721 QLIST_FOREACH(child, &bs->children, next) {
7722 bdrv_refresh_filename(child->bs);
91af7014
HR
7723 }
7724
bb808d5f
HR
7725 if (bs->implicit) {
7726 /* For implicit nodes, just copy everything from the single child */
7727 child = QLIST_FIRST(&bs->children);
7728 assert(QLIST_NEXT(child, next) == NULL);
7729
7730 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
7731 child->bs->exact_filename);
7732 pstrcpy(bs->filename, sizeof(bs->filename), child->bs->filename);
7733
cb895614 7734 qobject_unref(bs->full_open_options);
bb808d5f
HR
7735 bs->full_open_options = qobject_ref(child->bs->full_open_options);
7736
7737 return;
7738 }
7739
90993623
HR
7740 backing_overridden = bdrv_backing_overridden(bs);
7741
7742 if (bs->open_flags & BDRV_O_NO_IO) {
7743 /* Without I/O, the backing file does not change anything.
7744 * Therefore, in such a case (primarily qemu-img), we can
7745 * pretend the backing file has not been overridden even if
7746 * it technically has been. */
7747 backing_overridden = false;
7748 }
7749
97e2f021
HR
7750 /* Gather the options QDict */
7751 opts = qdict_new();
998b3a1e
HR
7752 generate_json_filename = append_strong_runtime_options(opts, bs);
7753 generate_json_filename |= backing_overridden;
97e2f021
HR
7754
7755 if (drv->bdrv_gather_child_options) {
7756 /* Some block drivers may not want to present all of their children's
7757 * options, or name them differently from BdrvChild.name */
7758 drv->bdrv_gather_child_options(bs, opts, backing_overridden);
7759 } else {
7760 QLIST_FOREACH(child, &bs->children, next) {
25191e5f 7761 if (child == bs->backing && !backing_overridden) {
97e2f021
HR
7762 /* We can skip the backing BDS if it has not been overridden */
7763 continue;
7764 }
7765
7766 qdict_put(opts, child->name,
7767 qobject_ref(child->bs->full_open_options));
7768 }
7769
7770 if (backing_overridden && !bs->backing) {
7771 /* Force no backing file */
7772 qdict_put_null(opts, "backing");
7773 }
7774 }
7775
7776 qobject_unref(bs->full_open_options);
7777 bs->full_open_options = opts;
7778
52f72d6f
HR
7779 primary_child_bs = bdrv_primary_bs(bs);
7780
998b3a1e
HR
7781 if (drv->bdrv_refresh_filename) {
7782 /* Obsolete information is of no use here, so drop the old file name
7783 * information before refreshing it */
7784 bs->exact_filename[0] = '\0';
7785
7786 drv->bdrv_refresh_filename(bs);
52f72d6f
HR
7787 } else if (primary_child_bs) {
7788 /*
7789 * Try to reconstruct valid information from the underlying
7790 * file -- this only works for format nodes (filter nodes
7791 * cannot be probed and as such must be selected by the user
7792 * either through an options dict, or through a special
7793 * filename which the filter driver must construct in its
7794 * .bdrv_refresh_filename() implementation).
7795 */
998b3a1e
HR
7796
7797 bs->exact_filename[0] = '\0';
7798
fb695c74
HR
7799 /*
7800 * We can use the underlying file's filename if:
7801 * - it has a filename,
52f72d6f 7802 * - the current BDS is not a filter,
fb695c74
HR
7803 * - the file is a protocol BDS, and
7804 * - opening that file (as this BDS's format) will automatically create
7805 * the BDS tree we have right now, that is:
7806 * - the user did not significantly change this BDS's behavior with
7807 * some explicit (strong) options
7808 * - no non-file child of this BDS has been overridden by the user
7809 * Both of these conditions are represented by generate_json_filename.
7810 */
52f72d6f
HR
7811 if (primary_child_bs->exact_filename[0] &&
7812 primary_child_bs->drv->bdrv_file_open &&
7813 !drv->is_filter && !generate_json_filename)
fb695c74 7814 {
52f72d6f 7815 strcpy(bs->exact_filename, primary_child_bs->exact_filename);
998b3a1e
HR
7816 }
7817 }
7818
91af7014
HR
7819 if (bs->exact_filename[0]) {
7820 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
97e2f021 7821 } else {
eab3a467 7822 GString *json = qobject_to_json(QOBJECT(bs->full_open_options));
5c86bdf1 7823 if (snprintf(bs->filename, sizeof(bs->filename), "json:%s",
eab3a467 7824 json->str) >= sizeof(bs->filename)) {
5c86bdf1
EB
7825 /* Give user a hint if we truncated things. */
7826 strcpy(bs->filename + sizeof(bs->filename) - 4, "...");
7827 }
eab3a467 7828 g_string_free(json, true);
91af7014
HR
7829 }
7830}
e06018ad 7831
1e89d0f9
HR
7832char *bdrv_dirname(BlockDriverState *bs, Error **errp)
7833{
7834 BlockDriver *drv = bs->drv;
52f72d6f 7835 BlockDriverState *child_bs;
1e89d0f9 7836
f791bf7f
EGE
7837 GLOBAL_STATE_CODE();
7838
1e89d0f9
HR
7839 if (!drv) {
7840 error_setg(errp, "Node '%s' is ejected", bs->node_name);
7841 return NULL;
7842 }
7843
7844 if (drv->bdrv_dirname) {
7845 return drv->bdrv_dirname(bs, errp);
7846 }
7847
52f72d6f
HR
7848 child_bs = bdrv_primary_bs(bs);
7849 if (child_bs) {
7850 return bdrv_dirname(child_bs, errp);
1e89d0f9
HR
7851 }
7852
7853 bdrv_refresh_filename(bs);
7854 if (bs->exact_filename[0] != '\0') {
7855 return path_combine(bs->exact_filename, "");
7856 }
7857
7858 error_setg(errp, "Cannot generate a base directory for %s nodes",
7859 drv->format_name);
7860 return NULL;
7861}
7862
e06018ad
WC
7863/*
7864 * Hot add/remove a BDS's child. So the user can take a child offline when
7865 * it is broken and take a new child online
7866 */
7867void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
7868 Error **errp)
7869{
f791bf7f 7870 GLOBAL_STATE_CODE();
e06018ad
WC
7871 if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
7872 error_setg(errp, "The node %s does not support adding a child",
7873 bdrv_get_device_or_node_name(parent_bs));
7874 return;
7875 }
7876
7877 if (!QLIST_EMPTY(&child_bs->parents)) {
7878 error_setg(errp, "The node %s already has a parent",
7879 child_bs->node_name);
7880 return;
7881 }
7882
7883 parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
7884}
7885
7886void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
7887{
7888 BdrvChild *tmp;
7889
f791bf7f 7890 GLOBAL_STATE_CODE();
e06018ad
WC
7891 if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
7892 error_setg(errp, "The node %s does not support removing a child",
7893 bdrv_get_device_or_node_name(parent_bs));
7894 return;
7895 }
7896
7897 QLIST_FOREACH(tmp, &parent_bs->children, next) {
7898 if (tmp == child) {
7899 break;
7900 }
7901 }
7902
7903 if (!tmp) {
7904 error_setg(errp, "The node %s does not have a child named %s",
7905 bdrv_get_device_or_node_name(parent_bs),
7906 bdrv_get_device_or_node_name(child->bs));
7907 return;
7908 }
7909
7910 parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
7911}
6f7a3b53
HR
7912
7913int bdrv_make_empty(BdrvChild *c, Error **errp)
7914{
7915 BlockDriver *drv = c->bs->drv;
7916 int ret;
7917
f791bf7f 7918 GLOBAL_STATE_CODE();
6f7a3b53
HR
7919 assert(c->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED));
7920
7921 if (!drv->bdrv_make_empty) {
7922 error_setg(errp, "%s does not support emptying nodes",
7923 drv->format_name);
7924 return -ENOTSUP;
7925 }
7926
7927 ret = drv->bdrv_make_empty(c->bs);
7928 if (ret < 0) {
7929 error_setg_errno(errp, -ret, "Failed to empty %s",
7930 c->bs->filename);
7931 return ret;
7932 }
7933
7934 return 0;
7935}
9a6fc887
HR
7936
7937/*
7938 * Return the child that @bs acts as an overlay for, and from which data may be
7939 * copied in COW or COR operations. Usually this is the backing file.
7940 */
7941BdrvChild *bdrv_cow_child(BlockDriverState *bs)
7942{
967d7905
EGE
7943 IO_CODE();
7944
9a6fc887
HR
7945 if (!bs || !bs->drv) {
7946 return NULL;
7947 }
7948
7949 if (bs->drv->is_filter) {
7950 return NULL;
7951 }
7952
7953 if (!bs->backing) {
7954 return NULL;
7955 }
7956
7957 assert(bs->backing->role & BDRV_CHILD_COW);
7958 return bs->backing;
7959}
7960
7961/*
7962 * If @bs acts as a filter for exactly one of its children, return
7963 * that child.
7964 */
7965BdrvChild *bdrv_filter_child(BlockDriverState *bs)
7966{
7967 BdrvChild *c;
967d7905 7968 IO_CODE();
9a6fc887
HR
7969
7970 if (!bs || !bs->drv) {
7971 return NULL;
7972 }
7973
7974 if (!bs->drv->is_filter) {
7975 return NULL;
7976 }
7977
7978 /* Only one of @backing or @file may be used */
7979 assert(!(bs->backing && bs->file));
7980
7981 c = bs->backing ?: bs->file;
7982 if (!c) {
7983 return NULL;
7984 }
7985
7986 assert(c->role & BDRV_CHILD_FILTERED);
7987 return c;
7988}
7989
7990/*
7991 * Return either the result of bdrv_cow_child() or bdrv_filter_child(),
7992 * whichever is non-NULL.
7993 *
7994 * Return NULL if both are NULL.
7995 */
7996BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
7997{
7998 BdrvChild *cow_child = bdrv_cow_child(bs);
7999 BdrvChild *filter_child = bdrv_filter_child(bs);
967d7905 8000 IO_CODE();
9a6fc887
HR
8001
8002 /* Filter nodes cannot have COW backing files */
8003 assert(!(cow_child && filter_child));
8004
8005 return cow_child ?: filter_child;
8006}
8007
8008/*
8009 * Return the primary child of this node: For filters, that is the
8010 * filtered child. For other nodes, that is usually the child storing
8011 * metadata.
8012 * (A generally more helpful description is that this is (usually) the
8013 * child that has the same filename as @bs.)
8014 *
8015 * Drivers do not necessarily have a primary child; for example quorum
8016 * does not.
8017 */
8018BdrvChild *bdrv_primary_child(BlockDriverState *bs)
8019{
8020 BdrvChild *c, *found = NULL;
967d7905 8021 IO_CODE();
9a6fc887
HR
8022
8023 QLIST_FOREACH(c, &bs->children, next) {
8024 if (c->role & BDRV_CHILD_PRIMARY) {
8025 assert(!found);
8026 found = c;
8027 }
8028 }
8029
8030 return found;
8031}
d38d7eb8
HR
8032
8033static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
8034 bool stop_on_explicit_filter)
8035{
8036 BdrvChild *c;
8037
8038 if (!bs) {
8039 return NULL;
8040 }
8041
8042 while (!(stop_on_explicit_filter && !bs->implicit)) {
8043 c = bdrv_filter_child(bs);
8044 if (!c) {
8045 /*
8046 * A filter that is embedded in a working block graph must
8047 * have a child. Assert this here so this function does
8048 * not return a filter node that is not expected by the
8049 * caller.
8050 */
8051 assert(!bs->drv || !bs->drv->is_filter);
8052 break;
8053 }
8054 bs = c->bs;
8055 }
8056 /*
8057 * Note that this treats nodes with bs->drv == NULL as not being
8058 * filters (bs->drv == NULL should be replaced by something else
8059 * anyway).
8060 * The advantage of this behavior is that this function will thus
8061 * always return a non-NULL value (given a non-NULL @bs).
8062 */
8063
8064 return bs;
8065}
8066
8067/*
8068 * Return the first BDS that has not been added implicitly or that
8069 * does not have a filtered child down the chain starting from @bs
8070 * (including @bs itself).
8071 */
8072BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
8073{
b4ad82aa 8074 GLOBAL_STATE_CODE();
d38d7eb8
HR
8075 return bdrv_do_skip_filters(bs, true);
8076}
8077
8078/*
8079 * Return the first BDS that does not have a filtered child down the
8080 * chain starting from @bs (including @bs itself).
8081 */
8082BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
8083{
967d7905 8084 IO_CODE();
d38d7eb8
HR
8085 return bdrv_do_skip_filters(bs, false);
8086}
8087
8088/*
8089 * For a backing chain, return the first non-filter backing image of
8090 * the first non-filter image.
8091 */
8092BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
8093{
967d7905 8094 IO_CODE();
d38d7eb8
HR
8095 return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
8096}
0bc329fb
HR
8097
8098/**
8099 * Check whether [offset, offset + bytes) overlaps with the cached
8100 * block-status data region.
8101 *
8102 * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
8103 * which is what bdrv_bsc_is_data()'s interface needs.
8104 * Otherwise, *pnum is not touched.
8105 */
8106static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
8107 int64_t offset, int64_t bytes,
8108 int64_t *pnum)
8109{
8110 BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
8111 bool overlaps;
8112
8113 overlaps =
8114 qatomic_read(&bsc->valid) &&
8115 ranges_overlap(offset, bytes, bsc->data_start,
8116 bsc->data_end - bsc->data_start);
8117
8118 if (overlaps && pnum) {
8119 *pnum = bsc->data_end - offset;
8120 }
8121
8122 return overlaps;
8123}
8124
8125/**
8126 * See block_int.h for this function's documentation.
8127 */
8128bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
8129{
967d7905 8130 IO_CODE();
0bc329fb 8131 RCU_READ_LOCK_GUARD();
0bc329fb
HR
8132 return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
8133}
8134
8135/**
8136 * See block_int.h for this function's documentation.
8137 */
8138void bdrv_bsc_invalidate_range(BlockDriverState *bs,
8139 int64_t offset, int64_t bytes)
8140{
967d7905 8141 IO_CODE();
0bc329fb
HR
8142 RCU_READ_LOCK_GUARD();
8143
8144 if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
8145 qatomic_set(&bs->block_status_cache->valid, false);
8146 }
8147}
8148
8149/**
8150 * See block_int.h for this function's documentation.
8151 */
8152void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
8153{
8154 BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
8155 BdrvBlockStatusCache *old_bsc;
967d7905 8156 IO_CODE();
0bc329fb
HR
8157
8158 *new_bsc = (BdrvBlockStatusCache) {
8159 .valid = true,
8160 .data_start = offset,
8161 .data_end = offset + bytes,
8162 };
8163
8164 QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
8165
8166 old_bsc = qatomic_rcu_read(&bs->block_status_cache);
8167 qatomic_rcu_set(&bs->block_status_cache, new_bsc);
8168 if (old_bsc) {
8169 g_free_rcu(old_bsc, rcu);
8170 }
8171}
This page took 2.828906 seconds and 4 git commands to generate.