]> Git Repo - qemu.git/blame_incremental - block.c
qcow2: Rename BDRVQcowState to BDRVQcow2State
[qemu.git] / block.c
... / ...
CommitLineData
1/*
2 * QEMU System Emulator block driver
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24#include "config-host.h"
25#include "qemu-common.h"
26#include "trace.h"
27#include "block/block_int.h"
28#include "block/blockjob.h"
29#include "qemu/error-report.h"
30#include "qemu/module.h"
31#include "qapi/qmp/qerror.h"
32#include "qapi/qmp/qjson.h"
33#include "sysemu/block-backend.h"
34#include "sysemu/sysemu.h"
35#include "qemu/notify.h"
36#include "block/coroutine.h"
37#include "block/qapi.h"
38#include "qmp-commands.h"
39#include "qemu/timer.h"
40#include "qapi-event.h"
41#include "block/throttle-groups.h"
42
43#ifdef CONFIG_BSD
44#include <sys/types.h>
45#include <sys/stat.h>
46#include <sys/ioctl.h>
47#include <sys/queue.h>
48#ifndef __DragonFly__
49#include <sys/disk.h>
50#endif
51#endif
52
53#ifdef _WIN32
54#include <windows.h>
55#endif
56
57/**
58 * A BdrvDirtyBitmap can be in three possible states:
59 * (1) successor is NULL and disabled is false: full r/w mode
60 * (2) successor is NULL and disabled is true: read only mode ("disabled")
61 * (3) successor is set: frozen mode.
62 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
63 * or enabled. A frozen bitmap can only abdicate() or reclaim().
64 */
65struct BdrvDirtyBitmap {
66 HBitmap *bitmap; /* Dirty sector bitmap implementation */
67 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
68 char *name; /* Optional non-empty unique ID */
69 int64_t size; /* Size of the bitmap (Number of sectors) */
70 bool disabled; /* Bitmap is read-only */
71 QLIST_ENTRY(BdrvDirtyBitmap) list;
72};
73
74#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
75
76static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
78
79static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
81
82static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
84
85static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role, Error **errp);
89
90static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
91/* If non-zero, use only whitelisted block drivers */
92static int use_bdrv_whitelist;
93
94#ifdef _WIN32
/* Return non-zero if @filename starts with an ASCII letter followed by ':' */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_lower = letter >= 'a' && letter <= 'z';
    int is_upper = letter >= 'A' && letter <= 'Z';

    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
101
/*
 * Return 1 if @filename is a bare drive specification ("d:") or starts with
 * "\\.\" or "//./", and 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
112#endif
113
114size_t bdrv_opt_mem_align(BlockDriverState *bs)
115{
116 if (!bs || !bs->drv) {
117 /* page size or 4k (hdd sector size) should be on the safe side */
118 return MAX(4096, getpagesize());
119 }
120
121 return bs->bl.opt_mem_alignment;
122}
123
124size_t bdrv_min_mem_align(BlockDriverState *bs)
125{
126 if (!bs || !bs->drv) {
127 /* page size or 4k (hdd sector size) should be on the safe side */
128 return MAX(4096, getpagesize());
129 }
130
131 return bs->bl.min_mem_alignment;
132}
133
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' appears
 * before any path separator. On Windows, drive specifications never count
 * as a protocol and '\\' is a separator as well.
 */
int path_has_protocol(const char *path)
{
    const char *stop_chars = ":/";

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
    stop_chars = ":/\\";
#endif

    return path[strcspn(path, stop_chars)] == ':';
}
151
/*
 * Return non-zero if @path is absolute. On Windows, drive specifications
 * (including names like "\\.\d:") and a leading backslash also count.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (*path == '\\') {
        return 1;
    }
#endif
    return *path == '/';
}
164
/*
 * Build the path of @filename in @dest (at most @dest_size bytes, always
 * NUL-terminated when @dest_size > 0).
 *
 * An absolute @filename is copied unchanged. Otherwise @filename is appended
 * to the directory part of @base_path, i.e. everything up to and including
 * the last path separator, or up to and including the first ':' if that
 * comes later. URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or the start if there is none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last separator (or the start if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep whichever prefix is longer, truncated to fit @dest */
    prefix_end = after_sep > after_colon ? after_sep : after_colon;
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
208
209void bdrv_get_full_backing_filename_from_filename(const char *backed,
210 const char *backing,
211 char *dest, size_t sz,
212 Error **errp)
213{
214 if (backing[0] == '\0' || path_has_protocol(backing) ||
215 path_is_absolute(backing))
216 {
217 pstrcpy(dest, sz, backing);
218 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
219 error_setg(errp, "Cannot use relative backing file names for '%s'",
220 backed);
221 } else {
222 path_combine(dest, sz, backed, backing);
223 }
224}
225
226void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
227 Error **errp)
228{
229 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
230
231 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
232 dest, sz, errp);
233}
234
235void bdrv_register(BlockDriver *bdrv)
236{
237 bdrv_setup_io_funcs(bdrv);
238
239 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
240}
241
242BlockDriverState *bdrv_new_root(void)
243{
244 BlockDriverState *bs = bdrv_new();
245
246 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
247 return bs;
248}
249
250BlockDriverState *bdrv_new(void)
251{
252 BlockDriverState *bs;
253 int i;
254
255 bs = g_new0(BlockDriverState, 1);
256 QLIST_INIT(&bs->dirty_bitmaps);
257 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
258 QLIST_INIT(&bs->op_blockers[i]);
259 }
260 bdrv_iostatus_disable(bs);
261 notifier_list_init(&bs->close_notifiers);
262 notifier_with_return_list_init(&bs->before_write_notifiers);
263 qemu_co_queue_init(&bs->throttled_reqs[0]);
264 qemu_co_queue_init(&bs->throttled_reqs[1]);
265 bs->refcnt = 1;
266 bs->aio_context = qemu_get_aio_context();
267
268 return bs;
269}
270
271void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
272{
273 notifier_list_add(&bs->close_notifiers, notify);
274}
275
276BlockDriver *bdrv_find_format(const char *format_name)
277{
278 BlockDriver *drv1;
279 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280 if (!strcmp(drv1->format_name, format_name)) {
281 return drv1;
282 }
283 }
284 return NULL;
285}
286
287static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
288{
289 static const char *whitelist_rw[] = {
290 CONFIG_BDRV_RW_WHITELIST
291 };
292 static const char *whitelist_ro[] = {
293 CONFIG_BDRV_RO_WHITELIST
294 };
295 const char **p;
296
297 if (!whitelist_rw[0] && !whitelist_ro[0]) {
298 return 1; /* no whitelist, anything goes */
299 }
300
301 for (p = whitelist_rw; *p; p++) {
302 if (!strcmp(drv->format_name, *p)) {
303 return 1;
304 }
305 }
306 if (read_only) {
307 for (p = whitelist_ro; *p; p++) {
308 if (!strcmp(drv->format_name, *p)) {
309 return 1;
310 }
311 }
312 }
313 return 0;
314}
315
316typedef struct CreateCo {
317 BlockDriver *drv;
318 char *filename;
319 QemuOpts *opts;
320 int ret;
321 Error *err;
322} CreateCo;
323
324static void coroutine_fn bdrv_create_co_entry(void *opaque)
325{
326 Error *local_err = NULL;
327 int ret;
328
329 CreateCo *cco = opaque;
330 assert(cco->drv);
331
332 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
333 if (local_err) {
334 error_propagate(&cco->err, local_err);
335 }
336 cco->ret = ret;
337}
338
339int bdrv_create(BlockDriver *drv, const char* filename,
340 QemuOpts *opts, Error **errp)
341{
342 int ret;
343
344 Coroutine *co;
345 CreateCo cco = {
346 .drv = drv,
347 .filename = g_strdup(filename),
348 .opts = opts,
349 .ret = NOT_DONE,
350 .err = NULL,
351 };
352
353 if (!drv->bdrv_create) {
354 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
355 ret = -ENOTSUP;
356 goto out;
357 }
358
359 if (qemu_in_coroutine()) {
360 /* Fast-path if already in coroutine context */
361 bdrv_create_co_entry(&cco);
362 } else {
363 co = qemu_coroutine_create(bdrv_create_co_entry);
364 qemu_coroutine_enter(co, &cco);
365 while (cco.ret == NOT_DONE) {
366 aio_poll(qemu_get_aio_context(), true);
367 }
368 }
369
370 ret = cco.ret;
371 if (ret < 0) {
372 if (cco.err) {
373 error_propagate(errp, cco.err);
374 } else {
375 error_setg_errno(errp, -ret, "Could not create image");
376 }
377 }
378
379out:
380 g_free(cco.filename);
381 return ret;
382}
383
384int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
385{
386 BlockDriver *drv;
387 Error *local_err = NULL;
388 int ret;
389
390 drv = bdrv_find_protocol(filename, true, errp);
391 if (drv == NULL) {
392 return -ENOENT;
393 }
394
395 ret = bdrv_create(drv, filename, opts, &local_err);
396 if (local_err) {
397 error_propagate(errp, local_err);
398 }
399 return ret;
400}
401
402/**
403 * Try to get @bs's logical and physical block size.
404 * On success, store them in @bsz struct and return 0.
405 * On failure return -errno.
406 * @bs must not be empty.
407 */
408int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
409{
410 BlockDriver *drv = bs->drv;
411
412 if (drv && drv->bdrv_probe_blocksizes) {
413 return drv->bdrv_probe_blocksizes(bs, bsz);
414 }
415
416 return -ENOTSUP;
417}
418
419/**
420 * Try to get @bs's geometry (cyls, heads, sectors).
421 * On success, store them in @geo struct and return 0.
422 * On failure return -errno.
423 * @bs must not be empty.
424 */
425int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
426{
427 BlockDriver *drv = bs->drv;
428
429 if (drv && drv->bdrv_probe_geometry) {
430 return drv->bdrv_probe_geometry(bs, geo);
431 }
432
433 return -ENOTSUP;
434}
435
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size: size of @filename in bytes; on Windows it must be at least MAX_PATH
 *
 * On POSIX hosts the file is created with mkstemp() in $TMPDIR, or in
 * /var/tmp if TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (-EOVERFLOW if
 * the name does not fit in @filename).
 * NOTE(review): on Windows the failure value is a negated GetLastError()
 * code rather than an errno.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /*
         * Save errno before cleaning up: unlink() may fail as well and
         * would then overwrite the error reported by close().
         */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
471
472/*
473 * Detect host devices. By convention, /dev/cdrom[N] is always
474 * recognized as a host CDROM.
475 */
476static BlockDriver *find_hdev_driver(const char *filename)
477{
478 int score_max = 0, score;
479 BlockDriver *drv = NULL, *d;
480
481 QLIST_FOREACH(d, &bdrv_drivers, list) {
482 if (d->bdrv_probe_device) {
483 score = d->bdrv_probe_device(filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = d;
487 }
488 }
489 }
490
491 return drv;
492}
493
494BlockDriver *bdrv_find_protocol(const char *filename,
495 bool allow_protocol_prefix,
496 Error **errp)
497{
498 BlockDriver *drv1;
499 char protocol[128];
500 int len;
501 const char *p;
502
503 /* TODO Drivers without bdrv_file_open must be specified explicitly */
504
505 /*
506 * XXX(hch): we really should not let host device detection
507 * override an explicit protocol specification, but moving this
508 * later breaks access to device names with colons in them.
509 * Thanks to the brain-dead persistent naming schemes on udev-
510 * based Linux systems those actually are quite common.
511 */
512 drv1 = find_hdev_driver(filename);
513 if (drv1) {
514 return drv1;
515 }
516
517 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
518 return &bdrv_file;
519 }
520
521 p = strchr(filename, ':');
522 assert(p != NULL);
523 len = p - filename;
524 if (len > sizeof(protocol) - 1)
525 len = sizeof(protocol) - 1;
526 memcpy(protocol, filename, len);
527 protocol[len] = '\0';
528 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529 if (drv1->protocol_name &&
530 !strcmp(drv1->protocol_name, protocol)) {
531 return drv1;
532 }
533 }
534
535 error_setg(errp, "Unknown protocol '%s'", protocol);
536 return NULL;
537}
538
539/*
540 * Guess image format by probing its contents.
541 * This is not a good idea when your image is raw (CVE-2008-2004), but
542 * we do it anyway for backward compatibility.
543 *
544 * @buf contains the image's first @buf_size bytes.
545 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546 * but can be smaller if the image file is smaller)
547 * @filename is its filename.
548 *
549 * For all block drivers, call the bdrv_probe() method to get its
550 * probing score.
551 * Return the first block driver with the highest probing score.
552 */
553BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554 const char *filename)
555{
556 int score_max = 0, score;
557 BlockDriver *drv = NULL, *d;
558
559 QLIST_FOREACH(d, &bdrv_drivers, list) {
560 if (d->bdrv_probe) {
561 score = d->bdrv_probe(buf, buf_size, filename);
562 if (score > score_max) {
563 score_max = score;
564 drv = d;
565 }
566 }
567 }
568
569 return drv;
570}
571
572static int find_image_format(BlockDriverState *bs, const char *filename,
573 BlockDriver **pdrv, Error **errp)
574{
575 BlockDriver *drv;
576 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
577 int ret = 0;
578
579 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
581 *pdrv = &bdrv_raw;
582 return ret;
583 }
584
585 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
586 if (ret < 0) {
587 error_setg_errno(errp, -ret, "Could not read image for determining its "
588 "format");
589 *pdrv = NULL;
590 return ret;
591 }
592
593 drv = bdrv_probe_all(buf, ret, filename);
594 if (!drv) {
595 error_setg(errp, "Could not determine image format: No compatible "
596 "driver found");
597 ret = -ENOENT;
598 }
599 *pdrv = drv;
600 return ret;
601}
602
603/**
604 * Set the current 'total_sectors' value
605 * Return 0 on success, -errno on error.
606 */
607static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
608{
609 BlockDriver *drv = bs->drv;
610
611 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
612 if (bdrv_is_sg(bs))
613 return 0;
614
615 /* query actual device if possible, otherwise just trust the hint */
616 if (drv->bdrv_getlength) {
617 int64_t length = drv->bdrv_getlength(bs);
618 if (length < 0) {
619 return length;
620 }
621 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
622 }
623
624 bs->total_sectors = hint;
625 return 0;
626}
627
628/**
629 * Set open flags for a given discard mode
630 *
631 * Return 0 on success, -1 if the discard mode was invalid.
632 */
633int bdrv_parse_discard_flags(const char *mode, int *flags)
634{
635 *flags &= ~BDRV_O_UNMAP;
636
637 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
638 /* do nothing */
639 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
640 *flags |= BDRV_O_UNMAP;
641 } else {
642 return -1;
643 }
644
645 return 0;
646}
647
648/**
649 * Set open flags for a given cache mode
650 *
651 * Return 0 on success, -1 if the cache mode was invalid.
652 */
653int bdrv_parse_cache_flags(const char *mode, int *flags)
654{
655 *flags &= ~BDRV_O_CACHE_MASK;
656
657 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
658 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
659 } else if (!strcmp(mode, "directsync")) {
660 *flags |= BDRV_O_NOCACHE;
661 } else if (!strcmp(mode, "writeback")) {
662 *flags |= BDRV_O_CACHE_WB;
663 } else if (!strcmp(mode, "unsafe")) {
664 *flags |= BDRV_O_CACHE_WB;
665 *flags |= BDRV_O_NO_FLUSH;
666 } else if (!strcmp(mode, "writethrough")) {
667 /* this is the default */
668 } else {
669 return -1;
670 }
671
672 return 0;
673}
674
675/*
676 * Returns the flags that a temporary snapshot should get, based on the
677 * originally requested flags (the originally requested image will have flags
678 * like a backing file)
679 */
680static int bdrv_temp_snapshot_flags(int flags)
681{
682 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
683}
684
685/*
686 * Returns the flags that bs->file should get if a protocol driver is expected,
687 * based on the given flags for the parent BDS
688 */
689static int bdrv_inherited_flags(int flags)
690{
691 /* Enable protocol handling, disable format probing for bs->file */
692 flags |= BDRV_O_PROTOCOL;
693
694 /* Our block drivers take care to send flushes and respect unmap policy,
695 * so we can enable both unconditionally on lower layers. */
696 flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
697
698 /* Clear flags that only apply to the top layer */
699 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
700
701 return flags;
702}
703
704const BdrvChildRole child_file = {
705 .inherit_flags = bdrv_inherited_flags,
706};
707
708/*
709 * Returns the flags that bs->file should get if the use of formats (and not
710 * only protocols) is permitted for it, based on the given flags for the parent
711 * BDS
712 */
713static int bdrv_inherited_fmt_flags(int parent_flags)
714{
715 int flags = child_file.inherit_flags(parent_flags);
716 return flags & ~BDRV_O_PROTOCOL;
717}
718
719const BdrvChildRole child_format = {
720 .inherit_flags = bdrv_inherited_fmt_flags,
721};
722
723/*
724 * Returns the flags that bs->backing_hd should get, based on the given flags
725 * for the parent BDS
726 */
727static int bdrv_backing_flags(int flags)
728{
729 /* backing files always opened read-only */
730 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
731
732 /* snapshot=on is handled on the top layer */
733 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
734
735 return flags;
736}
737
738static const BdrvChildRole child_backing = {
739 .inherit_flags = bdrv_backing_flags,
740};
741
742static int bdrv_open_flags(BlockDriverState *bs, int flags)
743{
744 int open_flags = flags | BDRV_O_CACHE_WB;
745
746 /*
747 * Clear flags that are internal to the block layer before opening the
748 * image.
749 */
750 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
751
752 /*
753 * Snapshots should be writable.
754 */
755 if (flags & BDRV_O_TEMPORARY) {
756 open_flags |= BDRV_O_RDWR;
757 }
758
759 return open_flags;
760}
761
762static void bdrv_assign_node_name(BlockDriverState *bs,
763 const char *node_name,
764 Error **errp)
765{
766 if (!node_name) {
767 return;
768 }
769
770 /* Check for empty string or invalid characters */
771 if (!id_wellformed(node_name)) {
772 error_setg(errp, "Invalid node name");
773 return;
774 }
775
776 /* takes care of avoiding namespaces collisions */
777 if (blk_by_name(node_name)) {
778 error_setg(errp, "node-name=%s is conflicting with a device id",
779 node_name);
780 return;
781 }
782
783 /* takes care of avoiding duplicates node names */
784 if (bdrv_find_node(node_name)) {
785 error_setg(errp, "Duplicate node name");
786 return;
787 }
788
789 /* copy node name into the bs and insert it into the graph list */
790 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
791 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
792}
793
794static QemuOptsList bdrv_runtime_opts = {
795 .name = "bdrv_common",
796 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
797 .desc = {
798 {
799 .name = "node-name",
800 .type = QEMU_OPT_STRING,
801 .help = "Node name of the block device node",
802 },
803 { /* end of list */ }
804 },
805};
806
807/*
808 * Common part for opening disk images and files
809 *
810 * Removes all processed options from *options.
811 */
812static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
813 QDict *options, int flags, BlockDriver *drv, Error **errp)
814{
815 int ret, open_flags;
816 const char *filename;
817 const char *node_name = NULL;
818 QemuOpts *opts;
819 Error *local_err = NULL;
820
821 assert(drv != NULL);
822 assert(bs->file == NULL);
823 assert(options != NULL && bs->options != options);
824
825 if (file != NULL) {
826 filename = file->filename;
827 } else {
828 filename = qdict_get_try_str(options, "filename");
829 }
830
831 if (drv->bdrv_needs_filename && !filename) {
832 error_setg(errp, "The '%s' block driver requires a file name",
833 drv->format_name);
834 return -EINVAL;
835 }
836
837 trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
838
839 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
840 qemu_opts_absorb_qdict(opts, options, &local_err);
841 if (local_err) {
842 error_propagate(errp, local_err);
843 ret = -EINVAL;
844 goto fail_opts;
845 }
846
847 node_name = qemu_opt_get(opts, "node-name");
848 bdrv_assign_node_name(bs, node_name, &local_err);
849 if (local_err) {
850 error_propagate(errp, local_err);
851 ret = -EINVAL;
852 goto fail_opts;
853 }
854
855 bs->guest_block_size = 512;
856 bs->request_alignment = 512;
857 bs->zero_beyond_eof = true;
858 open_flags = bdrv_open_flags(bs, flags);
859 bs->read_only = !(open_flags & BDRV_O_RDWR);
860
861 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
862 error_setg(errp,
863 !bs->read_only && bdrv_is_whitelisted(drv, true)
864 ? "Driver '%s' can only be used for read-only devices"
865 : "Driver '%s' is not whitelisted",
866 drv->format_name);
867 ret = -ENOTSUP;
868 goto fail_opts;
869 }
870
871 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
872 if (flags & BDRV_O_COPY_ON_READ) {
873 if (!bs->read_only) {
874 bdrv_enable_copy_on_read(bs);
875 } else {
876 error_setg(errp, "Can't use copy-on-read on read-only device");
877 ret = -EINVAL;
878 goto fail_opts;
879 }
880 }
881
882 if (filename != NULL) {
883 pstrcpy(bs->filename, sizeof(bs->filename), filename);
884 } else {
885 bs->filename[0] = '\0';
886 }
887 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
888
889 bs->drv = drv;
890 bs->opaque = g_malloc0(drv->instance_size);
891
892 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
893
894 /* Open the image, either directly or using a protocol */
895 if (drv->bdrv_file_open) {
896 assert(file == NULL);
897 assert(!drv->bdrv_needs_filename || filename != NULL);
898 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
899 } else {
900 if (file == NULL) {
901 error_setg(errp, "Can't use '%s' as a block driver for the "
902 "protocol level", drv->format_name);
903 ret = -EINVAL;
904 goto free_and_fail;
905 }
906 bs->file = file;
907 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
908 }
909
910 if (ret < 0) {
911 if (local_err) {
912 error_propagate(errp, local_err);
913 } else if (bs->filename[0]) {
914 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
915 } else {
916 error_setg_errno(errp, -ret, "Could not open image");
917 }
918 goto free_and_fail;
919 }
920
921 if (bs->encrypted) {
922 error_report("Encrypted images are deprecated");
923 error_printf("Support for them will be removed in a future release.\n"
924 "You can use 'qemu-img convert' to convert your image"
925 " to an unencrypted one.\n");
926 }
927
928 ret = refresh_total_sectors(bs, bs->total_sectors);
929 if (ret < 0) {
930 error_setg_errno(errp, -ret, "Could not refresh total sector count");
931 goto free_and_fail;
932 }
933
934 bdrv_refresh_limits(bs, &local_err);
935 if (local_err) {
936 error_propagate(errp, local_err);
937 ret = -EINVAL;
938 goto free_and_fail;
939 }
940
941 assert(bdrv_opt_mem_align(bs) != 0);
942 assert(bdrv_min_mem_align(bs) != 0);
943 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
944
945 qemu_opts_del(opts);
946 return 0;
947
948free_and_fail:
949 bs->file = NULL;
950 g_free(bs->opaque);
951 bs->opaque = NULL;
952 bs->drv = NULL;
953fail_opts:
954 qemu_opts_del(opts);
955 return ret;
956}
957
958static QDict *parse_json_filename(const char *filename, Error **errp)
959{
960 QObject *options_obj;
961 QDict *options;
962 int ret;
963
964 ret = strstart(filename, "json:", &filename);
965 assert(ret);
966
967 options_obj = qobject_from_json(filename);
968 if (!options_obj) {
969 error_setg(errp, "Could not parse the JSON options");
970 return NULL;
971 }
972
973 if (qobject_type(options_obj) != QTYPE_QDICT) {
974 qobject_decref(options_obj);
975 error_setg(errp, "Invalid JSON object given");
976 return NULL;
977 }
978
979 options = qobject_to_qdict(options_obj);
980 qdict_flatten(options);
981
982 return options;
983}
984
985/*
986 * Fills in default options for opening images and converts the legacy
987 * filename/flags pair to option QDict entries.
988 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
989 * block driver has been specified explicitly.
990 */
991static int bdrv_fill_options(QDict **options, const char **pfilename,
992 int *flags, Error **errp)
993{
994 const char *filename = *pfilename;
995 const char *drvname;
996 bool protocol = *flags & BDRV_O_PROTOCOL;
997 bool parse_filename = false;
998 BlockDriver *drv = NULL;
999 Error *local_err = NULL;
1000
1001 /* Parse json: pseudo-protocol */
1002 if (filename && g_str_has_prefix(filename, "json:")) {
1003 QDict *json_options = parse_json_filename(filename, &local_err);
1004 if (local_err) {
1005 error_propagate(errp, local_err);
1006 return -EINVAL;
1007 }
1008
1009 /* Options given in the filename have lower priority than options
1010 * specified directly */
1011 qdict_join(*options, json_options, false);
1012 QDECREF(json_options);
1013 *pfilename = filename = NULL;
1014 }
1015
1016 drvname = qdict_get_try_str(*options, "driver");
1017 if (drvname) {
1018 drv = bdrv_find_format(drvname);
1019 if (!drv) {
1020 error_setg(errp, "Unknown driver '%s'", drvname);
1021 return -ENOENT;
1022 }
1023 /* If the user has explicitly specified the driver, this choice should
1024 * override the BDRV_O_PROTOCOL flag */
1025 protocol = drv->bdrv_file_open;
1026 }
1027
1028 if (protocol) {
1029 *flags |= BDRV_O_PROTOCOL;
1030 } else {
1031 *flags &= ~BDRV_O_PROTOCOL;
1032 }
1033
1034 /* Fetch the file name from the options QDict if necessary */
1035 if (protocol && filename) {
1036 if (!qdict_haskey(*options, "filename")) {
1037 qdict_put(*options, "filename", qstring_from_str(filename));
1038 parse_filename = true;
1039 } else {
1040 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1041 "the same time");
1042 return -EINVAL;
1043 }
1044 }
1045
1046 /* Find the right block driver */
1047 filename = qdict_get_try_str(*options, "filename");
1048
1049 if (!drvname && protocol) {
1050 if (filename) {
1051 drv = bdrv_find_protocol(filename, parse_filename, errp);
1052 if (!drv) {
1053 return -EINVAL;
1054 }
1055
1056 drvname = drv->format_name;
1057 qdict_put(*options, "driver", qstring_from_str(drvname));
1058 } else {
1059 error_setg(errp, "Must specify either driver or file");
1060 return -EINVAL;
1061 }
1062 }
1063
1064 assert(drv || !protocol);
1065
1066 /* Driver-specific filename parsing */
1067 if (drv && drv->bdrv_parse_filename && parse_filename) {
1068 drv->bdrv_parse_filename(filename, *options, &local_err);
1069 if (local_err) {
1070 error_propagate(errp, local_err);
1071 return -EINVAL;
1072 }
1073
1074 if (!drv->bdrv_needs_filename) {
1075 qdict_del(*options, "filename");
1076 }
1077 }
1078
1079 return 0;
1080}
1081
1082static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1083 BlockDriverState *child_bs,
1084 const BdrvChildRole *child_role)
1085{
1086 BdrvChild *child = g_new(BdrvChild, 1);
1087 *child = (BdrvChild) {
1088 .bs = child_bs,
1089 .role = child_role,
1090 };
1091
1092 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1093
1094 return child;
1095}
1096
1097static void bdrv_detach_child(BdrvChild *child)
1098{
1099 QLIST_REMOVE(child, next);
1100 g_free(child);
1101}
1102
1103void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1104{
1105 BlockDriverState *child_bs = child->bs;
1106
1107 if (child->bs->inherits_from == parent) {
1108 child->bs->inherits_from = NULL;
1109 }
1110
1111 bdrv_detach_child(child);
1112 bdrv_unref(child_bs);
1113}
1114
1115void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1116{
1117
1118 if (bs->backing_hd) {
1119 assert(bs->backing_blocker);
1120 bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
1121 bdrv_detach_child(bs->backing_child);
1122 } else if (backing_hd) {
1123 error_setg(&bs->backing_blocker,
1124 "node is used as backing hd of '%s'",
1125 bdrv_get_device_or_node_name(bs));
1126 }
1127
1128 bs->backing_hd = backing_hd;
1129 if (!backing_hd) {
1130 error_free(bs->backing_blocker);
1131 bs->backing_blocker = NULL;
1132 bs->backing_child = NULL;
1133 goto out;
1134 }
1135 bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing);
1136 bs->open_flags &= ~BDRV_O_NO_BACKING;
1137 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1138 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1139 backing_hd->drv ? backing_hd->drv->format_name : "");
1140
1141 bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
1142 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1143 bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1144 bs->backing_blocker);
1145out:
1146 bdrv_refresh_limits(bs, NULL);
1147}
1148
1149/*
1150 * Opens the backing file for a BlockDriverState if not yet open
1151 *
1152 * options is a QDict of options to pass to the block drivers, or NULL for an
1153 * empty set of options. The reference to the QDict is transferred to this
1154 * function (even on failure), so if the caller intends to reuse the dictionary,
1155 * it needs to use QINCREF() before calling bdrv_file_open.
1156 */
1157int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
1158{
1159 char *backing_filename = g_malloc0(PATH_MAX);
1160 int ret = 0;
1161 BlockDriverState *backing_hd;
1162 Error *local_err = NULL;
1163
1164 if (bs->backing_hd != NULL) {
1165 QDECREF(options);
1166 goto free_exit;
1167 }
1168
1169 /* NULL means an empty set of options */
1170 if (options == NULL) {
1171 options = qdict_new();
1172 }
1173
1174 bs->open_flags &= ~BDRV_O_NO_BACKING;
1175 if (qdict_haskey(options, "file.filename")) {
1176 backing_filename[0] = '\0';
1177 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1178 QDECREF(options);
1179 goto free_exit;
1180 } else {
1181 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1182 &local_err);
1183 if (local_err) {
1184 ret = -EINVAL;
1185 error_propagate(errp, local_err);
1186 QDECREF(options);
1187 goto free_exit;
1188 }
1189 }
1190
1191 if (!bs->drv || !bs->drv->supports_backing) {
1192 ret = -EINVAL;
1193 error_setg(errp, "Driver doesn't support backing files");
1194 QDECREF(options);
1195 goto free_exit;
1196 }
1197
1198 backing_hd = bdrv_new();
1199
1200 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1201 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1202 }
1203
1204 assert(bs->backing_hd == NULL);
1205 ret = bdrv_open_inherit(&backing_hd,
1206 *backing_filename ? backing_filename : NULL,
1207 NULL, options, 0, bs, &child_backing, &local_err);
1208 if (ret < 0) {
1209 bdrv_unref(backing_hd);
1210 backing_hd = NULL;
1211 bs->open_flags |= BDRV_O_NO_BACKING;
1212 error_setg(errp, "Could not open backing file: %s",
1213 error_get_pretty(local_err));
1214 error_free(local_err);
1215 goto free_exit;
1216 }
1217
1218 bdrv_set_backing_hd(bs, backing_hd);
1219
1220free_exit:
1221 g_free(backing_filename);
1222 return ret;
1223}
1224
1225/*
1226 * Opens a disk image whose options are given as BlockdevRef in another block
1227 * device's options.
1228 *
1229 * If allow_none is true, no image will be opened if filename is false and no
1230 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1231 *
1232 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1233 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1234 * itself, all options starting with "${bdref_key}." are considered part of the
1235 * BlockdevRef.
1236 *
1237 * The BlockdevRef will be removed from the options QDict.
1238 */
1239BdrvChild *bdrv_open_child(const char *filename,
1240 QDict *options, const char *bdref_key,
1241 BlockDriverState* parent,
1242 const BdrvChildRole *child_role,
1243 bool allow_none, Error **errp)
1244{
1245 BdrvChild *c = NULL;
1246 BlockDriverState *bs;
1247 QDict *image_options;
1248 int ret;
1249 char *bdref_key_dot;
1250 const char *reference;
1251
1252 assert(child_role != NULL);
1253
1254 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1255 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1256 g_free(bdref_key_dot);
1257
1258 reference = qdict_get_try_str(options, bdref_key);
1259 if (!filename && !reference && !qdict_size(image_options)) {
1260 if (!allow_none) {
1261 error_setg(errp, "A block device must be specified for \"%s\"",
1262 bdref_key);
1263 }
1264 QDECREF(image_options);
1265 goto done;
1266 }
1267
1268 bs = NULL;
1269 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1270 parent, child_role, errp);
1271 if (ret < 0) {
1272 goto done;
1273 }
1274
1275 c = bdrv_attach_child(parent, bs, child_role);
1276
1277done:
1278 qdict_del(options, bdref_key);
1279 return c;
1280}
1281
1282/*
1283 * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of
1284 * a BdrvChild object.
1285 *
1286 * If allow_none is true, no image will be opened if filename is false and no
1287 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1288 *
1289 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1290 */
1291int bdrv_open_image(BlockDriverState **pbs, const char *filename,
1292 QDict *options, const char *bdref_key,
1293 BlockDriverState* parent, const BdrvChildRole *child_role,
1294 bool allow_none, Error **errp)
1295{
1296 Error *local_err = NULL;
1297 BdrvChild *c;
1298
1299 assert(pbs);
1300 assert(*pbs == NULL);
1301
1302 c = bdrv_open_child(filename, options, bdref_key, parent, child_role,
1303 allow_none, &local_err);
1304 if (local_err) {
1305 error_propagate(errp, local_err);
1306 return -EINVAL;
1307 }
1308
1309 if (c != NULL) {
1310 *pbs = c->bs;
1311 }
1312
1313 return 0;
1314}
1315
1316int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1317{
1318 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1319 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1320 int64_t total_size;
1321 QemuOpts *opts = NULL;
1322 QDict *snapshot_options;
1323 BlockDriverState *bs_snapshot;
1324 Error *local_err = NULL;
1325 int ret;
1326
1327 /* if snapshot, we create a temporary backing file and open it
1328 instead of opening 'filename' directly */
1329
1330 /* Get the required size from the image */
1331 total_size = bdrv_getlength(bs);
1332 if (total_size < 0) {
1333 ret = total_size;
1334 error_setg_errno(errp, -total_size, "Could not get image size");
1335 goto out;
1336 }
1337
1338 /* Create the temporary image */
1339 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1340 if (ret < 0) {
1341 error_setg_errno(errp, -ret, "Could not get temporary filename");
1342 goto out;
1343 }
1344
1345 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1346 &error_abort);
1347 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1348 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, &local_err);
1349 qemu_opts_del(opts);
1350 if (ret < 0) {
1351 error_setg_errno(errp, -ret, "Could not create temporary overlay "
1352 "'%s': %s", tmp_filename,
1353 error_get_pretty(local_err));
1354 error_free(local_err);
1355 goto out;
1356 }
1357
1358 /* Prepare a new options QDict for the temporary file */
1359 snapshot_options = qdict_new();
1360 qdict_put(snapshot_options, "file.driver",
1361 qstring_from_str("file"));
1362 qdict_put(snapshot_options, "file.filename",
1363 qstring_from_str(tmp_filename));
1364 qdict_put(snapshot_options, "driver",
1365 qstring_from_str("qcow2"));
1366
1367 bs_snapshot = bdrv_new();
1368
1369 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1370 flags, &local_err);
1371 if (ret < 0) {
1372 error_propagate(errp, local_err);
1373 goto out;
1374 }
1375
1376 bdrv_append(bs_snapshot, bs);
1377
1378out:
1379 g_free(tmp_filename);
1380 return ret;
1381}
1382
1383/*
1384 * Opens a disk image (raw, qcow2, vmdk, ...)
1385 *
1386 * options is a QDict of options to pass to the block drivers, or NULL for an
1387 * empty set of options. The reference to the QDict belongs to the block layer
1388 * after the call (even on failure), so if the caller intends to reuse the
1389 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1390 *
1391 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1392 * If it is not NULL, the referenced BDS will be reused.
1393 *
1394 * The reference parameter may be used to specify an existing block device which
1395 * should be opened. If specified, neither options nor a filename may be given,
1396 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1397 */
1398static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1399 const char *reference, QDict *options, int flags,
1400 BlockDriverState *parent,
1401 const BdrvChildRole *child_role, Error **errp)
1402{
1403 int ret;
1404 BlockDriverState *file = NULL, *bs;
1405 BlockDriver *drv = NULL;
1406 const char *drvname;
1407 Error *local_err = NULL;
1408 int snapshot_flags = 0;
1409
1410 assert(pbs);
1411 assert(!child_role || !flags);
1412 assert(!child_role == !parent);
1413
1414 if (reference) {
1415 bool options_non_empty = options ? qdict_size(options) : false;
1416 QDECREF(options);
1417
1418 if (*pbs) {
1419 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1420 "another block device");
1421 return -EINVAL;
1422 }
1423
1424 if (filename || options_non_empty) {
1425 error_setg(errp, "Cannot reference an existing block device with "
1426 "additional options or a new filename");
1427 return -EINVAL;
1428 }
1429
1430 bs = bdrv_lookup_bs(reference, reference, errp);
1431 if (!bs) {
1432 return -ENODEV;
1433 }
1434 bdrv_ref(bs);
1435 *pbs = bs;
1436 return 0;
1437 }
1438
1439 if (*pbs) {
1440 bs = *pbs;
1441 } else {
1442 bs = bdrv_new();
1443 }
1444
1445 /* NULL means an empty set of options */
1446 if (options == NULL) {
1447 options = qdict_new();
1448 }
1449
1450 if (child_role) {
1451 bs->inherits_from = parent;
1452 flags = child_role->inherit_flags(parent->open_flags);
1453 }
1454
1455 ret = bdrv_fill_options(&options, &filename, &flags, &local_err);
1456 if (local_err) {
1457 goto fail;
1458 }
1459
1460 /* Find the right image format driver */
1461 drvname = qdict_get_try_str(options, "driver");
1462 if (drvname) {
1463 drv = bdrv_find_format(drvname);
1464 qdict_del(options, "driver");
1465 if (!drv) {
1466 error_setg(errp, "Unknown driver: '%s'", drvname);
1467 ret = -EINVAL;
1468 goto fail;
1469 }
1470 }
1471
1472 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1473
1474 bs->open_flags = flags;
1475 bs->options = options;
1476 options = qdict_clone_shallow(options);
1477
1478 /* Open image file without format layer */
1479 if ((flags & BDRV_O_PROTOCOL) == 0) {
1480 if (flags & BDRV_O_RDWR) {
1481 flags |= BDRV_O_ALLOW_RDWR;
1482 }
1483 if (flags & BDRV_O_SNAPSHOT) {
1484 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1485 flags = bdrv_backing_flags(flags);
1486 }
1487
1488 assert(file == NULL);
1489 bs->open_flags = flags;
1490 ret = bdrv_open_image(&file, filename, options, "file",
1491 bs, &child_file, true, &local_err);
1492 if (ret < 0) {
1493 goto fail;
1494 }
1495 }
1496
1497 /* Image format probing */
1498 bs->probed = !drv;
1499 if (!drv && file) {
1500 ret = find_image_format(file, filename, &drv, &local_err);
1501 if (ret < 0) {
1502 goto fail;
1503 }
1504 } else if (!drv) {
1505 error_setg(errp, "Must specify either driver or file");
1506 ret = -EINVAL;
1507 goto fail;
1508 }
1509
1510 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1511 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1512 /* file must be NULL if a protocol BDS is about to be created
1513 * (the inverse results in an error message from bdrv_open_common()) */
1514 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1515
1516 /* Open the image */
1517 ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
1518 if (ret < 0) {
1519 goto fail;
1520 }
1521
1522 if (file && (bs->file != file)) {
1523 bdrv_unref(file);
1524 file = NULL;
1525 }
1526
1527 /* If there is a backing file, use it */
1528 if ((flags & BDRV_O_NO_BACKING) == 0) {
1529 QDict *backing_options;
1530
1531 qdict_extract_subqdict(options, &backing_options, "backing.");
1532 ret = bdrv_open_backing_file(bs, backing_options, &local_err);
1533 if (ret < 0) {
1534 goto close_and_fail;
1535 }
1536 }
1537
1538 bdrv_refresh_filename(bs);
1539
1540 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1541 * temporary snapshot afterwards. */
1542 if (snapshot_flags) {
1543 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1544 if (local_err) {
1545 goto close_and_fail;
1546 }
1547 }
1548
1549 /* Check if any unknown options were used */
1550 if (options && (qdict_size(options) != 0)) {
1551 const QDictEntry *entry = qdict_first(options);
1552 if (flags & BDRV_O_PROTOCOL) {
1553 error_setg(errp, "Block protocol '%s' doesn't support the option "
1554 "'%s'", drv->format_name, entry->key);
1555 } else {
1556 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1557 "support the option '%s'", drv->format_name,
1558 bdrv_get_device_name(bs), entry->key);
1559 }
1560
1561 ret = -EINVAL;
1562 goto close_and_fail;
1563 }
1564
1565 if (!bdrv_key_required(bs)) {
1566 if (bs->blk) {
1567 blk_dev_change_media_cb(bs->blk, true);
1568 }
1569 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1570 && !runstate_check(RUN_STATE_INMIGRATE)
1571 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1572 error_setg(errp,
1573 "Guest must be stopped for opening of encrypted image");
1574 ret = -EBUSY;
1575 goto close_and_fail;
1576 }
1577
1578 QDECREF(options);
1579 *pbs = bs;
1580 return 0;
1581
1582fail:
1583 if (file != NULL) {
1584 bdrv_unref(file);
1585 }
1586 QDECREF(bs->options);
1587 QDECREF(options);
1588 bs->options = NULL;
1589 if (!*pbs) {
1590 /* If *pbs is NULL, a new BDS has been created in this function and
1591 needs to be freed now. Otherwise, it does not need to be closed,
1592 since it has not really been opened yet. */
1593 bdrv_unref(bs);
1594 }
1595 if (local_err) {
1596 error_propagate(errp, local_err);
1597 }
1598 return ret;
1599
1600close_and_fail:
1601 /* See fail path, but now the BDS has to be always closed */
1602 if (*pbs) {
1603 bdrv_close(bs);
1604 } else {
1605 bdrv_unref(bs);
1606 }
1607 QDECREF(options);
1608 if (local_err) {
1609 error_propagate(errp, local_err);
1610 }
1611 return ret;
1612}
1613
1614int bdrv_open(BlockDriverState **pbs, const char *filename,
1615 const char *reference, QDict *options, int flags, Error **errp)
1616{
1617 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1618 NULL, errp);
1619}
1620
1621typedef struct BlockReopenQueueEntry {
1622 bool prepared;
1623 BDRVReopenState state;
1624 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1625} BlockReopenQueueEntry;
1626
1627/*
1628 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1629 * reopen of multiple devices.
1630 *
1631 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1632 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1633 * be created and initialized. This newly created BlockReopenQueue should be
1634 * passed back in for subsequent calls that are intended to be of the same
1635 * atomic 'set'.
1636 *
1637 * bs is the BlockDriverState to add to the reopen queue.
1638 *
1639 * flags contains the open flags for the associated bs
1640 *
1641 * returns a pointer to bs_queue, which is either the newly allocated
1642 * bs_queue, or the existing bs_queue being used.
1643 *
1644 */
1645BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1646 BlockDriverState *bs, int flags)
1647{
1648 assert(bs != NULL);
1649
1650 BlockReopenQueueEntry *bs_entry;
1651 BdrvChild *child;
1652
1653 if (bs_queue == NULL) {
1654 bs_queue = g_new0(BlockReopenQueue, 1);
1655 QSIMPLEQ_INIT(bs_queue);
1656 }
1657
1658 /* bdrv_open() masks this flag out */
1659 flags &= ~BDRV_O_PROTOCOL;
1660
1661 QLIST_FOREACH(child, &bs->children, next) {
1662 int child_flags;
1663
1664 if (child->bs->inherits_from != bs) {
1665 continue;
1666 }
1667
1668 child_flags = child->role->inherit_flags(flags);
1669 bdrv_reopen_queue(bs_queue, child->bs, child_flags);
1670 }
1671
1672 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1673 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1674
1675 bs_entry->state.bs = bs;
1676 bs_entry->state.flags = flags;
1677
1678 return bs_queue;
1679}
1680
1681/*
1682 * Reopen multiple BlockDriverStates atomically & transactionally.
1683 *
1684 * The queue passed in (bs_queue) must have been built up previous
1685 * via bdrv_reopen_queue().
1686 *
1687 * Reopens all BDS specified in the queue, with the appropriate
1688 * flags. All devices are prepared for reopen, and failure of any
1689 * device will cause all device changes to be abandonded, and intermediate
1690 * data cleaned up.
1691 *
1692 * If all devices prepare successfully, then the changes are committed
1693 * to all devices.
1694 *
1695 */
1696int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1697{
1698 int ret = -1;
1699 BlockReopenQueueEntry *bs_entry, *next;
1700 Error *local_err = NULL;
1701
1702 assert(bs_queue != NULL);
1703
1704 bdrv_drain_all();
1705
1706 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1707 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1708 error_propagate(errp, local_err);
1709 goto cleanup;
1710 }
1711 bs_entry->prepared = true;
1712 }
1713
1714 /* If we reach this point, we have success and just need to apply the
1715 * changes
1716 */
1717 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1718 bdrv_reopen_commit(&bs_entry->state);
1719 }
1720
1721 ret = 0;
1722
1723cleanup:
1724 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1725 if (ret && bs_entry->prepared) {
1726 bdrv_reopen_abort(&bs_entry->state);
1727 }
1728 g_free(bs_entry);
1729 }
1730 g_free(bs_queue);
1731 return ret;
1732}
1733
1734
1735/* Reopen a single BlockDriverState with the specified flags. */
1736int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1737{
1738 int ret = -1;
1739 Error *local_err = NULL;
1740 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
1741
1742 ret = bdrv_reopen_multiple(queue, &local_err);
1743 if (local_err != NULL) {
1744 error_propagate(errp, local_err);
1745 }
1746 return ret;
1747}
1748
1749
1750/*
1751 * Prepares a BlockDriverState for reopen. All changes are staged in the
1752 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1753 * the block driver layer .bdrv_reopen_prepare()
1754 *
1755 * bs is the BlockDriverState to reopen
1756 * flags are the new open flags
1757 * queue is the reopen queue
1758 *
1759 * Returns 0 on success, non-zero on error. On error errp will be set
1760 * as well.
1761 *
1762 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1763 * It is the responsibility of the caller to then call the abort() or
1764 * commit() for any other BDS that have been left in a prepare() state
1765 *
1766 */
1767int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1768 Error **errp)
1769{
1770 int ret = -1;
1771 Error *local_err = NULL;
1772 BlockDriver *drv;
1773
1774 assert(reopen_state != NULL);
1775 assert(reopen_state->bs->drv != NULL);
1776 drv = reopen_state->bs->drv;
1777
1778 /* if we are to stay read-only, do not allow permission change
1779 * to r/w */
1780 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
1781 reopen_state->flags & BDRV_O_RDWR) {
1782 error_setg(errp, "Node '%s' is read only",
1783 bdrv_get_device_or_node_name(reopen_state->bs));
1784 goto error;
1785 }
1786
1787
1788 ret = bdrv_flush(reopen_state->bs);
1789 if (ret) {
1790 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
1791 strerror(-ret));
1792 goto error;
1793 }
1794
1795 if (drv->bdrv_reopen_prepare) {
1796 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
1797 if (ret) {
1798 if (local_err != NULL) {
1799 error_propagate(errp, local_err);
1800 } else {
1801 error_setg(errp, "failed while preparing to reopen image '%s'",
1802 reopen_state->bs->filename);
1803 }
1804 goto error;
1805 }
1806 } else {
1807 /* It is currently mandatory to have a bdrv_reopen_prepare()
1808 * handler for each supported drv. */
1809 error_setg(errp, "Block format '%s' used by node '%s' "
1810 "does not support reopening files", drv->format_name,
1811 bdrv_get_device_or_node_name(reopen_state->bs));
1812 ret = -1;
1813 goto error;
1814 }
1815
1816 ret = 0;
1817
1818error:
1819 return ret;
1820}
1821
1822/*
1823 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1824 * makes them final by swapping the staging BlockDriverState contents into
1825 * the active BlockDriverState contents.
1826 */
1827void bdrv_reopen_commit(BDRVReopenState *reopen_state)
1828{
1829 BlockDriver *drv;
1830
1831 assert(reopen_state != NULL);
1832 drv = reopen_state->bs->drv;
1833 assert(drv != NULL);
1834
1835 /* If there are any driver level actions to take */
1836 if (drv->bdrv_reopen_commit) {
1837 drv->bdrv_reopen_commit(reopen_state);
1838 }
1839
1840 /* set BDS specific flags now */
1841 reopen_state->bs->open_flags = reopen_state->flags;
1842 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
1843 BDRV_O_CACHE_WB);
1844 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
1845
1846 bdrv_refresh_limits(reopen_state->bs, NULL);
1847}
1848
1849/*
1850 * Abort the reopen, and delete and free the staged changes in
1851 * reopen_state
1852 */
1853void bdrv_reopen_abort(BDRVReopenState *reopen_state)
1854{
1855 BlockDriver *drv;
1856
1857 assert(reopen_state != NULL);
1858 drv = reopen_state->bs->drv;
1859 assert(drv != NULL);
1860
1861 if (drv->bdrv_reopen_abort) {
1862 drv->bdrv_reopen_abort(reopen_state);
1863 }
1864}
1865
1866
1867void bdrv_close(BlockDriverState *bs)
1868{
1869 BdrvAioNotifier *ban, *ban_next;
1870
1871 if (bs->job) {
1872 block_job_cancel_sync(bs->job);
1873 }
1874 bdrv_drain(bs); /* complete I/O */
1875 bdrv_flush(bs);
1876 bdrv_drain(bs); /* in case flush left pending I/O */
1877 notifier_list_notify(&bs->close_notifiers, bs);
1878
1879 if (bs->drv) {
1880 BdrvChild *child, *next;
1881
1882 bs->drv->bdrv_close(bs);
1883
1884 if (bs->backing_hd) {
1885 BlockDriverState *backing_hd = bs->backing_hd;
1886 bdrv_set_backing_hd(bs, NULL);
1887 bdrv_unref(backing_hd);
1888 }
1889
1890 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
1891 /* TODO Remove bdrv_unref() from drivers' close function and use
1892 * bdrv_unref_child() here */
1893 if (child->bs->inherits_from == bs) {
1894 child->bs->inherits_from = NULL;
1895 }
1896 bdrv_detach_child(child);
1897 }
1898
1899 g_free(bs->opaque);
1900 bs->opaque = NULL;
1901 bs->drv = NULL;
1902 bs->copy_on_read = 0;
1903 bs->backing_file[0] = '\0';
1904 bs->backing_format[0] = '\0';
1905 bs->total_sectors = 0;
1906 bs->encrypted = 0;
1907 bs->valid_key = 0;
1908 bs->sg = 0;
1909 bs->zero_beyond_eof = false;
1910 QDECREF(bs->options);
1911 bs->options = NULL;
1912 QDECREF(bs->full_open_options);
1913 bs->full_open_options = NULL;
1914
1915 if (bs->file != NULL) {
1916 bdrv_unref(bs->file);
1917 bs->file = NULL;
1918 }
1919 }
1920
1921 if (bs->blk) {
1922 blk_dev_change_media_cb(bs->blk, false);
1923 }
1924
1925 /*throttling disk I/O limits*/
1926 if (bs->io_limits_enabled) {
1927 bdrv_io_limits_disable(bs);
1928 }
1929
1930 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
1931 g_free(ban);
1932 }
1933 QLIST_INIT(&bs->aio_notifiers);
1934}
1935
1936void bdrv_close_all(void)
1937{
1938 BlockDriverState *bs;
1939
1940 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
1941 AioContext *aio_context = bdrv_get_aio_context(bs);
1942
1943 aio_context_acquire(aio_context);
1944 bdrv_close(bs);
1945 aio_context_release(aio_context);
1946 }
1947}
1948
1949/* make a BlockDriverState anonymous by removing from bdrv_state and
1950 * graph_bdrv_state list.
1951 Also, NULL terminate the device_name to prevent double remove */
1952void bdrv_make_anon(BlockDriverState *bs)
1953{
1954 /*
1955 * Take care to remove bs from bdrv_states only when it's actually
1956 * in it. Note that bs->device_list.tqe_prev is initially null,
1957 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1958 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1959 * resetting it to null on remove.
1960 */
1961 if (bs->device_list.tqe_prev) {
1962 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
1963 bs->device_list.tqe_prev = NULL;
1964 }
1965 if (bs->node_name[0] != '\0') {
1966 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
1967 }
1968 bs->node_name[0] = '\0';
1969}
1970
1971static void bdrv_rebind(BlockDriverState *bs)
1972{
1973 if (bs->drv && bs->drv->bdrv_rebind) {
1974 bs->drv->bdrv_rebind(bs);
1975 }
1976}
1977
1978static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
1979 BlockDriverState *bs_src)
1980{
1981 /* move some fields that need to stay attached to the device */
1982
1983 /* dev info */
1984 bs_dest->guest_block_size = bs_src->guest_block_size;
1985 bs_dest->copy_on_read = bs_src->copy_on_read;
1986
1987 bs_dest->enable_write_cache = bs_src->enable_write_cache;
1988
1989 /* i/o throttled req */
1990 bs_dest->throttle_state = bs_src->throttle_state,
1991 bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
1992 bs_dest->pending_reqs[0] = bs_src->pending_reqs[0];
1993 bs_dest->pending_reqs[1] = bs_src->pending_reqs[1];
1994 bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
1995 bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
1996 memcpy(&bs_dest->round_robin,
1997 &bs_src->round_robin,
1998 sizeof(bs_dest->round_robin));
1999 memcpy(&bs_dest->throttle_timers,
2000 &bs_src->throttle_timers,
2001 sizeof(ThrottleTimers));
2002
2003 /* r/w error */
2004 bs_dest->on_read_error = bs_src->on_read_error;
2005 bs_dest->on_write_error = bs_src->on_write_error;
2006
2007 /* i/o status */
2008 bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
2009 bs_dest->iostatus = bs_src->iostatus;
2010
2011 /* dirty bitmap */
2012 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2013
2014 /* reference count */
2015 bs_dest->refcnt = bs_src->refcnt;
2016
2017 /* job */
2018 bs_dest->job = bs_src->job;
2019
2020 /* keep the same entry in bdrv_states */
2021 bs_dest->device_list = bs_src->device_list;
2022 bs_dest->blk = bs_src->blk;
2023
2024 memcpy(bs_dest->op_blockers, bs_src->op_blockers,
2025 sizeof(bs_dest->op_blockers));
2026}
2027
2028/*
2029 * Swap bs contents for two image chains while they are live,
2030 * while keeping required fields on the BlockDriverState that is
2031 * actually attached to a device.
2032 *
2033 * This will modify the BlockDriverState fields, and swap contents
2034 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2035 *
2036 * bs_new must not be attached to a BlockBackend.
2037 *
2038 * This function does not create any image files.
2039 */
2040void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2041{
2042 BlockDriverState tmp;
2043 BdrvChild *child;
2044
2045 bdrv_drain(bs_new);
2046 bdrv_drain(bs_old);
2047
2048 /* The code needs to swap the node_name but simply swapping node_list won't
2049 * work so first remove the nodes from the graph list, do the swap then
2050 * insert them back if needed.
2051 */
2052 if (bs_new->node_name[0] != '\0') {
2053 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2054 }
2055 if (bs_old->node_name[0] != '\0') {
2056 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2057 }
2058
2059 /* If the BlockDriverState is part of a throttling group acquire
2060 * its lock since we're going to mess with the protected fields.
2061 * Otherwise there's no need to worry since no one else can touch
2062 * them. */
2063 if (bs_old->throttle_state) {
2064 throttle_group_lock(bs_old);
2065 }
2066
2067 /* bs_new must be unattached and shouldn't have anything fancy enabled */
2068 assert(!bs_new->blk);
2069 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
2070 assert(bs_new->job == NULL);
2071 assert(bs_new->io_limits_enabled == false);
2072 assert(bs_new->throttle_state == NULL);
2073 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2074
2075 tmp = *bs_new;
2076 *bs_new = *bs_old;
2077 *bs_old = tmp;
2078
2079 /* there are some fields that should not be swapped, move them back */
2080 bdrv_move_feature_fields(&tmp, bs_old);
2081 bdrv_move_feature_fields(bs_old, bs_new);
2082 bdrv_move_feature_fields(bs_new, &tmp);
2083
2084 /* bs_new must remain unattached */
2085 assert(!bs_new->blk);
2086
2087 /* Check a few fields that should remain attached to the device */
2088 assert(bs_new->job == NULL);
2089 assert(bs_new->io_limits_enabled == false);
2090 assert(bs_new->throttle_state == NULL);
2091 assert(!throttle_timers_are_initialized(&bs_new->throttle_timers));
2092
2093 /* Release the ThrottleGroup lock */
2094 if (bs_old->throttle_state) {
2095 throttle_group_unlock(bs_old);
2096 }
2097
2098 /* insert the nodes back into the graph node list if needed */
2099 if (bs_new->node_name[0] != '\0') {
2100 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2101 }
2102 if (bs_old->node_name[0] != '\0') {
2103 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2104 }
2105
2106 /*
2107 * Update lh_first.le_prev for non-empty lists.
2108 *
2109 * The head of the op blocker list doesn't change because it is moved back
2110 * in bdrv_move_feature_fields().
2111 */
2112 assert(QLIST_EMPTY(&bs_old->tracked_requests));
2113 assert(QLIST_EMPTY(&bs_new->tracked_requests));
2114
2115 QLIST_FIX_HEAD_PTR(&bs_new->children, next);
2116 QLIST_FIX_HEAD_PTR(&bs_old->children, next);
2117
2118 /* Update references in bs->opaque and children */
2119 QLIST_FOREACH(child, &bs_old->children, next) {
2120 if (child->bs->inherits_from == bs_new) {
2121 child->bs->inherits_from = bs_old;
2122 }
2123 }
2124 QLIST_FOREACH(child, &bs_new->children, next) {
2125 if (child->bs->inherits_from == bs_old) {
2126 child->bs->inherits_from = bs_new;
2127 }
2128 }
2129
2130 bdrv_rebind(bs_new);
2131 bdrv_rebind(bs_old);
2132}
2133
2134/*
2135 * Add new bs contents at the top of an image chain while the chain is
2136 * live, while keeping required fields on the top layer.
2137 *
2138 * This will modify the BlockDriverState fields, and swap contents
2139 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2140 *
2141 * bs_new must not be attached to a BlockBackend.
2142 *
2143 * This function does not create any image files.
2144 */
2145void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2146{
2147 bdrv_swap(bs_new, bs_top);
2148
2149 /* The contents of 'tmp' will become bs_top, as we are
2150 * swapping bs_new and bs_top contents. */
2151 bdrv_set_backing_hd(bs_top, bs_new);
2152}
2153
2154static void bdrv_delete(BlockDriverState *bs)
2155{
2156 assert(!bs->job);
2157 assert(bdrv_op_blocker_is_empty(bs));
2158 assert(!bs->refcnt);
2159 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2160
2161 bdrv_close(bs);
2162
2163 /* remove from list, if necessary */
2164 bdrv_make_anon(bs);
2165
2166 g_free(bs);
2167}
2168
2169/*
2170 * Run consistency checks on an image
2171 *
2172 * Returns 0 if the check could be completed (it doesn't mean that the image is
2173 * free of errors) or -errno when an internal error occurred. The results of the
2174 * check are stored in res.
2175 */
2176int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2177{
2178 if (bs->drv == NULL) {
2179 return -ENOMEDIUM;
2180 }
2181 if (bs->drv->bdrv_check == NULL) {
2182 return -ENOTSUP;
2183 }
2184
2185 memset(res, 0, sizeof(*res));
2186 return bs->drv->bdrv_check(bs, res, fix);
2187}
2188
2189#define COMMIT_BUF_SECTORS 2048
2190
2191/* commit COW file into the raw image */
2192int bdrv_commit(BlockDriverState *bs)
2193{
2194 BlockDriver *drv = bs->drv;
2195 int64_t sector, total_sectors, length, backing_length;
2196 int n, ro, open_flags;
2197 int ret = 0;
2198 uint8_t *buf = NULL;
2199
2200 if (!drv)
2201 return -ENOMEDIUM;
2202
2203 if (!bs->backing_hd) {
2204 return -ENOTSUP;
2205 }
2206
2207 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2208 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2209 return -EBUSY;
2210 }
2211
2212 ro = bs->backing_hd->read_only;
2213 open_flags = bs->backing_hd->open_flags;
2214
2215 if (ro) {
2216 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2217 return -EACCES;
2218 }
2219 }
2220
2221 length = bdrv_getlength(bs);
2222 if (length < 0) {
2223 ret = length;
2224 goto ro_cleanup;
2225 }
2226
2227 backing_length = bdrv_getlength(bs->backing_hd);
2228 if (backing_length < 0) {
2229 ret = backing_length;
2230 goto ro_cleanup;
2231 }
2232
2233 /* If our top snapshot is larger than the backing file image,
2234 * grow the backing file image if possible. If not possible,
2235 * we must return an error */
2236 if (length > backing_length) {
2237 ret = bdrv_truncate(bs->backing_hd, length);
2238 if (ret < 0) {
2239 goto ro_cleanup;
2240 }
2241 }
2242
2243 total_sectors = length >> BDRV_SECTOR_BITS;
2244
2245 /* qemu_try_blockalign() for bs will choose an alignment that works for
2246 * bs->backing_hd as well, so no need to compare the alignment manually. */
2247 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2248 if (buf == NULL) {
2249 ret = -ENOMEM;
2250 goto ro_cleanup;
2251 }
2252
2253 for (sector = 0; sector < total_sectors; sector += n) {
2254 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2255 if (ret < 0) {
2256 goto ro_cleanup;
2257 }
2258 if (ret) {
2259 ret = bdrv_read(bs, sector, buf, n);
2260 if (ret < 0) {
2261 goto ro_cleanup;
2262 }
2263
2264 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2265 if (ret < 0) {
2266 goto ro_cleanup;
2267 }
2268 }
2269 }
2270
2271 if (drv->bdrv_make_empty) {
2272 ret = drv->bdrv_make_empty(bs);
2273 if (ret < 0) {
2274 goto ro_cleanup;
2275 }
2276 bdrv_flush(bs);
2277 }
2278
2279 /*
2280 * Make sure all data we wrote to the backing device is actually
2281 * stable on disk.
2282 */
2283 if (bs->backing_hd) {
2284 bdrv_flush(bs->backing_hd);
2285 }
2286
2287 ret = 0;
2288ro_cleanup:
2289 qemu_vfree(buf);
2290
2291 if (ro) {
2292 /* ignoring error return here */
2293 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2294 }
2295
2296 return ret;
2297}
2298
2299int bdrv_commit_all(void)
2300{
2301 BlockDriverState *bs;
2302
2303 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2304 AioContext *aio_context = bdrv_get_aio_context(bs);
2305
2306 aio_context_acquire(aio_context);
2307 if (bs->drv && bs->backing_hd) {
2308 int ret = bdrv_commit(bs);
2309 if (ret < 0) {
2310 aio_context_release(aio_context);
2311 return ret;
2312 }
2313 }
2314 aio_context_release(aio_context);
2315 }
2316 return 0;
2317}
2318
2319/*
2320 * Return values:
2321 * 0 - success
2322 * -EINVAL - backing format specified, but no file
2323 * -ENOSPC - can't update the backing file because no space is left in the
2324 * image file header
2325 * -ENOTSUP - format driver doesn't support changing the backing file
2326 */
2327int bdrv_change_backing_file(BlockDriverState *bs,
2328 const char *backing_file, const char *backing_fmt)
2329{
2330 BlockDriver *drv = bs->drv;
2331 int ret;
2332
2333 /* Backing file format doesn't make sense without a backing file */
2334 if (backing_fmt && !backing_file) {
2335 return -EINVAL;
2336 }
2337
2338 if (drv->bdrv_change_backing_file != NULL) {
2339 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2340 } else {
2341 ret = -ENOTSUP;
2342 }
2343
2344 if (ret == 0) {
2345 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2346 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2347 }
2348 return ret;
2349}
2350
2351/*
2352 * Finds the image layer in the chain that has 'bs' as its backing file.
2353 *
2354 * active is the current topmost image.
2355 *
2356 * Returns NULL if bs is not found in active's image chain,
2357 * or if active == bs.
2358 *
2359 * Returns the bottommost base image if bs == NULL.
2360 */
2361BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2362 BlockDriverState *bs)
2363{
2364 while (active && bs != active->backing_hd) {
2365 active = active->backing_hd;
2366 }
2367
2368 return active;
2369}
2370
2371/* Given a BDS, searches for the base layer. */
2372BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2373{
2374 return bdrv_find_overlay(bs, NULL);
2375}
2376
/* Queue element used by bdrv_drop_intermediate() to remember one image
 * that is scheduled to be dropped from the chain. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;                        /* image to drop */
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; /* queue linkage */
} BlkIntermediateStates;
2381
2382
2383/*
2384 * Drops images above 'base' up to and including 'top', and sets the image
2385 * above 'top' to have base as its backing file.
2386 *
2387 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2388 * information in 'bs' can be properly updated.
2389 *
2390 * E.g., this will convert the following chain:
2391 * bottom <- base <- intermediate <- top <- active
2392 *
2393 * to
2394 *
2395 * bottom <- base <- active
2396 *
2397 * It is allowed for bottom==base, in which case it converts:
2398 *
2399 * base <- intermediate <- top <- active
2400 *
2401 * to
2402 *
2403 * base <- active
2404 *
2405 * If backing_file_str is non-NULL, it will be used when modifying top's
2406 * overlay image metadata.
2407 *
2408 * Error conditions:
2409 * if active == top, that is considered an error
2410 *
2411 */
2412int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2413 BlockDriverState *base, const char *backing_file_str)
2414{
2415 BlockDriverState *intermediate;
2416 BlockDriverState *base_bs = NULL;
2417 BlockDriverState *new_top_bs = NULL;
2418 BlkIntermediateStates *intermediate_state, *next;
2419 int ret = -EIO;
2420
2421 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2422 QSIMPLEQ_INIT(&states_to_delete);
2423
2424 if (!top->drv || !base->drv) {
2425 goto exit;
2426 }
2427
2428 new_top_bs = bdrv_find_overlay(active, top);
2429
2430 if (new_top_bs == NULL) {
2431 /* we could not find the image above 'top', this is an error */
2432 goto exit;
2433 }
2434
2435 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2436 * to do, no intermediate images */
2437 if (new_top_bs->backing_hd == base) {
2438 ret = 0;
2439 goto exit;
2440 }
2441
2442 intermediate = top;
2443
2444 /* now we will go down through the list, and add each BDS we find
2445 * into our deletion queue, until we hit the 'base'
2446 */
2447 while (intermediate) {
2448 intermediate_state = g_new0(BlkIntermediateStates, 1);
2449 intermediate_state->bs = intermediate;
2450 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2451
2452 if (intermediate->backing_hd == base) {
2453 base_bs = intermediate->backing_hd;
2454 break;
2455 }
2456 intermediate = intermediate->backing_hd;
2457 }
2458 if (base_bs == NULL) {
2459 /* something went wrong, we did not end at the base. safely
2460 * unravel everything, and exit with error */
2461 goto exit;
2462 }
2463
2464 /* success - we can delete the intermediate states, and link top->base */
2465 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2466 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2467 base_bs->drv ? base_bs->drv->format_name : "");
2468 if (ret) {
2469 goto exit;
2470 }
2471 bdrv_set_backing_hd(new_top_bs, base_bs);
2472
2473 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2474 /* so that bdrv_close() does not recursively close the chain */
2475 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2476 bdrv_unref(intermediate_state->bs);
2477 }
2478 ret = 0;
2479
2480exit:
2481 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2482 g_free(intermediate_state);
2483 }
2484 return ret;
2485}
2486
2487/**
2488 * Truncate file to 'offset' bytes (needed only for file protocols)
2489 */
2490int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2491{
2492 BlockDriver *drv = bs->drv;
2493 int ret;
2494 if (!drv)
2495 return -ENOMEDIUM;
2496 if (!drv->bdrv_truncate)
2497 return -ENOTSUP;
2498 if (bs->read_only)
2499 return -EACCES;
2500
2501 ret = drv->bdrv_truncate(bs, offset);
2502 if (ret == 0) {
2503 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2504 bdrv_dirty_bitmap_truncate(bs);
2505 if (bs->blk) {
2506 blk_dev_resize_cb(bs->blk);
2507 }
2508 }
2509 return ret;
2510}
2511
2512/**
2513 * Length of a allocated file in bytes. Sparse files are counted by actual
2514 * allocated space. Return < 0 if error or unknown.
2515 */
2516int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2517{
2518 BlockDriver *drv = bs->drv;
2519 if (!drv) {
2520 return -ENOMEDIUM;
2521 }
2522 if (drv->bdrv_get_allocated_file_size) {
2523 return drv->bdrv_get_allocated_file_size(bs);
2524 }
2525 if (bs->file) {
2526 return bdrv_get_allocated_file_size(bs->file);
2527 }
2528 return -ENOTSUP;
2529}
2530
2531/**
2532 * Return number of sectors on success, -errno on error.
2533 */
2534int64_t bdrv_nb_sectors(BlockDriverState *bs)
2535{
2536 BlockDriver *drv = bs->drv;
2537
2538 if (!drv)
2539 return -ENOMEDIUM;
2540
2541 if (drv->has_variable_length) {
2542 int ret = refresh_total_sectors(bs, bs->total_sectors);
2543 if (ret < 0) {
2544 return ret;
2545 }
2546 }
2547 return bs->total_sectors;
2548}
2549
2550/**
2551 * Return length in bytes on success, -errno on error.
2552 * The length is always a multiple of BDRV_SECTOR_SIZE.
2553 */
2554int64_t bdrv_getlength(BlockDriverState *bs)
2555{
2556 int64_t ret = bdrv_nb_sectors(bs);
2557
2558 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2559 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2560}
2561
2562/* return 0 as number of sectors if no device present or error */
2563void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2564{
2565 int64_t nb_sectors = bdrv_nb_sectors(bs);
2566
2567 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2568}
2569
/* Record the policies that @bs applies to failed reads and failed writes. */
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}
2576
2577BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
2578{
2579 return is_read ? bs->on_read_error : bs->on_write_error;
2580}
2581
2582BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
2583{
2584 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
2585
2586 switch (on_err) {
2587 case BLOCKDEV_ON_ERROR_ENOSPC:
2588 return (error == ENOSPC) ?
2589 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
2590 case BLOCKDEV_ON_ERROR_STOP:
2591 return BLOCK_ERROR_ACTION_STOP;
2592 case BLOCKDEV_ON_ERROR_REPORT:
2593 return BLOCK_ERROR_ACTION_REPORT;
2594 case BLOCKDEV_ON_ERROR_IGNORE:
2595 return BLOCK_ERROR_ACTION_IGNORE;
2596 default:
2597 abort();
2598 }
2599}
2600
2601static void send_qmp_error_event(BlockDriverState *bs,
2602 BlockErrorAction action,
2603 bool is_read, int error)
2604{
2605 IoOperationType optype;
2606
2607 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
2608 qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
2609 bdrv_iostatus_is_enabled(bs),
2610 error == ENOSPC, strerror(error),
2611 &error_abort);
2612}
2613
2614/* This is done by device models because, while the block layer knows
2615 * about the error, it does not know whether an operation comes from
2616 * the device or the block layer (from a job, for example).
2617 */
2618void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
2619 bool is_read, int error)
2620{
2621 assert(error >= 0);
2622
2623 if (action == BLOCK_ERROR_ACTION_STOP) {
2624 /* First set the iostatus, so that "info block" returns an iostatus
2625 * that matches the events raised so far (an additional error iostatus
2626 * is fine, but not a lost one).
2627 */
2628 bdrv_iostatus_set_err(bs, error);
2629
2630 /* Then raise the request to stop the VM and the event.
2631 * qemu_system_vmstop_request_prepare has two effects. First,
2632 * it ensures that the STOP event always comes after the
2633 * BLOCK_IO_ERROR event. Second, it ensures that even if management
2634 * can observe the STOP event and do a "cont" before the STOP
2635 * event is issued, the VM will not stop. In this case, vm_start()
2636 * also ensures that the STOP/RESUME pair of events is emitted.
2637 */
2638 qemu_system_vmstop_request_prepare();
2639 send_qmp_error_event(bs, action, is_read, error);
2640 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
2641 } else {
2642 send_qmp_error_event(bs, action, is_read, error);
2643 }
2644}
2645
/* Return the read_only field of @bs. */
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}
2650
/* Return the sg field of @bs. */
int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}
2655
/* Return the enable_write_cache field of @bs. */
int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}
2660
2661void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2662{
2663 bs->enable_write_cache = wce;
2664
2665 /* so a reopen() will preserve wce */
2666 if (wce) {
2667 bs->open_flags |= BDRV_O_CACHE_WB;
2668 } else {
2669 bs->open_flags &= ~BDRV_O_CACHE_WB;
2670 }
2671}
2672
2673int bdrv_is_encrypted(BlockDriverState *bs)
2674{
2675 if (bs->backing_hd && bs->backing_hd->encrypted)
2676 return 1;
2677 return bs->encrypted;
2678}
2679
2680int bdrv_key_required(BlockDriverState *bs)
2681{
2682 BlockDriverState *backing_hd = bs->backing_hd;
2683
2684 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
2685 return 1;
2686 return (bs->encrypted && !bs->valid_key);
2687}
2688
2689int bdrv_set_key(BlockDriverState *bs, const char *key)
2690{
2691 int ret;
2692 if (bs->backing_hd && bs->backing_hd->encrypted) {
2693 ret = bdrv_set_key(bs->backing_hd, key);
2694 if (ret < 0)
2695 return ret;
2696 if (!bs->encrypted)
2697 return 0;
2698 }
2699 if (!bs->encrypted) {
2700 return -EINVAL;
2701 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2702 return -ENOMEDIUM;
2703 }
2704 ret = bs->drv->bdrv_set_key(bs, key);
2705 if (ret < 0) {
2706 bs->valid_key = 0;
2707 } else if (!bs->valid_key) {
2708 bs->valid_key = 1;
2709 if (bs->blk) {
2710 /* call the change callback now, we skipped it on open */
2711 blk_dev_change_media_cb(bs->blk, true);
2712 }
2713 }
2714 return ret;
2715}
2716
2717/*
2718 * Provide an encryption key for @bs.
2719 * If @key is non-null:
2720 * If @bs is not encrypted, fail.
2721 * Else if the key is invalid, fail.
2722 * Else set @bs's key to @key, replacing the existing key, if any.
2723 * If @key is null:
2724 * If @bs is encrypted and still lacks a key, fail.
2725 * Else do nothing.
2726 * On failure, store an error object through @errp if non-null.
2727 */
2728void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2729{
2730 if (key) {
2731 if (!bdrv_is_encrypted(bs)) {
2732 error_setg(errp, "Node '%s' is not encrypted",
2733 bdrv_get_device_or_node_name(bs));
2734 } else if (bdrv_set_key(bs, key) < 0) {
2735 error_setg(errp, QERR_INVALID_PASSWORD);
2736 }
2737 } else {
2738 if (bdrv_key_required(bs)) {
2739 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2740 "'%s' (%s) is encrypted",
2741 bdrv_get_device_or_node_name(bs),
2742 bdrv_get_encrypted_filename(bs));
2743 }
2744 }
2745}
2746
2747const char *bdrv_get_format_name(BlockDriverState *bs)
2748{
2749 return bs->drv ? bs->drv->format_name : NULL;
2750}
2751
/*
 * qsort() comparator for an array of C strings (const char *).
 *
 * qsort() passes pointers to the array elements, so @a and @b are really
 * 'const char **'; they must be dereferenced once before the strings can be
 * compared.  Passing them to strcmp() directly would compare the bytes of
 * the pointers instead of the strings they point to.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2756
2757void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2758 void *opaque)
2759{
2760 BlockDriver *drv;
2761 int count = 0;
2762 int i;
2763 const char **formats = NULL;
2764
2765 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2766 if (drv->format_name) {
2767 bool found = false;
2768 int i = count;
2769 while (formats && i && !found) {
2770 found = !strcmp(formats[--i], drv->format_name);
2771 }
2772
2773 if (!found) {
2774 formats = g_renew(const char *, formats, count + 1);
2775 formats[count++] = drv->format_name;
2776 }
2777 }
2778 }
2779
2780 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2781
2782 for (i = 0; i < count; i++) {
2783 it(opaque, formats[i]);
2784 }
2785
2786 g_free(formats);
2787}
2788
2789/* This function is to find a node in the bs graph */
2790BlockDriverState *bdrv_find_node(const char *node_name)
2791{
2792 BlockDriverState *bs;
2793
2794 assert(node_name);
2795
2796 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2797 if (!strcmp(node_name, bs->node_name)) {
2798 return bs;
2799 }
2800 }
2801 return NULL;
2802}
2803
2804/* Put this QMP function here so it can access the static graph_bdrv_states. */
2805BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2806{
2807 BlockDeviceInfoList *list, *entry;
2808 BlockDriverState *bs;
2809
2810 list = NULL;
2811 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2812 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2813 if (!info) {
2814 qapi_free_BlockDeviceInfoList(list);
2815 return NULL;
2816 }
2817 entry = g_malloc0(sizeof(*entry));
2818 entry->value = info;
2819 entry->next = list;
2820 list = entry;
2821 }
2822
2823 return list;
2824}
2825
2826BlockDriverState *bdrv_lookup_bs(const char *device,
2827 const char *node_name,
2828 Error **errp)
2829{
2830 BlockBackend *blk;
2831 BlockDriverState *bs;
2832
2833 if (device) {
2834 blk = blk_by_name(device);
2835
2836 if (blk) {
2837 return blk_bs(blk);
2838 }
2839 }
2840
2841 if (node_name) {
2842 bs = bdrv_find_node(node_name);
2843
2844 if (bs) {
2845 return bs;
2846 }
2847 }
2848
2849 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2850 device ? device : "",
2851 node_name ? node_name : "");
2852 return NULL;
2853}
2854
2855/* If 'base' is in the same chain as 'top', return true. Otherwise,
2856 * return false. If either argument is NULL, return false. */
2857bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2858{
2859 while (top && top != base) {
2860 top = top->backing_hd;
2861 }
2862
2863 return top != NULL;
2864}
2865
2866BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2867{
2868 if (!bs) {
2869 return QTAILQ_FIRST(&graph_bdrv_states);
2870 }
2871 return QTAILQ_NEXT(bs, node_list);
2872}
2873
2874BlockDriverState *bdrv_next(BlockDriverState *bs)
2875{
2876 if (!bs) {
2877 return QTAILQ_FIRST(&bdrv_states);
2878 }
2879 return QTAILQ_NEXT(bs, device_list);
2880}
2881
/* Return the node name stored in @bs. */
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
    return bs->node_name;
}
2886
2887/* TODO check what callers really want: bs->node_name or blk_name() */
2888const char *bdrv_get_device_name(const BlockDriverState *bs)
2889{
2890 return bs->blk ? blk_name(bs->blk) : "";
2891}
2892
2893/* This can be used to identify nodes that might not have a device
2894 * name associated. Since node and device names live in the same
2895 * namespace, the result is unambiguous. The exception is if both are
2896 * absent, then this returns an empty (non-null) string. */
2897const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2898{
2899 return bs->blk ? blk_name(bs->blk) : bs->node_name;
2900}
2901
/* Return the open flags stored in @bs. */
int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
2906
/* Always returns 1, regardless of @bs.
 * NOTE(review): presumably meant as a ready-made bdrv_has_zero_init
 * callback for drivers whose new images read as zeroes - confirm. */
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}
2911
2912int bdrv_has_zero_init(BlockDriverState *bs)
2913{
2914 assert(bs->drv);
2915
2916 /* If BS is a copy on write image, it is initialized to
2917 the contents of the base image, which may not be zeroes. */
2918 if (bs->backing_hd) {
2919 return 0;
2920 }
2921 if (bs->drv->bdrv_has_zero_init) {
2922 return bs->drv->bdrv_has_zero_init(bs);
2923 }
2924
2925 /* safe default */
2926 return 0;
2927}
2928
2929bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
2930{
2931 BlockDriverInfo bdi;
2932
2933 if (bs->backing_hd) {
2934 return false;
2935 }
2936
2937 if (bdrv_get_info(bs, &bdi) == 0) {
2938 return bdi.unallocated_blocks_are_zero;
2939 }
2940
2941 return false;
2942}
2943
2944bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
2945{
2946 BlockDriverInfo bdi;
2947
2948 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
2949 return false;
2950 }
2951
2952 if (bdrv_get_info(bs, &bdi) == 0) {
2953 return bdi.can_write_zeroes_with_unmap;
2954 }
2955
2956 return false;
2957}
2958
2959const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2960{
2961 if (bs->backing_hd && bs->backing_hd->encrypted)
2962 return bs->backing_file;
2963 else if (bs->encrypted)
2964 return bs->filename;
2965 else
2966 return NULL;
2967}
2968
/* Copy the backing file name of @bs into @filename, a buffer of
 * @filename_size bytes, using pstrcpy(). */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
2974
2975int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2976{
2977 BlockDriver *drv = bs->drv;
2978 if (!drv)
2979 return -ENOMEDIUM;
2980 if (!drv->bdrv_get_info)
2981 return -ENOTSUP;
2982 memset(bdi, 0, sizeof(*bdi));
2983 return drv->bdrv_get_info(bs, bdi);
2984}
2985
2986ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
2987{
2988 BlockDriver *drv = bs->drv;
2989 if (drv && drv->bdrv_get_specific_info) {
2990 return drv->bdrv_get_specific_info(bs);
2991 }
2992 return NULL;
2993}
2994
2995void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2996{
2997 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
2998 return;
2999 }
3000
3001 bs->drv->bdrv_debug_event(bs, event);
3002}
3003
3004int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3005 const char *tag)
3006{
3007 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3008 bs = bs->file;
3009 }
3010
3011 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3012 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3013 }
3014
3015 return -ENOTSUP;
3016}
3017
3018int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3019{
3020 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3021 bs = bs->file;
3022 }
3023
3024 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3025 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3026 }
3027
3028 return -ENOTSUP;
3029}
3030
3031int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3032{
3033 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3034 bs = bs->file;
3035 }
3036
3037 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3038 return bs->drv->bdrv_debug_resume(bs, tag);
3039 }
3040
3041 return -ENOTSUP;
3042}
3043
3044bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3045{
3046 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3047 bs = bs->file;
3048 }
3049
3050 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3051 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3052 }
3053
3054 return false;
3055}
3056
3057int bdrv_is_snapshot(BlockDriverState *bs)
3058{
3059 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3060}
3061
3062/* backing_file can either be relative, or absolute, or a protocol. If it is
3063 * relative, it must be relative to the chain. So, passing in bs->filename
3064 * from a BDS as backing_file should not be done, as that may be relative to
3065 * the CWD rather than the chain. */
3066BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3067 const char *backing_file)
3068{
3069 char *filename_full = NULL;
3070 char *backing_file_full = NULL;
3071 char *filename_tmp = NULL;
3072 int is_protocol = 0;
3073 BlockDriverState *curr_bs = NULL;
3074 BlockDriverState *retval = NULL;
3075
3076 if (!bs || !bs->drv || !backing_file) {
3077 return NULL;
3078 }
3079
3080 filename_full = g_malloc(PATH_MAX);
3081 backing_file_full = g_malloc(PATH_MAX);
3082 filename_tmp = g_malloc(PATH_MAX);
3083
3084 is_protocol = path_has_protocol(backing_file);
3085
3086 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
3087
3088 /* If either of the filename paths is actually a protocol, then
3089 * compare unmodified paths; otherwise make paths relative */
3090 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3091 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3092 retval = curr_bs->backing_hd;
3093 break;
3094 }
3095 } else {
3096 /* If not an absolute filename path, make it relative to the current
3097 * image's filename path */
3098 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3099 backing_file);
3100
3101 /* We are going to compare absolute pathnames */
3102 if (!realpath(filename_tmp, filename_full)) {
3103 continue;
3104 }
3105
3106 /* We need to make sure the backing filename we are comparing against
3107 * is relative to the current image filename (or absolute) */
3108 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3109 curr_bs->backing_file);
3110
3111 if (!realpath(filename_tmp, backing_file_full)) {
3112 continue;
3113 }
3114
3115 if (strcmp(backing_file_full, filename_full) == 0) {
3116 retval = curr_bs->backing_hd;
3117 break;
3118 }
3119 }
3120 }
3121
3122 g_free(filename_full);
3123 g_free(backing_file_full);
3124 g_free(filename_tmp);
3125 return retval;
3126}
3127
3128int bdrv_get_backing_file_depth(BlockDriverState *bs)
3129{
3130 if (!bs->drv) {
3131 return 0;
3132 }
3133
3134 if (!bs->backing_hd) {
3135 return 0;
3136 }
3137
3138 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
3139}
3140
/* Run the module initializers registered under MODULE_INIT_BLOCK. */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
3145
/* Like bdrv_init(), but sets use_bdrv_whitelist first. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
3151
3152void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3153{
3154 Error *local_err = NULL;
3155 int ret;
3156
3157 if (!bs->drv) {
3158 return;
3159 }
3160
3161 if (!(bs->open_flags & BDRV_O_INCOMING)) {
3162 return;
3163 }
3164 bs->open_flags &= ~BDRV_O_INCOMING;
3165
3166 if (bs->drv->bdrv_invalidate_cache) {
3167 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3168 } else if (bs->file) {
3169 bdrv_invalidate_cache(bs->file, &local_err);
3170 }
3171 if (local_err) {
3172 error_propagate(errp, local_err);
3173 return;
3174 }
3175
3176 ret = refresh_total_sectors(bs, bs->total_sectors);
3177 if (ret < 0) {
3178 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3179 return;
3180 }
3181}
3182
3183void bdrv_invalidate_cache_all(Error **errp)
3184{
3185 BlockDriverState *bs;
3186 Error *local_err = NULL;
3187
3188 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3189 AioContext *aio_context = bdrv_get_aio_context(bs);
3190
3191 aio_context_acquire(aio_context);
3192 bdrv_invalidate_cache(bs, &local_err);
3193 aio_context_release(aio_context);
3194 if (local_err) {
3195 error_propagate(errp, local_err);
3196 return;
3197 }
3198 }
3199}
3200
3201/**************************************************************/
3202/* removable device support */
3203
3204/**
3205 * Return TRUE if the media is present
3206 */
3207int bdrv_is_inserted(BlockDriverState *bs)
3208{
3209 BlockDriver *drv = bs->drv;
3210
3211 if (!drv)
3212 return 0;
3213 if (!drv->bdrv_is_inserted)
3214 return 1;
3215 return drv->bdrv_is_inserted(bs);
3216}
3217
3218/**
3219 * Return whether the media changed since the last call to this
3220 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3221 */
3222int bdrv_media_changed(BlockDriverState *bs)
3223{
3224 BlockDriver *drv = bs->drv;
3225
3226 if (drv && drv->bdrv_media_changed) {
3227 return drv->bdrv_media_changed(bs);
3228 }
3229 return -ENOTSUP;
3230}
3231
3232/**
3233 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3234 */
3235void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3236{
3237 BlockDriver *drv = bs->drv;
3238 const char *device_name;
3239
3240 if (drv && drv->bdrv_eject) {
3241 drv->bdrv_eject(bs, eject_flag);
3242 }
3243
3244 device_name = bdrv_get_device_name(bs);
3245 if (device_name[0] != '\0') {
3246 qapi_event_send_device_tray_moved(device_name,
3247 eject_flag, &error_abort);
3248 }
3249}
3250
3251/**
3252 * Lock or unlock the media (if it is locked, the user won't be able
3253 * to eject it manually).
3254 */
3255void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3256{
3257 BlockDriver *drv = bs->drv;
3258
3259 trace_bdrv_lock_medium(bs, locked);
3260
3261 if (drv && drv->bdrv_lock_medium) {
3262 drv->bdrv_lock_medium(bs, locked);
3263 }
3264}
3265
/* Store @align as the guest block size of @bs. */
void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}
3270
3271BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3272{
3273 BdrvDirtyBitmap *bm;
3274
3275 assert(name);
3276 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3277 if (bm->name && !strcmp(name, bm->name)) {
3278 return bm;
3279 }
3280 }
3281 return NULL;
3282}
3283
/* Drop the name of @bitmap, which must not be frozen: the name string is
 * freed and the bitmap is left anonymous. */
void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
{
    assert(!bdrv_dirty_bitmap_frozen(bitmap));
    g_free(bitmap->name);
    bitmap->name = NULL;
}
3290
3291BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3292 uint32_t granularity,
3293 const char *name,
3294 Error **errp)
3295{
3296 int64_t bitmap_size;
3297 BdrvDirtyBitmap *bitmap;
3298 uint32_t sector_granularity;
3299
3300 assert((granularity & (granularity - 1)) == 0);
3301
3302 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3303 error_setg(errp, "Bitmap already exists: %s", name);
3304 return NULL;
3305 }
3306 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3307 assert(sector_granularity);
3308 bitmap_size = bdrv_nb_sectors(bs);
3309 if (bitmap_size < 0) {
3310 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3311 errno = -bitmap_size;
3312 return NULL;
3313 }
3314 bitmap = g_new0(BdrvDirtyBitmap, 1);
3315 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3316 bitmap->size = bitmap_size;
3317 bitmap->name = g_strdup(name);
3318 bitmap->disabled = false;
3319 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3320 return bitmap;
3321}
3322
3323bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3324{
3325 return bitmap->successor;
3326}
3327
3328bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3329{
3330 return !(bitmap->disabled || bitmap->successor);
3331}
3332
3333DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3334{
3335 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3336 return DIRTY_BITMAP_STATUS_FROZEN;
3337 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3338 return DIRTY_BITMAP_STATUS_DISABLED;
3339 } else {
3340 return DIRTY_BITMAP_STATUS_ACTIVE;
3341 }
3342}
3343
3344/**
3345 * Create a successor bitmap destined to replace this bitmap after an operation.
3346 * Requires that the bitmap is not frozen and has no successor.
3347 */
3348int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3349 BdrvDirtyBitmap *bitmap, Error **errp)
3350{
3351 uint64_t granularity;
3352 BdrvDirtyBitmap *child;
3353
3354 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3355 error_setg(errp, "Cannot create a successor for a bitmap that is "
3356 "currently frozen");
3357 return -1;
3358 }
3359 assert(!bitmap->successor);
3360
3361 /* Create an anonymous successor */
3362 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3363 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3364 if (!child) {
3365 return -1;
3366 }
3367
3368 /* Successor will be on or off based on our current state. */
3369 child->disabled = bitmap->disabled;
3370
3371 /* Install the successor and freeze the parent */
3372 bitmap->successor = child;
3373 return 0;
3374}
3375
3376/**
3377 * For a bitmap with a successor, yield our name to the successor,
3378 * delete the old bitmap, and return a handle to the new bitmap.
3379 */
3380BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3381 BdrvDirtyBitmap *bitmap,
3382 Error **errp)
3383{
3384 char *name;
3385 BdrvDirtyBitmap *successor = bitmap->successor;
3386
3387 if (successor == NULL) {
3388 error_setg(errp, "Cannot relinquish control if "
3389 "there's no successor present");
3390 return NULL;
3391 }
3392
3393 name = bitmap->name;
3394 bitmap->name = NULL;
3395 successor->name = name;
3396 bitmap->successor = NULL;
3397 bdrv_release_dirty_bitmap(bs, bitmap);
3398
3399 return successor;
3400}
3401
3402/**
3403 * In cases of failure where we can no longer safely delete the parent,
3404 * we may wish to re-join the parent and child/successor.
3405 * The merged parent will be un-frozen, but not explicitly re-enabled.
3406 */
3407BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3408 BdrvDirtyBitmap *parent,
3409 Error **errp)
3410{
3411 BdrvDirtyBitmap *successor = parent->successor;
3412
3413 if (!successor) {
3414 error_setg(errp, "Cannot reclaim a successor when none is present");
3415 return NULL;
3416 }
3417
3418 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3419 error_setg(errp, "Merging of parent and successor bitmap failed");
3420 return NULL;
3421 }
3422 bdrv_release_dirty_bitmap(bs, successor);
3423 parent->successor = NULL;
3424
3425 return parent;
3426}
3427
3428/**
3429 * Truncates _all_ bitmaps attached to a BDS.
3430 */
3431static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3432{
3433 BdrvDirtyBitmap *bitmap;
3434 uint64_t size = bdrv_nb_sectors(bs);
3435
3436 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3437 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3438 hbitmap_truncate(bitmap->bitmap, size);
3439 bitmap->size = size;
3440 }
3441}
3442
3443void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3444{
3445 BdrvDirtyBitmap *bm, *next;
3446 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3447 if (bm == bitmap) {
3448 assert(!bdrv_dirty_bitmap_frozen(bm));
3449 QLIST_REMOVE(bitmap, list);
3450 hbitmap_free(bitmap->bitmap);
3451 g_free(bitmap->name);
3452 g_free(bitmap);
3453 return;
3454 }
3455 }
3456}
3457
3458void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3459{
3460 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3461 bitmap->disabled = true;
3462}
3463
3464void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3465{
3466 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3467 bitmap->disabled = false;
3468}
3469
3470BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3471{
3472 BdrvDirtyBitmap *bm;
3473 BlockDirtyInfoList *list = NULL;
3474 BlockDirtyInfoList **plist = &list;
3475
3476 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3477 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3478 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3479 info->count = bdrv_get_dirty_count(bm);
3480 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3481 info->has_name = !!bm->name;
3482 info->name = g_strdup(bm->name);
3483 info->status = bdrv_dirty_bitmap_status(bm);
3484 entry->value = info;
3485 *plist = entry;
3486 plist = &entry->next;
3487 }
3488
3489 return list;
3490}
3491
3492int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3493{
3494 if (bitmap) {
3495 return hbitmap_get(bitmap->bitmap, sector);
3496 } else {
3497 return 0;
3498 }
3499}
3500
3501/**
3502 * Chooses a default granularity based on the existing cluster size,
3503 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3504 * is no cluster size information available.
3505 */
3506uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3507{
3508 BlockDriverInfo bdi;
3509 uint32_t granularity;
3510
3511 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3512 granularity = MAX(4096, bdi.cluster_size);
3513 granularity = MIN(65536, granularity);
3514 } else {
3515 granularity = 65536;
3516 }
3517
3518 return granularity;
3519}
3520
3521uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3522{
3523 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3524}
3525
3526void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3527{
3528 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3529}
3530
3531void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3532 int64_t cur_sector, int nr_sectors)
3533{
3534 assert(bdrv_dirty_bitmap_enabled(bitmap));
3535 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3536}
3537
3538void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3539 int64_t cur_sector, int nr_sectors)
3540{
3541 assert(bdrv_dirty_bitmap_enabled(bitmap));
3542 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3543}
3544
3545void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3546{
3547 assert(bdrv_dirty_bitmap_enabled(bitmap));
3548 hbitmap_reset_all(bitmap->bitmap);
3549}
3550
3551void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3552 int nr_sectors)
3553{
3554 BdrvDirtyBitmap *bitmap;
3555 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3556 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3557 continue;
3558 }
3559 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3560 }
3561}
3562
3563/**
3564 * Advance an HBitmapIter to an arbitrary offset.
3565 */
3566void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3567{
3568 assert(hbi->hb);
3569 hbitmap_iter_init(hbi, hbi->hb, offset);
3570}
3571
3572int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3573{
3574 return hbitmap_count(bitmap->bitmap);
3575}
3576
3577/* Get a reference to bs */
3578void bdrv_ref(BlockDriverState *bs)
3579{
3580 bs->refcnt++;
3581}
3582
3583/* Release a previously grabbed reference to bs.
3584 * If after releasing, reference count is zero, the BlockDriverState is
3585 * deleted. */
3586void bdrv_unref(BlockDriverState *bs)
3587{
3588 if (!bs) {
3589 return;
3590 }
3591 assert(bs->refcnt > 0);
3592 if (--bs->refcnt == 0) {
3593 bdrv_delete(bs);
3594 }
3595}
3596
3597struct BdrvOpBlocker {
3598 Error *reason;
3599 QLIST_ENTRY(BdrvOpBlocker) list;
3600};
3601
3602bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3603{
3604 BdrvOpBlocker *blocker;
3605 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3606 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3607 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3608 if (errp) {
3609 error_setg(errp, "Node '%s' is busy: %s",
3610 bdrv_get_device_or_node_name(bs),
3611 error_get_pretty(blocker->reason));
3612 }
3613 return true;
3614 }
3615 return false;
3616}
3617
3618void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3619{
3620 BdrvOpBlocker *blocker;
3621 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3622
3623 blocker = g_new0(BdrvOpBlocker, 1);
3624 blocker->reason = reason;
3625 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3626}
3627
3628void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3629{
3630 BdrvOpBlocker *blocker, *next;
3631 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3632 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3633 if (blocker->reason == reason) {
3634 QLIST_REMOVE(blocker, list);
3635 g_free(blocker);
3636 }
3637 }
3638}
3639
3640void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3641{
3642 int i;
3643 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3644 bdrv_op_block(bs, i, reason);
3645 }
3646}
3647
3648void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3649{
3650 int i;
3651 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3652 bdrv_op_unblock(bs, i, reason);
3653 }
3654}
3655
3656bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3657{
3658 int i;
3659
3660 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3661 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3662 return false;
3663 }
3664 }
3665 return true;
3666}
3667
3668void bdrv_iostatus_enable(BlockDriverState *bs)
3669{
3670 bs->iostatus_enabled = true;
3671 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3672}
3673
3674/* The I/O status is only enabled if the drive explicitly
3675 * enables it _and_ the VM is configured to stop on errors */
3676bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3677{
3678 return (bs->iostatus_enabled &&
3679 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
3680 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
3681 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
3682}
3683
3684void bdrv_iostatus_disable(BlockDriverState *bs)
3685{
3686 bs->iostatus_enabled = false;
3687}
3688
3689void bdrv_iostatus_reset(BlockDriverState *bs)
3690{
3691 if (bdrv_iostatus_is_enabled(bs)) {
3692 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3693 if (bs->job) {
3694 block_job_iostatus_reset(bs->job);
3695 }
3696 }
3697}
3698
3699void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3700{
3701 assert(bdrv_iostatus_is_enabled(bs));
3702 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3703 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3704 BLOCK_DEVICE_IO_STATUS_FAILED;
3705 }
3706}
3707
3708void bdrv_img_create(const char *filename, const char *fmt,
3709 const char *base_filename, const char *base_fmt,
3710 char *options, uint64_t img_size, int flags,
3711 Error **errp, bool quiet)
3712{
3713 QemuOptsList *create_opts = NULL;
3714 QemuOpts *opts = NULL;
3715 const char *backing_fmt, *backing_file;
3716 int64_t size;
3717 BlockDriver *drv, *proto_drv;
3718 Error *local_err = NULL;
3719 int ret = 0;
3720
3721 /* Find driver and parse its options */
3722 drv = bdrv_find_format(fmt);
3723 if (!drv) {
3724 error_setg(errp, "Unknown file format '%s'", fmt);
3725 return;
3726 }
3727
3728 proto_drv = bdrv_find_protocol(filename, true, errp);
3729 if (!proto_drv) {
3730 return;
3731 }
3732
3733 if (!drv->create_opts) {
3734 error_setg(errp, "Format driver '%s' does not support image creation",
3735 drv->format_name);
3736 return;
3737 }
3738
3739 if (!proto_drv->create_opts) {
3740 error_setg(errp, "Protocol driver '%s' does not support image creation",
3741 proto_drv->format_name);
3742 return;
3743 }
3744
3745 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3746 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3747
3748 /* Create parameter list with default values */
3749 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3750 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3751
3752 /* Parse -o options */
3753 if (options) {
3754 qemu_opts_do_parse(opts, options, NULL, &local_err);
3755 if (local_err) {
3756 error_report_err(local_err);
3757 local_err = NULL;
3758 error_setg(errp, "Invalid options for file format '%s'", fmt);
3759 goto out;
3760 }
3761 }
3762
3763 if (base_filename) {
3764 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3765 if (local_err) {
3766 error_setg(errp, "Backing file not supported for file format '%s'",
3767 fmt);
3768 goto out;
3769 }
3770 }
3771
3772 if (base_fmt) {
3773 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3774 if (local_err) {
3775 error_setg(errp, "Backing file format not supported for file "
3776 "format '%s'", fmt);
3777 goto out;
3778 }
3779 }
3780
3781 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3782 if (backing_file) {
3783 if (!strcmp(filename, backing_file)) {
3784 error_setg(errp, "Error: Trying to create an image with the "
3785 "same filename as the backing file");
3786 goto out;
3787 }
3788 }
3789
3790 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3791
3792 // The size for the image must always be specified, with one exception:
3793 // If we are using a backing file, we can obtain the size from there
3794 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3795 if (size == -1) {
3796 if (backing_file) {
3797 BlockDriverState *bs;
3798 char *full_backing = g_new0(char, PATH_MAX);
3799 int64_t size;
3800 int back_flags;
3801 QDict *backing_options = NULL;
3802
3803 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3804 full_backing, PATH_MAX,
3805 &local_err);
3806 if (local_err) {
3807 g_free(full_backing);
3808 goto out;
3809 }
3810
3811 /* backing files always opened read-only */
3812 back_flags =
3813 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3814
3815 if (backing_fmt) {
3816 backing_options = qdict_new();
3817 qdict_put(backing_options, "driver",
3818 qstring_from_str(backing_fmt));
3819 }
3820
3821 bs = NULL;
3822 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3823 back_flags, &local_err);
3824 g_free(full_backing);
3825 if (ret < 0) {
3826 goto out;
3827 }
3828 size = bdrv_getlength(bs);
3829 if (size < 0) {
3830 error_setg_errno(errp, -size, "Could not get size of '%s'",
3831 backing_file);
3832 bdrv_unref(bs);
3833 goto out;
3834 }
3835
3836 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3837
3838 bdrv_unref(bs);
3839 } else {
3840 error_setg(errp, "Image creation needs a size parameter");
3841 goto out;
3842 }
3843 }
3844
3845 if (!quiet) {
3846 printf("Formatting '%s', fmt=%s ", filename, fmt);
3847 qemu_opts_print(opts, " ");
3848 puts("");
3849 }
3850
3851 ret = bdrv_create(drv, filename, opts, &local_err);
3852
3853 if (ret == -EFBIG) {
3854 /* This is generally a better message than whatever the driver would
3855 * deliver (especially because of the cluster_size_hint), since that
3856 * is most probably not much different from "image too large". */
3857 const char *cluster_size_hint = "";
3858 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3859 cluster_size_hint = " (try using a larger cluster size)";
3860 }
3861 error_setg(errp, "The image size is too large for file format '%s'"
3862 "%s", fmt, cluster_size_hint);
3863 error_free(local_err);
3864 local_err = NULL;
3865 }
3866
3867out:
3868 qemu_opts_del(opts);
3869 qemu_opts_free(create_opts);
3870 if (local_err) {
3871 error_propagate(errp, local_err);
3872 }
3873}
3874
3875AioContext *bdrv_get_aio_context(BlockDriverState *bs)
3876{
3877 return bs->aio_context;
3878}
3879
3880void bdrv_detach_aio_context(BlockDriverState *bs)
3881{
3882 BdrvAioNotifier *baf;
3883
3884 if (!bs->drv) {
3885 return;
3886 }
3887
3888 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
3889 baf->detach_aio_context(baf->opaque);
3890 }
3891
3892 if (bs->io_limits_enabled) {
3893 throttle_timers_detach_aio_context(&bs->throttle_timers);
3894 }
3895 if (bs->drv->bdrv_detach_aio_context) {
3896 bs->drv->bdrv_detach_aio_context(bs);
3897 }
3898 if (bs->file) {
3899 bdrv_detach_aio_context(bs->file);
3900 }
3901 if (bs->backing_hd) {
3902 bdrv_detach_aio_context(bs->backing_hd);
3903 }
3904
3905 bs->aio_context = NULL;
3906}
3907
3908void bdrv_attach_aio_context(BlockDriverState *bs,
3909 AioContext *new_context)
3910{
3911 BdrvAioNotifier *ban;
3912
3913 if (!bs->drv) {
3914 return;
3915 }
3916
3917 bs->aio_context = new_context;
3918
3919 if (bs->backing_hd) {
3920 bdrv_attach_aio_context(bs->backing_hd, new_context);
3921 }
3922 if (bs->file) {
3923 bdrv_attach_aio_context(bs->file, new_context);
3924 }
3925 if (bs->drv->bdrv_attach_aio_context) {
3926 bs->drv->bdrv_attach_aio_context(bs, new_context);
3927 }
3928 if (bs->io_limits_enabled) {
3929 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
3930 }
3931
3932 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
3933 ban->attached_aio_context(new_context, ban->opaque);
3934 }
3935}
3936
3937void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
3938{
3939 bdrv_drain(bs); /* ensure there are no in-flight requests */
3940
3941 bdrv_detach_aio_context(bs);
3942
3943 /* This function executes in the old AioContext so acquire the new one in
3944 * case it runs in a different thread.
3945 */
3946 aio_context_acquire(new_context);
3947 bdrv_attach_aio_context(bs, new_context);
3948 aio_context_release(new_context);
3949}
3950
3951void bdrv_add_aio_context_notifier(BlockDriverState *bs,
3952 void (*attached_aio_context)(AioContext *new_context, void *opaque),
3953 void (*detach_aio_context)(void *opaque), void *opaque)
3954{
3955 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
3956 *ban = (BdrvAioNotifier){
3957 .attached_aio_context = attached_aio_context,
3958 .detach_aio_context = detach_aio_context,
3959 .opaque = opaque
3960 };
3961
3962 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
3963}
3964
3965void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
3966 void (*attached_aio_context)(AioContext *,
3967 void *),
3968 void (*detach_aio_context)(void *),
3969 void *opaque)
3970{
3971 BdrvAioNotifier *ban, *ban_next;
3972
3973 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
3974 if (ban->attached_aio_context == attached_aio_context &&
3975 ban->detach_aio_context == detach_aio_context &&
3976 ban->opaque == opaque)
3977 {
3978 QLIST_REMOVE(ban, list);
3979 g_free(ban);
3980
3981 return;
3982 }
3983 }
3984
3985 abort();
3986}
3987
3988int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
3989 BlockDriverAmendStatusCB *status_cb)
3990{
3991 if (!bs->drv->bdrv_amend_options) {
3992 return -ENOTSUP;
3993 }
3994 return bs->drv->bdrv_amend_options(bs, opts, status_cb);
3995}
3996
3997/* This function will be called by the bdrv_recurse_is_first_non_filter method
3998 * of block filter and by bdrv_is_first_non_filter.
3999 * It is used to test if the given bs is the candidate or recurse more in the
4000 * node graph.
4001 */
4002bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4003 BlockDriverState *candidate)
4004{
4005 /* return false if basic checks fails */
4006 if (!bs || !bs->drv) {
4007 return false;
4008 }
4009
4010 /* the code reached a non block filter driver -> check if the bs is
4011 * the same as the candidate. It's the recursion termination condition.
4012 */
4013 if (!bs->drv->is_filter) {
4014 return bs == candidate;
4015 }
4016 /* Down this path the driver is a block filter driver */
4017
4018 /* If the block filter recursion method is defined use it to recurse down
4019 * the node graph.
4020 */
4021 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4022 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4023 }
4024
4025 /* the driver is a block filter but don't allow to recurse -> return false
4026 */
4027 return false;
4028}
4029
4030/* This function checks if the candidate is the first non filter bs down it's
4031 * bs chain. Since we don't have pointers to parents it explore all bs chains
4032 * from the top. Some filters can choose not to pass down the recursion.
4033 */
4034bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4035{
4036 BlockDriverState *bs;
4037
4038 /* walk down the bs forest recursively */
4039 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4040 bool perm;
4041
4042 /* try to recurse in this top level bs */
4043 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4044
4045 /* candidate is the first non filter */
4046 if (perm) {
4047 return true;
4048 }
4049 }
4050
4051 return false;
4052}
4053
4054BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4055 const char *node_name, Error **errp)
4056{
4057 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4058 AioContext *aio_context;
4059
4060 if (!to_replace_bs) {
4061 error_setg(errp, "Node name '%s' not found", node_name);
4062 return NULL;
4063 }
4064
4065 aio_context = bdrv_get_aio_context(to_replace_bs);
4066 aio_context_acquire(aio_context);
4067
4068 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4069 to_replace_bs = NULL;
4070 goto out;
4071 }
4072
4073 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4074 * most non filter in order to prevent data corruption.
4075 * Another benefit is that this tests exclude backing files which are
4076 * blocked by the backing blockers.
4077 */
4078 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4079 error_setg(errp, "Only top most non filter can be replaced");
4080 to_replace_bs = NULL;
4081 goto out;
4082 }
4083
4084out:
4085 aio_context_release(aio_context);
4086 return to_replace_bs;
4087}
4088
4089static bool append_open_options(QDict *d, BlockDriverState *bs)
4090{
4091 const QDictEntry *entry;
4092 bool found_any = false;
4093
4094 for (entry = qdict_first(bs->options); entry;
4095 entry = qdict_next(bs->options, entry))
4096 {
4097 /* Only take options for this level and exclude all non-driver-specific
4098 * options */
4099 if (!strchr(qdict_entry_key(entry), '.') &&
4100 strcmp(qdict_entry_key(entry), "node-name"))
4101 {
4102 qobject_incref(qdict_entry_value(entry));
4103 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4104 found_any = true;
4105 }
4106 }
4107
4108 return found_any;
4109}
4110
4111/* Updates the following BDS fields:
4112 * - exact_filename: A filename which may be used for opening a block device
4113 * which (mostly) equals the given BDS (even without any
4114 * other options; so reading and writing must return the same
4115 * results, but caching etc. may be different)
4116 * - full_open_options: Options which, when given when opening a block device
4117 * (without a filename), result in a BDS (mostly)
4118 * equalling the given one
4119 * - filename: If exact_filename is set, it is copied here. Otherwise,
4120 * full_open_options is converted to a JSON object, prefixed with
4121 * "json:" (for use through the JSON pseudo protocol) and put here.
4122 */
4123void bdrv_refresh_filename(BlockDriverState *bs)
4124{
4125 BlockDriver *drv = bs->drv;
4126 QDict *opts;
4127
4128 if (!drv) {
4129 return;
4130 }
4131
4132 /* This BDS's file name will most probably depend on its file's name, so
4133 * refresh that first */
4134 if (bs->file) {
4135 bdrv_refresh_filename(bs->file);
4136 }
4137
4138 if (drv->bdrv_refresh_filename) {
4139 /* Obsolete information is of no use here, so drop the old file name
4140 * information before refreshing it */
4141 bs->exact_filename[0] = '\0';
4142 if (bs->full_open_options) {
4143 QDECREF(bs->full_open_options);
4144 bs->full_open_options = NULL;
4145 }
4146
4147 drv->bdrv_refresh_filename(bs);
4148 } else if (bs->file) {
4149 /* Try to reconstruct valid information from the underlying file */
4150 bool has_open_options;
4151
4152 bs->exact_filename[0] = '\0';
4153 if (bs->full_open_options) {
4154 QDECREF(bs->full_open_options);
4155 bs->full_open_options = NULL;
4156 }
4157
4158 opts = qdict_new();
4159 has_open_options = append_open_options(opts, bs);
4160
4161 /* If no specific options have been given for this BDS, the filename of
4162 * the underlying file should suffice for this one as well */
4163 if (bs->file->exact_filename[0] && !has_open_options) {
4164 strcpy(bs->exact_filename, bs->file->exact_filename);
4165 }
4166 /* Reconstructing the full options QDict is simple for most format block
4167 * drivers, as long as the full options are known for the underlying
4168 * file BDS. The full options QDict of that file BDS should somehow
4169 * contain a representation of the filename, therefore the following
4170 * suffices without querying the (exact_)filename of this BDS. */
4171 if (bs->file->full_open_options) {
4172 qdict_put_obj(opts, "driver",
4173 QOBJECT(qstring_from_str(drv->format_name)));
4174 QINCREF(bs->file->full_open_options);
4175 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
4176
4177 bs->full_open_options = opts;
4178 } else {
4179 QDECREF(opts);
4180 }
4181 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4182 /* There is no underlying file BDS (at least referenced by BDS.file),
4183 * so the full options QDict should be equal to the options given
4184 * specifically for this block device when it was opened (plus the
4185 * driver specification).
4186 * Because those options don't change, there is no need to update
4187 * full_open_options when it's already set. */
4188
4189 opts = qdict_new();
4190 append_open_options(opts, bs);
4191 qdict_put_obj(opts, "driver",
4192 QOBJECT(qstring_from_str(drv->format_name)));
4193
4194 if (bs->exact_filename[0]) {
4195 /* This may not work for all block protocol drivers (some may
4196 * require this filename to be parsed), but we have to find some
4197 * default solution here, so just include it. If some block driver
4198 * does not support pure options without any filename at all or
4199 * needs some special format of the options QDict, it needs to
4200 * implement the driver-specific bdrv_refresh_filename() function.
4201 */
4202 qdict_put_obj(opts, "filename",
4203 QOBJECT(qstring_from_str(bs->exact_filename)));
4204 }
4205
4206 bs->full_open_options = opts;
4207 }
4208
4209 if (bs->exact_filename[0]) {
4210 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4211 } else if (bs->full_open_options) {
4212 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4213 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4214 qstring_get_str(json));
4215 QDECREF(json);
4216 }
4217}
4218
4219/* This accessor function purpose is to allow the device models to access the
4220 * BlockAcctStats structure embedded inside a BlockDriverState without being
4221 * aware of the BlockDriverState structure layout.
4222 * It will go away when the BlockAcctStats structure will be moved inside
4223 * the device models.
4224 */
4225BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
4226{
4227 return &bs->stats;
4228}
This page took 0.06441 seconds and 4 git commands to generate.