2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
34 #include <sys/types.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
47 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
48 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50 BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
58 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
59 uint8_t *buf, int nb_sectors);
60 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
61 const uint8_t *buf, int nb_sectors);
62 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
63 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
64 BlockDriverCompletionFunc *cb, void *opaque);
65 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
66 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
67 BlockDriverCompletionFunc *cb, void *opaque);
68 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
69 int64_t sector_num, int nb_sectors,
71 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
72 int64_t sector_num, int nb_sectors,
74 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
/* List of all named block devices currently open (see bdrv_new). */
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
/* Registry of every block driver implementation (populated by bdrv_register). */
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
/* True if filename begins with a single drive letter ("c:", "D:" ...).
 * NOTE(review): extract omits some original lines of this helper. */
89 static int is_windows_drive_prefix(const char *filename)
91 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
92 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
/* True if filename names a whole Windows drive or device path
 * ("\\.\..." or "//./" forms, or a bare drive-letter prefix). */
96 int is_windows_drive(const char *filename)
98 if (is_windows_drive_prefix(filename) &&
101 if (strstart(filename, "\\\\.\\", NULL) ||
102 strstart(filename, "//./", NULL))
108 /* check if the path starts with "<protocol>:" */
109 static int path_has_protocol(const char *path)
/* Windows drive letters contain ':' but are not protocol prefixes. */
112 if (is_windows_drive(path) ||
113 is_windows_drive_prefix(path)) {
118 return strchr(path, ':') != NULL;
/* True if path is absolute: leading '/' or '\\', or absolute after a
 * "<protocol>:" prefix (the ':' is searched for below). */
121 int path_is_absolute(const char *path)
125 /* specific case for names like: "\\.\d:" */
126 if (*path == '/' || *path == '\\')
129 p = strchr(path, ':');
135 return (*p == '/' || *p == '\\');
141 /* if filename is absolute, just copy it to dest. Otherwise, build a
142 path to it by considering it is relative to base_path. URL are
144 void path_combine(char *dest, int dest_size,
145 const char *base_path,
146 const char *filename)
/* Absolute names need no combining at all. */
153 if (path_is_absolute(filename)) {
154 pstrcpy(dest, dest_size, filename);
/* Otherwise find the directory part of base_path: protocol prefix,
 * then the last '/' or '\\' separator (Windows paths included). */
156 p = strchr(base_path, ':');
161 p1 = strrchr(base_path, '/');
165 p2 = strrchr(base_path, '\\');
/* Clamp the directory prefix so dest stays NUL-terminated. */
177 if (len > dest_size - 1)
179 memcpy(dest, base_path, len);
181 pstrcat(dest, dest_size, filename);
/* Register a block driver, filling in any missing I/O entry points with
 * emulation wrappers so every driver exposes the full sync/AIO/coroutine
 * interface, then add it to the global driver list. */
185 void bdrv_register(BlockDriver *bdrv)
187 if (bdrv->bdrv_co_readv) {
188 /* Emulate AIO by coroutines, and sync by AIO */
189 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
190 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
191 bdrv->bdrv_read = bdrv_read_em;
192 bdrv->bdrv_write = bdrv_write_em;
/* Driver has no native coroutine interface: emulate it. */
194 bdrv->bdrv_co_readv = bdrv_co_readv_em;
195 bdrv->bdrv_co_writev = bdrv_co_writev_em;
197 if (!bdrv->bdrv_aio_readv) {
198 /* add AIO emulation layer */
199 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
200 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
201 } else if (!bdrv->bdrv_read) {
202 /* add synchronous IO emulation layer */
203 bdrv->bdrv_read = bdrv_read_em;
204 bdrv->bdrv_write = bdrv_write_em;
208 if (!bdrv->bdrv_aio_flush)
209 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
211 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
214 /* create a new block device (by default it is empty) */
215 BlockDriverState *bdrv_new(const char *device_name)
217 BlockDriverState *bs;
219 bs = g_malloc0(sizeof(BlockDriverState));
220 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
/* Only named devices go on the global list; anonymous ("") states
 * (e.g. backing files) stay off it. */
221 if (device_name[0] != '\0') {
222 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
224 bdrv_iostatus_disable(bs);
/* Look up a registered driver by its format name; NULL if not found
 * (return path of the loop is outside this extract). */
228 BlockDriver *bdrv_find_format(const char *format_name)
231 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
232 if (!strcmp(drv1->format_name, format_name)) {
/* True if drv is permitted by the compile-time whitelist; an empty
 * whitelist permits everything. */
239 static int bdrv_is_whitelisted(BlockDriver *drv)
241 static const char *whitelist[] = {
242 CONFIG_BDRV_WHITELIST
247 return 1; /* no whitelist, anything goes */
249 for (p = whitelist; *p; p++) {
250 if (!strcmp(drv->format_name, *p)) {
/* As bdrv_find_format, but only returns whitelisted drivers. */
257 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
259 BlockDriver *drv = bdrv_find_format(format_name);
260 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
/* Create an image via the driver's create callback; drivers without
 * one cannot create images (error return is outside this extract). */
263 int bdrv_create(BlockDriver *drv, const char* filename,
264 QEMUOptionParameter *options)
266 if (!drv->bdrv_create)
269 return drv->bdrv_create(filename, options);
/* Create a file by first resolving the protocol driver for filename. */
272 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
276 drv = bdrv_find_protocol(filename);
281 return bdrv_create(drv, filename, options);
/* Win32 variant: ask the OS for a unique temp file name.
 * NOTE(review): the #ifdef _WIN32 guard lies outside this extract. */
285 void get_tmp_filename(char *filename, int size)
287 char temp_dir[MAX_PATH];
289 GetTempPath(MAX_PATH, temp_dir);
290 GetTempFileName(temp_dir, "qem", 0, filename);
/* POSIX variant: build "$TMPDIR/vl.XXXXXX" and let mkstemp create it. */
293 void get_tmp_filename(char *filename, int size)
297 /* XXX: race condition possible */
298 tmpdir = getenv("TMPDIR");
301 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
302 fd = mkstemp(filename);
308 * Detect host devices. By convention, /dev/cdrom[N] is always
309 * recognized as a host CDROM.
/* Probe every driver's bdrv_probe_device hook and keep the best scorer. */
311 static BlockDriver *find_hdev_driver(const char *filename)
313 int score_max = 0, score;
314 BlockDriver *drv = NULL, *d;
316 QLIST_FOREACH(d, &bdrv_drivers, list) {
317 if (d->bdrv_probe_device) {
318 score = d->bdrv_probe_device(filename);
319 if (score > score_max) {
/* Resolve the protocol driver for a filename: host-device probe first,
 * then the "<protocol>:" prefix, defaulting to the "file" driver. */
329 BlockDriver *bdrv_find_protocol(const char *filename)
336 /* TODO Drivers without bdrv_file_open must be specified explicitly */
339 * XXX(hch): we really should not let host device detection
340 * override an explicit protocol specification, but moving this
341 * later breaks access to device names with colons in them.
342 * Thanks to the brain-dead persistent naming schemes on udev-
343 * based Linux systems those actually are quite common.
345 drv1 = find_hdev_driver(filename);
350 if (!path_has_protocol(filename)) {
351 return bdrv_find_format("file");
/* Extract the protocol name before ':' (truncated to the buffer). */
353 p = strchr(filename, ':');
356 if (len > sizeof(protocol) - 1)
357 len = sizeof(protocol) - 1;
358 memcpy(protocol, filename, len);
359 protocol[len] = '\0';
360 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
361 if (drv1->protocol_name &&
362 !strcmp(drv1->protocol_name, protocol)) {
/* Detect the image format of filename by reading its header and letting
 * every driver's bdrv_probe score it; the best scorer wins (*pdrv). */
369 static int find_image_format(const char *filename, BlockDriver **pdrv)
371 int ret, score, score_max;
372 BlockDriver *drv1, *drv;
374 BlockDriverState *bs;
376 ret = bdrv_file_open(&bs, filename, 0);
382 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
383 if (bs->sg || !bdrv_is_inserted(bs)) {
385 drv = bdrv_find_format("raw");
/* Read the first bytes of the image for the probe functions. */
393 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
402 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
403 if (drv1->bdrv_probe) {
404 score = drv1->bdrv_probe(buf, ret, filename);
405 if (score > score_max) {
419 * Set the current 'total_sectors' value
421 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
423 BlockDriver *drv = bs->drv;
425 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
429 /* query actual device if possible, otherwise just trust the hint */
430 if (drv->bdrv_getlength) {
431 int64_t length = drv->bdrv_getlength(bs);
435 hint = length >> BDRV_SECTOR_BITS;
438 bs->total_sectors = hint;
443 * Set open flags for a given cache mode
445 * Return 0 on success, -1 if the cache mode was invalid.
447 int bdrv_parse_cache_flags(const char *mode, int *flags)
449 *flags &= ~BDRV_O_CACHE_MASK;
451 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
452 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
453 } else if (!strcmp(mode, "directsync")) {
454 *flags |= BDRV_O_NOCACHE;
455 } else if (!strcmp(mode, "writeback")) {
456 *flags |= BDRV_O_CACHE_WB;
457 } else if (!strcmp(mode, "unsafe")) {
/* "unsafe": writeback caching with flushes suppressed entirely. */
458 *flags |= BDRV_O_CACHE_WB;
459 *flags |= BDRV_O_NO_FLUSH;
460 } else if (!strcmp(mode, "writethrough")) {
461 /* this is the default */
470 * Common part for opening disk images and files
472 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
473 int flags, BlockDriver *drv)
479 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
482 bs->total_sectors = 0;
485 bs->open_flags = flags;
486 bs->buffer_alignment = 512;
488 pstrcpy(bs->filename, sizeof(bs->filename), filename);
/* Refuse non-whitelisted drivers when the whitelist is enforced. */
490 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
/* Per-driver private state, zero-initialized. */
495 bs->opaque = g_malloc0(drv->instance_size);
497 if (flags & BDRV_O_CACHE_WB)
498 bs->enable_write_cache = 1;
501 * Clear flags that are internal to the block layer before opening the
504 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
507 * Snapshots should be writable.
509 if (bs->is_temporary) {
510 open_flags |= BDRV_O_RDWR;
513 /* Open the image, either directly or using a protocol */
514 if (drv->bdrv_file_open) {
515 ret = drv->bdrv_file_open(bs, filename, open_flags);
517 ret = bdrv_file_open(&bs->file, filename, open_flags);
519 ret = drv->bdrv_open(bs, open_flags);
527 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
529 ret = refresh_total_sectors(bs, bs->total_sectors);
535 if (bs->is_temporary) {
/* Error path: tear down the protocol-level file again. */
543 bdrv_delete(bs->file);
553 * Opens a file using a protocol (file, host_device, nbd, ...)
555 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
557 BlockDriverState *bs;
/* Pick the protocol driver from the filename, then do the common open. */
561 drv = bdrv_find_protocol(filename);
567 ret = bdrv_open_common(bs, filename, flags, drv);
578 * Opens a disk image (raw, qcow2, vmdk, ...)
580 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
585 if (flags & BDRV_O_SNAPSHOT) {
586 BlockDriverState *bs1;
589 BlockDriver *bdrv_qcow2;
590 QEMUOptionParameter *options;
591 char tmp_filename[PATH_MAX];
592 char backing_filename[PATH_MAX];
594 /* if snapshot, we create a temporary backing file and open it
595 instead of opening 'filename' directly */
597 /* if there is a backing file, use it */
599 ret = bdrv_open(bs1, filename, 0, drv);
/* Probe the real image once to learn its size (rounded to sectors). */
604 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
606 if (bs1->drv && bs1->drv->protocol_name)
611 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
613 /* Real path is meaningless for protocols */
615 snprintf(backing_filename, sizeof(backing_filename),
617 else if (!realpath(filename, backing_filename))
/* Create a throwaway qcow2 overlay backed by the original image. */
620 bdrv_qcow2 = bdrv_find_format("qcow2");
621 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
623 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
624 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
626 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
630 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
631 free_option_parameters(options);
/* From here on, open the temporary overlay instead of 'filename'. */
636 filename = tmp_filename;
638 bs->is_temporary = 1;
641 /* Find the right image format driver */
643 ret = find_image_format(filename, &drv);
647 goto unlink_and_fail;
651 ret = bdrv_open_common(bs, filename, flags, drv);
653 goto unlink_and_fail;
656 /* If there is a backing file, use it */
657 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
658 char backing_filename[PATH_MAX];
660 BlockDriver *back_drv = NULL;
662 bs->backing_hd = bdrv_new("");
/* Protocol-style backing names are used verbatim; plain names are
 * resolved relative to the image's own path. */
664 if (path_has_protocol(bs->backing_file)) {
665 pstrcpy(backing_filename, sizeof(backing_filename),
668 path_combine(backing_filename, sizeof(backing_filename),
669 filename, bs->backing_file);
672 if (bs->backing_format[0] != '\0') {
673 back_drv = bdrv_find_format(bs->backing_format);
676 /* backing files always opened read-only */
678 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
680 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
685 if (bs->is_temporary) {
686 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
688 /* base image inherits from "parent" */
689 bs->backing_hd->keep_read_only = bs->keep_read_only;
/* If no encryption key is needed, the medium is usable right away. */
693 if (!bdrv_key_required(bs)) {
694 bdrv_dev_change_media_cb(bs, true);
/* Error path: remove the temporary overlay file if we created one. */
700 if (bs->is_temporary) {
/* Close a device: drop snapshot reference, release the backing chain,
 * call the driver's close hook, and remove temporary files. */
706 void bdrv_close(BlockDriverState *bs)
709 if (bs == bs_snapshots) {
712 if (bs->backing_hd) {
713 bdrv_delete(bs->backing_hd);
714 bs->backing_hd = NULL;
716 bs->drv->bdrv_close(bs);
719 if (bs->is_temporary) {
720 unlink(bs->filename);
/* Also close the underlying protocol-level file, if any. */
726 if (bs->file != NULL) {
727 bdrv_close(bs->file);
730 bdrv_dev_change_media_cb(bs, false);
/* Close every named device on the global list. */
734 void bdrv_close_all(void)
736 BlockDriverState *bs;
738 QTAILQ_FOREACH(bs, &bdrv_states, list) {
743 /* make a BlockDriverState anonymous by removing from bdrv_state list.
744 Also, NULL terminate the device_name to prevent double remove */
745 void bdrv_make_anon(BlockDriverState *bs)
747 if (bs->device_name[0] != '\0') {
748 QTAILQ_REMOVE(&bdrv_states, bs, list);
750 bs->device_name[0] = '\0';
/* Free a BlockDriverState entirely, including its protocol file. */
753 void bdrv_delete(BlockDriverState *bs)
757 /* remove from list, if necessary */
761 if (bs->file != NULL) {
762 bdrv_delete(bs->file);
765 assert(bs != bs_snapshots);
/* Attach a guest device model to this block device (one at a time). */
769 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
770 /* TODO change to DeviceState *dev when all users are qdevified */
776 bdrv_iostatus_reset(bs);
780 /* TODO qdevified devices don't use this, remove when devices are qdevified */
781 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
783 if (bdrv_attach_dev(bs, dev) < 0) {
788 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
789 /* TODO change to DeviceState *dev when all users are qdevified */
791 assert(bs->dev == dev);
794 bs->dev_opaque = NULL;
/* Restore the default alignment once no device constrains it. */
795 bs->buffer_alignment = 512;
798 /* TODO change to return DeviceState * when all users are qdevified */
799 void *bdrv_get_attached_dev(BlockDriverState *bs)
/* Install the device callback table used for media-change, tray,
 * resize and lock queries below. */
804 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
808 bs->dev_opaque = opaque;
809 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
/* Notify the attached device of medium insertion (load) / removal. */
814 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
816 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
817 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
821 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
823 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
826 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
828 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
829 return bs->dev_ops->is_tray_open(bs->dev_opaque);
834 static void bdrv_dev_resize_cb(BlockDriverState *bs)
836 if (bs->dev_ops && bs->dev_ops->resize_cb) {
837 bs->dev_ops->resize_cb(bs->dev_opaque);
841 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
843 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
844 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
850 * Run consistency checks on an image
852 * Returns 0 if the check could be completed (it doesn't mean that the image is
853 * free of errors) or -errno when an internal error occurred. The results of the
854 * check are stored in res.
856 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
/* Drivers without a check callback cannot be verified. */
858 if (bs->drv->bdrv_check == NULL) {
862 memset(res, 0, sizeof(*res));
863 return bs->drv->bdrv_check(bs, res);
866 #define COMMIT_BUF_SECTORS 2048
868 /* commit COW file into the raw image */
869 int bdrv_commit(BlockDriverState *bs)
871 BlockDriver *drv = bs->drv;
872 BlockDriver *backing_drv;
873 int64_t sector, total_sectors;
874 int n, ro, open_flags;
875 int ret = 0, rw_ret = 0;
878 BlockDriverState *bs_rw, *bs_ro;
/* Nothing to commit without a backing file, and a pinned read-only
 * backing file must never be written. */
883 if (!bs->backing_hd) {
887 if (bs->backing_hd->keep_read_only) {
891 backing_drv = bs->backing_hd->drv;
892 ro = bs->backing_hd->read_only;
893 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
894 open_flags = bs->backing_hd->open_flags;
/* Backing file was read-only: reopen it read-write for the copy. */
898 bdrv_delete(bs->backing_hd);
899 bs->backing_hd = NULL;
900 bs_rw = bdrv_new("");
901 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
905 /* try to re-open read-only */
906 bs_ro = bdrv_new("");
907 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
911 /* drive not functional anymore */
915 bs->backing_hd = bs_ro;
918 bs->backing_hd = bs_rw;
921 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
922 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
/* Copy only clusters allocated in the overlay down to the backing
 * file, COMMIT_BUF_SECTORS at a time. */
924 for (sector = 0; sector < total_sectors; sector += n) {
925 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
927 if (bdrv_read(bs, sector, buf, n) != 0) {
932 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
/* Drop the now-redundant COW data from the overlay if supported. */
939 if (drv->bdrv_make_empty) {
940 ret = drv->bdrv_make_empty(bs);
945 * Make sure all data we wrote to the backing device is actually
949 bdrv_flush(bs->backing_hd);
/* If the backing file was originally read-only, restore that state. */
956 bdrv_delete(bs->backing_hd);
957 bs->backing_hd = NULL;
958 bs_ro = bdrv_new("");
959 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
963 /* drive not functional anymore */
967 bs->backing_hd = bs_ro;
968 bs->backing_hd->keep_read_only = 0;
/* Commit every named device's overlay into its backing file. */
974 void bdrv_commit_all(void)
976 BlockDriverState *bs;
978 QTAILQ_FOREACH(bs, &bdrv_states, list) {
986 * -EINVAL - backing format specified, but no file
987 * -ENOSPC - can't update the backing file because no space is left in the
989 * -ENOTSUP - format driver doesn't support changing the backing file
991 int bdrv_change_backing_file(BlockDriverState *bs,
992 const char *backing_file, const char *backing_fmt)
994 BlockDriver *drv = bs->drv;
996 if (drv->bdrv_change_backing_file != NULL) {
997 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
/* Validate a byte-granularity request: medium present and the range
 * [offset, offset+size) within the device length. */
1003 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1008 if (!bdrv_is_inserted(bs))
1014 len = bdrv_getlength(bs);
/* Two-sided check avoids overflow on offset + size. */
1019 if ((offset > len) || (len - offset < size))
/* Sector-granularity wrapper around the byte check. */
1025 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1028 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1029 nb_sectors * BDRV_SECTOR_SIZE);
/* True if the driver has native (non-emulated) coroutine or AIO I/O. */
1032 static inline bool bdrv_has_async_rw(BlockDriver *drv)
1034 return drv->bdrv_co_readv != bdrv_co_readv_em
1035 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1038 static inline bool bdrv_has_async_flush(BlockDriver *drv)
1040 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1043 /* return < 0 if error. See bdrv_write() for the return codes */
1044 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1045 uint8_t *buf, int nb_sectors)
1047 BlockDriver *drv = bs->drv;
/* Inside a coroutine with an async-capable driver, route through the
 * coroutine path instead of the synchronous driver callback. */
1052 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1054 struct iovec iov = {
1055 .iov_base = (void *)buf,
1056 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1059 qemu_iovec_init_external(&qiov, &iov, 1);
1060 return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1063 if (bdrv_check_request(bs, sector_num, nb_sectors))
1066 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
/* Mark (dirty != 0) or clear the dirty-bitmap bits covering the sector
 * range; one bit covers BDRV_SECTORS_PER_DIRTY_CHUNK sectors. */
1069 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1070 int nb_sectors, int dirty)
1073 unsigned long val, idx, bit;
1075 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1076 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1078 for (; start <= end; start++) {
/* Locate the word and bit for this chunk within the bitmap. */
1079 idx = start / (sizeof(unsigned long) * 8);
1080 bit = start % (sizeof(unsigned long) * 8);
1081 val = bs->dirty_bitmap[idx];
1083 if (!(val & (1UL << bit))) {
1088 if (val & (1UL << bit)) {
1090 val &= ~(1UL << bit);
1093 bs->dirty_bitmap[idx] = val;
1097 /* Return < 0 if error. Important errors are:
1098 -EIO generic I/O error (may happen for all errors)
1099 -ENOMEDIUM No media inserted.
1100 -EINVAL Invalid sector number or nb_sectors
1101 -EACCES Trying to write a read-only device
1103 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1104 const uint8_t *buf, int nb_sectors)
1106 BlockDriver *drv = bs->drv;
/* Inside a coroutine with an async-capable driver, use the coroutine
 * write path rather than the synchronous callback. */
1111 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1113 struct iovec iov = {
1114 .iov_base = (void *)buf,
1115 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1118 qemu_iovec_init_external(&qiov, &iov, 1);
1119 return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1124 if (bdrv_check_request(bs, sector_num, nb_sectors))
/* Track written regions for block migration / dirty tracking. */
1127 if (bs->dirty_bitmap) {
1128 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1131 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1132 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1135 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
/* Byte-granularity read built on sector reads: unaligned head, whole
 * sectors in place, then an unaligned tail via a bounce buffer. */
1138 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1139 void *buf, int count1)
1141 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1142 int len, nb_sectors, count;
1147 /* first read to align to sector start */
1148 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1151 sector_num = offset >> BDRV_SECTOR_BITS;
1153 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1155 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1163 /* read the sectors "in place" */
1164 nb_sectors = count >> BDRV_SECTOR_BITS;
1165 if (nb_sectors > 0) {
1166 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1168 sector_num += nb_sectors;
1169 len = nb_sectors << BDRV_SECTOR_BITS;
1174 /* add data from the last sector */
1176 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1178 memcpy(buf, tmp_buf, count);
/* Byte-granularity write built on sector I/O; unaligned head and tail
 * use read-modify-write through a one-sector bounce buffer. */
1183 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1184 const void *buf, int count1)
1186 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1187 int len, nb_sectors, count;
1192 /* first write to align to sector start */
1193 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1196 sector_num = offset >> BDRV_SECTOR_BITS;
1198 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1200 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1201 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1210 /* write the sectors "in place" */
1211 nb_sectors = count >> BDRV_SECTOR_BITS;
1212 if (nb_sectors > 0) {
1213 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1215 sector_num += nb_sectors;
1216 len = nb_sectors << BDRV_SECTOR_BITS;
1221 /* add data from the last sector */
1223 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1225 memcpy(tmp_buf, buf, count);
1226 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1233 * Writes to the file and ensures that no writes are reordered across this
1234 * request (acts as a barrier)
1236 * Returns 0 on success, -errno in error cases.
1238 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1239 const void *buf, int count)
1243 ret = bdrv_pwrite(bs, offset, buf, count);
1248 /* No flush needed for cache modes that use O_DSYNC */
1249 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
/* Coroutine read: validate the request, then dispatch to the driver's
 * coroutine callback. */
1256 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1257 int nb_sectors, QEMUIOVector *qiov)
1259 BlockDriver *drv = bs->drv;
1261 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1266 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1270 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
/* Coroutine write: same as bdrv_write's checks and dirty tracking, on
 * the coroutine path. */
1273 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors, QEMUIOVector *qiov)
1276 BlockDriver *drv = bs->drv;
1278 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1283 if (bs->read_only) {
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1290 if (bs->dirty_bitmap) {
1291 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1294 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1295 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1298 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1302 * Truncate file to 'offset' bytes (needed only for file protocols)
1304 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1306 BlockDriver *drv = bs->drv;
1310 if (!drv->bdrv_truncate)
/* Refuse to resize while the device is in use (e.g. by a job). */
1314 if (bdrv_in_use(bs))
1316 ret = drv->bdrv_truncate(bs, offset);
/* On success, update total_sectors and notify the attached device. */
1318 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1319 bdrv_dev_resize_cb(bs);
1325 * Length of a allocated file in bytes. Sparse files are counted by actual
1326 * allocated space. Return < 0 if error or unknown.
1328 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1330 BlockDriver *drv = bs->drv;
1334 if (drv->bdrv_get_allocated_file_size) {
1335 return drv->bdrv_get_allocated_file_size(bs);
/* No driver hook: fall through to the protocol-level file. */
1338 return bdrv_get_allocated_file_size(bs->file);
1344 * Length of a file in bytes. Return < 0 if error or unknown.
1346 int64_t bdrv_getlength(BlockDriverState *bs)
1348 BlockDriver *drv = bs->drv;
/* Growable/removable media may change size, so ask the driver; fixed
 * media trust the cached total_sectors. */
1352 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1353 if (drv->bdrv_getlength) {
1354 return drv->bdrv_getlength(bs);
1357 return bs->total_sectors * BDRV_SECTOR_SIZE;
1360 /* return 0 as number of sectors if no device present or error */
1361 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1364 length = bdrv_getlength(bs);
1368 length = length >> BDRV_SECTOR_BITS;
1369 *nb_sectors_ptr = length;
/* MSDOS partition table entry layout (struct header/packing attribute
 * is outside this extract — presumably declared packed; verify). */
1373 uint8_t boot_ind; /* 0x80 - active */
1374 uint8_t head; /* starting head */
1375 uint8_t sector; /* starting sector */
1376 uint8_t cyl; /* starting cylinder */
1377 uint8_t sys_ind; /* What partition type */
1378 uint8_t end_head; /* end head */
1379 uint8_t end_sector; /* end sector */
1380 uint8_t end_cyl; /* end cylinder */
1381 uint32_t start_sect; /* starting sector counting from 0 */
1382 uint32_t nr_sects; /* nr of sectors in partition */
1385 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1386 static int guess_disk_lchs(BlockDriverState *bs,
1387 int *pcylinders, int *pheads, int *psectors)
1389 uint8_t buf[BDRV_SECTOR_SIZE];
1390 int ret, i, heads, sectors, cylinders;
1391 struct partition *p;
1393 uint64_t nb_sectors;
1395 bdrv_get_geometry(bs, &nb_sectors);
/* Read the MBR (sector 0). */
1397 ret = bdrv_read(bs, 0, buf, 1);
1400 /* test msdos magic */
1401 if (buf[510] != 0x55 || buf[511] != 0xaa)
1403 for(i = 0; i < 4; i++) {
/* Partition entries start at offset 0x1be in the MBR. */
1404 p = ((struct partition *)(buf + 0x1be)) + i;
1405 nr_sects = le32_to_cpu(p->nr_sects);
1406 if (nr_sects && p->end_head) {
1407 /* We make the assumption that the partition terminates on
1408 a cylinder boundary */
1409 heads = p->end_head + 1;
/* Low 6 bits of end_sector hold the CHS sector number. */
1410 sectors = p->end_sector & 63;
1413 cylinders = nb_sectors / (heads * sectors);
1414 if (cylinders < 1 || cylinders > 16383)
1417 *psectors = sectors;
1418 *pcylinders = cylinders;
1420 printf("guessed geometry: LCHS=%d %d %d\n",
1421 cylinders, heads, sectors);
/* Guess a CHS geometry for the device: explicit hint first, then the
 * MSDOS partition table, then a standard 16-head/63-sector fallback;
 * also pick an appropriate BIOS LBA translation mode. */
1429 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1431 int translation, lba_detected = 0;
1432 int cylinders, heads, secs;
1433 uint64_t nb_sectors;
1435 /* if a geometry hint is available, use it */
1436 bdrv_get_geometry(bs, &nb_sectors);
1437 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1438 translation = bdrv_get_translation_hint(bs);
1439 if (cylinders != 0) {
1444 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1446 /* if heads > 16, it means that a BIOS LBA
1447 translation was active, so the default
1448 hardware geometry is OK */
1450 goto default_geometry;
1455 /* disable any translation to be in sync with
1456 the logical geometry */
1457 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1458 bdrv_set_translation_hint(bs,
1459 BIOS_ATA_TRANSLATION_NONE);
1464 /* if no geometry, use a standard physical disk geometry */
1465 cylinders = nb_sectors / (16 * 63);
/* Clamp cylinders to the ATA-visible range [2, 16383]. */
1467 if (cylinders > 16383)
1469 else if (cylinders < 2)
/* LBA geometry detected: pick LARGE vs LBA translation by capacity. */
1474 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1475 if ((*pcyls * *pheads) <= 131072) {
1476 bdrv_set_translation_hint(bs,
1477 BIOS_ATA_TRANSLATION_LARGE);
1479 bdrv_set_translation_hint(bs,
1480 BIOS_ATA_TRANSLATION_LBA);
1484 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
/* Store a user/board supplied CHS geometry hint on the device. */
1488 void bdrv_set_geometry_hint(BlockDriverState *bs,
1489 int cyls, int heads, int secs)
/* Store the BIOS ATA translation mode hint. */
1496 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1498 bs->translation = translation;
/* Read back the stored geometry hint (0 values mean "no hint"). */
1501 void bdrv_get_geometry_hint(BlockDriverState *bs,
1502 int *pcyls, int *pheads, int *psecs)
1505 *pheads = bs->heads;
1509 /* Recognize floppy formats */
1510 typedef struct FDFormat {
/* Table of known floppy geometries, matched against the image size in
 * bdrv_get_floppy_geometry_hint(); fields are (drive type, last_sect,
 * max_track, max_head) — TODO confirm field order against the struct. */
1517 static const FDFormat fd_formats[] = {
1518 /* First entry is default format */
1519 /* 1.44 MB 3"1/2 floppy disks */
1520 { FDRIVE_DRV_144, 18, 80, 1, },
1521 { FDRIVE_DRV_144, 20, 80, 1, },
1522 { FDRIVE_DRV_144, 21, 80, 1, },
1523 { FDRIVE_DRV_144, 21, 82, 1, },
1524 { FDRIVE_DRV_144, 21, 83, 1, },
1525 { FDRIVE_DRV_144, 22, 80, 1, },
1526 { FDRIVE_DRV_144, 23, 80, 1, },
1527 { FDRIVE_DRV_144, 24, 80, 1, },
1528 /* 2.88 MB 3"1/2 floppy disks */
1529 { FDRIVE_DRV_288, 36, 80, 1, },
1530 { FDRIVE_DRV_288, 39, 80, 1, },
1531 { FDRIVE_DRV_288, 40, 80, 1, },
1532 { FDRIVE_DRV_288, 44, 80, 1, },
1533 { FDRIVE_DRV_288, 48, 80, 1, },
1534 /* 720 kB 3"1/2 floppy disks */
1535 { FDRIVE_DRV_144, 9, 80, 1, },
1536 { FDRIVE_DRV_144, 10, 80, 1, },
1537 { FDRIVE_DRV_144, 10, 82, 1, },
1538 { FDRIVE_DRV_144, 10, 83, 1, },
1539 { FDRIVE_DRV_144, 13, 80, 1, },
1540 { FDRIVE_DRV_144, 14, 80, 1, },
1541 /* 1.2 MB 5"1/4 floppy disks */
1542 { FDRIVE_DRV_120, 15, 80, 1, },
1543 { FDRIVE_DRV_120, 18, 80, 1, },
1544 { FDRIVE_DRV_120, 18, 82, 1, },
1545 { FDRIVE_DRV_120, 18, 83, 1, },
1546 { FDRIVE_DRV_120, 20, 80, 1, },
1547 /* 720 kB 5"1/4 floppy disks */
1548 { FDRIVE_DRV_120, 9, 80, 1, },
1549 { FDRIVE_DRV_120, 11, 80, 1, },
1550 /* 360 kB 5"1/4 floppy disks */
1551 { FDRIVE_DRV_120, 9, 40, 1, },
1552 { FDRIVE_DRV_120, 9, 40, 0, },
1553 { FDRIVE_DRV_120, 10, 41, 1, },
1554 { FDRIVE_DRV_120, 10, 42, 1, },
1555 /* 320 kB 5"1/4 floppy disks */
1556 { FDRIVE_DRV_120, 8, 40, 1, },
1557 { FDRIVE_DRV_120, 8, 40, 0, },
1558 /* 360 kB must match 5"1/4 better than 3"1/2... */
1559 { FDRIVE_DRV_144, 9, 80, 0, },
1561 { FDRIVE_DRV_NONE, -1, -1, 0, },
/* Determine floppy geometry: use an explicit hint if fully specified,
 * otherwise match the image size against fd_formats, preferring an
 * exact match for drive_in and falling back to the first size match. */
1564 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1565 int *max_track, int *last_sect,
1566 FDriveType drive_in, FDriveType *drive)
1568 const FDFormat *parse;
1569 uint64_t nb_sectors, size;
1570 int i, first_match, match;
1572 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1573 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1574 /* User defined disk */
1576 bdrv_get_geometry(bs, &nb_sectors);
1579 for (i = 0; ; i++) {
1580 parse = &fd_formats[i];
/* Sentinel entry terminates the table. */
1581 if (parse->drive == FDRIVE_DRV_NONE) {
1584 if (drive_in == parse->drive ||
1585 drive_in == FDRIVE_DRV_NONE) {
1586 size = (parse->max_head + 1) * parse->max_track *
1588 if (nb_sectors == size) {
1592 if (first_match == -1) {
/* No exact drive-type match: settle for the first size match,
 * or (if none) the default format. */
1598 if (first_match == -1) {
1601 match = first_match;
1603 parse = &fd_formats[match];
1605 *nb_heads = parse->max_head + 1;
1606 *max_track = parse->max_track;
1607 *last_sect = parse->last_sect;
1608 *drive = parse->drive;
1612 int bdrv_get_translation_hint(BlockDriverState *bs)
1614 return bs->translation;
1617 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1618 BlockErrorAction on_write_error)
1620 bs->on_read_error = on_read_error;
1621 bs->on_write_error = on_write_error;
1624 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1626 return is_read ? bs->on_read_error : bs->on_write_error;
1629 int bdrv_is_read_only(BlockDriverState *bs)
1631 return bs->read_only;
1634 int bdrv_is_sg(BlockDriverState *bs)
1639 int bdrv_enable_write_cache(BlockDriverState *bs)
1641 return bs->enable_write_cache;
1644 int bdrv_is_encrypted(BlockDriverState *bs)
1646 if (bs->backing_hd && bs->backing_hd->encrypted)
1648 return bs->encrypted;
1651 int bdrv_key_required(BlockDriverState *bs)
1653 BlockDriverState *backing_hd = bs->backing_hd;
1655 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1657 return (bs->encrypted && !bs->valid_key);
/*
 * Supply the encryption key for an encrypted image; an encrypted backing
 * file is unlocked first via recursion.  Once the key is accepted for the
 * first time, the "medium changed" callback that was skipped at open time
 * is delivered.
 *
 * NOTE(review): extraction dropped lines here (declaration of 'ret', the
 * early returns for the no-encryption / no-driver-support cases, the
 * valid_key bookkeeping, and the final return).  Verify against upstream.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
    /* Unlock the backing file first, if it is itself encrypted. */
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
    if (!bs->encrypted) {
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
    ret = bs->drv->bdrv_set_key(bs, key);
    } else if (!bs->valid_key) {
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
1686 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1691 pstrcpy(buf, buf_size, bs->drv->format_name);
1695 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1700 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1701 it(opaque, drv->format_name);
1705 BlockDriverState *bdrv_find(const char *name)
1707 BlockDriverState *bs;
1709 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1710 if (!strcmp(name, bs->device_name)) {
1717 BlockDriverState *bdrv_next(BlockDriverState *bs)
1720 return QTAILQ_FIRST(&bdrv_states);
1722 return QTAILQ_NEXT(bs, list);
1725 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1727 BlockDriverState *bs;
1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1734 const char *bdrv_get_device_name(BlockDriverState *bs)
1736 return bs->device_name;
/*
 * Synchronously flush pending writes of bs to stable storage.
 *
 * BDRV_O_NO_FLUSH short-circuits the operation.  Inside a coroutine, a
 * driver with native async flush is serviced via the coroutine emulation
 * path; otherwise the driver's synchronous bdrv_flush callback is used.
 *
 * NOTE(review): the return statements of the first branch and of the final
 * fallthrough were lost in extraction.
 */
int bdrv_flush(BlockDriverState *bs)
    /* The user explicitly asked for flushes to be ignored. */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
    if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
        return bdrv_co_flush_em(bs);
    if (bs->drv && bs->drv->bdrv_flush) {
        return bs->drv->bdrv_flush(bs);
    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and don't support bdrv_flush therefore. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
1766 void bdrv_flush_all(void)
1768 BlockDriverState *bs;
1770 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1771 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1777 int bdrv_has_zero_init(BlockDriverState *bs)
1781 if (bs->drv->bdrv_has_zero_init) {
1782 return bs->drv->bdrv_has_zero_init(bs);
1788 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1793 if (!bs->drv->bdrv_discard) {
1796 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    /* Fallback for drivers without bdrv_is_allocated: every in-range sector
     * counts as allocated.  (Out-of-range handling lines lost in extraction.) */
    if (!bs->drv->bdrv_is_allocated) {
        if (sector_num >= bs->total_sectors) {
        n = bs->total_sectors - sector_num;
        /* Clamp the reported run length to the caller's limit. */
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
    return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
/*
 * Emit a QMP BLOCK_IO_ERROR monitor event describing how an I/O error on
 * bdrv was handled (report/ignore/stop) and whether it was a read or a
 * write.  The event object is released after emission.
 *
 * NOTE(review): the switch statement skeleton (switch/break/default lines)
 * and part of the jsonf argument list were lost in extraction.
 */
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
    const char *action_str;

    /* Map the enum onto the wire-protocol string. */
    case BDRV_ACTION_REPORT:
        action_str = "report";
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
    case BDRV_ACTION_STOP:
        action_str = "stop";

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
/*
 * qlist_iter() callback for "info block": renders one block-device QDict
 * (as produced by bdrv_info()) as a human-readable monitor line.
 *
 * NOTE(review): closing braces and the else introducing the
 * "[not inserted]" branch were lost in extraction.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
    Monitor *mon = opaque;

    bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: removable=%d",
                   qdict_get_str(bs_dict, "device"),
                   qdict_get_bool(bs_dict, "removable"));

    /* Tray and lock state only exist for removable media. */
    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
        monitor_printf(mon, " tray-open=%d",
                       qdict_get_bool(bs_dict, "tray-open"));

    /* Details of the inserted medium, when present. */
    if (qdict_haskey(bs_dict, "inserted")) {
        QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(qdict, "file"));
        if (qdict_haskey(qdict, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                       qdict_get_bool(qdict, "ro"),
                       qdict_get_str(qdict, "drv"),
                       qdict_get_bool(qdict, "encrypted"));
        monitor_printf(mon, " [not inserted]");

    monitor_printf(mon, "\n");
1891 void bdrv_info_print(Monitor *mon, const QObject *data)
1893 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
/*
 * Build the QMP "query-block" response: a QList with one QDict per open
 * block device (device name, removable/locked state, tray state, and an
 * "inserted" sub-dict with file/ro/drv/encrypted and optional backing_file
 * when a medium is present).  Ownership of the list passes to the caller
 * via *ret_data.
 *
 * NOTE(review): declarations of bs_list/bs_obj/bs_dict/obj, the jsonf
 * device-name argument, and several closing braces were lost in extraction.
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
    BlockDriverState *bs;

    bs_list = qlist_new();

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bdrv_dev_has_removable_media(bs),
                                    bdrv_dev_is_medium_locked(bs));
        bs_dict = qobject_to_qdict(bs_obj);

        /* Tray state is only meaningful for removable devices. */
        if (bdrv_dev_has_removable_media(bs)) {
            qdict_put(bs_dict, "tray-open",
                      qbool_from_int(bdrv_dev_is_tray_open(bs)));

            /* Medium details, attached under the "inserted" key. */
            obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                     "'encrypted': %i }",
                                     bs->filename, bs->read_only,
                                     bs->drv->format_name,
                                     bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                QDict *qdict = qobject_to_qdict(obj);
                qdict_put(qdict, "backing_file",
                          qstring_from_str(bs->backing_file));

            qdict_put_obj(bs_dict, "inserted", obj);
        qlist_append_obj(bs_list, bs_obj);

    *ret_data = QOBJECT(bs_list);
1940 static void bdrv_stats_iter(QObject *data, void *opaque)
1943 Monitor *mon = opaque;
1945 qdict = qobject_to_qdict(data);
1946 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1948 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1949 monitor_printf(mon, " rd_bytes=%" PRId64
1950 " wr_bytes=%" PRId64
1951 " rd_operations=%" PRId64
1952 " wr_operations=%" PRId64
1953 " flush_operations=%" PRId64
1954 " wr_total_time_ns=%" PRId64
1955 " rd_total_time_ns=%" PRId64
1956 " flush_total_time_ns=%" PRId64
1958 qdict_get_int(qdict, "rd_bytes"),
1959 qdict_get_int(qdict, "wr_bytes"),
1960 qdict_get_int(qdict, "rd_operations"),
1961 qdict_get_int(qdict, "wr_operations"),
1962 qdict_get_int(qdict, "flush_operations"),
1963 qdict_get_int(qdict, "wr_total_time_ns"),
1964 qdict_get_int(qdict, "rd_total_time_ns"),
1965 qdict_get_int(qdict, "flush_total_time_ns"));
1968 void bdrv_stats_print(Monitor *mon, const QObject *data)
1970 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
/*
 * Build the per-device statistics QDict for "query-blockstats": byte and
 * operation counters, accumulated latencies, and the highest written byte
 * offset (wr_highest_sector scaled to bytes).  A "device" key is added
 * only for named devices, and the statistics of the underlying
 * protocol/file layer (bs->file) are attached recursively under "parent".
 *
 * NOTE(review): declarations of res/dict, the bs->file NULL check, and the
 * final return were lost in extraction.
 */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
    res = qobject_from_jsonf("{ 'stats': {"
                             "'rd_bytes': %" PRId64 ","
                             "'wr_bytes': %" PRId64 ","
                             "'rd_operations': %" PRId64 ","
                             "'wr_operations': %" PRId64 ","
                             "'wr_highest_offset': %" PRId64 ","
                             "'flush_operations': %" PRId64 ","
                             "'wr_total_time_ns': %" PRId64 ","
                             "'rd_total_time_ns': %" PRId64 ","
                             "'flush_total_time_ns': %" PRId64
                             bs->nr_bytes[BDRV_ACCT_READ],
                             bs->nr_bytes[BDRV_ACCT_WRITE],
                             bs->nr_ops[BDRV_ACCT_READ],
                             bs->nr_ops[BDRV_ACCT_WRITE],
                             bs->wr_highest_sector *
                             (uint64_t)BDRV_SECTOR_SIZE,
                             bs->nr_ops[BDRV_ACCT_FLUSH],
                             bs->total_time_ns[BDRV_ACCT_WRITE],
                             bs->total_time_ns[BDRV_ACCT_READ],
                             bs->total_time_ns[BDRV_ACCT_FLUSH]);
    dict = qobject_to_qdict(res);

    /* Anonymous (internal) BDSes carry no "device" key. */
    if (*bs->device_name) {
        qdict_put(dict, "device", qstring_from_str(bs->device_name));

        /* Recurse into the protocol layer's statistics. */
        QObject *parent = bdrv_info_stats_bs(bs->file);
        qdict_put_obj(dict, "parent", parent);
2013 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2017 BlockDriverState *bs;
2019 devices = qlist_new();
2021 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2022 obj = bdrv_info_stats_bs(bs);
2023 qlist_append_obj(devices, obj);
2026 *ret_data = QOBJECT(devices);
2029 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2031 if (bs->backing_hd && bs->backing_hd->encrypted)
2032 return bs->backing_file;
2033 else if (bs->encrypted)
2034 return bs->filename;
/*
 * Copy the backing file name of bs into filename (always NUL-terminated,
 * truncated to filename_size; empty string when there is no backing file).
 *
 * NOTE(review): if backing_file is an embedded char array (as suggested by
 * bs->backing_file[0] usage elsewhere in this file), '!bs->backing_file'
 * can never be true and the first branch is dead; behavior is still
 * correct because an unused array starts with NUL.  Consider testing
 * bs->backing_file[0] == '\0' instead — confirm the struct declaration.
 */
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
    if (!bs->backing_file) {
        pstrcpy(filename, filename_size, "");
        pstrcpy(filename, filename_size, bs->backing_file);
2049 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2050 const uint8_t *buf, int nb_sectors)
2052 BlockDriver *drv = bs->drv;
2055 if (!drv->bdrv_write_compressed)
2057 if (bdrv_check_request(bs, sector_num, nb_sectors))
2060 if (bs->dirty_bitmap) {
2061 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2064 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2067 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2069 BlockDriver *drv = bs->drv;
2072 if (!drv->bdrv_get_info)
2074 memset(bdi, 0, sizeof(*bdi));
2075 return drv->bdrv_get_info(bs, bdi);
2078 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2079 int64_t pos, int size)
2081 BlockDriver *drv = bs->drv;
2084 if (drv->bdrv_save_vmstate)
2085 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2087 return bdrv_save_vmstate(bs->file, buf, pos, size);
2091 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2092 int64_t pos, int size)
2094 BlockDriver *drv = bs->drv;
2097 if (drv->bdrv_load_vmstate)
2098 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2100 return bdrv_load_vmstate(bs->file, buf, pos, size);
2104 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2106 BlockDriver *drv = bs->drv;
2108 if (!drv || !drv->bdrv_debug_event) {
2112 return drv->bdrv_debug_event(bs, event);
2116 /**************************************************************/
2117 /* handling of snapshots */
2119 int bdrv_can_snapshot(BlockDriverState *bs)
2121 BlockDriver *drv = bs->drv;
2122 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2126 if (!drv->bdrv_snapshot_create) {
2127 if (bs->file != NULL) {
2128 return bdrv_can_snapshot(bs->file);
2136 int bdrv_is_snapshot(BlockDriverState *bs)
2138 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2141 BlockDriverState *bdrv_snapshots(void)
2143 BlockDriverState *bs;
2146 return bs_snapshots;
2150 while ((bs = bdrv_next(bs))) {
2151 if (bdrv_can_snapshot(bs)) {
2159 int bdrv_snapshot_create(BlockDriverState *bs,
2160 QEMUSnapshotInfo *sn_info)
2162 BlockDriver *drv = bs->drv;
2165 if (drv->bdrv_snapshot_create)
2166 return drv->bdrv_snapshot_create(bs, sn_info);
2168 return bdrv_snapshot_create(bs->file, sn_info);
/*
 * Revert bs to the given snapshot.  If the format driver implements
 * bdrv_snapshot_goto natively it is used directly; otherwise the snapshot
 * lives in the underlying protocol layer (bs->file): the format driver is
 * closed, the goto is delegated to bs->file, and the driver is reopened.
 *
 * NOTE(review): declarations of ret/open_ret, the NULL checks, and the
 * error path (reopen failure makes the device unusable, hence the
 * bdrv_delete of bs->file) were lost in extraction.
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    /* Delegate to the protocol layer, closing/reopening the format driver
     * around the operation so it re-reads its metadata. */
    drv->bdrv_close(bs);
    ret = bdrv_snapshot_goto(bs->file, snapshot_id);
    open_ret = drv->bdrv_open(bs, bs->open_flags);
    bdrv_delete(bs->file);
2198 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2200 BlockDriver *drv = bs->drv;
2203 if (drv->bdrv_snapshot_delete)
2204 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2206 return bdrv_snapshot_delete(bs->file, snapshot_id);
2210 int bdrv_snapshot_list(BlockDriverState *bs,
2211 QEMUSnapshotInfo **psn_info)
2213 BlockDriver *drv = bs->drv;
2216 if (drv->bdrv_snapshot_list)
2217 return drv->bdrv_snapshot_list(bs, psn_info);
2219 return bdrv_snapshot_list(bs->file, psn_info);
2223 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2224 const char *snapshot_name)
2226 BlockDriver *drv = bs->drv;
2230 if (!bs->read_only) {
2233 if (drv->bdrv_snapshot_load_tmp) {
2234 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2239 #define NB_SUFFIXES 4
/*
 * Format 'size' into buf as a human-readable string with a K/M/G/T suffix:
 * one decimal place for small scaled values, integer rounding for larger
 * ones.  Returns buf.
 *
 * NOTE(review): the declaration/initialization of 'base' (presumably 1024)
 * and the plain-number early case were lost in extraction — confirm.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
    static const char suffixes[NB_SUFFIXES] = "KMGT";

    /* Values below the first scaling threshold are printed unscaled. */
    snprintf(buf, buf_size, "%" PRId64, size);
    for(i = 0; i < NB_SUFFIXES; i++) {
        if (size < (10 * base)) {
            /* Small scaled value: keep one decimal place. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
        } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* Larger value (or last suffix): round to integer. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base),
/*
 * Render one QEMUSnapshotInfo as a fixed-width table row (ID, tag, VM
 * state size, wall-clock date, guest clock); with sn == NULL the column
 * header row is produced instead.  Returns buf.
 *
 * NOTE(review): both localtime() and localtime_r() calls are visible —
 * the surrounding platform #ifdef (Win32 vs POSIX) was lost in
 * extraction, as were several declarations and the early-return.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
    char buf1[128], date_buf[128], clock_buf[128];

    /* sn == NULL: emit the column headers. */
    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");

    /* Format the snapshot's wall-clock timestamp. */
    ptm = localtime(&ti);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", ptm);
    localtime_r(&ti, &tm);
    strftime(date_buf, sizeof(date_buf),
             "%Y-%m-%d %H:%M:%S", &tm);

    /* Guest clock, hh:mm:ss.mmm derived from nanoseconds. */
    secs = sn->vm_clock_nsec / 1000000000;
    snprintf(clock_buf, sizeof(clock_buf),
             "%02d:%02d:%02d.%03d",
             (int)((secs / 60) % 60),
             (int)((sn->vm_clock_nsec / 1000000) % 1000));
    snprintf(buf, buf_size,
             "%-10s%-20s%7s%20s%15s",
             sn->id_str, sn->name,
             get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2312 /**************************************************************/
/*
 * Submit an asynchronous vectored read via the driver's bdrv_aio_readv
 * callback after validating the request range.
 *
 * NOTE(review): the error-emulation returns (no medium / bad range) were
 * lost in extraction, as was the cb/opaque tail of the final call.
 */
BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
    BlockDriver *drv = bs->drv;

    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    if (bdrv_check_request(bs, sector_num, nb_sectors))

    return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2332 typedef struct BlockCompleteData {
2333 BlockDriverCompletionFunc *cb;
2335 BlockDriverState *bs;
2338 } BlockCompleteData;
2340 static void block_complete_cb(void *opaque, int ret)
2342 BlockCompleteData *b = opaque;
2344 if (b->bs->dirty_bitmap) {
2345 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2347 b->cb(b->opaque, ret);
2351 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2354 BlockDriverCompletionFunc *cb,
2357 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2361 blkdata->opaque = opaque;
2362 blkdata->sector_num = sector_num;
2363 blkdata->nb_sectors = nb_sectors;
/*
 * Submit an asynchronous vectored write of nb_sectors starting at
 * sector_num.  When dirty tracking is enabled the user callback is
 * wrapped by block_complete_cb() so the dirty bitmap is updated on
 * completion.  On success the wr_highest_offset high-water mark
 * (wr_highest_sector) is advanced.
 *
 * NOTE(review): the error-emulation returns and the success check on
 * 'ret' were lost in extraction.
 */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
    BlockDriver *drv = bs->drv;
    BlockDriverAIOCB *ret;
    BlockCompleteData *blk_cb_data;

    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    if (bdrv_check_request(bs, sector_num, nb_sectors))

    /* Interpose the dirty-bitmap bookkeeping callback when tracking. */
    if (bs->dirty_bitmap) {
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
        cb = &block_complete_cb;
        opaque = blk_cb_data;

    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,

    /* Track the highest sector ever written (used for stats reporting). */
    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
2405 typedef struct MultiwriteCB {
2410 BlockDriverCompletionFunc *cb;
2412 QEMUIOVector *free_qiov;
2417 static void multiwrite_user_cb(MultiwriteCB *mcb)
2421 for (i = 0; i < mcb->num_callbacks; i++) {
2422 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2423 if (mcb->callbacks[i].free_qiov) {
2424 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2426 g_free(mcb->callbacks[i].free_qiov);
2427 qemu_vfree(mcb->callbacks[i].free_buf);
2431 static void multiwrite_cb(void *opaque, int ret)
2433 MultiwriteCB *mcb = opaque;
2435 trace_multiwrite_cb(mcb, ret);
2437 if (ret < 0 && !mcb->error) {
2441 mcb->num_requests--;
2442 if (mcb->num_requests == 0) {
2443 multiwrite_user_cb(mcb);
2448 static int multiwrite_req_compare(const void *a, const void *b)
2450 const BlockRequest *req1 = a, *req2 = b;
2453 * Note that we can't simply subtract req2->sector from req1->sector
2454 * here as that could overflow the return value.
2456 if (req1->sector > req2->sector) {
2458 } else if (req1->sector < req2->sector) {
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 *
 * NOTE(review): declarations of i/outidx/merge/size, several closing
 * braces, the else branch moving unmergeable requests up, and the final
 * return were lost in extraction.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    for (i = 1; i < num_reqs; i++) {
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);

        // Never exceed the host's iovec limit for the combined request.
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {

            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            // The merged request takes over the output slot; remember the
            // allocated qiov so multiwrite_user_cb() can free it.
            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
            // Not mergeable: compact the request into the next output slot.
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 *
 * NOTE(review): declarations of mcb/i, several returns, and the early-fail
 * cleanup labels were lost in extraction.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
    BlockDriverAIOCB *acb;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;

    if (num_reqs == 0) {

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    /* Record the original per-request callbacks before merging clobbers
     * the reqs array. */
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /*
     * Run the aio requests. As soon as one request can't be submitted
     * successfully, fail all requests that are not yet submitted (we must
     * return failure for all requests anyway)
     *
     * num_requests cannot be set to the right value immediately: If
     * bdrv_aio_writev fails for some request, num_requests would be too high
     * and therefore multiwrite_cb() would never recognize the multiwrite
     * request as completed. We also cannot use the loop variable i to set it
     * when the first request fails because the callback may already have been
     * called for previously submitted requests. Thus, num_requests must be
     * incremented for each request that is submitted.
     *
     * The problem that callbacks may be called early also means that we need
     * to take care that num_requests doesn't become 0 before all requests are
     * submitted - multiwrite_cb() would consider the multiwrite request
     * completed. A dummy request that is "completed" by a manual call to
     * multiwrite_cb() takes care of this.
     */
    mcb->num_requests = 1;

    // Run the aio requests
    for (i = 0; i < num_reqs; i++) {
        mcb->num_requests++;
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);

            // We can only fail the whole thing if no request has been
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
            // complete and report the error in the callback.
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
                multiwrite_cb(mcb, -EIO);

    /* Complete the dummy request */
    multiwrite_cb(mcb, 0);

    /* Early-failure cleanup: flag every request as failed. */
    for (i = 0; i < mcb->num_callbacks; i++) {
        reqs[i].error = -EIO;
2642 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2643 BlockDriverCompletionFunc *cb, void *opaque)
2645 BlockDriver *drv = bs->drv;
2647 trace_bdrv_aio_flush(bs, opaque);
2649 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2650 return bdrv_aio_noop_em(bs, cb, opaque);
2655 return drv->bdrv_aio_flush(bs, cb, opaque);
2658 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2660 acb->pool->cancel(acb);
2664 /**************************************************************/
2665 /* async block device emulation */
2667 typedef struct BlockDriverAIOCBSync {
2668 BlockDriverAIOCB common;
2671 /* vector translation state */
2675 } BlockDriverAIOCBSync;
2677 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2679 BlockDriverAIOCBSync *acb =
2680 container_of(blockacb, BlockDriverAIOCBSync, common);
2681 qemu_bh_delete(acb->bh);
2683 qemu_aio_release(acb);
2686 static AIOPool bdrv_em_aio_pool = {
2687 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2688 .cancel = bdrv_aio_cancel_em,
2691 static void bdrv_aio_bh_cb(void *opaque)
2693 BlockDriverAIOCBSync *acb = opaque;
2696 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2697 qemu_vfree(acb->bounce);
2698 acb->common.cb(acb->common.opaque, acb->ret);
2699 qemu_bh_delete(acb->bh);
2701 qemu_aio_release(acb);
/*
 * Emulate an asynchronous vectored read/write on top of the synchronous
 * bdrv_read()/bdrv_write() using a bounce buffer.  The synchronous
 * transfer happens immediately; completion is then signalled through a
 * bottom half so the caller sees normal AIO semantics.
 *
 * NOTE(review): parts of the parameter list and the read/write else
 * structure were lost in extraction.
 */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            BlockDriverCompletionFunc *cb,
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    /* Bounce buffer sized and aligned for the whole vectored transfer. */
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    /* Writes gather into the bounce buffer first; reads scatter back to
     * the qiov later in bdrv_aio_bh_cb(). */
    qemu_iovec_to_buffer(acb->qiov, acb->bounce);
    acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);

    qemu_bh_schedule(acb->bh);

    return &acb->common;
2735 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2736 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2737 BlockDriverCompletionFunc *cb, void *opaque)
2739 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2742 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2743 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2744 BlockDriverCompletionFunc *cb, void *opaque)
2746 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2750 typedef struct BlockDriverAIOCBCoroutine {
2751 BlockDriverAIOCB common;
2755 } BlockDriverAIOCBCoroutine;
2757 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2762 static AIOPool bdrv_em_co_aio_pool = {
2763 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2764 .cancel = bdrv_aio_co_cancel_em,
2767 static void bdrv_co_rw_bh(void *opaque)
2769 BlockDriverAIOCBCoroutine *acb = opaque;
2771 acb->common.cb(acb->common.opaque, acb->req.error);
2772 qemu_bh_delete(acb->bh);
2773 qemu_aio_release(acb);
/*
 * Coroutine entry point for bdrv_co_aio_rw_vector(): perform the driver's
 * native coroutine read or write, then schedule a bottom half
 * (bdrv_co_rw_bh) to deliver the user's completion callback outside
 * coroutine context.
 */
static void coroutine_fn bdrv_co_rw(void *opaque)
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
        acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);

    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
    qemu_bh_schedule(acb->bh);
/*
 * Bridge the AIO interface onto a driver's native coroutine read/write:
 * package the request parameters into the AIOCB, then spawn bdrv_co_rw in
 * a fresh coroutine.  Completion is reported asynchronously via a bottom
 * half (see bdrv_co_rw / bdrv_co_rw_bh).
 *
 * NOTE(review): part of the parameter list and the Coroutine declaration
 * were lost in extraction.
 */
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               BlockDriverCompletionFunc *cb,
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
2816 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2817 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2818 BlockDriverCompletionFunc *cb, void *opaque)
2820 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2824 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2825 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2826 BlockDriverCompletionFunc *cb, void *opaque)
2828 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
/*
 * Emulate asynchronous flush on top of the synchronous path: the flush is
 * performed immediately and the result is reported from a scheduled
 * bottom half, preserving AIO calling conventions.
 */
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = 1; /* don't bounce in the completion handler */
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    qemu_bh_schedule(acb->bh);
    return &acb->common;
2851 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2852 BlockDriverCompletionFunc *cb, void *opaque)
2854 BlockDriverAIOCBSync *acb;
2856 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2857 acb->is_write = 1; /* don't bounce in the completion handler */
2863 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2866 qemu_bh_schedule(acb->bh);
2867 return &acb->common;
2870 /**************************************************************/
2871 /* sync block device emulation */
/*
 * Completion callback for the synchronous read/write emulation: store the
 * AIO result into the caller's int so the polling loop in
 * bdrv_read_em()/bdrv_write_em() can observe it.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2878 #define NOT_DONE 0x7fffffff
/*
 * Synchronous sector read emulated on top of bdrv_aio_readv(): wrap the
 * caller's buffer in a single-element iovec, issue the AIO request with
 * bdrv_rw_em_cb() as completion, and poll until the callback replaces the
 * NOT_DONE sentinel with the result.
 *
 * NOTE(review): declarations of async_ret/acb/iov/qiov, the NULL-acb
 * error path, the qemu_aio_wait() loop body, and the final return were
 * lost in extraction.
 */
static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors)
    BlockDriverAIOCB *acb;

    async_ret = NOT_DONE;
    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);
    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);

    /* Busy-wait, servicing AIO completions, until the callback fires. */
    while (async_ret == NOT_DONE) {
2908 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2909 const uint8_t *buf, int nb_sectors)
2912 BlockDriverAIOCB *acb;
2916 async_ret = NOT_DONE;
2917 iov.iov_base = (void *)buf;
2918 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2919 qemu_iovec_init_external(&qiov, &iov, 1);
2920 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2921 bdrv_rw_em_cb, &async_ret);
2926 while (async_ret == NOT_DONE) {
2934 void bdrv_init(void)
2936 module_call_init(MODULE_INIT_BLOCK);
2939 void bdrv_init_with_whitelist(void)
2941 use_bdrv_whitelist = 1;
/*
 * Allocate an AIOCB from 'pool'.  A previously released AIOCB is reused
 * from the pool's free list when available; otherwise a zeroed object of
 * the pool's aiocb_size is allocated.  The cb/opaque (and, per the lost
 * lines, pool/bs) fields are filled in for the caller.
 */
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        /* Pop a recycled AIOCB off the free list. */
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
        acb = g_malloc0(pool->aiocb_size);
    acb->opaque = opaque;
2963 void qemu_aio_release(void *p)
2965 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2966 AIOPool *pool = acb->pool;
2967 acb->next = pool->free_aiocb;
2968 pool->free_aiocb = acb;
2971 /**************************************************************/
2972 /* Coroutine block device emulation */
2974 typedef struct CoroutineIOCompletion {
2975 Coroutine *coroutine;
2977 } CoroutineIOCompletion;
2979 static void bdrv_co_io_em_complete(void *opaque, int ret)
2981 CoroutineIOCompletion *co = opaque;
2984 qemu_coroutine_enter(co->coroutine, NULL);
/*
 * Emulate coroutine read/write on top of the AIO interface: submit the
 * request with bdrv_co_io_em_complete() as the completion callback, yield
 * this coroutine until the callback re-enters it, then return the stored
 * result.
 *
 * NOTE(review): the is_write parameter line, the NULL-acb error return,
 * and the final 'return co.ret;' were lost in extraction.
 */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    BlockDriverAIOCB *acb;

    acb = bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                          bdrv_co_io_em_complete, &co);
    acb = bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                         bdrv_co_io_em_complete, &co);

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);

    /* Suspend until bdrv_co_io_em_complete() re-enters this coroutine. */
    qemu_coroutine_yield();
3013 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3014 int64_t sector_num, int nb_sectors,
3017 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3020 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3021 int64_t sector_num, int nb_sectors,
3024 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3027 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3029 CoroutineIOCompletion co = {
3030 .coroutine = qemu_coroutine_self(),
3032 BlockDriverAIOCB *acb;
3034 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3038 qemu_coroutine_yield();
3042 /**************************************************************/
3043 /* removable device support */
3046 * Return TRUE if the media is present
3048 int bdrv_is_inserted(BlockDriverState *bs)
3050 BlockDriver *drv = bs->drv;
3054 if (!drv->bdrv_is_inserted)
3056 return drv->bdrv_is_inserted(bs);
3060 * Return whether the media changed since the last call to this
3061 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3063 int bdrv_media_changed(BlockDriverState *bs)
3065 BlockDriver *drv = bs->drv;
3067 if (drv && drv->bdrv_media_changed) {
3068 return drv->bdrv_media_changed(bs);
3074 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3076 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3078 BlockDriver *drv = bs->drv;
3080 if (drv && drv->bdrv_eject) {
3081 drv->bdrv_eject(bs, eject_flag);
3086 * Lock or unlock the media (if it is locked, the user won't be able
3087 * to eject it manually).
3089 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3091 BlockDriver *drv = bs->drv;
3093 trace_bdrv_lock_medium(bs, locked);
3095 if (drv && drv->bdrv_lock_medium) {
3096 drv->bdrv_lock_medium(bs, locked);
3100 /* needed for generic scsi interface */
3102 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3104 BlockDriver *drv = bs->drv;
3106 if (drv && drv->bdrv_ioctl)
3107 return drv->bdrv_ioctl(bs, req, buf);
3111 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3112 unsigned long int req, void *buf,
3113 BlockDriverCompletionFunc *cb, void *opaque)
3115 BlockDriver *drv = bs->drv;
3117 if (drv && drv->bdrv_aio_ioctl)
3118 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3122 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3124 bs->buffer_alignment = align;
3127 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3129 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3132 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3134 int64_t bitmap_size;
3136 bs->dirty_count = 0;
3138 if (!bs->dirty_bitmap) {
3139 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3140 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3141 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3143 bs->dirty_bitmap = g_malloc0(bitmap_size);
3146 if (bs->dirty_bitmap) {
3147 g_free(bs->dirty_bitmap);
3148 bs->dirty_bitmap = NULL;
3153 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3155 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3157 if (bs->dirty_bitmap &&
3158 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3159 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3160 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3166 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3169 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3172 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3174 return bs->dirty_count;
3177 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3179 assert(bs->in_use != in_use);
3180 bs->in_use = in_use;
3183 int bdrv_in_use(BlockDriverState *bs)
3188 void bdrv_iostatus_enable(BlockDriverState *bs)
3190 bs->iostatus = BDRV_IOS_OK;
3193 /* The I/O status is only enabled if the drive explicitly
3194 * enables it _and_ the VM is configured to stop on errors */
3195 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3197 return (bs->iostatus != BDRV_IOS_INVAL &&
3198 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3199 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3200 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3203 void bdrv_iostatus_disable(BlockDriverState *bs)
3205 bs->iostatus = BDRV_IOS_INVAL;
3208 void bdrv_iostatus_reset(BlockDriverState *bs)
3210 if (bdrv_iostatus_is_enabled(bs)) {
3211 bs->iostatus = BDRV_IOS_OK;
3215 /* XXX: Today this is set by device models because it makes the implementation
3216 quite simple. However, the block layer knows about the error, so it's
3217 possible to implement this without device models being involved */
3218 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3220 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3222 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3227 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3228 enum BlockAcctType type)
3230 assert(type < BDRV_MAX_IOTYPE);
3232 cookie->bytes = bytes;
3233 cookie->start_time_ns = get_clock();
3234 cookie->type = type;
3238 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3240 assert(cookie->type < BDRV_MAX_IOTYPE);
3242 bs->nr_bytes[cookie->type] += cookie->bytes;
3243 bs->nr_ops[cookie->type]++;
3244 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3247 int bdrv_img_create(const char *filename, const char *fmt,
3248 const char *base_filename, const char *base_fmt,
3249 char *options, uint64_t img_size, int flags)
3251 QEMUOptionParameter *param = NULL, *create_options = NULL;
3252 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3253 BlockDriverState *bs = NULL;
3254 BlockDriver *drv, *proto_drv;
3255 BlockDriver *backing_drv = NULL;
3258 /* Find driver and parse its options */
3259 drv = bdrv_find_format(fmt);
3261 error_report("Unknown file format '%s'", fmt);
3266 proto_drv = bdrv_find_protocol(filename);
3268 error_report("Unknown protocol '%s'", filename);
3273 create_options = append_option_parameters(create_options,
3274 drv->create_options);
3275 create_options = append_option_parameters(create_options,
3276 proto_drv->create_options);
3278 /* Create parameter list with default values */
3279 param = parse_option_parameters("", create_options, param);
3281 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3283 /* Parse -o options */
3285 param = parse_option_parameters(options, create_options, param);
3286 if (param == NULL) {
3287 error_report("Invalid options for file format '%s'.", fmt);
3293 if (base_filename) {
3294 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3296 error_report("Backing file not supported for file format '%s'",
3304 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3305 error_report("Backing file format not supported for file "
3306 "format '%s'", fmt);
3312 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3313 if (backing_file && backing_file->value.s) {
3314 if (!strcmp(filename, backing_file->value.s)) {
3315 error_report("Error: Trying to create an image with the "
3316 "same filename as the backing file");
3322 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3323 if (backing_fmt && backing_fmt->value.s) {
3324 backing_drv = bdrv_find_format(backing_fmt->value.s);
3326 error_report("Unknown backing file format '%s'",
3327 backing_fmt->value.s);
3333 // The size for the image must always be specified, with one exception:
3334 // If we are using a backing file, we can obtain the size from there
3335 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3336 if (size && size->value.n == -1) {
3337 if (backing_file && backing_file->value.s) {
3343 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3345 error_report("Could not open '%s'", backing_file->value.s);
3348 bdrv_get_geometry(bs, &size);
3351 snprintf(buf, sizeof(buf), "%" PRId64, size);
3352 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3354 error_report("Image creation needs a size parameter");
3360 printf("Formatting '%s', fmt=%s ", filename, fmt);
3361 print_option_parameters(param);
3364 ret = bdrv_create(drv, filename, param);
3367 if (ret == -ENOTSUP) {
3368 error_report("Formatting or formatting option not supported for "
3369 "file format '%s'", fmt);
3370 } else if (ret == -EFBIG) {
3371 error_report("The image size is too large for file format '%s'",
3374 error_report("%s: error while creating %s: %s", filename, fmt,
3380 free_option_parameters(create_options);
3381 free_option_parameters(param);