2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
34 #include <sys/types.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
47 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
48 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50 BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
58 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
59 uint8_t *buf, int nb_sectors);
60 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
61 const uint8_t *buf, int nb_sectors);
62 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
63 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
64 BlockDriverCompletionFunc *cb, void *opaque);
65 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
66 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
67 BlockDriverCompletionFunc *cb, void *opaque);
68 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
69 int64_t sector_num, int nb_sectors,
71 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
72 int64_t sector_num, int nb_sectors,
74 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
89 static int is_windows_drive_prefix(const char *filename)
91 return (((filename[0] >= 'a' && filename[0] <= 'z') ||
92 (filename[0] >= 'A' && filename[0] <= 'Z')) &&
96 int is_windows_drive(const char *filename)
98 if (is_windows_drive_prefix(filename) &&
101 if (strstart(filename, "\\\\.\\", NULL) ||
102 strstart(filename, "//./", NULL))
108 /* check if the path starts with "<protocol>:" */
109 static int path_has_protocol(const char *path)
112 if (is_windows_drive(path) ||
113 is_windows_drive_prefix(path)) {
118 return strchr(path, ':') != NULL;
121 int path_is_absolute(const char *path)
125 /* specific case for names like: "\\.\d:" */
126 if (*path == '/' || *path == '\\')
129 p = strchr(path, ':');
135 return (*p == '/' || *p == '\\');
141 /* if filename is absolute, just copy it to dest. Otherwise, build a
142    path to it by treating it as relative to base_path. URLs are
144 void path_combine(char *dest, int dest_size,
145 const char *base_path,
146 const char *filename)
153 if (path_is_absolute(filename)) {
154 pstrcpy(dest, dest_size, filename);
156 p = strchr(base_path, ':');
161 p1 = strrchr(base_path, '/');
165 p2 = strrchr(base_path, '\\');
177 if (len > dest_size - 1)
179 memcpy(dest, base_path, len);
181 pstrcat(dest, dest_size, filename);
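/*
 * Editor's example (illustrative sketch, not part of the original file):
 * how path_combine() resolves a backing file name stored relative to the
 * image that references it. The file names are made up.
 */
static void example_path_combine(void)
{
    char dest[PATH_MAX];

    /* relative name: resolved against the directory of base_path */
    path_combine(dest, sizeof(dest), "/images/vm/disk.qcow2", "base.img");
    /* dest is now "/images/vm/base.img" */

    /* absolute name: copied to dest unchanged */
    path_combine(dest, sizeof(dest), "/images/vm/disk.qcow2", "/tmp/base.img");
    /* dest is now "/tmp/base.img" */
}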
185 void bdrv_register(BlockDriver *bdrv)
187 if (bdrv->bdrv_co_readv) {
188 /* Emulate AIO by coroutines, and sync by AIO */
189 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
190 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
191 bdrv->bdrv_read = bdrv_read_em;
192 bdrv->bdrv_write = bdrv_write_em;
194 bdrv->bdrv_co_readv = bdrv_co_readv_em;
195 bdrv->bdrv_co_writev = bdrv_co_writev_em;
197 if (!bdrv->bdrv_aio_readv) {
198 /* add AIO emulation layer */
199 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
200 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
201 } else if (!bdrv->bdrv_read) {
202 /* add synchronous IO emulation layer */
203 bdrv->bdrv_read = bdrv_read_em;
204 bdrv->bdrv_write = bdrv_write_em;
208 if (!bdrv->bdrv_aio_flush)
209 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
211 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
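/*
 * Editor's example (hedged sketch): a minimal driver that implements only
 * the coroutine interface. bdrv_register() fills in the AIO and synchronous
 * entry points through the emulation layers above, so implementing one
 * interface suffices. The "nulldrv" driver and its callback are hypothetical.
 */
static int coroutine_fn nulldrv_co_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    return 0; /* pretend every read succeeds without touching the qiov */
}

static BlockDriver bdrv_nulldrv = {
    .format_name   = "nulldrv",
    .bdrv_co_readv = nulldrv_co_readv,
};

static void example_register(void)
{
    bdrv_register(&bdrv_nulldrv); /* AIO and sync callbacks get emulated */
}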
214 /* create a new block device (by default it is empty) */
215 BlockDriverState *bdrv_new(const char *device_name)
217 BlockDriverState *bs;
219 bs = g_malloc0(sizeof(BlockDriverState));
220 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
221 if (device_name[0] != '\0') {
222 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
224 bdrv_iostatus_disable(bs);
228 BlockDriver *bdrv_find_format(const char *format_name)
231 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
232 if (!strcmp(drv1->format_name, format_name)) {
239 static int bdrv_is_whitelisted(BlockDriver *drv)
241 static const char *whitelist[] = {
242 CONFIG_BDRV_WHITELIST
247 return 1; /* no whitelist, anything goes */
249 for (p = whitelist; *p; p++) {
250 if (!strcmp(drv->format_name, *p)) {
257 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
259 BlockDriver *drv = bdrv_find_format(format_name);
260 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
263 int bdrv_create(BlockDriver *drv, const char* filename,
264 QEMUOptionParameter *options)
266 if (!drv->bdrv_create)
269 return drv->bdrv_create(filename, options);
272 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
276 drv = bdrv_find_protocol(filename);
281 return bdrv_create(drv, filename, options);
285 void get_tmp_filename(char *filename, int size)
287 char temp_dir[MAX_PATH];
289 GetTempPath(MAX_PATH, temp_dir);
290 GetTempFileName(temp_dir, "qem", 0, filename);
293 void get_tmp_filename(char *filename, int size)
297 /* XXX: race condition possible */
298 tmpdir = getenv("TMPDIR");
301 snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
302 fd = mkstemp(filename);
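/*
 * Editor's example: typical use of get_tmp_filename(), as done by the
 * snapshot code in bdrv_open() below.
 */
static void example_tmp_filename(void)
{
    char tmp[PATH_MAX];

    get_tmp_filename(tmp, sizeof(tmp));
    /* tmp now names a freshly created scratch file, e.g. $TMPDIR/vl.XXXXXX */
}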
308 * Detect host devices. By convention, /dev/cdrom[N] is always
309 * recognized as a host CDROM.
311 static BlockDriver *find_hdev_driver(const char *filename)
313 int score_max = 0, score;
314 BlockDriver *drv = NULL, *d;
316 QLIST_FOREACH(d, &bdrv_drivers, list) {
317 if (d->bdrv_probe_device) {
318 score = d->bdrv_probe_device(filename);
319 if (score > score_max) {
329 BlockDriver *bdrv_find_protocol(const char *filename)
336 /* TODO Drivers without bdrv_file_open must be specified explicitly */
339 * XXX(hch): we really should not let host device detection
340 * override an explicit protocol specification, but moving this
341 * later breaks access to device names with colons in them.
342 * Thanks to the brain-dead persistent naming schemes on udev-
343 * based Linux systems those actually are quite common.
345 drv1 = find_hdev_driver(filename);
350 if (!path_has_protocol(filename)) {
351 return bdrv_find_format("file");
353 p = strchr(filename, ':');
356 if (len > sizeof(protocol) - 1)
357 len = sizeof(protocol) - 1;
358 memcpy(protocol, filename, len);
359 protocol[len] = '\0';
360 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
361 if (drv1->protocol_name &&
362 !strcmp(drv1->protocol_name, protocol)) {
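/*
 * Editor's example: protocol resolution. A plain path falls back to the
 * "file" driver; a "<protocol>:" prefix selects the matching protocol
 * driver (here nbd, assuming it is compiled in).
 */
static void example_find_protocol(void)
{
    BlockDriver *file_drv = bdrv_find_protocol("/images/test.img");
    BlockDriver *nbd_drv  = bdrv_find_protocol("nbd:localhost:10809");

    (void)file_drv; /* the "file" driver */
    (void)nbd_drv;  /* the driver whose protocol_name is "nbd", or NULL */
}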
369 static int find_image_format(const char *filename, BlockDriver **pdrv)
371 int ret, score, score_max;
372 BlockDriver *drv1, *drv;
374 BlockDriverState *bs;
376 ret = bdrv_file_open(&bs, filename, 0);
382 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
383 if (bs->sg || !bdrv_is_inserted(bs)) {
385 drv = bdrv_find_format("raw");
393 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
402 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
403 if (drv1->bdrv_probe) {
404 score = drv1->bdrv_probe(buf, ret, filename);
405 if (score > score_max) {
419 * Set the current 'total_sectors' value
421 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
423 BlockDriver *drv = bs->drv;
425 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
429 /* query actual device if possible, otherwise just trust the hint */
430 if (drv->bdrv_getlength) {
431 int64_t length = drv->bdrv_getlength(bs);
435 hint = length >> BDRV_SECTOR_BITS;
438 bs->total_sectors = hint;
443 * Set open flags for a given cache mode
445 * Return 0 on success, -1 if the cache mode was invalid.
447 int bdrv_parse_cache_flags(const char *mode, int *flags)
449 *flags &= ~BDRV_O_CACHE_MASK;
451 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
452 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
453 } else if (!strcmp(mode, "directsync")) {
454 *flags |= BDRV_O_NOCACHE;
455 } else if (!strcmp(mode, "writeback")) {
456 *flags |= BDRV_O_CACHE_WB;
457 } else if (!strcmp(mode, "unsafe")) {
458 *flags |= BDRV_O_CACHE_WB;
459 *flags |= BDRV_O_NO_FLUSH;
460 } else if (!strcmp(mode, "writethrough")) {
461 /* this is the default */
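/*
 * Editor's example: translating a cache= option string into open flags
 * with bdrv_parse_cache_flags() before opening an image.
 */
static int example_cache_flags(void)
{
    int flags = BDRV_O_RDWR;

    if (bdrv_parse_cache_flags("none", &flags) < 0) {
        return -1; /* unknown cache mode */
    }
    /* cache=none: flags now include BDRV_O_NOCACHE | BDRV_O_CACHE_WB */
    return flags;
}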
470 * Common part for opening disk images and files
472 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
473 int flags, BlockDriver *drv)
479 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
482 bs->total_sectors = 0;
485 bs->open_flags = flags;
486 bs->buffer_alignment = 512;
488 pstrcpy(bs->filename, sizeof(bs->filename), filename);
490 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
495 bs->opaque = g_malloc0(drv->instance_size);
497 if (flags & BDRV_O_CACHE_WB)
498 bs->enable_write_cache = 1;
501 * Clear flags that are internal to the block layer before opening the
504 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
507 * Snapshots should be writable.
509 if (bs->is_temporary) {
510 open_flags |= BDRV_O_RDWR;
513 /* Open the image, either directly or using a protocol */
514 if (drv->bdrv_file_open) {
515 ret = drv->bdrv_file_open(bs, filename, open_flags);
517 ret = bdrv_file_open(&bs->file, filename, open_flags);
519 ret = drv->bdrv_open(bs, open_flags);
527 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
529 ret = refresh_total_sectors(bs, bs->total_sectors);
535 if (bs->is_temporary) {
543 bdrv_delete(bs->file);
553 * Opens a file using a protocol (file, host_device, nbd, ...)
555 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
557 BlockDriverState *bs;
561 drv = bdrv_find_protocol(filename);
567 ret = bdrv_open_common(bs, filename, flags, drv);
578 * Opens a disk image (raw, qcow2, vmdk, ...)
580 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
585 if (flags & BDRV_O_SNAPSHOT) {
586 BlockDriverState *bs1;
589 BlockDriver *bdrv_qcow2;
590 QEMUOptionParameter *options;
591 char tmp_filename[PATH_MAX];
592 char backing_filename[PATH_MAX];
594 /* if snapshot, we create a temporary backing file and open it
595 instead of opening 'filename' directly */
597 /* if there is a backing file, use it */
599 ret = bdrv_open(bs1, filename, 0, drv);
604 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
606 if (bs1->drv && bs1->drv->protocol_name)
611 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
613 /* Real path is meaningless for protocols */
615 snprintf(backing_filename, sizeof(backing_filename),
617 else if (!realpath(filename, backing_filename))
620 bdrv_qcow2 = bdrv_find_format("qcow2");
621 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
623 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
624 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
626 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
630 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
631 free_option_parameters(options);
636 filename = tmp_filename;
638 bs->is_temporary = 1;
641 /* Find the right image format driver */
643 ret = find_image_format(filename, &drv);
647 goto unlink_and_fail;
651 ret = bdrv_open_common(bs, filename, flags, drv);
653 goto unlink_and_fail;
656 /* If there is a backing file, use it */
657 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
658 char backing_filename[PATH_MAX];
660 BlockDriver *back_drv = NULL;
662 bs->backing_hd = bdrv_new("");
664 if (path_has_protocol(bs->backing_file)) {
665 pstrcpy(backing_filename, sizeof(backing_filename),
668 path_combine(backing_filename, sizeof(backing_filename),
669 filename, bs->backing_file);
672 if (bs->backing_format[0] != '\0') {
673 back_drv = bdrv_find_format(bs->backing_format);
676 /* backing files always opened read-only */
678 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
680 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
685 if (bs->is_temporary) {
686 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
688 /* base image inherits from "parent" */
689 bs->backing_hd->keep_read_only = bs->keep_read_only;
693 if (!bdrv_key_required(bs)) {
694 bdrv_dev_change_media_cb(bs, true);
700 if (bs->is_temporary) {
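/*
 * Editor's example (hedged sketch): opening an image in snapshot mode.
 * With BDRV_O_SNAPSHOT, bdrv_open() creates a temporary qcow2 overlay
 * backed by the original image, so writes never reach "disk.img".
 * The file name is made up.
 */
static int example_open_snapshot(void)
{
    BlockDriverState *bs = bdrv_new("");
    int ret = bdrv_open(bs, "disk.img", BDRV_O_SNAPSHOT | BDRV_O_RDWR, NULL);

    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    /* ... guest I/O goes to the temporary overlay ... */
    bdrv_close(bs);   /* unlinks the temporary file (bs->is_temporary) */
    bdrv_delete(bs);
    return 0;
}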
706 void bdrv_close(BlockDriverState *bs)
709 if (bs == bs_snapshots) {
712 if (bs->backing_hd) {
713 bdrv_delete(bs->backing_hd);
714 bs->backing_hd = NULL;
716 bs->drv->bdrv_close(bs);
719 if (bs->is_temporary) {
720 unlink(bs->filename);
726 if (bs->file != NULL) {
727 bdrv_close(bs->file);
730 bdrv_dev_change_media_cb(bs, false);
734 void bdrv_close_all(void)
736 BlockDriverState *bs;
738 QTAILQ_FOREACH(bs, &bdrv_states, list) {
743 /* make a BlockDriverState anonymous by removing it from the bdrv_states
744    list. Also, clear device_name to prevent a double remove */
745 void bdrv_make_anon(BlockDriverState *bs)
747 if (bs->device_name[0] != '\0') {
748 QTAILQ_REMOVE(&bdrv_states, bs, list);
750 bs->device_name[0] = '\0';
753 void bdrv_delete(BlockDriverState *bs)
757 /* remove from list, if necessary */
761 if (bs->file != NULL) {
762 bdrv_delete(bs->file);
765 assert(bs != bs_snapshots);
769 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
770 /* TODO change to DeviceState *dev when all users are qdevified */
776 bdrv_iostatus_reset(bs);
780 /* TODO qdevified devices don't use this, remove when devices are qdevified */
781 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
783 if (bdrv_attach_dev(bs, dev) < 0) {
788 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
789 /* TODO change to DeviceState *dev when all users are qdevified */
791 assert(bs->dev == dev);
794 bs->dev_opaque = NULL;
795 bs->buffer_alignment = 512;
798 /* TODO change to return DeviceState * when all users are qdevified */
799 void *bdrv_get_attached_dev(BlockDriverState *bs)
804 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
808 bs->dev_opaque = opaque;
809 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
814 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
816 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
817 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
821 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
823 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
826 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
828 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
829 return bs->dev_ops->is_tray_open(bs->dev_opaque);
834 static void bdrv_dev_resize_cb(BlockDriverState *bs)
836 if (bs->dev_ops && bs->dev_ops->resize_cb) {
837 bs->dev_ops->resize_cb(bs->dev_opaque);
841 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
843 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
844 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
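/*
 * Editor's example: a device model hooking into the block layer through
 * BlockDevOps. Only the callbacks a device cares about need to be set;
 * the bdrv_dev_*() helpers above tolerate missing entries. "mydev" is
 * hypothetical.
 */
static void mydev_change_media_cb(void *opaque, bool load)
{
    /* react to medium insertion (load == true) or removal */
}

static const BlockDevOps mydev_block_ops = {
    .change_media_cb = mydev_change_media_cb,
};

static void example_set_dev_ops(BlockDriverState *bs, void *dev)
{
    bdrv_set_dev_ops(bs, &mydev_block_ops, dev);
}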
850 * Run consistency checks on an image
852 * Returns 0 if the check could be completed (it doesn't mean that the image is
853 * free of errors) or -errno when an internal error occurred. The results of the
854 * check are stored in res.
856 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
858 if (bs->drv->bdrv_check == NULL) {
862 memset(res, 0, sizeof(*res));
863 return bs->drv->bdrv_check(bs, res);
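/*
 * Editor's example (hedged): invoking an image check. The BdrvCheckResult
 * field names (corruptions, leaks) are taken from the contemporary headers
 * and should be treated as assumptions here.
 */
static void example_check(BlockDriverState *bs)
{
    BdrvCheckResult res;

    if (bdrv_check(bs, &res) < 0) {
        return; /* driver has no checker, or an internal error occurred */
    }
    printf("corruptions=%d leaks=%d\n", res.corruptions, res.leaks);
}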
866 #define COMMIT_BUF_SECTORS 2048
868 /* commit COW file into the raw image */
869 int bdrv_commit(BlockDriverState *bs)
871 BlockDriver *drv = bs->drv;
872 BlockDriver *backing_drv;
873 int64_t sector, total_sectors;
874 int n, ro, open_flags;
875 int ret = 0, rw_ret = 0;
878 BlockDriverState *bs_rw, *bs_ro;
883 if (!bs->backing_hd) {
887 if (bs->backing_hd->keep_read_only) {
891 backing_drv = bs->backing_hd->drv;
892 ro = bs->backing_hd->read_only;
893 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
894 open_flags = bs->backing_hd->open_flags;
898 bdrv_delete(bs->backing_hd);
899 bs->backing_hd = NULL;
900 bs_rw = bdrv_new("");
901 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
905 /* try to re-open read-only */
906 bs_ro = bdrv_new("");
907 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
911 /* drive not functional anymore */
915 bs->backing_hd = bs_ro;
918 bs->backing_hd = bs_rw;
921 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
922 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
924 for (sector = 0; sector < total_sectors; sector += n) {
925 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
927 if (bdrv_read(bs, sector, buf, n) != 0) {
932 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
939 if (drv->bdrv_make_empty) {
940 ret = drv->bdrv_make_empty(bs);
945 * Make sure all data we wrote to the backing device is actually
949 bdrv_flush(bs->backing_hd);
956 bdrv_delete(bs->backing_hd);
957 bs->backing_hd = NULL;
958 bs_ro = bdrv_new("");
959 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
963 /* drive not functional anymore */
967 bs->backing_hd = bs_ro;
968 bs->backing_hd->keep_read_only = 0;
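/*
 * Editor's example: committing an overlay. On success, every cluster
 * allocated in bs has been copied into its backing file, and bs itself
 * was emptied if the driver implements bdrv_make_empty.
 */
static void example_commit(BlockDriverState *bs)
{
    if (bdrv_commit(bs) < 0) {
        /* no backing file, read-only backing file, or I/O error */
    }
}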
974 void bdrv_commit_all(void)
976 BlockDriverState *bs;
978 QTAILQ_FOREACH(bs, &bdrv_states, list) {
986 * -EINVAL - backing format specified, but no file
987 * -ENOSPC - can't update the backing file because no space is left in the
989 * -ENOTSUP - format driver doesn't support changing the backing file
991 int bdrv_change_backing_file(BlockDriverState *bs,
992 const char *backing_file, const char *backing_fmt)
994 BlockDriver *drv = bs->drv;
996 if (drv->bdrv_change_backing_file != NULL) {
997 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
1003 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1008 if (!bdrv_is_inserted(bs))
1014 len = bdrv_getlength(bs);
1019 if ((offset > len) || (len - offset < size))
1025 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1028 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1029 nb_sectors * BDRV_SECTOR_SIZE);
1032 static inline bool bdrv_has_async_rw(BlockDriver *drv)
1034 return drv->bdrv_co_readv != bdrv_co_readv_em
1035 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1038 static inline bool bdrv_has_async_flush(BlockDriver *drv)
1040 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1043 /* return < 0 if error. See bdrv_write() for the return codes */
1044 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1045 uint8_t *buf, int nb_sectors)
1047 BlockDriver *drv = bs->drv;
1052 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1054 struct iovec iov = {
1055 .iov_base = (void *)buf,
1056 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1059 qemu_iovec_init_external(&qiov, &iov, 1);
1060 return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1063 if (bdrv_check_request(bs, sector_num, nb_sectors))
1066 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
1069 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1070 int nb_sectors, int dirty)
1073 unsigned long val, idx, bit;
1075 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1076 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1078 for (; start <= end; start++) {
1079 idx = start / (sizeof(unsigned long) * 8);
1080 bit = start % (sizeof(unsigned long) * 8);
1081 val = bs->dirty_bitmap[idx];
1083 if (!(val & (1UL << bit))) {
1088 if (val & (1UL << bit)) {
1090 val &= ~(1UL << bit);
1093 bs->dirty_bitmap[idx] = val;
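/*
 * Editor's note, a worked example of the chunk arithmetic above (assuming
 * the usual BDRV_SECTORS_PER_DIRTY_CHUNK of 2048, i.e. 1 MB chunks):
 * marking 8 sectors dirty at sector_num 4094 gives
 *   start = 4094 / 2048 = 1,   end = (4094 + 8 - 1) / 2048 = 2,
 * so chunks 1 and 2 are set, i.e. bits 1 and 2 of dirty_bitmap[0] on a
 * 64-bit host.
 */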
1097 /* Return < 0 if error. Important errors are:
1098 -EIO generic I/O error (may happen for all errors)
1099 -ENOMEDIUM No media inserted.
1100 -EINVAL Invalid sector number or nb_sectors
1101 -EACCES Trying to write a read-only device
1103 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1104 const uint8_t *buf, int nb_sectors)
1106 BlockDriver *drv = bs->drv;
1111 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1113 struct iovec iov = {
1114 .iov_base = (void *)buf,
1115 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1118 qemu_iovec_init_external(&qiov, &iov, 1);
1119 return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1124 if (bdrv_check_request(bs, sector_num, nb_sectors))
1127 if (bs->dirty_bitmap) {
1128 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1131 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1132 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1135 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1138 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1139 void *buf, int count1)
1141 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1142 int len, nb_sectors, count;
1147 /* first read to align to sector start */
1148 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1151 sector_num = offset >> BDRV_SECTOR_BITS;
1153 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1155 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1163 /* read the sectors "in place" */
1164 nb_sectors = count >> BDRV_SECTOR_BITS;
1165 if (nb_sectors > 0) {
1166 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1168 sector_num += nb_sectors;
1169 len = nb_sectors << BDRV_SECTOR_BITS;
1174 /* add data from the last sector */
1176 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1178 memcpy(buf, tmp_buf, count);
1183 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1184 const void *buf, int count1)
1186 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1187 int len, nb_sectors, count;
1192 /* first write to align to sector start */
1193 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1196 sector_num = offset >> BDRV_SECTOR_BITS;
1198 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1200 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1201 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1210 /* write the sectors "in place" */
1211 nb_sectors = count >> BDRV_SECTOR_BITS;
1212 if (nb_sectors > 0) {
1213 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1215 sector_num += nb_sectors;
1216 len = nb_sectors << BDRV_SECTOR_BITS;
1221 /* add data from the last sector */
1223 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1225 memcpy(tmp_buf, buf, count);
1226 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
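/*
 * Editor's example: byte-granularity access. bdrv_pwrite()/bdrv_pread()
 * accept arbitrary offsets and lengths; the read-modify-write of partial
 * head and tail sectors happens internally as shown above.
 */
static int example_byte_access(BlockDriverState *bs)
{
    uint8_t magic[7];
    int ret;

    /* 7 bytes at byte offset 21: sector 0 is read, patched, written back */
    ret = bdrv_pwrite(bs, 21, "qemu-io", 7);
    if (ret < 0) {
        return ret;
    }
    return bdrv_pread(bs, 21, magic, sizeof(magic));
}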
1233 * Writes to the file and ensures that no writes are reordered across this
1234 * request (acts as a barrier)
1236 * Returns 0 on success, -errno in error cases.
1238 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1239 const void *buf, int count)
1243 ret = bdrv_pwrite(bs, offset, buf, count);
1248 /* No flush needed for cache modes that use O_DSYNC */
1249 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1256 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1257 int nb_sectors, QEMUIOVector *qiov)
1259 BlockDriver *drv = bs->drv;
1261 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1266 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1270 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1273 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors, QEMUIOVector *qiov)
1276 BlockDriver *drv = bs->drv;
1278 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1283 if (bs->read_only) {
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1290 if (bs->dirty_bitmap) {
1291 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1294 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1295 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1298 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
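/*
 * Editor's example: issuing vectored I/O from coroutine context; this is
 * the same pattern bdrv_read()/bdrv_write() use above when they detect
 * they are running inside a coroutine.
 */
static int coroutine_fn example_co_read(BlockDriverState *bs, void *buf)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = buf,
        .iov_len  = 4 * BDRV_SECTOR_SIZE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_co_readv(bs, 0, 4, &qiov); /* 4 sectors starting at 0 */
}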
1302 * Truncate file to 'offset' bytes (needed only for file protocols)
1304 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1306 BlockDriver *drv = bs->drv;
1310 if (!drv->bdrv_truncate)
1314 if (bdrv_in_use(bs))
1316 ret = drv->bdrv_truncate(bs, offset);
1318 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1319 bdrv_dev_resize_cb(bs);
1325 * Length of an allocated file in bytes. Sparse files are counted by actual
1326 * allocated space. Return < 0 if error or unknown.
1328 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1330 BlockDriver *drv = bs->drv;
1334 if (drv->bdrv_get_allocated_file_size) {
1335 return drv->bdrv_get_allocated_file_size(bs);
1338 return bdrv_get_allocated_file_size(bs->file);
1344 * Length of a file in bytes. Return < 0 if error or unknown.
1346 int64_t bdrv_getlength(BlockDriverState *bs)
1348 BlockDriver *drv = bs->drv;
1352 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1353 if (drv->bdrv_getlength) {
1354 return drv->bdrv_getlength(bs);
1357 return bs->total_sectors * BDRV_SECTOR_SIZE;
1360 /* return 0 as number of sectors if no device present or error */
1361 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1364 length = bdrv_getlength(bs);
1368 length = length >> BDRV_SECTOR_BITS;
1369 *nb_sectors_ptr = length;
1373 uint8_t boot_ind; /* 0x80 - active */
1374 uint8_t head; /* starting head */
1375 uint8_t sector; /* starting sector */
1376 uint8_t cyl; /* starting cylinder */
1377 uint8_t sys_ind; /* What partition type */
1378 uint8_t end_head; /* end head */
1379 uint8_t end_sector; /* end sector */
1380 uint8_t end_cyl; /* end cylinder */
1381 uint32_t start_sect; /* starting sector counting from 0 */
1382 uint32_t nr_sects; /* nr of sectors in partition */
1385 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1386 static int guess_disk_lchs(BlockDriverState *bs,
1387 int *pcylinders, int *pheads, int *psectors)
1389 uint8_t buf[BDRV_SECTOR_SIZE];
1390 int ret, i, heads, sectors, cylinders;
1391 struct partition *p;
1393 uint64_t nb_sectors;
1395 bdrv_get_geometry(bs, &nb_sectors);
1397 ret = bdrv_read(bs, 0, buf, 1);
1400 /* test msdos magic */
1401 if (buf[510] != 0x55 || buf[511] != 0xaa)
1403 for(i = 0; i < 4; i++) {
1404 p = ((struct partition *)(buf + 0x1be)) + i;
1405 nr_sects = le32_to_cpu(p->nr_sects);
1406 if (nr_sects && p->end_head) {
1407 /* We make the assumption that the partition terminates on
1408 a cylinder boundary */
1409 heads = p->end_head + 1;
1410 sectors = p->end_sector & 63;
1413 cylinders = nb_sectors / (heads * sectors);
1414 if (cylinders < 1 || cylinders > 16383)
1417 *psectors = sectors;
1418 *pcylinders = cylinders;
1420 printf("guessed geometry: LCHS=%d %d %d\n",
1421 cylinders, heads, sectors);
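/*
 * Editor's note, a worked example of the guess above: an MBR entry with
 * end_head = 15 and end_sector = 63 yields heads = 16, sectors = 63.
 * For a 1032192-sector disk, cylinders = 1032192 / (16 * 63) = 1024,
 * which passes the 1 <= cylinders <= 16383 sanity check.
 */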
1429 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1431 int translation, lba_detected = 0;
1432 int cylinders, heads, secs;
1433 uint64_t nb_sectors;
1435 /* if a geometry hint is available, use it */
1436 bdrv_get_geometry(bs, &nb_sectors);
1437 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1438 translation = bdrv_get_translation_hint(bs);
1439 if (cylinders != 0) {
1444 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1446 /* if heads > 16, it means that a BIOS LBA
1447 translation was active, so the default
1448 hardware geometry is OK */
1450 goto default_geometry;
1455 /* disable any translation to be in sync with
1456 the logical geometry */
1457 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1458 bdrv_set_translation_hint(bs,
1459 BIOS_ATA_TRANSLATION_NONE);
1464 /* if no geometry, use a standard physical disk geometry */
1465 cylinders = nb_sectors / (16 * 63);
1467 if (cylinders > 16383)
1469 else if (cylinders < 2)
1474 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1475 if ((*pcyls * *pheads) <= 131072) {
1476 bdrv_set_translation_hint(bs,
1477 BIOS_ATA_TRANSLATION_LARGE);
1479 bdrv_set_translation_hint(bs,
1480 BIOS_ATA_TRANSLATION_LBA);
1484 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1488 void bdrv_set_geometry_hint(BlockDriverState *bs,
1489 int cyls, int heads, int secs)
1496 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1498 bs->translation = translation;
1501 void bdrv_get_geometry_hint(BlockDriverState *bs,
1502 int *pcyls, int *pheads, int *psecs)
1505 *pheads = bs->heads;
1509 /* Recognize floppy formats */
1510 typedef struct FDFormat {
1517 static const FDFormat fd_formats[] = {
1518 /* First entry is default format */
1519 /* 1.44 MB 3"1/2 floppy disks */
1520 { FDRIVE_DRV_144, 18, 80, 1, },
1521 { FDRIVE_DRV_144, 20, 80, 1, },
1522 { FDRIVE_DRV_144, 21, 80, 1, },
1523 { FDRIVE_DRV_144, 21, 82, 1, },
1524 { FDRIVE_DRV_144, 21, 83, 1, },
1525 { FDRIVE_DRV_144, 22, 80, 1, },
1526 { FDRIVE_DRV_144, 23, 80, 1, },
1527 { FDRIVE_DRV_144, 24, 80, 1, },
1528 /* 2.88 MB 3"1/2 floppy disks */
1529 { FDRIVE_DRV_288, 36, 80, 1, },
1530 { FDRIVE_DRV_288, 39, 80, 1, },
1531 { FDRIVE_DRV_288, 40, 80, 1, },
1532 { FDRIVE_DRV_288, 44, 80, 1, },
1533 { FDRIVE_DRV_288, 48, 80, 1, },
1534 /* 720 kB 3"1/2 floppy disks */
1535 { FDRIVE_DRV_144, 9, 80, 1, },
1536 { FDRIVE_DRV_144, 10, 80, 1, },
1537 { FDRIVE_DRV_144, 10, 82, 1, },
1538 { FDRIVE_DRV_144, 10, 83, 1, },
1539 { FDRIVE_DRV_144, 13, 80, 1, },
1540 { FDRIVE_DRV_144, 14, 80, 1, },
1541 /* 1.2 MB 5"1/4 floppy disks */
1542 { FDRIVE_DRV_120, 15, 80, 1, },
1543 { FDRIVE_DRV_120, 18, 80, 1, },
1544 { FDRIVE_DRV_120, 18, 82, 1, },
1545 { FDRIVE_DRV_120, 18, 83, 1, },
1546 { FDRIVE_DRV_120, 20, 80, 1, },
1547 /* 720 kB 5"1/4 floppy disks */
1548 { FDRIVE_DRV_120, 9, 80, 1, },
1549 { FDRIVE_DRV_120, 11, 80, 1, },
1550 /* 360 kB 5"1/4 floppy disks */
1551 { FDRIVE_DRV_120, 9, 40, 1, },
1552 { FDRIVE_DRV_120, 9, 40, 0, },
1553 { FDRIVE_DRV_120, 10, 41, 1, },
1554 { FDRIVE_DRV_120, 10, 42, 1, },
1555 /* 320 kB 5"1/4 floppy disks */
1556 { FDRIVE_DRV_120, 8, 40, 1, },
1557 { FDRIVE_DRV_120, 8, 40, 0, },
1558 /* 360 kB must match 5"1/4 better than 3"1/2... */
1559 { FDRIVE_DRV_144, 9, 80, 0, },
1561 { FDRIVE_DRV_NONE, -1, -1, 0, },
1564 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1565 int *max_track, int *last_sect,
1566 FDriveType drive_in, FDriveType *drive)
1568 const FDFormat *parse;
1569 uint64_t nb_sectors, size;
1570 int i, first_match, match;
1572 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1573 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1574 /* User defined disk */
1576 bdrv_get_geometry(bs, &nb_sectors);
1579 for (i = 0; ; i++) {
1580 parse = &fd_formats[i];
1581 if (parse->drive == FDRIVE_DRV_NONE) {
1584 if (drive_in == parse->drive ||
1585 drive_in == FDRIVE_DRV_NONE) {
1586 size = (parse->max_head + 1) * parse->max_track *
1588 if (nb_sectors == size) {
1592 if (first_match == -1) {
1598 if (first_match == -1) {
1601 match = first_match;
1603 parse = &fd_formats[match];
1605 *nb_heads = parse->max_head + 1;
1606 *max_track = parse->max_track;
1607 *last_sect = parse->last_sect;
1608 *drive = parse->drive;
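/*
 * Editor's note, a worked example of the table match above: a raw image
 * of 1474560 bytes is 2880 sectors, and the first entry satisfying
 * (max_head + 1) * max_track * last_sect = 2 * 80 * 18 = 2880 is the
 * default 1.44 MB format, so it is returned as an exact match.
 */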
1612 int bdrv_get_translation_hint(BlockDriverState *bs)
1614 return bs->translation;
1617 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1618 BlockErrorAction on_write_error)
1620 bs->on_read_error = on_read_error;
1621 bs->on_write_error = on_write_error;
1624 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1626 return is_read ? bs->on_read_error : bs->on_write_error;
1629 int bdrv_is_read_only(BlockDriverState *bs)
1631 return bs->read_only;
1634 int bdrv_is_sg(BlockDriverState *bs)
1639 int bdrv_enable_write_cache(BlockDriverState *bs)
1641 return bs->enable_write_cache;
1644 int bdrv_is_encrypted(BlockDriverState *bs)
1646 if (bs->backing_hd && bs->backing_hd->encrypted)
1648 return bs->encrypted;
1651 int bdrv_key_required(BlockDriverState *bs)
1653 BlockDriverState *backing_hd = bs->backing_hd;
1655 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1657 return (bs->encrypted && !bs->valid_key);
1660 int bdrv_set_key(BlockDriverState *bs, const char *key)
1663 if (bs->backing_hd && bs->backing_hd->encrypted) {
1664 ret = bdrv_set_key(bs->backing_hd, key);
1670 if (!bs->encrypted) {
1672 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1675 ret = bs->drv->bdrv_set_key(bs, key);
1678 } else if (!bs->valid_key) {
1680 /* call the change callback now, we skipped it on open */
1681 bdrv_dev_change_media_cb(bs, true);
1686 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1691 pstrcpy(buf, buf_size, bs->drv->format_name);
1695 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1700 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1701 it(opaque, drv->format_name);
1705 BlockDriverState *bdrv_find(const char *name)
1707 BlockDriverState *bs;
1709 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1710 if (!strcmp(name, bs->device_name)) {
1717 BlockDriverState *bdrv_next(BlockDriverState *bs)
1720 return QTAILQ_FIRST(&bdrv_states);
1722 return QTAILQ_NEXT(bs, list);
1725 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1727 BlockDriverState *bs;
1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1734 const char *bdrv_get_device_name(BlockDriverState *bs)
1736 return bs->device_name;
1739 int bdrv_flush(BlockDriverState *bs)
1741 if (bs->open_flags & BDRV_O_NO_FLUSH) {
1745 if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
1746 return bdrv_co_flush_em(bs);
1749 if (bs->drv && bs->drv->bdrv_flush) {
1750 return bs->drv->bdrv_flush(bs);
1754 * Some block drivers always operate in either writethrough or unsafe mode
1755 * and therefore don't support bdrv_flush. Usually qemu doesn't know how
1756 * the server works (because the behaviour is hardcoded or depends on
1757 * server-side configuration), so we can't ensure that everything is safe
1758 * on disk. Returning an error doesn't work because that would break guests
1759 * even if the server operates in writethrough mode.
1761 * Let's hope the user knows what he's doing.
1766 void bdrv_flush_all(void)
1768 BlockDriverState *bs;
1770 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1771 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1777 int bdrv_has_zero_init(BlockDriverState *bs)
1781 if (bs->drv->bdrv_has_zero_init) {
1782 return bs->drv->bdrv_has_zero_init(bs);
1788 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1793 if (!bs->drv->bdrv_discard) {
1796 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1800 * Returns true iff the specified sector is present in the disk image. Drivers
1801 * not implementing the functionality are assumed not to support backing files,
1802 * hence all their sectors are reported as allocated.
1804 * 'pnum' is set to the number of sectors (including and immediately following
1805 * the specified sector) that are known to be in the same
1806 * allocated/unallocated state.
1808 * 'nb_sectors' is the max value 'pnum' should be set to.
1810 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1814 if (!bs->drv->bdrv_is_allocated) {
1815 if (sector_num >= bs->total_sectors) {
1819 n = bs->total_sectors - sector_num;
1820 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1823 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
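/*
 * Editor's example: walking the allocation map of an image, the usual
 * consumer pattern for bdrv_is_allocated() (essentially what bdrv_commit()
 * above does through drv->bdrv_is_allocated).
 */
static void example_alloc_scan(BlockDriverState *bs, int64_t total_sectors)
{
    int64_t sector = 0;
    int n;

    while (sector < total_sectors) {
        int chunk = total_sectors - sector > 65536
                    ? 65536 : (int)(total_sectors - sector);

        if (bdrv_is_allocated(bs, sector, chunk, &n)) {
            /* sectors [sector, sector + n) are allocated in this layer */
        }
        sector += n;
    }
}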
1826 void bdrv_mon_event(const BlockDriverState *bdrv,
1827 BlockMonEventAction action, int is_read)
1830 const char *action_str;
1833 case BDRV_ACTION_REPORT:
1834 action_str = "report";
1836 case BDRV_ACTION_IGNORE:
1837 action_str = "ignore";
1839 case BDRV_ACTION_STOP:
1840 action_str = "stop";
1846 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1849 is_read ? "read" : "write");
1850 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1852 qobject_decref(data);
1855 static void bdrv_print_dict(QObject *obj, void *opaque)
1858 Monitor *mon = opaque;
1860 bs_dict = qobject_to_qdict(obj);
1862 monitor_printf(mon, "%s: removable=%d",
1863 qdict_get_str(bs_dict, "device"),
1864 qdict_get_bool(bs_dict, "removable"));
1866 if (qdict_get_bool(bs_dict, "removable")) {
1867 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1868 monitor_printf(mon, " tray-open=%d",
1869 qdict_get_bool(bs_dict, "tray-open"));
1872 if (qdict_haskey(bs_dict, "io-status")) {
1873 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1876 if (qdict_haskey(bs_dict, "inserted")) {
1877 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1879 monitor_printf(mon, " file=");
1880 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1881 if (qdict_haskey(qdict, "backing_file")) {
1882 monitor_printf(mon, " backing_file=");
1883 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1885 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1886 qdict_get_bool(qdict, "ro"),
1887 qdict_get_str(qdict, "drv"),
1888 qdict_get_bool(qdict, "encrypted"));
1890 monitor_printf(mon, " [not inserted]");
1893 monitor_printf(mon, "\n");
1896 void bdrv_info_print(Monitor *mon, const QObject *data)
1898 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1901 static const char *const io_status_name[BDRV_IOS_MAX] = {
1902 [BDRV_IOS_OK] = "ok",
1903 [BDRV_IOS_FAILED] = "failed",
1904 [BDRV_IOS_ENOSPC] = "nospace",
1907 void bdrv_info(Monitor *mon, QObject **ret_data)
1910 BlockDriverState *bs;
1912 bs_list = qlist_new();
1914 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1918 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1919 "'removable': %i, 'locked': %i }",
1921 bdrv_dev_has_removable_media(bs),
1922 bdrv_dev_is_medium_locked(bs));
1923 bs_dict = qobject_to_qdict(bs_obj);
1925 if (bdrv_dev_has_removable_media(bs)) {
1926 qdict_put(bs_dict, "tray-open",
1927 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1930 if (bdrv_iostatus_is_enabled(bs)) {
1931 qdict_put(bs_dict, "io-status",
1932 qstring_from_str(io_status_name[bs->iostatus]));
1938 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1939 "'encrypted': %i }",
1940 bs->filename, bs->read_only,
1941 bs->drv->format_name,
1942 bdrv_is_encrypted(bs));
1943 if (bs->backing_file[0] != '\0') {
1944 QDict *qdict = qobject_to_qdict(obj);
1945 qdict_put(qdict, "backing_file",
1946 qstring_from_str(bs->backing_file));
1949 qdict_put_obj(bs_dict, "inserted", obj);
1951 qlist_append_obj(bs_list, bs_obj);
1954 *ret_data = QOBJECT(bs_list);
1957 static void bdrv_stats_iter(QObject *data, void *opaque)
1960 Monitor *mon = opaque;
1962 qdict = qobject_to_qdict(data);
1963 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1965 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1966 monitor_printf(mon, " rd_bytes=%" PRId64
1967 " wr_bytes=%" PRId64
1968 " rd_operations=%" PRId64
1969 " wr_operations=%" PRId64
1970 " flush_operations=%" PRId64
1971 " wr_total_time_ns=%" PRId64
1972 " rd_total_time_ns=%" PRId64
1973 " flush_total_time_ns=%" PRId64
1975 qdict_get_int(qdict, "rd_bytes"),
1976 qdict_get_int(qdict, "wr_bytes"),
1977 qdict_get_int(qdict, "rd_operations"),
1978 qdict_get_int(qdict, "wr_operations"),
1979 qdict_get_int(qdict, "flush_operations"),
1980 qdict_get_int(qdict, "wr_total_time_ns"),
1981 qdict_get_int(qdict, "rd_total_time_ns"),
1982 qdict_get_int(qdict, "flush_total_time_ns"));
1985 void bdrv_stats_print(Monitor *mon, const QObject *data)
1987 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1990 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1995 res = qobject_from_jsonf("{ 'stats': {"
1996 "'rd_bytes': %" PRId64 ","
1997 "'wr_bytes': %" PRId64 ","
1998 "'rd_operations': %" PRId64 ","
1999 "'wr_operations': %" PRId64 ","
2000 "'wr_highest_offset': %" PRId64 ","
2001 "'flush_operations': %" PRId64 ","
2002 "'wr_total_time_ns': %" PRId64 ","
2003 "'rd_total_time_ns': %" PRId64 ","
2004 "'flush_total_time_ns': %" PRId64
2006 bs->nr_bytes[BDRV_ACCT_READ],
2007 bs->nr_bytes[BDRV_ACCT_WRITE],
2008 bs->nr_ops[BDRV_ACCT_READ],
2009 bs->nr_ops[BDRV_ACCT_WRITE],
2010 bs->wr_highest_sector *
2011 (uint64_t)BDRV_SECTOR_SIZE,
2012 bs->nr_ops[BDRV_ACCT_FLUSH],
2013 bs->total_time_ns[BDRV_ACCT_WRITE],
2014 bs->total_time_ns[BDRV_ACCT_READ],
2015 bs->total_time_ns[BDRV_ACCT_FLUSH]);
2016 dict = qobject_to_qdict(res);
2018 if (*bs->device_name) {
2019 qdict_put(dict, "device", qstring_from_str(bs->device_name));
2023 QObject *parent = bdrv_info_stats_bs(bs->file);
2024 qdict_put_obj(dict, "parent", parent);
2030 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2034 BlockDriverState *bs;
2036 devices = qlist_new();
2038 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2039 obj = bdrv_info_stats_bs(bs);
2040 qlist_append_obj(devices, obj);
2043 *ret_data = QOBJECT(devices);
2046 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2048 if (bs->backing_hd && bs->backing_hd->encrypted)
2049 return bs->backing_file;
2050 else if (bs->encrypted)
2051 return bs->filename;
2056 void bdrv_get_backing_filename(BlockDriverState *bs,
2057 char *filename, int filename_size)
2059 if (!bs->backing_file) {
2060 pstrcpy(filename, filename_size, "");
2062 pstrcpy(filename, filename_size, bs->backing_file);
2066 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2067 const uint8_t *buf, int nb_sectors)
2069 BlockDriver *drv = bs->drv;
2072 if (!drv->bdrv_write_compressed)
2074 if (bdrv_check_request(bs, sector_num, nb_sectors))
2077 if (bs->dirty_bitmap) {
2078 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2081 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2084 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2086 BlockDriver *drv = bs->drv;
2089 if (!drv->bdrv_get_info)
2091 memset(bdi, 0, sizeof(*bdi));
2092 return drv->bdrv_get_info(bs, bdi);
2095 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2096 int64_t pos, int size)
2098 BlockDriver *drv = bs->drv;
2101 if (drv->bdrv_save_vmstate)
2102 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2104 return bdrv_save_vmstate(bs->file, buf, pos, size);
2108 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2109 int64_t pos, int size)
2111 BlockDriver *drv = bs->drv;
2114 if (drv->bdrv_load_vmstate)
2115 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2117 return bdrv_load_vmstate(bs->file, buf, pos, size);
2121 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2123 BlockDriver *drv = bs->drv;
2125 if (!drv || !drv->bdrv_debug_event) {
2129 return drv->bdrv_debug_event(bs, event);
2133 /**************************************************************/
2134 /* handling of snapshots */
2136 int bdrv_can_snapshot(BlockDriverState *bs)
2138 BlockDriver *drv = bs->drv;
2139 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2143 if (!drv->bdrv_snapshot_create) {
2144 if (bs->file != NULL) {
2145 return bdrv_can_snapshot(bs->file);
2153 int bdrv_is_snapshot(BlockDriverState *bs)
2155 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2158 BlockDriverState *bdrv_snapshots(void)
2160 BlockDriverState *bs;
2163 return bs_snapshots;
2167 while ((bs = bdrv_next(bs))) {
2168 if (bdrv_can_snapshot(bs)) {
2176 int bdrv_snapshot_create(BlockDriverState *bs,
2177 QEMUSnapshotInfo *sn_info)
2179 BlockDriver *drv = bs->drv;
2182 if (drv->bdrv_snapshot_create)
2183 return drv->bdrv_snapshot_create(bs, sn_info);
2185 return bdrv_snapshot_create(bs->file, sn_info);
2189 int bdrv_snapshot_goto(BlockDriverState *bs,
2190 const char *snapshot_id)
2192 BlockDriver *drv = bs->drv;
2197 if (drv->bdrv_snapshot_goto)
2198 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2201 drv->bdrv_close(bs);
2202 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2203 open_ret = drv->bdrv_open(bs, bs->open_flags);
2205 bdrv_delete(bs->file);
2215 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2217 BlockDriver *drv = bs->drv;
2220 if (drv->bdrv_snapshot_delete)
2221 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2223 return bdrv_snapshot_delete(bs->file, snapshot_id);
2227 int bdrv_snapshot_list(BlockDriverState *bs,
2228 QEMUSnapshotInfo **psn_info)
2230 BlockDriver *drv = bs->drv;
2233 if (drv->bdrv_snapshot_list)
2234 return drv->bdrv_snapshot_list(bs, psn_info);
2236 return bdrv_snapshot_list(bs->file, psn_info);
2240 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2241 const char *snapshot_name)
2243 BlockDriver *drv = bs->drv;
2247 if (!bs->read_only) {
2250 if (drv->bdrv_snapshot_load_tmp) {
2251 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2256 #define NB_SUFFIXES 4
2258 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
2260 static const char suffixes[NB_SUFFIXES] = "KMGT";
2265 snprintf(buf, buf_size, "%" PRId64, size);
2268 for(i = 0; i < NB_SUFFIXES; i++) {
2269 if (size < (10 * base)) {
2270 snprintf(buf, buf_size, "%0.1f%c",
2271 (double)size / base,
2274 } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
2275 snprintf(buf, buf_size, "%" PRId64 "%c",
2276 ((size + (base >> 1)) / base),
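/*
 * Editor's note, sample outputs of the formatter above:
 *   999        -> "999"   (small sizes are printed verbatim)
 *   1536       -> "1.5K"  (size < 10 * base: one decimal place)
 *   524288     -> "512K"  (rounded to a whole multiple of the base)
 *   1073741824 -> "1.0G"
 */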
2286 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2288 char buf1[128], date_buf[128], clock_buf[128];
2298 snprintf(buf, buf_size,
2299 "%-10s%-20s%7s%20s%15s",
2300 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2304 ptm = localtime(&ti);
2305 strftime(date_buf, sizeof(date_buf),
2306 "%Y-%m-%d %H:%M:%S", ptm);
2308 localtime_r(&ti, &tm);
2309 strftime(date_buf, sizeof(date_buf),
2310 "%Y-%m-%d %H:%M:%S", &tm);
2312 secs = sn->vm_clock_nsec / 1000000000;
2313 snprintf(clock_buf, sizeof(clock_buf),
2314 "%02d:%02d:%02d.%03d",
2316 (int)((secs / 60) % 60),
2318 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2319 snprintf(buf, buf_size,
2320 "%-10s%-20s%7s%20s%15s",
2321 sn->id_str, sn->name,
2322 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2329 /**************************************************************/
2332 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2333 QEMUIOVector *qiov, int nb_sectors,
2334 BlockDriverCompletionFunc *cb, void *opaque)
2336 BlockDriver *drv = bs->drv;
2338 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2342 if (bdrv_check_request(bs, sector_num, nb_sectors))
2345 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2349 typedef struct BlockCompleteData {
2350 BlockDriverCompletionFunc *cb;
2352 BlockDriverState *bs;
2355 } BlockCompleteData;
2357 static void block_complete_cb(void *opaque, int ret)
2359 BlockCompleteData *b = opaque;
2361 if (b->bs->dirty_bitmap) {
2362 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2364 b->cb(b->opaque, ret);
2368 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2371 BlockDriverCompletionFunc *cb,
2374 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2378 blkdata->opaque = opaque;
2379 blkdata->sector_num = sector_num;
2380 blkdata->nb_sectors = nb_sectors;
2385 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2386 QEMUIOVector *qiov, int nb_sectors,
2387 BlockDriverCompletionFunc *cb, void *opaque)
2389 BlockDriver *drv = bs->drv;
2390 BlockDriverAIOCB *ret;
2391 BlockCompleteData *blk_cb_data;
2393 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2399 if (bdrv_check_request(bs, sector_num, nb_sectors))
2402 if (bs->dirty_bitmap) {
2403 blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
2405 cb = &block_complete_cb;
2406 opaque = blk_cb_data;
2409 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
2413 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
2414 bs->wr_highest_sector = sector_num + nb_sectors - 1;
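/*
 * Editor's example: the asynchronous read path. The caller supplies a
 * QEMUIOVector and a completion callback; cb(opaque, ret) fires once the
 * request finishes.
 */
static void example_aio_done(void *opaque, int ret)
{
    /* ret is 0 on success, negative errno on failure */
}

static void example_aio_read(BlockDriverState *bs, uint8_t *buf)
{
    QEMUIOVector qiov;
    struct iovec iov = { .iov_base = buf, .iov_len = BDRV_SECTOR_SIZE };
    BlockDriverAIOCB *acb;

    qemu_iovec_init_external(&qiov, &iov, 1);
    acb = bdrv_aio_readv(bs, 0, &qiov, 1, example_aio_done, NULL);
    if (!acb) {
        /* the request could not even be submitted */
    }
}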
2422 typedef struct MultiwriteCB {
2427 BlockDriverCompletionFunc *cb;
2429 QEMUIOVector *free_qiov;
2434 static void multiwrite_user_cb(MultiwriteCB *mcb)
2438 for (i = 0; i < mcb->num_callbacks; i++) {
2439 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2440 if (mcb->callbacks[i].free_qiov) {
2441 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2443 g_free(mcb->callbacks[i].free_qiov);
2444 qemu_vfree(mcb->callbacks[i].free_buf);
2448 static void multiwrite_cb(void *opaque, int ret)
2450 MultiwriteCB *mcb = opaque;
2452 trace_multiwrite_cb(mcb, ret);
2454 if (ret < 0 && !mcb->error) {
2458 mcb->num_requests--;
2459 if (mcb->num_requests == 0) {
2460 multiwrite_user_cb(mcb);
2465 static int multiwrite_req_compare(const void *a, const void *b)
2467 const BlockRequest *req1 = a, *req2 = b;
2470 * Note that we can't simply subtract req2->sector from req1->sector
2471 * here as that could overflow the return value.
2473 if (req1->sector > req2->sector) {
2475 } else if (req1->sector < req2->sector) {
2483 * Takes a bunch of requests and tries to merge them. Returns the number of
2484 * requests that remain after merging.
2486 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2487 int num_reqs, MultiwriteCB *mcb)
2491 // Sort requests by start sector
2492 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2494 // Check if adjacent requests touch the same clusters. If so, combine them,
2495 // filling up gaps with zero sectors.
2497 for (i = 1; i < num_reqs; i++) {
2499 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2501 // This handles the cases that are valid for all block drivers, namely
2502 // exactly sequential writes and overlapping writes.
2503 if (reqs[i].sector <= oldreq_last) {
2507 // The block driver may decide that it makes sense to combine requests
2508 // even if there is a gap of some sectors between them. In this case,
2509 // the gap is filled with zeros (therefore only applicable to as-yet
2510 // unused space in formats like qcow2).
2511 if (!merge && bs->drv->bdrv_merge_requests) {
2512 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2515 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2521 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2522 qemu_iovec_init(qiov,
2523 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2525 // Add the first request to the merged one. If the requests are
2526 // overlapping, drop the last sectors of the first request.
2527 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2528 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2530 // We might need to add some zeros between the two requests
2531 if (reqs[i].sector > oldreq_last) {
2532 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2533 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2534 memset(buf, 0, zero_bytes);
2535 qemu_iovec_add(qiov, buf, zero_bytes);
2536 mcb->callbacks[i].free_buf = buf;
2539 // Add the second request
2540 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2542 reqs[outidx].nb_sectors = qiov->size >> 9;
2543 reqs[outidx].qiov = qiov;
2545 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2548 reqs[outidx].sector = reqs[i].sector;
2549 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2550 reqs[outidx].qiov = reqs[i].qiov;
2558 * Submit multiple AIO write requests at once.
2560 * On success, the function returns 0 and all requests in the reqs array have
2561 * been submitted. In the error case this function returns -1, and any of the
2562 * requests may or may not be submitted yet. In particular, this means that the
2563 * callback will be called for some of the requests, for others it won't. The
2564 * caller must check the error field of the BlockRequest to wait for the right
2565 * callbacks (if error != 0, no callback will be called).
2567 * The implementation may modify the contents of the reqs array, e.g. to merge
2568 * requests. However, the fields opaque and error are left unmodified as they
2569 * are used to signal failure for a single request to the caller.
2571 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2573 BlockDriverAIOCB *acb;
2577 /* don't submit writes if we don't have a medium */
2578 if (bs->drv == NULL) {
2579 for (i = 0; i < num_reqs; i++) {
2580 reqs[i].error = -ENOMEDIUM;
2585 if (num_reqs == 0) {
2589 // Create MultiwriteCB structure
2590 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2591 mcb->num_requests = 0;
2592 mcb->num_callbacks = num_reqs;
2594 for (i = 0; i < num_reqs; i++) {
2595 mcb->callbacks[i].cb = reqs[i].cb;
2596 mcb->callbacks[i].opaque = reqs[i].opaque;
2599 // Check for mergeable requests
2600 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2602 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2605 * Run the aio requests. As soon as one request can't be submitted
2606 * successfully, fail all requests that are not yet submitted (we must
2607 * return failure for all requests anyway)
2609 * num_requests cannot be set to the right value immediately: If
2610 * bdrv_aio_writev fails for some request, num_requests would be too high
2611 * and therefore multiwrite_cb() would never recognize the multiwrite
2612 * request as completed. We also cannot use the loop variable i to set it
2613 * when the first request fails because the callback may already have been
2614 * called for previously submitted requests. Thus, num_requests must be
2615 * incremented for each request that is submitted.
2617 * The problem that callbacks may be called early also means that we need
2618 * to take care that num_requests doesn't become 0 before all requests are
2619 * submitted - multiwrite_cb() would consider the multiwrite request
2620 * completed. A dummy request that is "completed" by a manual call to
2621 * multiwrite_cb() takes care of this.
2623 mcb->num_requests = 1;
2625 // Run the aio requests
2626 for (i = 0; i < num_reqs; i++) {
2627 mcb->num_requests++;
2628 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2629 reqs[i].nb_sectors, multiwrite_cb, mcb);
2632 // We can only fail the whole thing if no request has been
2633 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2634 // complete and report the error in the callback.
2636 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2639 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2640 multiwrite_cb(mcb, -EIO);
2646 /* Complete the dummy request */
2647 multiwrite_cb(mcb, 0);
2652 for (i = 0; i < mcb->num_callbacks; i++) {
2653 reqs[i].error = -EIO;
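/*
 * Editor's example (hedged sketch): batching two writes through
 * bdrv_aio_multiwrite(). Adjacent requests may be merged by
 * multiwrite_merge() above, but each original request still gets its own
 * completion callback. Assumes each qiov describes exactly 8 sectors.
 */
static int example_multiwrite(BlockDriverState *bs,
                              QEMUIOVector *qiov_a, QEMUIOVector *qiov_b,
                              BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockRequest reqs[2] = {
        { .sector = 0, .nb_sectors = 8, .qiov = qiov_a,
          .cb = cb, .opaque = opaque },
        { .sector = 8, .nb_sectors = 8, .qiov = qiov_b,
          .cb = cb, .opaque = opaque },
    };

    return bdrv_aio_multiwrite(bs, reqs, 2);
}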
2659 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2660 BlockDriverCompletionFunc *cb, void *opaque)
2662 BlockDriver *drv = bs->drv;
2664 trace_bdrv_aio_flush(bs, opaque);
2666 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2667 return bdrv_aio_noop_em(bs, cb, opaque);
2672 return drv->bdrv_aio_flush(bs, cb, opaque);
2675 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2677 acb->pool->cancel(acb);
2681 /**************************************************************/
2682 /* async block device emulation */
2684 typedef struct BlockDriverAIOCBSync {
2685 BlockDriverAIOCB common;
2688 /* vector translation state */
2692 } BlockDriverAIOCBSync;
2694 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2696 BlockDriverAIOCBSync *acb =
2697 container_of(blockacb, BlockDriverAIOCBSync, common);
2698 qemu_bh_delete(acb->bh);
2700 qemu_aio_release(acb);
2703 static AIOPool bdrv_em_aio_pool = {
2704 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2705 .cancel = bdrv_aio_cancel_em,
2708 static void bdrv_aio_bh_cb(void *opaque)
2710 BlockDriverAIOCBSync *acb = opaque;
2713 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2714 qemu_vfree(acb->bounce);
2715 acb->common.cb(acb->common.opaque, acb->ret);
2716 qemu_bh_delete(acb->bh);
2718 qemu_aio_release(acb);

static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int is_write)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);

    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}

static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}

typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    QEMUBH *bh;
} BlockDriverAIOCBCoroutine;

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};

static void bdrv_co_rw_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}

static void coroutine_fn bdrv_co_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    } else {
        acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
    qemu_bh_schedule(acb->bh);
}

static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}

static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
                                 false);
}

static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
                                 true);
}
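
/*
 * The emulation functions above complement each other: bdrv_register()
 * (earlier in this file) uses them so that every driver offers both the
 * callback and the coroutine interface, roughly:
 *
 *     if (bdrv->bdrv_co_readv) {
 *         // native coroutine driver: emulate aio on top of coroutines
 *         bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
 *         bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
 *     } else {
 *         // native aio/sync driver: emulate coroutines on top of aio
 *         bdrv->bdrv_co_readv = bdrv_co_readv_em;
 *         bdrv->bdrv_co_writev = bdrv_co_writev_em;
 *     }
 */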

static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = 1; /* don't bounce in the completion handler */
    acb->qiov = NULL;
    acb->bounce = NULL;
    acb->ret = 0;

    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    bdrv_flush(bs);
    qemu_bh_schedule(acb->bh);
    return &acb->common;
}

static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = 1; /* don't bounce in the completion handler */
    acb->qiov = NULL;
    acb->bounce = NULL;
    acb->ret = 0;

    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    qemu_bh_schedule(acb->bh);
    return &acb->common;
}

/**************************************************************/
/* sync block device emulation */

static void bdrv_rw_em_cb(void *opaque, int ret)
{
    *(int *)opaque = ret;
}

#define NOT_DONE 0x7fffffff

static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
                        uint8_t *buf, int nb_sectors)
{
    int async_ret;
    BlockDriverAIOCB *acb;
    struct iovec iov;
    QEMUIOVector qiov;

    async_ret = NOT_DONE;
    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);
    acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        async_ret = -1;
        goto fail;
    }

    while (async_ret == NOT_DONE) {
        qemu_aio_wait();
    }

fail:
    return async_ret;
}

static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                         const uint8_t *buf, int nb_sectors)
{
    int async_ret;
    BlockDriverAIOCB *acb;
    struct iovec iov;
    QEMUIOVector qiov;

    async_ret = NOT_DONE;
    iov.iov_base = (void *)buf;
    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&qiov, &iov, 1);
    acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
        bdrv_rw_em_cb, &async_ret);
    if (acb == NULL) {
        async_ret = -1;
        goto fail;
    }

    while (async_ret == NOT_DONE) {
        qemu_aio_wait();
    }

fail:
    return async_ret;
}

void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}

void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
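
/*
 * Example: the allocate/complete/release cycle for a driver-defined AIOCB.
 * A sketch with a hypothetical RawAIOCB and raw_aio_pool; only the
 * 'common' field and the AIOPool shape are prescribed by this file:
 *
 *     typedef struct RawAIOCB {
 *         BlockDriverAIOCB common;    // must be the first member
 *         int fd;
 *     } RawAIOCB;
 *
 *     static AIOPool raw_aio_pool = {
 *         .aiocb_size = sizeof(RawAIOCB),
 *         .cancel     = raw_aio_cancel,
 *     };
 *
 *     RawAIOCB *acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
 *     // ... submit the request; when it finishes:
 *     acb->common.cb(acb->common.opaque, ret);
 *     qemu_aio_release(acb);          // back onto the pool's free list
 */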

/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}

static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}

static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}

static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();
    return co.ret;
}
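
/*
 * The CoroutineIOCompletion pattern above generalizes: any coroutine_fn
 * can submit a callback-based request and sleep until the callback
 * re-enters it.  A condensed sketch of the idiom (some_aio_call() stands
 * for any function taking a completion callback and opaque pointer):
 *
 *     CoroutineIOCompletion co = {
 *         .coroutine = qemu_coroutine_self(),
 *     };
 *
 *     acb = some_aio_call(..., bdrv_co_io_em_complete, &co);
 *     if (!acb) {
 *         return -EIO;
 *     }
 *     qemu_coroutine_yield();    // resumed by bdrv_co_io_em_complete()
 *     return co.ret;
 */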

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv)
        return 0;
    if (!drv->bdrv_is_inserted)
        return 1;
    return drv->bdrv_is_inserted(bs);
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know. Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}

/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 */
void bdrv_eject(BlockDriverState *bs, int eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
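
/*
 * Example: the tray sequence a CD-ROM device model might run when the
 * guest requests an eject.  A sketch; the surrounding device code is
 * hypothetical:
 *
 *     if (bdrv_is_inserted(bs)) {
 *         bdrv_lock_medium(bs, false);    // let the user eject again
 *         bdrv_eject(bs, 1);              // open the tray
 *     }
 */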

/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl)
        return drv->bdrv_ioctl(bs, req, buf);
    return -ENOTSUP;
}

BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl)
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    return NULL;
}

void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ?
                         bs->buffer_alignment : 512, size);
}
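
/*
 * Example: buffers passed to bdrv_read()/bdrv_write() should honour the
 * device's alignment requirement (cache=none files typically need
 * 512-byte alignment).  A minimal sketch:
 *
 *     uint8_t *buf = qemu_blockalign(bs, 4096);
 *     int ret = bdrv_read(bs, 0, buf, 8);    // 8 sectors == 4096 bytes
 *     // ...
 *     qemu_vfree(buf);                       // not g_free()
 */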

void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}

int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
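
/*
 * Example: how block migration walks the dirty bitmap, one chunk at a
 * time.  A simplified sketch (error handling and the actual copy are
 * omitted):
 *
 *     int64_t sector, end = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
 *
 *     for (sector = 0; sector < end;
 *          sector += BDRV_SECTORS_PER_DIRTY_CHUNK) {
 *         if (bdrv_get_dirty(bs, sector)) {
 *             // copy BDRV_SECTORS_PER_DIRTY_CHUNK sectors, then:
 *             bdrv_reset_dirty(bs, sector, BDRV_SECTORS_PER_DIRTY_CHUNK);
 *         }
 *     }
 */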

void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}

void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus = BDRV_IOS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus != BDRV_IOS_INVAL &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}

void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus = BDRV_IOS_INVAL;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BDRV_IOS_OK;
    }
}

/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
    }
}
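
/*
 * Example: a device model reporting a failed write once it has decided
 * to stop the VM.  A sketch; 's' is a hypothetical device state holding
 * its BlockDriverState:
 *
 *     static void my_write_cb(void *opaque, int ret)
 *     {
 *         if (ret < 0) {
 *             bdrv_iostatus_set_err(s->bs, -ret);    // positive errno
 *         }
 *     }
 */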

void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
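
/*
 * Example: bracketing a request with accounting, as IDE-style device
 * models do.  A sketch; 's', 's->acct' (a BlockAcctCookie) and
 * my_read_cb() are hypothetical:
 *
 *     bdrv_acct_start(s->bs, &s->acct,
 *                     nb_sectors * BDRV_SECTOR_SIZE, BDRV_ACCT_READ);
 *     s->aiocb = bdrv_aio_readv(s->bs, sector_num, &s->qiov, nb_sectors,
 *                               my_read_cb, s);
 *
 *     // and in my_read_cb(), once the request has completed:
 *     bdrv_acct_done(s->bs, &s->acct);
 */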

int bdrv_img_create(const char *filename, const char *fmt,
                    const char *base_filename, const char *base_fmt,
                    char *options, uint64_t img_size, int flags)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", fmt);
        ret = -EINVAL;
        goto out;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_report("Unknown protocol '%s'", filename);
        ret = -EINVAL;
        goto out;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_report("Invalid options for file format '%s'.", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_report("Backing file not supported for file format '%s'",
                         fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_report("Backing file format not supported for file "
                         "format '%s'", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_report("Error: Trying to create an image with the "
                         "same filename as the backing file");
            ret = -EINVAL;
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_report("Unknown backing file format '%s'",
                         backing_fmt->value.s);
            ret = -EINVAL;
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            uint64_t size;
            char buf[32];

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
            if (ret < 0) {
                error_report("Could not open '%s'", backing_file->value.s);
                goto out;
            }
            bdrv_get_geometry(bs, &size);
            size *= 512;

            snprintf(buf, sizeof(buf), "%" PRId64, size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_report("Image creation needs a size parameter");
            ret = -EINVAL;
            goto out;
        }
    }

    printf("Formatting '%s', fmt=%s ", filename, fmt);
    print_option_parameters(param);
    puts("");

    ret = bdrv_create(drv, filename, param);

    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_report("Formatting or formatting option not supported for "
                         "file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_report("The image size is too large for file format '%s'",
                         fmt);
        } else {
            error_report("%s: error while creating %s: %s", filename, fmt,
                         strerror(-ret));
        }
    }

out:
    free_option_parameters(create_options);
    free_option_parameters(param);