/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}

static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}
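/* Set up I/O throttling state for bs: the queue of throttled requests,
 * the timer that wakes them up, and the first accounting slice. */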
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
        || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
        || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
        || io_limits->iops[BLOCK_IO_LIMIT_READ]
        || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
        || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}

static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* Requests are handled in FIFO order: a queued request is not dequeued
     * until the current request has been allowed to proceed. If the current
     * request still exceeds the limits, it is re-inserted at the head of the
     * queue, so all requests behind it remain queued in order.
     */
    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
    return (*p == '/' || *p == '\\');
}
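/* Examples (illustrative): path_is_absolute("/img/a.qcow2") and
 * path_is_absolute("fat:/dir") return 1 because the first character after
 * the optional "<protocol>:" prefix is a slash;
 * path_is_absolute("a.qcow2") returns 0. */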
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
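/* Example (illustrative): combining a relative backing file name with the
 * path of the image that references it:
 *
 *     char dest[PATH_MAX];
 *     path_combine(dest, sizeof(dest), "/images/hd.qcow2", "base.qcow2");
 *     // dest is now "/images/base.qcow2"
 */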
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}

/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}
#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif

/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
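/* Examples (illustrative): "nbd:localhost:1234" selects the driver whose
 * protocol_name is "nbd" (assuming that driver is registered); a plain
 * path such as "disk.img" has no "<protocol>:" prefix and falls back to
 * the "file" driver. */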
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
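/* Flag combinations produced by the modes above (illustrative summary):
 *
 *   mode           NOCACHE  CACHE_WB  NO_FLUSH
 *   off/none         set      set        -
 *   directsync       set       -         -
 *   writeback         -       set        -
 *   unsafe            -       set       set
 *   writethrough      -        -         -
 */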
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
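/* Usage pattern (illustrative): each user brackets its activity with
 * bdrv_enable_copy_on_read()/bdrv_disable_copy_on_read(); the feature stays
 * active while bs->copy_on_read > 0, so nested users do not clobber each
 * other. */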
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}

/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
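/* Example (illustrative): opening an image read-write with automatic format
 * probing:
 *
 *     BlockDriverState *bs = bdrv_new("");
 *     int ret = bdrv_open(bs, "disk.img", BDRV_O_RDWR, NULL);
 *     if (ret < 0) {
 *         // handle -errno, e.g. report the error and bdrv_delete(bs)
 *     }
 */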
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/* make a BlockDriverState anonymous by removing it from the bdrv_states list.
   Also, NUL-terminate the device_name to prevent a double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}
/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
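/* Worked example (illustrative): with a 64 KB cluster size,
 * c = 65536 / 512 = 128 sectors; a request for sectors [100, 110)
 * rounds to cluster_sector_num = 0 and cluster_nb_sectors = 128,
 * i.e. the whole first cluster. */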
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors)
{
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap. This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster. For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}

/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
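/* Note: the bitmap works at BDRV_SECTORS_PER_DIRTY_CHUNK granularity; one
 * bit covers a whole chunk of sectors and bs->dirty_count counts dirty
 * chunks, not dirty sectors. */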
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file. This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
                                cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                                 &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests. If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (bs->copy_on_read) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);
    return ret;
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}

/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}

/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}

/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}
struct partition {
    uint8_t boot_ind;           /* 0x80 - active */
    uint8_t head;               /* starting head */
    uint8_t sector;             /* starting sector */
    uint8_t cyl;                /* starting cylinder */
    uint8_t sys_ind;            /* What partition type */
    uint8_t end_head;           /* end head */
    uint8_t end_sector;         /* end sector */
    uint8_t end_cyl;            /* end cylinder */
    uint32_t start_sect;        /* starting sector counting from 0 */
    uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if it could not be guessed */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for (i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}

void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;
    bool done;
} BdrvCoIsAllocatedData;

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    if (!bs->drv->bdrv_co_is_allocated) {
        if (sector_num >= bs->total_sectors) {
            *pnum = 0;
            return 0;
        }
        n = bs->total_sectors - sector_num;
        *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}

/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
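/* Example (illustrative): walking an image as alternating allocated and
 * unallocated extents:
 *
 *     int64_t sector = 0;
 *     while (sector < total_sectors) {
 *         int pnum;
 *         int n = MIN(total_sectors - sector, COMMIT_BUF_SECTORS);
 *         int allocated = bdrv_is_allocated(bs, sector, n, &pnum);
 *         // ... process pnum sectors starting at 'sector' ...
 *         sector += pnum;
 *     }
 */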
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
/* Consider exposing this as a full fledged QMP command */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}

BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        info->value = qmp_query_blockstat(bs, NULL);

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}

void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}

int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}

int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    drv->bdrv_debug_event(bs, event);
}
/**************************************************************/
/* handling of snapshots */

int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}

int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}

BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}

int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}

int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}

int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_list)
        return drv->bdrv_snapshot_list(bs, psn_info);
    if (bs->file)
        return bdrv_snapshot_list(bs->file, psn_info);
    return -ENOTSUP;
}

int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
    }
    return -ENOTSUP;
}
#define NB_SUFFIXES 4

char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for (i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         ((size + (base >> 1)) / base),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
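/* Examples (illustrative): 512 -> "512", 1536 -> "1.5K", 1048576 -> "1.0M";
 * values below 10*base keep one decimal place, larger values are rounded
 * to an integer with the next suffix. */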
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
/**************************************************************/
/* async I/Os */

BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}

BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
typedef struct MultiwriteCB {
    int error;
    int num_requests;
    int num_callbacks;
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;
        void *free_buf;
    } callbacks[];
} MultiwriteCB;

static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int i;

    for (i = 0; i < mcb->num_callbacks; i++) {
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
        if (mcb->callbacks[i].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
        }
        g_free(mcb->callbacks[i].free_qiov);
        qemu_vfree(mcb->callbacks[i].free_buf);
    }
}

static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}

static int multiwrite_req_compare(const void *a, const void *b)
{
    const BlockRequest *req1 = a, *req2 = b;

    /*
     * Note that we can't simply subtract req2->sector from req1->sector
     * here as that could overflow the return value.
     */
    if (req1->sector > req2->sector) {
        return 1;
    } else if (req1->sector < req2->sector) {
        return -1;
    } else {
        return 0;
    }
}
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in formats like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
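
/*
 * Worked example (illustration only, names hypothetical): two write
 * requests separated by a small hole that the driver agrees to fill.
 */
#if 0
/* qiov_a covers 8 sectors of data, qiov_b covers 4 sectors. */
static int example_merge(BlockDriverState *bs, MultiwriteCB *mcb,
                         QEMUIOVector *qiov_a, QEMUIOVector *qiov_b)
{
    BlockRequest reqs[2] = {
        { .sector = 0,  .nb_sectors = 8, .qiov = qiov_a },
        { .sector = 10, .nb_sectors = 4, .qiov = qiov_b },
    };

    /* If bdrv_merge_requests() accepts the 2-sector hole, one request
     * survives: sector 0, nb_sectors 14, whose qiov chains qiov_a,
     * 1024 zeroed bytes from a bounce buffer, then qiov_b. */
    return multiwrite_merge(bs, reqs, 2, mcb);    /* returns 1 */
}
#endif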
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In the error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    BlockDriverAIOCB *acb;
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergeable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /*
     * Run the aio requests. As soon as one request can't be submitted
     * successfully, fail all requests that are not yet submitted (we must
     * return failure for all requests anyway)
     *
     * num_requests cannot be set to the right value immediately: If
     * bdrv_aio_writev fails for some request, num_requests would be too high
     * and therefore multiwrite_cb() would never recognize the multiwrite
     * request as completed. We also cannot use the loop variable i to set it
     * when the first request fails because the callback may already have been
     * called for previously submitted requests. Thus, num_requests must be
     * incremented for each request that is submitted.
     *
     * The problem that callbacks may be called early also means that we need
     * to take care that num_requests doesn't become 0 before all requests are
     * submitted - multiwrite_cb() would consider the multiwrite request
     * completed. A dummy request that is "completed" by a manual call to
     * multiwrite_cb() takes care of this.
     */
    mcb->num_requests = 1;

    // Run the aio requests
    for (i = 0; i < num_reqs; i++) {
        mcb->num_requests++;
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);

        if (acb == NULL) {
            // We can only fail the whole thing if no request has been
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
            // complete and report the error in the callback.
            if (i == 0) {
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
                goto fail;
            } else {
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
                multiwrite_cb(mcb, -EIO);
                break;
            }
        }
    }

    /* Complete the dummy request */
    multiwrite_cb(mcb, 0);

    return 0;

fail:
    for (i = 0; i < mcb->num_callbacks; i++) {
        reqs[i].error = -EIO;
    }
    g_free(mcb);
    return -1;
}
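
/*
 * Illustration (not part of the original file): a hypothetical caller
 * batching two writes; the real user of this interface is the virtio-blk
 * device model. Callback and context names are made up.
 */
#if 0
static void example_batch(BlockDriverState *bs, QEMUIOVector *qiov_a,
                          QEMUIOVector *qiov_b,
                          BlockDriverCompletionFunc *cb,
                          void *ctx_a, void *ctx_b)
{
    BlockRequest reqs[2] = {
        { .sector = 0, .nb_sectors = 8, .qiov = qiov_a,
          .cb = cb, .opaque = ctx_a },
        { .sector = 8, .nb_sectors = 8, .qiov = qiov_b,
          .cb = cb, .opaque = ctx_b },
    };
    int i;

    if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
        for (i = 0; i < 2; i++) {
            if (reqs[i].error) {
                /* no callback will come for this one; fail it here */
            }
        }
    }
}
#endif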
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
/* block I/O throttling */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; it is
     *             obtained from the history statistics info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calculate
     *             the total time for completing reading/writing all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limit,
     * bs->slice_end needs to be extended so that the current statistics
     * are kept until the timer fires; the extension factor is increased
     * and tuned based on experimental results.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
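
/*
 * Worked example (illustration only): with bps_limit = 1,000,000 bytes/s and
 * a 0.1 s slice, bytes_limit = 100,000. If bytes_base = 90,000 bytes were
 * already transferred in this slice and a 32-sector request adds
 * bytes_res = 32 * 512 = 16,384 bytes, then 106,384 > 100,000 and the request
 * is throttled. With elapsed_time = 0.09 s, the projected wait is
 * 106,384 / 1,000,000 - 0.09 ~= 0.016 s before it may be dispatched.
 */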
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit = iops_limit * slice_time;
    ios_base  = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        bs->slice_end = now + bs->slice_time;
    } else {
        bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
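
/*
 * Illustration (not part of the original file): a sketch of how a request
 * path can consume this check. It assumes bs->block_timer is the throttling
 * timer whose callback restarts coroutines queued on the device.
 */
#if 0
static void coroutine_fn example_io_limits_intercept(BlockDriverState *bs,
                                                     bool is_write,
                                                     int nb_sectors)
{
    int64_t wait_time = -1;

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        /* Sleep until the estimated dispatch time, then re-check. */
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_coroutine_yield();
    }
}
#endif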
/**************************************************************/
/* async block device emulation */

typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;
static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel     = bdrv_aio_cancel_em,
};
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write) {
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    QEMUBH *bh;
} BlockDriverAIOCBCoroutine;

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel     = bdrv_aio_co_cancel_em,
};
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
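
/*
 * Illustration (not part of the original file): each AIOCB flavour embeds
 * BlockDriverAIOCB as its first member and declares a pool sized for the
 * full structure; released AIOCBs are recycled through the pool's free list.
 * All names below are hypothetical.
 */
#if 0
typedef struct ExampleAIOCB {
    BlockDriverAIOCB common;    /* must be the first member */
    int state;
} ExampleAIOCB;

static void example_cancel(BlockDriverAIOCB *blockacb)
{
    /* stop the in-flight request, then release the AIOCB */
    qemu_aio_release(blockacb);
}

static AIOPool example_aio_pool = {
    .aiocb_size = sizeof(ExampleAIOCB),
    .cancel     = example_cancel,
};

static BlockDriverAIOCB *example_submit(BlockDriverState *bs,
                                        BlockDriverCompletionFunc *cb,
                                        void *opaque)
{
    ExampleAIOCB *acb = qemu_aio_get(&example_aio_pool, bs, cb, opaque);

    acb->state = 0;
    /* ... kick off the request; on completion call acb->common.cb()
     * and then qemu_aio_release(acb) ... */
    return &acb->common;
}
#endif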
/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}

static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
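
/*
 * Illustration (not part of the original file): the control flow of the
 * emulation above. The coroutine submits the AIO request and yields; the
 * completion callback stores the result and re-enters the coroutine.
 *
 *   coroutine                             event loop
 *   ---------                             ----------
 *   bdrv_co_io_em()
 *     drv->bdrv_aio_readv(..., &co)
 *     qemu_coroutine_yield()       ---->  I/O runs and completes
 *                                         bdrv_co_io_em_complete():
 *                                           co->ret = ret
 *                                  <----    qemu_coroutine_enter(coroutine)
 *     return co.ret
 */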
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs->drv) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
void bdrv_invalidate_cache(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs);
    }
}

void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}

int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return 0;
    }
    if (!drv->bdrv_is_inserted) {
        return 1;
    }
    return drv->bdrv_is_inserted(bs);
}
/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know. Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 */
void bdrv_eject(BlockDriverState *bs, int eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }
}
/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl) {
        return drv->bdrv_ioctl(bs, req, buf);
    }
    return -ENOTSUP;
}

BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl) {
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    }
    return NULL;
}
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ?
                         bs->buffer_alignment : 512, size);
}
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
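
/*
 * Worked example (illustration only), assuming BDRV_SECTORS_PER_DIRTY_CHUNK
 * is 2048 (1 MiB chunks) and 64-bit longs: sector 5,000,000 belongs to chunk
 * 5,000,000 / 2048 = 2441, which is tracked by bit 2441 % 64 = 9 of word
 * 2441 / 64 = 38 in dirty_bitmap.
 */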
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}

/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
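
/*
 * Illustration (not part of the original file): a device model brackets each
 * I/O with a cookie so bytes, ops and latency all land in the statistics
 * arrays updated above. A minimal sketch:
 */
#if 0
static int coroutine_fn example_read(BlockDriverState *bs, int64_t sector_num,
                                     QEMUIOVector *qiov)
{
    BlockAcctCookie cookie;
    int ret;

    bdrv_acct_start(bs, &cookie, qiov->size, BDRV_ACCT_READ);
    ret = bdrv_co_readv(bs, sector_num, qiov->size >> BDRV_SECTOR_BITS, qiov);
    bdrv_acct_done(bs, &cookie);
    return ret;
}
#endif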
int bdrv_img_create(const char *filename, const char *fmt,
                    const char *base_filename, const char *base_fmt,
                    char *options, uint64_t img_size, int flags)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", fmt);
        ret = -EINVAL;
        goto out;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_report("Unknown protocol '%s'", filename);
        ret = -EINVAL;
        goto out;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_report("Invalid options for file format '%s'.", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_report("Backing file not supported for file format '%s'",
                         fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_report("Backing file format not supported for file "
                         "format '%s'", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_report("Error: Trying to create an image with the "
                         "same filename as the backing file");
            ret = -EINVAL;
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_report("Unknown backing file format '%s'",
                         backing_fmt->value.s);
            ret = -EINVAL;
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            uint64_t size;
            char buf[32];

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
            if (ret < 0) {
                error_report("Could not open '%s'", backing_file->value.s);
                goto out;
            }
            bdrv_get_geometry(bs, &size);
            size *= 512;

            snprintf(buf, sizeof(buf), "%" PRId64, size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_report("Image creation needs a size parameter");
            ret = -EINVAL;
            goto out;
        }
    }

    printf("Formatting '%s', fmt=%s ", filename, fmt);
    print_option_parameters(param);
    puts("");

    ret = bdrv_create(drv, filename, param);

    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_report("Formatting or formatting option not supported for "
                         "file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_report("The image size is too large for file format '%s'",
                         fmt);
        } else {
            error_report("%s: error while creating %s: %s", filename, fmt,
                         strerror(-ret));
        }
    }

out:
    free_option_parameters(create_options);
    free_option_parameters(param);

    if (bs) {
        bdrv_delete(bs);
    }

    return ret;
}
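
/*
 * Illustration (not part of the original file): qemu-img create drives this
 * function roughly as follows (filenames hypothetical). Passing -1 as
 * img_size lets the size be taken from the backing file, per the code above.
 */
#if 0
ret = bdrv_img_create("overlay.qcow2", "qcow2",
                      "base.img", "raw",   /* backing file and its format */
                      NULL,                /* no -o option string */
                      (uint64_t)-1,        /* inherit size from backing file */
                      0);
#endif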