X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/592fdd02ae987a439a2ba25a2a973673f1484805..259434b8067e1c61017e9a5b8667b6526b474ff2:/block.c diff --git a/block.c b/block.c index b8df11d647..d088ee02ff 100644 --- a/block.c +++ b/block.c @@ -26,17 +26,19 @@ #include "trace.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "qemu/error-report.h" #include "qemu/module.h" +#include "qapi/qmp/qerror.h" #include "qapi/qmp/qjson.h" #include "sysemu/block-backend.h" #include "sysemu/sysemu.h" -#include "sysemu/qtest.h" #include "qemu/notify.h" #include "block/coroutine.h" #include "block/qapi.h" #include "qmp-commands.h" #include "qemu/timer.h" #include "qapi-event.h" +#include "block/throttle-groups.h" #ifdef CONFIG_BSD #include @@ -52,44 +54,25 @@ #include #endif +/** + * A BdrvDirtyBitmap can be in three possible states: + * (1) successor is NULL and disabled is false: full r/w mode + * (2) successor is NULL and disabled is true: read only mode ("disabled") + * (3) successor is set: frozen mode. + * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, + * or enabled. A frozen bitmap can only abdicate() or reclaim(). + */ struct BdrvDirtyBitmap { - HBitmap *bitmap; - char *name; + HBitmap *bitmap; /* Dirty sector bitmap implementation */ + BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ + char *name; /* Optional non-empty unique ID */ + int64_t size; /* Size of the bitmap (Number of sectors) */ + bool disabled; /* Bitmap is read-only */ QLIST_ENTRY(BdrvDirtyBitmap) list; }; #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ -static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque); -static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque); -static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov); -static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov); -static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BdrvRequestFlags flags, - BlockCompletionFunc *cb, - void *opaque, - bool is_write); -static void coroutine_fn bdrv_co_do_rw(void *opaque); -static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); - static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -99,10 +82,13 @@ static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); -static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors); -static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors); +static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + 
BlockDriver *drv, Error **errp); + +static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); /* If non-zero, use only whitelisted block drivers */ static int use_bdrv_whitelist; @@ -126,118 +112,24 @@ int is_windows_drive(const char *filename) } #endif -/* throttling disk I/O limits */ -void bdrv_set_io_limits(BlockDriverState *bs, - ThrottleConfig *cfg) -{ - int i; - - throttle_config(&bs->throttle_state, cfg); - - for (i = 0; i < 2; i++) { - qemu_co_enter_next(&bs->throttled_reqs[i]); - } -} - -/* this function drain all the throttled IOs */ -static bool bdrv_start_throttled_reqs(BlockDriverState *bs) -{ - bool drained = false; - bool enabled = bs->io_limits_enabled; - int i; - - bs->io_limits_enabled = false; - - for (i = 0; i < 2; i++) { - while (qemu_co_enter_next(&bs->throttled_reqs[i])) { - drained = true; - } - } - - bs->io_limits_enabled = enabled; - - return drained; -} - -void bdrv_io_limits_disable(BlockDriverState *bs) -{ - bs->io_limits_enabled = false; - - bdrv_start_throttled_reqs(bs); - - throttle_destroy(&bs->throttle_state); -} - -static void bdrv_throttle_read_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[0]); -} - -static void bdrv_throttle_write_timer_cb(void *opaque) -{ - BlockDriverState *bs = opaque; - qemu_co_enter_next(&bs->throttled_reqs[1]); -} - -/* should be called before bdrv_set_io_limits if a limit is set */ -void bdrv_io_limits_enable(BlockDriverState *bs) -{ - int clock_type = QEMU_CLOCK_REALTIME; - - if (qtest_enabled()) { - /* For testing block IO throttling only */ - clock_type = QEMU_CLOCK_VIRTUAL; - } - assert(!bs->io_limits_enabled); - throttle_init(&bs->throttle_state, - bdrv_get_aio_context(bs), - clock_type, - bdrv_throttle_read_timer_cb, - bdrv_throttle_write_timer_cb, - bs); - bs->io_limits_enabled = true; -} - -/* This function makes an IO wait if needed - * - * @nb_sectors: the number of sectors of the IO - * @is_write: is the IO a write - */ -static void bdrv_io_limits_intercept(BlockDriverState *bs, - unsigned int bytes, - bool is_write) +size_t bdrv_opt_mem_align(BlockDriverState *bs) { - /* does this io must wait */ - bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - - /* if must wait or any request of this type throttled queue the IO */ - if (must_wait || - !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { - qemu_co_queue_wait(&bs->throttled_reqs[is_write]); - } - - /* the IO will be executed, do the accounting */ - throttle_account(&bs->throttle_state, is_write, bytes); - - - /* if the next request must wait -> do nothing */ - if (throttle_schedule_timer(&bs->throttle_state, is_write)) { - return; + if (!bs || !bs->drv) { + /* page size or 4k (hdd sector size) should be on the safe side */ + return MAX(4096, getpagesize()); } - /* else queue next request for execution */ - qemu_co_queue_next(&bs->throttled_reqs[is_write]); + return bs->bl.opt_mem_alignment; } -size_t bdrv_opt_mem_align(BlockDriverState *bs) +size_t bdrv_min_mem_align(BlockDriverState *bs) { if (!bs || !bs->drv) { - /* 4k should be on the safe side */ - return 4096; + /* page size or 4k (hdd sector size) should be on the safe side */ + return MAX(4096, getpagesize()); } - return bs->bl.opt_mem_alignment; + return bs->bl.min_mem_alignment; } /* check if the path starts with ":" */ @@ -343,20 +235,7 @@ void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz, void bdrv_register(BlockDriver *bdrv) { - /* Block drivers without coroutine functions need emulation */ 
- if (!bdrv->bdrv_co_readv) { - bdrv->bdrv_co_readv = bdrv_co_readv_em; - bdrv->bdrv_co_writev = bdrv_co_writev_em; - - /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if - * the block driver lacks aio we need to emulate that too. - */ - if (!bdrv->bdrv_aio_readv) { - /* add AIO emulation layer */ - bdrv->bdrv_aio_readv = bdrv_aio_readv_em; - bdrv->bdrv_aio_writev = bdrv_aio_writev_em; - } - } + bdrv_setup_io_funcs(bdrv); QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list); } @@ -528,54 +407,6 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) return ret; } -void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) -{ - BlockDriver *drv = bs->drv; - Error *local_err = NULL; - - memset(&bs->bl, 0, sizeof(bs->bl)); - - if (!drv) { - return; - } - - /* Take some limits from the children as a default */ - if (bs->file) { - bdrv_refresh_limits(bs->file, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; - bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; - bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; - } else { - bs->bl.opt_mem_alignment = 512; - } - - if (bs->backing_hd) { - bdrv_refresh_limits(bs->backing_hd, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - bs->bl.opt_transfer_length = - MAX(bs->bl.opt_transfer_length, - bs->backing_hd->bl.opt_transfer_length); - bs->bl.max_transfer_length = - MIN_NON_ZERO(bs->bl.max_transfer_length, - bs->backing_hd->bl.max_transfer_length); - bs->bl.opt_mem_alignment = - MAX(bs->bl.opt_mem_alignment, - bs->backing_hd->bl.opt_mem_alignment); - } - - /* Then let the driver override it */ - if (drv->bdrv_refresh_limits) { - drv->bdrv_refresh_limits(bs, errp); - } -} - /** * Try to get @bs's logical and physical block size. * On success, store them in @bsz struct and return 0. @@ -754,7 +585,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename, int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ - if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { + if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { *pdrv = &bdrv_raw; return ret; } @@ -786,7 +617,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) BlockDriver *drv = bs->drv; /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ - if (bs->sg) + if (bdrv_is_sg(bs)) return 0; /* query actual device if possible, otherwise just trust the hint */ @@ -849,22 +680,6 @@ int bdrv_parse_cache_flags(const char *mode, int *flags) return 0; } -/** - * The copy-on-read flag is actually a reference count so multiple users may - * use the feature without worrying about clobbering its previous state. - * Copy-on-read stays enabled until all users have called to disable it. 
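
/*
 * [Editor's aside, not part of the patch] The copy-on-read flag described
 * above is plain reference counting. A minimal standalone model, with all
 * names invented for illustration:
 */
#include <assert.h>

struct cor_state { int copy_on_read; };      /* models bs->copy_on_read */

static void cor_enable(struct cor_state *s)
{
    s->copy_on_read++;                       /* each user takes a reference */
}

static void cor_disable(struct cor_state *s)
{
    assert(s->copy_on_read > 0);             /* unbalanced disable is a bug */
    s->copy_on_read--;                       /* feature stays on until 0 */
}
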
- */ -void bdrv_enable_copy_on_read(BlockDriverState *bs) -{ - bs->copy_on_read++; -} - -void bdrv_disable_copy_on_read(BlockDriverState *bs) -{ - assert(bs->copy_on_read > 0); - bs->copy_on_read--; -} - /* * Returns the flags that a temporary snapshot should get, based on the * originally requested flags (the originally requested image will have flags @@ -876,8 +691,8 @@ static int bdrv_temp_snapshot_flags(int flags) } /* - * Returns the flags that bs->file should get, based on the given flags for - * the parent BDS + * Returns the flags that bs->file should get if a protocol driver is expected, + * based on the given flags for the parent BDS */ static int bdrv_inherited_flags(int flags) { @@ -894,6 +709,25 @@ static int bdrv_inherited_flags(int flags) return flags; } +const BdrvChildRole child_file = { + .inherit_flags = bdrv_inherited_flags, +}; + +/* + * Returns the flags that bs->file should get if the use of formats (and not + * only protocols) is permitted for it, based on the given flags for the parent + * BDS + */ +static int bdrv_inherited_fmt_flags(int parent_flags) +{ + int flags = child_file.inherit_flags(parent_flags); + return flags & ~BDRV_O_PROTOCOL; +} + +const BdrvChildRole child_format = { + .inherit_flags = bdrv_inherited_fmt_flags, +}; + /* * Returns the flags that bs->backing_hd should get, based on the given flags * for the parent BDS @@ -909,6 +743,10 @@ static int bdrv_backing_flags(int flags) return flags; } +static const BdrvChildRole child_backing = { + .inherit_flags = bdrv_backing_flags, +}; + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags | BDRV_O_CACHE_WB; @@ -961,6 +799,19 @@ static void bdrv_assign_node_name(BlockDriverState *bs, QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); } +static QemuOptsList bdrv_runtime_opts = { + .name = "bdrv_common", + .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head), + .desc = { + { + .name = "node-name", + .type = QEMU_OPT_STRING, + .help = "Node name of the block device node", + }, + { /* end of list */ } + }, +}; + /* * Common part for opening disk images and files * @@ -972,6 +823,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, int ret, open_flags; const char *filename; const char *node_name = NULL; + QemuOpts *opts; Error *local_err = NULL; assert(drv != NULL); @@ -992,23 +844,22 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); - node_name = qdict_get_try_str(options, "node-name"); - bdrv_assign_node_name(bs, node_name, &local_err); + opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); if (local_err) { error_propagate(errp, local_err); - return -EINVAL; + ret = -EINVAL; + goto fail_opts; } - qdict_del(options, "node-name"); - /* bdrv_open() with directly using a protocol as drv. This layer is already - * opened, so assign it to bs (while file becomes a closed BlockDriverState) - * and return immediately. */ - if (file != NULL && drv->bdrv_file_open) { - bdrv_swap(file, bs); - return 0; + node_name = qemu_opt_get(opts, "node-name"); + bdrv_assign_node_name(bs, node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; } - bs->open_flags = flags; bs->guest_block_size = 512; bs->request_alignment = 512; bs->zero_beyond_eof = true; @@ -1021,7 +872,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, ? 
"Driver '%s' can only be used for read-only devices" : "Driver '%s' is not whitelisted", drv->format_name); - return -ENOTSUP; + ret = -ENOTSUP; + goto fail_opts; } assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */ @@ -1030,7 +882,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bdrv_enable_copy_on_read(bs); } else { error_setg(errp, "Can't use copy-on-read on read-only device"); - return -EINVAL; + ret = -EINVAL; + goto fail_opts; } } @@ -1094,7 +947,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, } assert(bdrv_opt_mem_align(bs) != 0); - assert((bs->request_alignment != 0) || bs->sg); + assert(bdrv_min_mem_align(bs) != 0); + assert((bs->request_alignment != 0) || bdrv_is_sg(bs)); + + qemu_opts_del(opts); return 0; free_and_fail: @@ -1102,6 +958,8 @@ free_and_fail: g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; +fail_opts: + qemu_opts_del(opts); return ret; } @@ -1135,14 +993,17 @@ static QDict *parse_json_filename(const char *filename, Error **errp) /* * Fills in default options for opening images and converts the legacy * filename/flags pair to option QDict entries. + * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a + * block driver has been specified explicitly. */ -static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, - BlockDriver *drv, Error **errp) +static int bdrv_fill_options(QDict **options, const char **pfilename, + int *flags, BlockDriver *drv, Error **errp) { const char *filename = *pfilename; const char *drvname; - bool protocol = flags & BDRV_O_PROTOCOL; + bool protocol = *flags & BDRV_O_PROTOCOL; bool parse_filename = false; + BlockDriver *tmp_drv; Error *local_err = NULL; /* Parse json: pseudo-protocol */ @@ -1160,6 +1021,24 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, *pfilename = filename = NULL; } + drvname = qdict_get_try_str(*options, "driver"); + + /* If the user has explicitly specified the driver, this choice should + * override the BDRV_O_PROTOCOL flag */ + tmp_drv = drv; + if (!tmp_drv && drvname) { + tmp_drv = bdrv_find_format(drvname); + } + if (tmp_drv) { + protocol = tmp_drv->bdrv_file_open; + } + + if (protocol) { + *flags |= BDRV_O_PROTOCOL; + } else { + *flags &= ~BDRV_O_PROTOCOL; + } + /* Fetch the file name from the options QDict if necessary */ if (protocol && filename) { if (!qdict_haskey(*options, "filename")) { @@ -1174,7 +1053,6 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, /* Find the right block driver */ filename = qdict_get_try_str(*options, "filename"); - drvname = qdict_get_try_str(*options, "driver"); if (drv) { if (drvname) { @@ -1224,12 +1102,46 @@ static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, return 0; } +static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, + BlockDriverState *child_bs, + const BdrvChildRole *child_role) +{ + BdrvChild *child = g_new(BdrvChild, 1); + *child = (BdrvChild) { + .bs = child_bs, + .role = child_role, + }; + + QLIST_INSERT_HEAD(&parent_bs->children, child, next); + + return child; +} + +static void bdrv_detach_child(BdrvChild *child) +{ + QLIST_REMOVE(child, next); + g_free(child); +} + +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) +{ + BlockDriverState *child_bs = child->bs; + + if (child->bs->inherits_from == parent) { + child->bs->inherits_from = NULL; + } + + bdrv_detach_child(child); + bdrv_unref(child_bs); +} + void 
bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) { if (bs->backing_hd) { assert(bs->backing_blocker); bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); + bdrv_detach_child(bs->backing_child); } else if (backing_hd) { error_setg(&bs->backing_blocker, "node is used as backing hd of '%s'", @@ -1240,8 +1152,10 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) if (!backing_hd) { error_free(bs->backing_blocker); bs->backing_blocker = NULL; + bs->backing_child = NULL; goto out; } + bs->backing_child = bdrv_attach_child(bs, backing_hd, &child_backing); bs->open_flags &= ~BDRV_O_NO_BACKING; pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); pstrcpy(bs->backing_format, sizeof(bs->backing_format), @@ -1311,9 +1225,10 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) } assert(bs->backing_hd == NULL); - ret = bdrv_open(&backing_hd, - *backing_filename ? backing_filename : NULL, NULL, options, - bdrv_backing_flags(bs->open_flags), NULL, &local_err); + ret = bdrv_open_inherit(&backing_hd, + *backing_filename ? backing_filename : NULL, + NULL, options, 0, bs, &child_backing, + NULL, &local_err); if (ret < 0) { bdrv_unref(backing_hd); backing_hd = NULL; @@ -1323,6 +1238,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_free(local_err); goto free_exit; } + bdrv_set_backing_hd(bs, backing_hd); free_exit: @@ -1335,7 +1251,7 @@ free_exit: * device's options. * * If allow_none is true, no image will be opened if filename is false and no - * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * BlockdevRef is given. NULL will be returned, but errp remains unset. * * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict @@ -1343,20 +1259,21 @@ free_exit: * BlockdevRef. * * The BlockdevRef will be removed from the options QDict. - * - * To conform with the behavior of bdrv_open(), *pbs has to be NULL. 
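
/*
 * [Editor's aside, not part of the patch] A sketch of how a caller could use
 * the bdrv_open_child() introduced just below; this is a fragment that
 * assumes the surrounding QEMU types (bs, options, errp) and mirrors the
 * bdrv_open_image() compatibility wrapper:
 */
    Error *local_err = NULL;
    BdrvChild *file;

    file = bdrv_open_child(NULL, options, "file", bs, &child_file,
                           false /* allow_none */, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    /* file->bs is now open and listed in bs->children under the child_file
     * role; it is released later with bdrv_unref_child(bs, file) */
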
*/ -int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, int flags, - bool allow_none, Error **errp) +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp) { + BdrvChild *c = NULL; + BlockDriverState *bs; QDict *image_options; int ret; char *bdref_key_dot; const char *reference; - assert(pbs); - assert(*pbs == NULL); + assert(child_role != NULL); bdref_key_dot = g_strdup_printf("%s.", bdref_key); qdict_extract_subqdict(options, &image_options, bdref_key_dot); @@ -1364,22 +1281,60 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, reference = qdict_get_try_str(options, bdref_key); if (!filename && !reference && !qdict_size(image_options)) { - if (allow_none) { - ret = 0; - } else { + if (!allow_none) { error_setg(errp, "A block device must be specified for \"%s\"", bdref_key); - ret = -EINVAL; } QDECREF(image_options); goto done; } - ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp); + bs = NULL; + ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0, + parent, child_role, NULL, errp); + if (ret < 0) { + goto done; + } + + c = bdrv_attach_child(parent, bs, child_role); done: qdict_del(options, bdref_key); - return ret; + return c; +} + +/* + * This is a version of bdrv_open_child() that returns 0/-EINVAL instead of + * a BdrvChild object. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * + * To conform with the behavior of bdrv_open(), *pbs has to be NULL. + */ +int bdrv_open_image(BlockDriverState **pbs, const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, const BdrvChildRole *child_role, + bool allow_none, Error **errp) +{ + Error *local_err = NULL; + BdrvChild *c; + + assert(pbs); + assert(*pbs == NULL); + + c = bdrv_open_child(filename, options, bdref_key, parent, child_role, + allow_none, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + if (c != NULL) { + *pbs = c->bs; + } + + return 0; } int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) @@ -1390,7 +1345,7 @@ int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) QemuOpts *opts = NULL; QDict *snapshot_options; BlockDriverState *bs_snapshot; - Error *local_err; + Error *local_err = NULL; int ret; /* if snapshot, we create a temporary backing file and open it @@ -1462,9 +1417,11 @@ out: * should be opened. If specified, neither options nor a filename may be given, * nor can an existing BDS be reused (that is, *pbs has to be NULL). */ -int bdrv_open(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, - BlockDriver *drv, Error **errp) +static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriverState *parent, + const BdrvChildRole *child_role, + BlockDriver *drv, Error **errp) { int ret; BlockDriverState *file = NULL, *bs; @@ -1473,6 +1430,8 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, int snapshot_flags = 0; assert(pbs); + assert(!child_role || !flags); + assert(!child_role == !parent); if (reference) { bool options_non_empty = options ? 
qdict_size(options) : false; @@ -1510,7 +1469,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } - ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); + if (child_role) { + bs->inherits_from = parent; + flags = child_role->inherit_flags(parent->open_flags); + } + + ret = bdrv_fill_options(&options, &filename, &flags, drv, &local_err); if (local_err) { goto fail; } @@ -1529,12 +1493,8 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } assert(drvname || !(flags & BDRV_O_PROTOCOL)); - if (drv && !drv->bdrv_file_open) { - /* If the user explicitly wants a format driver here, we'll need to add - * another layer for the protocol in bs->file */ - flags &= ~BDRV_O_PROTOCOL; - } + bs->open_flags = flags; bs->options = options; options = qdict_clone_shallow(options); @@ -1549,9 +1509,9 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, } assert(file == NULL); + bs->open_flags = flags; ret = bdrv_open_image(&file, filename, options, "file", - bdrv_inherited_flags(flags), - true, &local_err); + bs, &child_file, true, &local_err); if (ret < 0) { goto fail; } @@ -1570,6 +1530,12 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, goto fail; } + /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */ + assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open); + /* file must be NULL if a protocol BDS is about to be created + * (the inverse results in an error message from bdrv_open_common()) */ + assert(!(flags & BDRV_O_PROTOCOL) || !file); + /* Open the image */ ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); if (ret < 0) { @@ -1668,6 +1634,14 @@ close_and_fail: return ret; } +int bdrv_open(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, + BlockDriver *drv, Error **errp) +{ + return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL, + NULL, drv, errp); +} + typedef struct BlockReopenQueueEntry { bool prepared; BDRVReopenState state; @@ -1698,6 +1672,8 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, assert(bs != NULL); BlockReopenQueueEntry *bs_entry; + BdrvChild *child; + if (bs_queue == NULL) { bs_queue = g_new0(BlockReopenQueue, 1); QSIMPLEQ_INIT(bs_queue); @@ -1706,8 +1682,15 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, /* bdrv_open() masks this flag out */ flags &= ~BDRV_O_PROTOCOL; - if (bs->file) { - bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); + QLIST_FOREACH(child, &bs->children, next) { + int child_flags; + + if (child->bs->inherits_from != bs) { + continue; + } + + child_flags = child->role->inherit_flags(flags); + bdrv_reopen_queue(bs_queue, child->bs, child_flags); } bs_entry = g_new0(BlockReopenQueueEntry, 1); @@ -1912,18 +1895,31 @@ void bdrv_close(BlockDriverState *bs) if (bs->job) { block_job_cancel_sync(bs->job); } - bdrv_drain_all(); /* complete I/O */ + bdrv_drain(bs); /* complete I/O */ bdrv_flush(bs); - bdrv_drain_all(); /* in case flush left pending I/O */ + bdrv_drain(bs); /* in case flush left pending I/O */ notifier_list_notify(&bs->close_notifiers, bs); if (bs->drv) { + BdrvChild *child, *next; + + bs->drv->bdrv_close(bs); + if (bs->backing_hd) { BlockDriverState *backing_hd = bs->backing_hd; bdrv_set_backing_hd(bs, NULL); bdrv_unref(backing_hd); } - bs->drv->bdrv_close(bs); + + QLIST_FOREACH_SAFE(child, &bs->children, next, next) { + /* TODO Remove bdrv_unref() from drivers' close function and use + * 
bdrv_unref_child() here */ + if (child->bs->inherits_from == bs) { + child->bs->inherits_from = NULL; + } + bdrv_detach_child(child); + } + g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; @@ -1974,132 +1970,32 @@ void bdrv_close_all(void) } } -/* Check if any requests are in-flight (including throttled requests) */ -static bool bdrv_requests_pending(BlockDriverState *bs) +/* make a BlockDriverState anonymous by removing from bdrv_state and + * graph_bdrv_state list. + Also, NULL terminate the device_name to prevent double remove */ +void bdrv_make_anon(BlockDriverState *bs) { - if (!QLIST_EMPTY(&bs->tracked_requests)) { - return true; - } - if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { - return true; - } - if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { - return true; - } - if (bs->file && bdrv_requests_pending(bs->file)) { - return true; + /* + * Take care to remove bs from bdrv_states only when it's actually + * in it. Note that bs->device_list.tqe_prev is initially null, + * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish + * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by + * resetting it to null on remove. + */ + if (bs->device_list.tqe_prev) { + QTAILQ_REMOVE(&bdrv_states, bs, device_list); + bs->device_list.tqe_prev = NULL; } - if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) { - return true; + if (bs->node_name[0] != '\0') { + QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); } - return false; -} - -static bool bdrv_drain_one(BlockDriverState *bs) -{ - bool bs_busy; - - bdrv_flush_io_queue(bs); - bdrv_start_throttled_reqs(bs); - bs_busy = bdrv_requests_pending(bs); - bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); - return bs_busy; + bs->node_name[0] = '\0'; } -/* - * Wait for pending requests to complete on a single BlockDriverState subtree - * - * See the warning in bdrv_drain_all(). This function can only be called if - * you are sure nothing can generate I/O because you have op blockers - * installed. - * - * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState - * AioContext. - */ -void bdrv_drain(BlockDriverState *bs) +static void bdrv_rebind(BlockDriverState *bs) { - while (bdrv_drain_one(bs)) { - /* Keep iterating */ - } -} - -/* - * Wait for pending requests to complete across all BlockDriverStates - * - * This function does not flush data to disk, use bdrv_flush_all() for that - * after calling this function. - * - * Note that completion of an asynchronous I/O operation can trigger any - * number of other I/O operations on other devices---for example a coroutine - * can be arbitrarily complex and a constant flow of I/O can come until the - * coroutine is complete. Because of this, it is not possible to have a - * function to drain a single device's I/O queue. 
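
/*
 * [Editor's aside, not part of the patch] The drain helpers being removed
 * here (they move out of block.c) all share one quiesce idiom: always run a
 * first iteration so pending completion BHs execute, then keep polling while
 * any pass reports progress. Standalone skeleton, poll_once() hypothetical:
 */
#include <stdbool.h>

static bool poll_once(void);    /* hypothetical: true while requests remain */

static void drain_all(void)
{
    bool busy = true;           /* force the first iteration */

    while (busy) {
        busy = poll_once();
    }
}
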
- */ -void bdrv_drain_all(void) -{ - /* Always run first iteration so any pending completion BHs run */ - bool busy = true; - BlockDriverState *bs; - - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); - if (bs->job) { - block_job_pause(bs->job); - } - aio_context_release(aio_context); - } - - while (busy) { - busy = false; - - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); - busy |= bdrv_drain_one(bs); - aio_context_release(aio_context); - } - } - - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - AioContext *aio_context = bdrv_get_aio_context(bs); - - aio_context_acquire(aio_context); - if (bs->job) { - block_job_resume(bs->job); - } - aio_context_release(aio_context); - } -} - -/* make a BlockDriverState anonymous by removing from bdrv_state and - * graph_bdrv_state list. - Also, NULL terminate the device_name to prevent double remove */ -void bdrv_make_anon(BlockDriverState *bs) -{ - /* - * Take care to remove bs from bdrv_states only when it's actually - * in it. Note that bs->device_list.tqe_prev is initially null, - * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish - * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by - * resetting it to null on remove. - */ - if (bs->device_list.tqe_prev) { - QTAILQ_REMOVE(&bdrv_states, bs, device_list); - bs->device_list.tqe_prev = NULL; - } - if (bs->node_name[0] != '\0') { - QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); - } - bs->node_name[0] = '\0'; -} - -static void bdrv_rebind(BlockDriverState *bs) -{ - if (bs->drv && bs->drv->bdrv_rebind) { - bs->drv->bdrv_rebind(bs); + if (bs->drv && bs->drv->bdrv_rebind) { + bs->drv->bdrv_rebind(bs); } } @@ -2115,12 +2011,18 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; /* i/o throttled req */ - memcpy(&bs_dest->throttle_state, - &bs_src->throttle_state, - sizeof(ThrottleState)); + bs_dest->throttle_state = bs_src->throttle_state, + bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + bs_dest->pending_reqs[0] = bs_src->pending_reqs[0]; + bs_dest->pending_reqs[1] = bs_src->pending_reqs[1]; bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; - bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + memcpy(&bs_dest->round_robin, + &bs_src->round_robin, + sizeof(bs_dest->round_robin)); + memcpy(&bs_dest->throttle_timers, + &bs_src->throttle_timers, + sizeof(ThrottleTimers)); /* r/w error */ bs_dest->on_read_error = bs_src->on_read_error; @@ -2162,6 +2064,10 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) { BlockDriverState tmp; + BdrvChild *child; + + bdrv_drain(bs_new); + bdrv_drain(bs_old); /* The code needs to swap the node_name but simply swapping node_list won't * work so first remove the nodes from the graph list, do the swap then @@ -2174,12 +2080,21 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list); } + /* If the BlockDriverState is part of a throttling group acquire + * its lock since we're going to mess with the protected fields. + * Otherwise there's no need to worry since no one else can touch + * them. 
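
/*
 * [Editor's aside, not part of the patch] What follows is a conditional lock
 * bracket: the same predicate (bs_old->throttle_state) guards both the lock
 * and the later unlock. Modeled standalone with pthreads; QEMU's
 * throttle_group_lock()/throttle_group_unlock() play the same role:
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

static void swap_protected(int *a, int *b, bool in_group)
{
    if (in_group) {
        pthread_mutex_lock(&group_lock);   /* others can reach these fields */
    }

    int tmp = *a;                          /* ...mutate the protected state... */
    *a = *b;
    *b = tmp;

    if (in_group) {                        /* same predicate as the lock */
        pthread_mutex_unlock(&group_lock);
    }
}
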
*/ + if (bs_old->throttle_state) { + throttle_group_lock(bs_old); + } + /* bs_new must be unattached and shouldn't have anything fancy enabled */ assert(!bs_new->blk); assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->io_limits_enabled == false); - assert(!throttle_have_timer(&bs_new->throttle_state)); + assert(bs_new->throttle_state == NULL); + assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); tmp = *bs_new; *bs_new = *bs_old; @@ -2196,7 +2111,13 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->job == NULL); assert(bs_new->io_limits_enabled == false); - assert(!throttle_have_timer(&bs_new->throttle_state)); + assert(bs_new->throttle_state == NULL); + assert(!throttle_timers_are_initialized(&bs_new->throttle_timers)); + + /* Release the ThrottleGroup lock */ + if (bs_old->throttle_state) { + throttle_group_unlock(bs_old); + } /* insert the nodes back into the graph node list if needed */ if (bs_new->node_name[0] != '\0') { @@ -2206,6 +2127,30 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list); } + /* + * Update lh_first.le_prev for non-empty lists. + * + * The head of the op blocker list doesn't change because it is moved back + * in bdrv_move_feature_fields(). + */ + assert(QLIST_EMPTY(&bs_old->tracked_requests)); + assert(QLIST_EMPTY(&bs_new->tracked_requests)); + + QLIST_FIX_HEAD_PTR(&bs_new->children, next); + QLIST_FIX_HEAD_PTR(&bs_old->children, next); + + /* Update references in bs->opaque and children */ + QLIST_FOREACH(child, &bs_old->children, next) { + if (child->bs->inherits_from == bs_new) { + child->bs->inherits_from = bs_old; + } + } + QLIST_FOREACH(child, &bs_new->children, next) { + if (child->bs->inherits_from == bs_old) { + child->bs->inherits_from = bs_new; + } + } + bdrv_rebind(bs_new); bdrv_rebind(bs_old); } @@ -2395,152 +2340,6 @@ int bdrv_commit_all(void) return 0; } -/** - * Remove an active request from the tracked requests list - * - * This function should be called when a tracked request is completing. 
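
/*
 * [Editor's aside, not part of the patch] QEMU's QLIST macros mirror BSD
 * <sys/queue.h>, so the begin/end pairing removed just below reduces to this
 * standalone model:
 */
#include <stdint.h>
#include <sys/queue.h>

struct tracked_req {
    int64_t offset;
    unsigned int bytes;
    LIST_ENTRY(tracked_req) list;
};

static LIST_HEAD(req_head, tracked_req) tracked_requests =
    LIST_HEAD_INITIALIZER(tracked_requests);

static void request_begin(struct tracked_req *req)
{
    LIST_INSERT_HEAD(&tracked_requests, req, list);  /* visible while in flight */
}

static void request_end(struct tracked_req *req)
{
    LIST_REMOVE(req, list);                          /* completion unlinks it */
}
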
- */ -static void tracked_request_end(BdrvTrackedRequest *req) -{ - if (req->serialising) { - req->bs->serialising_in_flight--; - } - - QLIST_REMOVE(req, list); - qemu_co_queue_restart_all(&req->wait_queue); -} - -/** - * Add an active request to the tracked requests list - */ -static void tracked_request_begin(BdrvTrackedRequest *req, - BlockDriverState *bs, - int64_t offset, - unsigned int bytes, bool is_write) -{ - *req = (BdrvTrackedRequest){ - .bs = bs, - .offset = offset, - .bytes = bytes, - .is_write = is_write, - .co = qemu_coroutine_self(), - .serialising = false, - .overlap_offset = offset, - .overlap_bytes = bytes, - }; - - qemu_co_queue_init(&req->wait_queue); - - QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); -} - -static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) -{ - int64_t overlap_offset = req->offset & ~(align - 1); - unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) - - overlap_offset; - - if (!req->serialising) { - req->bs->serialising_in_flight++; - req->serialising = true; - } - - req->overlap_offset = MIN(req->overlap_offset, overlap_offset); - req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); -} - -/** - * Round a region to cluster boundaries - */ -void bdrv_round_to_clusters(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int64_t *cluster_sector_num, - int *cluster_nb_sectors) -{ - BlockDriverInfo bdi; - - if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) { - *cluster_sector_num = sector_num; - *cluster_nb_sectors = nb_sectors; - } else { - int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; - *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c); - *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num + - nb_sectors, c); - } -} - -static int bdrv_get_cluster_size(BlockDriverState *bs) -{ - BlockDriverInfo bdi; - int ret; - - ret = bdrv_get_info(bs, &bdi); - if (ret < 0 || bdi.cluster_size == 0) { - return bs->request_alignment; - } else { - return bdi.cluster_size; - } -} - -static bool tracked_request_overlaps(BdrvTrackedRequest *req, - int64_t offset, unsigned int bytes) -{ - /* aaaa bbbb */ - if (offset >= req->overlap_offset + req->overlap_bytes) { - return false; - } - /* bbbb aaaa */ - if (req->overlap_offset >= offset + bytes) { - return false; - } - return true; -} - -static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) -{ - BlockDriverState *bs = self->bs; - BdrvTrackedRequest *req; - bool retry; - bool waited = false; - - if (!bs->serialising_in_flight) { - return false; - } - - do { - retry = false; - QLIST_FOREACH(req, &bs->tracked_requests, list) { - if (req == self || (!req->serialising && !self->serialising)) { - continue; - } - if (tracked_request_overlaps(req, self->overlap_offset, - self->overlap_bytes)) - { - /* Hitting this means there was a reentrant request, for - * example, a block driver issuing nested requests. This must - * never happen since it means deadlock. - */ - assert(qemu_coroutine_self() != req->co); - - /* If the request is already (indirectly) waiting for us, or - * will wait for us as soon as it wakes up, then just go on - * (instead of producing a deadlock in the former case). 
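
/*
 * [Editor's aside, not part of the patch] tracked_request_overlaps() above is
 * the standard half-open interval test: [a, a+alen) and [b, b+blen) overlap
 * iff each range starts before the other one ends. Equivalently:
 */
#include <stdbool.h>
#include <stdint.h>

static bool ranges_overlap(int64_t a, unsigned int alen,
                           int64_t b, unsigned int blen)
{
    return a < b + (int64_t)blen && b < a + (int64_t)alen;
}
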
*/ - if (!req->waiting_for) { - self->waiting_for = req; - qemu_co_queue_wait(&req->wait_queue); - self->waiting_for = NULL; - retry = true; - waited = true; - break; - } - } - } - } while (retry); - - return waited; -} - /* * Return values: * 0 - success @@ -2709,990 +2508,119 @@ exit: return ret; } - -static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, - size_t size) +/** + * Truncate file to 'offset' bytes (needed only for file protocols) + */ +int bdrv_truncate(BlockDriverState *bs, int64_t offset) { - if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) { - return -EIO; - } - - if (!bdrv_is_inserted(bs)) { + BlockDriver *drv = bs->drv; + int ret; + if (!drv) return -ENOMEDIUM; - } + if (!drv->bdrv_truncate) + return -ENOTSUP; + if (bs->read_only) + return -EACCES; - if (offset < 0) { - return -EIO; + ret = drv->bdrv_truncate(bs, offset); + if (ret == 0) { + ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); + bdrv_dirty_bitmap_truncate(bs); + if (bs->blk) { + blk_dev_resize_cb(bs->blk); + } } - - return 0; + return ret; } -static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +/** + * Length of a allocated file in bytes. Sparse files are counted by actual + * allocated space. Return < 0 if error or unknown. + */ +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs) { - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EIO; + BlockDriver *drv = bs->drv; + if (!drv) { + return -ENOMEDIUM; } - - return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); -} - -typedef struct RwCo { - BlockDriverState *bs; - int64_t offset; - QEMUIOVector *qiov; - bool is_write; - int ret; - BdrvRequestFlags flags; -} RwCo; - -static void coroutine_fn bdrv_rw_co_entry(void *opaque) -{ - RwCo *rwco = opaque; - - if (!rwco->is_write) { - rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); - } else { - rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset, - rwco->qiov->size, rwco->qiov, - rwco->flags); + if (drv->bdrv_get_allocated_file_size) { + return drv->bdrv_get_allocated_file_size(bs); } + if (bs->file) { + return bdrv_get_allocated_file_size(bs->file); + } + return -ENOTSUP; } -/* - * Process a vectored synchronous request using coroutines +/** + * Return number of sectors on success, -errno on error. */ -static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, - QEMUIOVector *qiov, bool is_write, - BdrvRequestFlags flags) +int64_t bdrv_nb_sectors(BlockDriverState *bs) { - Coroutine *co; - RwCo rwco = { - .bs = bs, - .offset = offset, - .qiov = qiov, - .is_write = is_write, - .ret = NOT_DONE, - .flags = flags, - }; - - /** - * In sync call context, when the vcpu is blocked, this throttling timer - * will not fire; so the I/O throttling function has to be disabled here - * if it has been enabled. 
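
/*
 * [Editor's aside, not part of the patch] bdrv_prwv_co() here drives an
 * asynchronous operation synchronously: seed the result with the NOT_DONE
 * sentinel, kick off the coroutine, then poll the event loop until the
 * completion overwrites it. Skeleton with a hypothetical poll step:
 */
#define SENTINEL 0x7fffffff        /* block.c calls this NOT_DONE above */

static int op_ret;                 /* models rwco.ret */

static void op_complete(int ret)   /* models the coroutine finishing */
{
    op_ret = ret;
}

static void poll_event_loop(void); /* hypothetical aio_poll() stand-in */

static int run_sync(void)
{
    op_ret = SENTINEL;
    while (op_ret == SENTINEL) {
        poll_event_loop();         /* eventually runs op_complete() */
    }
    return op_ret;
}
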
- */ - if (bs->io_limits_enabled) { - fprintf(stderr, "Disabling I/O throttling on '%s' due " - "to synchronous I/O.\n", bdrv_get_device_name(bs)); - bdrv_io_limits_disable(bs); - } + BlockDriver *drv = bs->drv; - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_rw_co_entry(&rwco); - } else { - AioContext *aio_context = bdrv_get_aio_context(bs); + if (!drv) + return -ENOMEDIUM; - co = qemu_coroutine_create(bdrv_rw_co_entry); - qemu_coroutine_enter(co, &rwco); - while (rwco.ret == NOT_DONE) { - aio_poll(aio_context, true); + if (drv->has_variable_length) { + int ret = refresh_total_sectors(bs, bs->total_sectors); + if (ret < 0) { + return ret; } } - return rwco.ret; + return bs->total_sectors; } -/* - * Process a synchronous request using coroutines +/** + * Return length in bytes on success, -errno on error. + * The length is always a multiple of BDRV_SECTOR_SIZE. */ -static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, - int nb_sectors, bool is_write, BdrvRequestFlags flags) +int64_t bdrv_getlength(BlockDriverState *bs) { - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *)buf, - .iov_len = nb_sectors * BDRV_SECTOR_SIZE, - }; - - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, - &qiov, is_write, flags); -} + int64_t ret = bdrv_nb_sectors(bs); -/* return < 0 if error. See bdrv_write() for the return codes */ -int bdrv_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0); + ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret; + return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE; } -/* Just like bdrv_read(), but with I/O throttling temporarily disabled */ -int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) +/* return 0 as number of sectors if no device present or error */ +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) { - bool enabled; - int ret; + int64_t nb_sectors = bdrv_nb_sectors(bs); - enabled = bs->io_limits_enabled; - bs->io_limits_enabled = false; - ret = bdrv_read(bs, sector_num, buf, nb_sectors); - bs->io_limits_enabled = enabled; - return ret; + *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors; } -/* Return < 0 if error. Important errors are: - -EIO generic I/O error (may happen for all errors) - -ENOMEDIUM No media inserted. - -EINVAL Invalid sector number or nb_sectors - -EACCES Trying to write a read-only device -*/ -int bdrv_write(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) { - return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); + bs->on_read_error = on_read_error; + bs->on_write_error = on_write_error; } -int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags) +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) { - return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true, - BDRV_REQ_ZERO_WRITE | flags); + return is_read ? bs->on_read_error : bs->on_write_error; } -/* - * Completely zero out a block device with the help of bdrv_write_zeroes. 
- * The operation is sped up by checking the block status and only writing - * zeroes to the device if they currently do not return zeroes. Optional - * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP). - * - * Returns < 0 on error, 0 on success. For error codes see bdrv_write(). - */ -int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) { - int64_t target_sectors, ret, nb_sectors, sector_num = 0; - int n; + BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; - target_sectors = bdrv_nb_sectors(bs); - if (target_sectors < 0) { - return target_sectors; + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + return (error == ENOSPC) ? + BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_STOP: + return BLOCK_ERROR_ACTION_STOP; + case BLOCKDEV_ON_ERROR_REPORT: + return BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; + default: + abort(); } - - for (;;) { - nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS); - if (nb_sectors <= 0) { - return 0; - } - ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); - if (ret < 0) { - error_report("error getting block status at sector %" PRId64 ": %s", - sector_num, strerror(-ret)); - return ret; - } - if (ret & BDRV_BLOCK_ZERO) { - sector_num += n; - continue; - } - ret = bdrv_write_zeroes(bs, sector_num, n, flags); - if (ret < 0) { - error_report("error writing zeroes at sector %" PRId64 ": %s", - sector_num, strerror(-ret)); - return ret; - } - sector_num += n; - } -} - -int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) -{ - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *)buf, - .iov_len = bytes, - }; - int ret; - - if (bytes < 0) { - return -EINVAL; - } - - qemu_iovec_init_external(&qiov, &iov, 1); - ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); - if (ret < 0) { - return ret; - } - - return bytes; -} - -int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) -{ - int ret; - - ret = bdrv_prwv_co(bs, offset, qiov, true, 0); - if (ret < 0) { - return ret; - } - - return qiov->size; -} - -int bdrv_pwrite(BlockDriverState *bs, int64_t offset, - const void *buf, int bytes) -{ - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *) buf, - .iov_len = bytes, - }; - - if (bytes < 0) { - return -EINVAL; - } - - qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_pwritev(bs, offset, &qiov); -} - -/* - * Writes to the file and ensures that no writes are reordered across this - * request (acts as a barrier) - * - * Returns 0 on success, -errno in error cases. - */ -int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, - const void *buf, int count) -{ - int ret; - - ret = bdrv_pwrite(bs, offset, buf, count); - if (ret < 0) { - return ret; - } - - /* No flush needed for cache modes that already do it */ - if (bs->enable_write_cache) { - bdrv_flush(bs); - } - - return 0; -} - -static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) -{ - /* Perform I/O through a temporary buffer so that users who scribble over - * their read buffer while the operation is in progress do not end up - * modifying the image file. This is critical for zero-copy guest I/O - * where anything might happen inside guest memory. 
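
/*
 * [Editor's aside, not part of the patch] The bounce-buffer discipline
 * described above, reduced to its core: the image file only ever sees a
 * private copy of the data, so a guest scribbling over its own buffer
 * mid-operation cannot corrupt the image. read_backend()/write_image() are
 * hypothetical stand-ins:
 */
#include <stdlib.h>
#include <string.h>

static int read_backend(void *buf, size_t len);        /* hypothetical */
static int write_image(const void *buf, size_t len);   /* hypothetical */

static int cor_read(void *caller_buf, size_t skip, size_t len,
                    size_t cluster_len)
{
    void *bounce = malloc(cluster_len);  /* qemu_try_blockalign() in QEMU */
    int ret = bounce ? read_backend(bounce, cluster_len) : -1;

    if (ret == 0) {
        ret = write_image(bounce, cluster_len);          /* commit the copy */
    }
    if (ret == 0) {
        memcpy(caller_buf, (char *)bounce + skip, len);  /* then satisfy read */
    }
    free(bounce);
    return ret;
}
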
- */ - void *bounce_buffer; - - BlockDriver *drv = bs->drv; - struct iovec iov; - QEMUIOVector bounce_qiov; - int64_t cluster_sector_num; - int cluster_nb_sectors; - size_t skip_bytes; - int ret; - - /* Cover entire cluster so no additional backing file I/O is required when - * allocating cluster in the image file. - */ - bdrv_round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); - - trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, - cluster_sector_num, cluster_nb_sectors); - - iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE; - iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len); - if (bounce_buffer == NULL) { - ret = -ENOMEM; - goto err; - } - - qemu_iovec_init_external(&bounce_qiov, &iov, 1); - - ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors, - &bounce_qiov); - if (ret < 0) { - goto err; - } - - if (drv->bdrv_co_write_zeroes && - buffer_is_zero(bounce_buffer, iov.iov_len)) { - ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num, - cluster_nb_sectors, 0); - } else { - /* This does not change the data on the disk, it is not necessary - * to flush even in cache=writethrough mode. - */ - ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors, - &bounce_qiov); - } - - if (ret < 0) { - /* It might be okay to ignore write errors for guest requests. If this - * is a deliberate copy-on-read then we don't want to ignore the error. - * Simply report it in all cases. - */ - goto err; - } - - skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); - -err: - qemu_vfree(bounce_buffer); - return ret; -} - -/* - * Forwards an already correctly aligned request to the BlockDriver. This - * handles copy on read and zeroing after EOF; any other features must be - * implemented by the caller. - */ -static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, - BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, - int64_t align, QEMUIOVector *qiov, int flags) -{ - BlockDriver *drv = bs->drv; - int ret; - - int64_t sector_num = offset >> BDRV_SECTOR_BITS; - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - assert(!qiov || bytes == qiov->size); - - /* Handle Copy on Read and associated serialisation */ - if (flags & BDRV_REQ_COPY_ON_READ) { - /* If we touch the same cluster it counts as an overlap. This - * guarantees that allocating writes will be serialized and not race - * with each other for the same cluster. For example, in copy-on-read - * it ensures that the CoR read and write operations are atomic and - * guest writes cannot interleave between them. 
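
/*
 * [Editor's aside, not part of the patch] mark_request_serialising() just
 * below widens the request to cluster granularity via
 * bdrv_get_cluster_size(); the underlying rounding (bdrv_round_to_clusters,
 * removed earlier in this diff) is plain alignment arithmetic for positive
 * values:
 */
#include <stdint.h>

static void round_to_clusters(int64_t sector, int nb, int64_t c,
                              int64_t *start, int *count)
{
    *start = (sector / c) * c;                         /* QEMU_ALIGN_DOWN */
    *count = ((sector - *start + nb + c - 1) / c) * c; /* QEMU_ALIGN_UP */
}
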
*/ - mark_request_serialising(req, bdrv_get_cluster_size(bs)); - } - - wait_serialising_requests(req); - - if (flags & BDRV_REQ_COPY_ON_READ) { - int pnum; - - ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); - if (ret < 0) { - goto out; - } - - if (!ret || pnum != nb_sectors) { - ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov); - goto out; - } - } - - /* Forward the request to the BlockDriver */ - if (!bs->zero_beyond_eof) { - ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); - } else { - /* Read zeros after EOF */ - int64_t total_sectors, max_nb_sectors; - - total_sectors = bdrv_nb_sectors(bs); - if (total_sectors < 0) { - ret = total_sectors; - goto out; - } - - max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), - align >> BDRV_SECTOR_BITS); - if (nb_sectors < max_nb_sectors) { - ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); - } else if (max_nb_sectors > 0) { - QEMUIOVector local_qiov; - - qemu_iovec_init(&local_qiov, qiov->niov); - qemu_iovec_concat(&local_qiov, qiov, 0, - max_nb_sectors * BDRV_SECTOR_SIZE); - - ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors, - &local_qiov); - - qemu_iovec_destroy(&local_qiov); - } else { - ret = 0; - } - - /* Reading beyond end of file is supposed to produce zeroes */ - if (ret == 0 && total_sectors < sector_num + nb_sectors) { - uint64_t offset = MAX(0, total_sectors - sector_num); - uint64_t bytes = (sector_num + nb_sectors - offset) * - BDRV_SECTOR_SIZE; - qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes); - } - } - -out: - return ret; -} - -static inline uint64_t bdrv_get_align(BlockDriverState *bs) -{ - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - return MAX(BDRV_SECTOR_SIZE, bs->request_alignment); -} - -static inline bool bdrv_req_is_aligned(BlockDriverState *bs, - int64_t offset, size_t bytes) -{ - int64_t align = bdrv_get_align(bs); - return !(offset & (align - 1) || (bytes & (align - 1))); -} - -/* - * Handle a read request in coroutine context - */ -static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; - - uint64_t align = bdrv_get_align(bs); - uint8_t *head_buf = NULL; - uint8_t *tail_buf = NULL; - QEMUIOVector local_qiov; - bool use_local_qiov = false; - int ret; - - if (!drv) { - return -ENOMEDIUM; - } - - ret = bdrv_check_byte_request(bs, offset, bytes); - if (ret < 0) { - return ret; - } - - if (bs->copy_on_read) { - flags |= BDRV_REQ_COPY_ON_READ; - } - - /* throttling disk I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, false); - } - - /* Align read if necessary by padding qiov */ - if (offset & (align - 1)) { - head_buf = qemu_blockalign(bs, align); - qemu_iovec_init(&local_qiov, qiov->niov + 2); - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - } - - if ((offset + bytes) & (align - 1)) { - if (!use_local_qiov) { - qemu_iovec_init(&local_qiov, qiov->niov + 1); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - } - tail_buf = qemu_blockalign(bs, align); - qemu_iovec_add(&local_qiov, tail_buf, - align - ((offset + bytes) & (align - 1))); - - bytes = ROUND_UP(bytes, align); - } - - tracked_request_begin(&req, bs, offset, bytes, false); - ret = 
bdrv_aligned_preadv(bs, &req, offset, bytes, align, - use_local_qiov ? &local_qiov : qiov, - flags); - tracked_request_end(&req); - - if (use_local_qiov) { - qemu_iovec_destroy(&local_qiov); - qemu_vfree(head_buf); - qemu_vfree(tail_buf); - } - - return ret; -} - -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { - return -EINVAL; - } - - return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS, qiov, flags); -} - -int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) -{ - trace_bdrv_co_readv(bs, sector_num, nb_sectors); - - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); -} - -int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) -{ - trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors); - - return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, - BDRV_REQ_COPY_ON_READ); -} - -#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768 - -static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) -{ - BlockDriver *drv = bs->drv; - QEMUIOVector qiov; - struct iovec iov = {0}; - int ret = 0; - - int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes, - BDRV_REQUEST_MAX_SECTORS); - - while (nb_sectors > 0 && !ret) { - int num = nb_sectors; - - /* Align request. Block drivers can expect the "bulk" of the request - * to be aligned. - */ - if (bs->bl.write_zeroes_alignment - && num > bs->bl.write_zeroes_alignment) { - if (sector_num % bs->bl.write_zeroes_alignment != 0) { - /* Make a small request up to the first aligned sector. */ - num = bs->bl.write_zeroes_alignment; - num -= sector_num % bs->bl.write_zeroes_alignment; - } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) { - /* Shorten the request to the last aligned sector. num cannot - * underflow because num > bs->bl.write_zeroes_alignment. - */ - num -= (sector_num + num) % bs->bl.write_zeroes_alignment; - } - } - - /* limit request size */ - if (num > max_write_zeroes) { - num = max_write_zeroes; - } - - ret = -ENOTSUP; - /* First try the efficient write zeroes operation */ - if (drv->bdrv_co_write_zeroes) { - ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags); - } - - if (ret == -ENOTSUP) { - /* Fall back to bounce buffer if write zeroes is unsupported */ - int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length, - MAX_WRITE_ZEROES_BOUNCE_BUFFER); - num = MIN(num, max_xfer_len); - iov.iov_len = num * BDRV_SECTOR_SIZE; - if (iov.iov_base == NULL) { - iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE); - if (iov.iov_base == NULL) { - ret = -ENOMEM; - goto fail; - } - memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE); - } - qemu_iovec_init_external(&qiov, &iov, 1); - - ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov); - - /* Keep bounce buffer around if it is big enough for all - * all future requests. - */ - if (num < max_xfer_len) { - qemu_vfree(iov.iov_base); - iov.iov_base = NULL; - } - } - - sector_num += num; - nb_sectors -= num; - } - -fail: - qemu_vfree(iov.iov_base); - return ret; -} - -/* - * Forwards an already correctly aligned write request to the BlockDriver. 
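
/*
 * [Editor's aside, not part of the patch] The head/tail padding in
 * bdrv_co_do_preadv() above widens [offset, offset+bytes) to the request
 * alignment, which is a power of two. As standalone arithmetic:
 */
#include <stdint.h>

static void widen_to_alignment(uint64_t *offset, unsigned int *bytes,
                               uint64_t align)
{
    unsigned int head = *offset & (align - 1);       /* unaligned head bytes */

    *offset -= head;
    *bytes += head;
    *bytes = (*bytes + align - 1) & ~(align - 1);    /* ROUND_UP(bytes, align) */
}
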
- */ -static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, - BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, - QEMUIOVector *qiov, int flags) -{ - BlockDriver *drv = bs->drv; - bool waited; - int ret; - - int64_t sector_num = offset >> BDRV_SECTOR_BITS; - unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - - assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); - assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - assert(!qiov || bytes == qiov->size); - - waited = wait_serialising_requests(req); - assert(!waited || !req->serialising); - assert(req->overlap_offset <= offset); - assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); - - ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); - - if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && - !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes && - qemu_iovec_is_zero(qiov)) { - flags |= BDRV_REQ_ZERO_WRITE; - if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { - flags |= BDRV_REQ_MAY_UNMAP; - } - } - - if (ret < 0) { - /* Do nothing, write notifier decided to fail this request */ - } else if (flags & BDRV_REQ_ZERO_WRITE) { - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); - ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); - } else { - BLKDBG_EVENT(bs, BLKDBG_PWRITEV); - ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); - } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); - - if (ret == 0 && !bs->enable_write_cache) { - ret = bdrv_co_flush(bs); - } - - bdrv_set_dirty(bs, sector_num, nb_sectors); - - block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); - - if (ret >= 0) { - bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); - } - - return ret; -} - -/* - * Handle a write request in coroutine context - */ -static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags) -{ - BdrvTrackedRequest req; - uint64_t align = bdrv_get_align(bs); - uint8_t *head_buf = NULL; - uint8_t *tail_buf = NULL; - QEMUIOVector local_qiov; - bool use_local_qiov = false; - int ret; - - if (!bs->drv) { - return -ENOMEDIUM; - } - if (bs->read_only) { - return -EACCES; - } - - ret = bdrv_check_byte_request(bs, offset, bytes); - if (ret < 0) { - return ret; - } - - /* throttling disk I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, bytes, true); - } - - /* - * Align write if necessary by performing a read-modify-write cycle. - * Pad qiov with the read parts and be sure to have a tracked request not - * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. 
- */ - tracked_request_begin(&req, bs, offset, bytes, true); - - if (offset & (align - 1)) { - QEMUIOVector head_qiov; - struct iovec head_iov; - - mark_request_serialising(&req, align); - wait_serialising_requests(&req); - - head_buf = qemu_blockalign(bs, align); - head_iov = (struct iovec) { - .iov_base = head_buf, - .iov_len = align, - }; - qemu_iovec_init_external(&head_qiov, &head_iov, 1); - - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); - ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, - align, &head_qiov, 0); - if (ret < 0) { - goto fail; - } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); - - qemu_iovec_init(&local_qiov, qiov->niov + 2); - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - - bytes += offset & (align - 1); - offset = offset & ~(align - 1); - } - - if ((offset + bytes) & (align - 1)) { - QEMUIOVector tail_qiov; - struct iovec tail_iov; - size_t tail_bytes; - bool waited; - - mark_request_serialising(&req, align); - waited = wait_serialising_requests(&req); - assert(!waited || !use_local_qiov); - - tail_buf = qemu_blockalign(bs, align); - tail_iov = (struct iovec) { - .iov_base = tail_buf, - .iov_len = align, - }; - qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); - - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); - ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, - align, &tail_qiov, 0); - if (ret < 0) { - goto fail; - } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); - - if (!use_local_qiov) { - qemu_iovec_init(&local_qiov, qiov->niov + 1); - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); - use_local_qiov = true; - } - - tail_bytes = (offset + bytes) & (align - 1); - qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); - - bytes = ROUND_UP(bytes, align); - } - - if (use_local_qiov) { - /* Local buffer may have non-zero data. */ - flags &= ~BDRV_REQ_ZERO_WRITE; - } - ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, - use_local_qiov ? 
&local_qiov : qiov,
-                               flags);
-
-fail:
-    tracked_request_end(&req);
-
-    if (use_local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-    }
-    qemu_vfree(head_buf);
-    qemu_vfree(tail_buf);
-
-    return ret;
-}
-
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
-                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
-}
-
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, QEMUIOVector *qiov)
-{
-    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
-
-    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors,
-    BdrvRequestFlags flags)
-{
-    int ret;
-
-    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
-
-    if (!(bs->open_flags & BDRV_O_UNMAP)) {
-        flags &= ~BDRV_REQ_MAY_UNMAP;
-    }
-    if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS,
-                            nb_sectors << BDRV_SECTOR_BITS)) {
-        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
-                                BDRV_REQ_ZERO_WRITE | flags);
-    } else {
-        uint8_t *buf;
-        QEMUIOVector local_qiov;
-        size_t bytes = nb_sectors << BDRV_SECTOR_BITS;
-
-        buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes);
-        memset(buf, 0, bytes);
-        qemu_iovec_init(&local_qiov, 1);
-        qemu_iovec_add(&local_qiov, buf, bytes);
-
-        ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov,
-                                BDRV_REQ_ZERO_WRITE | flags);
-        qemu_vfree(buf);
-    }
-    return ret;
-}
-
-/**
- * Truncate file to 'offset' bytes (needed only for file protocols)
- */
-int bdrv_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-    if (!drv)
-        return -ENOMEDIUM;
-    if (!drv->bdrv_truncate)
-        return -ENOTSUP;
-    if (bs->read_only)
-        return -EACCES;
-
-    ret = drv->bdrv_truncate(bs, offset);
-    if (ret == 0) {
-        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
-        if (bs->blk) {
-            blk_dev_resize_cb(bs->blk);
-        }
-    }
-    return ret;
-}
-
-/**
- * Length of an allocated file in bytes. Sparse files are counted by actual
- * allocated space. Return < 0 if error or unknown.
- */
-int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_get_allocated_file_size) {
-        return drv->bdrv_get_allocated_file_size(bs);
-    }
-    if (bs->file) {
-        return bdrv_get_allocated_file_size(bs->file);
-    }
-    return -ENOTSUP;
-}
-
-/**
- * Return number of sectors on success, -errno on error.
- */
-int64_t bdrv_nb_sectors(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv)
-        return -ENOMEDIUM;
-
-    if (drv->has_variable_length) {
-        int ret = refresh_total_sectors(bs, bs->total_sectors);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    return bs->total_sectors;
-}
-
-/**
- * Return length in bytes on success, -errno on error.
- * The length is always a multiple of BDRV_SECTOR_SIZE.
- */
-int64_t bdrv_getlength(BlockDriverState *bs)
-{
-    int64_t ret = bdrv_nb_sectors(bs);
-
-    return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
-}
-
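All of the length helpers above reduce to the same fixed sector/byte relationship. A self-contained sketch under that assumption (length_from_sectors() is a hypothetical name; bdrv_getlength() above does exactly this on top of bdrv_nb_sectors()):

#include <stdint.h>

#define BDRV_SECTOR_BITS 9                          /* as in block.h */
#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) /* 512 bytes */

/* Negative sector counts are -errno values and must be passed through
 * unscaled, which is why the helpers above check for < 0 first. */
static int64_t length_from_sectors(int64_t nb_sectors)
{
    return nb_sectors < 0 ? nb_sectors : nb_sectors * BDRV_SECTOR_SIZE;
}

-/* return 0 as number of sectors if no device present or error */
-void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
-{
-    int64_t nb_sectors = bdrv_nb_sectors(bs);
-
-    *nb_sectors_ptr = nb_sectors < 0 ?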
0 : nb_sectors; -} - -void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, - BlockdevOnError on_write_error) -{ - bs->on_read_error = on_read_error; - bs->on_write_error = on_write_error; -} - -BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) -{ - return is_read ? bs->on_read_error : bs->on_write_error; -} - -BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) -{ - BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; - - switch (on_err) { - case BLOCKDEV_ON_ERROR_ENOSPC: - return (error == ENOSPC) ? - BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; - case BLOCKDEV_ON_ERROR_STOP: - return BLOCK_ERROR_ACTION_STOP; - case BLOCKDEV_ON_ERROR_REPORT: - return BLOCK_ERROR_ACTION_REPORT; - case BLOCKDEV_ON_ERROR_IGNORE: - return BLOCK_ERROR_ACTION_IGNORE; - default: - abort(); - } -} +} static void send_qmp_error_event(BlockDriverState *bs, BlockErrorAction action, @@ -3828,7 +2756,7 @@ void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp) error_setg(errp, "Node '%s' is not encrypted", bdrv_get_device_or_node_name(bs)); } else if (bdrv_set_key(bs, key) < 0) { - error_set(errp, QERR_INVALID_PASSWORD); + error_setg(errp, QERR_INVALID_PASSWORD); } } else { if (bdrv_key_required(bs)) { @@ -4000,26 +2928,6 @@ int bdrv_get_flags(BlockDriverState *bs) return bs->open_flags; } -int bdrv_flush_all(void) -{ - BlockDriverState *bs; - int result = 0; - - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - AioContext *aio_context = bdrv_get_aio_context(bs); - int ret; - - aio_context_acquire(aio_context); - ret = bdrv_flush(bs); - if (ret < 0 && !result) { - result = ret; - } - aio_context_release(aio_context); - } - - return result; -} - int bdrv_has_zero_init_1(BlockDriverState *bs) { return 1; @@ -4072,222 +2980,6 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs) return false; } -typedef struct BdrvCoGetBlockStatusData { - BlockDriverState *bs; - BlockDriverState *base; - int64_t sector_num; - int nb_sectors; - int *pnum; - int64_t ret; - bool done; -} BdrvCoGetBlockStatusData; - -/* - * Returns the allocation status of the specified sectors. - * Drivers not implementing the functionality are assumed to not support - * backing files, hence all their sectors are reported as allocated. - * - * If 'sector_num' is beyond the end of the disk image the return value is 0 - * and 'pnum' is set to 0. - * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same - * allocated/unallocated state. - * - * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes - * beyond the end of the disk image it will be clamped. 
- */ -static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, int *pnum) -{ - int64_t total_sectors; - int64_t n; - int64_t ret, ret2; - - total_sectors = bdrv_nb_sectors(bs); - if (total_sectors < 0) { - return total_sectors; - } - - if (sector_num >= total_sectors) { - *pnum = 0; - return 0; - } - - n = total_sectors - sector_num; - if (n < nb_sectors) { - nb_sectors = n; - } - - if (!bs->drv->bdrv_co_get_block_status) { - *pnum = nb_sectors; - ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; - if (bs->drv->protocol_name) { - ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); - } - return ret; - } - - ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); - if (ret < 0) { - *pnum = 0; - return ret; - } - - if (ret & BDRV_BLOCK_RAW) { - assert(ret & BDRV_BLOCK_OFFSET_VALID); - return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, - *pnum, pnum); - } - - if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { - ret |= BDRV_BLOCK_ALLOCATED; - } - - if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { - if (bdrv_unallocated_blocks_are_zero(bs)) { - ret |= BDRV_BLOCK_ZERO; - } else if (bs->backing_hd) { - BlockDriverState *bs2 = bs->backing_hd; - int64_t nb_sectors2 = bdrv_nb_sectors(bs2); - if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) { - ret |= BDRV_BLOCK_ZERO; - } - } - } - - if (bs->file && - (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && - (ret & BDRV_BLOCK_OFFSET_VALID)) { - int file_pnum; - - ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, - *pnum, &file_pnum); - if (ret2 >= 0) { - /* Ignore errors. This is just providing extra information, it - * is useful but not necessary. - */ - if (!file_pnum) { - /* !file_pnum indicates an offset at or beyond the EOF; it is - * perfectly valid for the format block driver to point to such - * offsets, so catch it and mark everything as zero */ - ret |= BDRV_BLOCK_ZERO; - } else { - /* Limit request to the range reported by the protocol driver */ - *pnum = file_pnum; - ret |= (ret2 & BDRV_BLOCK_ZERO); - } - } - } - - return ret; -} - -/* Coroutine wrapper for bdrv_get_block_status() */ -static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) -{ - BdrvCoGetBlockStatusData *data = opaque; - BlockDriverState *bs = data->bs; - - data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, - data->pnum); - data->done = true; -} - -/* - * Synchronous wrapper around bdrv_co_get_block_status(). - * - * See bdrv_co_get_block_status() for details. - */ -int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) -{ - Coroutine *co; - BdrvCoGetBlockStatusData data = { - .bs = bs, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .pnum = pnum, - .done = false, - }; - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_get_block_status_co_entry(&data); - } else { - AioContext *aio_context = bdrv_get_aio_context(bs); - - co = qemu_coroutine_create(bdrv_get_block_status_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - aio_poll(aio_context, true); - } - } - return data.ret; -} - -int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) -{ - int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); - if (ret < 0) { - return ret; - } - return !!(ret & BDRV_BLOCK_ALLOCATED); -} - -/* - * Given an image chain: ... 
-> [BASE] -> [INTER1] -> [INTER2] -> [TOP] - * - * Return true if the given sector is allocated in any image between - * BASE and TOP (inclusive). BASE can be NULL to check if the given - * sector is allocated in any image of the chain. Return false otherwise. - * - * 'pnum' is set to the number of sectors (including and immediately following - * the specified sector) that are known to be in the same - * allocated/unallocated state. - * - */ -int bdrv_is_allocated_above(BlockDriverState *top, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum) -{ - BlockDriverState *intermediate; - int ret, n = nb_sectors; - - intermediate = top; - while (intermediate && intermediate != base) { - int pnum_inter; - ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, - &pnum_inter); - if (ret < 0) { - return ret; - } else if (ret) { - *pnum = pnum_inter; - return 1; - } - - /* - * [sector_num, nb_sectors] is unallocated on top but intermediate - * might have - * - * [sector_num+x, nr_sectors] allocated. - */ - if (n > pnum_inter && - (intermediate == top || - sector_num + pnum_inter < intermediate->total_sectors)) { - n = pnum_inter; - } - - intermediate = intermediate->backing_hd; - } - - *pnum = n; - return 0; -} - const char *bdrv_get_encrypted_filename(BlockDriverState *bs) { if (bs->backing_hd && bs->backing_hd->encrypted) @@ -4295,35 +2987,13 @@ const char *bdrv_get_encrypted_filename(BlockDriverState *bs) else if (bs->encrypted) return bs->filename; else - return NULL; -} - -void bdrv_get_backing_filename(BlockDriverState *bs, - char *filename, int filename_size) -{ - pstrcpy(filename, filename_size, bs->backing_file); -} - -int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, - const uint8_t *buf, int nb_sectors) -{ - BlockDriver *drv = bs->drv; - int ret; - - if (!drv) { - return -ENOMEDIUM; - } - if (!drv->bdrv_write_compressed) { - return -ENOTSUP; - } - ret = bdrv_check_request(bs, sector_num, nb_sectors); - if (ret < 0) { - return ret; - } - - assert(QLIST_EMPTY(&bs->dirty_bitmaps)); + return NULL; +} - return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); +void bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size) +{ + pstrcpy(filename, filename_size, bs->backing_file); } int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) @@ -4346,47 +3016,6 @@ ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs) return NULL; } -int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, - int64_t pos, int size) -{ - QEMUIOVector qiov; - struct iovec iov = { - .iov_base = (void *) buf, - .iov_len = size, - }; - - qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_writev_vmstate(bs, &qiov, pos); -} - -int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) -{ - BlockDriver *drv = bs->drv; - - if (!drv) { - return -ENOMEDIUM; - } else if (drv->bdrv_save_vmstate) { - return drv->bdrv_save_vmstate(bs, qiov, pos); - } else if (bs->file) { - return bdrv_writev_vmstate(bs->file, qiov, pos); - } - - return -ENOTSUP; -} - -int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, - int64_t pos, int size) -{ - BlockDriver *drv = bs->drv; - if (!drv) - return -ENOMEDIUM; - if (drv->bdrv_load_vmstate) - return drv->bdrv_load_vmstate(bs, buf, pos, size); - if (bs->file) - return bdrv_load_vmstate(bs->file, buf, pos, size); - return -ENOTSUP; -} - void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) { if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) { @@ -4517,657 
+3146,31 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, g_free(filename_full); g_free(backing_file_full); g_free(filename_tmp); - return retval; -} - -int bdrv_get_backing_file_depth(BlockDriverState *bs) -{ - if (!bs->drv) { - return 0; - } - - if (!bs->backing_hd) { - return 0; - } - - return 1 + bdrv_get_backing_file_depth(bs->backing_hd); -} - -/**************************************************************/ -/* async I/Os */ - -BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque); - - return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, - cb, opaque, false); -} - -BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque); - - return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0, - cb, opaque, true); -} - -BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque); - - return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors, - BDRV_REQ_ZERO_WRITE | flags, - cb, opaque, true); -} - - -typedef struct MultiwriteCB { - int error; - int num_requests; - int num_callbacks; - struct { - BlockCompletionFunc *cb; - void *opaque; - QEMUIOVector *free_qiov; - } callbacks[]; -} MultiwriteCB; - -static void multiwrite_user_cb(MultiwriteCB *mcb) -{ - int i; - - for (i = 0; i < mcb->num_callbacks; i++) { - mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error); - if (mcb->callbacks[i].free_qiov) { - qemu_iovec_destroy(mcb->callbacks[i].free_qiov); - } - g_free(mcb->callbacks[i].free_qiov); - } -} - -static void multiwrite_cb(void *opaque, int ret) -{ - MultiwriteCB *mcb = opaque; - - trace_multiwrite_cb(mcb, ret); - - if (ret < 0 && !mcb->error) { - mcb->error = ret; - } - - mcb->num_requests--; - if (mcb->num_requests == 0) { - multiwrite_user_cb(mcb); - g_free(mcb); - } -} - -static int multiwrite_req_compare(const void *a, const void *b) -{ - const BlockRequest *req1 = a, *req2 = b; - - /* - * Note that we can't simply subtract req2->sector from req1->sector - * here as that could overflow the return value. - */ - if (req1->sector > req2->sector) { - return 1; - } else if (req1->sector < req2->sector) { - return -1; - } else { - return 0; - } -} - -/* - * Takes a bunch of requests and tries to merge them. Returns the number of - * requests that remain after merging. - */ -static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, - int num_reqs, MultiwriteCB *mcb) -{ - int i, outidx; - - // Sort requests by start sector - qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare); - - // Check if adjacent requests touch the same clusters. If so, combine them, - // filling up gaps with zero sectors. - outidx = 0; - for (i = 1; i < num_reqs; i++) { - int merge = 0; - int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors; - - // Handle exactly sequential writes and overlapping writes. 
-        if (reqs[i].sector <= oldreq_last) {
-            merge = 1;
-        }
-
-        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
-            merge = 0;
-        }
-
-        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
-            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
-            merge = 0;
-        }
-
-        if (merge) {
-            size_t size;
-            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
-            qemu_iovec_init(qiov,
-                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
-            // Add the first request to the merged one. If the requests are
-            // overlapping, drop the last sectors of the first request.
-            size = (reqs[i].sector - reqs[outidx].sector) << 9;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
-            // We should not need to add any zeros between the two requests
-            assert (reqs[i].sector <= oldreq_last);
-
-            // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
-            // Add tail of first request, if necessary
-            if (qiov->size < reqs[outidx].qiov->size) {
-                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
-                                  reqs[outidx].qiov->size - qiov->size);
-            }
-
-            reqs[outidx].nb_sectors = qiov->size >> 9;
-            reqs[outidx].qiov = qiov;
-
-            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
-        } else {
-            outidx++;
-            reqs[outidx].sector = reqs[i].sector;
-            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
-            reqs[outidx].qiov = reqs[i].qiov;
-        }
-    }
-
-    block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
-
-    return outidx + 1;
-}
-
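The merge rule above is easiest to see with concrete numbers. A self-contained sketch, leaving out the IOV_MAX and max_transfer_length vetoes (mini_req and can_merge() are hypothetical names):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: two sorted requests are merged when the second one starts at or
 * before the end of the first, exactly the test above. */
struct mini_req {
    int64_t sector;
    int nb_sectors;
};

static bool can_merge(const struct mini_req *a, const struct mini_req *b)
{
    return b->sector <= a->sector + a->nb_sectors;
}

/* {0, 8} and {8, 8} merge (exactly sequential); {0, 8} and {6, 4} merge
 * (overlapping, so the first request's last two sectors are dropped);
 * {0, 8} and {9, 4} leave a gap and do not merge. */

-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In the error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
-    MultiwriteCB *mcb;
-    int i;
-
-    /* don't submit writes if we don't have a medium */
-    if (bs->drv == NULL) {
-        for (i = 0; i < num_reqs; i++) {
-            reqs[i].error = -ENOMEDIUM;
-        }
-        return -1;
-    }
-
-    if (num_reqs == 0) {
-        return 0;
-    }
-
-    // Create MultiwriteCB structure
-    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
-    mcb->num_requests = 0;
-    mcb->num_callbacks = num_reqs;
-
-    for (i = 0; i < num_reqs; i++) {
-        mcb->callbacks[i].cb = reqs[i].cb;
-        mcb->callbacks[i].opaque = reqs[i].opaque;
-    }
-
-    // Check for mergeable requests
-    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
-    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
-    /* Run the aio requests.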
*/ - mcb->num_requests = num_reqs; - for (i = 0; i < num_reqs; i++) { - bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov, - reqs[i].nb_sectors, reqs[i].flags, - multiwrite_cb, mcb, - true); - } - - return 0; -} - -void bdrv_aio_cancel(BlockAIOCB *acb) -{ - qemu_aio_ref(acb); - bdrv_aio_cancel_async(acb); - while (acb->refcnt > 1) { - if (acb->aiocb_info->get_aio_context) { - aio_poll(acb->aiocb_info->get_aio_context(acb), true); - } else if (acb->bs) { - aio_poll(bdrv_get_aio_context(acb->bs), true); - } else { - abort(); - } - } - qemu_aio_unref(acb); -} - -/* Async version of aio cancel. The caller is not blocked if the acb implements - * cancel_async, otherwise we do nothing and let the request normally complete. - * In either case the completion callback must be called. */ -void bdrv_aio_cancel_async(BlockAIOCB *acb) -{ - if (acb->aiocb_info->cancel_async) { - acb->aiocb_info->cancel_async(acb); - } -} - -/**************************************************************/ -/* async block device emulation */ - -typedef struct BlockAIOCBSync { - BlockAIOCB common; - QEMUBH *bh; - int ret; - /* vector translation state */ - QEMUIOVector *qiov; - uint8_t *bounce; - int is_write; -} BlockAIOCBSync; - -static const AIOCBInfo bdrv_em_aiocb_info = { - .aiocb_size = sizeof(BlockAIOCBSync), -}; - -static void bdrv_aio_bh_cb(void *opaque) -{ - BlockAIOCBSync *acb = opaque; - - if (!acb->is_write && acb->ret >= 0) { - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); - } - qemu_vfree(acb->bounce); - acb->common.cb(acb->common.opaque, acb->ret); - qemu_bh_delete(acb->bh); - acb->bh = NULL; - qemu_aio_unref(acb); -} - -static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BlockCompletionFunc *cb, - void *opaque, - int is_write) - -{ - BlockAIOCBSync *acb; - - acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque); - acb->is_write = is_write; - acb->qiov = qiov; - acb->bounce = qemu_try_blockalign(bs, qiov->size); - acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); - - if (acb->bounce == NULL) { - acb->ret = -ENOMEM; - } else if (is_write) { - qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); - acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); - } else { - acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); - } - - qemu_bh_schedule(acb->bh); - - return &acb->common; -} - -static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); -} - -static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); -} - - -typedef struct BlockAIOCBCoroutine { - BlockAIOCB common; - BlockRequest req; - bool is_write; - bool need_bh; - bool *done; - QEMUBH* bh; -} BlockAIOCBCoroutine; - -static const AIOCBInfo bdrv_em_co_aiocb_info = { - .aiocb_size = sizeof(BlockAIOCBCoroutine), -}; - -static void bdrv_co_complete(BlockAIOCBCoroutine *acb) -{ - if (!acb->need_bh) { - acb->common.cb(acb->common.opaque, acb->req.error); - qemu_aio_unref(acb); - } -} - -static void bdrv_co_em_bh(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - - assert(!acb->need_bh); - qemu_bh_delete(acb->bh); - bdrv_co_complete(acb); -} - 
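The need_bh machinery below exists because the coroutine may finish before the submitting function has returned its BlockAIOCB to the caller; invoking the completion callback at that point would deliver a result for a handle the caller does not yet hold. A minimal, self-contained sketch of the contract (the mini_* names are hypothetical):

#include <stdbool.h>

struct mini_acb {
    bool need_bh;                     /* still inside the submit function? */
    int ret;                          /* result of the request */
    void (*cb)(void *opaque, int ret);
    void *opaque;
};

/* Called when the I/O finishes, possibly while submission is still on the
 * stack. */
static void mini_complete(struct mini_acb *acb)
{
    if (!acb->need_bh) {
        acb->cb(acb->opaque, acb->ret);  /* safe: submit already returned */
    }
    /* else: defer to a bottom half once need_bh has been cleared */
}

bdrv_co_maybe_schedule_bh() below clears need_bh once submission is done and, if the request has already completed, schedules bdrv_co_em_bh() above so that the callback runs from the event loop instead.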
-static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb) -{ - acb->need_bh = false; - if (acb->req.error != -EINPROGRESS) { - BlockDriverState *bs = acb->common.bs; - - acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); - qemu_bh_schedule(acb->bh); - } -} - -/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */ -static void coroutine_fn bdrv_co_do_rw(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - BlockDriverState *bs = acb->common.bs; - - if (!acb->is_write) { - acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov, acb->req.flags); - } else { - acb->req.error = bdrv_co_do_writev(bs, acb->req.sector, - acb->req.nb_sectors, acb->req.qiov, acb->req.flags); - } - - bdrv_co_complete(acb); -} - -static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, - int64_t sector_num, - QEMUIOVector *qiov, - int nb_sectors, - BdrvRequestFlags flags, - BlockCompletionFunc *cb, - void *opaque, - bool is_write) -{ - Coroutine *co; - BlockAIOCBCoroutine *acb; - - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - acb->req.sector = sector_num; - acb->req.nb_sectors = nb_sectors; - acb->req.qiov = qiov; - acb->req.flags = flags; - acb->is_write = is_write; - - co = qemu_coroutine_create(bdrv_co_do_rw); - qemu_coroutine_enter(co, acb); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; -} - -static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - BlockDriverState *bs = acb->common.bs; - - acb->req.error = bdrv_co_flush(bs); - bdrv_co_complete(acb); -} - -BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) -{ - trace_bdrv_aio_flush(bs, opaque); - - Coroutine *co; - BlockAIOCBCoroutine *acb; - - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - - co = qemu_coroutine_create(bdrv_aio_flush_co_entry); - qemu_coroutine_enter(co, acb); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; -} - -static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) -{ - BlockAIOCBCoroutine *acb = opaque; - BlockDriverState *bs = acb->common.bs; - - acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); - bdrv_co_complete(acb); -} - -BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - BlockCompletionFunc *cb, void *opaque) -{ - Coroutine *co; - BlockAIOCBCoroutine *acb; - - trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque); - - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); - acb->need_bh = true; - acb->req.error = -EINPROGRESS; - acb->req.sector = sector_num; - acb->req.nb_sectors = nb_sectors; - co = qemu_coroutine_create(bdrv_aio_discard_co_entry); - qemu_coroutine_enter(co, acb); - - bdrv_co_maybe_schedule_bh(acb); - return &acb->common; -} - -void bdrv_init(void) -{ - module_call_init(MODULE_INIT_BLOCK); -} - -void bdrv_init_with_whitelist(void) -{ - use_bdrv_whitelist = 1; - bdrv_init(); -} - -void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, - BlockCompletionFunc *cb, void *opaque) -{ - BlockAIOCB *acb; - - acb = g_slice_alloc(aiocb_info->aiocb_size); - acb->aiocb_info = aiocb_info; - acb->bs = bs; - acb->cb = cb; - acb->opaque = opaque; - acb->refcnt = 1; - return acb; -} - -void qemu_aio_ref(void *p) -{ - BlockAIOCB *acb = p; - acb->refcnt++; -} - -void qemu_aio_unref(void *p) -{ - BlockAIOCB *acb = p; - 
assert(acb->refcnt > 0); - if (--acb->refcnt == 0) { - g_slice_free1(acb->aiocb_info->aiocb_size, acb); - } -} - -/**************************************************************/ -/* Coroutine block device emulation */ - -typedef struct CoroutineIOCompletion { - Coroutine *coroutine; - int ret; -} CoroutineIOCompletion; - -static void bdrv_co_io_em_complete(void *opaque, int ret) -{ - CoroutineIOCompletion *co = opaque; - - co->ret = ret; - qemu_coroutine_enter(co->coroutine, NULL); + return retval; } -static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *iov, - bool is_write) +int bdrv_get_backing_file_depth(BlockDriverState *bs) { - CoroutineIOCompletion co = { - .coroutine = qemu_coroutine_self(), - }; - BlockAIOCB *acb; - - if (is_write) { - acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, - bdrv_co_io_em_complete, &co); - } else { - acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors, - bdrv_co_io_em_complete, &co); + if (!bs->drv) { + return 0; } - trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); - if (!acb) { - return -EIO; + if (!bs->backing_hd) { + return 0; } - qemu_coroutine_yield(); - - return co.ret; -} - -static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) -{ - return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false); -} -static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) -{ - return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true); + return 1 + bdrv_get_backing_file_depth(bs->backing_hd); } -static void coroutine_fn bdrv_flush_co_entry(void *opaque) +void bdrv_init(void) { - RwCo *rwco = opaque; - - rwco->ret = bdrv_co_flush(rwco->bs); + module_call_init(MODULE_INIT_BLOCK); } -int coroutine_fn bdrv_co_flush(BlockDriverState *bs) +void bdrv_init_with_whitelist(void) { - int ret; - - if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { - return 0; - } - - /* Write back cached data to the OS even with cache=unsafe */ - BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); - if (bs->drv->bdrv_co_flush_to_os) { - ret = bs->drv->bdrv_co_flush_to_os(bs); - if (ret < 0) { - return ret; - } - } - - /* But don't actually force it to the disk with cache=unsafe */ - if (bs->open_flags & BDRV_O_NO_FLUSH) { - goto flush_parent; - } - - BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); - if (bs->drv->bdrv_co_flush_to_disk) { - ret = bs->drv->bdrv_co_flush_to_disk(bs); - } else if (bs->drv->bdrv_aio_flush) { - BlockAIOCB *acb; - CoroutineIOCompletion co = { - .coroutine = qemu_coroutine_self(), - }; - - acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co); - if (acb == NULL) { - ret = -EIO; - } else { - qemu_coroutine_yield(); - ret = co.ret; - } - } else { - /* - * Some block drivers always operate in either writethrough or unsafe - * mode and don't support bdrv_flush therefore. Usually qemu doesn't - * know how the server works (because the behaviour is hardcoded or - * depends on server-side configuration), so we can't ensure that - * everything is safe on disk. Returning an error doesn't work because - * that would break guests even if the server operates in writethrough - * mode. - * - * Let's hope the user knows what he's doing. - */ - ret = 0; - } - if (ret < 0) { - return ret; - } - - /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH - * in the case of cache=unsafe, so there are no useless flushes. 
- */ -flush_parent: - return bdrv_co_flush(bs->file); + use_bdrv_whitelist = 1; + bdrv_init(); } void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp) @@ -5219,143 +3222,6 @@ void bdrv_invalidate_cache_all(Error **errp) } } -int bdrv_flush(BlockDriverState *bs) -{ - Coroutine *co; - RwCo rwco = { - .bs = bs, - .ret = NOT_DONE, - }; - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_flush_co_entry(&rwco); - } else { - AioContext *aio_context = bdrv_get_aio_context(bs); - - co = qemu_coroutine_create(bdrv_flush_co_entry); - qemu_coroutine_enter(co, &rwco); - while (rwco.ret == NOT_DONE) { - aio_poll(aio_context, true); - } - } - - return rwco.ret; -} - -typedef struct DiscardCo { - BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; - int ret; -} DiscardCo; -static void coroutine_fn bdrv_discard_co_entry(void *opaque) -{ - DiscardCo *rwco = opaque; - - rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); -} - -int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) -{ - int max_discard, ret; - - if (!bs->drv) { - return -ENOMEDIUM; - } - - ret = bdrv_check_request(bs, sector_num, nb_sectors); - if (ret < 0) { - return ret; - } else if (bs->read_only) { - return -EROFS; - } - - bdrv_reset_dirty(bs, sector_num, nb_sectors); - - /* Do nothing if disabled. */ - if (!(bs->open_flags & BDRV_O_UNMAP)) { - return 0; - } - - if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) { - return 0; - } - - max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); - while (nb_sectors > 0) { - int ret; - int num = nb_sectors; - - /* align request */ - if (bs->bl.discard_alignment && - num >= bs->bl.discard_alignment && - sector_num % bs->bl.discard_alignment) { - if (num > bs->bl.discard_alignment) { - num = bs->bl.discard_alignment; - } - num -= sector_num % bs->bl.discard_alignment; - } - - /* limit request size */ - if (num > max_discard) { - num = max_discard; - } - - if (bs->drv->bdrv_co_discard) { - ret = bs->drv->bdrv_co_discard(bs, sector_num, num); - } else { - BlockAIOCB *acb; - CoroutineIOCompletion co = { - .coroutine = qemu_coroutine_self(), - }; - - acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, - bdrv_co_io_em_complete, &co); - if (acb == NULL) { - return -EIO; - } else { - qemu_coroutine_yield(); - ret = co.ret; - } - } - if (ret && ret != -ENOTSUP) { - return ret; - } - - sector_num += num; - nb_sectors -= num; - } - return 0; -} - -int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) -{ - Coroutine *co; - DiscardCo rwco = { - .bs = bs, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .ret = NOT_DONE, - }; - - if (qemu_in_coroutine()) { - /* Fast-path if already in coroutine context */ - bdrv_discard_co_entry(&rwco); - } else { - AioContext *aio_context = bdrv_get_aio_context(bs); - - co = qemu_coroutine_create(bdrv_discard_co_entry); - qemu_coroutine_enter(co, &rwco); - while (rwco.ret == NOT_DONE) { - aio_poll(aio_context, true); - } - } - - return rwco.ret; -} - /**************************************************************/ /* removable device support */ @@ -5421,87 +3287,11 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked) } } -/* needed for generic scsi interface */ - -int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - BlockDriver *drv = bs->drv; - - if (drv && drv->bdrv_ioctl) - return drv->bdrv_ioctl(bs, req, buf); - return -ENOTSUP; -} - -BlockAIOCB 
*bdrv_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (drv && drv->bdrv_aio_ioctl)
-        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
-    return NULL;
-}
-
 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
 {
     bs->guest_block_size = align;
 }

-void *qemu_blockalign(BlockDriverState *bs, size_t size)
-{
-    return qemu_memalign(bdrv_opt_mem_align(bs), size);
-}
-
-void *qemu_blockalign0(BlockDriverState *bs, size_t size)
-{
-    return memset(qemu_blockalign(bs, size), 0, size);
-}
-
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
-{
-    size_t align = bdrv_opt_mem_align(bs);
-
-    /* Ensure that NULL is never returned on success */
-    assert(align > 0);
-    if (size == 0) {
-        size = align;
-    }
-
-    return qemu_try_memalign(align, size);
-}
-
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
-{
-    void *mem = qemu_try_blockalign(bs, size);
-
-    if (mem) {
-        memset(mem, 0, size);
-    }
-
-    return mem;
-}
-
-/*
- * Check if all memory in this vector is sector aligned.
- */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
-    int i;
-    size_t alignment = bdrv_opt_mem_align(bs);
-
-    for (i = 0; i < qiov->niov; i++) {
-        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
-            return false;
-        }
-        if (qiov->iov[i].iov_len % alignment) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
     BdrvDirtyBitmap *bm;
@@ -5515,8 +3305,9 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
     return NULL;
 }

-void bdrv_dirty_bitmap_make_anon(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 {
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
     g_free(bitmap->name);
     bitmap->name = NULL;
 }
@@ -5546,16 +3337,139 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
     }
     bitmap = g_new0(BdrvDirtyBitmap, 1);
     bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+    bitmap->size = bitmap_size;
     bitmap->name = g_strdup(name);
+    bitmap->disabled = false;
     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
     return bitmap;
 }

+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->successor;
+}
+
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+    return !(bitmap->disabled || bitmap->successor);
+}
+
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        return DIRTY_BITMAP_STATUS_FROZEN;
+    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+        return DIRTY_BITMAP_STATUS_DISABLED;
+    } else {
+        return DIRTY_BITMAP_STATUS_ACTIVE;
+    }
+}
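Taken together, the helpers added below implement the freeze/abdicate/reclaim workflow for dirty bitmaps. A sketch of the intended calling sequence, using only functions from this patch; backup_with_bitmap() is a hypothetical caller (it needs the QEMU headers, so it is an illustration rather than a standalone program), and the error handling is condensed:

static BdrvDirtyBitmap *backup_with_bitmap(BlockDriverState *bs,
                                           BdrvDirtyBitmap *bitmap,
                                           bool backup_succeeded,
                                           Error **errp)
{
    /* Freeze 'bitmap': from here on, new writes are recorded in its
     * anonymous child, and the parent itself may not be modified. */
    if (bdrv_dirty_bitmap_create_successor(bs, bitmap, errp) < 0) {
        return NULL;
    }

    /* ... run the long-lived operation against the frozen parent ... */

    if (backup_succeeded) {
        /* Drop the parent; the child takes over its name. */
        return bdrv_dirty_bitmap_abdicate(bs, bitmap, errp);
    } else {
        /* Fold the child back into the parent and un-freeze it. */
        return bdrv_reclaim_dirty_bitmap(bs, bitmap, errp);
    }
}

+
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap, Error **errp)
+{
+    uint64_t granularity;
+    BdrvDirtyBitmap *child;
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp, "Cannot create a successor for a bitmap that is "
+                   "currently frozen");
+        return -1;
+    }
+    assert(!bitmap->successor);
+
+    /* Create an anonymous successor */
+    granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+    if (!child) {
+        return -1;
+    }
+
+    /* Successor will be on or off based on our current state.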
*/ + child->disabled = bitmap->disabled; + + /* Install the successor and freeze the parent */ + bitmap->successor = child; + return 0; +} + +/** + * For a bitmap with a successor, yield our name to the successor, + * delete the old bitmap, and return a handle to the new bitmap. + */ +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp) +{ + char *name; + BdrvDirtyBitmap *successor = bitmap->successor; + + if (successor == NULL) { + error_setg(errp, "Cannot relinquish control if " + "there's no successor present"); + return NULL; + } + + name = bitmap->name; + bitmap->name = NULL; + successor->name = name; + bitmap->successor = NULL; + bdrv_release_dirty_bitmap(bs, bitmap); + + return successor; +} + +/** + * In cases of failure where we can no longer safely delete the parent, + * we may wish to re-join the parent and child/successor. + * The merged parent will be un-frozen, but not explicitly re-enabled. + */ +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *parent, + Error **errp) +{ + BdrvDirtyBitmap *successor = parent->successor; + + if (!successor) { + error_setg(errp, "Cannot reclaim a successor when none is present"); + return NULL; + } + + if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { + error_setg(errp, "Merging of parent and successor bitmap failed"); + return NULL; + } + bdrv_release_dirty_bitmap(bs, successor); + parent->successor = NULL; + + return parent; +} + +/** + * Truncates _all_ bitmaps attached to a BDS. + */ +static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bitmap; + uint64_t size = bdrv_nb_sectors(bs); + + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + hbitmap_truncate(bitmap->bitmap, size); + bitmap->size = size; + } +} + void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) { BdrvDirtyBitmap *bm, *next; QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { if (bm == bitmap) { + assert(!bdrv_dirty_bitmap_frozen(bm)); QLIST_REMOVE(bitmap, list); hbitmap_free(bitmap->bitmap); g_free(bitmap->name); @@ -5565,6 +3479,18 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) } } +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = true; +} + +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = false; +} + BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) { BdrvDirtyBitmap *bm; @@ -5574,10 +3500,11 @@ BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); - info->count = bdrv_get_dirty_count(bs, bm); + info->count = bdrv_get_dirty_count(bm); info->granularity = bdrv_dirty_bitmap_granularity(bm); info->has_name = !!bm->name; info->name = g_strdup(bm->name); + info->status = bdrv_dirty_bitmap_status(bm); entry->value = info; *plist = entry; plist = &entry->next; @@ -5620,43 +3547,53 @@ uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); } -void bdrv_dirty_iter_init(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) { hbitmap_iter_init(hbi, bitmap->bitmap, 0); 
} -void bdrv_set_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors) { + assert(bdrv_dirty_bitmap_enabled(bitmap)); hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } -void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, int64_t cur_sector, int nr_sectors) { + assert(bdrv_dirty_bitmap_enabled(bitmap)); hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); } -static void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_reset_all(bitmap->bitmap); +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) { BdrvDirtyBitmap *bitmap; QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + if (!bdrv_dirty_bitmap_enabled(bitmap)) { + continue; + } hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); } } -static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors) +/** + * Advance an HBitmapIter to an arbitrary offset. + */ +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) { - BdrvDirtyBitmap *bitmap; - QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { - hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); - } + assert(hbi->hb); + hbitmap_iter_init(hbi, hbi->hb, offset); } -int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) { return hbitmap_count(bitmap->bitmap); } @@ -5979,7 +3916,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs) } if (bs->io_limits_enabled) { - throttle_detach_aio_context(&bs->throttle_state); + throttle_timers_detach_aio_context(&bs->throttle_timers); } if (bs->drv->bdrv_detach_aio_context) { bs->drv->bdrv_detach_aio_context(bs); @@ -6015,7 +3952,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, bs->drv->bdrv_attach_aio_context(bs, new_context); } if (bs->io_limits_enabled) { - throttle_attach_aio_context(&bs->throttle_state, new_context); + throttle_timers_attach_aio_context(&bs->throttle_timers, new_context); } QLIST_FOREACH(ban, &bs->aio_notifiers, list) { @@ -6025,7 +3962,7 @@ void bdrv_attach_aio_context(BlockDriverState *bs, void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) { - bdrv_drain_all(); /* ensure there are no in-flight requests */ + bdrv_drain(bs); /* ensure there are no in-flight requests */ bdrv_detach_aio_context(bs); @@ -6074,12 +4011,6 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs, abort(); } -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier) -{ - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); -} - int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, BlockDriverAmendStatusCB *status_cb) { @@ -6180,36 +4111,6 @@ out: return to_replace_bs; } -void bdrv_io_plug(BlockDriverState *bs) -{ - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_io_plug) { - drv->bdrv_io_plug(bs); - } else if (bs->file) { - bdrv_io_plug(bs->file); - } -} - -void bdrv_io_unplug(BlockDriverState *bs) -{ - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_io_unplug) { - drv->bdrv_io_unplug(bs); - } else if (bs->file) { - bdrv_io_unplug(bs->file); - } -} - -void bdrv_flush_io_queue(BlockDriverState *bs) -{ - BlockDriver *drv = bs->drv; - if (drv && drv->bdrv_flush_io_queue) { - drv->bdrv_flush_io_queue(bs); - } 
else if (bs->file) { - bdrv_flush_io_queue(bs->file); - } -} - static bool append_open_options(QDict *d, BlockDriverState *bs) { const QDictEntry *entry;