#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "block/nbd.h"
#include "qemu/error-report.h"
+#include "module_block.h"
#include "qemu/module.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qbool.h"
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
+#include "qapi/util.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
QLIST_HEAD_INITIALIZER(bdrv_drivers);
-static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
- const char *reference, QDict *options, int flags,
- BlockDriverState *parent,
- const BdrvChildRole *child_role, Error **errp);
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+ const char *reference,
+ QDict *options, int flags,
+ BlockDriverState *parent,
+ const BdrvChildRole *child_role,
+ Error **errp);
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
-static void bdrv_close(BlockDriverState *bs);
-
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
-BlockDriverState *bdrv_new_root(void)
-{
- return bdrv_new();
-}
-
BlockDriverState *bdrv_new(void)
{
BlockDriverState *bs;
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
+ qemu_co_queue_init(&bs->flush_queue);
+
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
return bs;
}
-BlockDriver *bdrv_find_format(const char *format_name)
+static BlockDriver *bdrv_do_find_format(const char *format_name)
{
BlockDriver *drv1;
+
QLIST_FOREACH(drv1, &bdrv_drivers, list) {
if (!strcmp(drv1->format_name, format_name)) {
return drv1;
}
}
+
return NULL;
}
+BlockDriver *bdrv_find_format(const char *format_name)
+{
+ BlockDriver *drv1;
+ int i;
+
+ drv1 = bdrv_do_find_format(format_name);
+ if (drv1) {
+ return drv1;
+ }
+
+ /* The driver isn't registered, maybe we need to load a module */
+ for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
+ if (!strcmp(block_driver_modules[i].format_name, format_name)) {
+ block_module_load_one(block_driver_modules[i].library_name);
+ break;
+ }
+ }
+
+ return bdrv_do_find_format(format_name);
+}
+
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
static const char *whitelist_rw[] = {
assert(cco->drv);
ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
- if (local_err) {
- error_propagate(&cco->err, local_err);
- }
+ error_propagate(&cco->err, local_err);
cco->ret = ret;
}
/* Fast-path if already in coroutine context */
bdrv_create_co_entry(&cco);
} else {
- co = qemu_coroutine_create(bdrv_create_co_entry);
- qemu_coroutine_enter(co, &cco);
+ co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
+ qemu_coroutine_enter(co);
while (cco.ret == NOT_DONE) {
aio_poll(qemu_get_aio_context(), true);
}
}
ret = bdrv_create(drv, filename, opts, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
return drv;
}
+static BlockDriver *bdrv_do_find_protocol(const char *protocol)
+{
+ BlockDriver *drv1;
+
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
+ return drv1;
+ }
+ }
+
+ return NULL;
+}
+
BlockDriver *bdrv_find_protocol(const char *filename,
bool allow_protocol_prefix,
Error **errp)
char protocol[128];
int len;
const char *p;
+ int i;
/* TODO Drivers without bdrv_file_open must be specified explicitly */
len = sizeof(protocol) - 1;
memcpy(protocol, filename, len);
protocol[len] = '\0';
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (drv1->protocol_name &&
- !strcmp(drv1->protocol_name, protocol)) {
- return drv1;
+
+ drv1 = bdrv_do_find_protocol(protocol);
+ if (drv1) {
+ return drv1;
+ }
+
+ for (i = 0; i < (int)ARRAY_SIZE(block_driver_modules); ++i) {
+ if (block_driver_modules[i].protocol_name &&
+ !strcmp(block_driver_modules[i].protocol_name, protocol)) {
+ block_module_load_one(block_driver_modules[i].library_name);
+ break;
}
}
- error_setg(errp, "Unknown protocol '%s'", protocol);
- return NULL;
+ drv1 = bdrv_do_find_protocol(protocol);
+ if (!drv1) {
+ error_setg(errp, "Unknown protocol '%s'", protocol);
+ }
+ return drv1;
}
/*
return drv;
}
-static int find_image_format(BlockDriverState *bs, const char *filename,
+static int find_image_format(BdrvChild *file, const char *filename,
BlockDriver **pdrv, Error **errp)
{
+ BlockDriverState *bs = file->bs;
BlockDriver *drv;
uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
return ret;
}
- ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+ ret = bdrv_pread(file, 0, buf, sizeof(buf));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read image for determining its "
"format");
return 0;
}
+static void bdrv_child_cb_drained_begin(BdrvChild *child)
+{
+ BlockDriverState *bs = child->opaque;
+ bdrv_drained_begin(bs);
+}
+
+static void bdrv_child_cb_drained_end(BdrvChild *child)
+{
+ BlockDriverState *bs = child->opaque;
+ bdrv_drained_end(bs);
+}
+
/*
* Returns the options and flags that a temporary snapshot should get, based on
* the originally requested flags (the originally requested image will have
/* For temporary files, unconditional cache=unsafe is fine */
qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
+
+ /* Copy the read-only option from the parent */
+ qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
+
+ /* aio=native doesn't work for cache.direct=off, so disable it for the
+ * temporary snapshot */
+ *child_flags &= ~BDRV_O_NATIVE_AIO;
}
/*
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+ /* Inherit the read-only option from the parent if it's not set */
+ qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY);
+
/* Our block drivers take care to send flushes and respect unmap policy,
* so we can default to enable both on lower layers regardless of the
* corresponding parent options. */
- flags |= BDRV_O_UNMAP;
+ qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
/* Clear flags that only apply to the top layer */
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
const BdrvChildRole child_file = {
.inherit_options = bdrv_inherited_options,
+ .drained_begin = bdrv_child_cb_drained_begin,
+ .drained_end = bdrv_child_cb_drained_end,
};
/*
const BdrvChildRole child_format = {
.inherit_options = bdrv_inherited_fmt_options,
+ .drained_begin = bdrv_child_cb_drained_begin,
+ .drained_end = bdrv_child_cb_drained_end,
};
/*
qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
/* backing files always opened read-only */
- flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
+ qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on");
+ flags &= ~BDRV_O_COPY_ON_READ;
/* snapshot=on is handled on the top layer */
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
static const BdrvChildRole child_backing = {
.inherit_options = bdrv_backing_options,
+ .drained_begin = bdrv_child_cb_drained_begin,
+ .drained_end = bdrv_child_cb_drained_end,
};
static int bdrv_open_flags(BlockDriverState *bs, int flags)
if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
*flags |= BDRV_O_NOCACHE;
}
+
+ *flags &= ~BDRV_O_RDWR;
+
+ assert(qemu_opt_find(opts, BDRV_OPT_READ_ONLY));
+ if (!qemu_opt_get_bool(opts, BDRV_OPT_READ_ONLY, false)) {
+ *flags |= BDRV_O_RDWR;
+ }
+
}
static void update_options_from_flags(QDict *options, int flags)
qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
qbool_from_bool(flags & BDRV_O_NO_FLUSH));
}
+ if (!qdict_haskey(options, BDRV_OPT_READ_ONLY)) {
+ qdict_put(options, BDRV_OPT_READ_ONLY,
+ qbool_from_bool(!(flags & BDRV_O_RDWR)));
+ }
}
static void bdrv_assign_node_name(BlockDriverState *bs,
g_free(gen_node_name);
}
-static QemuOptsList bdrv_runtime_opts = {
+QemuOptsList bdrv_runtime_opts = {
.name = "bdrv_common",
.head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
.desc = {
.type = QEMU_OPT_BOOL,
.help = "Ignore flush requests",
},
+ {
+ .name = BDRV_OPT_READ_ONLY,
+ .type = QEMU_OPT_BOOL,
+ .help = "Node is opened in read-only mode",
+ },
+ {
+ .name = "detect-zeroes",
+ .type = QEMU_OPT_STRING,
+ .help = "try to optimize zero writes (off, on, unmap)",
+ },
+ {
+ .name = "discard",
+ .type = QEMU_OPT_STRING,
+ .help = "discard operation (ignore/off, unmap/on)",
+ },
{ /* end of list */ }
},
};
const char *filename;
const char *driver_name = NULL;
const char *node_name = NULL;
+ const char *discard;
+ const char *detect_zeroes;
QemuOpts *opts;
BlockDriver *drv;
Error *local_err = NULL;
goto fail_opts;
}
+ update_flags_from_options(&bs->open_flags, opts);
+
driver_name = qemu_opt_get(opts, "driver");
drv = bdrv_find_format(driver_name);
assert(drv != NULL);
goto fail_opts;
}
- bs->request_alignment = 512;
- bs->zero_beyond_eof = true;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
}
}
+ discard = qemu_opt_get(opts, "discard");
+ if (discard != NULL) {
+ if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
+ error_setg(errp, "Invalid discard option");
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+ }
+
+ detect_zeroes = qemu_opt_get(opts, "detect-zeroes");
+ if (detect_zeroes) {
+ BlockdevDetectZeroesOptions value =
+ qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+ detect_zeroes,
+ BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
+ BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+
+ if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+ !(bs->open_flags & BDRV_O_UNMAP))
+ {
+ error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+ "without setting discard operation to unmap");
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+
+ bs->detect_zeroes = value;
+ }
+
if (filename != NULL) {
pstrcpy(bs->filename, sizeof(bs->filename), filename);
} else {
bs->drv = drv;
bs->opaque = g_malloc0(drv->instance_size);
- /* Apply cache mode options */
- update_flags_from_options(&bs->open_flags, opts);
-
/* Open the image, either directly or using a protocol */
open_flags = bdrv_open_flags(bs, bs->open_flags);
if (drv->bdrv_file_open) {
assert(bdrv_opt_mem_align(bs) != 0);
assert(bdrv_min_mem_align(bs) != 0);
- assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
+ assert(is_power_of_2(bs->bl.request_alignment));
qemu_opts_del(opts);
return 0;
return 0;
}
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+{
+ BlockDriverState *old_bs = child->bs;
+
+ if (old_bs) {
+ if (old_bs->quiesce_counter && child->role->drained_end) {
+ child->role->drained_end(child);
+ }
+ QLIST_REMOVE(child, next_parent);
+ }
+
+ child->bs = new_bs;
+
+ if (new_bs) {
+ QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
+ if (new_bs->quiesce_counter && child->role->drained_begin) {
+ child->role->drained_begin(child);
+ }
+ }
+}
+
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
- const BdrvChildRole *child_role)
+ const BdrvChildRole *child_role,
+ void *opaque)
{
BdrvChild *child = g_new(BdrvChild, 1);
*child = (BdrvChild) {
- .bs = child_bs,
+ .bs = NULL,
.name = g_strdup(child_name),
.role = child_role,
+ .opaque = opaque,
};
- QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
+ bdrv_replace_child(child, child_bs);
return child;
}
const char *child_name,
const BdrvChildRole *child_role)
{
- BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
+ BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
+ parent_bs);
QLIST_INSERT_HEAD(&parent_bs->children, child, next);
return child;
}
QLIST_REMOVE(child, next);
child->next.le_prev = NULL;
}
- QLIST_REMOVE(child, next_parent);
+
+ bdrv_replace_child(child, NULL);
+
g_free(child->name);
g_free(child);
}
/* Otherwise we won't be able to commit due to check in bdrv_commit */
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
bs->backing_blocker);
+ /*
+ * We do backup in 3 ways:
+ * 1. drive backup
+ * The target bs is new opened, and the source is top BDS
+ * 2. blockdev backup
+ * Both the source and the target are top BDSes.
+ * 3. internal backup(used for block replication)
+ * Both the source and the target are backing file
+ *
+ * In case 1 and 2, neither the source nor the target is the backing file.
+ * In case 3, we will block the top BDS, so there is only one block job
+ * for the top BDS and its backing chain.
+ */
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+ bs->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+ bs->backing_blocker);
out:
bdrv_refresh_limits(bs, NULL);
}
qdict_put(options, "driver", qstring_from_str(bs->backing_format));
}
- backing_hd = NULL;
- ret = bdrv_open_inherit(&backing_hd,
- *backing_filename ? backing_filename : NULL,
- reference, options, 0, bs, &child_backing,
- errp);
- if (ret < 0) {
+ backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
+ reference, options, 0, bs, &child_backing,
+ errp);
+ if (!backing_hd) {
bs->open_flags |= BDRV_O_NO_BACKING;
error_prepend(errp, "Could not open backing file: ");
+ ret = -EINVAL;
goto free_exit;
}
BdrvChild *c = NULL;
BlockDriverState *bs;
QDict *image_options;
- int ret;
char *bdref_key_dot;
const char *reference;
goto done;
}
- bs = NULL;
- ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
- parent, child_role, errp);
- if (ret < 0) {
+ bs = bdrv_open_inherit(filename, reference, image_options, 0,
+ parent, child_role, errp);
+ if (!bs) {
goto done;
}
return c;
}
-static int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags,
- QDict *snapshot_options, Error **errp)
+static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
+ int flags,
+ QDict *snapshot_options,
+ Error **errp)
{
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char *tmp_filename = g_malloc0(PATH_MAX + 1);
int64_t total_size;
QemuOpts *opts = NULL;
BlockDriverState *bs_snapshot;
- Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
/* Get the required size from the image */
total_size = bdrv_getlength(bs);
if (total_size < 0) {
- ret = total_size;
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
qdict_put(snapshot_options, "driver",
qstring_from_str("qcow2"));
- bs_snapshot = bdrv_new();
-
- ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
- flags, &local_err);
+ bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
snapshot_options = NULL;
- if (ret < 0) {
- error_propagate(errp, local_err);
+ if (!bs_snapshot) {
+ ret = -EINVAL;
goto out;
}
+ /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will
+ * call bdrv_unref() on it), so in order to be able to return one, we have
+ * to increase bs_snapshot's refcount here */
+ bdrv_ref(bs_snapshot);
bdrv_append(bs_snapshot, bs);
+ g_free(tmp_filename);
+ return bs_snapshot;
+
out:
QDECREF(snapshot_options);
g_free(tmp_filename);
- return ret;
+ return NULL;
}
/*
* should be opened. If specified, neither options nor a filename may be given,
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
*/
-static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
- const char *reference, QDict *options, int flags,
- BlockDriverState *parent,
- const BdrvChildRole *child_role, Error **errp)
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+ const char *reference,
+ QDict *options, int flags,
+ BlockDriverState *parent,
+ const BdrvChildRole *child_role,
+ Error **errp)
{
int ret;
BdrvChild *file = NULL;
QDict *snapshot_options = NULL;
int snapshot_flags = 0;
- assert(pbs);
assert(!child_role || !flags);
assert(!child_role == !parent);
bool options_non_empty = options ? qdict_size(options) : false;
QDECREF(options);
- if (*pbs) {
- error_setg(errp, "Cannot reuse an existing BDS when referencing "
- "another block device");
- return -EINVAL;
- }
-
if (filename || options_non_empty) {
error_setg(errp, "Cannot reference an existing block device with "
"additional options or a new filename");
- return -EINVAL;
+ return NULL;
}
bs = bdrv_lookup_bs(reference, reference, errp);
if (!bs) {
- return -ENODEV;
+ return NULL;
}
bdrv_ref(bs);
- *pbs = bs;
- return 0;
+ return bs;
}
- if (*pbs) {
- bs = *pbs;
- } else {
- bs = bdrv_new();
- }
+ bs = bdrv_new();
/* NULL means an empty set of options */
if (options == NULL) {
/* json: syntax counts as explicit options, as if in the QDict */
parse_json_protocol(options, &filename, &local_err);
if (local_err) {
- ret = -EINVAL;
goto fail;
}
goto fail;
}
+ /* Set the BDRV_O_RDWR and BDRV_O_ALLOW_RDWR flags.
+ * FIXME: we're parsing the QDict to avoid having to create a
+ * QemuOpts just for this, but neither option is optimal. */
+ if (g_strcmp0(qdict_get_try_str(options, BDRV_OPT_READ_ONLY), "on") &&
+ !qdict_get_try_bool(options, BDRV_OPT_READ_ONLY, false)) {
+ flags |= (BDRV_O_RDWR | BDRV_O_ALLOW_RDWR);
+ } else {
+ flags &= ~BDRV_O_RDWR;
+ }
+
+ if (flags & BDRV_O_SNAPSHOT) {
+ snapshot_options = qdict_new();
+ bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
+ flags, options);
+ /* Let bdrv_backing_options() override "read-only" */
+ qdict_del(options, BDRV_OPT_READ_ONLY);
+ bdrv_backing_options(&flags, options, flags, options);
+ }
+
bs->open_flags = flags;
bs->options = options;
options = qdict_clone_shallow(options);
drv = bdrv_find_format(drvname);
if (!drv) {
error_setg(errp, "Unknown driver: '%s'", drvname);
- ret = -EINVAL;
goto fail;
}
}
/* Open image file without format layer */
if ((flags & BDRV_O_PROTOCOL) == 0) {
- if (flags & BDRV_O_RDWR) {
- flags |= BDRV_O_ALLOW_RDWR;
- }
- if (flags & BDRV_O_SNAPSHOT) {
- snapshot_options = qdict_new();
- bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
- flags, options);
- bdrv_backing_options(&flags, options, flags, options);
- }
-
- bs->open_flags = flags;
-
file = bdrv_open_child(filename, options, "file", bs,
&child_file, true, &local_err);
if (local_err) {
- ret = -EINVAL;
goto fail;
}
}
/* Image format probing */
bs->probed = !drv;
if (!drv && file) {
- ret = find_image_format(file->bs, filename, &drv, &local_err);
+ ret = find_image_format(file, filename, &drv, &local_err);
if (ret < 0) {
goto fail;
}
qdict_put(options, "driver", qstring_from_str(drv->format_name));
} else if (!drv) {
error_setg(errp, "Must specify either driver or file");
- ret = -EINVAL;
goto fail;
}
drv->format_name, entry->key);
}
- ret = -EINVAL;
goto close_and_fail;
}
&& !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
error_setg(errp,
"Guest must be stopped for opening of encrypted image");
- ret = -EBUSY;
goto close_and_fail;
}
QDECREF(options);
- *pbs = bs;
/* For snapshot=on, create a temporary qcow2 overlay. bs points to the
* temporary snapshot afterwards. */
if (snapshot_flags) {
- ret = bdrv_append_temp_snapshot(bs, snapshot_flags, snapshot_options,
- &local_err);
+ BlockDriverState *snapshot_bs;
+ snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
+ snapshot_options, &local_err);
snapshot_options = NULL;
if (local_err) {
goto close_and_fail;
}
+ /* We are not going to return bs but the overlay on top of it
+ * (snapshot_bs); thus, we have to drop the strong reference to bs
+ * (which we obtained by calling bdrv_new()). bs will not be deleted,
+ * though, because the overlay still has a reference to it. */
+ bdrv_unref(bs);
+ bs = snapshot_bs;
}
- return 0;
+ return bs;
fail:
if (file != NULL) {
QDECREF(bs->options);
QDECREF(options);
bs->options = NULL;
- if (!*pbs) {
- /* If *pbs is NULL, a new BDS has been created in this function and
- needs to be freed now. Otherwise, it does not need to be closed,
- since it has not really been opened yet. */
- bdrv_unref(bs);
- }
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ bdrv_unref(bs);
+ error_propagate(errp, local_err);
+ return NULL;
close_and_fail:
- /* See fail path, but now the BDS has to be always closed */
- if (*pbs) {
- bdrv_close(bs);
- } else {
- bdrv_unref(bs);
- }
+ bdrv_unref(bs);
QDECREF(snapshot_options);
QDECREF(options);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ error_propagate(errp, local_err);
+ return NULL;
}
-int bdrv_open(BlockDriverState **pbs, const char *filename,
- const char *reference, QDict *options, int flags, Error **errp)
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp)
{
- return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
+ return bdrv_open_inherit(filename, reference, options, flags, NULL,
NULL, errp);
}
options = qdict_new();
}
+ /* Check if this BlockDriverState is already in the queue */
+ QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+ if (bs == bs_entry->state.bs) {
+ break;
+ }
+ }
+
/*
* Precedence of options:
* 1. Explicitly passed in options (highest)
}
/* Old explicitly set values (don't overwrite by inherited value) */
- old_options = qdict_clone_shallow(bs->explicit_options);
+ if (bs_entry) {
+ old_options = qdict_clone_shallow(bs_entry->state.explicit_options);
+ } else {
+ old_options = qdict_clone_shallow(bs->explicit_options);
+ }
bdrv_join_options(bs, options, old_options);
QDECREF(old_options);
child->role, options, flags);
}
- bs_entry = g_new0(BlockReopenQueueEntry, 1);
- QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+ if (!bs_entry) {
+ bs_entry = g_new0(BlockReopenQueueEntry, 1);
+ QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+ } else {
+ QDECREF(bs_entry->state.options);
+ QDECREF(bs_entry->state.explicit_options);
+ }
bs_entry->state.bs = bs;
bs_entry->state.options = options;
BdrvAioNotifier *ban, *ban_next;
assert(!bs->job);
+ assert(!bs->refcnt);
bdrv_drained_begin(bs); /* complete I/O */
bdrv_flush(bs);
bdrv_release_named_dirty_bitmaps(bs);
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
- bdrv_parent_cb_change_media(bs, false);
-
if (bs->drv) {
BdrvChild *child, *next;
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
- bs->encrypted = 0;
- bs->valid_key = 0;
- bs->sg = 0;
- bs->zero_beyond_eof = false;
+ bs->encrypted = false;
+ bs->valid_key = false;
+ bs->sg = false;
QDECREF(bs->options);
QDECREF(bs->explicit_options);
bs->options = NULL;
void bdrv_close_all(void)
{
- BlockDriverState *bs;
- AioContext *aio_context;
+ block_job_cancel_sync_all();
+ nbd_export_close_all();
/* Drop references from requests still in flight, such as canceled block
* jobs whose AIO context has not been polled yet */
blk_remove_all_bs();
blockdev_close_all_bdrv_states();
- /* Cancel all block jobs */
- while (!QTAILQ_EMPTY(&all_bdrv_states)) {
- QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
- aio_context = bdrv_get_aio_context(bs);
-
- aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_cancel_sync(bs->job);
- aio_context_release(aio_context);
- break;
- }
- aio_context_release(aio_context);
- }
-
- /* All the remaining BlockDriverStates are referenced directly or
- * indirectly from block jobs, so there needs to be at least one BDS
- * directly used by a block job */
- assert(bs);
- }
+ assert(QTAILQ_EMPTY(&all_bdrv_states));
}
static void change_parent_backing_link(BlockDriverState *from,
BlockDriverState *to)
{
- BdrvChild *c, *next;
-
- if (from->blk) {
- /* FIXME We bypass blk_set_bs(), so we need to make these updates
- * manually. The root problem is not in this change function, but the
- * existence of BlockDriverState.blk. */
- to->blk = from->blk;
- from->blk = NULL;
- }
+ BdrvChild *c, *next, *to_c;
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ if (c->role == &child_backing) {
+ /* @from is generally not allowed to be a backing file, except for
+ * when @to is the overlay. In that case, @from may not be replaced
+ * by @to as @to's backing node. */
+ QLIST_FOREACH(to_c, &to->children, next) {
+ if (to_c == c) {
+ break;
+ }
+ }
+ if (to_c) {
+ continue;
+ }
+ }
+
assert(c->role != &child_backing);
- c->bs = to;
- QLIST_REMOVE(c, next_parent);
- QLIST_INSERT_HEAD(&to->parents, c, next_parent);
bdrv_ref(to);
+ bdrv_replace_child(c, to);
bdrv_unref(from);
}
}
change_parent_backing_link(old, new);
- /* Change backing files if a previously independent node is added to the
- * chain. For active commit, we replace top by its own (indirect) backing
- * file and don't do anything here so we don't build a loop. */
- if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
- bdrv_set_backing_hd(new, backing_bs(old));
- bdrv_set_backing_hd(old, NULL);
- }
-
bdrv_unref(old);
}
return bs->drv->bdrv_check(bs, res, fix);
}
-#define COMMIT_BUF_SECTORS 2048
-
-/* commit COW file into the raw image */
-int bdrv_commit(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- int64_t sector, total_sectors, length, backing_length;
- int n, ro, open_flags;
- int ret = 0;
- uint8_t *buf = NULL;
-
- if (!drv)
- return -ENOMEDIUM;
-
- if (!bs->backing) {
- return -ENOTSUP;
- }
-
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
- bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
- return -EBUSY;
- }
-
- ro = bs->backing->bs->read_only;
- open_flags = bs->backing->bs->open_flags;
-
- if (ro) {
- if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
- return -EACCES;
- }
- }
-
- length = bdrv_getlength(bs);
- if (length < 0) {
- ret = length;
- goto ro_cleanup;
- }
-
- backing_length = bdrv_getlength(bs->backing->bs);
- if (backing_length < 0) {
- ret = backing_length;
- goto ro_cleanup;
- }
-
- /* If our top snapshot is larger than the backing file image,
- * grow the backing file image if possible. If not possible,
- * we must return an error */
- if (length > backing_length) {
- ret = bdrv_truncate(bs->backing->bs, length);
- if (ret < 0) {
- goto ro_cleanup;
- }
- }
-
- total_sectors = length >> BDRV_SECTOR_BITS;
-
- /* qemu_try_blockalign() for bs will choose an alignment that works for
- * bs->backing->bs as well, so no need to compare the alignment manually. */
- buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
- if (buf == NULL) {
- ret = -ENOMEM;
- goto ro_cleanup;
- }
-
- for (sector = 0; sector < total_sectors; sector += n) {
- ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
- if (ret < 0) {
- goto ro_cleanup;
- }
- if (ret) {
- ret = bdrv_read(bs, sector, buf, n);
- if (ret < 0) {
- goto ro_cleanup;
- }
-
- ret = bdrv_write(bs->backing->bs, sector, buf, n);
- if (ret < 0) {
- goto ro_cleanup;
- }
- }
- }
-
- if (drv->bdrv_make_empty) {
- ret = drv->bdrv_make_empty(bs);
- if (ret < 0) {
- goto ro_cleanup;
- }
- bdrv_flush(bs);
- }
-
- /*
- * Make sure all data we wrote to the backing device is actually
- * stable on disk.
- */
- if (bs->backing) {
- bdrv_flush(bs->backing->bs);
- }
-
- ret = 0;
-ro_cleanup:
- qemu_vfree(buf);
-
- if (ro) {
- /* ignoring error return here */
- bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
- }
-
- return ret;
-}
-
/*
* Return values:
* 0 - success
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
bdrv_dirty_bitmap_truncate(bs);
bdrv_parent_cb_resize(bs);
+ ++bs->write_gen;
}
return ret;
}
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
-int bdrv_is_read_only(BlockDriverState *bs)
+bool bdrv_is_read_only(BlockDriverState *bs)
{
return bs->read_only;
}
-int bdrv_is_sg(BlockDriverState *bs)
+bool bdrv_is_sg(BlockDriverState *bs)
{
return bs->sg;
}
-int bdrv_is_encrypted(BlockDriverState *bs)
+bool bdrv_is_encrypted(BlockDriverState *bs)
{
if (bs->backing && bs->backing->bs->encrypted) {
- return 1;
+ return true;
}
return bs->encrypted;
}
-int bdrv_key_required(BlockDriverState *bs)
+bool bdrv_key_required(BlockDriverState *bs)
{
BdrvChild *backing = bs->backing;
if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
- return 1;
+ return true;
}
return (bs->encrypted && !bs->valid_key);
}
}
ret = bs->drv->bdrv_set_key(bs, key);
if (ret < 0) {
- bs->valid_key = 0;
+ bs->valid_key = false;
} else if (!bs->valid_key) {
/* call the change callback now, we skipped it on open */
- bs->valid_key = 1;
+ bs->valid_key = true;
bdrv_parent_cb_change_media(bs, true);
}
return ret;
return bs->node_name;
}
-static const char *bdrv_get_parent_name(const BlockDriverState *bs)
+const char *bdrv_get_parent_name(const BlockDriverState *bs)
{
BdrvChild *c;
const char *name;
{
BlockDriverInfo bdi;
- if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
return false;
}
return false;
}
-int bdrv_is_snapshot(BlockDriverState *bs)
-{
- return !!(bs->open_flags & BDRV_O_SNAPSHOT);
-}
-
/* backing_file can either be relative, or absolute, or a protocol. If it is
* relative, it must be relative to the chain. So, passing in bs->filename
* from a BDS as backing_file should not be done, as that may be relative to
{
BlockDriverState *bs;
Error *local_err = NULL;
- BdrvNextIterator *it = NULL;
+ BdrvNextIterator it;
- while ((it = bdrv_next(it, &bs)) != NULL) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
int bdrv_inactivate_all(void)
{
BlockDriverState *bs = NULL;
- BdrvNextIterator *it = NULL;
+ BdrvNextIterator it;
int ret = 0;
int pass;
- while ((it = bdrv_next(it, &bs)) != NULL) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
aio_context_acquire(bdrv_get_aio_context(bs));
}
* the second pass sets the BDRV_O_INACTIVE flag so that no further write
* is allowed. */
for (pass = 0; pass < 2; pass++) {
- it = NULL;
- while ((it = bdrv_next(it, &bs)) != NULL) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
ret = bdrv_inactivate_recurse(bs, pass);
if (ret < 0) {
goto out;
}
out:
- it = NULL;
- while ((it = bdrv_next(it, &bs)) != NULL) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
aio_context_release(bdrv_get_aio_context(bs));
}
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
BlockDriver *drv = bs->drv;
- const char *device_name;
if (drv && drv->bdrv_eject) {
drv->bdrv_eject(bs, eject_flag);
}
-
- device_name = bdrv_get_device_name(bs);
- if (device_name[0] != '\0') {
- qapi_event_send_device_tray_moved(device_name,
- eject_flag, &error_abort);
- }
}
/**
qstring_from_str(backing_fmt));
}
- bs = NULL;
- ret = bdrv_open(&bs, full_backing, NULL, backing_options,
- back_flags, &local_err);
+ bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
+ &local_err);
g_free(full_backing);
- if (ret < 0) {
+ if (!bs) {
goto out;
}
size = bdrv_getlength(bs);
out:
qemu_opts_del(opts);
qemu_opts_free(create_opts);
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
}
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
return bs->aio_context;
}
+static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
+{
+ QLIST_REMOVE(ban, list);
+ g_free(ban);
+}
+
void bdrv_detach_aio_context(BlockDriverState *bs)
{
- BdrvAioNotifier *baf;
+ BdrvAioNotifier *baf, *baf_tmp;
+ BdrvChild *child;
if (!bs->drv) {
return;
}
- QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
- baf->detach_aio_context(baf->opaque);
+ assert(!bs->walking_aio_notifiers);
+ bs->walking_aio_notifiers = true;
+ QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
+ if (baf->deleted) {
+ bdrv_do_remove_aio_context_notifier(baf);
+ } else {
+ baf->detach_aio_context(baf->opaque);
+ }
}
+ /* Never mind iterating again to check for ->deleted. bdrv_close() will
+ * remove remaining aio notifiers if we aren't called again.
+ */
+ bs->walking_aio_notifiers = false;
if (bs->drv->bdrv_detach_aio_context) {
bs->drv->bdrv_detach_aio_context(bs);
}
- if (bs->file) {
- bdrv_detach_aio_context(bs->file->bs);
- }
- if (bs->backing) {
- bdrv_detach_aio_context(bs->backing->bs);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_detach_aio_context(child->bs);
}
bs->aio_context = NULL;
void bdrv_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
- BdrvAioNotifier *ban;
+ BdrvAioNotifier *ban, *ban_tmp;
+ BdrvChild *child;
if (!bs->drv) {
return;
bs->aio_context = new_context;
- if (bs->backing) {
- bdrv_attach_aio_context(bs->backing->bs, new_context);
- }
- if (bs->file) {
- bdrv_attach_aio_context(bs->file->bs, new_context);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_attach_aio_context(child->bs, new_context);
}
if (bs->drv->bdrv_attach_aio_context) {
bs->drv->bdrv_attach_aio_context(bs, new_context);
}
- QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
- ban->attached_aio_context(new_context, ban->opaque);
+ assert(!bs->walking_aio_notifiers);
+ bs->walking_aio_notifiers = true;
+ QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
+ if (ban->deleted) {
+ bdrv_do_remove_aio_context_notifier(ban);
+ } else {
+ ban->attached_aio_context(new_context, ban->opaque);
+ }
}
+ bs->walking_aio_notifiers = false;
}
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
if (ban->attached_aio_context == attached_aio_context &&
ban->detach_aio_context == detach_aio_context &&
- ban->opaque == opaque)
+ ban->opaque == opaque &&
+ ban->deleted == false)
{
- QLIST_REMOVE(ban, list);
- g_free(ban);
-
+ if (bs->walking_aio_notifiers) {
+ ban->deleted = true;
+ } else {
+ bdrv_do_remove_aio_context_notifier(ban);
+ }
return;
}
}
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
BlockDriverState *bs;
- BdrvNextIterator *it = NULL;
+ BdrvNextIterator it;
/* walk down the bs forest recursively */
- while ((it = bdrv_next(it, &bs)) != NULL) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
bool perm;
/* try to recurse in this top level bs */